29 files changed, 685 insertions, 550 deletions
diff --git a/libswscale/Makefile b/libswscale/Makefile
index 5671b2e..942545d 100644
--- a/libswscale/Makefile
+++ b/libswscale/Makefile
@@ -1,3 +1,5 @@
+include $(SUBDIR)../config.mak
+
 NAME = swscale
 FFLIBS = avutil
 
@@ -19,6 +21,8 @@ OBJS-$(HAVE_MMX)           +=  x86/rgb2rgb.o            \
 OBJS-$(HAVE_VIS)           +=  sparc/yuv2rgb_vis.o
 OBJS-$(HAVE_YASM)          +=  x86/scale.o
 
+$(SUBDIR)x86/swscale_mmx.o: CFLAGS += $(NOREDZONE_FLAGS)
+
 TESTPROGS = colorspace swscale
 
 DIRS = bfin mlib ppc sparc x86
diff --git a/libswscale/bfin/internal_bfin.S b/libswscale/bfin/internal_bfin.S
index 9f985e7..cb8d712 100644
--- a/libswscale/bfin/internal_bfin.S
+++ b/libswscale/bfin/internal_bfin.S
@@ -5,20 +5,20 @@
  * Blackfin video color space converter operations
  * convert I420 YV12 to RGB in various formats
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
diff --git a/libswscale/bfin/swscale_bfin.c b/libswscale/bfin/swscale_bfin.c
index 0c5f004..870636e 100644
--- a/libswscale/bfin/swscale_bfin.c
+++ b/libswscale/bfin/swscale_bfin.c
@@ -3,20 +3,20 @@
  *
  * Blackfin software video scaler operations
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
diff --git a/libswscale/bfin/yuv2rgb_bfin.c b/libswscale/bfin/yuv2rgb_bfin.c
index 68af522..7a7dc7f 100644
--- a/libswscale/bfin/yuv2rgb_bfin.c
+++ b/libswscale/bfin/yuv2rgb_bfin.c
@@ -4,20 +4,20 @@
  * Blackfin video color space converter operations
  * convert I420 YV12 to RGB in various formats
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -28,6 +28,7 @@
 #include <assert.h>
 #include "config.h"
 #include <unistd.h>
+#include "libavutil/pixdesc.h"
 #include "libswscale/rgb2rgb.h"
 #include "libswscale/swscale.h"
 #include "libswscale/swscale_internal.h"
@@ -197,7 +198,7 @@ SwsFunc ff_yuv2rgb_get_func_ptr_bfin(SwsContext *c)
     }
 
     av_log(c, AV_LOG_INFO, "BlackFin accelerated color space converter %s\n",
-           sws_format_name (c->dstFormat));
+           av_get_pix_fmt_name(c->dstFormat));
 
     return f;
 }
diff --git a/libswscale/colorspace-test.c b/libswscale/colorspace-test.c
index 07c1cbd..34095d8 100644
--- a/libswscale/colorspace-test.c
+++ b/libswscale/colorspace-test.c
@@ -1,20 +1,20 @@
 /*
  * Copyright (C) 2002 Michael Niedermayer <michaelni@gmx.at>
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
diff --git a/libswscale/mlib/yuv2rgb_mlib.c b/libswscale/mlib/yuv2rgb_mlib.c
index 526c975..e9f1149 100644
--- a/libswscale/mlib/yuv2rgb_mlib.c
+++ b/libswscale/mlib/yuv2rgb_mlib.c
@@ -3,20 +3,20 @@
  *
  * Copyright (C) 2003 Michael Niedermayer <michaelni@gmx.at>
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
diff --git a/libswscale/options.c b/libswscale/options.c
index 9ba6e5e..65095d5 100644
--- a/libswscale/options.c
+++ b/libswscale/options.c
@@ -1,20 +1,20 @@
 /*
  * Copyright (C) 2001-2003 Michael Niedermayer <michaelni@gmx.at>
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
diff --git a/libswscale/ppc/swscale_altivec.c b/libswscale/ppc/swscale_altivec.c
index e73750f..076929c 100644
--- a/libswscale/ppc/swscale_altivec.c
+++ b/libswscale/ppc/swscale_altivec.c
@@ -4,20 +4,20 @@
  * Copyright (C) 2004 Romain Dolbeau <romain@dolbeau.org>
  * based on the equivalent C code in swscale.c
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
diff --git a/libswscale/ppc/yuv2rgb_altivec.c b/libswscale/ppc/yuv2rgb_altivec.c
index 73c02e9..8e84c26 100644
--- a/libswscale/ppc/yuv2rgb_altivec.c
+++ b/libswscale/ppc/yuv2rgb_altivec.c
@@ -3,20 +3,20 @@
  *
  * copyright (C) 2004 Marc Hoffman <marc.hoffman@analog.com>
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -95,6 +95,7 @@ adjustment.
 #include "libswscale/swscale.h"
 #include "libswscale/swscale_internal.h"
 #include "libavutil/cpu.h"
+#include "libavutil/pixdesc.h"
 #include "yuv2rgb_altivec.h"
 
 #undef PROFILE_THE_BEAST
@@ -298,7 +299,7 @@ static int altivec_##name (SwsContext *c,                               \
     vector signed short R1,G1,B1;                                       \
     vector unsigned char R,G,B;                                         \
                                                                         \
-    vector unsigned char *y1ivP, *y2ivP, *uivP, *vivP;                  \
+    const vector unsigned char *y1ivP, *y2ivP, *uivP, *vivP;            \
     vector unsigned char align_perm;                                    \
                                                                         \
     vector signed short                                                 \
@@ -335,10 +336,10 @@ static int altivec_##name (SwsContext *c,                               \
                                                                         \
         for (j=0;j<w/16;j++) {                                          \
                                                                         \
-            y1ivP = (vector unsigned char *)y1i;                        \
-            y2ivP = (vector unsigned char *)y2i;                        \
-            uivP  = (vector unsigned char *)ui;                         \
-            vivP  = (vector unsigned char *)vi;                         \
+            y1ivP = (const vector unsigned char *)y1i;                  \
+            y2ivP = (const vector unsigned char *)y2i;                  \
+            uivP  = (const vector unsigned char *)ui;                   \
+            vivP  = (const vector unsigned char *)vi;                   \
                                                                         \
             align_perm = vec_lvsl (0, y1i);                             \
             y0 = (vector unsigned char)                                 \
@@ -720,7 +721,7 @@ ff_yuv2packedX_altivec(SwsContext *c, const int16_t *lumFilter,
                 static int printed_error_message;
                 if (!printed_error_message) {
                     av_log(c, AV_LOG_ERROR, "altivec_yuv2packedX doesn't support %s output\n",
-                           sws_format_name(c->dstFormat));
+                           av_get_pix_fmt_name(c->dstFormat));
                     printed_error_message=1;
                 }
                 return;
@@ -795,7 +796,7 @@ ff_yuv2packedX_altivec(SwsContext *c, const int16_t *lumFilter,
         default:
             /* Unreachable, I think. */
             av_log(c, AV_LOG_ERROR, "altivec_yuv2packedX doesn't support %s output\n",
-                   sws_format_name(c->dstFormat));
+                   av_get_pix_fmt_name(c->dstFormat));
             return;
         }
 
diff --git a/libswscale/ppc/yuv2rgb_altivec.h b/libswscale/ppc/yuv2rgb_altivec.h
index b809fe1..163eba6 100644
--- a/libswscale/ppc/yuv2rgb_altivec.h
+++ b/libswscale/ppc/yuv2rgb_altivec.h
@@ -4,20 +4,20 @@
  * Copyright (C) 2004 Romain Dolbeau <romain@dolbeau.org>
  * based on the equivalent C code in swscale.c
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
diff --git a/libswscale/ppc/yuv2yuv_altivec.c b/libswscale/ppc/yuv2yuv_altivec.c
index 4cd02ff..82c265a 100644
--- a/libswscale/ppc/yuv2yuv_altivec.c
+++ b/libswscale/ppc/yuv2yuv_altivec.c
@@ -4,20 +4,20 @@
  * Copyright (C) 2004 Romain Dolbeau <romain@dolbeau.org>
  * based on the equivalent C code in swscale.c
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
diff --git a/libswscale/rgb2rgb.c b/libswscale/rgb2rgb.c
index 51cb600..9a7f698 100644
--- a/libswscale/rgb2rgb.c
+++ b/libswscale/rgb2rgb.c
@@ -6,20 +6,20 @@
  * Written by Nick Kurshev.
  * palette & YUV & runtime CPU stuff by Michael (michaelni@gmx.at)
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 #include <inttypes.h>
diff --git a/libswscale/rgb2rgb.h b/libswscale/rgb2rgb.h
index 9d051de..e3edac8 100644
--- a/libswscale/rgb2rgb.h
+++ b/libswscale/rgb2rgb.h
@@ -6,20 +6,20 @@
  *  Written by Nick Kurshev.
  *  YUV & runtime CPU stuff by Michael (michaelni@gmx.at)
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
diff --git a/libswscale/rgb2rgb_template.c b/libswscale/rgb2rgb_template.c
index c02015e..0734e88 100644
--- a/libswscale/rgb2rgb_template.c
+++ b/libswscale/rgb2rgb_template.c
@@ -7,20 +7,20 @@
  * palette & YUV & runtime CPU stuff by Michael (michaelni@gmx.at)
  * lot of big-endian byte order fixes by Alex Beregszaszi
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -658,6 +658,9 @@ void rgb24toyv12_c(const uint8_t *src, uint8_t *ydst, uint8_t *udst,
         ydst += lumStride;
         src  += srcStride;
 
+        if(y+1 == height)
+            break;
+
         for (i=0; i<chromWidth; i++) {
             unsigned int b = src[6*i+0];
             unsigned int g = src[6*i+1];
diff --git a/libswscale/sparc/yuv2rgb_vis.c b/libswscale/sparc/yuv2rgb_vis.c
index 2111ea8..cc98f04 100644
--- a/libswscale/sparc/yuv2rgb_vis.c
+++ b/libswscale/sparc/yuv2rgb_vis.c
@@ -2,20 +2,20 @@
  * VIS optimized software YUV to RGB converter
  * Copyright (c) 2007 Denes Balatoni <dbalatoni@programozo.hu>
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
diff --git a/libswscale/swscale-test.c b/libswscale/swscale-test.c
index b5cf1d2..888cbab 100644
--- a/libswscale/swscale-test.c
+++ b/libswscale/swscale-test.c
@@ -1,20 +1,20 @@
 /*
  * Copyright (C) 2003 Michael Niedermayer <michaelni@gmx.at>
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
diff --git a/libswscale/swscale.c b/libswscale/swscale.c
index 5d90250..31346a3 100644
--- a/libswscale/swscale.c
+++ b/libswscale/swscale.c
@@ -1,20 +1,20 @@
 /*
  * Copyright (C) 2001-2003 Michael Niedermayer <michaelni@gmx.at>
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -60,6 +60,7 @@ untested special converters
 #include "swscale.h"
 #include "swscale_internal.h"
 #include "rgb2rgb.h"
+#include "libavutil/avassert.h"
 #include "libavutil/intreadwrite.h"
 #include "libavutil/cpu.h"
 #include "libavutil/avutil.h"
@@ -67,7 +68,6 @@ untested special converters
 #include "libavutil/bswap.h"
 #include "libavutil/pixdesc.h"
 
-#define DITHER1XBPP
 
 #define RGB2YUV_SHIFT 15
 #define BY ( (int)(0.114*219/255*(1<<RGB2YUV_SHIFT)+0.5))
@@ -195,6 +195,101 @@ DECLARE_ALIGNED(8, const uint8_t, dither_8x8_128)[8][8] = {
 DECLARE_ALIGNED(8, const uint8_t, ff_sws_pb_64)[8] =
 {  64, 64, 64, 64, 64, 64, 64, 64 };
 
+DECLARE_ALIGNED(8, const uint8_t, dithers)[8][8][8]={
+{
+  {   0,  1,  0,  1,  0,  1,  0,  1,},
+  {   1,  0,  1,  0,  1,  0,  1,  0,},
+  {   0,  1,  0,  1,  0,  1,  0,  1,},
+  {   1,  0,  1,  0,  1,  0,  1,  0,},
+  {   0,  1,  0,  1,  0,  1,  0,  1,},
+  {   1,  0,  1,  0,  1,  0,  1,  0,},
+  {   0,  1,  0,  1,  0,  1,  0,  1,},
+  {   1,  0,  1,  0,  1,  0,  1,  0,},
+},{
+  {   1,  2,  1,  2,  1,  2,  1,  2,},
+  {   3,  0,  3,  0,  3,  0,  3,  0,},
+  {   1,  2,  1,  2,  1,  2,  1,  2,},
+  {   3,  0,  3,  0,  3,  0,  3,  0,},
+  {   1,  2,  1,  2,  1,  2,  1,  2,},
+  {   3,  0,  3,  0,  3,  0,  3,  0,},
+  {   1,  2,  1,  2,  1,  2,  1,  2,},
+  {   3,  0,  3,  0,  3,  0,  3,  0,},
+},{
+  {   2,  4,  3,  5,  2,  4,  3,  5,},
+  {   6,  0,  7,  1,  6,  0,  7,  1,},
+  {   3,  5,  2,  4,  3,  5,  2,  4,},
+  {   7,  1,  6,  0,  7,  1,  6,  0,},
+  {   2,  4,  3,  5,  2,  4,  3,  5,},
+  {   6,  0,  7,  1,  6,  0,  7,  1,},
+  {   3,  5,  2,  4,  3,  5,  2,  4,},
+  {   7,  1,  6,  0,  7,  1,  6,  0,},
+},{
+  {   4,  8,  7, 11,  4,  8,  7, 11,},
+  {  12,  0, 15,  3, 12,  0, 15,  3,},
+  {   6, 10,  5,  9,  6, 10,  5,  9,},
+  {  14,  2, 13,  1, 14,  2, 13,  1,},
+  {   4,  8,  7, 11,  4,  8,  7, 11,},
+  {  12,  0, 15,  3, 12,  0, 15,  3,},
+  {   6, 10,  5,  9,  6, 10,  5,  9,},
+  {  14,  2, 13,  1, 14,  2, 13,  1,},
+},{
+  {   9, 17, 15, 23,  8, 16, 14, 22,},
+  {  25,  1, 31,  7, 24,  0, 30,  6,},
+  {  13, 21, 11, 19, 12, 20, 10, 18,},
+  {  29,  5, 27,  3, 28,  4, 26,  2,},
+  {   8, 16, 14, 22,  9, 17, 15, 23,},
+  {  24,  0, 30,  6, 25,  1, 31,  7,},
+  {  12, 20, 10, 18, 13, 21, 11, 19,},
+  {  28,  4, 26,  2, 29,  5, 27,  3,},
+},{
+  {  18, 34, 30, 46, 17, 33, 29, 45,},
+  {  50,  2, 62, 14, 49,  1, 61, 13,},
+  {  26, 42, 22, 38, 25, 41, 21, 37,},
+  {  58, 10, 54,  6, 57,  9, 53,  5,},
+  {  16, 32, 28, 44, 19, 35, 31, 47,},
+  {  48,  0, 60, 12, 51,  3, 63, 15,},
+  {  24, 40, 20, 36, 27, 43, 23, 39,},
+  {  56,  8, 52,  4, 59, 11, 55,  7,},
+},{
+  {  18, 34, 30, 46, 17, 33, 29, 45,},
+  {  50,  2, 62, 14, 49,  1, 61, 13,},
+  {  26, 42, 22, 38, 25, 41, 21, 37,},
+  {  58, 10, 54,  6, 57,  9, 53,  5,},
+  {  16, 32, 28, 44, 19, 35, 31, 47,},
+  {  48,  0, 60, 12, 51,  3, 63, 15,},
+  {  24, 40, 20, 36, 27, 43, 23, 39,},
+  {  56,  8, 52,  4, 59, 11, 55,  7,},
+},{
+  {  36, 68, 60, 92, 34, 66, 58, 90,},
+  { 100,  4,124, 28, 98,  2,122, 26,},
+  {  52, 84, 44, 76, 50, 82, 42, 74,},
+  { 116, 20,108, 12,114, 18,106, 10,},
+  {  32, 64, 56, 88, 38, 70, 62, 94,},
+  {  96,  0,120, 24,102,  6,126, 30,},
+  {  48, 80, 40, 72, 54, 86, 46, 78,},
+  { 112, 16,104,  8,118, 22,110, 14,},
+}};
+
+static const uint8_t flat64[8]={64,64,64,64,64,64,64,64};
+
+const uint16_t dither_scale[15][16]={
+{    2,    3,    3,    5,    5,    5,    5,    5,    5,    5,    5,    5,    5,    5,    5,    5,},
+{    2,    3,    7,    7,   13,   13,   25,   25,   25,   25,   25,   25,   25,   25,   25,   25,},
+{    3,    3,    4,   15,   15,   29,   57,   57,   57,  113,  113,  113,  113,  113,  113,  113,},
+{    3,    4,    4,    5,   31,   31,   61,  121,  241,  241,  241,  241,  481,  481,  481,  481,},
+{    3,    4,    5,    5,    6,   63,   63,  125,  249,  497,  993,  993,  993,  993,  993, 1985,},
+{    3,    5,    6,    6,    6,    7,  127,  127,  253,  505, 1009, 2017, 4033, 4033, 4033, 4033,},
+{    3,    5,    6,    7,    7,    7,    8,  255,  255,  509, 1017, 2033, 4065, 8129,16257,16257,},
+{    3,    5,    6,    8,    8,    8,    8,    9,  511,  511, 1021, 2041, 4081, 8161,16321,32641,},
+{    3,    5,    7,    8,    9,    9,    9,    9,   10, 1023, 1023, 2045, 4089, 8177,16353,32705,},
+{    3,    5,    7,    8,   10,   10,   10,   10,   10,   11, 2047, 2047, 4093, 8185,16369,32737,},
+{    3,    5,    7,    8,   10,   11,   11,   11,   11,   11,   12, 4095, 4095, 8189,16377,32753,},
+{    3,    5,    7,    9,   10,   12,   12,   12,   12,   12,   12,   13, 8191, 8191,16381,32761,},
+{    3,    5,    7,    9,   10,   12,   13,   13,   13,   13,   13,   13,   14,16383,16383,32765,},
+{    3,    5,    7,    9,   10,   12,   14,   14,   14,   14,   14,   14,   14,   15,32767,32767,},
+{    3,    5,    7,    9,   11,   12,   14,   15,   15,   15,   15,   15,   15,   15,   16,65535,},
+};
+
 static av_always_inline void
 yuv2yuvX16_c_template(const int16_t *lumFilter, const int32_t **lumSrc,
                       int lumFilterSize, const int16_t *chrFilter,
@@ -205,9 +300,10 @@ yuv2yuvX16_c_template(const int16_t *lumFilter, const int32_t **lumSrc,
 {
     //FIXME Optimize (just quickly written not optimized..)
     int i;
+    int dword= output_bits == 16;
     uint16_t *yDest = dest[0], *uDest = dest[1], *vDest = dest[2],
              *aDest = CONFIG_SWSCALE_ALPHA ? dest[3] : NULL;
-    int shift = 15 + 16 - output_bits - 1;
+    int shift = 11 + 4*dword + 16 - output_bits - 1;
 
 #define output_pixel(pos, val) \
     if (big_endian) { \
@@ -216,24 +312,24 @@ yuv2yuvX16_c_template(const int16_t *lumFilter, const int32_t **lumSrc,
         AV_WL16(pos, av_clip_uint16(val >> shift)); \
     }
     for (i = 0; i < dstW; i++) {
-        int val = 1 << (30-output_bits - 1);
+        int val = 1 << (26-output_bits + 4*dword - 1);
         int j;
 
         for (j = 0; j < lumFilterSize; j++)
-            val += (lumSrc[j][i] * lumFilter[j]) >> 1;
+            val += ((dword ? lumSrc[j][i] : ((int16_t**)lumSrc)[j][i]) * lumFilter[j])>>1;
 
         output_pixel(&yDest[i], val);
     }
 
     if (uDest) {
         for (i = 0; i < chrDstW; i++) {
-            int u = 1 << (30-output_bits - 1);
-            int v = 1 << (30-output_bits - 1);
+            int u = 1 << (26-output_bits + 4*dword - 1);
+            int v = 1 << (26-output_bits + 4*dword - 1);
             int j;
 
             for (j = 0; j < chrFilterSize; j++) {
-                u += (chrUSrc[j][i] * chrFilter[j]) >> 1;
-                v += (chrVSrc[j][i] * chrFilter[j]) >> 1;
+                u += ((dword ? chrUSrc[j][i] : ((int16_t**)chrUSrc)[j][i]) * chrFilter[j]) >> 1;
+                v += ((dword ? chrVSrc[j][i] : ((int16_t**)chrVSrc)[j][i]) * chrFilter[j]) >> 1;
             }
 
             output_pixel(&uDest[i], u);
@@ -243,11 +339,11 @@ yuv2yuvX16_c_template(const int16_t *lumFilter, const int32_t **lumSrc,
 
     if (CONFIG_SWSCALE_ALPHA && aDest) {
         for (i = 0; i < dstW; i++) {
-            int val = 1 << (30-output_bits - 1);
+            int val = 1 << (26-output_bits + 4*dword - 1);
             int j;
 
             for (j = 0; j < lumFilterSize; j++)
-                val += (alpSrc[j][i] * lumFilter[j]) >> 1;
+                val += ((dword ? alpSrc[j][i] : ((int16_t**)alpSrc)[j][i]) * lumFilter[j]) >> 1;
 
             output_pixel(&aDest[i], val);
         }
@@ -535,8 +631,8 @@ yuv2gray16_1_c_template(SwsContext *c, const int32_t *buf0,
     int i;
 
     for (i = 0; i < (dstW >> 1); i++) {
-        int Y1 = buf0[i * 2    ] << 1;
-        int Y2 = buf0[i * 2 + 1] << 1;
+        int Y1 = (buf0[i * 2    ]+4)>>3;
+        int Y2 = (buf0[i * 2 + 1]+4)>>3;
 
         output_pixel(&dest[i * 2 + 0], Y1);
         output_pixel(&dest[i * 2 + 1], Y2);
@@ -984,8 +1080,8 @@ yuv2rgb48_1_c_template(SwsContext *c, const int32_t *buf0,
         for (i = 0; i < (dstW >> 1); i++) {
             int Y1 = (buf0[i * 2]    ) >> 2;
             int Y2 = (buf0[i * 2 + 1]) >> 2;
-            int U  = (ubuf0[i] + ubuf1[i] + (-128 << 11)) >> 3;
-            int V  = (vbuf0[i] + vbuf1[i] + (-128 << 11)) >> 3;
+            int U  = (ubuf0[i] + ubuf1[i] + (-128 << 12)) >> 3;
+            int V  = (vbuf0[i] + vbuf1[i] + (-128 << 12)) >> 3;
             int R, G, B;
 
             Y1 -= c->yuv2rgb_y_offset;
@@ -1056,6 +1152,7 @@ yuv2rgb_write(uint8_t *_dest, int i, int Y1, int Y2,
 
 #define r_b ((target == PIX_FMT_RGB24) ? r : b)
 #define b_r ((target == PIX_FMT_RGB24) ? b : r)
+
         dest[i * 6 + 0] = r_b[Y1];
         dest[i * 6 + 1] =   g[Y1];
         dest[i * 6 + 2] = b_r[Y1];
@@ -1346,9 +1443,9 @@ yuv2rgb_full_X_c_template(SwsContext *c, const int16_t *lumFilter,
 
     for (i = 0; i < dstW; i++) {
         int j;
-        int Y = 0;
-        int U = -128 << 19;
-        int V = -128 << 19;
+        int Y = 1<<9;
+        int U = (1<<9)-(128 << 19);
+        int V = (1<<9)-(128 << 19);
         int av_unused A;
         int R, G, B;
 
@@ -1363,7 +1460,7 @@ yuv2rgb_full_X_c_template(SwsContext *c, const int16_t *lumFilter,
         U >>= 10;
         V >>= 10;
         if (hasAlpha) {
-            A = 1 << 21;
+            A = 1 << 18;
             for (j = 0; j < lumFilterSize; j++) {
                 A += alpSrc[j][i] * lumFilter[j];
             }
@@ -1406,7 +1503,6 @@ yuv2rgb_full_X_c_template(SwsContext *c, const int16_t *lumFilter,
             dest[1] = B >> 22;
             dest[2] = G >> 22;
             dest[3] = R >> 22;
-            dest += 4;
             break;
         case PIX_FMT_BGR24:
             dest[0] = B >> 22;
@@ -1514,7 +1610,7 @@ rgb48ToUV_half_c_template(uint16_t *dstU, uint16_t *dstV,
 #undef input_pixel
 
 #define rgb48funcs(pattern, BE_LE, origin) \
-static void pattern ## 48 ## BE_LE ## ToY_c(uint8_t *_dst, const uint8_t *_src, \
+static void pattern ## 48 ## BE_LE ## ToY_c(uint8_t *_dst, const uint8_t *_src, const uint8_t *unused0, const uint8_t *unused1,\
                                     int width, uint32_t *unused) \
 { \
     const uint16_t *src = (const uint16_t *) _src; \
@@ -1523,7 +1619,7 @@ static void pattern ## 48 ## BE_LE ## ToY_c(uint8_t *_dst, const uint8_t *_src,
 } \
  \
 static void pattern ## 48 ## BE_LE ## ToUV_c(uint8_t *_dstU, uint8_t *_dstV, \
-                                    const uint8_t *_src1, const uint8_t *_src2, \
+                                    const uint8_t *unused0, const uint8_t *_src1, const uint8_t *_src2, \
                                     int width, uint32_t *unused) \
 { \
     const uint16_t *src1 = (const uint16_t *) _src1, \
@@ -1533,7 +1629,7 @@ static void pattern ## 48 ## BE_LE ## ToUV_c(uint8_t *_dstU, uint8_t *_dstV, \
 } \
  \
 static void pattern ## 48 ## BE_LE ## ToUV_half_c(uint8_t *_dstU, uint8_t *_dstV, \
-                                    const uint8_t *_src1, const uint8_t *_src2, \
+                                    const uint8_t *unused0, const uint8_t *_src1, const uint8_t *_src2, \
                                     int width, uint32_t *unused) \
 { \
     const uint16_t *src1 = (const uint16_t *) _src1, \
@@ -1552,14 +1648,14 @@ rgb48funcs(bgr, BE, PIX_FMT_BGR48BE);
                         (isBE(origin) ? AV_RB16(&src[(i)*2]) : AV_RL16(&src[(i)*2])))
 
 static av_always_inline void
-rgb16_32ToY_c_template(uint8_t *dst, const uint8_t *src,
+rgb16_32ToY_c_template(int16_t *dst, const uint8_t *src,
                        int width, enum PixelFormat origin,
                        int shr,   int shg,   int shb, int shp,
                        int maskr, int maskg, int maskb,
                        int rsh,   int gsh,   int bsh, int S)
 {
     const int ry = RY << rsh, gy = GY << gsh, by = BY << bsh,
-              rnd = 33 << (S - 1);
+              rnd = (32<<((S)-1)) + (1<<(S-7));
     int i;
 
     for (i = 0; i < width; i++) {
@@ -1568,12 +1664,12 @@ rgb16_32ToY_c_template(uint8_t *dst, const uint8_t *src,
         int g = (px & maskg) >> shg;
         int r = (px & maskr) >> shr;
 
-        dst[i] = (ry * r + gy * g + by * b + rnd) >> S;
+        dst[i] = (ry * r + gy * g + by * b + rnd) >> ((S)-6);
     }
 }
 
 static av_always_inline void
-rgb16_32ToUV_c_template(uint8_t *dstU, uint8_t *dstV,
+rgb16_32ToUV_c_template(int16_t *dstU, int16_t *dstV,
                         const uint8_t *src, int width,
                         enum PixelFormat origin,
                         int shr,   int shg,   int shb, int shp,
@@ -1582,7 +1678,7 @@ rgb16_32ToUV_c_template(uint8_t *dstU, uint8_t *dstV,
 {
     const int ru = RU << rsh, gu = GU << gsh, bu = BU << bsh,
               rv = RV << rsh, gv = GV << gsh, bv = BV << bsh,
-              rnd = 257 << (S - 1);
+              rnd = (256<<((S)-1)) + (1<<(S-7));
     int i;
 
     for (i = 0; i < width; i++) {
@@ -1591,13 +1687,13 @@ rgb16_32ToUV_c_template(uint8_t *dstU, uint8_t *dstV,
         int g = (px & maskg) >> shg;
         int r = (px & maskr) >> shr;
 
-        dstU[i] = (ru * r + gu * g + bu * b + rnd) >> S;
-        dstV[i] = (rv * r + gv * g + bv * b + rnd) >> S;
+        dstU[i] = (ru * r + gu * g + bu * b + rnd) >> ((S)-6);
+        dstV[i] = (rv * r + gv * g + bv * b + rnd) >> ((S)-6);
     }
 }
 
 static av_always_inline void
-rgb16_32ToUV_half_c_template(uint8_t *dstU, uint8_t *dstV,
+rgb16_32ToUV_half_c_template(int16_t *dstU, int16_t *dstV,
                              const uint8_t *src, int width,
                              enum PixelFormat origin,
                              int shr,   int shg,   int shb, int shp,
@@ -1606,7 +1702,7 @@ rgb16_32ToUV_half_c_template(uint8_t *dstU, uint8_t *dstV,
 {
     const int ru = RU << rsh, gu = GU << gsh, bu = BU << bsh,
               rv = RV << rsh, gv = GV << gsh, bv = BV << bsh,
-              rnd = 257 << S, maskgx = ~(maskr | maskb);
+              rnd = (256U<<(S)) + (1<<(S-6)), maskgx = ~(maskr | maskb);
     int i;
 
     maskr |= maskr << 1; maskb |= maskb << 1; maskg |= maskg << 1;
@@ -1625,8 +1721,8 @@ rgb16_32ToUV_half_c_template(uint8_t *dstU, uint8_t *dstV,
         }
         r = (rb & maskr) >> shr;
 
-        dstU[i] = (ru * r + gu * g + bu * b + rnd) >> (S + 1);
-        dstV[i] = (rv * r + gv * g + bv * b + rnd) >> (S + 1);
+        dstU[i] = (ru * r + gu * g + bu * b + (unsigned)rnd) >> ((S)-6+1);
+        dstV[i] = (rv * r + gv * g + bv * b + (unsigned)rnd) >> ((S)-6+1);
     }
 }
 
@@ -1634,26 +1730,29 @@ rgb16_32ToUV_half_c_template(uint8_t *dstU, uint8_t *dstV,
 
 #define rgb16_32_wrapper(fmt, name, shr, shg, shb, shp, maskr, \
                          maskg, maskb, rsh, gsh, bsh, S) \
-static void name ## ToY_c(uint8_t *dst, const uint8_t *src, \
+static void name ## ToY_c(uint8_t *dst, const uint8_t *src, const uint8_t *unused1, const uint8_t *unused2, \
                           int width, uint32_t *unused) \
 { \
-    rgb16_32ToY_c_template(dst, src, width, fmt, shr, shg, shb, shp, \
+    rgb16_32ToY_c_template((int16_t*)dst, src, width, fmt, \
+                           shr, shg, shb, shp, \
                            maskr, maskg, maskb, rsh, gsh, bsh, S); \
 } \
  \
 static void name ## ToUV_c(uint8_t *dstU, uint8_t *dstV, \
-                           const uint8_t *src, const uint8_t *dummy, \
+                           const uint8_t *unused0, const uint8_t *src, const uint8_t *dummy, \
                            int width, uint32_t *unused) \
 { \
-    rgb16_32ToUV_c_template(dstU, dstV, src, width, fmt, shr, shg, shb, shp, \
+    rgb16_32ToUV_c_template((int16_t*)dstU, (int16_t*)dstV, src, width, fmt,  \
+                            shr, shg, shb, shp, \
                             maskr, maskg, maskb, rsh, gsh, bsh, S); \
 } \
  \
 static void name ## ToUV_half_c(uint8_t *dstU, uint8_t *dstV, \
-                                const uint8_t *src, const uint8_t *dummy, \
+                                const uint8_t *unused0, const uint8_t *src, const uint8_t *dummy, \
                                 int width, uint32_t *unused) \
 { \
-    rgb16_32ToUV_half_c_template(dstU, dstV, src, width, fmt, shr, shg, shb, shp, \
+    rgb16_32ToUV_half_c_template((int16_t*)dstU, (int16_t*)dstV, src, width, fmt, \
+                                 shr, shg, shb, shp, \
                                  maskr, maskg, maskb, rsh, gsh, bsh, S); \
 }
 
@@ -1670,71 +1769,132 @@ rgb16_32_wrapper(PIX_FMT_BGR555BE, bgr15be, 0, 0,  0, 0,   0x001F, 0x03E0,   0x7
 rgb16_32_wrapper(PIX_FMT_RGB565BE, rgb16be, 0, 0,  0, 0,   0xF800, 0x07E0,   0x001F,  0, 5, 11, RGB2YUV_SHIFT+8);
 rgb16_32_wrapper(PIX_FMT_RGB555BE, rgb15be, 0, 0,  0, 0,   0x7C00, 0x03E0,   0x001F,  0, 5, 10, RGB2YUV_SHIFT+7);
 
-static void abgrToA_c(uint8_t *dst, const uint8_t *src, int width, uint32_t *unused)
+static void gbr24pToY_c(uint16_t *dst, const uint8_t *gsrc, const uint8_t *bsrc, const uint8_t *rsrc,
+                        int width, uint32_t *unused)
+{
+    int i;
+    for (i = 0; i < width; i++) {
+        unsigned int g   = gsrc[i];
+        unsigned int b   = bsrc[i];
+        unsigned int r   = rsrc[i];
+
+        dst[i] = (RY*r + GY*g + BY*b + (0x801<<(RGB2YUV_SHIFT-7))) >> (RGB2YUV_SHIFT-6);
+    }
+}
+
+static void gbr24pToUV_c(uint16_t *dstU, uint16_t *dstV,
+                         const uint8_t *gsrc, const uint8_t *bsrc, const uint8_t *rsrc,
+                         int width, enum PixelFormat origin)
+{
+    int i;
+    for (i = 0; i < width; i++) {
+        unsigned int g   = gsrc[i];
+        unsigned int b   = bsrc[i];
+        unsigned int r   = rsrc[i];
+
+        dstU[i] = (RU*r + GU*g + BU*b + (0x4001<<(RGB2YUV_SHIFT-7))) >> (RGB2YUV_SHIFT-6);
+        dstV[i] = (RV*r + GV*g + BV*b + (0x4001<<(RGB2YUV_SHIFT-7))) >> (RGB2YUV_SHIFT-6);
+    }
+}
+
+static void gbr24pToUV_half_c(uint16_t *dstU, uint16_t *dstV,
+                         const uint8_t *gsrc, const uint8_t *bsrc, const uint8_t *rsrc,
+                         int width, enum PixelFormat origin)
+{
+    int i;
+    for (i = 0; i < width; i++) {
+        unsigned int g   = gsrc[2*i] + gsrc[2*i+1];
+        unsigned int b   = bsrc[2*i] + bsrc[2*i+1];
+        unsigned int r   = rsrc[2*i] + rsrc[2*i+1];
+
+        dstU[i] = (RU*r + GU*g + BU*b + (0x4001<<(RGB2YUV_SHIFT-6))) >> (RGB2YUV_SHIFT-6+1);
+        dstV[i] = (RV*r + GV*g + BV*b + (0x4001<<(RGB2YUV_SHIFT-6))) >> (RGB2YUV_SHIFT-6+1);
+    }
+}
+
+static void abgrToA_c(int16_t *dst, const uint8_t *src, const uint8_t *unused1, const uint8_t *unused2, int width, uint32_t *unused)
 {
     int i;
     for (i=0; i<width; i++) {
-        dst[i]= src[4*i];
+        dst[i]= src[4*i]<<6;
     }
 }
 
-static void rgbaToA_c(uint8_t *dst, const uint8_t *src, int width, uint32_t *unused)
+static void rgbaToA_c(int16_t *dst, const uint8_t *src, const uint8_t *unused1, const uint8_t *unused2, int width, uint32_t *unused)
 {
     int i;
     for (i=0; i<width; i++) {
-        dst[i]= src[4*i+3];
+        dst[i]= src[4*i+3]<<6;
     }
 }
 
-static void palToY_c(uint8_t *dst, const uint8_t *src, int width, uint32_t *pal)
+static void palToA_c(int16_t *dst, const uint8_t *src, const uint8_t *unused1, const uint8_t *unused2, int width, uint32_t *pal)
 {
     int i;
     for (i=0; i<width; i++) {
         int d= src[i];
 
-        dst[i]= pal[d] & 0xFF;
+        dst[i]= (pal[d] >> 24)<<6;
     }
 }
 
-static void palToUV_c(uint8_t *dstU, uint8_t *dstV,
-                      const uint8_t *src1, const uint8_t *src2,
-                      int width, uint32_t *pal)
+static void palToY_c(int16_t *dst, const uint8_t *src, const uint8_t *unused1, const uint8_t *unused2, long width, uint32_t *pal)
+{
+    int i;
+    for (i=0; i<width; i++) {
+        int d= src[i];
+
+        dst[i]= (pal[d] & 0xFF)<<6;
+    }
+}
+
+static void palToUV_c(uint16_t *dstU, int16_t *dstV,
+                           const uint8_t *unused0, const uint8_t *src1, const uint8_t *src2,
+                           int width, uint32_t *pal)
 {
     int i;
     assert(src1 == src2);
     for (i=0; i<width; i++) {
         int p= pal[src1[i]];
 
-        dstU[i]= p>>8;
-        dstV[i]= p>>16;
+        dstU[i]= (uint8_t)(p>> 8)<<6;
+        dstV[i]= (uint8_t)(p>>16)<<6;
     }
 }
 
-static void monowhite2Y_c(uint8_t *dst, const uint8_t *src,
-                          int width, uint32_t *unused)
+static void monowhite2Y_c(int16_t *dst, const uint8_t *src, const uint8_t *unused1, const uint8_t *unused2,  int width, uint32_t *unused)
 {
     int i, j;
     for (i=0; i<width/8; i++) {
         int d= ~src[i];
         for(j=0; j<8; j++)
-            dst[8*i+j]= ((d>>(7-j))&1)*255;
+            dst[8*i+j]= ((d>>(7-j))&1)*16383;
+    }
+    if(width&7){
+        int d= ~src[i];
+        for(j=0; j<(width&7); j++)
+            dst[8*i+j]= ((d>>(7-j))&1)*16383;
     }
 }
 
-static void monoblack2Y_c(uint8_t *dst, const uint8_t *src,
-                          int width, uint32_t *unused)
+static void monoblack2Y_c(int16_t *dst, const uint8_t *src, const uint8_t *unused1, const uint8_t *unused2,  int width, uint32_t *unused)
 {
     int i, j;
     for (i=0; i<width/8; i++) {
         int d= src[i];
         for(j=0; j<8; j++)
-            dst[8*i+j]= ((d>>(7-j))&1)*255;
+            dst[8*i+j]= ((d>>(7-j))&1)*16383;
+    }
+    if(width&7){
+        int d= src[i];
+        for(j=0; j<(width&7); j++)
+            dst[8*i+j]= ((d>>(7-j))&1)*16383;
     }
 }
 
 //FIXME yuy2* can read up to 7 samples too much
 
-static void yuy2ToY_c(uint8_t *dst, const uint8_t *src, int width,
+static void yuy2ToY_c(uint8_t *dst, const uint8_t *src, const uint8_t *unused1, const uint8_t *unused2,  int width,
                       uint32_t *unused)
 {
     int i;
@@ -1742,7 +1902,7 @@ static void yuy2ToY_c(uint8_t *dst, const uint8_t *src, int width,
         dst[i]= src[2*i];
 }
 
-static void yuy2ToUV_c(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1,
+static void yuy2ToUV_c(uint8_t *dstU, uint8_t *dstV, const uint8_t *unused0, const uint8_t *src1,
                        const uint8_t *src2, int width, uint32_t *unused)
 {
     int i;
@@ -1753,7 +1913,7 @@ static void yuy2ToUV_c(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1,
     assert(src1 == src2);
 }
 
-static void bswap16Y_c(uint8_t *_dst, const uint8_t *_src, int width, uint32_t *unused)
+static void bswap16Y_c(uint8_t *_dst, const uint8_t *_src, const uint8_t *unused1, const uint8_t *unused2,  int width, uint32_t *unused)
 {
     int i;
     const uint16_t *src = (const uint16_t *) _src;
@@ -1763,7 +1923,7 @@ static void bswap16Y_c(uint8_t *_dst, const uint8_t *_src, int width, uint32_t *
     }
 }
 
-static void bswap16UV_c(uint8_t *_dstU, uint8_t *_dstV, const uint8_t *_src1,
+static void bswap16UV_c(uint8_t *_dstU, uint8_t *_dstV, const uint8_t *unused0, const uint8_t *_src1,
                         const uint8_t *_src2, int width, uint32_t *unused)
 {
     int i;
@@ -1778,7 +1938,7 @@ static void bswap16UV_c(uint8_t *_dstU, uint8_t *_dstV, const uint8_t *_src1,
 
 /* This is almost identical to the previous, end exists only because
  * yuy2ToY/UV)(dst, src+1, ...) would have 100% unaligned accesses. */
-static void uyvyToY_c(uint8_t *dst, const uint8_t *src, int width,
+static void uyvyToY_c(uint8_t *dst, const uint8_t *src, const uint8_t *unused1, const uint8_t *unused2,  int width,
                       uint32_t *unused)
 {
     int i;
@@ -1786,7 +1946,7 @@ static void uyvyToY_c(uint8_t *dst, const uint8_t *src, int width,
         dst[i]= src[2*i+1];
 }
 
-static void uyvyToUV_c(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1,
+static void uyvyToUV_c(uint8_t *dstU, uint8_t *dstV, const uint8_t *unused0, const uint8_t *src1,
                        const uint8_t *src2, int width, uint32_t *unused)
 {
     int i;
@@ -1808,14 +1968,14 @@ static av_always_inline void nvXXtoUV_c(uint8_t *dst1, uint8_t *dst2,
 }
 
 static void nv12ToUV_c(uint8_t *dstU, uint8_t *dstV,
-                       const uint8_t *src1, const uint8_t *src2,
+                       const uint8_t *unused0, const uint8_t *src1, const uint8_t *src2,
                        int width, uint32_t *unused)
 {
     nvXXtoUV_c(dstU, dstV, src1, width);
 }
 
 static void nv21ToUV_c(uint8_t *dstU, uint8_t *dstV,
-                       const uint8_t *src1, const uint8_t *src2,
+                       const uint8_t *unused0, const uint8_t *src1, const uint8_t *src2,
                        int width, uint32_t *unused)
 {
     nvXXtoUV_c(dstV, dstU, src1, width);
@@ -1823,7 +1983,7 @@ static void nv21ToUV_c(uint8_t *dstU, uint8_t *dstV,
 
 #define input_pixel(pos) (isBE(origin) ? AV_RB16(pos) : AV_RL16(pos))
 
-static void bgr24ToY_c(uint8_t *dst, const uint8_t *src,
+static void bgr24ToY_c(int16_t *dst, const uint8_t *src, const uint8_t *unused1, const uint8_t *unused2,
                        int width, uint32_t *unused)
 {
     int i;
@@ -1832,11 +1992,11 @@ static void bgr24ToY_c(uint8_t *dst, const uint8_t *src,
         int g= src[i*3+1];
         int r= src[i*3+2];
 
-        dst[i]= ((RY*r + GY*g + BY*b + (33<<(RGB2YUV_SHIFT-1)))>>RGB2YUV_SHIFT);
+        dst[i]= ((RY*r + GY*g + BY*b + (32<<(RGB2YUV_SHIFT-1)) + (1<<(RGB2YUV_SHIFT-7)))>>(RGB2YUV_SHIFT-6));
     }
 }
 
-static void bgr24ToUV_c(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1,
+static void bgr24ToUV_c(int16_t *dstU, int16_t *dstV, const uint8_t *unused0, const uint8_t *src1,
                         const uint8_t *src2, int width, uint32_t *unused)
 {
     int i;
@@ -1845,13 +2005,13 @@ static void bgr24ToUV_c(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1,
         int g= src1[3*i + 1];
         int r= src1[3*i + 2];
 
-        dstU[i]= (RU*r + GU*g + BU*b + (257<<(RGB2YUV_SHIFT-1)))>>RGB2YUV_SHIFT;
-        dstV[i]= (RV*r + GV*g + BV*b + (257<<(RGB2YUV_SHIFT-1)))>>RGB2YUV_SHIFT;
+        dstU[i]= (RU*r + GU*g + BU*b + (256<<(RGB2YUV_SHIFT-1)) + (1<<(RGB2YUV_SHIFT-7)))>>(RGB2YUV_SHIFT-6);
+        dstV[i]= (RV*r + GV*g + BV*b + (256<<(RGB2YUV_SHIFT-1)) + (1<<(RGB2YUV_SHIFT-7)))>>(RGB2YUV_SHIFT-6);
     }
     assert(src1 == src2);
 }
 
-static void bgr24ToUV_half_c(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1,
+static void bgr24ToUV_half_c(int16_t *dstU, int16_t *dstV, const uint8_t *unused0, const uint8_t *src1,
                              const uint8_t *src2, int width, uint32_t *unused)
 {
     int i;
@@ -1860,13 +2020,13 @@ static void bgr24ToUV_half_c(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1,
         int g= src1[6*i + 1] + src1[6*i + 4];
         int r= src1[6*i + 2] + src1[6*i + 5];
 
-        dstU[i]= (RU*r + GU*g + BU*b + (257<<RGB2YUV_SHIFT))>>(RGB2YUV_SHIFT+1);
-        dstV[i]= (RV*r + GV*g + BV*b + (257<<RGB2YUV_SHIFT))>>(RGB2YUV_SHIFT+1);
+        dstU[i]= (RU*r + GU*g + BU*b + (256<<RGB2YUV_SHIFT) + (1<<(RGB2YUV_SHIFT-6)))>>(RGB2YUV_SHIFT-5);
+        dstV[i]= (RV*r + GV*g + BV*b + (256<<RGB2YUV_SHIFT) + (1<<(RGB2YUV_SHIFT-6)))>>(RGB2YUV_SHIFT-5);
     }
     assert(src1 == src2);
 }
 
-static void rgb24ToY_c(uint8_t *dst, const uint8_t *src, int width,
+static void rgb24ToY_c(int16_t *dst, const uint8_t *src, const uint8_t *unused1, const uint8_t *unused2, int width,
                        uint32_t *unused)
 {
     int i;
@@ -1875,11 +2035,11 @@ static void rgb24ToY_c(uint8_t *dst, const uint8_t *src, int width,
         int g= src[i*3+1];
         int b= src[i*3+2];
 
-        dst[i]= ((RY*r + GY*g + BY*b + (33<<(RGB2YUV_SHIFT-1)))>>RGB2YUV_SHIFT);
+        dst[i]= ((RY*r + GY*g + BY*b + (32<<(RGB2YUV_SHIFT-1)) + (1<<(RGB2YUV_SHIFT-7)))>>(RGB2YUV_SHIFT-6));
     }
 }
 
-static void rgb24ToUV_c(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1,
+static void rgb24ToUV_c(int16_t *dstU, int16_t *dstV, const uint8_t *unused0, const uint8_t *src1,
                         const uint8_t *src2, int width, uint32_t *unused)
 {
     int i;
@@ -1889,13 +2049,13 @@ static void rgb24ToUV_c(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1,
         int g= src1[3*i + 1];
         int b= src1[3*i + 2];
 
-        dstU[i]= (RU*r + GU*g + BU*b + (257<<(RGB2YUV_SHIFT-1)))>>RGB2YUV_SHIFT;
-        dstV[i]= (RV*r + GV*g + BV*b + (257<<(RGB2YUV_SHIFT-1)))>>RGB2YUV_SHIFT;
+        dstU[i]= (RU*r + GU*g + BU*b + (256<<(RGB2YUV_SHIFT-1)) + (1<<(RGB2YUV_SHIFT-7)))>>(RGB2YUV_SHIFT-6);
+        dstV[i]= (RV*r + GV*g + BV*b + (256<<(RGB2YUV_SHIFT-1)) + (1<<(RGB2YUV_SHIFT-7)))>>(RGB2YUV_SHIFT-6);
     }
 }
 
-static void rgb24ToUV_half_c(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1,
-                             const uint8_t *src2, int width, uint32_t *unused)
+static void rgb24ToUV_half_c(int16_t *dstU, int16_t *dstV, const uint8_t *unused0, const uint8_t *src1,
+                                    const uint8_t *src2, int width, uint32_t *unused)
 {
     int i;
     assert(src1==src2);
@@ -1904,8 +2064,8 @@ static void rgb24ToUV_half_c(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1,
         int g= src1[6*i + 1] + src1[6*i + 4];
         int b= src1[6*i + 2] + src1[6*i + 5];
 
-        dstU[i]= (RU*r + GU*g + BU*b + (257<<RGB2YUV_SHIFT))>>(RGB2YUV_SHIFT+1);
-        dstV[i]= (RV*r + GV*g + BV*b + (257<<RGB2YUV_SHIFT))>>(RGB2YUV_SHIFT+1);
+        dstU[i]= (RU*r + GU*g + BU*b + (256<<RGB2YUV_SHIFT) + (1<<(RGB2YUV_SHIFT-6)))>>(RGB2YUV_SHIFT-5);
+        dstV[i]= (RV*r + GV*g + BV*b + (256<<RGB2YUV_SHIFT) + (1<<(RGB2YUV_SHIFT-6)))>>(RGB2YUV_SHIFT-5);
     }
 }
 
@@ -1919,6 +2079,9 @@ static void hScale16To19_c(SwsContext *c, int16_t *_dst, int dstW, const uint8_t
     int bits = av_pix_fmt_descriptors[c->srcFormat].comp[0].depth_minus1;
     int sh = bits - 4;
 
+    if((isAnyRGB(c->srcFormat) || c->srcFormat==PIX_FMT_PAL8) && av_pix_fmt_descriptors[c->srcFormat].comp[0].depth_minus1<15)
+        sh= 9;
+
     for (i = 0; i < dstW; i++) {
         int j;
         int srcPos = filterPos[i];
@@ -1940,6 +2103,9 @@ static void hScale16To15_c(SwsContext *c, int16_t *dst, int dstW, const uint8_t
     const uint16_t *src = (const uint16_t *) _src;
     int sh = av_pix_fmt_descriptors[c->srcFormat].comp[0].depth_minus1;
 
+    if(sh<15)
+        sh= isAnyRGB(c->srcFormat) || c->srcFormat==PIX_FMT_PAL8 ? 13 : av_pix_fmt_descriptors[c->srcFormat].comp[0].depth_minus1;
+
     for (i = 0; i < dstW; i++) {
         int j;
         int srcPos = filterPos[i];
@@ -2054,7 +2220,7 @@ static void lumRangeFromJpeg16_c(int16_t *_dst, int width)
     int i;
     int32_t *dst = (int32_t *) _dst;
     for (i = 0; i < width; i++)
-        dst[i] = (dst[i]*14071 + (33561947<<4))>>14;
+        dst[i] = (dst[i]*(14071/4) + (33561947<<4)/4)>>12;
 }
 
 static void hyscale_fast_c(SwsContext *c, int16_t *dst, int dstWidth,
@@ -2068,21 +2234,24 @@ static void hyscale_fast_c(SwsContext *c, int16_t *dst, int dstWidth,
         dst[i]= (src[xx]<<7) + (src[xx+1] - src[xx])*xalpha;
         xpos+=xInc;
     }
+    for (i=dstWidth-1; (i*xInc)>>16 >=srcW-1; i--)
+        dst[i] = src[srcW-1]*128;
 }
 
 // *** horizontal scale Y line to temp buffer
 static av_always_inline void hyscale(SwsContext *c, int16_t *dst, int dstWidth,
-                                     const uint8_t *src, int srcW, int xInc,
+                                     const uint8_t *src, const uint8_t *src2, const uint8_t *src3,
+                                     int srcW, int xInc,
                                      const int16_t *hLumFilter,
                                      const int16_t *hLumFilterPos, int hLumFilterSize,
                                      uint8_t *formatConvBuffer,
                                      uint32_t *pal, int isAlpha)
 {
-    void (*toYV12)(uint8_t *, const uint8_t *, int, uint32_t *) = isAlpha ? c->alpToYV12 : c->lumToYV12;
+    void (*toYV12)(uint8_t *, const uint8_t *, const uint8_t *, const uint8_t *, int, uint32_t *) = isAlpha ? c->alpToYV12 : c->lumToYV12;
     void (*convertRange)(int16_t *, int) = isAlpha ? NULL : c->lumConvertRange;
 
     if (toYV12) {
-        toYV12(formatConvBuffer, src, srcW, pal);
+        toYV12(formatConvBuffer, src, src2, src3, srcW, pal);
         src= formatConvBuffer;
     }
 
@@ -2109,17 +2278,21 @@ static void hcscale_fast_c(SwsContext *c, int16_t *dst1, int16_t *dst2,
         dst2[i]=(src2[xx]*(xalpha^127)+src2[xx+1]*xalpha);
         xpos+=xInc;
     }
+    for (i=dstWidth-1; (i*xInc)>>16 >=srcW-1; i--) {
+        dst1[i] = src1[srcW-1]*128;
+        dst2[i] = src2[srcW-1]*128;
+    }
 }
 
 static av_always_inline void hcscale(SwsContext *c, int16_t *dst1, int16_t *dst2, int dstWidth,
-                                     const uint8_t *src1, const uint8_t *src2,
+                                     const uint8_t *src0, const uint8_t *src1, const uint8_t *src2,
                                      int srcW, int xInc, const int16_t *hChrFilter,
                                      const int16_t *hChrFilterPos, int hChrFilterSize,
                                      uint8_t *formatConvBuffer, uint32_t *pal)
 {
     if (c->chrToYV12) {
-        uint8_t *buf2 = formatConvBuffer + FFALIGN(srcW * FFALIGN(c->srcBpc, 8) >> 3, 16);
-        c->chrToYV12(formatConvBuffer, buf2, src1, src2, srcW, pal);
+        uint8_t *buf2 = formatConvBuffer + FFALIGN(srcW*2+78, 16);
+        c->chrToYV12(formatConvBuffer, buf2, src0, src1, src2, srcW, pal);
         src1= formatConvBuffer;
         src2= buf2;
     }
@@ -2222,7 +2395,10 @@ find_c_packed_planar_out_funcs(SwsContext *c,
             *yuv2packedX = yuv2bgr24_full_X_c;
             break;
         }
+        if(!*yuv2packedX)
+            goto YUV_PACKED;
     } else {
+        YUV_PACKED:
         switch (dstFormat) {
         case PIX_FMT_GRAY16BE:
             *yuv2packed1 = yuv2gray16BE_1_c;
@@ -2415,12 +2591,13 @@ static int swScale(SwsContext *c, const uint8_t* src[],
     const int chrSrcSliceH= -((-srcSliceH) >> c->chrSrcVSubSample);
     int lastDstY;
     uint32_t *pal=c->pal_yuv;
+
+    int should_dither= isNBPS(c->srcFormat) || is16BPS(c->srcFormat);
     yuv2planar1_fn yuv2yuv1 = c->yuv2yuv1;
     yuv2planarX_fn yuv2yuvX = c->yuv2yuvX;
     yuv2packed1_fn yuv2packed1 = c->yuv2packed1;
     yuv2packed2_fn yuv2packed2 = c->yuv2packed2;
     yuv2packedX_fn yuv2packedX = c->yuv2packedX;
-    int should_dither = is9_OR_10BPS(c->srcFormat) || is16BPS(c->srcFormat);
 
     /* vars which will change and which we need to store back in the context */
     int dstY= c->dstY;
@@ -2517,17 +2694,19 @@ static int swScale(SwsContext *c, const uint8_t* src[],
         //Do horizontal scaling
         while(lastInLumBuf < lastLumSrcY) {
             const uint8_t *src1= src[0]+(lastInLumBuf + 1 - srcSliceY)*srcStride[0];
-            const uint8_t *src2= src[3]+(lastInLumBuf + 1 - srcSliceY)*srcStride[3];
+            const uint8_t *src2= src[1]+(lastInLumBuf + 1 - srcSliceY)*srcStride[1];
+            const uint8_t *src3= src[2]+(lastInLumBuf + 1 - srcSliceY)*srcStride[2];
+            const uint8_t *src4= src[3]+(lastInLumBuf + 1 - srcSliceY)*srcStride[3];
             lumBufIndex++;
             assert(lumBufIndex < 2*vLumBufSize);
             assert(lastInLumBuf + 1 - srcSliceY < srcSliceH);
             assert(lastInLumBuf + 1 - srcSliceY >= 0);
-            hyscale(c, lumPixBuf[ lumBufIndex ], dstW, src1, srcW, lumXInc,
+            hyscale(c, lumPixBuf[ lumBufIndex ], dstW, src1, src2, src3, srcW, lumXInc,
                     hLumFilter, hLumFilterPos, hLumFilterSize,
                     formatConvBuffer,
                     pal, 0);
             if (CONFIG_SWSCALE_ALPHA && alpPixBuf)
-                hyscale(c, alpPixBuf[ lumBufIndex ], dstW, src2, srcW,
+                hyscale(c, alpPixBuf[ lumBufIndex ], dstW, src4, NULL, NULL, srcW,
                         lumXInc, hLumFilter, hLumFilterPos, hLumFilterSize,
                         formatConvBuffer,
                         pal, 1);
@@ -2536,6 +2715,7 @@ static int swScale(SwsContext *c, const uint8_t* src[],
                                lumBufIndex,    lastInLumBuf);
         }
         while(lastInChrBuf < lastChrSrcY) {
+            const uint8_t *src0= src[0]+(lastInChrBuf + 1 - chrSrcSliceY)*srcStride[0];
             const uint8_t *src1= src[1]+(lastInChrBuf + 1 - chrSrcSliceY)*srcStride[1];
             const uint8_t *src2= src[2]+(lastInChrBuf + 1 - chrSrcSliceY)*srcStride[2];
             chrBufIndex++;
@@ -2546,7 +2726,7 @@ static int swScale(SwsContext *c, const uint8_t* src[],
 
             if (c->needs_hcscale)
                 hcscale(c, chrUPixBuf[chrBufIndex], chrVPixBuf[chrBufIndex],
-                          chrDstW, src1, src2, chrSrcW, chrXInc,
+                          chrDstW, src0, src1, src2, chrSrcW, chrXInc,
                           hChrFilter, hChrFilterPos, hChrFilterSize,
                           formatConvBuffer, pal);
             lastInChrBuf++;
@@ -2578,6 +2758,7 @@ static int swScale(SwsContext *c, const uint8_t* src[],
             const int16_t **chrUSrcPtr= (const int16_t **) chrUPixBuf + chrBufIndex + firstChrSrcY - lastInChrBuf + vChrBufSize;
             const int16_t **chrVSrcPtr= (const int16_t **) chrVPixBuf + chrBufIndex + firstChrSrcY - lastInChrBuf + vChrBufSize;
             const int16_t **alpSrcPtr= (CONFIG_SWSCALE_ALPHA && alpPixBuf) ? (const int16_t **) alpPixBuf + lumBufIndex + firstLumSrcY - lastInLumBuf + vLumBufSize : NULL;
+
             if (isPlanarYUV(dstFormat) || dstFormat==PIX_FMT_GRAY8) { //YV12 like
                 const int chrSkipMask= (1<<c->chrDstVSubSample)-1;
                 if ((dstY&chrSkipMask) || isGray(dstFormat))
@@ -2664,8 +2845,8 @@ static av_cold void sws_init_swScale_c(SwsContext *c)
         case PIX_FMT_YUV444P9LE:
         case PIX_FMT_YUV420P9LE:
         case PIX_FMT_YUV422P10LE:
-        case PIX_FMT_YUV444P10LE:
         case PIX_FMT_YUV420P10LE:
+        case PIX_FMT_YUV444P10LE:
         case PIX_FMT_YUV420P16LE:
         case PIX_FMT_YUV422P16LE:
         case PIX_FMT_YUV444P16LE: c->chrToYV12 = bswap16UV_c; break;
@@ -2700,6 +2881,7 @@ static av_cold void sws_init_swScale_c(SwsContext *c)
         case PIX_FMT_RGB565BE: c->chrToYV12 = rgb16beToUV_half_c; break;
         case PIX_FMT_RGB555LE: c->chrToYV12 = rgb15leToUV_half_c; break;
         case PIX_FMT_RGB555BE: c->chrToYV12 = rgb15beToUV_half_c; break;
+        case PIX_FMT_GBR24P  : c->chrToYV12 = gbr24pToUV_half_c;  break;
         }
     } else {
         switch(srcFormat) {
@@ -2721,6 +2903,7 @@ static av_cold void sws_init_swScale_c(SwsContext *c)
         case PIX_FMT_RGB565BE: c->chrToYV12 = rgb16beToUV_c; break;
         case PIX_FMT_RGB555LE: c->chrToYV12 = rgb15leToUV_c; break;
         case PIX_FMT_RGB555BE: c->chrToYV12 = rgb15beToUV_c; break;
+        case PIX_FMT_GBR24P  : c->chrToYV12 = gbr24pToUV_c;  break;
         }
     }
 
@@ -2730,9 +2913,9 @@ static av_cold void sws_init_swScale_c(SwsContext *c)
 #if HAVE_BIGENDIAN
     case PIX_FMT_YUV444P9LE:
     case PIX_FMT_YUV420P9LE:
-    case PIX_FMT_YUV444P10LE:
     case PIX_FMT_YUV422P10LE:
     case PIX_FMT_YUV420P10LE:
+    case PIX_FMT_YUV444P10LE:
     case PIX_FMT_YUV420P16LE:
     case PIX_FMT_YUV422P16LE:
     case PIX_FMT_YUV444P16LE:
@@ -2776,6 +2959,7 @@ static av_cold void sws_init_swScale_c(SwsContext *c)
     case PIX_FMT_RGB48LE: c->lumToYV12 = rgb48LEToY_c; break;
     case PIX_FMT_BGR48BE: c->lumToYV12 = bgr48BEToY_c; break;
     case PIX_FMT_BGR48LE: c->lumToYV12 = bgr48LEToY_c; break;
+    case PIX_FMT_GBR24P : c->lumToYV12 = gbr24pToY_c ; break;
     }
     if (c->alpPixBuf) {
         switch (srcFormat) {
@@ -2784,9 +2968,11 @@ static av_cold void sws_init_swScale_c(SwsContext *c)
         case PIX_FMT_ABGR:
         case PIX_FMT_ARGB:  c->alpToYV12 = abgrToA_c; break;
         case PIX_FMT_Y400A: c->alpToYV12 = uyvyToY_c; break;
+        case PIX_FMT_PAL8 : c->alpToYV12 = palToA_c; break;
         }
     }
 
+
     if (c->srcBpc == 8) {
         if (c->dstBpc <= 10) {
             c->hyScale = c->hcScale = hScale8To15_c;
diff --git a/libswscale/swscale.h b/libswscale/swscale.h
index 91378a4..bdde675 100644
--- a/libswscale/swscale.h
+++ b/libswscale/swscale.h
@@ -1,20 +1,20 @@
 /*
  * Copyright (C) 2001-2003 Michael Niedermayer <michaelni@gmx.at>
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -55,6 +55,9 @@
 #ifndef FF_API_SWS_CPU_CAPS
 #define FF_API_SWS_CPU_CAPS    (LIBSWSCALE_VERSION_MAJOR < 3)
 #endif
+#ifndef FF_API_SWS_FORMAT_NAME
+#define FF_API_SWS_FORMAT_NAME  (LIBSWSCALE_VERSION_MAJOR < 3)
+#endif
 
 /**
  * Returns the LIBSWSCALE_VERSION_INT constant.
diff --git a/libswscale/swscale_internal.h b/libswscale/swscale_internal.h
index a13b89d..0e62b38 100644
--- a/libswscale/swscale_internal.h
+++ b/libswscale/swscale_internal.h
@@ -1,20 +1,20 @@
 /*
  * Copyright (C) 2001-2003 Michael Niedermayer <michaelni@gmx.at>
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -37,6 +37,8 @@
 
 #define MAX_FILTER_SIZE 256
 
+#define DITHER1XBPP
+
 #if HAVE_BIGENDIAN
 #define ALT32_CORR (-1)
 #else
@@ -344,8 +346,8 @@ typedef struct SwsContext {
 #define V_TEMP                "11*8+4*4*256*2+32"
 #define Y_TEMP                "11*8+4*4*256*2+40"
 #define ALP_MMX_FILTER_OFFSET "11*8+4*4*256*2+48"
-#define UV_OFF_PX             "11*8+4*4*256*3+48"
-#define UV_OFF_BYTE           "11*8+4*4*256*3+56"
+#define UV_OFF                "11*8+4*4*256*3+48"
+#define UV_OFFx2              "11*8+4*4*256*3+56"
 #define DITHER16              "11*8+4*4*256*3+64"
 #define DITHER32              "11*8+4*4*256*3+80"
 
@@ -373,8 +375,8 @@ typedef struct SwsContext {
     // alignment of these values is not necessary, but merely here
     // to maintain the same offset across x8632 and x86-64. Once we
     // use proper offset macros in the asm, they can be removed.
-    DECLARE_ALIGNED(8, ptrdiff_t, uv_off_px);   ///< offset (in pixels) between u and v planes
-    DECLARE_ALIGNED(8, ptrdiff_t, uv_off_byte); ///< offset (in bytes) between u and v planes
+    DECLARE_ALIGNED(8, ptrdiff_t, uv_off); ///< offset (in pixels) between u and v planes
+    DECLARE_ALIGNED(8, ptrdiff_t, uv_offx2); ///< offset (in bytes) between u and v planes
     DECLARE_ALIGNED(8, uint16_t, dither16)[8];
     DECLARE_ALIGNED(8, uint32_t, dither32)[8];
 
@@ -416,12 +418,12 @@ typedef struct SwsContext {
     yuv2packed2_fn yuv2packed2;
     yuv2packedX_fn yuv2packedX;
 
-    void (*lumToYV12)(uint8_t *dst, const uint8_t *src,
+    void (*lumToYV12)(uint8_t *dst, const uint8_t *src, const uint8_t *src2, const uint8_t *src3,
                       int width, uint32_t *pal); ///< Unscaled conversion of luma plane to YV12 for horizontal scaler.
-    void (*alpToYV12)(uint8_t *dst, const uint8_t *src,
+    void (*alpToYV12)(uint8_t *dst, const uint8_t *src, const uint8_t *src2, const uint8_t *src3,
                       int width, uint32_t *pal); ///< Unscaled conversion of alpha plane to YV12 for horizontal scaler.
     void (*chrToYV12)(uint8_t *dstU, uint8_t *dstV,
-                      const uint8_t *src1, const uint8_t *src2,
+                      const uint8_t *src1, const uint8_t *src2, const uint8_t *src3,
                       int width, uint32_t *pal); ///< Unscaled conversion of chroma planes to YV12 for horizontal scaler.
     /**
      * Scale one horizontal line of input data using a bilinear filter
@@ -515,7 +517,13 @@ SwsFunc ff_yuv2rgb_init_altivec(SwsContext *c);
 SwsFunc ff_yuv2rgb_get_func_ptr_bfin(SwsContext *c);
 void ff_bfin_get_unscaled_swscale(SwsContext *c);
 
+#if FF_API_SWS_FORMAT_NAME
+/**
+ * @deprecated Use av_get_pix_fmt_name() instead.
+ */
+attribute_deprecated
 const char *sws_format_name(enum PixelFormat format);
+#endif
 
 //FIXME replace this with something faster
 #define is16BPS(x)      (           \
@@ -525,6 +533,10 @@ const char *sws_format_name(enum PixelFormat format);
         || (x)==PIX_FMT_BGR48LE     \
         || (x)==PIX_FMT_RGB48BE     \
         || (x)==PIX_FMT_RGB48LE     \
+        || (x)==PIX_FMT_BGRA64BE    \
+        || (x)==PIX_FMT_BGRA64LE    \
+        || (x)==PIX_FMT_RGBA64BE    \
+        || (x)==PIX_FMT_RGBA64LE    \
         || (x)==PIX_FMT_YUV420P16LE \
         || (x)==PIX_FMT_YUV422P16LE \
         || (x)==PIX_FMT_YUV444P16LE \
@@ -532,7 +544,7 @@ const char *sws_format_name(enum PixelFormat format);
         || (x)==PIX_FMT_YUV422P16BE \
         || (x)==PIX_FMT_YUV444P16BE \
     )
-#define is9_OR_10BPS(x) (           \
+#define isNBPS(x)       (           \
            (x)==PIX_FMT_YUV420P9LE  \
         || (x)==PIX_FMT_YUV420P9BE  \
         || (x)==PIX_FMT_YUV444P9BE  \
@@ -543,7 +555,10 @@ const char *sws_format_name(enum PixelFormat format);
         || (x)==PIX_FMT_YUV444P10LE \
         || (x)==PIX_FMT_YUV420P10LE \
         || (x)==PIX_FMT_YUV420P10BE \
+        || (x)==PIX_FMT_YUV422P10LE \
+        || (x)==PIX_FMT_YUV422P10BE \
     )
+#define is9_OR_10BPS isNBPS //for ronald
 #define isBE(x) ((x)&1)
 #define isPlanar8YUV(x) (           \
            (x)==PIX_FMT_YUV410P     \
@@ -564,6 +579,7 @@ const char *sws_format_name(enum PixelFormat format);
         || (x)==PIX_FMT_YUV422P10LE \
         || (x)==PIX_FMT_YUV444P10LE \
         || (x)==PIX_FMT_YUV420P16LE \
+        || (x)==PIX_FMT_YUV422P10LE \
         || (x)==PIX_FMT_YUV422P16LE \
         || (x)==PIX_FMT_YUV444P16LE \
         || (x)==PIX_FMT_YUV420P9BE  \
@@ -572,9 +588,16 @@ const char *sws_format_name(enum PixelFormat format);
         || (x)==PIX_FMT_YUV422P10BE \
         || (x)==PIX_FMT_YUV444P10BE \
         || (x)==PIX_FMT_YUV420P16BE \
+        || (x)==PIX_FMT_YUV422P10BE \
         || (x)==PIX_FMT_YUV422P16BE \
         || (x)==PIX_FMT_YUV444P16BE \
     )
+
+#define isPlanar(x)  (              \
+        isPlanarYUV(x)              \
+        ||  (x)==PIX_FMT_GBR24P     \
+    )
+
 #define isYUV(x)        (           \
            (x)==PIX_FMT_UYVY422     \
         || (x)==PIX_FMT_YUYV422     \
@@ -582,7 +605,7 @@ const char *sws_format_name(enum PixelFormat format);
     )
 #define isGray(x)       (           \
            (x)==PIX_FMT_GRAY8       \
-        || (x)==PIX_FMT_Y400A      \
+        || (x)==PIX_FMT_GRAY8A      \
         || (x)==PIX_FMT_GRAY16BE    \
         || (x)==PIX_FMT_GRAY16LE    \
     )
@@ -593,6 +616,8 @@ const char *sws_format_name(enum PixelFormat format);
 #define isRGBinInt(x)   (           \
            (x)==PIX_FMT_RGB48BE     \
         || (x)==PIX_FMT_RGB48LE     \
+        || (x)==PIX_FMT_RGBA64BE    \
+        || (x)==PIX_FMT_RGBA64LE    \
         || (x)==PIX_FMT_RGB32       \
         || (x)==PIX_FMT_RGB32_1     \
         || (x)==PIX_FMT_RGB24       \
@@ -611,6 +636,8 @@ const char *sws_format_name(enum PixelFormat format);
 #define isBGRinInt(x)   (           \
            (x)==PIX_FMT_BGR48BE     \
         || (x)==PIX_FMT_BGR48LE     \
+        || (x)==PIX_FMT_BGRA64BE    \
+        || (x)==PIX_FMT_BGRA64LE    \
         || (x)==PIX_FMT_BGR32       \
         || (x)==PIX_FMT_BGR32_1     \
         || (x)==PIX_FMT_BGR24       \
@@ -629,6 +656,8 @@ const char *sws_format_name(enum PixelFormat format);
 #define isRGBinBytes(x) (           \
            (x)==PIX_FMT_RGB48BE     \
         || (x)==PIX_FMT_RGB48LE     \
+        || (x)==PIX_FMT_RGBA64BE    \
+        || (x)==PIX_FMT_RGBA64LE    \
         || (x)==PIX_FMT_RGBA        \
         || (x)==PIX_FMT_ARGB        \
         || (x)==PIX_FMT_RGB24       \
@@ -636,6 +665,8 @@ const char *sws_format_name(enum PixelFormat format);
 #define isBGRinBytes(x) (           \
            (x)==PIX_FMT_BGR48BE     \
         || (x)==PIX_FMT_BGR48LE     \
+        || (x)==PIX_FMT_BGRA64BE    \
+        || (x)==PIX_FMT_BGRA64LE    \
         || (x)==PIX_FMT_BGRA        \
         || (x)==PIX_FMT_ABGR        \
         || (x)==PIX_FMT_BGR24       \
@@ -643,13 +674,19 @@ const char *sws_format_name(enum PixelFormat format);
 #define isAnyRGB(x)     (           \
             isRGBinInt(x)           \
         ||  isBGRinInt(x)           \
+        ||  (x)==PIX_FMT_GBR24P     \
     )
 #define isALPHA(x)      (           \
-           (x)==PIX_FMT_BGR32       \
+           (x)==PIX_FMT_BGRA64BE    \
+        || (x)==PIX_FMT_BGRA64LE    \
+        || (x)==PIX_FMT_RGBA64BE    \
+        || (x)==PIX_FMT_RGBA64LE    \
+        || (x)==PIX_FMT_BGR32       \
         || (x)==PIX_FMT_BGR32_1     \
         || (x)==PIX_FMT_RGB32       \
         || (x)==PIX_FMT_RGB32_1     \
-        || (x)==PIX_FMT_Y400A       \
+        || (x)==PIX_FMT_PAL8        \
+        || (x)==PIX_FMT_GRAY8A      \
         || (x)==PIX_FMT_YUVA420P    \
     )
 #define isPacked(x)         (       \
@@ -657,12 +694,16 @@ const char *sws_format_name(enum PixelFormat format);
         || (x)==PIX_FMT_YUYV422     \
         || (x)==PIX_FMT_UYVY422     \
         || (x)==PIX_FMT_Y400A       \
-        || isAnyRGB(x)              \
+        ||  isRGBinInt(x)           \
+        ||  isBGRinInt(x)           \
     )
-#define usePal(x) ((av_pix_fmt_descriptors[x].flags & PIX_FMT_PAL) || (x) == PIX_FMT_Y400A)
+#define usePal(x) ((av_pix_fmt_descriptors[x].flags & PIX_FMT_PAL) || (x) == PIX_FMT_GRAY8A)
 
 extern const uint64_t ff_dither4[2];
 extern const uint64_t ff_dither8[2];
+extern const uint8_t dithers[8][8][8];
+extern const uint16_t dither_scale[15][16];
+
 
 extern const AVClass sws_context_class;
 
diff --git a/libswscale/swscale_unscaled.c b/libswscale/swscale_unscaled.c
index c0161c2..e8a92c2 100644
--- a/libswscale/swscale_unscaled.c
+++ b/libswscale/swscale_unscaled.c
@@ -1,20 +1,20 @@
 /*
  * Copyright (C) 2001-2003 Michael Niedermayer <michaelni@gmx.at>
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -34,48 +34,6 @@
 #include "libavutil/bswap.h"
 #include "libavutil/pixdesc.h"
 
-DECLARE_ALIGNED(8, const uint8_t, dither_8x8_1)[8][8] = {
-    {   0,  1,  0,  1,  0,  1,  0,  1,},
-    {   1,  0,  1,  0,  1,  0,  1,  0,},
-    {   0,  1,  0,  1,  0,  1,  0,  1,},
-    {   1,  0,  1,  0,  1,  0,  1,  0,},
-    {   0,  1,  0,  1,  0,  1,  0,  1,},
-    {   1,  0,  1,  0,  1,  0,  1,  0,},
-    {   0,  1,  0,  1,  0,  1,  0,  1,},
-    {   1,  0,  1,  0,  1,  0,  1,  0,},
-};
-DECLARE_ALIGNED(8, const uint8_t, dither_8x8_3)[8][8] = {
-    {   1,  2,  1,  2,  1,  2,  1,  2,},
-    {   3,  0,  3,  0,  3,  0,  3,  0,},
-    {   1,  2,  1,  2,  1,  2,  1,  2,},
-    {   3,  0,  3,  0,  3,  0,  3,  0,},
-    {   1,  2,  1,  2,  1,  2,  1,  2,},
-    {   3,  0,  3,  0,  3,  0,  3,  0,},
-    {   1,  2,  1,  2,  1,  2,  1,  2,},
-    {   3,  0,  3,  0,  3,  0,  3,  0,},
-};
-DECLARE_ALIGNED(8, const uint8_t, dither_8x8_64)[8][8] = {
-    {  18, 34, 30, 46, 17, 33, 29, 45,},
-    {  50,  2, 62, 14, 49,  1, 61, 13,},
-    {  26, 42, 22, 38, 25, 41, 21, 37,},
-    {  58, 10, 54,  6, 57,  9, 53,  5,},
-    {  16, 32, 28, 44, 19, 35, 31, 47,},
-    {  48,  0, 60, 12, 51,  3, 63, 15,},
-    {  24, 40, 20, 36, 27, 43, 23, 39,},
-    {  56,  8, 52,  4, 59, 11, 55,  7,},
-};
-extern const uint8_t dither_8x8_128[8][8];
-DECLARE_ALIGNED(8, const uint8_t, dither_8x8_256)[8][8] = {
-    {  72, 136, 120, 184,  68, 132, 116, 180,},
-    { 200,   8, 248,  56, 196,   4, 244,  52,},
-    { 104, 168,  88, 152, 100, 164,  84, 148,},
-    { 232,  40, 216,  24, 228,  36, 212,  20,},
-    {  64, 128, 102, 176,  76, 140, 124, 188,},
-    { 192,   0, 240,  48, 204,  12, 252,  60,},
-    {  96, 160,  80, 144, 108, 172,  92, 156,},
-    { 224,  32, 208,  16, 236,  44, 220,  28,},
-};
-
 #define RGB2YUV_SHIFT 15
 #define BY ( (int)(0.114*219/255*(1<<RGB2YUV_SHIFT)+0.5))
 #define BV (-(int)(0.081*224/255*(1<<RGB2YUV_SHIFT)+0.5))
@@ -263,7 +221,7 @@ static int palToRgbWrapper(SwsContext *c, const uint8_t* src[], int srcStride[],
     uint8_t *dstPtr= dst[0] + dstStride[0]*srcSliceY;
     const uint8_t *srcPtr= src[0];
 
-    if (srcFormat == PIX_FMT_Y400A) {
+    if (srcFormat == PIX_FMT_GRAY8A) {
         switch (dstFormat) {
         case PIX_FMT_RGB32  : conv = gray8aToPacked32; break;
         case PIX_FMT_BGR32  : conv = gray8aToPacked32; break;
@@ -285,7 +243,7 @@ static int palToRgbWrapper(SwsContext *c, const uint8_t* src[], int srcStride[],
 
     if (!conv)
         av_log(c, AV_LOG_ERROR, "internal error %s -> %s converter\n",
-               sws_format_name(srcFormat), sws_format_name(dstFormat));
+               av_get_pix_fmt_name(srcFormat), av_get_pix_fmt_name(dstFormat));
     else {
         for (i=0; i<srcSliceH; i++) {
             conv(srcPtr, dstPtr, c->srcW, (uint8_t *) c->pal_rgb);
@@ -372,7 +330,7 @@ static int rgbToRgbWrapper(SwsContext *c, const uint8_t* src[], int srcStride[],
 
     if (!conv) {
         av_log(c, AV_LOG_ERROR, "internal error %s -> %s converter\n",
-               sws_format_name(srcFormat), sws_format_name(dstFormat));
+               av_get_pix_fmt_name(srcFormat), av_get_pix_fmt_name(dstFormat));
     } else {
         const uint8_t *srcPtr= src[0];
               uint8_t *dstPtr= dst[0];
@@ -454,27 +412,28 @@ static int packedCopyWrapper(SwsContext *c, const uint8_t* src[], int srcStride[
     return srcSliceH;
 }
 
-#define clip9(x)  av_clip_uintp2(x,  9)
-#define clip10(x) av_clip_uintp2(x, 10)
-#define DITHER_COPY(dst, dstStride, wfunc, src, srcStride, rfunc, dithers, shift, clip) \
-    for (i = 0; i < height; i++) { \
-        const uint8_t *dither = dithers[i & 7]; \
-        for (j = 0; j < length - 7; j += 8) { \
-            wfunc(&dst[j + 0], clip((rfunc(&src[j + 0]) + dither[0]) >> shift)); \
-            wfunc(&dst[j + 1], clip((rfunc(&src[j + 1]) + dither[1]) >> shift)); \
-            wfunc(&dst[j + 2], clip((rfunc(&src[j + 2]) + dither[2]) >> shift)); \
-            wfunc(&dst[j + 3], clip((rfunc(&src[j + 3]) + dither[3]) >> shift)); \
-            wfunc(&dst[j + 4], clip((rfunc(&src[j + 4]) + dither[4]) >> shift)); \
-            wfunc(&dst[j + 5], clip((rfunc(&src[j + 5]) + dither[5]) >> shift)); \
-            wfunc(&dst[j + 6], clip((rfunc(&src[j + 6]) + dither[6]) >> shift)); \
-            wfunc(&dst[j + 7], clip((rfunc(&src[j + 7]) + dither[7]) >> shift)); \
-        } \
-        for (; j < length; j++) \
-            wfunc(&dst[j],     (rfunc(&src[j]) + dither[j & 7]) >> shift); \
-        dst += dstStride; \
-        src += srcStride; \
+#define DITHER_COPY(dst, dstStride, src, srcStride, bswap, dbswap)\
+    uint16_t scale= dither_scale[dst_depth-1][src_depth-1];\
+    int shift= src_depth-dst_depth + dither_scale[src_depth-2][dst_depth-1];\
+    for (i = 0; i < height; i++) {\
+        const uint8_t *dither= dithers[src_depth-9][i&7];\
+        for (j = 0; j < length-7; j+=8){\
+            dst[j+0] = dbswap((bswap(src[j+0]) + dither[0])*scale>>shift);\
+            dst[j+1] = dbswap((bswap(src[j+1]) + dither[1])*scale>>shift);\
+            dst[j+2] = dbswap((bswap(src[j+2]) + dither[2])*scale>>shift);\
+            dst[j+3] = dbswap((bswap(src[j+3]) + dither[3])*scale>>shift);\
+            dst[j+4] = dbswap((bswap(src[j+4]) + dither[4])*scale>>shift);\
+            dst[j+5] = dbswap((bswap(src[j+5]) + dither[5])*scale>>shift);\
+            dst[j+6] = dbswap((bswap(src[j+6]) + dither[6])*scale>>shift);\
+            dst[j+7] = dbswap((bswap(src[j+7]) + dither[7])*scale>>shift);\
+        }\
+        for (; j < length; j++)\
+            dst[j] = dbswap((bswap(src[j]) + dither[j&7])*scale>>shift);\
+        dst += dstStride;\
+        src += srcStride;\
     }
 
+
 static int planarCopyWrapper(SwsContext *c, const uint8_t* src[], int srcStride[], int srcSliceY,
                              int srcSliceH, uint8_t* dst[], int dstStride[])
 {
@@ -485,6 +444,7 @@ static int planarCopyWrapper(SwsContext *c, const uint8_t* src[], int srcStride[
         int height= (plane==0 || plane==3) ? srcSliceH: -((-srcSliceH)>>c->chrDstVSubSample);
         const uint8_t *srcPtr= src[plane];
         uint8_t *dstPtr= dst[plane] + dstStride[plane]*y;
+        int shiftonly= plane==1 || plane==2 || (!c->srcRange && plane==0);
 
         if (!dst[plane]) continue;
         // ignore palette for GRAY8
@@ -494,151 +454,84 @@ static int planarCopyWrapper(SwsContext *c, const uint8_t* src[], int srcStride[
                 length*=2;
             fillPlane(dst[plane], dstStride[plane], length, height, y, (plane==3) ? 255 : 128);
         } else {
-            if(is9_OR_10BPS(c->srcFormat)) {
+            if(isNBPS(c->srcFormat) || isNBPS(c->dstFormat)
+               || (is16BPS(c->srcFormat) != is16BPS(c->dstFormat))
+            ) {
                 const int src_depth = av_pix_fmt_descriptors[c->srcFormat].comp[plane].depth_minus1+1;
                 const int dst_depth = av_pix_fmt_descriptors[c->dstFormat].comp[plane].depth_minus1+1;
                 const uint16_t *srcPtr2 = (const uint16_t*)srcPtr;
+                uint16_t *dstPtr2 = (uint16_t*)dstPtr;
 
-                if (is16BPS(c->dstFormat)) {
-                    uint16_t *dstPtr2 = (uint16_t*)dstPtr;
-#define COPY9_OR_10TO16(rfunc, wfunc) \
-                    for (i = 0; i < height; i++) { \
-                        for (j = 0; j < length; j++) { \
-                            int srcpx = rfunc(&srcPtr2[j]); \
-                            wfunc(&dstPtr2[j], (srcpx<<(16-src_depth)) | (srcpx>>(2*src_depth-16))); \
-                        } \
-                        dstPtr2 += dstStride[plane]/2; \
-                        srcPtr2 += srcStride[plane]/2; \
+                if (dst_depth == 8) {
+                    if(isBE(c->srcFormat) == HAVE_BIGENDIAN){
+                        DITHER_COPY(dstPtr, dstStride[plane], srcPtr2, srcStride[plane]/2, , )
+                    } else {
+                        DITHER_COPY(dstPtr, dstStride[plane], srcPtr2, srcStride[plane]/2, av_bswap16, )
                     }
-                    if (isBE(c->dstFormat)) {
-                        if (isBE(c->srcFormat)) {
-                            COPY9_OR_10TO16(AV_RB16, AV_WB16);
-                        } else {
-                            COPY9_OR_10TO16(AV_RL16, AV_WB16);
+                } else if (src_depth == 8) {
+                    for (i = 0; i < height; i++) {
+                        #define COPY816(w)\
+                        if(shiftonly){\
+                            for (j = 0; j < length; j++)\
+                                w(&dstPtr2[j], srcPtr[j]<<(dst_depth-8));\
+                        }else{\
+                            for (j = 0; j < length; j++)\
+                                w(&dstPtr2[j], (srcPtr[j]<<(dst_depth-8)) |\
+                                               (srcPtr[j]>>(2*8-dst_depth)));\
                         }
-                    } else {
-                        if (isBE(c->srcFormat)) {
-                            COPY9_OR_10TO16(AV_RB16, AV_WL16);
+                        if(isBE(c->dstFormat)){
+                            COPY816(AV_WB16)
                         } else {
-                            COPY9_OR_10TO16(AV_RL16, AV_WL16);
+                            COPY816(AV_WL16)
                         }
+                        dstPtr2 += dstStride[plane]/2;
+                        srcPtr  += srcStride[plane];
                     }
-                } else if (is9_OR_10BPS(c->dstFormat)) {
-                    uint16_t *dstPtr2 = (uint16_t*)dstPtr;
-#define COPY9_OR_10TO9_OR_10(loop) \
-                    for (i = 0; i < height; i++) { \
-                        for (j = 0; j < length; j++) { \
-                            loop; \
-                        } \
-                        dstPtr2 += dstStride[plane]/2; \
-                        srcPtr2 += srcStride[plane]/2; \
-                    }
-#define COPY9_OR_10TO9_OR_10_2(rfunc, wfunc) \
-                    if (dst_depth > src_depth) { \
-                        COPY9_OR_10TO9_OR_10(int srcpx = rfunc(&srcPtr2[j]); \
-                            wfunc(&dstPtr2[j], (srcpx << 1) | (srcpx >> 9))); \
-                    } else if (dst_depth < src_depth) { \
-                        DITHER_COPY(dstPtr2, dstStride[plane]/2, wfunc, \
-                                    srcPtr2, srcStride[plane]/2, rfunc, \
-                                    dither_8x8_1, 1, clip9); \
-                    } else { \
-                        COPY9_OR_10TO9_OR_10(wfunc(&dstPtr2[j], rfunc(&srcPtr2[j]))); \
-                    }
-                    if (isBE(c->dstFormat)) {
-                        if (isBE(c->srcFormat)) {
-                            COPY9_OR_10TO9_OR_10_2(AV_RB16, AV_WB16);
-                        } else {
-                            COPY9_OR_10TO9_OR_10_2(AV_RL16, AV_WB16);
-                        }
-                    } else {
-                        if (isBE(c->srcFormat)) {
-                            COPY9_OR_10TO9_OR_10_2(AV_RB16, AV_WL16);
+                } else if (src_depth <= dst_depth) {
+                    for (i = 0; i < height; i++) {
+#define COPY_UP(r,w) \
+    if(shiftonly){\
+        for (j = 0; j < length; j++){ \
+            unsigned int v= r(&srcPtr2[j]);\
+            w(&dstPtr2[j], v<<(dst_depth-src_depth));\
+        }\
+    }else{\
+        for (j = 0; j < length; j++){ \
+            unsigned int v= r(&srcPtr2[j]);\
+            w(&dstPtr2[j], (v<<(dst_depth-src_depth)) | \
+                        (v>>(2*src_depth-dst_depth)));\
+        }\
+    }
+                        if(isBE(c->srcFormat)){
+                            if(isBE(c->dstFormat)){
+                                COPY_UP(AV_RB16, AV_WB16)
+                            } else {
+                                COPY_UP(AV_RB16, AV_WL16)
+                            }
                         } else {
-                            COPY9_OR_10TO9_OR_10_2(AV_RL16, AV_WL16);
+                            if(isBE(c->dstFormat)){
+                                COPY_UP(AV_RL16, AV_WB16)
+                            } else {
+                                COPY_UP(AV_RL16, AV_WL16)
+                            }
                         }
+                        dstPtr2 += dstStride[plane]/2;
+                        srcPtr2 += srcStride[plane]/2;
                     }
                 } else {
-#define W8(a, b) { *(a) = (b); }
-#define COPY9_OR_10TO8(rfunc) \
-                    if (src_depth == 9) { \
-                        DITHER_COPY(dstPtr,  dstStride[plane],   W8, \
-                                    srcPtr2, srcStride[plane]/2, rfunc, \
-                                    dither_8x8_1, 1, av_clip_uint8); \
-                    } else { \
-                        DITHER_COPY(dstPtr,  dstStride[plane],   W8, \
-                                    srcPtr2, srcStride[plane]/2, rfunc, \
-                                    dither_8x8_3, 2, av_clip_uint8); \
-                    }
-                    if (isBE(c->srcFormat)) {
-                        COPY9_OR_10TO8(AV_RB16);
-                    } else {
-                        COPY9_OR_10TO8(AV_RL16);
-                    }
-                }
-            } else if(is9_OR_10BPS(c->dstFormat)) {
-                const int dst_depth = av_pix_fmt_descriptors[c->dstFormat].comp[plane].depth_minus1+1;
-                uint16_t *dstPtr2 = (uint16_t*)dstPtr;
-
-                if (is16BPS(c->srcFormat)) {
-                    const uint16_t *srcPtr2 = (const uint16_t*)srcPtr;
-#define COPY16TO9_OR_10(rfunc, wfunc) \
-                    if (dst_depth == 9) { \
-                        DITHER_COPY(dstPtr2, dstStride[plane]/2, wfunc, \
-                                    srcPtr2, srcStride[plane]/2, rfunc, \
-                                    dither_8x8_128, 7, clip9); \
-                    } else { \
-                        DITHER_COPY(dstPtr2, dstStride[plane]/2, wfunc, \
-                                    srcPtr2, srcStride[plane]/2, rfunc, \
-                                    dither_8x8_64, 6, clip10); \
-                    }
-                    if (isBE(c->dstFormat)) {
-                        if (isBE(c->srcFormat)) {
-                            COPY16TO9_OR_10(AV_RB16, AV_WB16);
+                    if(isBE(c->srcFormat) == HAVE_BIGENDIAN){
+                        if(isBE(c->dstFormat) == HAVE_BIGENDIAN){
+                            DITHER_COPY(dstPtr2, dstStride[plane]/2, srcPtr2, srcStride[plane]/2, , )
                         } else {
-                            COPY16TO9_OR_10(AV_RL16, AV_WB16);
+                            DITHER_COPY(dstPtr2, dstStride[plane]/2, srcPtr2, srcStride[plane]/2, , av_bswap16)
                         }
-                    } else {
-                        if (isBE(c->srcFormat)) {
-                            COPY16TO9_OR_10(AV_RB16, AV_WL16);
+                    }else{
+                        if(isBE(c->dstFormat) == HAVE_BIGENDIAN){
+                            DITHER_COPY(dstPtr2, dstStride[plane]/2, srcPtr2, srcStride[plane]/2, av_bswap16, )
                         } else {
-                            COPY16TO9_OR_10(AV_RL16, AV_WL16);
+                            DITHER_COPY(dstPtr2, dstStride[plane]/2, srcPtr2, srcStride[plane]/2, av_bswap16, av_bswap16)
                         }
                     }
-                } else /* 8bit */ {
-#define COPY8TO9_OR_10(wfunc) \
-                    for (i = 0; i < height; i++) { \
-                        for (j = 0; j < length; j++) { \
-                            const int srcpx = srcPtr[j]; \
-                            wfunc(&dstPtr2[j], (srcpx<<(dst_depth-8)) | (srcpx >> (16-dst_depth))); \
-                        } \
-                        dstPtr2 += dstStride[plane]/2; \
-                        srcPtr  += srcStride[plane]; \
-                    }
-                    if (isBE(c->dstFormat)) {
-                        COPY8TO9_OR_10(AV_WB16);
-                    } else {
-                        COPY8TO9_OR_10(AV_WL16);
-                    }
-                }
-            } else if(is16BPS(c->srcFormat) && !is16BPS(c->dstFormat)) {
-                const uint16_t *srcPtr2 = (const uint16_t*)srcPtr;
-#define COPY16TO8(rfunc) \
-                    DITHER_COPY(dstPtr,  dstStride[plane],   W8, \
-                                srcPtr2, srcStride[plane]/2, rfunc, \
-                                dither_8x8_256, 8, av_clip_uint8);
-                if (isBE(c->srcFormat)) {
-                    COPY16TO8(AV_RB16);
-                } else {
-                    COPY16TO8(AV_RL16);
-                }
-            } else if(!is16BPS(c->srcFormat) && is16BPS(c->dstFormat)) {
-                for (i=0; i<height; i++) {
-                    for (j=0; j<length; j++) {
-                        dstPtr[ j<<1   ] = srcPtr[j];
-                        dstPtr[(j<<1)+1] = srcPtr[j];
-                    }
-                    srcPtr+= srcStride[plane];
-                    dstPtr+= dstStride[plane];
                 }
             } else if(is16BPS(c->srcFormat) && is16BPS(c->dstFormat)
                   && isBE(c->srcFormat) != isBE(c->dstFormat)) {
@@ -779,7 +672,7 @@ static void reset_ptr(const uint8_t* src[], int format)
 {
     if(!isALPHA(format))
         src[3]=NULL;
-    if(!isPlanarYUV(format)) {
+    if(!isPlanar(format)) {
         src[3]=src[2]=NULL;
 
         if (!usePal(format))
@@ -837,9 +730,10 @@ int sws_scale(struct SwsContext *c, const uint8_t* const srcSlice[],
 
     if (usePal(c->srcFormat)) {
         for (i=0; i<256; i++) {
-            int p, r, g, b,y,u,v;
+            int p, r, g, b, y, u, v, a = 0xff;
             if(c->srcFormat == PIX_FMT_PAL8) {
                 p=((const uint32_t*)(srcSlice[1]))[i];
+                a= (p>>24)&0xFF;
                 r= (p>>16)&0xFF;
                 g= (p>> 8)&0xFF;
                 b=  p     &0xFF;
@@ -855,7 +749,7 @@ int sws_scale(struct SwsContext *c, const uint8_t* const srcSlice[],
                 r= (i>>3    )*255;
                 g= ((i>>1)&3)*85;
                 b= (i&1     )*255;
-            } else if(c->srcFormat == PIX_FMT_GRAY8 || c->srcFormat == PIX_FMT_Y400A) {
+            } else if(c->srcFormat == PIX_FMT_GRAY8 || c->srcFormat == PIX_FMT_GRAY8A) {
                 r = g = b = i;
             } else {
                 assert(c->srcFormat == PIX_FMT_BGR4_BYTE);
@@ -866,33 +760,33 @@ int sws_scale(struct SwsContext *c, const uint8_t* const srcSlice[],
             y= av_clip_uint8((RY*r + GY*g + BY*b + ( 33<<(RGB2YUV_SHIFT-1)))>>RGB2YUV_SHIFT);
             u= av_clip_uint8((RU*r + GU*g + BU*b + (257<<(RGB2YUV_SHIFT-1)))>>RGB2YUV_SHIFT);
             v= av_clip_uint8((RV*r + GV*g + BV*b + (257<<(RGB2YUV_SHIFT-1)))>>RGB2YUV_SHIFT);
-            c->pal_yuv[i]= y + (u<<8) + (v<<16);
+            c->pal_yuv[i]= y + (u<<8) + (v<<16) + (a<<24);
 
             switch(c->dstFormat) {
             case PIX_FMT_BGR32:
 #if !HAVE_BIGENDIAN
             case PIX_FMT_RGB24:
 #endif
-                c->pal_rgb[i]=  r + (g<<8) + (b<<16);
+                c->pal_rgb[i]=  r + (g<<8) + (b<<16) + (a<<24);
                 break;
             case PIX_FMT_BGR32_1:
 #if HAVE_BIGENDIAN
             case PIX_FMT_BGR24:
 #endif
-                c->pal_rgb[i]= (r + (g<<8) + (b<<16)) << 8;
+                c->pal_rgb[i]= a + (r<<8) + (g<<16) + (b<<24);
                 break;
             case PIX_FMT_RGB32_1:
 #if HAVE_BIGENDIAN
             case PIX_FMT_RGB24:
 #endif
-                c->pal_rgb[i]= (b + (g<<8) + (r<<16)) << 8;
+                c->pal_rgb[i]= a + (b<<8) + (g<<16) + (r<<24);
                 break;
             case PIX_FMT_RGB32:
 #if !HAVE_BIGENDIAN
             case PIX_FMT_BGR24:
 #endif
             default:
-                c->pal_rgb[i]=  b + (g<<8) + (r<<16);
+                c->pal_rgb[i]=  b + (g<<8) + (r<<16) + (a<<24);
             }
         }
     }
diff --git a/libswscale/utils.c b/libswscale/utils.c
index cfe4be9..6c2a8de 100644
--- a/libswscale/utils.c
+++ b/libswscale/utils.c
@@ -1,23 +1,25 @@
 /*
  * Copyright (C) 2001-2003 Michael Niedermayer <michaelni@gmx.at>
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
+#define _SVID_SOURCE //needed for MAP_ANONYMOUS
+#define _DARWIN_C_SOURCE // needed for MAP_ANON
 #include <inttypes.h>
 #include <string.h>
 #include <math.h>
@@ -44,6 +46,7 @@
 #include "libavutil/bswap.h"
 #include "libavutil/opt.h"
 #include "libavutil/pixdesc.h"
+#include "libavutil/avassert.h"
 
 unsigned swscale_version(void)
 {
@@ -52,13 +55,13 @@ unsigned swscale_version(void)
 
 const char *swscale_configuration(void)
 {
-    return LIBAV_CONFIGURATION;
+    return FFMPEG_CONFIGURATION;
 }
 
 const char *swscale_license(void)
 {
 #define LICENSE_PREFIX "libswscale license: "
-    return LICENSE_PREFIX LIBAV_LICENSE + sizeof(LICENSE_PREFIX) - 1;
+    return LICENSE_PREFIX FFMPEG_LICENSE + sizeof(LICENSE_PREFIX) - 1;
 }
 
 #define RET 0xC3 //near return opcode for x86
@@ -104,6 +107,8 @@ const static FormatEntry format_entries[PIX_FMT_NB] = {
     [PIX_FMT_YUVA420P]    = { 1 , 1 },
     [PIX_FMT_RGB48BE]     = { 1 , 1 },
     [PIX_FMT_RGB48LE]     = { 1 , 1 },
+    [PIX_FMT_RGBA64BE]    = { 0 , 0 },
+    [PIX_FMT_RGBA64LE]    = { 0 , 0 },
     [PIX_FMT_RGB565BE]    = { 1 , 1 },
     [PIX_FMT_RGB565LE]    = { 1 , 1 },
     [PIX_FMT_RGB555BE]    = { 1 , 1 },
@@ -125,6 +130,8 @@ const static FormatEntry format_entries[PIX_FMT_NB] = {
     [PIX_FMT_Y400A]       = { 1 , 0 },
     [PIX_FMT_BGR48BE]     = { 1 , 1 },
     [PIX_FMT_BGR48LE]     = { 1 , 1 },
+    [PIX_FMT_BGRA64BE]    = { 0 , 0 },
+    [PIX_FMT_BGRA64LE]    = { 0 , 0 },
     [PIX_FMT_YUV420P9BE]  = { 1 , 1 },
     [PIX_FMT_YUV420P9LE]  = { 1 , 1 },
     [PIX_FMT_YUV420P10BE] = { 1 , 1 },
@@ -135,6 +142,7 @@ const static FormatEntry format_entries[PIX_FMT_NB] = {
     [PIX_FMT_YUV444P9LE]  = { 1 , 1 },
     [PIX_FMT_YUV444P10BE] = { 1 , 1 },
     [PIX_FMT_YUV444P10LE] = { 1 , 1 },
+    [PIX_FMT_GBR24P]      = { 1 , 0 },
 };
 
 int sws_isSupportedInput(enum PixelFormat pix_fmt)
@@ -151,13 +159,12 @@ int sws_isSupportedOutput(enum PixelFormat pix_fmt)
 
 extern const int32_t ff_yuv2rgb_coeffs[8][4];
 
+#if FF_API_SWS_FORMAT_NAME
 const char *sws_format_name(enum PixelFormat format)
 {
-    if ((unsigned)format < PIX_FMT_NB && av_pix_fmt_descriptors[format].name)
-        return av_pix_fmt_descriptors[format].name;
-    else
-        return "Unknown format";
+    return av_get_pix_fmt_name(format);
 }
+#endif
 
 static double getSplineCoeff(double a, double b, double c, double d, double dist)
 {
@@ -703,7 +710,7 @@ int sws_getColorspaceDetails(struct SwsContext *c, int **inv_table,
                              int *srcRange, int **table, int *dstRange,
                              int *brightness, int *contrast, int *saturation)
 {
-    if (isYUV(c->dstFormat) || isGray(c->dstFormat)) return -1;
+    if (!c || isYUV(c->dstFormat) || isGray(c->dstFormat)) return -1;
 
     *inv_table = c->srcColorspaceTable;
     *table     = c->dstColorspaceTable;
@@ -739,7 +746,7 @@ SwsContext *sws_alloc_context(void)
 
 int sws_init_context(SwsContext *c, SwsFilter *srcFilter, SwsFilter *dstFilter)
 {
-    int i;
+    int i, j;
     int usesVFilter, usesHFilter;
     int unscaled;
     SwsFilter dummyFilter= {NULL, NULL, NULL, NULL};
@@ -747,7 +754,7 @@ int sws_init_context(SwsContext *c, SwsFilter *srcFilter, SwsFilter *dstFilter)
     int srcH= c->srcH;
     int dstW= c->dstW;
     int dstH= c->dstH;
-    int dst_stride = FFALIGN(dstW * sizeof(int16_t) + 16, 16), dst_stride_px = dst_stride >> 1;
+    int dst_stride = FFALIGN(dstW * sizeof(int16_t)+66, 16);
     int flags, cpu_flags;
     enum PixelFormat srcFormat= c->srcFormat;
     enum PixelFormat dstFormat= c->dstFormat;
@@ -760,11 +767,11 @@ int sws_init_context(SwsContext *c, SwsFilter *srcFilter, SwsFilter *dstFilter)
     unscaled = (srcW == dstW && srcH == dstH);
 
     if (!sws_isSupportedInput(srcFormat)) {
-        av_log(c, AV_LOG_ERROR, "%s is not supported as input pixel format\n", sws_format_name(srcFormat));
+        av_log(c, AV_LOG_ERROR, "%s is not supported as input pixel format\n", av_get_pix_fmt_name(srcFormat));
         return AVERROR(EINVAL);
     }
     if (!sws_isSupportedOutput(dstFormat)) {
-        av_log(c, AV_LOG_ERROR, "%s is not supported as output pixel format\n", sws_format_name(dstFormat));
+        av_log(c, AV_LOG_ERROR, "%s is not supported as output pixel format\n", av_get_pix_fmt_name(dstFormat));
         return AVERROR(EINVAL);
     }
 
@@ -812,19 +819,6 @@ int sws_init_context(SwsContext *c, SwsFilter *srcFilter, SwsFilter *dstFilter)
     getSubSampleFactors(&c->chrDstHSubSample, &c->chrDstVSubSample, dstFormat);
 
     // reuse chroma for 2 pixels RGB/BGR unless user wants full chroma interpolation
-    if (flags & SWS_FULL_CHR_H_INT &&
-        dstFormat != PIX_FMT_RGBA &&
-        dstFormat != PIX_FMT_ARGB &&
-        dstFormat != PIX_FMT_BGRA &&
-        dstFormat != PIX_FMT_ABGR &&
-        dstFormat != PIX_FMT_RGB24 &&
-        dstFormat != PIX_FMT_BGR24) {
-        av_log(c, AV_LOG_ERROR,
-               "full chroma interpolation for destination format '%s' not yet implemented\n",
-               sws_format_name(dstFormat));
-        flags &= ~SWS_FULL_CHR_H_INT;
-        c->flags = flags;
-    }
     if (isAnyRGB(dstFormat) && !(flags&SWS_FULL_CHR_H_INT)) c->chrDstHSubSample=1;
 
     // drop some chroma lines if the user wants it
@@ -852,7 +846,7 @@ int sws_init_context(SwsContext *c, SwsFilter *srcFilter, SwsFilter *dstFilter)
         if (c->swScale) {
             if (flags&SWS_PRINT_INFO)
                 av_log(c, AV_LOG_INFO, "using unscaled %s -> %s special converter\n",
-                       sws_format_name(srcFormat), sws_format_name(dstFormat));
+                       av_get_pix_fmt_name(srcFormat), av_get_pix_fmt_name(dstFormat));
             return 0;
         }
     }
@@ -863,18 +857,18 @@ int sws_init_context(SwsContext *c, SwsFilter *srcFilter, SwsFilter *dstFilter)
     c->dstBpc = 1 + av_pix_fmt_descriptors[dstFormat].comp[0].depth_minus1;
     if (c->dstBpc < 8)
         c->dstBpc = 8;
+    if (isAnyRGB(srcFormat) || srcFormat == PIX_FMT_PAL8)
+        c->srcBpc = 16;
     if (c->dstBpc == 16)
         dst_stride <<= 1;
-    FF_ALLOC_OR_GOTO(c, c->formatConvBuffer,
-                     FFALIGN(srcW, 16) * 2 * FFALIGN(c->srcBpc, 8) >> 3,
-                     fail);
+    FF_ALLOC_OR_GOTO(c, c->formatConvBuffer, FFALIGN(srcW*2+78, 16) * 2, fail);
     if (HAVE_MMX2 && cpu_flags & AV_CPU_FLAG_MMX2 && c->srcBpc == 8 && c->dstBpc <= 10) {
         c->canMMX2BeUsed= (dstW >=srcW && (dstW&31)==0 && (srcW&15)==0) ? 1 : 0;
         if (!c->canMMX2BeUsed && dstW >=srcW && (srcW&15)==0 && (flags&SWS_FAST_BILINEAR)) {
             if (flags&SWS_PRINT_INFO)
                 av_log(c, AV_LOG_INFO, "output width is not a multiple of 32 -> no MMX2 scaler\n");
         }
-        if (usesHFilter) c->canMMX2BeUsed=0;
+        if (usesHFilter || isNBPS(c->srcFormat) || is16BPS(c->srcFormat) || isAnyRGB(c->srcFormat)) c->canMMX2BeUsed=0;
     }
     else
         c->canMMX2BeUsed=0;
@@ -894,7 +888,7 @@ int sws_init_context(SwsContext *c, SwsFilter *srcFilter, SwsFilter *dstFilter)
             c->chrXInc+= 20;
         }
         //we don't use the x86 asm scaler if MMX is available
-        else if (HAVE_MMX && cpu_flags & AV_CPU_FLAG_MMX) {
+        else if (HAVE_MMX && cpu_flags & AV_CPU_FLAG_MMX && c->dstBpc <= 10) {
             c->lumXInc = ((srcW-2)<<16)/(dstW-2) - 20;
             c->chrXInc = ((c->chrSrcW-2)<<16)/(c->chrDstW-2) - 20;
         }
@@ -919,7 +913,11 @@ int sws_init_context(SwsContext *c, SwsFilter *srcFilter, SwsFilter *dstFilter)
             c->chrMmx2FilterCode = av_malloc(c->chrMmx2FilterCodeSize);
 #endif
 
+#ifdef MAP_ANONYMOUS
+            if (c->lumMmx2FilterCode == MAP_FAILED || c->chrMmx2FilterCode == MAP_FAILED)
+#else
             if (!c->lumMmx2FilterCode || !c->chrMmx2FilterCode)
+#endif
                 return AVERROR(ENOMEM);
             FF_ALLOCZ_OR_GOTO(c, c->hLumFilter   , (dstW        /8+8)*sizeof(int16_t), fail);
             FF_ALLOCZ_OR_GOTO(c, c->hChrFilter   , (c->chrDstW  /4+8)*sizeof(int16_t), fail);
@@ -996,7 +994,7 @@ int sws_init_context(SwsContext *c, SwsFilter *srcFilter, SwsFilter *dstFilter)
     c->vLumBufSize= c->vLumFilterSize;
     c->vChrBufSize= c->vChrFilterSize;
     for (i=0; i<dstH; i++) {
-        int chrI= i*c->chrDstH / dstH;
+        int chrI= (int64_t)i*c->chrDstH / dstH;
         int nextSlice= FFMAX(c->vLumFilterPos[i   ] + c->vLumFilterSize - 1,
                            ((c->vChrFilterPos[chrI] + c->vChrFilterSize - 1)<<c->chrSrcVSubSample));
 
@@ -1021,9 +1019,9 @@ int sws_init_context(SwsContext *c, SwsFilter *srcFilter, SwsFilter *dstFilter)
         FF_ALLOCZ_OR_GOTO(c, c->lumPixBuf[i+c->vLumBufSize], dst_stride+16, fail);
         c->lumPixBuf[i] = c->lumPixBuf[i+c->vLumBufSize];
     }
-    // 64 / (c->dstBpc & ~7) is the same as 16 / sizeof(scaling_intermediate)
-    c->uv_off_px   = dst_stride_px + 64 / (c->dstBpc &~ 7);
-    c->uv_off_byte = dst_stride + 16;
+    // 64 / c->scalingBpp is the same as 16 / sizeof(scaling_intermediate)
+    c->uv_off   = (dst_stride>>1) + 64 / (c->dstBpc &~ 7);
+    c->uv_offx2 = dst_stride + 16;
     for (i=0; i<c->vChrBufSize; i++) {
         FF_ALLOC_OR_GOTO(c, c->chrUPixBuf[i+c->vChrBufSize], dst_stride*2+32, fail);
         c->chrUPixBuf[i] = c->chrUPixBuf[i+c->vChrBufSize];
@@ -1037,7 +1035,13 @@ int sws_init_context(SwsContext *c, SwsFilter *srcFilter, SwsFilter *dstFilter)
 
     //try to avoid drawing green stuff between the right end and the stride end
     for (i=0; i<c->vChrBufSize; i++)
-        memset(c->chrUPixBuf[i], 64, dst_stride*2+1);
+        if(av_pix_fmt_descriptors[c->dstFormat].comp[0].depth_minus1 == 15){
+            av_assert0(c->dstBpc > 10);
+            for(j=0; j<dst_stride/2+1; j++)
+                ((int32_t*)(c->chrUPixBuf[i]))[j] = 1<<18;
+        } else
+            for(j=0; j<dst_stride+1; j++)
+                ((int16_t*)(c->chrUPixBuf[i]))[j] = 1<<14;
 
     assert(c->chrDstH <= dstH);
 
@@ -1056,7 +1060,7 @@ int sws_init_context(SwsContext *c, SwsFilter *srcFilter, SwsFilter *dstFilter)
         else                              av_log(c, AV_LOG_INFO, "ehh flags invalid?! ");
 
         av_log(c, AV_LOG_INFO, "from %s to %s%s ",
-               sws_format_name(srcFormat),
+               av_get_pix_fmt_name(srcFormat),
 #ifdef DITHER1XBPP
                dstFormat == PIX_FMT_BGR555 || dstFormat == PIX_FMT_BGR565 ||
                dstFormat == PIX_FMT_RGB444BE || dstFormat == PIX_FMT_RGB444LE ||
@@ -1064,7 +1068,7 @@ int sws_init_context(SwsContext *c, SwsFilter *srcFilter, SwsFilter *dstFilter)
 #else
                "",
 #endif
-               sws_format_name(dstFormat));
+               av_get_pix_fmt_name(dstFormat));
 
         if      (HAVE_MMX2     && cpu_flags & AV_CPU_FLAG_MMX2)    av_log(c, AV_LOG_INFO, "using MMX2\n");
         else if (HAVE_AMD3DNOW && cpu_flags & AV_CPU_FLAG_3DNOW)   av_log(c, AV_LOG_INFO, "using 3DNOW\n");
@@ -1465,7 +1469,7 @@ void sws_freeContext(SwsContext *c)
 #endif /* HAVE_MMX */
 
     av_freep(&c->yuvTable);
-    av_free(c->formatConvBuffer);
+    av_freep(&c->formatConvBuffer);
 
     av_free(c);
 }
diff --git a/libswscale/x86/rgb2rgb.c b/libswscale/x86/rgb2rgb.c
index 282618c..ed7f5ad 100644
--- a/libswscale/x86/rgb2rgb.c
+++ b/libswscale/x86/rgb2rgb.c
@@ -6,20 +6,20 @@
  * Written by Nick Kurshev.
  * palette & YUV & runtime CPU stuff by Michael (michaelni@gmx.at)
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
diff --git a/libswscale/x86/rgb2rgb_template.c b/libswscale/x86/rgb2rgb_template.c
index c255610..baef3f8 100644
--- a/libswscale/x86/rgb2rgb_template.c
+++ b/libswscale/x86/rgb2rgb_template.c
@@ -7,20 +7,20 @@
  * palette & YUV & runtime CPU stuff by Michael (michaelni@gmx.at)
  * lot of big-endian byte order fixes by Alex Beregszaszi
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
diff --git a/libswscale/x86/scale.asm b/libswscale/x86/scale.asm
index d355894..09313b9 100644
--- a/libswscale/x86/scale.asm
+++ b/libswscale/x86/scale.asm
@@ -411,11 +411,13 @@ SCALE_FUNC %1, %2, X, X8, %3, 7, %4
 SCALE_FUNCS  8, 15, %1, %2
 SCALE_FUNCS  9, 15, %1, %3
 SCALE_FUNCS 10, 15, %1, %3
+SCALE_FUNCS 14, 15, %1, %3
 SCALE_FUNCS 16, 15, %1, %4
 %endif ; !sse4
 SCALE_FUNCS  8, 19, %1, %2
 SCALE_FUNCS  9, 19, %1, %3
 SCALE_FUNCS 10, 19, %1, %3
+SCALE_FUNCS 14, 19, %1, %3
 SCALE_FUNCS 16, 19, %1, %4
 %endmacro
 
diff --git a/libswscale/x86/swscale_mmx.c b/libswscale/x86/swscale_mmx.c
index dd7aea1..815055e 100644
--- a/libswscale/x86/swscale_mmx.c
+++ b/libswscale/x86/swscale_mmx.c
@@ -1,20 +1,20 @@
 /*
  * Copyright (C) 2001-2003 Michael Niedermayer <michaelni@gmx.at>
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -72,14 +72,14 @@ DECLARE_ASM_CONST(8, uint64_t, ff_bgr24toY1Coeff) = 0x0C88000040870C88ULL;
 DECLARE_ASM_CONST(8, uint64_t, ff_bgr24toY2Coeff) = 0x20DE4087000020DEULL;
 DECLARE_ASM_CONST(8, uint64_t, ff_rgb24toY1Coeff) = 0x20DE0000408720DEULL;
 DECLARE_ASM_CONST(8, uint64_t, ff_rgb24toY2Coeff) = 0x0C88408700000C88ULL;
-DECLARE_ASM_CONST(8, uint64_t, ff_bgr24toYOffset) = 0x0008400000084000ULL;
+DECLARE_ASM_CONST(8, uint64_t, ff_bgr24toYOffset) = 0x0008010000080100ULL;
 
 DECLARE_ASM_CONST(8, uint64_t, ff_bgr24toUV)[2][4] = {
     {0x38380000DAC83838ULL, 0xECFFDAC80000ECFFULL, 0xF6E40000D0E3F6E4ULL, 0x3838D0E300003838ULL},
     {0xECFF0000DAC8ECFFULL, 0x3838DAC800003838ULL, 0x38380000D0E33838ULL, 0xF6E4D0E30000F6E4ULL},
 };
 
-DECLARE_ASM_CONST(8, uint64_t, ff_bgr24toUVOffset)= 0x0040400000404000ULL;
+DECLARE_ASM_CONST(8, uint64_t, ff_bgr24toUVOffset)= 0x0040010000400100ULL;
 
 //MMX versions
 #if HAVE_MMX
@@ -187,10 +187,12 @@ extern void ff_hscale ## from_bpc ## to ## to_bpc ## _ ## filter_n ## _ ## opt(
     SCALE_FUNC(filter_n,  8, 15, opt); \
     SCALE_FUNC(filter_n,  9, 15, opt); \
     SCALE_FUNC(filter_n, 10, 15, opt); \
+    SCALE_FUNC(filter_n, 14, 15, opt); \
     SCALE_FUNC(filter_n, 16, 15, opt); \
     SCALE_FUNC(filter_n,  8, 19, opt); \
     SCALE_FUNC(filter_n,  9, 19, opt); \
     SCALE_FUNC(filter_n, 10, 19, opt); \
+    SCALE_FUNC(filter_n, 14, 19, opt); \
     SCALE_FUNC(filter_n, 16, 19, opt)
 
 #define SCALE_FUNCS_MMX(opt) \
@@ -233,7 +235,10 @@ void ff_sws_init_swScale_mmx(SwsContext *c)
     } else if (c->srcBpc == 10) { \
         hscalefn = c->dstBpc <= 10 ? ff_hscale10to15_ ## filtersize ## _ ## opt2 : \
                                      ff_hscale10to19_ ## filtersize ## _ ## opt1; \
-    } else /* c->srcBpc == 16 */ { \
+    } else if (c->srcBpc == 14 || ((c->srcFormat==PIX_FMT_PAL8||isAnyRGB(c->srcFormat)) && av_pix_fmt_descriptors[c->srcFormat].comp[0].depth_minus1<15)) { \
+        hscalefn = c->dstBpc <= 10 ? ff_hscale14to15_ ## filtersize ## _ ## opt2 : \
+                                     ff_hscale14to19_ ## filtersize ## _ ## opt1; \
+    } else { /* c->srcBpc == 16 */ \
         hscalefn = c->dstBpc <= 10 ? ff_hscale16to15_ ## filtersize ## _ ## opt2 : \
                                      ff_hscale16to19_ ## filtersize ## _ ## opt1; \
     } \
diff --git a/libswscale/x86/swscale_template.c b/libswscale/x86/swscale_template.c
index 50e4e4a..c934c78 100644
--- a/libswscale/x86/swscale_template.c
+++ b/libswscale/x86/swscale_template.c
@@ -1,20 +1,20 @@
 /*
  * Copyright (C) 2001-2003 Michael Niedermayer <michaelni@gmx.at>
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -118,7 +118,7 @@ static void RENAME(yuv2yuvX)(SwsContext *c, const int16_t *lumFilter,
     const uint8_t *lumDither = c->lumDither8, *chrDither = c->chrDither8;
 
     if (uDest) {
-        x86_reg uv_off = c->uv_off_byte >> 1;
+        x86_reg uv_off = c->uv_offx2 >> 1;
         dither_8to16(c, chrDither, 0);
         YSCALEYUV2YV12X(CHR_MMX_FILTER_OFFSET, uDest, chrDstW, 0)
         dither_8to16(c, chrDither, 1);
@@ -166,17 +166,12 @@ static void RENAME(yuv2yuvX)(SwsContext *c, const int16_t *lumFilter,
         "paddd                            %%mm2, %%mm6      \n\t"\
         "paddd                            %%mm0, %%mm7      \n\t"\
         " jnz                                1b             \n\t"\
-        "psrad                              $16, %%mm4      \n\t"\
-        "psrad                              $16, %%mm5      \n\t"\
-        "psrad                              $16, %%mm6      \n\t"\
-        "psrad                              $16, %%mm7      \n\t"\
-        "movq             "VROUNDER_OFFSET"(%0), %%mm0      \n\t"\
+        "psrad                              $19, %%mm4      \n\t"\
+        "psrad                              $19, %%mm5      \n\t"\
+        "psrad                              $19, %%mm6      \n\t"\
+        "psrad                              $19, %%mm7      \n\t"\
         "packssdw                         %%mm5, %%mm4      \n\t"\
         "packssdw                         %%mm7, %%mm6      \n\t"\
-        "paddw                            %%mm0, %%mm4      \n\t"\
-        "paddw                            %%mm0, %%mm6      \n\t"\
-        "psraw                               $3, %%mm4      \n\t"\
-        "psraw                               $3, %%mm6      \n\t"\
         "packuswb                         %%mm6, %%mm4      \n\t"\
         MOVNTQ(%%mm4, (%1, %3))\
         "add                                 $8, %3         \n\t"\
@@ -261,7 +256,7 @@ static void RENAME(yuv2yuvX_ar)(SwsContext *c, const int16_t *lumFilter,
     const uint8_t *lumDither = c->lumDither8, *chrDither = c->chrDither8;
 
     if (uDest) {
-        x86_reg uv_off = c->uv_off_byte >> 1;
+        x86_reg uv_off = c->uv_offx2 >> 1;
         dither_8to32(c, chrDither, 0);
         YSCALEYUV2YV12X_ACCURATE(CHR_MMX_FILTER_OFFSET, uDest, chrDstW, 0)
         dither_8to32(c, chrDither, 1);
@@ -324,7 +319,8 @@ static void RENAME(yuv2yuv1_ar)(SwsContext *c, const int16_t *lumSrc,
 
     while (p--) {
         if (dst[p]) {
-            dither_8to16(c, (p == 2 || p == 3) ? chrDither : lumDither, p == 2);
+            int i;
+            for(i=0; i<8; i++) c->dither16[i] = (p == 2 || p == 3) ? lumDither[i] : chrDither[i];
             __asm__ volatile(
                 "mov %2, %%"REG_a"                    \n\t"
                 "movq    "DITHER16"+0(%3), %%mm6      \n\t"
@@ -574,7 +570,7 @@ static void RENAME(yuv2rgb32_X_ar)(SwsContext *c, const int16_t *lumFilter,
 {
     x86_reg dummy=0;
     x86_reg dstW_reg = dstW;
-    x86_reg uv_off = c->uv_off_byte;
+    x86_reg uv_off = c->uv_offx2;
 
     if (CONFIG_SWSCALE_ALPHA && c->alpPixBuf) {
         YSCALEYUV2PACKEDX_ACCURATE
@@ -607,7 +603,7 @@ static void RENAME(yuv2rgb32_X)(SwsContext *c, const int16_t *lumFilter,
 {
     x86_reg dummy=0;
     x86_reg dstW_reg = dstW;
-    x86_reg uv_off = c->uv_off_byte;
+    x86_reg uv_off = c->uv_offx2;
 
     if (CONFIG_SWSCALE_ALPHA && c->alpPixBuf) {
         YSCALEYUV2PACKEDX
@@ -664,7 +660,7 @@ static void RENAME(yuv2rgb565_X_ar)(SwsContext *c, const int16_t *lumFilter,
 {
     x86_reg dummy=0;
     x86_reg dstW_reg = dstW;
-    x86_reg uv_off = c->uv_off_byte;
+    x86_reg uv_off = c->uv_offx2;
 
     YSCALEYUV2PACKEDX_ACCURATE
     YSCALEYUV2RGBX
@@ -688,7 +684,7 @@ static void RENAME(yuv2rgb565_X)(SwsContext *c, const int16_t *lumFilter,
 {
     x86_reg dummy=0;
     x86_reg dstW_reg = dstW;
-    x86_reg uv_off = c->uv_off_byte;
+    x86_reg uv_off = c->uv_offx2;
 
     YSCALEYUV2PACKEDX
     YSCALEYUV2RGBX
@@ -741,7 +737,7 @@ static void RENAME(yuv2rgb555_X_ar)(SwsContext *c, const int16_t *lumFilter,
 {
     x86_reg dummy=0;
     x86_reg dstW_reg = dstW;
-    x86_reg uv_off = c->uv_off_byte;
+    x86_reg uv_off = c->uv_offx2;
 
     YSCALEYUV2PACKEDX_ACCURATE
     YSCALEYUV2RGBX
@@ -765,7 +761,7 @@ static void RENAME(yuv2rgb555_X)(SwsContext *c, const int16_t *lumFilter,
 {
     x86_reg dummy=0;
     x86_reg dstW_reg = dstW;
-    x86_reg uv_off = c->uv_off_byte;
+    x86_reg uv_off = c->uv_offx2;
 
     YSCALEYUV2PACKEDX
     YSCALEYUV2RGBX
@@ -898,7 +894,7 @@ static void RENAME(yuv2bgr24_X_ar)(SwsContext *c, const int16_t *lumFilter,
 {
     x86_reg dummy=0;
     x86_reg dstW_reg = dstW;
-    x86_reg uv_off = c->uv_off_byte;
+    x86_reg uv_off = c->uv_offx2;
 
     YSCALEYUV2PACKEDX_ACCURATE
     YSCALEYUV2RGBX
@@ -922,7 +918,7 @@ static void RENAME(yuv2bgr24_X)(SwsContext *c, const int16_t *lumFilter,
 {
     x86_reg dummy=0;
     x86_reg dstW_reg = dstW;
-    x86_reg uv_off = c->uv_off_byte;
+    x86_reg uv_off = c->uv_offx2;
 
     YSCALEYUV2PACKEDX
     YSCALEYUV2RGBX
@@ -963,7 +959,7 @@ static void RENAME(yuv2yuyv422_X_ar)(SwsContext *c, const int16_t *lumFilter,
 {
     x86_reg dummy=0;
     x86_reg dstW_reg = dstW;
-    x86_reg uv_off = c->uv_off_byte;
+    x86_reg uv_off = c->uv_offx2;
 
     YSCALEYUV2PACKEDX_ACCURATE
     /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */
@@ -984,7 +980,7 @@ static void RENAME(yuv2yuyv422_X)(SwsContext *c, const int16_t *lumFilter,
 {
     x86_reg dummy=0;
     x86_reg dstW_reg = dstW;
-    x86_reg uv_off = c->uv_off_byte;
+    x86_reg uv_off = c->uv_offx2;
 
     YSCALEYUV2PACKEDX
     /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */
@@ -1002,10 +998,10 @@ static void RENAME(yuv2yuyv422_X)(SwsContext *c, const int16_t *lumFilter,
     "1:                                 \n\t"\
     "movq     (%2, "#index"), %%mm2     \n\t" /* uvbuf0[eax]*/\
     "movq     (%3, "#index"), %%mm3     \n\t" /* uvbuf1[eax]*/\
-    "add   "UV_OFF_PX"("#c"), "#index"  \n\t" \
+    "add           "UV_OFFx2"("#c"), "#index"  \n\t" \
     "movq     (%2, "#index"), %%mm5     \n\t" /* uvbuf0[eax+2048]*/\
     "movq     (%3, "#index"), %%mm4     \n\t" /* uvbuf1[eax+2048]*/\
-    "sub   "UV_OFF_PX"("#c"), "#index"  \n\t" \
+    "sub           "UV_OFFx2"("#c"), "#index"  \n\t" \
     "psubw             %%mm3, %%mm2     \n\t" /* uvbuf0[eax] - uvbuf1[eax]*/\
     "psubw             %%mm4, %%mm5     \n\t" /* uvbuf0[eax+2048] - uvbuf1[eax+2048]*/\
     "movq "CHR_MMX_FILTER_OFFSET"+8("#c"), %%mm0    \n\t"\
@@ -1100,8 +1096,8 @@ static void RENAME(yuv2rgb32_2)(SwsContext *c, const int16_t *buf[2],
             : "%r8"
         );
 #else
-        *(const uint16_t **)(&c->u_temp)=abuf0;
-        *(const uint16_t **)(&c->v_temp)=abuf1;
+        c->u_temp=(intptr_t)abuf0;
+        c->v_temp=(intptr_t)abuf1;
         __asm__ volatile(
             "mov %%"REG_b", "ESP_OFFSET"(%5)        \n\t"
             "mov        %4, %%"REG_b"               \n\t"
@@ -1233,10 +1229,10 @@ static void RENAME(yuv2rgb565_2)(SwsContext *c, const int16_t *buf[2],
     "1:                                 \n\t"\
     "movq     (%2, "#index"), %%mm2     \n\t" /* uvbuf0[eax]*/\
     "movq     (%3, "#index"), %%mm3     \n\t" /* uvbuf1[eax]*/\
-    "add   "UV_OFF_PX"("#c"), "#index"  \n\t" \
+    "add           "UV_OFFx2"("#c"), "#index"  \n\t" \
     "movq     (%2, "#index"), %%mm5     \n\t" /* uvbuf0[eax+2048]*/\
     "movq     (%3, "#index"), %%mm4     \n\t" /* uvbuf1[eax+2048]*/\
-    "sub   "UV_OFF_PX"("#c"), "#index"  \n\t" \
+    "sub           "UV_OFFx2"("#c"), "#index"  \n\t" \
     "psubw             %%mm3, %%mm2     \n\t" /* uvbuf0[eax] - uvbuf1[eax]*/\
     "psubw             %%mm4, %%mm5     \n\t" /* uvbuf0[eax+2048] - uvbuf1[eax+2048]*/\
     "movq "CHR_MMX_FILTER_OFFSET"+8("#c"), %%mm0    \n\t"\
@@ -1288,9 +1284,9 @@ static void RENAME(yuv2yuyv422_2)(SwsContext *c, const int16_t *buf[2],
     ".p2align              4            \n\t"\
     "1:                                 \n\t"\
     "movq     (%2, "#index"), %%mm3     \n\t" /* uvbuf0[eax]*/\
-    "add   "UV_OFF_PX"("#c"), "#index"  \n\t" \
+    "add           "UV_OFFx2"("#c"), "#index"  \n\t" \
     "movq     (%2, "#index"), %%mm4     \n\t" /* uvbuf0[eax+2048]*/\
-    "sub   "UV_OFF_PX"("#c"), "#index"  \n\t" \
+    "sub           "UV_OFFx2"("#c"), "#index"  \n\t" \
     "psraw                $4, %%mm3     \n\t" /* uvbuf0[eax] - uvbuf1[eax] >>4*/\
     "psraw                $4, %%mm4     \n\t" /* uvbuf0[eax+2048] - uvbuf1[eax+2048] >>4*/\
     "psubw  "U_OFFSET"("#c"), %%mm3     \n\t" /* (U-128)8*/\
@@ -1341,10 +1337,10 @@ static void RENAME(yuv2yuyv422_2)(SwsContext *c, const int16_t *buf[2],
     "1:                                 \n\t"\
     "movq     (%2, "#index"), %%mm2     \n\t" /* uvbuf0[eax]*/\
     "movq     (%3, "#index"), %%mm3     \n\t" /* uvbuf1[eax]*/\
-    "add   "UV_OFF_PX"("#c"), "#index"  \n\t" \
+    "add           "UV_OFFx2"("#c"), "#index"  \n\t" \
     "movq     (%2, "#index"), %%mm5     \n\t" /* uvbuf0[eax+2048]*/\
     "movq     (%3, "#index"), %%mm4     \n\t" /* uvbuf1[eax+2048]*/\
-    "sub   "UV_OFF_PX"("#c"), "#index"  \n\t" \
+    "sub           "UV_OFFx2"("#c"), "#index"  \n\t" \
     "paddw             %%mm2, %%mm3     \n\t" /* uvbuf0[eax] + uvbuf1[eax]*/\
     "paddw             %%mm5, %%mm4     \n\t" /* uvbuf0[eax+2048] + uvbuf1[eax+2048]*/\
     "psrlw                $5, %%mm3     \n\t" /*FIXME might overflow*/\
@@ -1608,9 +1604,9 @@ static void RENAME(yuv2rgb565_1)(SwsContext *c, const int16_t *buf0,
     ".p2align              4            \n\t"\
     "1:                                 \n\t"\
     "movq     (%2, "#index"), %%mm3     \n\t" /* uvbuf0[eax]*/\
-    "add   "UV_OFF_PX"("#c"), "#index"  \n\t" \
+    "add           "UV_OFFx2"("#c"), "#index"  \n\t" \
     "movq     (%2, "#index"), %%mm4     \n\t" /* uvbuf0[eax+2048]*/\
-    "sub   "UV_OFF_PX"("#c"), "#index"  \n\t" \
+    "sub           "UV_OFFx2"("#c"), "#index"  \n\t" \
     "psraw                $7, %%mm3     \n\t" \
     "psraw                $7, %%mm4     \n\t" \
     "movq  (%0, "#index", 2), %%mm1     \n\t" /*buf0[eax]*/\
@@ -1626,10 +1622,10 @@ static void RENAME(yuv2rgb565_1)(SwsContext *c, const int16_t *buf0,
     "1:                                 \n\t"\
     "movq     (%2, "#index"), %%mm2     \n\t" /* uvbuf0[eax]*/\
     "movq     (%3, "#index"), %%mm3     \n\t" /* uvbuf1[eax]*/\
-    "add   "UV_OFF_PX"("#c"), "#index"  \n\t" \
+    "add           "UV_OFFx2"("#c"), "#index"  \n\t" \
     "movq     (%2, "#index"), %%mm5     \n\t" /* uvbuf0[eax+2048]*/\
     "movq     (%3, "#index"), %%mm4     \n\t" /* uvbuf1[eax+2048]*/\
-    "sub   "UV_OFF_PX"("#c"), "#index"  \n\t" \
+    "sub           "UV_OFFx2"("#c"), "#index"  \n\t" \
     "paddw             %%mm2, %%mm3     \n\t" /* uvbuf0[eax] + uvbuf1[eax]*/\
     "paddw             %%mm5, %%mm4     \n\t" /* uvbuf0[eax+2048] + uvbuf1[eax+2048]*/\
     "psrlw                $8, %%mm3     \n\t" \
@@ -1678,7 +1674,7 @@ static void RENAME(yuv2yuyv422_1)(SwsContext *c, const int16_t *buf0,
 #if !COMPILE_TEMPLATE_MMX2
 //FIXME yuy2* can read up to 7 samples too much
 
-static void RENAME(yuy2ToY)(uint8_t *dst, const uint8_t *src,
+static void RENAME(yuy2ToY)(uint8_t *dst, const uint8_t *src, const uint8_t *unused1, const uint8_t *unused2,
                             int width, uint32_t *unused)
 {
     __asm__ volatile(
@@ -1699,7 +1695,7 @@ static void RENAME(yuy2ToY)(uint8_t *dst, const uint8_t *src,
 }
 
 static void RENAME(yuy2ToUV)(uint8_t *dstU, uint8_t *dstV,
-                             const uint8_t *src1, const uint8_t *src2,
+                             const uint8_t *unused1, const uint8_t *src1, const uint8_t *src2,
                              int width, uint32_t *unused)
 {
     __asm__ volatile(
@@ -1728,7 +1724,7 @@ static void RENAME(yuy2ToUV)(uint8_t *dstU, uint8_t *dstV,
 
 /* This is almost identical to the previous, end exists only because
  * yuy2ToY/UV)(dst, src+1, ...) would have 100% unaligned accesses. */
-static void RENAME(uyvyToY)(uint8_t *dst, const uint8_t *src,
+static void RENAME(uyvyToY)(uint8_t *dst, const uint8_t *src, const uint8_t *unused1, const uint8_t *unused2,
                             int width, uint32_t *unused)
 {
     __asm__ volatile(
@@ -1748,7 +1744,7 @@ static void RENAME(uyvyToY)(uint8_t *dst, const uint8_t *src,
 }
 
 static void RENAME(uyvyToUV)(uint8_t *dstU, uint8_t *dstV,
-                             const uint8_t *src1, const uint8_t *src2,
+                             const uint8_t *unused1, const uint8_t *src1, const uint8_t *src2,
                              int width, uint32_t *unused)
 {
     __asm__ volatile(
@@ -1802,21 +1798,21 @@ static av_always_inline void RENAME(nvXXtoUV)(uint8_t *dst1, uint8_t *dst2,
 }
 
 static void RENAME(nv12ToUV)(uint8_t *dstU, uint8_t *dstV,
-                             const uint8_t *src1, const uint8_t *src2,
+                             const uint8_t *unused1, const uint8_t *src1, const uint8_t *src2,
                              int width, uint32_t *unused)
 {
     RENAME(nvXXtoUV)(dstU, dstV, src1, width);
 }
 
 static void RENAME(nv21ToUV)(uint8_t *dstU, uint8_t *dstV,
-                             const uint8_t *src1, const uint8_t *src2,
+                             const uint8_t *unused1, const uint8_t *src1, const uint8_t *src2,
                              int width, uint32_t *unused)
 {
     RENAME(nvXXtoUV)(dstV, dstU, src1, width);
 }
 #endif /* !COMPILE_TEMPLATE_MMX2 */
 
-static av_always_inline void RENAME(bgr24ToY_mmx)(uint8_t *dst, const uint8_t *src,
+static av_always_inline void RENAME(bgr24ToY_mmx)(int16_t *dst, const uint8_t *src,
                                                   int width, enum PixelFormat srcFormat)
 {
 
@@ -1857,32 +1853,31 @@ static av_always_inline void RENAME(bgr24ToY_mmx)(uint8_t *dst, const uint8_t *s
         "paddd                   %%mm3, %%mm2       \n\t"
         "paddd                   %%mm4, %%mm0       \n\t"
         "paddd                   %%mm4, %%mm2       \n\t"
-        "psrad                     $15, %%mm0       \n\t"
-        "psrad                     $15, %%mm2       \n\t"
+        "psrad                     $9, %%mm0       \n\t"
+        "psrad                     $9, %%mm2       \n\t"
         "packssdw                %%mm2, %%mm0       \n\t"
-        "packuswb                %%mm0, %%mm0       \n\t"
-        "movd                %%mm0, (%1, %%"REG_a") \n\t"
-        "add                        $4, %%"REG_a"   \n\t"
+        "movq                %%mm0, (%1, %%"REG_a") \n\t"
+        "add                        $8, %%"REG_a"   \n\t"
         " js                        1b              \n\t"
     : "+r" (src)
-    : "r" (dst+width), "g" ((x86_reg)-width)
+    : "r" (dst+width), "g" ((x86_reg)-2*width)
     : "%"REG_a
     );
 }
 
-static void RENAME(bgr24ToY)(uint8_t *dst, const uint8_t *src,
+static void RENAME(bgr24ToY)(int16_t *dst, const uint8_t *src, const uint8_t *unused1, const uint8_t *unused2,
                              int width, uint32_t *unused)
 {
     RENAME(bgr24ToY_mmx)(dst, src, width, PIX_FMT_BGR24);
 }
 
-static void RENAME(rgb24ToY)(uint8_t *dst, const uint8_t *src,
+static void RENAME(rgb24ToY)(int16_t *dst, const uint8_t *src, const uint8_t *unused1, const uint8_t *unused2,
                              int width, uint32_t *unused)
 {
     RENAME(bgr24ToY_mmx)(dst, src, width, PIX_FMT_RGB24);
 }
 
-static av_always_inline void RENAME(bgr24ToUV_mmx)(uint8_t *dstU, uint8_t *dstV,
+static av_always_inline void RENAME(bgr24ToUV_mmx)(int16_t *dstU, int16_t *dstV,
                                                    const uint8_t *src, int width,
                                                    enum PixelFormat srcFormat)
 {
@@ -1924,34 +1919,32 @@ static av_always_inline void RENAME(bgr24ToUV_mmx)(uint8_t *dstU, uint8_t *dstV,
         "paddd                   %%mm3, %%mm2       \n\t"
         "paddd                   %%mm3, %%mm1       \n\t"
         "paddd                   %%mm3, %%mm4       \n\t"
-        "psrad                     $15, %%mm0       \n\t"
-        "psrad                     $15, %%mm2       \n\t"
-        "psrad                     $15, %%mm1       \n\t"
-        "psrad                     $15, %%mm4       \n\t"
+        "psrad                     $9, %%mm0       \n\t"
+        "psrad                     $9, %%mm2       \n\t"
+        "psrad                     $9, %%mm1       \n\t"
+        "psrad                     $9, %%mm4       \n\t"
         "packssdw                %%mm1, %%mm0       \n\t"
         "packssdw                %%mm4, %%mm2       \n\t"
-        "packuswb                %%mm0, %%mm0       \n\t"
-        "packuswb                %%mm2, %%mm2       \n\t"
-        "movd                %%mm0, (%1, %%"REG_a") \n\t"
-        "movd                %%mm2, (%2, %%"REG_a") \n\t"
-        "add                        $4, %%"REG_a"   \n\t"
+        "movq                %%mm0, (%1, %%"REG_a") \n\t"
+        "movq                %%mm2, (%2, %%"REG_a") \n\t"
+        "add                        $8, %%"REG_a"   \n\t"
         " js                        1b              \n\t"
     : "+r" (src)
-    : "r" (dstU+width), "r" (dstV+width), "g" ((x86_reg)-width), "r"(ff_bgr24toUV[srcFormat == PIX_FMT_RGB24])
+    : "r" (dstU+width), "r" (dstV+width), "g" ((x86_reg)-2*width), "r"(ff_bgr24toUV[srcFormat == PIX_FMT_RGB24])
     : "%"REG_a
     );
 }
 
-static void RENAME(bgr24ToUV)(uint8_t *dstU, uint8_t *dstV,
-                              const uint8_t *src1, const uint8_t *src2,
+static void RENAME(bgr24ToUV)(int16_t *dstU, int16_t *dstV,
+                              const uint8_t *unused1, const uint8_t *src1, const uint8_t *src2,
                               int width, uint32_t *unused)
 {
     RENAME(bgr24ToUV_mmx)(dstU, dstV, src1, width, PIX_FMT_BGR24);
     assert(src1 == src2);
 }
 
-static void RENAME(rgb24ToUV)(uint8_t *dstU, uint8_t *dstV,
-                              const uint8_t *src1, const uint8_t *src2,
+static void RENAME(rgb24ToUV)(int16_t *dstU, int16_t *dstV,
+                              const uint8_t *unused1, const uint8_t *src1, const uint8_t *src2,
                               int width, uint32_t *unused)
 {
     assert(src1==src2);
@@ -2099,9 +2092,8 @@ static av_cold void RENAME(sws_init_swScale)(SwsContext *c)
     enum PixelFormat srcFormat = c->srcFormat,
                      dstFormat = c->dstFormat;
 
-    if (!is16BPS(dstFormat) && !is9_OR_10BPS(dstFormat) &&
-        dstFormat != PIX_FMT_NV12 && dstFormat != PIX_FMT_NV21) {
-        if (!(c->flags & SWS_BITEXACT)) {
+    if (!is16BPS(dstFormat) && !is9_OR_10BPS(dstFormat) && dstFormat != PIX_FMT_NV12
+        && dstFormat != PIX_FMT_NV21 && !(c->flags & SWS_BITEXACT)) {
             if (c->flags & SWS_ACCURATE_RND) {
                 c->yuv2yuv1 = RENAME(yuv2yuv1_ar    );
                 c->yuv2yuvX = RENAME(yuv2yuvX_ar    );
@@ -2116,7 +2108,8 @@ static av_cold void RENAME(sws_init_swScale)(SwsContext *c)
                     }
                 }
             } else {
-                c->yuv2yuv1 = RENAME(yuv2yuv1    );
+                int should_dither= isNBPS(c->srcFormat) || is16BPS(c->srcFormat);
+                c->yuv2yuv1 = should_dither ? RENAME(yuv2yuv1_ar    ) : RENAME(yuv2yuv1    );
                 c->yuv2yuvX = RENAME(yuv2yuvX    );
                 if (!(c->flags & SWS_FULL_CHR_H_INT)) {
                     switch (c->dstFormat) {
@@ -2129,7 +2122,6 @@ static av_cold void RENAME(sws_init_swScale)(SwsContext *c)
                     }
                 }
             }
-        }
         if (!(c->flags & SWS_FULL_CHR_H_INT)) {
             switch (c->dstFormat) {
             case PIX_FMT_RGB32:
diff --git a/libswscale/x86/yuv2rgb_mmx.c b/libswscale/x86/yuv2rgb_mmx.c
index 0eaea77..df0e1a3 100644
--- a/libswscale/x86/yuv2rgb_mmx.c
+++ b/libswscale/x86/yuv2rgb_mmx.c
@@ -7,20 +7,20 @@
  * 1,4,8bpp support and context / deglobalize stuff
  * by Michael Niedermayer (michaelni@gmx.at)
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
diff --git a/libswscale/x86/yuv2rgb_template.c b/libswscale/x86/yuv2rgb_template.c
index 5d1fa5b..926e3fb 100644
--- a/libswscale/x86/yuv2rgb_template.c
+++ b/libswscale/x86/yuv2rgb_template.c
@@ -4,20 +4,20 @@
  * Copyright (C) 2001-2007 Michael Niedermayer
  *           (c) 2010 Konstantin Shishkov
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -43,17 +43,14 @@
     if (h_size * depth > FFABS(dstStride[0]))                        \
         h_size -= 8;                                                 \
                                                                      \
-    if (c->srcFormat == PIX_FMT_YUV422P) {                           \
-        srcStride[1] *= 2;                                           \
-        srcStride[2] *= 2;                                           \
-    }                                                                \
+    vshift = c->srcFormat != PIX_FMT_YUV422P;                        \
                                                                      \
     __asm__ volatile ("pxor %mm4, %mm4\n\t");                        \
     for (y = 0; y < srcSliceH; y++) {                                \
         uint8_t *image    = dst[0] + (y + srcSliceY) * dstStride[0]; \
         const uint8_t *py = src[0] +               y * srcStride[0]; \
-        const uint8_t *pu = src[1] +        (y >> 1) * srcStride[1]; \
-        const uint8_t *pv = src[2] +        (y >> 1) * srcStride[2]; \
+        const uint8_t *pu = src[1] +   (y >> vshift) * srcStride[1]; \
+        const uint8_t *pv = src[2] +   (y >> vshift) * srcStride[2]; \
         x86_reg index = -h_size / 2;                                 \
 
 #define YUV2RGB_INITIAL_LOAD          \
@@ -188,7 +185,7 @@ static inline int RENAME(yuv420_rgb15)(SwsContext *c, const uint8_t *src[],
                                        int srcSliceY, int srcSliceH,
                                        uint8_t *dst[], int dstStride[])
 {
-    int y, h_size;
+    int y, h_size, vshift;
 
     YUV2RGB_LOOP(2)
 
@@ -216,7 +213,7 @@ static inline int RENAME(yuv420_rgb16)(SwsContext *c, const uint8_t *src[],
                                        int srcSliceY, int srcSliceH,
                                        uint8_t *dst[], int dstStride[])
 {
-    int y, h_size;
+    int y, h_size, vshift;
 
     YUV2RGB_LOOP(2)
 
@@ -306,7 +303,7 @@ static inline int RENAME(yuv420_rgb24)(SwsContext *c, const uint8_t *src[],
                                        int srcSliceY, int srcSliceH,
                                        uint8_t *dst[], int dstStride[])
 {
-    int y, h_size;
+    int y, h_size, vshift;
 
     YUV2RGB_LOOP(3)
 
@@ -324,7 +321,7 @@ static inline int RENAME(yuv420_bgr24)(SwsContext *c, const uint8_t *src[],
                                        int srcSliceY, int srcSliceH,
                                        uint8_t *dst[], int dstStride[])
 {
-    int y, h_size;
+    int y, h_size, vshift;
 
     YUV2RGB_LOOP(3)
 
@@ -368,7 +365,7 @@ static inline int RENAME(yuv420_rgb32)(SwsContext *c, const uint8_t *src[],
                                        int srcSliceY, int srcSliceH,
                                        uint8_t *dst[], int dstStride[])
 {
-    int y, h_size;
+    int y, h_size, vshift;
 
     YUV2RGB_LOOP(4)
 
@@ -389,7 +386,7 @@ static inline int RENAME(yuva420_rgb32)(SwsContext *c, const uint8_t *src[],
                                         int srcSliceY, int srcSliceH,
                                         uint8_t *dst[], int dstStride[])
 {
-    int y, h_size;
+    int y, h_size, vshift;
 
     YUV2RGB_LOOP(4)
 
@@ -411,7 +408,7 @@ static inline int RENAME(yuv420_bgr32)(SwsContext *c, const uint8_t *src[],
                                        int srcSliceY, int srcSliceH,
                                        uint8_t *dst[], int dstStride[])
 {
-    int y, h_size;
+    int y, h_size, vshift;
 
     YUV2RGB_LOOP(4)
 
@@ -432,7 +429,7 @@ static inline int RENAME(yuva420_bgr32)(SwsContext *c, const uint8_t *src[],
                                         int srcSliceY, int srcSliceH,
                                         uint8_t *dst[], int dstStride[])
 {
-    int y, h_size;
+    int y, h_size, vshift;
 
     YUV2RGB_LOOP(4)
 
diff --git a/libswscale/yuv2rgb.c b/libswscale/yuv2rgb.c
index cad0933..36182a5 100644
--- a/libswscale/yuv2rgb.c
+++ b/libswscale/yuv2rgb.c
@@ -6,20 +6,20 @@
  * 1,4,8bpp support and context / deglobalize stuff
  * by Michael Niedermayer (michaelni@gmx.at)
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -34,6 +34,7 @@
 #include "swscale_internal.h"
 #include "libavutil/cpu.h"
 #include "libavutil/bswap.h"
+#include "libavutil/pixdesc.h"
 
 extern const uint8_t dither_4x4_16[4][8];
 extern const uint8_t dither_8x8_32[8][8];
@@ -521,7 +522,8 @@ SwsFunc ff_yuv2rgb_get_func_ptr(SwsContext *c)
     if (t)
         return t;
 
-    av_log(c, AV_LOG_WARNING, "No accelerated colorspace conversion found from %s to %s.\n", sws_format_name(c->srcFormat), sws_format_name(c->dstFormat));
+    av_log(c, AV_LOG_WARNING, "No accelerated colorspace conversion found from %s to %s.\n",
+           av_get_pix_fmt_name(c->srcFormat), av_get_pix_fmt_name(c->dstFormat));
 
     switch (c->dstFormat) {
     case PIX_FMT_BGR48BE: