diff options
Diffstat (limited to 'libavcodec/x86/exrdsp.asm')
-rw-r--r-- | libavcodec/x86/exrdsp.asm | 118 |
1 files changed, 118 insertions, 0 deletions
diff --git a/libavcodec/x86/exrdsp.asm b/libavcodec/x86/exrdsp.asm new file mode 100644 index 0000000..3bf240c --- /dev/null +++ b/libavcodec/x86/exrdsp.asm @@ -0,0 +1,118 @@ +;****************************************************************************** +;* X86 Optimized functions for Open Exr Decoder +;* Copyright (c) 2006 Industrial Light & Magic, a division of Lucas Digital Ltd. LLC +;* +;* reorder_pixels, predictor based on patch by John Loy +;* port to ASM by Jokyo Images support by CNC - French National Center for Cinema +;* +;* predictor AVX/AVX2 by Henrik Gramner +;* +;* This file is part of FFmpeg. +;* +;* FFmpeg is free software; you can redistribute it and/or +;* modify it under the terms of the GNU Lesser General Public +;* License as published by the Free Software Foundation; either +;* version 2.1 of the License, or (at your option) any later version. +;* +;* FFmpeg is distributed in the hope that it will be useful, +;* but WITHOUT ANY WARRANTY; without even the implied warranty of +;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +;* Lesser General Public License for more details. 
;*
;* You should have received a copy of the GNU Lesser General Public
;* License along with FFmpeg; if not, write to the Free Software
;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
;******************************************************************************

%include "libavutil/x86/x86util.asm"

cextern pb_15                    ; vector constant: every byte = 0x0f
cextern pb_80                    ; vector constant: every byte = 0x80

SECTION .text

;------------------------------------------------------------------------------
; void ff_reorder_pixels(uint8_t *dst, const uint8_t *src, ptrdiff_t size);
;
; Interleave the two halves of src into dst: dst gets one byte from the first
; half (src[0..size/2)) followed by one byte from the second half
; (src[size/2..size)), repeated.  SBUTTERFLY bw expands to
; punpcklbw/punpckhbw, which performs the byte interleave.
; NOTE(review): the mova loads/stores assume dst and src1 are suitably
; aligned and size is a multiple of mmsize — confirm against the C caller.
;------------------------------------------------------------------------------

%macro REORDER_PIXELS 0
cglobal reorder_pixels, 3,4,3, dst, src1, size, src2
    lea            src2q, [src1q+sizeq]      ; src2 = src + 2 * half_size
    add             dstq, sizeq              ; dst offset by size
    shr            sizeq, 1                  ; half_size
    add            src1q, sizeq              ; offset src by half_size
    neg            sizeq                     ; size = negative offset for dst, src1, src2
.loop:

    mova              m0, [src1q+sizeq]      ; load first part
    movu              m1, [src2q+sizeq]      ; load second part
    SBUTTERFLY        bw, 0, 1, 2            ; interleave bytes: m0 = low pairs, m1 = high pairs
    mova [dstq+2*sizeq   ], xm0              ; copy to dst (low 128 bits of each)
    mova [dstq+2*sizeq+16], xm1
%if cpuflag(avx2)
    ; punpck* interleaves within each 128-bit lane, so for ymm registers the
    ; upper lanes of m0/m1 still need to be written out: combine them into
    ; one register and store as the second 32-byte chunk.
    vperm2i128        m0, m0, m1, q0301
    mova [dstq+2*sizeq+32], m0
%endif
    add            sizeq, mmsize
    jl .loop
    RET
%endmacro

INIT_XMM sse2
REORDER_PIXELS

%if HAVE_AVX2_EXTERNAL
INIT_YMM avx2
REORDER_PIXELS
%endif


;------------------------------------------------------------------------------
; void ff_predictor(uint8_t *src, ptrdiff_t size);
;
; In-place delta decoding: each output byte is the running sum of all input
; bytes so far, with a bias of 0x80 removed from every input byte first
; (XOR with 0x80 == add -128 mod 256, since only the top bit changes).
; The running sum is computed as a log-step byte prefix sum inside each
; 128-bit lane (pslldq by 1, 2, 4, 8 with paddb), and m2 carries the
; accumulated total across loop iterations: pshufb with pb_15 (all bytes
; 0x0f) broadcasts byte 15 — the lane's final total — to every byte of xm2.
;------------------------------------------------------------------------------

%macro PREDICTOR 0
cglobal predictor, 2,2,5, src, size
    mova              m0, [pb_80]            ; bias to remove from each input byte
    mova             xm1, [pb_15]            ; pshufb mask selecting byte 15 everywhere
    mova             xm2, xm0                ; running total, seeded with the 0x80 bias
    add             srcq, sizeq
    neg            sizeq                     ; negative offset, counts up to 0
.loop:
    pxor              m3, m0, [srcq + sizeq] ; remove the 0x80 bias (flip top bit)
    ; log-step inclusive prefix sum of bytes within each 128-bit lane
    pslldq            m4, m3, 1
    paddb             m3, m4
    pslldq            m4, m3, 2
    paddb             m3, m4
    pslldq            m4, m3, 4
    paddb             m3, m4
    pslldq            m4, m3, 8
%if mmsize == 32
    ; pslldq shifts per 128-bit lane, so the upper lane's prefix sum does not
    ; include the lower lane's total.  Store the low 16 bytes (carry + low
    ; lane), then broadcast their last byte and add the upper lane's prefix
    ; sum to form the next 16 output bytes.
    paddb             m3, m4
    paddb            xm2, xm3                ; carry-in + low-lane prefix sum
    vextracti128     xm4, m3, 1              ; upper-lane prefix sum
    mova [srcq + sizeq], xm2

    pshufb           xm2, xm1                ; broadcast last byte as new carry
    paddb            xm2, xm4
    mova [srcq + sizeq + 16], xm2
%else
    paddb             m2, m3                 ; carry-in + prefix sum
    paddb             m2, m4                 ; final prefix-sum step folded in
    mova [srcq + sizeq], m2
%endif
    pshufb           xm2, xm1                ; broadcast last byte: carry for next iteration
    add            sizeq, mmsize
    jl .loop
    RET
%endmacro

INIT_XMM ssse3
PREDICTOR

INIT_XMM avx
PREDICTOR

%if HAVE_AVX2_EXTERNAL
INIT_YMM avx2
PREDICTOR
%endif