summaryrefslogtreecommitdiffstats
path: root/libavfilter/x86/vf_gradfun.asm
blob: 3581f89fe88e8219d15142ae0d13115210aba4c1 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
;******************************************************************************
;* x86-optimized functions for gradfun filter
;*
;* This file is part of FFmpeg.
;*
;* FFmpeg is free software; you can redistribute it and/or
;* modify it under the terms of the GNU Lesser General Public
;* License as published by the Free Software Foundation; either
;* version 2.1 of the License, or (at your option) any later version.
;*
;* FFmpeg is distributed in the hope that it will be useful,
;* but WITHOUT ANY WARRANTY; without even the implied warranty of
;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
;* Lesser General Public License for more details.
;*
;* You should have received a copy of the GNU Lesser General Public
;* License along with FFmpeg; if not, write to the Free Software
;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
;******************************************************************************

%include "libavutil/x86/x86util.asm"

SECTION_RODATA

; Eight 16-bit words of 0x007F. Loaded into m6 by both gradfun_filter_line
; variants and subtracted from the scaled absolute difference in FILTER_LINE.
pw_7f: times 8 dw 0x7F
; Eight 16-bit words of 0x00FF. Used by BLUR_LINE as a mask that keeps the low
; byte of every word.
pw_ff: times 8 dw 0xFF

SECTION .text

;------------------------------------------------------------------------------
; FILTER_LINE addend
;
; Filters one group of pixels located at byte offset r0 and stores the result.
; The group is half a SIMD register of bytes (the callers step r0 by 4 with
; MMX registers and by 8 with XMM registers).
;
;   %1 : register whose words are added to the scaled source pixels
;   r0 : byte offset applied to r1, r2 and r3
;   r1 : base of the byte output
;   r2 : base of the byte input
;   r3 : base of the 16-bit word input; every word is used for two pixels
;   m5 : per-word unsigned multiplier applied to the absolute difference
;   m6 : per-word value subtracted afterwards (callers load pw_7f)
;   m7 : must be all zeros; used to widen bytes and as the pminsw upper bound
;
; Clobbers m0, m1 and m2.
; NOTE(review): the roles look like r1 = dst, r2 = src, r3 = blurred reference,
; m5 = threshold and %1 = dither values -- confirm against the C caller.
;------------------------------------------------------------------------------
%macro FILTER_LINE 1
    ; m0 = input bytes, m1 = reference words
    movh       m0, [r2+r0]
    movh       m1, [r3+r0]
    ; widen the bytes to words by interleaving with the zero register
    punpcklbw  m0, m7
    ; duplicate every reference word so it lines up with two adjacent pixels
    punpcklwd  m1, m1
    ; m0 = pixel << 7, bringing the pixel to the scale of the reference words
    psllw      m0, 7
    ; m1 = reference - scaled pixel (signed difference)
    psubw      m1, m0
    ; m2 = |m1| per word (PABSW is a macro from the included x86util.asm)
    PABSW      m2, m1
    ; m2 = (m2 * m5) >> 16, unsigned
    pmulhuw    m2, m5
    ; m2 = min(m2 - m6, 0): a non-negative result is clamped to zero, which
    ; zeroes the correction term computed below
    psubw      m2, m6
    pminsw     m2, m7
    ; m2 = m2 * m2 (low 16 bits of the square)
    pmullw     m2, m2
    ; m1 = difference * 4
    psllw      m1, 2
    ; add the caller-supplied words to the scaled pixel
    paddw      m0, %1
    ; m1 = (m1 * m2) >> 16, signed: the weighted correction
    pmulhw     m1, m2
    ; apply the correction, then drop the 7 fractional bits again
    paddw      m0, m1
    psraw      m0, 7
    ; saturate to unsigned bytes and store the half register of results
    packuswb   m0, m0
    movh  [r1+r0], m0
%endmacro

;------------------------------------------------------------------------------
; gradfun_filter_line, MMX-register version (mmxext instruction set)
;
; Declared with 6 arguments and 6 general-purpose registers:
;   r0  : byte offset, advanced by 4 per FILTER_LINE until it is >= 0
;   r1, r2, r3 : bases used by FILTER_LINE (output, byte input, word input)
;   r4d : 32-bit value whose low word is broadcast into every word of m5
;   r5  : pointer to 16 bytes of words; [r5] and [r5+8] are used alternately
;         as the FILTER_LINE addend
; NOTE(review): the first FILTER_LINE runs before any test of r0, so the
; caller presumably passes a negative offset with the pointers pre-advanced to
; the end of the line -- confirm against the C wrapper.
;------------------------------------------------------------------------------
INIT_MMX mmxext
cglobal gradfun_filter_line, 6, 6
    ; m5 = low word of r4d replicated into all four words, m7 = 0
    movh      m5, r4d
    pxor      m7, m7
    pshufw    m5, m5,0
    ; m6 = 0x7F in every word
    mova      m6, [pw_7f]
    ; m3 / m4 = first / second group of four addend words
    mova      m3, [r5]
    mova      m4, [r5+8]
.loop:
    ; even group: uses the first four addend words
    FILTER_LINE m3
    add       r0, 4
    ; stop here if the offset reached zero after the even group
    jge .end
    ; odd group: uses the second four addend words
    FILTER_LINE m4
    add       r0, 4
    jl .loop
.end:
    REP_RET

;------------------------------------------------------------------------------
; gradfun_filter_line, XMM-register version (ssse3 instruction set)
;
; Declared with 6 arguments, 6 general-purpose registers and 8 XMM registers:
;   r0  : byte offset, advanced by 8 per iteration; the loop runs while it is
;         negative after the increment
;   r1, r2, r3 : bases used by FILTER_LINE (output, byte input, word input)
;   r4d : 32-bit value whose low word is broadcast into every word of m5
;   r5  : pointer to 16 bytes of words used as the FILTER_LINE addend; read
;         with mova, i.e. the aligned load form
; NOTE(review): the loop body runs before r0 is tested, so the caller
; presumably passes a negative offset -- confirm against the C wrapper.
;------------------------------------------------------------------------------
INIT_XMM ssse3
cglobal gradfun_filter_line, 6, 6, 8
    ; m5 = low word of r4d replicated into all eight words, m7 = 0:
    ; pshuflw fills the low four words, punpcklqdq copies them to the high half
    movd       m5, r4d
    pxor       m7, m7
    pshuflw    m5, m5, 0
    ; m6 = 0x7F in every word
    mova       m6, [pw_7f]
    punpcklqdq m5, m5
    ; m4 = the eight addend words, reused unchanged for every group
    mova       m4, [r5]
.loop:
    FILTER_LINE m4
    add        r0, 8
    jl .loop
    REP_RET

;------------------------------------------------------------------------------
; BLUR_LINE load_insn
;
; Emits the function gradfun_blur_line_<load_insn>. %1 is the instruction used
; to load from r4 and r5, so the same body serves an aligned and an unaligned
; variant.
;
; Declared with 6 arguments, 6 general-purpose registers and 8 SIMD registers:
;   r0 : byte offset, advanced by 16 per iteration; the loop runs while it is
;        negative after the increment
;   r1 : word buffer that is read and then overwritten with the new sum
;   r2 : word buffer added to the new byte sums (read only)
;   r3 : word output that receives (new sum - previous contents of r1)
;   r4, r5 : two byte inputs, loaded with %1
; The accesses through r1, r2 and r3 use mova or a memory operand of paddw,
; both of which require aligned addresses for SSE registers.
; NOTE(review): r4/r5 look like two adjacent image rows, making each word the
; sum of a 2x2 pixel block, and r1/r2 look like running column sums -- confirm
; against the C caller.
;------------------------------------------------------------------------------
%macro BLUR_LINE 1
cglobal gradfun_blur_line_%1, 6, 6, 8
    ; m7 = 0x00FF in every word: mask selecting the low byte of each word
    mova        m7, [pw_ff]
.loop:
    ; load 16 bytes from each byte input
    %1          m0, [r4+r0]
    %1          m1, [r5+r0]
    ; keep copies so the low and high byte of every word can be split apart
    mova        m2, m0
    mova        m3, m1
    ; m0, m1 = high byte of each word
    psrlw       m0, 8
    psrlw       m1, 8
    ; m2, m3 = low byte of each word
    pand        m2, m7
    pand        m3, m7
    ; m0 = per-word sum of both bytes of both inputs (four bytes per word)
    paddw       m0, m1
    paddw       m2, m3
    paddw       m0, m2
    ; add the words from r2, then swap the result with the old contents of r1
    paddw       m0, [r2+r0]
    mova        m1, [r1+r0]
    mova   [r1+r0], m0
    ; output = new sum - previous sum
    psubw       m0, m1
    mova   [r3+r0], m0
    add         r0, 16
    jl .loop
    REP_RET
%endmacro

; Instantiate BLUR_LINE twice with XMM registers and the sse2 instruction set:
; gradfun_blur_line_movdqa loads the r4/r5 byte inputs with the aligned move,
; gradfun_blur_line_movdqu with the unaligned move.
INIT_XMM sse2
BLUR_LINE movdqa
BLUR_LINE movdqu
OpenPOWER on IntegriCloud