summaryrefslogtreecommitdiffstats
path: root/libavfilter/x86/vf_pullup.asm
blob: 26c2a27d37dc5e5ccaa24a40b902ed0e7bce1709 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
;*****************************************************************************
;* x86-optimized functions for pullup filter
;*
;* This file is part of FFmpeg.
;*
;* FFmpeg is free software; you can redistribute it and/or modify
;* it under the terms of the GNU General Public License as published by
;* the Free Software Foundation; either version 2 of the License, or
;* (at your option) any later version.
;*
;* FFmpeg is distributed in the hope that it will be useful,
;* but WITHOUT ANY WARRANTY; without even the implied warranty of
;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
;* GNU General Public License for more details.
;*
;* You should have received a copy of the GNU General Public License along
;* with FFmpeg; if not, write to the Free Software Foundation, Inc.,
;* 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
;******************************************************************************

%include "libavutil/x86/x86util.asm"

SECTION .text

;------------------------------------------------------------------------------
; pullup_filter_diff(first, second, size)
;
; Returns in eax the sum of |first[x] - second[x]| over a block of 4 rows by
; 8 bytes.  Both pointers are advanced by `size` bytes after every row.
;
; Register roles:
;   m0    = all-zero, used to widen bytes/words when unpacking
;   m1    = four 16-bit running sums
;   rows  = remaining row count
;   upper = scratch for the upper 32-bit partial sum
;
; NOTE(review): no emms is issued in this body - confirm the caller resets
; the MMX/x87 state.
;------------------------------------------------------------------------------
INIT_MMX mmx
cglobal pullup_filter_diff, 3, 5, 8, first, second, size, rows, upper
    pxor       m0, m0
    pxor       m1, m1
    mov        rowsd, 4

.loop:
    movq       m2, [firstq]             ; a = 8 bytes of the first block
    movq       m3, [secondq]            ; b = 8 bytes of the second block
    movq       m4, m2                   ; keep a copy of a for the b - a pass
    add        firstq, sizeq
    add        secondq, sizeq

    ; Per byte, one of the two saturating differences is zero and the other
    ; is |a - b|, so adding both yields the absolute difference.
    psubusb    m2, m3                   ; max(a - b, 0)
    psubusb    m3, m4                   ; max(b - a, 0)

    ; Widen each difference to 16 bits before accumulating.
    movq       m4, m2
    movq       m5, m3
    punpcklbw  m2, m0
    punpckhbw  m4, m0
    punpcklbw  m3, m0
    punpckhbw  m5, m0
    paddw      m1, m2
    paddw      m1, m3
    paddw      m1, m4
    paddw      m1, m5

    dec        rowsd
    jnz        .loop

    ; Horizontal add: 4 words -> 2 dwords -> one 32-bit total in eax.
    movq       m2, m1
    punpcklwd  m1, m0
    punpckhwd  m2, m0
    paddd      m1, m2
    movd       eax, m1
    psrlq      m1, 32
    movd       upperd, m1
    add        eax, upperd
    RET

;------------------------------------------------------------------------------
; PULLUP_COMB_TERM unpack, centre, above, below
;
; Accumulates |2*centre - (above + below)| for four pixels into m1.
;   %1 = byte->word unpack instruction selecting the low or high 4 bytes
;   %2 = memory operand of the centre row
;   %3 = memory operand of the first neighbouring row
;   %4 = memory operand of the second neighbouring row
; Expects m0 = 0 and m1 = 16-bit accumulators; clobbers m2-m4.
;------------------------------------------------------------------------------
%macro PULLUP_COMB_TERM 4
    movq       m2, %2
    movq       m3, %3
    movq       m4, %4
    %1         m2, m0
    %1         m3, m0
    %1         m4, m0
    paddw      m2, m2                   ; 2 * centre
    paddw      m3, m4                   ; above + below
    movq       m4, m2                   ; keep 2 * centre for the reverse pass
    psubusw    m2, m3                   ; max(2c - n, 0)
    psubusw    m3, m4                   ; max(n - 2c, 0)
    paddw      m1, m2
    paddw      m1, m3
%endmacro

;------------------------------------------------------------------------------
; pullup_filter_comb(first, second, size)
;
; Returns in eax, summed over a block of 4 rows by 8 bytes (a = first,
; b = second, both stepping by `size` bytes per row):
;   |2*a[x] - b[x - size] - b[x]| + |2*b[x] - a[x] - a[x + size]|
;
; Register roles:
;   m0    = all-zero, used to widen bytes/words when unpacking
;   m1    = four 16-bit running sums
;   rows  = remaining row count
;   upper = scratch for the upper 32-bit partial sum
;
; NOTE(review): no emms is issued in this body - confirm the caller resets
; the MMX/x87 state.
;------------------------------------------------------------------------------
INIT_MMX mmx
cglobal pullup_filter_comb, 3, 5, 8, first, second, size, rows, upper
    pxor       m0, m0
    pxor       m1, m1
    sub        secondq, sizeq           ; secondq now addresses the row before b
    mov        rowsd, 4

.loop:
    ; a against the b rows on either side of it (low half, then high half)
    PULLUP_COMB_TERM punpcklbw, [firstq], [secondq], [secondq+sizeq]
    PULLUP_COMB_TERM punpckhbw, [firstq], [secondq], [secondq+sizeq]

    ; b against the a rows on either side of it (low half, then high half)
    PULLUP_COMB_TERM punpcklbw, [secondq+sizeq], [firstq], [firstq+sizeq]
    PULLUP_COMB_TERM punpckhbw, [secondq+sizeq], [firstq], [firstq+sizeq]

    add        firstq, sizeq
    add        secondq, sizeq
    dec        rowsd
    jnz        .loop

    ; Horizontal add: 4 words -> 2 dwords -> one 32-bit total in eax.
    movq       m2, m1
    punpcklwd  m1, m0
    punpckhwd  m2, m0
    paddd      m1, m2
    movd       eax, m1
    psrlq      m1, 32
    movd       upperd, m1
    add        eax, upperd
    RET

;------------------------------------------------------------------------------
; pullup_filter_var(first, second, size)
;
; Returns in eax four times the sum of |first[x] - first[x + size]| over
; 3 row pairs of 8 bytes each.  `second` is accepted but never read.
;
; Register roles:
;   m0    = all-zero, used to widen bytes/words when unpacking
;   m1    = four 16-bit running sums
;   rows  = remaining row-pair count
;   upper = scratch for the upper 32-bit partial sum
;
; NOTE(review): no emms is issued in this body - confirm the caller resets
; the MMX/x87 state.
;------------------------------------------------------------------------------
INIT_MMX mmx
cglobal pullup_filter_var, 3, 5, 8, first, second, size, rows, upper
    pxor       m0, m0
    pxor       m1, m1
    mov        rowsd, 3

.loop:
    movq       m2, [firstq]             ; current row
    movq       m3, [firstq+sizeq]       ; row that follows it
    movq       m4, m2                   ; copy of current row for the reverse pass
    add        firstq, sizeq

    ; Per byte, one of the two saturating differences is zero and the other
    ; is the absolute difference between the rows.
    psubusb    m2, m3                   ; max(cur - next, 0)
    psubusb    m3, m4                   ; max(next - cur, 0)

    ; Widen each difference to 16 bits before accumulating.
    movq       m4, m2
    movq       m5, m3
    punpcklbw  m2, m0
    punpckhbw  m4, m0
    punpcklbw  m3, m0
    punpckhbw  m5, m0
    paddw      m1, m2
    paddw      m1, m3
    paddw      m1, m4
    paddw      m1, m5

    dec        rowsd
    jnz        .loop

    ; Horizontal add: 4 words -> 2 dwords -> one 32-bit total in eax.
    movq       m2, m1
    punpcklwd  m1, m0
    punpckhwd  m2, m0
    paddd      m1, m2
    movd       eax, m1
    psrlq      m1, 32
    movd       upperd, m1
    add        eax, upperd
    shl        eax, 2                   ; scale the total by 4
    RET
OpenPOWER on IntegriCloud