summary | refs | log | tree | commit | diff | stats
path: root/libavcodec/aarch64
diff options
context:
space:
mode:
author: James Almer <jamrial@gmail.com> 2019-03-14 16:20:05 -0300
committer: James Almer <jamrial@gmail.com> 2019-03-14 16:20:05 -0300
commit: fbd607dd560afe44c3b90de1e6cbe5265cac8f1e (patch)
tree: d05e0745cd70279ccf61ef8802e39135247ce816 /libavcodec/aarch64
parent: d6b62ce1aced9e2456582870382f384581cc7cbb (diff)
parent: 37394ef01b040605f8e1c98e73aa12b1c0bcba07 (diff)
download: ffmpeg-streaming-fbd607dd560afe44c3b90de1e6cbe5265cac8f1e.zip
download: ffmpeg-streaming-fbd607dd560afe44c3b90de1e6cbe5265cac8f1e.tar.gz
Merge commit '37394ef01b040605f8e1c98e73aa12b1c0bcba07'
* commit '37394ef01b040605f8e1c98e73aa12b1c0bcba07':
  aarch64: vp8: Optimize put_epel16_h6v6 with vp8_epel8_v6_y2

Merged-by: James Almer <jamrial@gmail.com>
Diffstat (limited to 'libavcodec/aarch64')
-rw-r--r-- libavcodec/aarch64/vp8dsp_neon.S | 34
1 files changed, 10 insertions, 24 deletions
diff --git a/libavcodec/aarch64/vp8dsp_neon.S b/libavcodec/aarch64/vp8dsp_neon.S
index aefe8fd..c9c5018 100644
--- a/libavcodec/aarch64/vp8dsp_neon.S
+++ b/libavcodec/aarch64/vp8dsp_neon.S
@@ -769,23 +769,6 @@ endfunc
sqrshrun2 \d0\().16b, v22.8h, #7
.endm
-.macro vp8_epel8_v6 d0, s0, s1, s2, s3, s4, s5
- uxtl \s2\().8h, \s2\().8b
- uxtl \s3\().8h, \s3\().8b
- uxtl \s1\().8h, \s1\().8b
- uxtl \s4\().8h, \s4\().8b
- uxtl \s0\().8h, \s0\().8b
- uxtl \s5\().8h, \s5\().8b
- mul \s2\().8h, \s2\().8h, v0.h[2]
- mul \s3\().8h, \s3\().8h, v0.h[3]
- mls \s2\().8h, \s1\().8h, v0.h[1]
- mls \s3\().8h, \s4\().8h, v0.h[4]
- mla \s2\().8h, \s0\().8h, v0.h[0]
- mla \s3\().8h, \s5\().8h, v0.h[5]
- sqadd \s3\().8h, \s2\().8h, \s3\().8h
- sqrshrun \d0\().8b, \s3\().8h, #7
-.endm
-
.macro vp8_epel8_v6_y2 d0, d1, s0, s1, s2, s3, s4, s5, s6
uxtl \s0\().8h, \s0\().8b
uxtl \s3\().8h, \s3\().8b
@@ -942,15 +925,18 @@ function ff_put_vp8_epel16_h6v6_neon, export=1
2:
ld1 {v1.8b - v4.8b}, [x7], #32
ld1 {v16.8b - v19.8b}, [x7], #32
- ld1 {v20.8b - v23.8b}, [x7]
- sub x7, x7, #48
+ ld1 {v20.8b - v23.8b}, [x7], #32
+ ld1 {v24.8b - v25.8b}, [x7]
+ sub x7, x7, #64
- vp8_epel8_v6 v5, v1, v3, v16, v18, v20, v22
- vp8_epel8_v6 v2, v2, v4, v17, v19, v21, v23
- trn1 v2.2d, v5.2d, v2.2d
+ vp8_epel8_v6_y2 v1, v3, v1, v3, v16, v18, v20, v22, v24
+ vp8_epel8_v6_y2 v2, v4, v2, v4, v17, v19, v21, v23, v25
+ trn1 v1.2d, v1.2d, v2.2d
+ trn1 v3.2d, v3.2d, v4.2d
- st1 {v2.16b}, [x0], x1
- subs x4, x4, #1
+ st1 {v1.16b}, [x0], x1
+ st1 {v3.16b}, [x0], x1
+ subs x4, x4, #2
b.ne 2b
add sp, sp, #336+16
OpenPOWER on IntegriCloud