summary | refs | log | tree | commit | diff | stats
path: root/libavcodec/arm/vp9dsp_init_arm.c
diff options
context:
space:
mode:
author: Martin Storsjö <martin@martin.st>, 2016-11-10 11:07:39 +0200
committer: Martin Storsjö <martin@martin.st>, 2016-11-10 11:18:22 +0200
commit: 557c1675cf0e803b2fee43b4c8b58433842c84d0 (patch)
tree: 4cb2063cbbc569fc45faf05ae35d3c66fe29e69b /libavcodec/arm/vp9dsp_init_arm.c
parent: 383d96aa2229f644d9bd77b821ed3a309da5e9fc (diff)
download: ffmpeg-streaming-557c1675cf0e803b2fee43b4c8b58433842c84d0.zip
ffmpeg-streaming-557c1675cf0e803b2fee43b4c8b58433842c84d0.tar.gz
arm: vp9mc: Minor adjustments from review of the aarch64 version
This work is sponsored by, and copyright, Google.

The speedup for the large horizontal filters is surprisingly big on A7 and A53, while there's a minor slowdown (almost within measurement noise) on A8 and A9.

                              Cortex       A7        A8        A9       A53
orig: vp9_put_8tap_smooth_64h_neon:   20270.0   14447.3   19723.9   10910.9
new:  vp9_put_8tap_smooth_64h_neon:   20165.8   14466.5   19730.2   10668.8

Signed-off-by: Martin Storsjö <martin@martin.st>
Diffstat (limited to 'libavcodec/arm/vp9dsp_init_arm.c')
-rw-r--r-- libavcodec/arm/vp9dsp_init_arm.c | 2
1 files changed, 1 insertions, 1 deletions
diff --git a/libavcodec/arm/vp9dsp_init_arm.c b/libavcodec/arm/vp9dsp_init_arm.c
index 1b00177..839037a 100644
--- a/libavcodec/arm/vp9dsp_init_arm.c
+++ b/libavcodec/arm/vp9dsp_init_arm.c
@@ -43,7 +43,7 @@ static void op##_##filter##sz##_hv_neon(uint8_t *dst, ptrdiff_t dst_stride,
const uint8_t *src, ptrdiff_t src_stride, \
int h, int mx, int my) \
{ \
- LOCAL_ALIGNED_16(uint8_t, temp, [((sz < 64 ? 2 * sz : 64) + 8) * sz]); \
+ LOCAL_ALIGNED_16(uint8_t, temp, [((1 + (sz < 64)) * sz + 8) * sz]); \
/* We only need h + 7 lines, but the horizontal filter assumes an \
* even number of rows, so filter h + 8 lines here. */ \
ff_vp9_put_##filter##sz##_h_neon(temp, sz, \
OpenPOWER on IntegriCloud