summaryrefslogtreecommitdiffstats
path: root/libavcodec/ppc
diff options
context:
space:
mode:
authorRong Yan <rongyan236@gmail.com>2014-10-10 08:29:58 +0000
committerMichael Niedermayer <michaelni@gmx.at>2014-10-10 14:24:22 +0200
commit0d71bd5a9493a9021d08b46fb0ffb985d44dc178 (patch)
treea394b7950a45dca5cdcb4cfbc17796e5392b8cb1 /libavcodec/ppc
parentc1fa5d1bd4642f75160f7806e7a7756526a119a2 (diff)
downloadffmpeg-streaming-0d71bd5a9493a9021d08b46fb0ffb985d44dc178.zip
ffmpeg-streaming-0d71bd5a9493a9021d08b46fb0ffb985d44dc178.tar.gz
libavcodec/ppc/hpeldsp_altivec.c : fix ff_put_pixels16_altivec() for POWER LE
Signed-off-by: Michael Niedermayer <michaelni@gmx.at>
Diffstat (limited to 'libavcodec/ppc')
-rw-r--r--libavcodec/ppc/hpeldsp_altivec.c34
1 files changed, 34 insertions, 0 deletions
diff --git a/libavcodec/ppc/hpeldsp_altivec.c b/libavcodec/ppc/hpeldsp_altivec.c
index 7c3b5a1..79c2af8 100644
--- a/libavcodec/ppc/hpeldsp_altivec.c
+++ b/libavcodec/ppc/hpeldsp_altivec.c
@@ -36,6 +36,38 @@
#if HAVE_ALTIVEC
/* next one assumes that ((line_size % 16) == 0) */
+#if HAVE_VSX
+/**
+ * Copy a 16-byte-wide block of h rows from pixels to block (VSX variant).
+ *
+ * Every row is moved with one vec_vsx_ld / vec_vsx_st pair, so all four rows
+ * of an iteration go through the same load/store path.
+ *
+ * @param block     destination; one 16-byte vector is written per row
+ * @param pixels    source; one 16-byte vector is read per row
+ * @param line_size byte distance between consecutive rows, applied to both
+ *                  source and destination
+ * @param h         number of rows; rows are handled four per iteration, so
+ *                  a value that is not a multiple of 4 is effectively
+ *                  rounded up to the next multiple of 4
+ */
+void ff_put_pixels16_altivec(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h)
+{
+    register vector unsigned char pixelsv1;
+    register vector unsigned char pixelsv1B;
+    register vector unsigned char pixelsv1C;
+    register vector unsigned char pixelsv1D;
+
+    int i;
+    /* Byte offsets of rows 2 and 3, and the per-iteration advance (4 rows). */
+    register ptrdiff_t line_size_2 = line_size << 1;
+    register ptrdiff_t line_size_3 = line_size + line_size_2;
+    register ptrdiff_t line_size_4 = line_size << 2;
+
+// hand-unrolling the loop by 4 gains about 15%
+// minimum execution time goes from 74 to 60 cycles
+// it's faster than -funroll-loops, but using
+// -funroll-loops w/ this is bad - 74 cycles again.
+// all this is on a 7450, tuning for the 7450
+// NOTE(review): these figures presumably predate the VSX path - confirm
+// before relying on them for VSX targets.
+    for (i = 0; i < h; i += 4) {
+        /* Issue all four loads before the stores. */
+        pixelsv1  = vec_vsx_ld(          0, pixels);
+        pixelsv1B = vec_vsx_ld(line_size,   pixels);
+        pixelsv1C = vec_vsx_ld(line_size_2, pixels);
+        pixelsv1D = vec_vsx_ld(line_size_3, pixels);
+        vec_vsx_st(pixelsv1,            0, (unsigned char*)block);
+        vec_vsx_st(pixelsv1B, line_size,   (unsigned char*)block);
+        vec_vsx_st(pixelsv1C, line_size_2, (unsigned char*)block);
+        /* Same store as rows 0-2: vec_st would drop the low address bits. */
+        vec_vsx_st(pixelsv1D, line_size_3, (unsigned char*)block);
+        pixels += line_size_4;
+        block  += line_size_4;
+    }
+}
+#else
void ff_put_pixels16_altivec(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h)
{
register vector unsigned char pixelsv1, pixelsv2;
@@ -76,6 +108,8 @@ void ff_put_pixels16_altivec(uint8_t *block, const uint8_t *pixels, ptrdiff_t li
}
}
+#endif /* HAVE_VSX */
+
/* next one assumes that ((line_size % 16) == 0) */
/*
 * op_avg(a, b): assign to a the byte-wise average of a and b, rounded up,
 * treating the operands as packed 8-bit lanes.
 * (a | b) - ((a ^ b) >> 1) equals ceil((a + b) / 2); masking the XOR with
 * 0xFEFEFEFE before the shift clears the low bit of each byte so that no bit
 * is shifted into the neighbouring lane.
 * Presumably the operands are 32-bit words (the mask covers four bytes) -
 * TODO confirm against the callers.
 * Both arguments are evaluated more than once, so they must be free of side
 * effects.
 */
#define op_avg(a,b) a = ( ((a)|(b)) - ((((a)^(b))&0xFEFEFEFEUL)>>1) )
void ff_avg_pixels16_altivec(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h)
OpenPOWER on IntegriCloud