From 32ad48abd74a997220b841e4e913edeb267aa362 Mon Sep 17 00:00:00 2001
From: Richard Henderson <rth@twiddle.net>
Date: Mon, 18 Aug 2014 10:19:06 -0700
Subject: target-alpha: Add vector implementation for CMPBGE

Although the new path is compiled only when __SSE2__ is defined, it is
written with gcc's generic vector extensions rather than SSE intrinsics,
so it is "portable" and could be enabled on other hosts as well.

Signed-off-by: Richard Henderson <rth@twiddle.net>
---
 target-alpha/int_helper.c | 37 +++++++++++++++++++++++++++++++++++++
 1 file changed, 37 insertions(+)

(limited to 'target-alpha')

diff --git a/target-alpha/int_helper.c b/target-alpha/int_helper.c
index 74f38cb..29e927f 100644
--- a/target-alpha/int_helper.c
+++ b/target-alpha/int_helper.c
@@ -60,6 +60,42 @@ uint64_t helper_zap(uint64_t val, uint64_t mask)
 
 uint64_t helper_cmpbge(uint64_t op1, uint64_t op2)
 {
+#if defined(__SSE2__)
+    uint64_t r;
+
+    /* The cmpbge instruction is heavily used in the implementation of
+       every string function on Alpha.  We can do much better than either
+       the default loop below, or even an unrolled version by using the
+       native vector support.  */
+    {
+        typedef uint64_t Q __attribute__((vector_size(16)));
+        typedef uint8_t B __attribute__((vector_size(16)));
+
+        Q q1 = (Q){ op1, 0 };
+        Q q2 = (Q){ op2, 0 };
+
+        q1 = (Q)((B)q1 >= (B)q2);
+
+        r = q1[0];
+    }
+
+    /* Select only one bit from each byte.  */
+    r &= 0x0101010101010101;
+
+    /* Collect the bits into the bottom byte.  */
+    /* .......A.......B.......C.......D.......E.......F.......G.......H */
+    r |= r >> (8 - 1);
+
+    /* .......A......AB......BC......CD......DE......EF......FG......GH */
+    r |= r >> (16 - 2);
+
+    /* .......A......AB.....ABC....ABCD....BCDE....CDEF....DEFG....EFGH */
+    r |= r >> (32 - 4);
+
+    /* .......A......AB.....ABC....ABCD...ABCDE..ABCDEF.ABCDEFGABCDEFGH */
+    /* Return only the low 8 bits.  */
+    return r & 0xff;
+#else
     uint8_t opa, opb, res;
     int i;
 
@@ -72,6 +108,7 @@ uint64_t helper_cmpbge(uint64_t op1, uint64_t op2)
         }
     }
     return res;
+#endif
 }
 
 uint64_t helper_minub8(uint64_t op1, uint64_t op2)
-- 
cgit v1.1