From 32ad48abd74a997220b841e4e913edeb267aa362 Mon Sep 17 00:00:00 2001
From: Richard Henderson
Date: Mon, 18 Aug 2014 10:19:06 -0700
Subject: target-alpha: Add vector implementation for CMPBGE

While conditionalized on SSE2, it's a "portable" gcc generic vector
implementation, which could be enabled on other hosts.

Signed-off-by: Richard Henderson
---
 target-alpha/int_helper.c | 37 +++++++++++++++++++++++++++++++++++++
 1 file changed, 37 insertions(+)

(limited to 'target-alpha')

diff --git a/target-alpha/int_helper.c b/target-alpha/int_helper.c
index 74f38cb..29e927f 100644
--- a/target-alpha/int_helper.c
+++ b/target-alpha/int_helper.c
@@ -60,6 +60,42 @@ uint64_t helper_zap(uint64_t val, uint64_t mask)
 
 uint64_t helper_cmpbge(uint64_t op1, uint64_t op2)
 {
+#if defined(__SSE2__)
+    uint64_t r;
+
+    /* The cmpbge instruction is heavily used in the implementation of
+       every string function on Alpha. We can do much better than either
+       the default loop below, or even an unrolled version by using the
+       native vector support. */
+    {
+        typedef uint64_t Q __attribute__((vector_size(16)));
+        typedef uint8_t B __attribute__((vector_size(16)));
+
+        Q q1 = (Q){ op1, 0 };
+        Q q2 = (Q){ op2, 0 };
+
+        q1 = (Q)((B)q1 >= (B)q2);
+
+        r = q1[0];
+    }
+
+    /* Select only one bit from each byte. */
+    r &= 0x0101010101010101;
+
+    /* Collect the bits into the bottom byte. */
+    /* .......A.......B.......C.......D.......E.......F.......G.......H */
+    r |= r >> (8 - 1);
+
+    /* .......A......AB......BC......CD......DE......EF......FG......GH */
+    r |= r >> (16 - 2);
+
+    /* .......A......AB.....ABC....ABCD....BCDE....CDEF....DEFG....EFGH */
+    r |= r >> (32 - 4);
+
+    /* .......A......AB.....ABC....ABCD...ABCDE..ABCDEF.ABCDEFGABCDEFGH */
+    /* Return only the low 8 bits. */
+    return r & 0xff;
+#else
     uint8_t opa, opb, res;
     int i;
 
@@ -72,6 +108,7 @@ uint64_t helper_cmpbge(uint64_t op1, uint64_t op2)
         }
     }
     return res;
+#endif
 }
 
 uint64_t helper_minub8(uint64_t op1, uint64_t op2)
-- 
cgit v1.1