summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authoravg <avg@FreeBSD.org>2011-05-10 15:08:13 +0000
committeravg <avg@FreeBSD.org>2011-05-10 15:08:13 +0000
commit2d672b24ae05acc7b0254e961a81da4e1b58db62 (patch)
treee730a7ad6f2d7ab32f417e2501e28e7dccae5088
parentf59b74bdc18cd1f0bd842f251a3a29f20688c6ad (diff)
downloadFreeBSD-src-2d672b24ae05acc7b0254e961a81da4e1b58db62.zip
FreeBSD-src-2d672b24ae05acc7b0254e961a81da4e1b58db62.tar.gz
bitcount32: replace lengthy comment with a reference to SWAR
MFC after: 5 days
-rw-r--r--sys/sys/systm.h40
1 files changed, 2 insertions, 38 deletions
diff --git a/sys/sys/systm.h b/sys/sys/systm.h
index edd59c4..80fb0a9 100644
--- a/sys/sys/systm.h
+++ b/sys/sys/systm.h
@@ -374,44 +374,8 @@ int alloc_unrl(struct unrhdr *uh);
void free_unr(struct unrhdr *uh, u_int item);
/*
- * This is about as magic as it gets. fortune(1) has got similar code
- * for reversing bits in a word. Who thinks up this stuff??
- *
- * Yes, it does appear to be consistently faster than:
- * while (i = ffs(m)) {
- * m >>= i;
- * bits++;
- * }
- * and
- * while (lsb = (m & -m)) { // This is magic too
- * m &= ~lsb; // or: m ^= lsb
- * bits++;
- * }
- * Both of these latter forms do some very strange things on gcc-3.1 with
- * -mcpu=pentiumpro and/or -march=pentiumpro and/or -O or -O2.
- * There is probably an SSE or MMX popcnt instruction.
- *
- * I wonder if this should be in libkern?
- *
- * XXX Stop the presses! Another one:
- * static __inline u_int32_t
- * popcnt1(u_int32_t v)
- * {
- * v -= ((v >> 1) & 0x55555555);
- * v = (v & 0x33333333) + ((v >> 2) & 0x33333333);
- * v = (v + (v >> 4)) & 0x0F0F0F0F;
- * return (v * 0x01010101) >> 24;
- * }
- * The downside is that it has a multiply. With a pentium3 with
- * -mcpu=pentiumpro and -march=pentiumpro then gcc-3.1 will use
- * an imull, and in that case it is faster. In most other cases
- * it appears slightly slower.
- *
- * Another variant (also from fortune):
- * #define BITCOUNT(x) (((BX_(x)+(BX_(x)>>4)) & 0x0F0F0F0F) % 255)
- * #define BX_(x) ((x) - (((x)>>1)&0x77777777) \
- * - (((x)>>2)&0x33333333) \
- * - (((x)>>3)&0x11111111))
+ * Population count algorithm using SWAR approach
+ * - "SIMD Within A Register".
*/
static __inline uint32_t
bitcount32(uint32_t x)
OpenPOWER on IntegriCloud