author      pjd <pjd@FreeBSD.org>  2005-08-19 22:10:19 +0000
committer   pjd <pjd@FreeBSD.org>  2005-08-19 22:10:19 +0000
commit      863deb3c00d07683cfba7aca2e35643099e42dd7 (patch)
tree        e6549a4db754594b874a82ce52323ca5ea203760
parent      cb1c3eea784bed67621adba223b0549ec5730a77 (diff)
Avoid code duplication and implement bitcount32() function in systm.h only.
Reviewed by:	cperciva
MFC after:	3 days
-rw-r--r--  sys/amd64/amd64/mp_machdep.c |  54
-rw-r--r--  sys/geom/stripe/g_stripe.c   |   2
-rw-r--r--  sys/geom/stripe/g_stripe.h   |   6
-rw-r--r--  sys/i386/i386/mp_machdep.c   |  54
-rw-r--r--  sys/sys/systm.h              |  52
5 files changed, 55 insertions, 113 deletions
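
The function being consolidated is the classic branch-free population count: each step sums adjacent fields twice as wide as the previous ones (1-bit, 2-bit, 4-bit, 8-bit, then 16-bit sums), so after five steps the word holds its own count of set bits. A minimal userland sketch of the same reduction follows; the main() harness is illustrative only and not part of this commit:

	#include <stdint.h>
	#include <stdio.h>

	/* Same five-step reduction as the bitcount32() added to systm.h. */
	static uint32_t
	bitcount32(uint32_t x)
	{

		x = (x & 0x55555555) + ((x & 0xaaaaaaaa) >> 1);  /* 2-bit sums */
		x = (x & 0x33333333) + ((x & 0xcccccccc) >> 2);  /* 4-bit sums */
		x = (x & 0x0f0f0f0f) + ((x & 0xf0f0f0f0) >> 4);  /* 8-bit sums */
		x = (x & 0x00ff00ff) + ((x & 0xff00ff00) >> 8);  /* 16-bit sums */
		x = (x & 0x0000ffff) + ((x & 0xffff0000) >> 16); /* final sum */
		return (x);
	}

	int
	main(void)
	{

		/* Expected output: 0 1 16 32 */
		printf("%u %u %u %u\n", bitcount32(0), bitcount32(1),
		    bitcount32(0xf0f0f0f0), bitcount32(0xffffffff));
		return (0);
	}
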
diff --git a/sys/amd64/amd64/mp_machdep.c b/sys/amd64/amd64/mp_machdep.c
index a17114d..9d771ad 100644
--- a/sys/amd64/amd64/mp_machdep.c
+++ b/sys/amd64/amd64/mp_machdep.c
@@ -812,58 +812,6 @@ smp_tlb_shootdown(u_int vector, vm_offset_t addr1, vm_offset_t addr2)
ia32_pause();
}
-/*
- * This is about as magic as it gets. fortune(1) has got similar code
- * for reversing bits in a word. Who thinks up this stuff??
- *
- * Yes, it does appear to be consistently faster than:
- * while (i = ffs(m)) {
- * m >>= i;
- * bits++;
- * }
- * and
- * while (lsb = (m & -m)) { // This is magic too
- * m &= ~lsb; // or: m ^= lsb
- * bits++;
- * }
- * Both of these latter forms do some very strange things on gcc-3.1 with
- * -mcpu=pentiumpro and/or -march=pentiumpro and/or -O or -O2.
- * There is probably an SSE or MMX popcnt instruction.
- *
- * I wonder if this should be in libkern?
- *
- * XXX Stop the presses! Another one:
- * static __inline u_int32_t
- * popcnt1(u_int32_t v)
- * {
- * v -= ((v >> 1) & 0x55555555);
- * v = (v & 0x33333333) + ((v >> 2) & 0x33333333);
- * v = (v + (v >> 4)) & 0x0F0F0F0F;
- * return (v * 0x01010101) >> 24;
- * }
- * The downside is that it has a multiply. With a pentium3 with
- * -mcpu=pentiumpro and -march=pentiumpro then gcc-3.1 will use
- * an imull, and in that case it is faster. In most other cases
- * it appears slightly slower.
- *
- * Another variant (also from fortune):
- * #define BITCOUNT(x) (((BX_(x)+(BX_(x)>>4)) & 0x0F0F0F0F) % 255)
- * #define BX_(x) ((x) - (((x)>>1)&0x77777777) \
- * - (((x)>>2)&0x33333333) \
- * - (((x)>>3)&0x11111111))
- */
-static __inline u_int32_t
-popcnt(u_int32_t m)
-{
-
- m = (m & 0x55555555) + ((m & 0xaaaaaaaa) >> 1);
- m = (m & 0x33333333) + ((m & 0xcccccccc) >> 2);
- m = (m & 0x0f0f0f0f) + ((m & 0xf0f0f0f0) >> 4);
- m = (m & 0x00ff00ff) + ((m & 0xff00ff00) >> 8);
- m = (m & 0x0000ffff) + ((m & 0xffff0000) >> 16);
- return m;
-}
-
static void
smp_targeted_tlb_shootdown(u_int mask, u_int vector, vm_offset_t addr1, vm_offset_t addr2)
{
@@ -878,7 +826,7 @@ smp_targeted_tlb_shootdown(u_int mask, u_int vector, vm_offset_t addr1, vm_offse
mask &= ~PCPU_GET(cpumask);
if (mask == 0)
return;
- ncpu = popcnt(mask);
+ ncpu = bitcount32(mask);
if (ncpu > othercpus) {
/* XXX this should be a panic offence */
printf("SMP: tlb shootdown to %d other cpus (only have %d)\n",
diff --git a/sys/geom/stripe/g_stripe.c b/sys/geom/stripe/g_stripe.c
index 3f66102..eea2ad1 100644
--- a/sys/geom/stripe/g_stripe.c
+++ b/sys/geom/stripe/g_stripe.c
@@ -792,7 +792,7 @@ g_stripe_create(struct g_class *mp, const struct g_stripe_metadata *md,
sc->sc_id = md->md_id;
sc->sc_stripesize = md->md_stripesize;
- sc->sc_stripebits = BITCOUNT(sc->sc_stripesize - 1);
+ sc->sc_stripebits = bitcount32(sc->sc_stripesize - 1);
sc->sc_ndisks = md->md_all;
sc->sc_disks = malloc(sizeof(struct g_consumer *) * sc->sc_ndisks,
M_STRIPE, M_WAITOK | M_ZERO);
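
A note on the g_stripe change above: the stripe size is a power of two, so sc_stripesize - 1 is a mask of all the bits below its single set bit, and counting those bits recovers log2 of the stripe size. A standalone sketch of the trick, using the compiler's __builtin_popcount() as a stand-in for the kernel's bitcount32():

	#include <stdio.h>

	int
	main(void)
	{
		unsigned stripesize = 65536;	/* must be a power of two */

		/*
		 * 65536 == 1 << 16, so stripesize - 1 == 0xffff has 16 set
		 * bits: counting them yields log2 of the stripe size, i.e.
		 * the shift amount used to map an offset to a stripe.
		 */
		printf("stripebits = %d\n", __builtin_popcount(stripesize - 1));
		return (0);
	}
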
diff --git a/sys/geom/stripe/g_stripe.h b/sys/geom/stripe/g_stripe.h
index b343e23..1b6e2e5 100644
--- a/sys/geom/stripe/g_stripe.h
+++ b/sys/geom/stripe/g_stripe.h
@@ -120,10 +120,4 @@ stripe_metadata_decode(const u_char *data, struct g_stripe_metadata *md)
md->md_provsize = le64dec(data + 64);
}
-#ifndef BITCOUNT
-#define BITCOUNT(x) (((BX_(x) + (BX_(x) >> 4)) & 0x0F0F0F0F) % 255)
-#define BX_(x) ((x) - (((x) >> 1) & 0x77777777) - \
- (((x) >> 2) & 0x33333333) - (((x) >> 3) & 0x11111111))
-#endif
-
#endif /* _G_STRIPE_H_ */
diff --git a/sys/i386/i386/mp_machdep.c b/sys/i386/i386/mp_machdep.c
index ab653d7..4873946 100644
--- a/sys/i386/i386/mp_machdep.c
+++ b/sys/i386/i386/mp_machdep.c
@@ -1008,58 +1008,6 @@ smp_tlb_shootdown(u_int vector, vm_offset_t addr1, vm_offset_t addr2)
ia32_pause();
}
-/*
- * This is about as magic as it gets. fortune(1) has got similar code
- * for reversing bits in a word. Who thinks up this stuff??
- *
- * Yes, it does appear to be consistently faster than:
- * while (i = ffs(m)) {
- * m >>= i;
- * bits++;
- * }
- * and
- * while (lsb = (m & -m)) { // This is magic too
- * m &= ~lsb; // or: m ^= lsb
- * bits++;
- * }
- * Both of these latter forms do some very strange things on gcc-3.1 with
- * -mcpu=pentiumpro and/or -march=pentiumpro and/or -O or -O2.
- * There is probably an SSE or MMX popcnt instruction.
- *
- * I wonder if this should be in libkern?
- *
- * XXX Stop the presses! Another one:
- * static __inline u_int32_t
- * popcnt1(u_int32_t v)
- * {
- * v -= ((v >> 1) & 0x55555555);
- * v = (v & 0x33333333) + ((v >> 2) & 0x33333333);
- * v = (v + (v >> 4)) & 0x0F0F0F0F;
- * return (v * 0x01010101) >> 24;
- * }
- * The downside is that it has a multiply. With a pentium3 with
- * -mcpu=pentiumpro and -march=pentiumpro then gcc-3.1 will use
- * an imull, and in that case it is faster. In most other cases
- * it appears slightly slower.
- *
- * Another variant (also from fortune):
- * #define BITCOUNT(x) (((BX_(x)+(BX_(x)>>4)) & 0x0F0F0F0F) % 255)
- * #define BX_(x) ((x) - (((x)>>1)&0x77777777) \
- * - (((x)>>2)&0x33333333) \
- * - (((x)>>3)&0x11111111))
- */
-static __inline u_int32_t
-popcnt(u_int32_t m)
-{
-
- m = (m & 0x55555555) + ((m & 0xaaaaaaaa) >> 1);
- m = (m & 0x33333333) + ((m & 0xcccccccc) >> 2);
- m = (m & 0x0f0f0f0f) + ((m & 0xf0f0f0f0) >> 4);
- m = (m & 0x00ff00ff) + ((m & 0xff00ff00) >> 8);
- m = (m & 0x0000ffff) + ((m & 0xffff0000) >> 16);
- return m;
-}
-
static void
smp_targeted_tlb_shootdown(u_int mask, u_int vector, vm_offset_t addr1, vm_offset_t addr2)
{
@@ -1074,7 +1022,7 @@ smp_targeted_tlb_shootdown(u_int mask, u_int vector, vm_offset_t addr1, vm_offse
mask &= ~PCPU_GET(cpumask);
if (mask == 0)
return;
- ncpu = popcnt(mask);
+ ncpu = bitcount32(mask);
if (ncpu > othercpus) {
/* XXX this should be a panic offence */
printf("SMP: tlb shootdown to %d other cpus (only have %d)\n",
diff --git a/sys/sys/systm.h b/sys/sys/systm.h
index da9447a..cbd95ee 100644
--- a/sys/sys/systm.h
+++ b/sys/sys/systm.h
@@ -330,4 +330,56 @@ int alloc_unr(struct unrhdr *uh);
int alloc_unrl(struct unrhdr *uh);
void free_unr(struct unrhdr *uh, u_int item);
+/*
+ * This is about as magic as it gets. fortune(1) has got similar code
+ * for reversing bits in a word. Who thinks up this stuff??
+ *
+ * Yes, it does appear to be consistently faster than:
+ * while (i = ffs(m)) {
+ * m >>= i;
+ * bits++;
+ * }
+ * and
+ * while (lsb = (m & -m)) { // This is magic too
+ * m &= ~lsb; // or: m ^= lsb
+ * bits++;
+ * }
+ * Both of these latter forms do some very strange things on gcc-3.1 with
+ * -mcpu=pentiumpro and/or -march=pentiumpro and/or -O or -O2.
+ * There is probably an SSE or MMX popcnt instruction.
+ *
+ * I wonder if this should be in libkern?
+ *
+ * XXX Stop the presses! Another one:
+ * static __inline u_int32_t
+ * popcnt1(u_int32_t v)
+ * {
+ * v -= ((v >> 1) & 0x55555555);
+ * v = (v & 0x33333333) + ((v >> 2) & 0x33333333);
+ * v = (v + (v >> 4)) & 0x0F0F0F0F;
+ * return (v * 0x01010101) >> 24;
+ * }
+ * The downside is that it has a multiply. With a pentium3 with
+ * -mcpu=pentiumpro and -march=pentiumpro then gcc-3.1 will use
+ * an imull, and in that case it is faster. In most other cases
+ * it appears slightly slower.
+ *
+ * Another variant (also from fortune):
+ * #define BITCOUNT(x) (((BX_(x)+(BX_(x)>>4)) & 0x0F0F0F0F) % 255)
+ * #define BX_(x) ((x) - (((x)>>1)&0x77777777) \
+ * - (((x)>>2)&0x33333333) \
+ * - (((x)>>3)&0x11111111))
+ */
+static __inline uint32_t
+bitcount32(uint32_t x)
+{
+
+ x = (x & 0x55555555) + ((x & 0xaaaaaaaa) >> 1);
+ x = (x & 0x33333333) + ((x & 0xcccccccc) >> 2);
+ x = (x & 0x0f0f0f0f) + ((x & 0xf0f0f0f0) >> 4);
+ x = (x & 0x00ff00ff) + ((x & 0xff00ff00) >> 8);
+ x = (x & 0x0000ffff) + ((x & 0xffff0000) >> 16);
+ return (x);
+}
+
#endif /* !_SYS_SYSTM_H_ */
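
The alternative popcount formulations quoted in the comment can be cross-checked against the new bitcount32() in userland. A throwaway harness along these lines (hypothetical, not part of the tree) shows the variants agree:

	#include <assert.h>
	#include <stdint.h>
	#include <stdio.h>

	/* Modulo-255 variant (the old BITCOUNT from g_stripe.h). */
	#define BX_(x)	((x) - (((x) >> 1) & 0x77777777) - \
			 (((x) >> 2) & 0x33333333) - (((x) >> 3) & 0x11111111))
	#define BITCOUNT(x)	(((BX_(x) + (BX_(x) >> 4)) & 0x0F0F0F0F) % 255)

	/* Multiply-based variant quoted in the comment. */
	static uint32_t
	popcnt1(uint32_t v)
	{

		v -= (v >> 1) & 0x55555555;
		v = (v & 0x33333333) + ((v >> 2) & 0x33333333);
		v = (v + (v >> 4)) & 0x0f0f0f0f;
		return ((v * 0x01010101) >> 24);
	}

	/* Clear-lowest-set-bit loop quoted in the comment. */
	static uint32_t
	popcnt_lsb(uint32_t m)
	{
		uint32_t bits, lsb;

		/* m & -m isolates the lowest set bit (unsigned, so -m is
		 * well-defined modular arithmetic). */
		for (bits = 0; (lsb = (m & -m)) != 0; bits++)
			m &= ~lsb;
		return (bits);
	}

	/* bitcount32() as in systm.h, repeated for a standalone build. */
	static uint32_t
	bitcount32(uint32_t x)
	{

		x = (x & 0x55555555) + ((x & 0xaaaaaaaa) >> 1);
		x = (x & 0x33333333) + ((x & 0xcccccccc) >> 2);
		x = (x & 0x0f0f0f0f) + ((x & 0xf0f0f0f0) >> 4);
		x = (x & 0x00ff00ff) + ((x & 0xff00ff00) >> 8);
		x = (x & 0x0000ffff) + ((x & 0xffff0000) >> 16);
		return (x);
	}

	static void
	check(uint32_t v)
	{
		uint32_t n = bitcount32(v);

		assert(n == popcnt1(v));
		assert(n == popcnt_lsb(v));
		assert(n == (uint32_t)BITCOUNT(v));
	}

	int
	main(void)
	{
		uint32_t samples[] = { 0, 1, 0x80000000, 0xf0f0f0f0,
		    0x12345678, 0xffffffff };
		uint32_t v;
		size_t i;

		for (i = 0; i < sizeof(samples) / sizeof(samples[0]); i++)
			check(samples[i]);
		for (v = 1, i = 0; i < 1000000; i++) {
			v = v * 1664525 + 1013904223;	/* common LCG step */
			check(v);
		}
		printf("all popcount variants agree\n");
		return (0);
	}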