summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorjhb <jhb@FreeBSD.org>2015-04-14 20:05:26 +0000
committerjhb <jhb@FreeBSD.org>2015-04-14 20:05:26 +0000
commit887f061ed2aee5cdb4d1feca2f366981de423082 (patch)
tree972b20fdf5104b2f360f75937fb17893c3d1afc8
parentf3aa91c0843bcc324941e3ce2dc188ea47ef3294 (diff)
downloadFreeBSD-src-887f061ed2aee5cdb4d1feca2f366981de423082.zip
FreeBSD-src-887f061ed2aee5cdb4d1feca2f366981de423082.tar.gz
MFC 278474,278476,280279:
Expand the bitcount*() API and use it to implement CPU_COUNT for cpusets. 278474: Use __builtin_popcnt() to implement a BIT_COUNT() operation for bitsets and use this to implement CPU_COUNT() to count the number of CPUs in a cpuset. 278476: Use __builtin_popcountl() instead of __builtin_popcount(). 280279: Expand the bitcount* API to support 64-bit integers, plain ints and longs and create a "hidden" API that can be used in other system headers without adding namespace pollution. - If the POPCNT instruction is enabled at compile time, use __builtin_popcount*() to implement __bitcount*(), otherwise fall back to software implementations. - Use the existing bitcount16() and bitcount32() from <sys/systm.h> to implement the non-POPCNT __bitcount16() and __bitcount32() in <sys/types.h>. - For the non-POPCNT __bitcount64(), use a similar SWAR method on 64-bit systems. For 32-bit systems, use two __bitcount32() operations on the two halves. - Use __bitcount32() to provide a __bitcount() that operates on plain ints. - Use either __bitcount32() or __bitcount64() to provide a __bitcountl() that operates on longs. - Add public bitcount*() wrappers for __bitcount*() for use in the kernel in <sys/libkern.h>. - Use __bitcountl() instead of __builtin_popcountl() in BIT_COUNT().
-rw-r--r--sys/sys/bitset.h10
-rw-r--r--sys/sys/cpuset.h1
-rw-r--r--sys/sys/libkern.h5
-rw-r--r--sys/sys/systm.h27
-rw-r--r--sys/sys/types.h62
5 files changed, 78 insertions, 27 deletions
diff --git a/sys/sys/bitset.h b/sys/sys/bitset.h
index e6c4dc3..d130522 100644
--- a/sys/sys/bitset.h
+++ b/sys/sys/bitset.h
@@ -176,4 +176,14 @@
__bit; \
})
+#define BIT_COUNT(_s, p) __extension__ ({ \
+ __size_t __i; \
+ int __count; \
+ \
+ __count = 0; \
+ for (__i = 0; __i < __bitset_words((_s)); __i++) \
+ __count += __bitcountl((p)->__bits[__i]); \
+ __count; \
+})
+
#endif /* !_SYS_BITSET_H_ */
diff --git a/sys/sys/cpuset.h b/sys/sys/cpuset.h
index f3ec3da..dfa2d31 100644
--- a/sys/sys/cpuset.h
+++ b/sys/sys/cpuset.h
@@ -60,6 +60,7 @@
#define CPU_OR_ATOMIC(d, s) BIT_OR_ATOMIC(CPU_SETSIZE, d, s)
#define CPU_COPY_STORE_REL(f, t) BIT_COPY_STORE_REL(CPU_SETSIZE, f, t)
#define CPU_FFS(p) BIT_FFS(CPU_SETSIZE, p)
+#define CPU_COUNT(p) BIT_COUNT(CPU_SETSIZE, p)
/*
* Valid cpulevel_t values.
diff --git a/sys/sys/libkern.h b/sys/sys/libkern.h
index 5f850fb..0294537 100644
--- a/sys/sys/libkern.h
+++ b/sys/sys/libkern.h
@@ -97,6 +97,11 @@ int flsl(long);
#ifndef HAVE_INLINE_FLSLL
int flsll(long long);
#endif
+#define bitcount64(x) __bitcount64((uint64_t)(x))
+#define bitcount32(x) __bitcount32((uint32_t)(x))
+#define bitcount16(x) __bitcount16((uint16_t)(x))
+#define bitcountl(x) __bitcountl((u_long)(x))
+#define bitcount(x) __bitcount((u_int)(x))
int fnmatch(const char *, const char *, int);
int locc(int, char *, u_int);
diff --git a/sys/sys/systm.h b/sys/sys/systm.h
index bb0d64f..f4d2e92 100644
--- a/sys/sys/systm.h
+++ b/sys/sys/systm.h
@@ -424,33 +424,6 @@ int alloc_unr_specific(struct unrhdr *uh, u_int item);
int alloc_unrl(struct unrhdr *uh);
void free_unr(struct unrhdr *uh, u_int item);
-/*
- * Population count algorithm using SWAR approach
- * - "SIMD Within A Register".
- */
-static __inline uint32_t
-bitcount32(uint32_t x)
-{
-
- x = (x & 0x55555555) + ((x & 0xaaaaaaaa) >> 1);
- x = (x & 0x33333333) + ((x & 0xcccccccc) >> 2);
- x = (x + (x >> 4)) & 0x0f0f0f0f;
- x = (x + (x >> 8));
- x = (x + (x >> 16)) & 0x000000ff;
- return (x);
-}
-
-static __inline uint16_t
-bitcount16(uint32_t x)
-{
-
- x = (x & 0x5555) + ((x & 0xaaaa) >> 1);
- x = (x & 0x3333) + ((x & 0xcccc) >> 2);
- x = (x + (x >> 4)) & 0x0f0f;
- x = (x + (x >> 8)) & 0x00ff;
- return (x);
-}
-
void intr_prof_stack_use(struct thread *td, struct trapframe *frame);
#endif /* !_SYS_SYSTM_H_ */
diff --git a/sys/sys/types.h b/sys/sys/types.h
index 03eaa60..b6c4c89 100644
--- a/sys/sys/types.h
+++ b/sys/sys/types.h
@@ -289,6 +289,68 @@ typedef _Bool bool;
#include <sys/select.h>
+#ifdef __POPCNT__
+#define __bitcount64(x) __builtin_popcountll((__uint64_t)(x))
+#define __bitcount32(x) __builtin_popcount((__uint32_t)(x))
+#define __bitcount16(x) __builtin_popcount((__uint16_t)(x))
+#define __bitcountl(x) __builtin_popcountl((unsigned long)(x))
+#define __bitcount(x) __builtin_popcount((unsigned int)(x))
+#else
+/*
+ * Population count algorithm using SWAR approach
+ * - "SIMD Within A Register".
+ */
+static __inline __uint16_t
+__bitcount16(__uint16_t _x)
+{
+
+ _x = (_x & 0x5555) + ((_x & 0xaaaa) >> 1);
+ _x = (_x & 0x3333) + ((_x & 0xcccc) >> 2);
+ _x = (_x + (_x >> 4)) & 0x0f0f;
+ _x = (_x + (_x >> 8)) & 0x00ff;
+ return (_x);
+}
+
+static __inline __uint32_t
+__bitcount32(__uint32_t _x)
+{
+
+ _x = (_x & 0x55555555) + ((_x & 0xaaaaaaaa) >> 1);
+ _x = (_x & 0x33333333) + ((_x & 0xcccccccc) >> 2);
+ _x = (_x + (_x >> 4)) & 0x0f0f0f0f;
+ _x = (_x + (_x >> 8));
+ _x = (_x + (_x >> 16)) & 0x000000ff;
+ return (_x);
+}
+
+#ifdef __LP64__
+static __inline __uint64_t
+__bitcount64(__uint64_t _x)
+{
+
+ _x = (_x & 0x5555555555555555) + ((_x & 0xaaaaaaaaaaaaaaaa) >> 1);
+ _x = (_x & 0x3333333333333333) + ((_x & 0xcccccccccccccccc) >> 2);
+ _x = (_x + (_x >> 4)) & 0x0f0f0f0f0f0f0f0f;
+ _x = (_x + (_x >> 8));
+ _x = (_x + (_x >> 16));
+ _x = (_x + (_x >> 32)) & 0x000000ff;
+ return (_x);
+}
+
+#define __bitcountl(x) __bitcount64((unsigned long)(x))
+#else
+static __inline __uint64_t
+__bitcount64(__uint64_t _x)
+{
+
+ return (__bitcount32(_x >> 32) + __bitcount32(_x));
+}
+
+#define __bitcountl(x) __bitcount32((unsigned long)(x))
+#endif
+#define __bitcount(x) __bitcount32((unsigned int)(x))
+#endif
+
/*
* minor() gives a cookie instead of an index since we don't want to
* change the meanings of bits 0-15 or waste time and space shifting
OpenPOWER on IntegriCloud