author     alc <alc@FreeBSD.org>    2017-09-30 19:54:30 +0000
committer  alc <alc@FreeBSD.org>    2017-09-30 19:54:30 +0000
commit     6de538b4bedf70c504cbe3ad1afc7d774d94f63b (patch)
tree       8088082fb5b304007607bed87bb676f75ed11caa
parent     70383b3a940d90e7d084b81f97f0be5bf5b2fda4 (diff)
MFC r323391
To analyze the allocation of swap blocks by blist functions, add a method for analyzing the radix tree structures and reporting on the number, and sizes, of maximal intervals of free blocks. The report includes the number of maximal intervals, and also the number of them in each of several size ranges, from small (size 1, or 3 to 4) to large (28657 to 46367), with size boundaries defined by Fibonacci numbers. The report is written in the test tool with the 's' command, or in a running kernel by sysctl.

The analysis of the radix tree frequently computes the position of the lone bit set in a u_daddr_t, a computation that also appears in leaf allocation. That computation has been moved into a function of its own and optimized for cases where an inlined machine instruction can replace the usual binary search.
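The in-kernel report is exposed below as the vm.swap_fragmentation sysctl. A minimal sketch of a userland reader using sysctlbyname(3) follows; the program itself is illustrative and not part of this commit, and only the sysctl name is taken from the diff.

#include <sys/types.h>
#include <sys/sysctl.h>

#include <err.h>
#include <stdio.h>
#include <stdlib.h>

int
main(void)
{
        char *buf;
        size_t len;

        /* First call sizes the report, second call fetches it. */
        if (sysctlbyname("vm.swap_fragmentation", NULL, &len, NULL, 0) != 0)
                err(1, "sysctlbyname");
        if ((buf = malloc(len)) == NULL)
                err(1, "malloc");
        if (sysctlbyname("vm.swap_fragmentation", buf, &len, NULL, 0) != 0)
                err(1, "sysctlbyname");
        printf("%s", buf);
        free(buf);
        return (0);
}

The same report can of course be read with a plain `sysctl vm.swap_fragmentation` from the command line.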
-rw-r--r--  sys/kern/subr_blist.c  | 243
-rw-r--r--  sys/sys/blist.h        |   3
-rw-r--r--  sys/vm/swap_pager.c    |  35
3 files changed, 269 insertions, 12 deletions
diff --git a/sys/kern/subr_blist.c b/sys/kern/subr_blist.c
index 533f71b..442444e 100644
--- a/sys/kern/subr_blist.c
+++ b/sys/kern/subr_blist.c
@@ -90,6 +90,7 @@ __FBSDID("$FreeBSD$");
#include <sys/kernel.h>
#include <sys/blist.h>
#include <sys/malloc.h>
+#include <sys/sbuf.h>
#include <sys/proc.h>
#include <sys/mutex.h>
@@ -101,6 +102,7 @@ __FBSDID("$FreeBSD$");
#include <sys/types.h>
#include <sys/malloc.h>
+#include <sys/sbuf.h>
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
@@ -144,6 +146,7 @@ static MALLOC_DEFINE(M_SWAP, "SWAP", "Swap space");
#endif
CTASSERT(BLIST_BMAP_RADIX % BLIST_META_RADIX == 0);
+#define BLIST_META_MASK (BLIST_META_RADIX - 1)
/*
* For a subtree that can represent the state of up to 'radix' blocks, the
@@ -166,7 +169,35 @@ radix_to_skip(daddr_t radix)
{
return (radix /
- ((BLIST_BMAP_RADIX / BLIST_META_RADIX) * (BLIST_META_RADIX - 1)));
+ ((BLIST_BMAP_RADIX / BLIST_META_RADIX) * BLIST_META_MASK));
+}
+
+/*
+ * Use binary search, or a faster method, to find the 1 bit in a u_daddr_t.
+ * Assumes that the argument has only one bit set.
+ */
+static inline int
+bitpos(u_daddr_t mask)
+{
+ int hi, lo, mid;
+
+ switch (sizeof(mask)) {
+#ifdef HAVE_INLINE_FFSLL
+ case sizeof(long long):
+ return (ffsll(mask) - 1);
+#endif
+ default:
+ lo = 0;
+ hi = BLIST_BMAP_RADIX;
+ while (lo + 1 < hi) {
+ mid = (lo + hi) >> 1;
+ if ((mask >> mid) != 0)
+ lo = mid;
+ else
+ hi = mid;
+ }
+ return (lo);
+ }
}
/*
@@ -340,6 +371,192 @@ blist_print(blist_t bl)
#endif
+static const u_daddr_t fib[] = {
+ 1, 2, 3, 5, 8, 13, 21, 34, 55, 89, 144, 233, 377, 610, 987, 1597, 2584,
+ 4181, 6765, 10946, 17711, 28657, 46368, 75025, 121393, 196418, 317811,
+ 514229, 832040, 1346269, 2178309, 3524578,
+};
+
+/*
+ * Use 'gap' to describe a maximal range of unallocated blocks/bits.
+ */
+struct gap_stats {
+ daddr_t start; /* current gap start, or SWAPBLK_NONE */
+ daddr_t num; /* number of gaps observed */
+ daddr_t max; /* largest gap size */
+ daddr_t avg; /* average gap size */
+ daddr_t err; /* sum - num * avg */
+ daddr_t histo[nitems(fib)]; /* # gaps in each size range */
+ int max_bucket; /* last histo elt with nonzero val */
+};
+
+/*
+ * gap_stats_counting() - is the state 'counting 1 bits'?
+ * or 'skipping 0 bits'?
+ */
+static inline bool
+gap_stats_counting(const struct gap_stats *stats)
+{
+
+ return (stats->start != SWAPBLK_NONE);
+}
+
+/*
+ * init_gap_stats() - initialize stats on gap sizes
+ */
+static inline void
+init_gap_stats(struct gap_stats *stats)
+{
+
+ bzero(stats, sizeof(*stats));
+ stats->start = SWAPBLK_NONE;
+}
+
+/*
+ * update_gap_stats() - update stats on gap sizes
+ */
+static void
+update_gap_stats(struct gap_stats *stats, daddr_t posn)
+{
+ daddr_t size;
+ int hi, lo, mid;
+
+ if (!gap_stats_counting(stats)) {
+ stats->start = posn;
+ return;
+ }
+ size = posn - stats->start;
+ stats->start = SWAPBLK_NONE;
+ if (size > stats->max)
+ stats->max = size;
+
+ /*
+ * Find the fibonacci range that contains size,
+ * expecting to find it in an early range.
+ */
+ lo = 0;
+ hi = 1;
+ while (hi < nitems(fib) && fib[hi] <= size) {
+ lo = hi;
+ hi *= 2;
+ }
+ if (hi >= nitems(fib))
+ hi = nitems(fib);
+ while (lo + 1 != hi) {
+ mid = (lo + hi) >> 1;
+ if (fib[mid] <= size)
+ lo = mid;
+ else
+ hi = mid;
+ }
+ stats->histo[lo]++;
+ if (lo > stats->max_bucket)
+ stats->max_bucket = lo;
+ stats->err += size - stats->avg;
+ stats->num++;
+ stats->avg += stats->err / stats->num;
+ stats->err %= stats->num;
+}
+
+/*
+ * dump_gap_stats() - print stats on gap sizes
+ */
+static inline void
+dump_gap_stats(const struct gap_stats *stats, struct sbuf *s)
+{
+ int i;
+
+ sbuf_printf(s, "number of maximal free ranges: %jd\n",
+ (intmax_t)stats->num);
+ sbuf_printf(s, "largest free range: %jd\n", (intmax_t)stats->max);
+ sbuf_printf(s, "average maximal free range size: %jd\n",
+ (intmax_t)stats->avg);
+ sbuf_printf(s, "number of maximal free ranges of different sizes:\n");
+ sbuf_printf(s, " count | size range\n");
+ sbuf_printf(s, " ----- | ----------\n");
+ for (i = 0; i < stats->max_bucket; i++) {
+ if (stats->histo[i] != 0) {
+ sbuf_printf(s, "%20jd | ",
+ (intmax_t)stats->histo[i]);
+ if (fib[i] != fib[i + 1] - 1)
+ sbuf_printf(s, "%jd to %jd\n", (intmax_t)fib[i],
+ (intmax_t)fib[i + 1] - 1);
+ else
+ sbuf_printf(s, "%jd\n", (intmax_t)fib[i]);
+ }
+ }
+ sbuf_printf(s, "%20jd | ", (intmax_t)stats->histo[i]);
+ if (stats->histo[i] > 1)
+ sbuf_printf(s, "%jd to %jd\n", (intmax_t)fib[i],
+ (intmax_t)stats->max);
+ else
+ sbuf_printf(s, "%jd\n", (intmax_t)stats->max);
+}
+
+/*
+ * blist_stats() - dump radix tree stats
+ */
+void
+blist_stats(blist_t bl, struct sbuf *s)
+{
+ struct gap_stats gstats;
+ struct gap_stats *stats = &gstats;
+ daddr_t i, nodes, radix;
+ u_daddr_t bit, diff, mask;
+
+ init_gap_stats(stats);
+ nodes = 0;
+ i = bl->bl_radix;
+ while (i < bl->bl_radix + bl->bl_blocks) {
+ /*
+ * Find max size subtree starting at i.
+ */
+ radix = BLIST_BMAP_RADIX;
+ while (((i / radix) & BLIST_META_MASK) == 0)
+ radix *= BLIST_META_RADIX;
+
+ /*
+ * Check for skippable subtrees starting at i.
+ */
+ while (radix > BLIST_BMAP_RADIX) {
+ if (bl->bl_root[nodes].u.bmu_avail == 0) {
+ if (gap_stats_counting(stats))
+ update_gap_stats(stats, i);
+ break;
+ }
+ if (bl->bl_root[nodes].u.bmu_avail == radix) {
+ if (!gap_stats_counting(stats))
+ update_gap_stats(stats, i);
+ break;
+ }
+
+ /*
+ * Skip subtree root.
+ */
+ nodes++;
+ radix /= BLIST_META_RADIX;
+ }
+ if (radix == BLIST_BMAP_RADIX) {
+ /*
+ * Scan leaf.
+ */
+ mask = bl->bl_root[nodes].u.bmu_bitmap;
+ diff = mask ^ (mask << 1);
+ if (gap_stats_counting(stats))
+ diff ^= 1;
+ while (diff != 0) {
+ bit = diff & -diff;
+ update_gap_stats(stats, i + bitpos(bit));
+ diff ^= bit;
+ }
+ }
+ nodes += radix_to_skip(radix);
+ i += radix;
+ }
+ update_gap_stats(stats, i);
+ dump_gap_stats(stats, s);
+}
+
/************************************************************************
* ALLOCATION SUPPORT FUNCTIONS *
************************************************************************
@@ -355,13 +572,13 @@ blist_print(blist_t bl)
*
* This is the core of the allocator and is optimized for the
* BLIST_BMAP_RADIX block allocation case. Otherwise, execution
- * time is proportional to log2(count) + log2(BLIST_BMAP_RADIX).
+ * time is proportional to log2(count) + bitpos time.
*/
static daddr_t
blst_leaf_alloc(blmeta_t *scan, daddr_t blk, int count, daddr_t cursor)
{
u_daddr_t mask;
- int count1, hi, lo, mid, num_shifts, range1, range_ext;
+ int count1, lo, num_shifts, range1, range_ext;
if (count == BLIST_BMAP_RADIX) {
/*
@@ -419,17 +636,10 @@ blst_leaf_alloc(blmeta_t *scan, daddr_t blk, int count, daddr_t cursor)
/*
* The least significant set bit in mask marks the start of the first
* available range of sufficient size. Clear all the bits but that one,
- * and then perform a binary search to find its position.
+ * and then find its position.
*/
mask &= -mask;
- hi = BLIST_BMAP_RADIX - count1;
- while (lo + 1 < hi) {
- mid = (lo + hi) >> 1;
- if ((mask >> mid) != 0)
- lo = mid;
- else
- hi = mid;
- }
+ lo = bitpos(mask);
/*
* Set in mask exactly the bits being allocated, and clear them from
@@ -980,6 +1190,7 @@ main(int ac, char **av)
int size = 1024;
int i;
blist_t bl;
+ struct sbuf *s;
for (i = 1; i < ac; ++i) {
const char *ptr = av[i];
@@ -1014,6 +1225,13 @@ main(int ac, char **av)
case 'p':
blist_print(bl);
break;
+ case 's':
+ s = sbuf_new_auto();
+ blist_stats(bl, s);
+ sbuf_finish(s);
+ printf("%s", sbuf_data(s));
+ sbuf_delete(s);
+ break;
case 'a':
if (sscanf(buf + 1, "%lld", &count) == 1) {
daddr_t blk = blist_alloc(bl, count);
@@ -1041,6 +1259,7 @@ main(int ac, char **av)
case 'h':
puts(
"p -print\n"
+ "s -stats\n"
"a %d -allocate\n"
"f %x %d -free\n"
"l %x %d -fill\n"
diff --git a/sys/sys/blist.h b/sys/sys/blist.h
index 9fb2317..4afefd6 100644
--- a/sys/sys/blist.h
+++ b/sys/sys/blist.h
@@ -90,6 +90,8 @@ typedef struct blist {
#define BLIST_MAX_ALLOC BLIST_BMAP_RADIX
+struct sbuf;
+
daddr_t blist_alloc(blist_t blist, daddr_t count);
daddr_t blist_avail(blist_t blist);
blist_t blist_create(daddr_t blocks, int flags);
@@ -98,6 +100,7 @@ daddr_t blist_fill(blist_t bl, daddr_t blkno, daddr_t count);
void blist_free(blist_t blist, daddr_t blkno, daddr_t count);
void blist_print(blist_t blist);
void blist_resize(blist_t *pblist, daddr_t count, int freenew, int flags);
+void blist_stats(blist_t blist, struct sbuf *s);
#endif /* _SYS_BLIST_H_ */
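Before the swap_pager.c hook, here is a standalone sketch of the bit-position technique used by the new bitpos() helper in subr_blist.c above: isolate the least significant set bit with mask & -mask, then find its index either with ffsll(3) or by binary search over shifted values. The program, the bitpos64 name, and the USE_FFSLL switch (a stand-in for the kernel's HAVE_INLINE_FFSLL) are hypothetical illustration, not part of the commit.

#include <assert.h>
#include <stdint.h>
#include <stdio.h>
#include <strings.h>    /* ffsll() */

/* Position of the lone set bit in a 64-bit mask; mirrors the kernel helper. */
static int
bitpos64(uint64_t mask)
{
#ifdef USE_FFSLL
        /* Fast path when an inlined ffsll() is available. */
        return (ffsll((long long)mask) - 1);
#else
        int hi, lo, mid;

        /* Binary search: narrow [lo, hi) until only the bit's index remains. */
        lo = 0;
        hi = 64;
        while (lo + 1 < hi) {
                mid = (lo + hi) >> 1;
                if ((mask >> mid) != 0)
                        lo = mid;
                else
                        hi = mid;
        }
        return (lo);
#endif
}

int
main(void)
{
        uint64_t mask = (UINT64_C(1) << 37) | (UINT64_C(1) << 51);

        /* Isolate the least significant set bit, as blst_leaf_alloc() does. */
        mask &= -mask;
        assert(bitpos64(mask) == 37);
        printf("bit %d\n", bitpos64(mask));
        return (0);
}

The binary search halves the candidate range each pass, so it costs log2(64) = 6 comparisons; the ffsll() path collapses that to a single instruction on machines that can inline it, which is the optimization the commit message describes.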
diff --git a/sys/vm/swap_pager.c b/sys/vm/swap_pager.c
index f663ce1..f9a4f59 100644
--- a/sys/vm/swap_pager.c
+++ b/sys/vm/swap_pager.c
@@ -91,6 +91,7 @@ __FBSDID("$FreeBSD$");
#include <sys/resource.h>
#include <sys/resourcevar.h>
#include <sys/rwlock.h>
+#include <sys/sbuf.h>
#include <sys/sysctl.h>
#include <sys/sysproto.h>
#include <sys/blist.h>
@@ -322,6 +323,10 @@ static int sysctl_swap_async_max(SYSCTL_HANDLER_ARGS);
SYSCTL_PROC(_vm, OID_AUTO, swap_async_max, CTLTYPE_INT | CTLFLAG_RW |
CTLFLAG_MPSAFE, NULL, 0, sysctl_swap_async_max, "I",
"Maximum running async swap ops");
+static int sysctl_swap_fragmentation(SYSCTL_HANDLER_ARGS);
+SYSCTL_PROC(_vm, OID_AUTO, swap_fragmentation, CTLTYPE_STRING | CTLFLAG_RD |
+ CTLFLAG_MPSAFE, NULL, 0, sysctl_swap_fragmentation, "A",
+ "Swap Fragmentation Info");
static struct sx sw_alloc_sx;
@@ -779,6 +784,36 @@ swp_pager_freeswapspace(daddr_t blk, int npages)
}
/*
+ * SYSCTL_SWAP_FRAGMENTATION() - produce raw swap space stats
+ */
+static int
+sysctl_swap_fragmentation(SYSCTL_HANDLER_ARGS)
+{
+ struct sbuf sbuf;
+ struct swdevt *sp;
+ const char *devname;
+ int error;
+
+ error = sysctl_wire_old_buffer(req, 0);
+ if (error != 0)
+ return (error);
+ sbuf_new_for_sysctl(&sbuf, NULL, 128, req);
+ mtx_lock(&sw_dev_mtx);
+ TAILQ_FOREACH(sp, &swtailq, sw_list) {
+ if (vn_isdisk(sp->sw_vp, NULL))
+ devname = devtoname(sp->sw_vp->v_rdev);
+ else
+ devname = "[file]";
+ sbuf_printf(&sbuf, "\nFree space on device %s:\n", devname);
+ blist_stats(sp->sw_blist, &sbuf);
+ }
+ mtx_unlock(&sw_dev_mtx);
+ error = sbuf_finish(&sbuf);
+ sbuf_delete(&sbuf);
+ return (error);
+}
+
+/*
* SWAP_PAGER_FREESPACE() - frees swap blocks associated with a page
* range within an object.
*
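One further note on update_gap_stats() in subr_blist.c above: it tracks the average gap size without keeping a separate sum, maintaining the invariant total = num * avg + err across the err/num/avg update sequence. A hypothetical standalone check of that recurrence (not part of the commit):

#include <assert.h>
#include <stdint.h>
#include <stdio.h>

int
main(void)
{
        int64_t sizes[] = { 3, 1, 4, 1, 5, 9, 2, 6 };
        int64_t avg = 0, err = 0, num = 0, total = 0;
        size_t i;

        for (i = 0; i < sizeof(sizes) / sizeof(sizes[0]); i++) {
                /* Same update order as update_gap_stats(). */
                err += sizes[i] - avg;
                num++;
                avg += err / num;
                err %= num;
                total += sizes[i];
                /* Invariant: the exact sum is recoverable from avg and err. */
                assert(total == num * avg + err);
        }
        printf("num %jd avg %jd err %jd\n", (intmax_t)num, (intmax_t)avg,
            (intmax_t)err);
        return (0);
}

Note that err may go negative transiently when a gap is smaller than the running average; C's truncating division and remainder keep its magnitude below num, and the invariant the assert checks still holds.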