path: root/lib/libc/stdlib
author     jasone <jasone@FreeBSD.org>  2007-03-23 05:05:48 +0000
committer  jasone <jasone@FreeBSD.org>  2007-03-23 05:05:48 +0000
commit     a1325e04bab8daf2ca1b3f6dc2d9ed6eef96f5f3 (patch)
tree       fc166cec1bf4e69eab990c85ba88e338243b8ba4 /lib/libc/stdlib
parent     1639f74854940b93f384f48a250acd15ab50a812 (diff)
download   FreeBSD-src-a1325e04bab8daf2ca1b3f6dc2d9ed6eef96f5f3.zip
           FreeBSD-src-a1325e04bab8daf2ca1b3f6dc2d9ed6eef96f5f3.tar.gz
Use extents rather than binary buddies to track free pages within chunks. This allows runs to be any multiple of the page size. The primary advantage is that large objects are no longer constrained to be 2^n pages, which can dramatically decrease internal fragmentation for large objects. This also allows the sizes for runs that back small objects to be more finely tuned.

Free runs are searched for linearly using the chunk page map (with the help of some heuristic optimizations). This changes the allocation policy from "first best fit" to "first fit". A prototype red-black tree implementation for tracking free runs that implemented "first best fit" did not cause a measurable speed or memory usage difference for realistic chunk sizes (though of course it is possible to construct benchmarks that favor one allocation policy over another).

Refine the handling of fullness constraints for small runs to be more tunable.

Restructure the per-chunk page map to contain only two fields per entry, rather than four. Also, increase each entry from 4 to 8 bytes, since this allows the use of 32-bit integers without increasing the number of chunk header pages.

Relax the maximum chunk size constraint. This is of no practical interest; it is merely fallout from the chunk page map restructuring.

Revamp statistics gathering and reporting to be faster, clearer, and more informative. Statistics gathering is fast enough now to have little to no impact on application speed, but it still requires approximately two extra pages of memory per arena (per process). This memory overhead may be acceptable for most systems, but we still need to leave statistics gathering disabled by default in RELENG branches.

Rename NO_MALLOC_EXTRAS to MALLOC_PRODUCTION in order to make its intent clearer (i.e., it should be defined in RELENG branches).
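For readers skimming the patch below, here is a minimal standalone sketch of the extent-style page map the commit introduces. The two-field entry layout and the POS_FREE convention are taken directly from the diff; the type and function names (page_map_t, run_split, run_dalloc) and the parameter passing are invented for illustration and do not appear in malloc.c.

    #include <assert.h>
    #include <stdint.h>

    /*
     * Two 32-bit fields per page, as in the patch: npages is the length of
     * the run the page belongs to, pos is the page's position within that
     * run.  A free run keeps only its first and last entries up to date,
     * marked with POS_FREE, which is what makes coalescing a pair of O(1)
     * neighbor checks.
     */
    #define POS_FREE 0xffffffffU

    typedef struct {
            uint32_t npages;        /* Number of pages in run. */
            uint32_t pos;           /* Position within run, or POS_FREE. */
    } page_map_t;

    /* Split need_pages off the front of the free run starting at run_ind. */
    static void
    run_split(page_map_t *map, uint32_t run_ind, uint32_t need_pages)
    {
            uint32_t total = map[run_ind].npages;
            uint32_t rem = total - need_pages;
            uint32_t i;

            assert(map[run_ind].pos == POS_FREE && need_pages <= total);
            for (i = 0; i < need_pages; i++) {
                    map[run_ind + i].npages = need_pages;
                    map[run_ind + i].pos = i;
            }
            if (rem > 0) {
                    /* Trailing pages remain a free run; mark its endpoints. */
                    map[run_ind + need_pages].npages = rem;
                    map[run_ind + need_pages].pos = POS_FREE;
                    map[run_ind + total - 1].npages = rem;
                    map[run_ind + total - 1].pos = POS_FREE;
            }
    }

    /* Free the run starting at run_ind, coalescing with free neighbors. */
    static void
    run_dalloc(page_map_t *map, uint32_t run_ind, uint32_t hdr_npages,
        uint32_t chunk_npages)
    {
            uint32_t run_pages = map[run_ind].npages;

            if (run_ind > hdr_npages && map[run_ind - 1].pos == POS_FREE) {
                    /* Previous run is free: extend backward to its start. */
                    run_ind -= map[run_ind - 1].npages;
                    run_pages += map[run_ind].npages;
            }
            if (run_ind + run_pages < chunk_npages &&
                map[run_ind + run_pages].pos == POS_FREE) {
                    /* Next run is free: absorb it. */
                    run_pages += map[run_ind + run_pages].npages;
            }
            /* Only the endpoints of the merged free run are kept current. */
            map[run_ind].npages = run_pages;
            map[run_ind].pos = POS_FREE;
            map[run_ind + run_pages - 1].npages = run_pages;
            map[run_ind + run_pages - 1].pos = POS_FREE;
    }

This is why the new map can describe runs of any whole number of pages: a run is simply an extent [run_ind, run_ind + npages), with no power-of-two buddy structure to respect.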
Diffstat (limited to 'lib/libc/stdlib')
-rw-r--r--  lib/libc/stdlib/malloc.c  |  655
1 file changed, 332 insertions(+), 323 deletions(-)
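The new arena_run_alloc() in the diff implements the "first fit" policy described in the commit message by walking this page map directly. The condensed sketch below assumes the page_map_t type and POS_FREE from the sketch above, passes the per-chunk hints as plain parameters, and uses a hypothetical name (first_fit); the bookkeeping that refreshes the hints after a failed scan is omitted.

    /*
     * First-fit search over one chunk's page map.  Returns the index of the
     * first free run with at least need_npages pages, or UINT32_MAX if none
     * exists.  The two cached hints bound the work: min_frun_ind is the
     * lowest page index at which a free run can start, and max_frun_npages
     * is an upper bound on the largest free run in the chunk, so chunks
     * that cannot possibly satisfy the request are rejected without a scan.
     */
    static uint32_t
    first_fit(const page_map_t *map, uint32_t chunk_npages,
        uint32_t min_frun_ind, uint32_t max_frun_npages, uint32_t need_npages)
    {
            uint32_t i;

            if (need_npages > max_frun_npages)
                    return (UINT32_MAX);
            for (i = min_frun_ind; i < chunk_npages; i += map[i].npages) {
                    if (map[i].pos == POS_FREE && map[i].npages >= need_npages)
                            return (i);     /* First fit, not best fit. */
            }
            return (UINT32_MAX);
    }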
diff --git a/lib/libc/stdlib/malloc.c b/lib/libc/stdlib/malloc.c
index 966e1d5..c631dda 100644
--- a/lib/libc/stdlib/malloc.c
+++ b/lib/libc/stdlib/malloc.c
@@ -72,11 +72,10 @@
* | | ... |
* | | 256 kB |
* | | 512 kB |
- * | | 1 MB |
* |====================================|
- * | Huge | 2 MB |
- * | | 4 MB |
- * | | 6 MB |
+ * | Huge | 1 MB |
+ * | | 2 MB |
+ * | | 3 MB |
* | | ... |
* |====================================|
*
@@ -174,13 +173,13 @@
/******************************************************************************/
/*
- * In order to disable various extra features that may have negative
- * performance impacts, (assertions, expanded statistics), define
- * NO_MALLOC_EXTRAS.
+ * MALLOC_PRODUCTION disables assertions and statistics gathering. It also
+ * defaults the A and J runtime options to off. These settings are appropriate
+ * for production systems.
*/
-/* #define NO_MALLOC_EXTRAS */
+/* #define MALLOC_PRODUCTION */
-#ifndef NO_MALLOC_EXTRAS
+#ifndef MALLOC_PRODUCTION
# define MALLOC_DEBUG
#endif
@@ -222,11 +221,8 @@ __FBSDID("$FreeBSD$");
#include "un-namespace.h"
-/*
- * Calculate statistics that can be used to get an idea of how well caching is
- * working.
- */
-#ifndef NO_MALLOC_EXTRAS
+/* MALLOC_STATS enables statistics calculation. */
+#ifndef MALLOC_PRODUCTION
# define MALLOC_STATS
#endif
@@ -298,7 +294,6 @@ __FBSDID("$FreeBSD$");
* memory system.
*/
#define CHUNK_2POW_DEFAULT 20
-#define CHUNK_2POW_MAX 28
/*
* Maximum size of L1 cache line. This is used to avoid cache line aliasing,
@@ -327,12 +322,15 @@ __FBSDID("$FreeBSD$");
*
* Note that it is possible to set this low enough that it cannot be honored
* for some/all object sizes, since there is one bit of header overhead per
- * object (plus a constant). In such cases, this value is iteratively doubled
- * in order to make sure that the overhead limit is achievable.
+ * object (plus a constant). In such cases, this constraint is relaxed.
+ *
+ * RUN_MAX_OVRHD_RELAX specifies the maximum number of bits per region of
+ * overhead for which RUN_MAX_OVRHD is relaxed.
*/
-#define RUN_MAX_HDR_OVERHEAD 0.005
+#define RUN_MAX_OVRHD 0.015
+#define RUN_MAX_OVRHD_RELAX 1.5
-/* Put a cap on small object run size. This overrides RUN_MAX_HDR_OVERHEAD. */
+/* Put a cap on small object run size. This overrides RUN_MAX_OVRHD. */
#define RUN_MAX_SMALL_2POW 16
#define RUN_MAX_SMALL (1 << RUN_MAX_SMALL_2POW)
@@ -385,16 +383,17 @@ struct malloc_bin_stats_s {
typedef struct arena_stats_s arena_stats_t;
struct arena_stats_s {
- /* Total byte count of allocated memory, not including overhead. */
- size_t allocated;
+ /* Number of bytes currently mapped. */
+ size_t mapped;
- /* Number of times each function was called. */
- uint64_t nmalloc;
- uint64_t ndalloc;
- uint64_t nmadvise;
+ /* Per-size-category statistics. */
+ size_t allocated_small;
+ uint64_t nmalloc_small;
+ uint64_t ndalloc_small;
- /* Number of large allocation requests. */
- uint64_t large_nrequests;
+ size_t allocated_large;
+ uint64_t nmalloc_large;
+ uint64_t ndalloc_large;
};
typedef struct chunk_stats_s chunk_stats_t;
@@ -450,10 +449,18 @@ typedef struct arena_bin_s arena_bin_t;
typedef struct arena_chunk_map_s arena_chunk_map_t;
struct arena_chunk_map_s {
- bool free:1;
- bool large:1;
- unsigned npages:15; /* Limiting factor for CHUNK_2POW_MAX. */
- unsigned pos:15;
+ /* Number of pages in run. */
+ uint32_t npages;
+ /*
+ * Position within run. For a free run, this is POS_FREE for the first
+ * and last pages. The POS_FREE special value makes it possible to
+ * quickly coalesce free runs.
+ *
+ * This is the limiting factor for chunk_size; there can be at most 2^31
+ * pages in a run.
+ */
+#define POS_FREE 0xffffffffU
+ uint32_t pos;
};
/* Arena chunk header. */
@@ -472,24 +479,25 @@ struct arena_chunk_s {
uint32_t pages_used;
/*
- * Array of counters that keeps track of how many free runs of each
- * size are available in this chunk. This table is sized at compile
- * time, which is wasteful. However, due to unrelated rounding, this
- * doesn't actually waste any otherwise useful space.
- *
- * index == 2^n pages
- *
- * index | npages
- * ------+-------
- * 0 | 1
- * 1 | 2
- * 2 | 4
- * 3 | 8
- * :
+ * Every time a free run larger than this value is created/coalesced,
+ * this value is increased. The only way that the value decreases is if
+ * arena_run_alloc() fails to find a free run as large as advertised by
+ * this value.
+ */
+ uint32_t max_frun_npages;
+
+ /*
+ * Every time a free run that starts at an earlier page than this value
+ * is created/coalesced, this value is decreased. It is reset in a
+ * similar fashion to max_frun_npages.
*/
- uint32_t nfree_runs[CHUNK_2POW_MAX/* - PAGE_SHIFT */];
+ uint32_t min_frun_ind;
- /* Map of pages within chunk that keeps track of free/large/small. */
+ /*
+ * Map of pages within chunk that keeps track of free/large/small. For
+ * free runs, only the map entries for the first and last pages are
+ * kept up to date, so that free runs can be quickly coalesced.
+ */
arena_chunk_map_t map[1]; /* Dynamically sized. */
};
typedef struct arena_chunk_tree_s arena_chunk_tree_t;
@@ -680,6 +688,7 @@ static unsigned ncpus;
/* VM page size. */
static unsigned pagesize;
+static unsigned pagesize_mask;
static unsigned pagesize_2pow;
/* Various bin-related settings. */
@@ -697,8 +706,9 @@ static size_t quantum_mask; /* (quantum - 1). */
/* Various chunk-related settings. */
static size_t chunk_size;
static size_t chunk_size_mask; /* (chunk_size - 1). */
+static unsigned chunk_npages;
+static unsigned arena_chunk_header_npages;
static size_t arena_maxclass; /* Max size class for arenas. */
-static unsigned arena_chunk_maplen;
/********/
/*
@@ -731,7 +741,7 @@ static void *brk_max;
#ifdef MALLOC_STATS
/*
- * Byte counters for allocated/total space used by the chunks in the huge
+ * Byte counters for allocated/mapped space used by the chunks in the huge
* allocations tree.
*/
static uint64_t huge_nmalloc;
@@ -761,7 +771,7 @@ static void *base_past_addr; /* Addr immediately past base_chunk. */
static chunk_node_t *base_chunk_nodes; /* LIFO cache of chunk nodes. */
static malloc_mutex_t base_mtx;
#ifdef MALLOC_STATS
-static uint64_t base_total;
+static size_t base_mapped;
#endif
/********/
@@ -799,7 +809,7 @@ static chunk_stats_t stats_chunks;
*/
const char *_malloc_options;
-#ifndef NO_MALLOC_EXTRAS
+#ifndef MALLOC_PRODUCTION
static bool opt_abort = true;
static bool opt_junk = true;
#else
@@ -855,15 +865,14 @@ static void chunk_dealloc(void *chunk, size_t size);
#ifndef NO_TLS
static arena_t *choose_arena_hard(void);
#endif
-static void arena_run_split(arena_t *arena, arena_run_t *run, bool large,
- size_t size);
+static void arena_run_split(arena_t *arena, arena_run_t *run, size_t size);
static arena_chunk_t *arena_chunk_alloc(arena_t *arena);
static void arena_chunk_dealloc(arena_t *arena, arena_chunk_t *chunk);
static void arena_bin_run_promote(arena_t *arena, arena_bin_t *bin,
arena_run_t *run);
static void arena_bin_run_demote(arena_t *arena, arena_bin_t *bin,
arena_run_t *run);
-static arena_run_t *arena_run_alloc(arena_t *arena, bool large, size_t size);
+static arena_run_t *arena_run_alloc(arena_t *arena, size_t size);
static void arena_run_dalloc(arena_t *arena, arena_run_t *run, size_t size);
static arena_run_t *arena_bin_nonfull_run_get(arena_t *arena, arena_bin_t *bin);
static void *arena_bin_malloc_hard(arena_t *arena, arena_bin_t *bin);
@@ -946,6 +955,10 @@ malloc_mutex_unlock(malloc_mutex_t *a_mutex)
#define QUANTUM_CEILING(a) \
(((a) + quantum_mask) & ~quantum_mask)
+/* Return the smallest pagesize multiple that is >= s. */
+#define PAGE_CEILING(s) \
+ (((s) + pagesize_mask) & ~pagesize_mask)
+
/* Compute the smallest power of 2 that is >= x. */
static inline size_t
pow2_ceil(size_t x)
@@ -1061,7 +1074,7 @@ base_chunk_alloc(size_t minsize)
base_past_addr = (void *)((uintptr_t)base_chunk
+ incr);
#ifdef MALLOC_STATS
- base_total += incr;
+ base_mapped += incr;
#endif
return (false);
}
@@ -1080,7 +1093,7 @@ base_chunk_alloc(size_t minsize)
base_next_addr = base_chunk;
base_past_addr = (void *)((uintptr_t)base_chunk + chunk_size);
#ifdef MALLOC_STATS
- base_total += chunk_size;
+ base_mapped += chunk_size;
#endif
return (false);
}
@@ -1154,19 +1167,22 @@ stats_print(arena_t *arena)
unsigned i;
int gap_start;
- malloc_printf("allocated: %zu\n", arena->stats.allocated);
-
- malloc_printf("calls:\n");
- malloc_printf(" %12s %12s %12s\n", "nmalloc","ndalloc", "nmadvise");
- malloc_printf(" %12llu %12llu %12llu\n",
- arena->stats.nmalloc, arena->stats.ndalloc, arena->stats.nmadvise);
-
- malloc_printf("large requests: %llu\n", arena->stats.large_nrequests);
-
- malloc_printf("bins:\n");
- malloc_printf("%13s %1s %4s %5s %6s %9s %5s %6s %7s %6s %6s\n",
- "bin", "", "size", "nregs", "run_sz", "nrequests", "nruns",
- "hiruns", "curruns", "npromo", "ndemo");
+ malloc_printf(
+ " allocated/mapped nmalloc ndalloc\n");
+ malloc_printf("small: %12llu %-12s %12llu %12llu\n",
+ arena->stats.allocated_small, "", arena->stats.nmalloc_small,
+ arena->stats.ndalloc_small);
+ malloc_printf("large: %12llu %-12s %12llu %12llu\n",
+ arena->stats.allocated_large, "", arena->stats.nmalloc_large,
+ arena->stats.ndalloc_large);
+ malloc_printf("total: %12llu/%-12llu %12llu %12llu\n",
+ arena->stats.allocated_small + arena->stats.allocated_large,
+ arena->stats.mapped,
+ arena->stats.nmalloc_small + arena->stats.nmalloc_large,
+ arena->stats.ndalloc_small + arena->stats.ndalloc_large);
+
+ malloc_printf("bins: bin size regs pgs requests newruns "
+ "maxruns curruns promote demote\n");
for (i = 0, gap_start = -1; i < ntbins + nqbins + nsbins; i++) {
if (arena->bins[i].stats.nrequests == 0) {
if (gap_start == -1)
@@ -1184,13 +1200,13 @@ stats_print(arena_t *arena)
gap_start = -1;
}
malloc_printf(
- "%13u %1s %4u %5u %6u %9llu %5llu"
- " %6lu %7lu %6llu %6llu\n",
+ "%13u %1s %4u %4u %3u %9llu %7llu"
+ " %7lu %7lu %7llu %7llu\n",
i,
i < ntbins ? "T" : i < ntbins + nqbins ? "Q" : "S",
arena->bins[i].reg_size,
arena->bins[i].nregs,
- arena->bins[i].run_size,
+ arena->bins[i].run_size >> pagesize_2pow,
arena->bins[i].stats.nrequests,
arena->bins[i].stats.nruns,
arena->bins[i].stats.highruns,
@@ -1776,57 +1792,44 @@ arena_run_reg_dalloc(arena_run_t *run, arena_bin_t *bin, void *ptr, size_t size)
}
static void
-arena_run_split(arena_t *arena, arena_run_t *run, bool large, size_t size)
+arena_run_split(arena_t *arena, arena_run_t *run, size_t size)
{
arena_chunk_t *chunk;
- unsigned run_ind, map_offset, total_pages, need_pages;
- unsigned i, log2_run_pages, run_pages;
+ unsigned run_ind, map_offset, total_pages, need_pages, rem_pages;
+ unsigned i;
chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(run);
run_ind = (unsigned)(((uintptr_t)run - (uintptr_t)chunk)
>> pagesize_2pow);
- assert(chunk->map[run_ind].free);
total_pages = chunk->map[run_ind].npages;
need_pages = (size >> pagesize_2pow);
-
- assert(chunk->map[run_ind].free);
- assert(chunk->map[run_ind].large == false);
- assert(chunk->map[run_ind].npages == total_pages);
+ assert(need_pages <= total_pages);
+ rem_pages = total_pages - need_pages;
/* Split enough pages from the front of run to fit allocation size. */
map_offset = run_ind;
for (i = 0; i < need_pages; i++) {
- chunk->map[map_offset + i].free = false;
- chunk->map[map_offset + i].large = large;
chunk->map[map_offset + i].npages = need_pages;
chunk->map[map_offset + i].pos = i;
}
- /* Update map for trailing pages. */
- map_offset += need_pages;
- while (map_offset < run_ind + total_pages) {
- log2_run_pages = ffs((int)map_offset) - 1;
- run_pages = (1 << log2_run_pages);
-
- chunk->map[map_offset].free = true;
- chunk->map[map_offset].large = false;
- chunk->map[map_offset].npages = run_pages;
-
- chunk->nfree_runs[log2_run_pages]++;
-
- map_offset += run_pages;
+ /* Keep track of trailing unused pages for later use. */
+ if (rem_pages > 0) {
+ /* Update map for trailing pages. */
+ map_offset += need_pages;
+ chunk->map[map_offset].npages = rem_pages;
+ chunk->map[map_offset].pos = POS_FREE;
+ chunk->map[map_offset + rem_pages - 1].npages = rem_pages;
+ chunk->map[map_offset + rem_pages - 1].pos = POS_FREE;
}
- chunk->pages_used += (size >> pagesize_2pow);
+ chunk->pages_used += need_pages;
}
static arena_chunk_t *
arena_chunk_alloc(arena_t *arena)
{
arena_chunk_t *chunk;
- unsigned i, j, header_npages, pow2_header_npages, map_offset;
- unsigned log2_run_pages, run_pages;
- size_t header_size;
if (arena->spare != NULL) {
chunk = arena->spare;
@@ -1837,6 +1840,9 @@ arena_chunk_alloc(arena_t *arena)
chunk = (arena_chunk_t *)chunk_alloc(chunk_size);
if (chunk == NULL)
return (NULL);
+#ifdef MALLOC_STATS
+ arena->stats.mapped += chunk_size;
+#endif
chunk->arena = arena;
@@ -1848,58 +1854,19 @@ arena_chunk_alloc(arena_t *arena)
*/
chunk->pages_used = 0;
- memset(&chunk->nfree_runs, 0, sizeof(chunk->nfree_runs));
-
- header_size =
- (size_t)((uintptr_t)&chunk->map[arena_chunk_maplen] -
- (uintptr_t)chunk);
- if (header_size % pagesize != 0) {
- /* Round up to the nearest page boundary. */
- header_size += pagesize - (header_size % pagesize);
- }
-
- header_npages = header_size >> pagesize_2pow;
- pow2_header_npages = pow2_ceil(header_npages);
+ chunk->max_frun_npages = chunk_npages -
+ arena_chunk_header_npages;
+ chunk->min_frun_ind = arena_chunk_header_npages;
/*
- * Iteratively mark runs as in use, until we've spoken for the
- * entire header.
+ * Initialize enough of the map to support one maximal free run.
*/
- map_offset = 0;
- for (i = 0; header_npages > 0; i++) {
- if ((pow2_header_npages >> i) <= header_npages) {
- for (j = 0; j < (pow2_header_npages >> i);
- j++) {
- chunk->map[map_offset + j].free =
- false;
- chunk->map[map_offset + j].large =
- false;
- chunk->map[map_offset + j].npages =
- (pow2_header_npages >> i);
- chunk->map[map_offset + j].pos = j;
- }
- header_npages -= (pow2_header_npages >> i);
- map_offset += (pow2_header_npages >> i);
- }
- }
-
- /*
- * Finish initializing map. The chunk header takes up some
- * space at the beginning of the chunk, which we just took care
- * of by "allocating" the leading pages.
- */
- while (map_offset < (chunk_size >> pagesize_2pow)) {
- log2_run_pages = ffs((int)map_offset) - 1;
- run_pages = (1 << log2_run_pages);
-
- chunk->map[map_offset].free = true;
- chunk->map[map_offset].large = false;
- chunk->map[map_offset].npages = run_pages;
-
- chunk->nfree_runs[log2_run_pages]++;
-
- map_offset += run_pages;
- }
+ chunk->map[arena_chunk_header_npages].npages = chunk_npages -
+ arena_chunk_header_npages;
+ chunk->map[arena_chunk_header_npages].pos = POS_FREE;
+ chunk->map[chunk_npages - 1].npages = chunk_npages -
+ arena_chunk_header_npages;
+ chunk->map[chunk_npages - 1].pos = POS_FREE;
}
return (chunk);
@@ -1916,12 +1883,19 @@ arena_chunk_dealloc(arena_t *arena, arena_chunk_t *chunk)
RB_REMOVE(arena_chunk_tree_s, &chunk->arena->chunks, chunk);
if (opt_hint == false) {
- if (arena->spare != NULL)
+ if (arena->spare != NULL) {
chunk_dealloc((void *)arena->spare, chunk_size);
+#ifdef MALLOC_STATS
+ arena->stats.mapped -= chunk_size;
+#endif
+ }
arena->spare = chunk;
} else {
assert(arena->spare == NULL);
chunk_dealloc((void *)chunk, chunk_size);
+#ifdef MALLOC_STATS
+ arena->stats.mapped -= chunk_size;
+#endif
}
}
@@ -1981,6 +1955,13 @@ arena_bin_run_promote(arena_t *arena, arena_bin_t *bin, arena_run_t *run)
* pathological edge cases.
*/
run->quartile++;
+#ifdef MALLOC_STATS
+ /*
+ * Count as a double promotion, in order to keep
+ * promotions and demotions symmetric.
+ */
+ bin->stats.npromote++;
+#endif
/* Fall through. */
case RUN_Q100:
qr_remove(run, link);
@@ -2068,140 +2049,154 @@ arena_bin_run_demote(arena_t *arena, arena_bin_t *bin, arena_run_t *run)
}
static arena_run_t *
-arena_run_alloc(arena_t *arena, bool large, size_t size)
+arena_run_alloc(arena_t *arena, size_t size)
{
- arena_run_t *run;
- unsigned min_ind, i, j;
arena_chunk_t *chunk;
-#ifndef NDEBUG
- int rep = 0;
-#endif
-
- assert(size <= arena_maxclass);
+ arena_run_t *run;
+ unsigned need_npages, limit_pages, compl_need_npages;
-AGAIN:
-#ifndef NDEBUG
- rep++;
- assert(rep <= 2);
-#endif
+ assert(size <= (chunk_size - (arena_chunk_header_npages <<
+ pagesize_2pow)));
/*
- * Search through arena's chunks in address order for a run that is
- * large enough. Look for a precise fit, but do not pass up a chunk
- * that has a run which is large enough to split.
+ * Search through arena's chunks in address order for a free run that is
+ * large enough. Look for the first fit.
*/
- min_ind = ffs((int)(size >> pagesize_2pow)) - 1;
+ need_npages = (size >> pagesize_2pow);
+ limit_pages = chunk_npages - arena_chunk_header_npages;
+ compl_need_npages = limit_pages - need_npages;
RB_FOREACH(chunk, arena_chunk_tree_s, &arena->chunks) {
- for (i = min_ind;
- i < (opt_chunk_2pow - pagesize_2pow);
- i++) {
- if (chunk->nfree_runs[i] > 0) {
- arena_chunk_map_t *map = chunk->map;
-
- /* Scan chunk's map for free run. */
- for (j = 0;
- j < arena_chunk_maplen;
- j += map[j].npages) {
- if (map[j].free
- && map[j].npages == (1 << i))
- {/*<----------------------------*/
- run = (arena_run_t *)&((char *)chunk)[j
- << pagesize_2pow];
-
- assert(chunk->nfree_runs[i] > 0);
- chunk->nfree_runs[i]--;
-
- /* Update page map. */
- arena_run_split(arena, run, large, size);
-
- return (run);
- }/*---------------------------->*/
+ /*
+ * Avoid searching this chunk if there are not enough
+ * contiguous free pages for there to possibly be a large
+ * enough free run.
+ */
+ if (chunk->pages_used <= compl_need_npages &&
+ need_npages <= chunk->max_frun_npages) {
+ arena_chunk_map_t *mapelm;
+ unsigned i;
+ uint32_t max_frun_npages = 0;
+ uint32_t min_frun_ind = chunk_npages;
+
+ assert(chunk->min_frun_ind >=
+ arena_chunk_header_npages);
+ for (i = chunk->min_frun_ind; i < chunk_npages;) {
+ mapelm = &chunk->map[i];
+ if (mapelm->pos == POS_FREE) {
+ if (mapelm->npages >= need_npages) {
+ run = (arena_run_t *)
+ ((uintptr_t)chunk + (i <<
+ pagesize_2pow));
+ /* Update page map. */
+ arena_run_split(arena, run,
+ size);
+ return (run);
+ }
+ if (mapelm->npages >
+ max_frun_npages) {
+ max_frun_npages =
+ mapelm->npages;
+ }
+ if (i < min_frun_ind) {
+ min_frun_ind = i;
+ if (i < chunk->min_frun_ind)
+ chunk->min_frun_ind = i;
+ }
}
- /* Not reached. */
- assert(0);
+ i += mapelm->npages;
}
+ /*
+ * Search failure. Reset cached chunk->max_frun_npages.
+ * chunk->min_frun_ind was already reset above (if
+ * necessary).
+ */
+ chunk->max_frun_npages = max_frun_npages;
}
}
/* No usable runs. Allocate a new chunk, then try again. */
- if (arena_chunk_alloc(arena) == NULL)
+ chunk = arena_chunk_alloc(arena);
+ if (chunk == NULL)
return (NULL);
- goto AGAIN;
+ run = (arena_run_t *)((uintptr_t)chunk + (arena_chunk_header_npages <<
+ pagesize_2pow));
+ /* Update page map. */
+ arena_run_split(arena, run, size);
+ return (run);
}
static void
arena_run_dalloc(arena_t *arena, arena_run_t *run, size_t size)
{
arena_chunk_t *chunk;
- unsigned run_ind, buddy_ind, base_run_ind, run_pages, log2_run_pages;
+ unsigned run_ind, run_pages;
chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(run);
+
run_ind = (unsigned)(((uintptr_t)run - (uintptr_t)chunk)
>> pagesize_2pow);
run_pages = (size >> pagesize_2pow);
- log2_run_pages = ffs((int)run_pages) - 1;
- assert(run_pages > 0);
+ assert(run_pages == chunk->map[run_ind].npages);
/* Subtract pages from count of pages used in chunk. */
chunk->pages_used -= run_pages;
/* Mark run as deallocated. */
- chunk->map[run_ind].free = true;
- chunk->map[run_ind].large = false;
- chunk->map[run_ind].npages = run_pages;
+ assert(chunk->map[run_ind].npages == run_pages);
+ chunk->map[run_ind].pos = POS_FREE;
+ assert(chunk->map[run_ind + run_pages - 1].npages == run_pages);
+ chunk->map[run_ind + run_pages - 1].pos = POS_FREE;
/*
* Tell the kernel that we don't need the data in this run, but only if
* requested via runtime configuration.
*/
- if (opt_hint) {
+ if (opt_hint)
madvise(run, size, MADV_FREE);
-#ifdef MALLOC_STATS
- arena->stats.nmadvise += (size >> pagesize_2pow);
-#endif
- }
- /*
- * Iteratively coalesce with buddies. Conceptually, the buddy scheme
- * induces a tree on the set of pages. If we know the number of pages
- * in the subtree rooted at the current node, we can quickly determine
- * whether a run is the left or right buddy, and then calculate the
- * buddy's index.
- */
- for (;
- (run_pages = (1 << log2_run_pages)) < arena_chunk_maplen;
- log2_run_pages++) {
- if (((run_ind >> log2_run_pages) & 1) == 0) {
- /* Current run precedes its buddy. */
- buddy_ind = run_ind + run_pages;
- base_run_ind = run_ind;
- } else {
- /* Current run follows its buddy. */
- buddy_ind = run_ind - run_pages;
- base_run_ind = buddy_ind;
- }
+ /* Try to coalesce with neighboring runs. */
+ if (run_ind > arena_chunk_header_npages &&
+ chunk->map[run_ind - 1].pos == POS_FREE) {
+ unsigned prev_npages;
- if (chunk->map[buddy_ind].free == false
- || chunk->map[buddy_ind].npages != run_pages)
- break;
+ /* Coalesce with previous run. */
+ prev_npages = chunk->map[run_ind - 1].npages;
+ run_ind -= prev_npages;
+ assert(chunk->map[run_ind].npages == prev_npages);
+ assert(chunk->map[run_ind].pos == POS_FREE);
+ run_pages += prev_npages;
- assert(chunk->nfree_runs[log2_run_pages] > 0);
- chunk->nfree_runs[log2_run_pages]--;
+ chunk->map[run_ind].npages = run_pages;
+ assert(chunk->map[run_ind].pos == POS_FREE);
+ chunk->map[run_ind + run_pages - 1].npages = run_pages;
+ assert(chunk->map[run_ind + run_pages - 1].pos == POS_FREE);
+ }
+
+ if (run_ind + run_pages < chunk_npages &&
+ chunk->map[run_ind + run_pages].pos == POS_FREE) {
+ unsigned next_npages;
- /* Coalesce. */
- chunk->map[base_run_ind].npages = (run_pages << 1);
+ /* Coalesce with next run. */
+ next_npages = chunk->map[run_ind + run_pages].npages;
+ run_pages += next_npages;
+ assert(chunk->map[run_ind + run_pages - 1].npages ==
+ next_npages);
+ assert(chunk->map[run_ind + run_pages - 1].pos == POS_FREE);
- /* Update run_ind to be the beginning of the coalesced run. */
- run_ind = base_run_ind;
+ chunk->map[run_ind].npages = run_pages;
+ chunk->map[run_ind].pos = POS_FREE;
+ chunk->map[run_ind + run_pages - 1].npages = run_pages;
+ assert(chunk->map[run_ind + run_pages - 1].pos == POS_FREE);
}
- chunk->nfree_runs[log2_run_pages]++;
+ if (chunk->map[run_ind].npages > chunk->max_frun_npages)
+ chunk->max_frun_npages = chunk->map[run_ind].npages;
+ if (run_ind < chunk->min_frun_ind)
+ chunk->min_frun_ind = run_ind;
- /* Free pages, to the extent possible. */
- if (chunk->pages_used == 0) {
- /* This chunk is completely unused now, so deallocate it. */
+ /* Deallocate chunk if it is now completely unused. */
+ if (chunk->pages_used == 0)
arena_chunk_dealloc(arena, chunk);
- }
}
static arena_run_t *
@@ -2226,7 +2221,7 @@ arena_bin_nonfull_run_get(arena_t *arena, arena_bin_t *bin)
/* No existing runs have any space available. */
/* Allocate a new run. */
- run = arena_run_alloc(arena, false, bin->run_size);
+ run = arena_run_alloc(arena, bin->run_size);
if (run == NULL)
return (NULL);
@@ -2305,7 +2300,7 @@ arena_bin_malloc_hard(arena_t *arena, arena_bin_t *bin)
* *) bin->run_size >= min_run_size
* *) bin->run_size <= arena_maxclass
* *) bin->run_size <= RUN_MAX_SMALL
- * *) run header overhead <= RUN_MAX_HDR_OVERHEAD
+ * *) run header overhead <= RUN_MAX_OVRHD (or header overhead relaxed).
*
* bin->nregs, bin->regs_mask_nelms, and bin->reg0_offset are
* also calculated here, since these settings are all interdependent.
@@ -2316,22 +2311,13 @@ arena_bin_run_size_calc(arena_bin_t *bin, size_t min_run_size)
size_t try_run_size, good_run_size;
uint32_t good_nregs, good_mask_nelms, good_reg0_offset;
uint32_t try_nregs, try_mask_nelms, try_reg0_offset;
- float run_max_hdr_overhead = RUN_MAX_HDR_OVERHEAD;
+ float max_ovrhd = RUN_MAX_OVRHD;
- if (min_run_size < pagesize)
- min_run_size = pagesize;
+ assert(min_run_size >= pagesize);
assert(min_run_size <= arena_maxclass);
assert(min_run_size <= RUN_MAX_SMALL);
/*
- * Make sure that the header overhead constraint allows a solution. If
- * the maximum overhead is less than or equal to one bit per region,
- * there is clearly no solution.
- */
- while (run_max_hdr_overhead <= 1.0 / ((float)(bin->reg_size << 3)))
- run_max_hdr_overhead *= 2.0;
-
- /*
* Calculate known-valid settings before entering the run_size
* expansion loop, so that the first part of the loop always copies
* valid settings.
@@ -2363,7 +2349,7 @@ arena_bin_run_size_calc(arena_bin_t *bin, size_t min_run_size)
good_reg0_offset = try_reg0_offset;
/* Try more aggressive settings. */
- try_run_size <<= 1;
+ try_run_size += pagesize;
try_nregs = ((try_run_size - sizeof(arena_run_t)) /
bin->reg_size) + 1; /* Counter-act try_nregs-- in loop. */
do {
@@ -2376,8 +2362,9 @@ arena_bin_run_size_calc(arena_bin_t *bin, size_t min_run_size)
} while (sizeof(arena_run_t) + (sizeof(unsigned) *
(try_mask_nelms - 1)) > try_reg0_offset);
} while (try_run_size <= arena_maxclass && try_run_size <= RUN_MAX_SMALL
+ && max_ovrhd > RUN_MAX_OVRHD_RELAX / ((float)(bin->reg_size << 3))
&& ((float)(try_reg0_offset)) / ((float)(try_run_size)) >
- run_max_hdr_overhead);
+ max_ovrhd);
assert(sizeof(arena_run_t) + (sizeof(unsigned) * (good_mask_nelms - 1))
<= good_reg0_offset);
@@ -2443,23 +2430,22 @@ arena_malloc(arena_t *arena, size_t size)
#ifdef MALLOC_STATS
bin->stats.nrequests++;
+ arena->stats.nmalloc_small++;
+ if (ret != NULL)
+ arena->stats.allocated_small += size;
#endif
} else {
- /* Medium allocation. */
- size = pow2_ceil(size);
+ /* Large allocation. */
+ size = PAGE_CEILING(size);
malloc_mutex_lock(&arena->mtx);
- ret = (void *)arena_run_alloc(arena, true, size);
+ ret = (void *)arena_run_alloc(arena, size);
#ifdef MALLOC_STATS
- arena->stats.large_nrequests++;
+ arena->stats.nmalloc_large++;
+ if (ret != NULL)
+ arena->stats.allocated_large += size;
#endif
}
-#ifdef MALLOC_STATS
- arena->stats.nmalloc++;
- if (ret != NULL)
- arena->stats.allocated += size;
-#endif
-
malloc_mutex_unlock(&arena->mtx);
if (opt_junk && ret != NULL)
@@ -2488,8 +2474,8 @@ arena_salloc(const void *ptr)
chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr);
pageind = (((uintptr_t)ptr - (uintptr_t)chunk) >> pagesize_2pow);
mapelm = chunk->map[pageind];
- assert(mapelm.free == false);
- if (mapelm.large == false) {
+ if (mapelm.pos != 0 || ptr != (void *)((uintptr_t)chunk) + (pageind <<
+ pagesize_2pow)) {
arena_run_t *run;
pageind -= mapelm.pos;
@@ -2520,7 +2506,11 @@ arena_ralloc(void *ptr, size_t size, size_t oldsize)
== (QUANTUM_CEILING(oldsize) >> opt_quantum_2pow))
goto IN_PLACE;
} else {
- if (oldsize > small_max && pow2_ceil(size) == oldsize)
+ /*
+ * We make no attempt to resize runs here, though it would be
+ * possible to do so.
+ */
+ if (oldsize > small_max && PAGE_CEILING(size) == oldsize)
goto IN_PLACE;
}
@@ -2562,8 +2552,8 @@ arena_dalloc(arena_t *arena, arena_chunk_t *chunk, void *ptr)
pageind = (((uintptr_t)ptr - (uintptr_t)chunk) >> pagesize_2pow);
mapelm = chunk->map[pageind];
- assert(mapelm.free == false);
- if (mapelm.large == false) {
+ if (mapelm.pos != 0 || ptr != (void *)((uintptr_t)chunk) + (pageind <<
+ pagesize_2pow)) {
arena_run_t *run;
arena_bin_t *bin;
@@ -2586,22 +2576,26 @@ arena_dalloc(arena_t *arena, arena_chunk_t *chunk, void *ptr)
/* Demote run to lower fullness quartile. */
arena_bin_run_demote(arena, bin, run);
}
+#ifdef MALLOC_STATS
+ arena->stats.allocated_small -= size;
+ arena->stats.ndalloc_small++;
+#endif
} else {
- /* Medium allocation. */
+ /* Large allocation. */
size = mapelm.npages << pagesize_2pow;
- assert((((uintptr_t)ptr) & (size - 1)) == 0);
+ assert((((uintptr_t)ptr) & pagesize_mask) == 0);
if (opt_junk)
memset(ptr, 0x5a, size);
malloc_mutex_lock(&arena->mtx);
arena_run_dalloc(arena, (arena_run_t *)ptr, size);
- }
-
#ifdef MALLOC_STATS
- arena->stats.allocated -= size;
+ arena->stats.allocated_large -= size;
+ arena->stats.ndalloc_large++;
#endif
+ }
malloc_mutex_unlock(&arena->mtx);
}
@@ -3069,11 +3063,6 @@ idalloc(void *ptr)
chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr);
if (chunk != ptr) {
/* Region. */
-#ifdef MALLOC_STATS
- malloc_mutex_lock(&chunk->arena->mtx);
- chunk->arena->stats.ndalloc++;
- malloc_mutex_unlock(&chunk->arena->mtx);
-#endif
arena_dalloc(chunk->arena, chunk, ptr);
} else
huge_dalloc(ptr);
@@ -3087,20 +3076,6 @@ malloc_print_stats(void)
char s[UMAX2S_BUFSIZE];
_malloc_message("___ Begin malloc statistics ___\n", "", "",
"");
- _malloc_message("Number of CPUs: ", umax2s(ncpus, s), "\n", "");
- _malloc_message("Number of arenas: ", umax2s(narenas, s), "\n",
- "");
-
- _malloc_message("Chunk size: ", umax2s(chunk_size, s), "", "");
- _malloc_message(" (2^", umax2s(opt_chunk_2pow, s), ")\n", "");
-
- _malloc_message("Quantum size: ", umax2s(quantum, s), "", "");
- _malloc_message(" (2^", umax2s(opt_quantum_2pow, s), ")\n", "");
-
- _malloc_message("Max small size: ", umax2s(small_max, s), "\n",
- "");
- _malloc_message("Pointer size: ", umax2s(sizeof(void *), s),
- "\n", "");
_malloc_message("Assertions ",
#ifdef NDEBUG
"disabled",
@@ -3108,41 +3083,58 @@ malloc_print_stats(void)
"enabled",
#endif
"\n", "");
+ _malloc_message("Boolean MALLOC_OPTIONS: ",
+ opt_abort ? "A" : "a",
+ opt_junk ? "J" : "j",
+ opt_hint ? "H" : "h");
+ _malloc_message(opt_utrace ? "PU" : "Pu",
+ opt_sysv ? "V" : "v",
+ opt_xmalloc ? "X" : "x",
+ opt_zero ? "Z\n" : "z\n");
+
+ _malloc_message("CPUs: ", umax2s(ncpus, s), "\n", "");
+ _malloc_message("Max arenas: ", umax2s(narenas, s), "\n", "");
+ _malloc_message("Pointer size: ", umax2s(sizeof(void *), s),
+ "\n", "");
+ _malloc_message("Quantum size: ", umax2s(quantum, s), "\n", "");
+ _malloc_message("Max small size: ", umax2s(small_max, s), "\n",
+ "");
+
+ _malloc_message("Chunk size: ", umax2s(chunk_size, s), "", "");
+ _malloc_message(" (2^", umax2s(opt_chunk_2pow, s), ")\n", "");
#ifdef MALLOC_STATS
{
- size_t allocated, total;
+ size_t allocated, mapped;
unsigned i;
arena_t *arena;
- /* Calculate and print allocated/total stats. */
+ /* Calculate and print allocated/mapped stats. */
/* arenas. */
for (i = 0, allocated = 0; i < narenas; i++) {
if (arenas[i] != NULL) {
malloc_mutex_lock(&arenas[i]->mtx);
- allocated += arenas[i]->stats.allocated;
+ allocated +=
+ arenas[i]->stats.allocated_small;
+ allocated +=
+ arenas[i]->stats.allocated_large;
malloc_mutex_unlock(&arenas[i]->mtx);
}
}
- /* huge. */
+ /* huge/base. */
malloc_mutex_lock(&chunks_mtx);
allocated += huge_allocated;
- total = stats_chunks.curchunks * chunk_size;
+ mapped = stats_chunks.curchunks * chunk_size;
malloc_mutex_unlock(&chunks_mtx);
- malloc_printf("Allocated: %zu, space used: %zu\n",
- allocated, total);
+ malloc_mutex_lock(&base_mtx);
+ mapped += base_mapped;
+ malloc_mutex_unlock(&base_mtx);
- /* Print base stats. */
- {
- malloc_mutex_lock(&base_mtx);
- malloc_printf("\nbase:\n");
- malloc_printf(" %13s\n", "total");
- malloc_printf(" %13llu\n", base_total);
- malloc_mutex_unlock(&base_mtx);
- }
+ malloc_printf("Allocated: %zu, mapped: %zu\n",
+ allocated, mapped);
/* Print chunk stats. */
{
@@ -3152,28 +3144,27 @@ malloc_print_stats(void)
chunks_stats = stats_chunks;
malloc_mutex_unlock(&chunks_mtx);
- malloc_printf("\nchunks:\n");
- malloc_printf(" %13s%13s%13s\n", "nchunks",
- "highchunks", "curchunks");
- malloc_printf(" %13llu%13lu%13lu\n",
+ malloc_printf("chunks: nchunks "
+ "highchunks curchunks\n");
+ malloc_printf(" %13llu%13lu%13lu\n",
chunks_stats.nchunks,
chunks_stats.highchunks,
chunks_stats.curchunks);
}
/* Print chunk stats. */
- malloc_printf("\nhuge:\n");
- malloc_printf("%12s %12s %12s\n",
- "nmalloc", "ndalloc", "allocated");
- malloc_printf("%12llu %12llu %12zu\n",
- huge_nmalloc, huge_ndalloc, huge_allocated);
+ malloc_printf(
+ "huge: nmalloc ndalloc allocated\n");
+ malloc_printf(" %12llu %12llu %12zu\n",
+ huge_nmalloc, huge_ndalloc, huge_allocated
+ * chunk_size);
/* Print stats for each arena. */
for (i = 0; i < narenas; i++) {
arena = arenas[i];
if (arena != NULL) {
malloc_printf(
- "\narenas[%u] statistics:\n", i);
+ "\narenas[%u]:\n", i);
malloc_mutex_lock(&arena->mtx);
stats_print(arena);
malloc_mutex_unlock(&arena->mtx);
@@ -3242,9 +3233,10 @@ malloc_init_hard(void)
/*
* We assume that pagesize is a power of 2 when calculating
- * pagesize_2pow.
+ * pagesize_mask and pagesize_2pow.
*/
assert(((result - 1) & result) == 0);
+ pagesize_mask = result - 1;
pagesize_2pow = ffs((int)result) - 1;
}
@@ -3327,7 +3319,14 @@ malloc_init_hard(void)
opt_chunk_2pow--;
break;
case 'K':
- if (opt_chunk_2pow < CHUNK_2POW_MAX)
+ /*
+ * There must be fewer pages in a chunk than
+ * can be recorded by the pos field of
+ * arena_chunk_map_t, in order to make POS_FREE
+ * special.
+ */
+ if (opt_chunk_2pow - pagesize_2pow
+ < (sizeof(uint32_t) << 3) - 1)
opt_chunk_2pow++;
break;
case 'n':
@@ -3424,10 +3423,20 @@ malloc_init_hard(void)
assert(small_min <= quantum);
/* Set variables according to the value of opt_chunk_2pow. */
- chunk_size = (1 << opt_chunk_2pow);
+ chunk_size = (1LU << opt_chunk_2pow);
chunk_size_mask = chunk_size - 1;
- arena_chunk_maplen = (1 << (opt_chunk_2pow - pagesize_2pow));
- arena_maxclass = (chunk_size >> 1);
+ chunk_npages = (chunk_size >> pagesize_2pow);
+ {
+ unsigned header_size;
+
+ header_size = sizeof(arena_chunk_t) + (sizeof(arena_chunk_map_t)
+ * (chunk_npages - 1));
+ arena_chunk_header_npages = (header_size >> pagesize_2pow);
+ if ((header_size & pagesize_mask) != 0)
+ arena_chunk_header_npages++;
+ }
+ arena_maxclass = chunk_size - (arena_chunk_header_npages <<
+ pagesize_2pow);
UTRACE(0, 0, 0);
@@ -3459,7 +3468,7 @@ malloc_init_hard(void)
/* Initialize base allocation data structures. */
#ifdef MALLOC_STATS
- base_total = 0;
+ base_mapped = 0;
#endif
#ifdef USE_BRK
/*