author		jasone <jasone@FreeBSD.org>	2007-11-27 03:17:30 +0000
committer	jasone <jasone@FreeBSD.org>	2007-11-27 03:17:30 +0000
commit		4db288d2b346e18e540c270c6b3e1b6ed3bd7b14 (patch)
tree		d430972af51669ddebf984cee4d6655e511d190b /lib/libc/stdlib
parent		21bb948195adb6636c33738cab39bb89cac41bc7 (diff)
download	FreeBSD-src-4db288d2b346e18e540c270c6b3e1b6ed3bd7b14.zip
		FreeBSD-src-4db288d2b346e18e540c270c6b3e1b6ed3bd7b14.tar.gz
Implement dynamic load balancing of thread-->arena mapping, based on lock
contention. The intent is to dynamically adjust to load imbalances, which
can cause severe contention.
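The rebalancing rule works as follows: each time a thread acquires its arena's lock it measures roughly how much the acquisition cost, folds that cost into an exponentially weighted moving average of contention, and once the average crosses a threshold it re-binds itself to a randomly chosen arena and resets the average. The sketch below is a minimal standalone rendering of that rule; the names update_contention, ALPHA_INV_2POW and THRESHOLD are illustrative only, while in the patch the logic lives in arena_lock_balance() and uses BALANCE_ALPHA_INV_2POW and opt_balance_threshold.

```c
#include <stdint.h>

/* Illustrative constants; the real values are defined in malloc.c. */
#define ALPHA_INV_2POW	9	/* EMA weight: alpha ~= 1/512 */
#define THRESHOLD	128	/* reassign once the average exceeds this */

/*
 * Fold one lock acquisition's spin cost into an exponentially weighted
 * moving average and report whether the calling thread should re-bind
 * itself to a different arena.  'contention' is the running average;
 * 'spins' is what the last acquisition cost (0 if it was uncontended).
 */
static int
update_contention(uint32_t *contention, uint32_t spins)
{

	*contention = (uint32_t)((((uint64_t)*contention *
	    (uint64_t)((1U << ALPHA_INV_2POW) - 1)) + (uint64_t)spins) >>
	    ALPHA_INV_2POW);
	if (*contention >= THRESHOLD) {
		*contention = 0;	/* start over on the new arena */
		return (1);		/* caller picks a new arena at random */
	}
	return (0);
}
```

Resetting the average after a reassignment gives the thread a grace period on its new arena before it can be moved again.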
Use pthread mutexes where possible instead of libc "spinlocks" (they aren't
actually spin locks). Conceptually, this change is meant only to support
the dynamic load balancing code by enabling the use of spin locks, but it
has the added apparent benefit of substantially improving performance due to
reduced context switches when there is moderate arena lock contention.
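The locking pattern behind this is spin-then-block: try the lock, back off exponentially while retrying, and only block in the kernel once the spin budget is exhausted, so a preempted lock holder cannot cause unbounded spinning or priority inversion. The sketch below uses only the public pthread API; the patch itself calls the libc-internal _pthread_mutex_* entry points, checks __isthreaded, and issues CPU_SPINWAIT between probes, none of which is shown here.

```c
#include <pthread.h>

#define SPIN_LIMIT_2POW	11	/* stop spinning after ~2^12 iterations */
#define BLOCK_COST_2POW	4	/* a blocked acquisition counts 16x a spun one */

/*
 * Acquire the lock, spinning with exponential backoff before falling back
 * to a blocking lock.  Returns a rough cost estimate that the caller can
 * feed into its contention average.
 */
static unsigned
spin_then_block(pthread_mutex_t *lock)
{
	unsigned i, cost = 0;
	volatile unsigned j;

	if (pthread_mutex_trylock(lock) == 0)
		return (0);
	for (i = 1; i <= SPIN_LIMIT_2POW; i++) {
		for (j = 0; j < (1U << i); j++)
			cost++;		/* busy-wait, growing each round */
		if (pthread_mutex_trylock(lock) == 0)
			return (cost);
	}
	pthread_mutex_lock(lock);		/* spinning failed; block */
	return (cost << BLOCK_COST_2POW);
}
```

The returned cost is what feeds the contention average sketched above; scaling a blocked acquisition by 2^BLOCK_COST_2POW makes genuinely contended arenas stand out quickly.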
Proper tuning parameter configuration for this change is a finicky business,
and it is very much machine-dependent. One seemingly promising solution
would be to run a tuning program during operating system installation that
computes appropriate settings for load balancing. (The pthreads adaptive
spin locks should probably be similarly tuned.)
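For a sense of scale, the defaults introduced by this change work out as follows; the program below is a standalone illustration (not part of the diff) that simply evaluates the new constants.

```c
#include <stdio.h>

int
main(void)
{
	unsigned spin_limit_2pow = 11;	/* SPIN_LIMIT_2POW */
	unsigned block_cost_2pow = 4;	/* BLOCK_COST_2POW */
	unsigned alpha_inv_2pow = 9;	/* BALANCE_ALPHA_INV_2POW */

	/* BALANCE_THRESHOLD_DEFAULT = 1U << (SPIN_LIMIT_2POW - 4) */
	printf("default balance threshold: %u\n", 1U << (spin_limit_2pow - 4));
	/* alpha = 2/(N+1) = 1/2^9, so the history window N is 2^10 - 1. */
	printf("EMA history window: %u acquisitions\n",
	    (2U << alpha_inv_2pow) - 1);
	/* One blocked acquisition is charged as this many spun ones. */
	printf("block cost multiplier: %ux\n", 1U << block_cost_2pow);
	return (0);
}
```

This prints a threshold of 128, a history window of 1023 acquisitions, and a 16x block-cost multiplier; the threshold can be halved or doubled at run time via the new 'b'/'B' option characters parsed in malloc_init_hard() (for example through the MALLOC_OPTIONS environment variable).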
Diffstat (limited to 'lib/libc/stdlib')
-rw-r--r--	lib/libc/stdlib/malloc.c	355
1 file changed, 297 insertions(+), 58 deletions(-)
diff --git a/lib/libc/stdlib/malloc.c b/lib/libc/stdlib/malloc.c
index 3ad27f5..4d37e12 100644
--- a/lib/libc/stdlib/malloc.c
+++ b/lib/libc/stdlib/malloc.c
@@ -171,6 +171,7 @@ __FBSDID("$FreeBSD$");
 #  define QUANTUM_2POW_MIN	4
 #  define SIZEOF_PTR_2POW	2
 #  define USE_BRK
+#  define CPU_SPINWAIT		__asm__ volatile("pause")
 #endif
 #ifdef __ia64__
 #  define QUANTUM_2POW_MIN	4
@@ -189,6 +190,7 @@ __FBSDID("$FreeBSD$");
 #ifdef __amd64__
 #  define QUANTUM_2POW_MIN	4
 #  define SIZEOF_PTR_2POW	3
+#  define CPU_SPINWAIT		__asm__ volatile("pause")
 #endif
 #ifdef __arm__
 #  define QUANTUM_2POW_MIN	3
@@ -270,11 +272,51 @@ __FBSDID("$FreeBSD$");
  */
 #define	LAZY_FREE_NPROBES	5
 
+/*
+ * Hyper-threaded CPUs may need a special instruction inside spin loops in
+ * order to yield to another virtual CPU.  If no such instruction is defined
+ * above, make CPU_SPINWAIT a no-op.
+ */
+#ifndef CPU_SPINWAIT
+#  define CPU_SPINWAIT
+#endif
+
+/*
+ * Adaptive spinning must eventually switch to blocking, in order to avoid the
+ * potential for priority inversion deadlock.  Backing off past a certain point
+ * can actually waste time.
+ */
+#define	SPIN_LIMIT_2POW		11
+
+/*
+ * Conversion from spinning to blocking is expensive; we use (1U <<
+ * BLOCK_COST_2POW) to estimate how many more times costly blocking is than
+ * worst-case spinning.
+ */
+#define	BLOCK_COST_2POW		4
+
+/*
+ * We use an exponential moving average to track recent lock contention, where
+ * the size of the history window is N, and alpha=2/(N+1).
+ *
+ * Due to integer math rounding, very small values here can cause substantial
+ * degradation in accuracy, thus making the moving average decay faster than it
+ * would with precise calculation.
+ */
+#define	BALANCE_ALPHA_INV_2POW	9
+
+/*
+ * Threshold value for the exponential moving contention average at which to
+ * re-assign a thread.
+ */
+#define	BALANCE_THRESHOLD_DEFAULT	(1U << (SPIN_LIMIT_2POW-4))
+
 /******************************************************************************/
 /*
- * Mutexes based on spinlocks.  We can't use normal pthread mutexes, because
- * they require malloc()ed memory.
+ * Mutexes based on spinlocks.  We can't use normal pthread spinlocks in all
+ * places, because they require malloc()ed memory, which causes bootstrapping
+ * issues in some cases.
  */
 typedef struct {
 	spinlock_t	lock;
@@ -330,6 +372,11 @@ struct arena_stats_s {
 	size_t		allocated_large;
 	uint64_t	nmalloc_large;
 	uint64_t	ndalloc_large;
+
+#ifndef NO_TLS
+	/* Number of times this arena reassigned a thread due to contention. */
+	uint64_t	nbalance;
+#endif
 };
 
 typedef struct chunk_stats_s chunk_stats_t;
@@ -517,8 +564,8 @@ struct arena_s {
 #  define ARENA_MAGIC 0x947d3d24
 #endif
 
-	/* All operations on this arena require that mtx be locked. */
-	malloc_mutex_t		mtx;
+	/* All operations on this arena require that lock be locked. */
+	pthread_mutex_t		lock;
 
 #ifdef MALLOC_STATS
 	arena_stats_t		stats;
@@ -542,6 +589,12 @@ struct arena_s {
 #ifndef NO_TLS
 	/*
+	 * The arena load balancing machinery needs to keep track of how much
+	 * lock contention there is.  This value is exponentially averaged.
+	 */
+	uint32_t	contention;
+
+	/*
 	 * Deallocation of small objects can be lazy, in which case free_cache
 	 * stores pointers to those objects that have not yet been deallocated.
 	 * In order to avoid lock contention, slots are chosen randomly.
@@ -681,9 +734,9 @@ static size_t	base_mapped;
 static arena_t		**arenas;
 static unsigned		narenas;
 #ifndef NO_TLS
-static unsigned		next_arena;
+static unsigned		narenas_2pow;
 #endif
-static malloc_mutex_t	arenas_mtx; /* Protects arenas initialization. */
+static pthread_mutex_t	arenas_lock; /* Protects arenas initialization. */
 
 #ifndef NO_TLS
 /*
@@ -714,6 +767,7 @@ static bool	opt_junk = false;
 static bool	opt_hint = false;
 #ifndef NO_TLS
 static int	opt_lazy_free_2pow = LAZY_FREE_2POW_DEFAULT;
+static uint64_t	opt_balance_threshold = BALANCE_THRESHOLD_DEFAULT;
 #endif
 static bool	opt_print_stats = false;
 static size_t	opt_quantum_2pow = QUANTUM_2POW_MIN;
@@ -743,6 +797,7 @@ typedef struct {
  */
 
 static void	malloc_mutex_init(malloc_mutex_t *a_mutex);
+static bool	malloc_spin_init(pthread_mutex_t *lock);
 static void	wrtmessage(const char *p1, const char *p2, const char *p3,
 		const char *p4);
 #ifdef MALLOC_STATS
@@ -751,6 +806,7 @@ static void	malloc_printf(const char *format, ...);
 static char	*umax2s(uintmax_t x, char *s);
 static bool	base_pages_alloc(size_t minsize);
 static void	*base_alloc(size_t size);
+static void	*base_calloc(size_t number, size_t size);
 static chunk_node_t *base_chunk_node_alloc(void);
 static void	base_chunk_node_dealloc(chunk_node_t *node);
 #ifdef MALLOC_STATS
@@ -798,7 +854,9 @@ static bool	malloc_init_hard(void);
  */
 /******************************************************************************/
 /*
- * Begin mutex.
+ * Begin mutex.  We can't use normal pthread mutexes in all places, because
+ * they require malloc()ed memory, which causes bootstrapping issues in some
+ * cases.
  */
 
 static void
@@ -830,6 +888,86 @@ malloc_mutex_unlock(malloc_mutex_t *a_mutex)
  */
 /******************************************************************************/
 /*
+ * Begin spin lock.  Spin locks here are actually adaptive mutexes that block
+ * after a period of spinning, because unbounded spinning would allow for
+ * priority inversion.
+ */
+
+/*
+ * We use an unpublished interface to initialize pthread mutexes with an
+ * allocation callback, in order to avoid infinite recursion.
+ */
+int	_pthread_mutex_init_calloc_cb(pthread_mutex_t *mutex,
+    void *(calloc_cb)(size_t, size_t));
+
+__weak_reference(_pthread_mutex_init_calloc_cb_stub,
+    _pthread_mutex_init_calloc_cb);
+
+int
+_pthread_mutex_init_calloc_cb_stub(pthread_mutex_t *mutex,
+    void *(calloc_cb)(size_t, size_t))
+{
+
+	return (0);
+}
+
+static bool
+malloc_spin_init(pthread_mutex_t *lock)
+{
+
+	if (_pthread_mutex_init_calloc_cb(lock, base_calloc) != 0)
+		return (true);
+
+	return (false);
+}
+
+static inline unsigned
+malloc_spin_lock(pthread_mutex_t *lock)
+{
+	unsigned ret = 0;
+
+	if (__isthreaded) {
+		if (_pthread_mutex_trylock(lock) != 0) {
+			unsigned i;
+			volatile unsigned j;
+
+			/* Exponentially back off. */
+			for (i = 1; i <= SPIN_LIMIT_2POW; i++) {
+				for (j = 0; j < (1U << i); j++)
+					ret++;
+
+				CPU_SPINWAIT;
+				if (_pthread_mutex_trylock(lock) == 0)
+					return (ret);
+			}
+
+			/*
+			 * Spinning failed.  Block until the lock becomes
+			 * available, in order to avoid indefinite priority
+			 * inversion.
+			 */
+			_pthread_mutex_lock(lock);
+			assert((ret << BLOCK_COST_2POW) != 0);
+			return (ret << BLOCK_COST_2POW);
+		}
+	}
+
+	return (ret);
+}
+
+static inline void
+malloc_spin_unlock(pthread_mutex_t *lock)
+{
+
+	if (__isthreaded)
+		_pthread_mutex_unlock(lock);
+}
+
+/*
+ * End spin lock.
+ */
+/******************************************************************************/
+/*
  * Begin Utility functions/macros.
  */
@@ -926,6 +1064,10 @@ prn_##suffix(uint32_t lg_range)				\
 
 /* Define the per-thread PRNG used for lazy deallocation. */
 static __thread uint32_t lazy_free_x;
 PRN_DEFINE(lazy_free, lazy_free_x, 12345, 12347)
+
+/* Define the PRNG used for arena assignment. */
+static __thread uint32_t balance_x;
+PRN_DEFINE(balance, balance_x, 1297, 1301);
 #endif
 
 static void
@@ -1083,6 +1225,17 @@ RETURN:
 	return (ret);
 }
 
+static void *
+base_calloc(size_t number, size_t size)
+{
+	void *ret;
+
+	ret = base_alloc(number * size);
+	memset(ret, 0, number * size);
+
+	return (ret);
+}
+
 static chunk_node_t *
 base_chunk_node_alloc(void)
 {
@@ -1574,12 +1727,12 @@ choose_arena(void)
 		 * Avoid races with another thread that may have already
 		 * initialized arenas[ind].
 		 */
-		malloc_mutex_lock(&arenas_mtx);
+		malloc_spin_lock(&arenas_lock);
 		if (arenas[ind] == NULL)
 			ret = arenas_extend((unsigned)ind);
 		else
 			ret = arenas[ind];
-		malloc_mutex_unlock(&arenas_mtx);
+		malloc_spin_unlock(&arenas_lock);
 		}
 	} else
 		ret = arenas[0];
@@ -1613,21 +1766,27 @@ choose_arena_hard(void)
 	 */
 	SPRN(lazy_free, (uint32_t)(uintptr_t)(_pthread_self()));
 
-	/* Assign one of the arenas to this thread, in a round-robin fashion. */
-	malloc_mutex_lock(&arenas_mtx);
-	ret = arenas[next_arena];
-	if (ret == NULL)
-		ret = arenas_extend(next_arena);
-	if (ret == NULL) {
-		/*
-		 * Make sure that this function never returns NULL, so that
-		 * choose_arena() doesn't have to check for a NULL return
-		 * value.
-		 */
+	/*
+	 * Seed the PRNG used for arena load balancing.  We can get away with
+	 * using the same seed here as for the lazy_free PRNG without
+	 * introducing autocorrelation because the PRNG parameters are
+	 * distinct.
+	 */
+	SPRN(balance, (uint32_t)(uintptr_t)(_pthread_self()));
+
+	if (narenas > 1) {
+		unsigned ind;
+
+		ind = PRN(balance, narenas_2pow);
+		if ((ret = arenas[ind]) == NULL) {
+			malloc_spin_lock(&arenas_lock);
+			if ((ret = arenas[ind]) == NULL)
+				ret = arenas_extend(ind);
+			malloc_spin_unlock(&arenas_lock);
+		}
+	} else
 		ret = arenas[0];
-	}
-	next_arena = (next_arena + 1) % narenas;
-	malloc_mutex_unlock(&arenas_mtx);
+
+	arenas_map = ret;
 
 	return (ret);
@@ -2325,6 +2484,48 @@ arena_bin_run_size_calc(arena_bin_t *bin, size_t min_run_size)
 	return (good_run_size);
 }
 
+static inline void
+arena_lock_balance(arena_t *arena)
+{
+#ifndef NO_TLS
+	unsigned contention;
+
+	contention = malloc_spin_lock(&arena->lock);
+	if (narenas > 1) {
+		/*
+		 * Calculate the exponentially averaged contention for this
+		 * arena.  Due to integer math always rounding down, this value
+		 * decays somewhat faster then normal.
+		 */
+		arena->contention = (((uint64_t)arena->contention
+		    * (uint64_t)((1U << BALANCE_ALPHA_INV_2POW)-1))
+		    + (uint64_t)contention) >> BALANCE_ALPHA_INV_2POW;
+		if (arena->contention >= opt_balance_threshold) {
+			uint32_t ind;
+
+			arena->contention = 0;
+#ifdef MALLOC_STATS
+			arena->stats.nbalance++;
+#endif
+			ind = PRN(balance, narenas_2pow);
+			if (arenas[ind] != NULL)
+				arenas_map = arenas[ind];
+			else {
+				malloc_spin_lock(&arenas_lock);
+				if (arenas[ind] != NULL)
+					arenas_map = arenas[ind];
+				else
+					arenas_map = arenas_extend(ind);
+				malloc_spin_unlock(&arenas_lock);
+			}
+		}
+	}
+#else
+	malloc_spin_lock(&arena->lock);
+#endif
+
+}
+
 static void *
 arena_malloc(arena_t *arena, size_t size, bool zero)
 {
@@ -2368,14 +2569,14 @@ arena_malloc(arena_t *arena, size_t size, bool zero)
 		}
 		assert(size == bin->reg_size);
 
-		malloc_mutex_lock(&arena->mtx);
+		arena_lock_balance(arena);
 		if ((run = bin->runcur) != NULL && run->nfree > 0)
 			ret = arena_bin_malloc_easy(arena, bin, run);
 		else
 			ret = arena_bin_malloc_hard(arena, bin);
 
 		if (ret == NULL) {
-			malloc_mutex_unlock(&arena->mtx);
+			malloc_spin_unlock(&arena->lock);
 			return (NULL);
 		}
@@ -2384,7 +2585,7 @@ arena_malloc(arena_t *arena, size_t size, bool zero)
 		arena->stats.nmalloc_small++;
 		arena->stats.allocated_small += size;
 #endif
-		malloc_mutex_unlock(&arena->mtx);
+		malloc_spin_unlock(&arena->lock);
 
 		if (zero == false) {
 			if (opt_junk)
@@ -2396,17 +2597,17 @@ arena_malloc(arena_t *arena, size_t size, bool zero)
 	} else {
 		/* Large allocation. */
 		size = PAGE_CEILING(size);
-		malloc_mutex_lock(&arena->mtx);
+		arena_lock_balance(arena);
 		ret = (void *)arena_run_alloc(arena, size, true); // XXX zero?
 		if (ret == NULL) {
-			malloc_mutex_unlock(&arena->mtx);
+			malloc_spin_unlock(&arena->lock);
 			return (NULL);
 		}
 #ifdef MALLOC_STATS
 		arena->stats.nmalloc_large++;
 		arena->stats.allocated_large += size;
 #endif
-		malloc_mutex_unlock(&arena->mtx);
+		malloc_spin_unlock(&arena->lock);
 
 		if (zero == false) {
 			if (opt_junk)
@@ -2453,10 +2654,10 @@ arena_palloc(arena_t *arena, size_t alignment, size_t size, size_t alloc_size)
 
 	npages = size >> pagesize_2pow;
 
-	malloc_mutex_lock(&arena->mtx);
+	arena_lock_balance(arena);
 	ret = (void *)arena_run_alloc(arena, alloc_size, false);
 	if (ret == NULL) {
-		malloc_mutex_unlock(&arena->mtx);
+		malloc_spin_unlock(&arena->lock);
 		return (NULL);
 	}
@@ -2509,7 +2710,7 @@ arena_palloc(arena_t *arena, size_t alignment, size_t size, size_t alloc_size)
 	arena->stats.nmalloc_large++;
 	arena->stats.allocated_large += size;
 #endif
-	malloc_mutex_unlock(&arena->mtx);
+	malloc_spin_unlock(&arena->lock);
 
 	if (opt_junk)
 		memset(ret, 0xa5, size);
@@ -2675,9 +2876,9 @@ arena_dalloc_lazy(arena_t *arena, arena_chunk_t *chunk, void *ptr,
 	unsigned i, slot;
 
 	if (!__isthreaded || opt_lazy_free_2pow < 0) {
-		malloc_mutex_lock(&arena->mtx);
+		malloc_spin_lock(&arena->lock);
 		arena_dalloc_small(arena, chunk, ptr, pageind, mapelm);
-		malloc_mutex_unlock(&arena->mtx);
+		malloc_spin_unlock(&arena->lock);
 		return;
 	}
@@ -2689,7 +2890,7 @@ arena_dalloc_lazy(arena_t *arena, arena_chunk_t *chunk, void *ptr,
 		}
 	}
 
-	malloc_mutex_lock(&arena->mtx);
+	malloc_spin_lock(&arena->lock);
 	arena_dalloc_small(arena, chunk, ptr, pageind, mapelm);
 
 	/*
@@ -2736,7 +2937,7 @@ arena_dalloc_lazy(arena_t *arena, arena_chunk_t *chunk, void *ptr,
 		}
 	}
 
-	malloc_mutex_unlock(&arena->mtx);
+	malloc_spin_unlock(&arena->lock);
 }
 #endif
@@ -2760,9 +2961,9 @@ arena_dalloc(arena_t *arena, arena_chunk_t *chunk, void *ptr)
 
 #ifndef NO_TLS
 		arena_dalloc_lazy(arena, chunk, ptr, pageind, mapelm);
 #else
-		malloc_mutex_lock(&arena->mtx);
+		malloc_spin_lock(&arena->lock);
 		arena_dalloc_small(arena, chunk, ptr, pageind, mapelm);
-		malloc_mutex_unlock(&arena->mtx);
+		malloc_spin_unlock(&arena->lock);
 #endif
 	} else {
 		size_t size;
@@ -2775,13 +2976,13 @@ arena_dalloc(arena_t *arena, arena_chunk_t *chunk, void *ptr)
 		if (opt_junk)
 			memset(ptr, 0x5a, size);
 
-		malloc_mutex_lock(&arena->mtx);
+		malloc_spin_lock(&arena->lock);
 		arena_run_dalloc(arena, (arena_run_t *)ptr, size);
 #ifdef MALLOC_STATS
 		arena->stats.allocated_large -= size;
 		arena->stats.ndalloc_large++;
 #endif
-		malloc_mutex_unlock(&arena->mtx);
+		malloc_spin_unlock(&arena->lock);
 	}
 }
@@ -2792,7 +2993,8 @@ arena_new(arena_t *arena)
 	arena_bin_t *bin;
 	size_t pow2_size, prev_run_size;
 
-	malloc_mutex_init(&arena->mtx);
+	if (malloc_spin_init(&arena->lock))
+		return (true);
 
 #ifdef MALLOC_STATS
 	memset(&arena->stats, 0, sizeof(arena_stats_t));
@@ -2803,6 +3005,7 @@ arena_new(arena_t *arena)
 	arena->spare = NULL;
 
 #ifndef NO_TLS
+	arena->contention = 0;
 	if (opt_lazy_free_2pow >= 0) {
 		arena->free_cache = (void **) base_alloc(sizeof(void *)
 		    * (1U << opt_lazy_free_2pow));
@@ -3332,6 +3535,8 @@ malloc_print_stats(void)
 			    umax2s(1U << opt_lazy_free_2pow, s), "\n", "");
 		} else
 			_malloc_message("Lazy free slots: 0\n", "", "", "");
+		_malloc_message("Arena balance threshold: ",
+		    umax2s(opt_balance_threshold, s), "\n", "");
 #endif
 		_malloc_message("Pointer size: ", umax2s(sizeof(void *), s),
 		    "\n", "");
@@ -3345,6 +3550,9 @@ malloc_print_stats(void)
 #ifdef MALLOC_STATS
 		{
 			size_t allocated, mapped;
+#ifndef NO_TLS
+			uint64_t nbalance = 0;
+#endif
 			unsigned i;
 			arena_t *arena;
@@ -3353,12 +3561,15 @@ malloc_print_stats(void)
 			/* arenas. */
 			for (i = 0, allocated = 0; i < narenas; i++) {
 				if (arenas[i] != NULL) {
-					malloc_mutex_lock(&arenas[i]->mtx);
+					malloc_spin_lock(&arenas[i]->lock);
 					allocated += arenas[i]->stats.allocated_small;
 					allocated += arenas[i]->stats.allocated_large;
-					malloc_mutex_unlock(&arenas[i]->mtx);
+#ifndef NO_TLS
+					nbalance += arenas[i]->stats.nbalance;
+#endif
+					malloc_spin_unlock(&arenas[i]->lock);
 				}
 			}
@@ -3375,6 +3586,11 @@ malloc_print_stats(void)
 			malloc_printf("Allocated: %zu, mapped: %zu\n",
 			    allocated, mapped);
 
+#ifndef NO_TLS
+			malloc_printf("Arena balance reassignments: %llu\n",
+			    nbalance);
+#endif
+
 			/* Print chunk stats. */
 			{
 				chunk_stats_t chunks_stats;
@@ -3403,9 +3619,9 @@ malloc_print_stats(void)
 				if (arena != NULL) {
 					malloc_printf(
 					    "\narenas[%u]:\n", i);
-					malloc_mutex_lock(&arena->mtx);
+					malloc_spin_lock(&arena->lock);
 					stats_print(arena);
-					malloc_mutex_unlock(&arena->mtx);
+					malloc_spin_unlock(&arena->lock);
 				}
 			}
 		}
@@ -3540,6 +3756,20 @@ malloc_init_hard(void)
 			case 'A':
 				opt_abort = true;
 				break;
+			case 'b':
+#ifndef NO_TLS
+				opt_balance_threshold >>= 1;
+#endif
+				break;
+			case 'B':
+#ifndef NO_TLS
+				if (opt_balance_threshold == 0)
+					opt_balance_threshold = 1;
+				else if ((opt_balance_threshold << 1)
+				    > opt_balance_threshold)
+					opt_balance_threshold <<= 1;
+#endif
+				break;
 			case 'h':
 				opt_hint = false;
 				break;
@@ -3565,8 +3795,8 @@ malloc_init_hard(void)
 				/*
 				 * There must be fewer pages in a chunk than
 				 * can be recorded by the pos field of
-				 * arena_chunk_map_t, in order to make POS_FREE
-				 * special.
+				 * arena_chunk_map_t, in order to make
+				 * POS_EMPTY/POS_FREE special.
 				 */
 				if (opt_chunk_2pow - pagesize_2pow <
 				    (sizeof(uint32_t) << 3) - 1)
@@ -3770,6 +4000,12 @@ malloc_init_hard(void)
 		if (narenas == 0)
 			narenas = 1;
 	}
+#ifndef NO_TLS
+	assert(narenas != 0);
+	for (narenas_2pow = 0;
+	     (narenas >> (narenas_2pow + 1)) != 0;
+	     narenas_2pow++);
+#endif
 
 #ifdef NO_TLS
 	if (narenas > 1) {
@@ -3798,10 +4034,6 @@ malloc_init_hard(void)
 	}
 #endif
 
-#ifndef NO_TLS
-	next_arena = 0;
-#endif
-
 	/* Allocate and initialize arenas. */
 	arenas = (arena_t **)base_alloc(sizeof(arena_t *) * narenas);
 	if (arenas == NULL) {
@@ -3825,13 +4057,20 @@ malloc_init_hard(void)
 	}
 #endif
 
 #ifndef NO_TLS
 	/*
+	 * Assign the initial arena to the initial thread, in order to avoid
+	 * spurious creation of an extra arena if the application switches to
+	 * threaded mode.
+	 */
+	arenas_map = arenas[0];
+
+	/*
 	 * Seed here for the initial thread, since choose_arena_hard() is only
 	 * called for other threads.  The seed values don't really matter.
 	 */
 	SPRN(lazy_free, 42);
+	SPRN(balance, 42);
 #endif
 
-	malloc_mutex_init(&arenas_mtx);
+	malloc_spin_init(&arenas_lock);
 
 	malloc_initialized = true;
 	malloc_mutex_unlock(&init_lock);
@@ -4075,12 +4314,12 @@ _malloc_prefork(void)
 
 	/* Acquire all mutexes in a safe order. */
 
-	malloc_mutex_lock(&arenas_mtx);
+	malloc_spin_lock(&arenas_lock);
 	for (i = 0; i < narenas; i++) {
 		if (arenas[i] != NULL)
-			malloc_mutex_lock(&arenas[i]->mtx);
+			malloc_spin_lock(&arenas[i]->lock);
 	}
-	malloc_mutex_unlock(&arenas_mtx);
+	malloc_spin_unlock(&arenas_lock);
 
 	malloc_mutex_lock(&base_mtx);
@@ -4098,12 +4337,12 @@ _malloc_postfork(void)
 
 	malloc_mutex_unlock(&base_mtx);
 
-	malloc_mutex_lock(&arenas_mtx);
+	malloc_spin_lock(&arenas_lock);
 	for (i = 0; i < narenas; i++) {
 		if (arenas[i] != NULL)
-			malloc_mutex_unlock(&arenas[i]->mtx);
+			malloc_spin_unlock(&arenas[i]->lock);
 	}
-	malloc_mutex_unlock(&arenas_mtx);
+	malloc_spin_unlock(&arenas_lock);
 }
 
 /*
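One property of the new thread-to-arena mapping worth calling out: the balance PRNG is asked for narenas_2pow bits, and narenas_2pow is computed in malloc_init_hard() as floor(log2(narenas)), so when narenas is not a power of two the rebalancing appears to draw only from the low power-of-two prefix of the arenas array. The program below is a standalone illustration of that computation, not part of the patch.

```c
#include <stdio.h>

int
main(void)
{
	unsigned narenas, narenas_2pow;

	for (narenas = 1; narenas <= 8; narenas++) {
		/* Same loop shape as the one added to malloc_init_hard(). */
		for (narenas_2pow = 0;
		    (narenas >> (narenas_2pow + 1)) != 0;
		    narenas_2pow++)
			;
		printf("narenas=%u -> narenas_2pow=%u -> %u arena(s) used\n",
		    narenas, narenas_2pow, 1U << narenas_2pow);
	}
	return (0);
}
```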