From b7209126971ecb28bf4f6b1bbb24a078b590e35a Mon Sep 17 00:00:00 2001
From: jasone <jasone@FreeBSD.org>
Date: Tue, 18 Dec 2007 05:27:57 +0000
Subject: Use fixed point integer math instead of floating point math when
 calculating run sizes.  Use of the floating point unit was a potential
 pessimization to context switching for applications that do not otherwise use
 floating point math. [1]

Reformat cpp macro-related comments to improve consistency.

Submitted by:	das
---
 lib/libc/stdlib/malloc.c | 89 +++++++++++++++++++++++++-----------------------
 1 file changed, 47 insertions(+), 42 deletions(-)

(limited to 'lib')

diff --git a/lib/libc/stdlib/malloc.c b/lib/libc/stdlib/malloc.c
index 3d1439d..bbd4bae 100644
--- a/lib/libc/stdlib/malloc.c
+++ b/lib/libc/stdlib/malloc.c
@@ -101,7 +101,14 @@
 /* #define	MALLOC_PRODUCTION */
 
 #ifndef MALLOC_PRODUCTION
+   /*
+    * MALLOC_DEBUG enables assertions and other sanity checks, and disables
+    * inline functions.
+    */
 #  define MALLOC_DEBUG
+
+   /* MALLOC_STATS enables statistics calculation. */
+#  define MALLOC_STATS
 #endif
 
 /*
@@ -156,11 +163,6 @@ __FBSDID("$FreeBSD$");
 
 #include "un-namespace.h"
 
-/* MALLOC_STATS enables statistics calculation. */
-#ifndef MALLOC_PRODUCTION
-#  define MALLOC_STATS
-#endif
-
 #ifdef MALLOC_DEBUG
 #  ifdef NDEBUG
 #    undef NDEBUG
@@ -267,35 +269,40 @@ __FBSDID("$FreeBSD$");
 #define	SMALL_MAX_DEFAULT	(1U << SMALL_MAX_2POW_DEFAULT)
 
 /*
- * Maximum desired run header overhead.  Runs are sized as small as possible
- * such that this setting is still honored, without violating other constraints.
- * The goal is to make runs as small as possible without exceeding a per run
- * external fragmentation threshold.
+ * RUN_MAX_OVRHD indicates maximum desired run header overhead.  Runs are sized
+ * as small as possible such that this setting is still honored, without
+ * violating other constraints.  The goal is to make runs as small as possible
+ * without exceeding a per run external fragmentation threshold.
+ *
+ * We use binary fixed point math for overhead computations, where the binary
+ * point is implicitly RUN_BFP bits to the left.
  *
- * Note that it is possible to set this low enough that it cannot be honored
- * for some/all object sizes, since there is one bit of header overhead per
- * object (plus a constant).  In such cases, this constraint is relaxed.
+ * Note that it is possible to set RUN_MAX_OVRHD low enough that it cannot be
+ * honored for some/all object sizes, since there is one bit of header overhead
+ * per object (plus a constant).  This constraint is relaxed (ignored) for
+ * region sizes small enough that the following holds:
  *
- * RUN_MAX_OVRHD_RELAX specifies the maximum number of bits per region of
- * overhead for which RUN_MAX_OVRHD is relaxed.
+ *   RUN_MAX_OVRHD * (reg_size << 3) <= RUN_MAX_OVRHD_RELAX
  */
-#define	RUN_MAX_OVRHD		0.015
-#define	RUN_MAX_OVRHD_RELAX	1.5
+#define	RUN_BFP			12
+/*                                    \/   Implicit binary fixed point. */
+#define	RUN_MAX_OVRHD		0x0000003dU
+#define	RUN_MAX_OVRHD_RELAX	0x00001800U
 
 /* Put a cap on small object run size.  This overrides RUN_MAX_OVRHD. */
 #define	RUN_MAX_SMALL_2POW	15
 #define	RUN_MAX_SMALL		(1U << RUN_MAX_SMALL_2POW)
 
 #ifdef MALLOC_LAZY_FREE
-/* Default size of each arena's lazy free cache. */
-#  define LAZY_FREE_2POW_DEFAULT	8
-/*
- * Number of pseudo-random probes to conduct before considering the cache to be
- * overly full.  It takes on average n probes to detect fullness of (n-1)/n.
- * However, we are effectively doing multiple non-independent trials (each
- * deallocation is a trial), so the actual average threshold for clearing the
- * cache is somewhat lower.
- */
+   /* Default size of each arena's lazy free cache. */
+#  define LAZY_FREE_2POW_DEFAULT 8
+   /*
+    * Number of pseudo-random probes to conduct before considering the cache to
+    * be overly full.  It takes on average n probes to detect fullness of
+    * (n-1)/n.  However, we are effectively doing multiple non-independent
+    * trials (each deallocation is a trial), so the actual average threshold
+    * for clearing the cache is somewhat lower.
+    */
 #  define LAZY_FREE_NPROBES	5
 #endif
 
@@ -323,20 +330,20 @@ __FBSDID("$FreeBSD$");
 #define	BLOCK_COST_2POW		4
 
 #ifdef MALLOC_BALANCE
-/*
- * We use an exponential moving average to track recent lock contention, where
- * the size of the history window is N, and alpha=2/(N+1).
- *
- * Due to integer math rounding, very small values here can cause substantial
- * degradation in accuracy, thus making the moving average decay faster than it
- * would with precise calculation.
- */
+   /*
+    * We use an exponential moving average to track recent lock contention,
+    * where the size of the history window is N, and alpha=2/(N+1).
+    *
+    * Due to integer math rounding, very small values here can cause
+    * substantial degradation in accuracy, thus making the moving average decay
+    * faster than it would with precise calculation.
+    */
 #  define BALANCE_ALPHA_INV_2POW	9
 
-/*
- * Threshold value for the exponential moving contention average at which to
- * re-assign a thread.
- */
+   /*
+    * Threshold value for the exponential moving contention average at which to
+    * re-assign a thread.
+    */
 #  define BALANCE_THRESHOLD_DEFAULT	(1U << (SPIN_LIMIT_2POW-4))
 #endif
 
@@ -2468,7 +2475,6 @@ arena_bin_run_size_calc(arena_bin_t *bin, size_t min_run_size)
 	size_t try_run_size, good_run_size;
 	unsigned good_nregs, good_mask_nelms, good_reg0_offset;
 	unsigned try_nregs, try_mask_nelms, try_reg0_offset;
-	float max_ovrhd = RUN_MAX_OVRHD;
 
 	assert(min_run_size >= pagesize);
 	assert(min_run_size <= arena_maxclass);
@@ -2486,7 +2492,7 @@ arena_bin_run_size_calc(arena_bin_t *bin, size_t min_run_size)
 	 */
 	try_run_size = min_run_size;
 	try_nregs = ((try_run_size - sizeof(arena_run_t)) / bin->reg_size)
-	    + 1; /* Counter-act the first line of the loop. */
+	    + 1; /* Counter-act try_nregs-- in loop. */
 	do {
 		try_nregs--;
 		try_mask_nelms = (try_nregs >> (SIZEOF_INT_2POW + 3)) +
@@ -2519,9 +2525,8 @@ arena_bin_run_size_calc(arena_bin_t *bin, size_t min_run_size)
 		} while (sizeof(arena_run_t) + (sizeof(unsigned) *
 		    (try_mask_nelms - 1)) > try_reg0_offset);
 	} while (try_run_size <= arena_maxclass && try_run_size <= RUN_MAX_SMALL
-	    && max_ovrhd > RUN_MAX_OVRHD_RELAX / ((float)(bin->reg_size << 3))
-	    && ((float)(try_reg0_offset)) / ((float)(try_run_size)) >
-	    max_ovrhd);
+	    && RUN_MAX_OVRHD * (bin->reg_size << 3) > RUN_MAX_OVRHD_RELAX
+	    && (try_reg0_offset << RUN_BFP) > RUN_MAX_OVRHD * try_run_size);
 
 	assert(sizeof(arena_run_t) + (sizeof(unsigned) * (good_mask_nelms - 1))
 	    <= good_reg0_offset);
-- 
cgit v1.1