author		jasone <jasone@FreeBSD.org>	2007-12-18 05:27:57 +0000
committer	jasone <jasone@FreeBSD.org>	2007-12-18 05:27:57 +0000
commit		b7209126971ecb28bf4f6b1bbb24a078b590e35a (patch)
tree		aca6b02fa9920445126f5c8e6a18e2c0564bfe30 /lib
parent		3b94f3069c6b52c0da16f2398f27fd474010f7a4 (diff)
Use fixed point integer math instead of floating point math when
calculating run sizes.  Use of the floating point unit was a potential
pessimization to context switching for applications that do not otherwise
use floating point math. [1]

Reformat cpp macro-related comments to improve consistency.

Submitted by:	das
Diffstat (limited to 'lib')
-rw-r--r--	lib/libc/stdlib/malloc.c	89
1 file changed, 47 insertions(+), 42 deletions(-)
diff --git a/lib/libc/stdlib/malloc.c b/lib/libc/stdlib/malloc.c
index 3d1439d..bbd4bae 100644
--- a/lib/libc/stdlib/malloc.c
+++ b/lib/libc/stdlib/malloc.c
@@ -101,7 +101,14 @@
/* #define MALLOC_PRODUCTION */
#ifndef MALLOC_PRODUCTION
+ /*
+ * MALLOC_DEBUG enables assertions and other sanity checks, and disables
+ * inline functions.
+ */
# define MALLOC_DEBUG
+
+ /* MALLOC_STATS enables statistics calculation. */
+# define MALLOC_STATS
#endif
/*
@@ -156,11 +163,6 @@ __FBSDID("$FreeBSD$");
#include "un-namespace.h"
-/* MALLOC_STATS enables statistics calculation. */
-#ifndef MALLOC_PRODUCTION
-# define MALLOC_STATS
-#endif
-
#ifdef MALLOC_DEBUG
# ifdef NDEBUG
# undef NDEBUG
@@ -267,35 +269,40 @@ __FBSDID("$FreeBSD$");
#define SMALL_MAX_DEFAULT (1U << SMALL_MAX_2POW_DEFAULT)
/*
- * Maximum desired run header overhead. Runs are sized as small as possible
- * such that this setting is still honored, without violating other constraints.
- * The goal is to make runs as small as possible without exceeding a per run
- * external fragmentation threshold.
+ * RUN_MAX_OVRHD indicates maximum desired run header overhead. Runs are sized
+ * as small as possible such that this setting is still honored, without
+ * violating other constraints. The goal is to make runs as small as possible
+ * without exceeding a per run external fragmentation threshold.
+ *
+ * We use binary fixed point math for overhead computations, where the binary
+ * point is implicitly RUN_BFP bits to the left.
*
- * Note that it is possible to set this low enough that it cannot be honored
- * for some/all object sizes, since there is one bit of header overhead per
- * object (plus a constant). In such cases, this constraint is relaxed.
+ * Note that it is possible to set RUN_MAX_OVRHD low enough that it cannot be
+ * honored for some/all object sizes, since there is one bit of header overhead
+ * per object (plus a constant). This constraint is relaxed (ignored) for runs
+ * that are so small that the per-region overhead is greater than:
*
- * RUN_MAX_OVRHD_RELAX specifies the maximum number of bits per region of
- * overhead for which RUN_MAX_OVRHD is relaxed.
+ *	(RUN_MAX_OVRHD / (reg_size << (3+RUN_BFP)))
*/
-#define RUN_MAX_OVRHD 0.015
-#define RUN_MAX_OVRHD_RELAX 1.5
+#define RUN_BFP 12
+/*                          \/ Implicit binary fixed point. */
+#define RUN_MAX_OVRHD 0x0000003dU
+#define RUN_MAX_OVRHD_RELAX 0x00001800U
/* Put a cap on small object run size. This overrides RUN_MAX_OVRHD. */
#define RUN_MAX_SMALL_2POW 15
#define RUN_MAX_SMALL (1U << RUN_MAX_SMALL_2POW)
#ifdef MALLOC_LAZY_FREE
-/* Default size of each arena's lazy free cache. */
-# define LAZY_FREE_2POW_DEFAULT 8
-/*
- * Number of pseudo-random probes to conduct before considering the cache to be
- * overly full. It takes on average n probes to detect fullness of (n-1)/n.
- * However, we are effectively doing multiple non-independent trials (each
- * deallocation is a trial), so the actual average threshold for clearing the
- * cache is somewhat lower.
- */
+ /* Default size of each arena's lazy free cache. */
+# define LAZY_FREE_2POW_DEFAULT 8
+ /*
+ * Number of pseudo-random probes to conduct before considering the cache to
+ * be overly full. It takes on average n probes to detect fullness of
+ * (n-1)/n. However, we are effectively doing multiple non-independent
+ * trials (each deallocation is a trial), so the actual average threshold
+ * for clearing the cache is somewhat lower.
+ */
# define LAZY_FREE_NPROBES 5
#endif
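For reference, with RUN_BFP = 12 the new constants encode nearly the same
thresholds as the old floating point values: a fixed point value v is read as
v / 2^12, so RUN_MAX_OVRHD = 0x3d = 61 means 61/4096 ~= 0.0149 (old: 0.015),
and RUN_MAX_OVRHD_RELAX = 0x1800 = 6144 means 6144/4096 = 1.5 exactly
(old: 1.5).  A minimal standalone sketch (not part of the patch) that prints
these interpretations:

#include <stdio.h>

#define RUN_BFP			12
#define RUN_MAX_OVRHD		0x0000003dU
#define RUN_MAX_OVRHD_RELAX	0x00001800U

int
main(void)
{

	/* Convert each fixed point constant to a double for display. */
	printf("RUN_MAX_OVRHD       = %u/%u = %g\n", RUN_MAX_OVRHD,
	    1U << RUN_BFP, (double)RUN_MAX_OVRHD / (1U << RUN_BFP));
	printf("RUN_MAX_OVRHD_RELAX = %u/%u = %g\n", RUN_MAX_OVRHD_RELAX,
	    1U << RUN_BFP, (double)RUN_MAX_OVRHD_RELAX / (1U << RUN_BFP));
	return (0);
}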
@@ -323,20 +330,20 @@ __FBSDID("$FreeBSD$");
#define BLOCK_COST_2POW 4
#ifdef MALLOC_BALANCE
-/*
- * We use an exponential moving average to track recent lock contention, where
- * the size of the history window is N, and alpha=2/(N+1).
- *
- * Due to integer math rounding, very small values here can cause substantial
- * degradation in accuracy, thus making the moving average decay faster than it
- * would with precise calculation.
- */
+ /*
+ * We use an exponential moving average to track recent lock contention,
+ * where the size of the history window is N, and alpha=2/(N+1).
+ *
+ * Due to integer math rounding, very small values here can cause
+ * substantial degradation in accuracy, thus making the moving average decay
+ * faster than it would with precise calculation.
+ */
# define BALANCE_ALPHA_INV_2POW 9
-/*
- * Threshold value for the exponential moving contention average at which to
- * re-assign a thread.
- */
+ /*
+ * Threshold value for the exponential moving contention average at which to
+ * re-assign a thread.
+ */
# define BALANCE_THRESHOLD_DEFAULT (1U << (SPIN_LIMIT_2POW-4))
#endif
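For context on the BALANCE_ALPHA_INV_2POW comment above: an exponential
moving average with alpha = 2/(N+1) can be maintained in pure integer math
when 1/alpha is a power of two, at the price of exactly the rounding loss the
comment warns about (BALANCE_ALPHA_INV_2POW = 9 corresponds to alpha = 1/512,
i.e. a history window of N = 1023).  A hedged sketch of the general technique
follows; the function and variable names are illustrative, not the ones
malloc.c uses:

#define BALANCE_ALPHA_INV_2POW	9

/*
 * ema += (sample - ema) * alpha, with alpha = 1/2^BALANCE_ALPHA_INV_2POW.
 * The right shift discards low-order bits, so small differences between
 * sample and ema contribute nothing; this is why the average decays faster
 * than it would with precise calculation.
 */
static unsigned
ema_update(unsigned ema, unsigned sample)
{

	if (sample >= ema)
		ema += (sample - ema) >> BALANCE_ALPHA_INV_2POW;
	else
		ema -= (ema - sample) >> BALANCE_ALPHA_INV_2POW;
	return (ema);
}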
@@ -2468,7 +2475,6 @@ arena_bin_run_size_calc(arena_bin_t *bin, size_t min_run_size)
size_t try_run_size, good_run_size;
unsigned good_nregs, good_mask_nelms, good_reg0_offset;
unsigned try_nregs, try_mask_nelms, try_reg0_offset;
- float max_ovrhd = RUN_MAX_OVRHD;
assert(min_run_size >= pagesize);
assert(min_run_size <= arena_maxclass);
@@ -2486,7 +2492,7 @@ arena_bin_run_size_calc(arena_bin_t *bin, size_t min_run_size)
*/
try_run_size = min_run_size;
try_nregs = ((try_run_size - sizeof(arena_run_t)) / bin->reg_size)
- + 1; /* Counter-act the first line of the loop. */
+ + 1; /* Counter-act try_nregs-- in loop. */
do {
try_nregs--;
try_mask_nelms = (try_nregs >> (SIZEOF_INT_2POW + 3)) +
@@ -2519,9 +2525,8 @@ arena_bin_run_size_calc(arena_bin_t *bin, size_t min_run_size)
} while (sizeof(arena_run_t) + (sizeof(unsigned) *
(try_mask_nelms - 1)) > try_reg0_offset);
} while (try_run_size <= arena_maxclass && try_run_size <= RUN_MAX_SMALL
- && max_ovrhd > RUN_MAX_OVRHD_RELAX / ((float)(bin->reg_size << 3))
- && ((float)(try_reg0_offset)) / ((float)(try_run_size)) >
- max_ovrhd);
+ && RUN_MAX_OVRHD * (bin->reg_size << 3) > RUN_MAX_OVRHD_RELAX
+ && (try_reg0_offset << RUN_BFP) > RUN_MAX_OVRHD * try_run_size);
assert(sizeof(arena_run_t) + (sizeof(unsigned) * (good_mask_nelms - 1))
<= good_reg0_offset);
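The rewritten termination test is an algebraic rearrangement of the old
floating point one: try_reg0_offset / try_run_size > RUN_MAX_OVRHD / 2^RUN_BFP
multiplies out to (try_reg0_offset << RUN_BFP) > RUN_MAX_OVRHD * try_run_size,
and max_ovrhd > RUN_MAX_OVRHD_RELAX / (reg_size << 3) likewise becomes
RUN_MAX_OVRHD * (reg_size << 3) > RUN_MAX_OVRHD_RELAX.  A standalone check
over hypothetical run geometries (illustrative values, not taken from the
allocator) that the two forms of the overhead test agree:

#include <assert.h>
#include <stdio.h>

#define RUN_BFP		12
#define RUN_MAX_OVRHD	0x0000003dU

int
main(void)
{
	unsigned run_size, reg0_offset;

	/* Power-of-two run sizes, as the allocator uses for small runs. */
	for (run_size = 4096; run_size <= 32768; run_size <<= 1) {
		for (reg0_offset = 0; reg0_offset < 1024; reg0_offset++) {
			int fp = (float)reg0_offset / (float)run_size >
			    (float)RUN_MAX_OVRHD / (float)(1U << RUN_BFP);
			int fx = (reg0_offset << RUN_BFP) >
			    RUN_MAX_OVRHD * run_size;
			assert(fp == fx);
		}
	}
	printf("floating point and fixed point tests agree\n");
	return (0);
}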