From b7209126971ecb28bf4f6b1bbb24a078b590e35a Mon Sep 17 00:00:00 2001 From: jasone Date: Tue, 18 Dec 2007 05:27:57 +0000 Subject: Use fixed point integer math instead of floating point math when calculating run sizes. Use of the floating point unit was a potential pessimization to context switching for applications that do not otherwise use floating point math. [1] Reformat cpp macro-related comments to improve consistency. Submitted by: das --- lib/libc/stdlib/malloc.c | 89 +++++++++++++++++++++++++----------------------- 1 file changed, 47 insertions(+), 42 deletions(-) (limited to 'lib') diff --git a/lib/libc/stdlib/malloc.c b/lib/libc/stdlib/malloc.c index 3d1439d..bbd4bae 100644 --- a/lib/libc/stdlib/malloc.c +++ b/lib/libc/stdlib/malloc.c @@ -101,7 +101,14 @@ /* #define MALLOC_PRODUCTION */ #ifndef MALLOC_PRODUCTION + /* + * MALLOC_DEBUG enables assertions and other sanity checks, and disables + * inline functions. + */ # define MALLOC_DEBUG + + /* MALLOC_STATS enables statistics calculation. */ +# define MALLOC_STATS #endif /* @@ -156,11 +163,6 @@ __FBSDID("$FreeBSD$"); #include "un-namespace.h" -/* MALLOC_STATS enables statistics calculation. */ -#ifndef MALLOC_PRODUCTION -# define MALLOC_STATS -#endif - #ifdef MALLOC_DEBUG # ifdef NDEBUG # undef NDEBUG @@ -267,35 +269,40 @@ __FBSDID("$FreeBSD$"); #define SMALL_MAX_DEFAULT (1U << SMALL_MAX_2POW_DEFAULT) /* - * Maximum desired run header overhead. Runs are sized as small as possible - * such that this setting is still honored, without violating other constraints. - * The goal is to make runs as small as possible without exceeding a per run - * external fragmentation threshold. + * RUN_MAX_OVRHD indicates maximum desired run header overhead. Runs are sized + * as small as possible such that this setting is still honored, without + * violating other constraints. The goal is to make runs as small as possible + * without exceeding a per run external fragmentation threshold. + * + * We use binary fixed point math for overhead computations, where the binary + * point is implicitly RUN_BFP bits to the left. * - * Note that it is possible to set this low enough that it cannot be honored - * for some/all object sizes, since there is one bit of header overhead per - * object (plus a constant). In such cases, this constraint is relaxed. + * Note that it is possible to set RUN_MAX_OVRHD low enough that it cannot be + * honored for some/all object sizes, since there is one bit of header overhead + * per object (plus a constant). This constraint is relaxed (ignored) for runs + * that are so small that the per-region overhead is greater than: * - * RUN_MAX_OVRHD_RELAX specifies the maximum number of bits per region of - * overhead for which RUN_MAX_OVRHD is relaxed. + * (RUN_MAX_OVRHD / (reg_size << (3+RUN_BFP)) */ -#define RUN_MAX_OVRHD 0.015 -#define RUN_MAX_OVRHD_RELAX 1.5 +#define RUN_BFP 12 +/* \/ Implicit binary fixed point. */ +#define RUN_MAX_OVRHD 0x0000003dU +#define RUN_MAX_OVRHD_RELAX 0x00001800U /* Put a cap on small object run size. This overrides RUN_MAX_OVRHD. */ #define RUN_MAX_SMALL_2POW 15 #define RUN_MAX_SMALL (1U << RUN_MAX_SMALL_2POW) #ifdef MALLOC_LAZY_FREE -/* Default size of each arena's lazy free cache. */ -# define LAZY_FREE_2POW_DEFAULT 8 -/* - * Number of pseudo-random probes to conduct before considering the cache to be - * overly full. It takes on average n probes to detect fullness of (n-1)/n. - * However, we are effectively doing multiple non-independent trials (each - * deallocation is a trial), so the actual average threshold for clearing the - * cache is somewhat lower. - */ + /* Default size of each arena's lazy free cache. */ +# define LAZY_FREE_2POW_DEFAULT 8 + /* + * Number of pseudo-random probes to conduct before considering the cache to + * be overly full. It takes on average n probes to detect fullness of + * (n-1)/n. However, we are effectively doing multiple non-independent + * trials (each deallocation is a trial), so the actual average threshold + * for clearing the cache is somewhat lower. + */ # define LAZY_FREE_NPROBES 5 #endif @@ -323,20 +330,20 @@ __FBSDID("$FreeBSD$"); #define BLOCK_COST_2POW 4 #ifdef MALLOC_BALANCE -/* - * We use an exponential moving average to track recent lock contention, where - * the size of the history window is N, and alpha=2/(N+1). - * - * Due to integer math rounding, very small values here can cause substantial - * degradation in accuracy, thus making the moving average decay faster than it - * would with precise calculation. - */ + /* + * We use an exponential moving average to track recent lock contention, + * where the size of the history window is N, and alpha=2/(N+1). + * + * Due to integer math rounding, very small values here can cause + * substantial degradation in accuracy, thus making the moving average decay + * faster than it would with precise calculation. + */ # define BALANCE_ALPHA_INV_2POW 9 -/* - * Threshold value for the exponential moving contention average at which to - * re-assign a thread. - */ + /* + * Threshold value for the exponential moving contention average at which to + * re-assign a thread. + */ # define BALANCE_THRESHOLD_DEFAULT (1U << (SPIN_LIMIT_2POW-4)) #endif @@ -2468,7 +2475,6 @@ arena_bin_run_size_calc(arena_bin_t *bin, size_t min_run_size) size_t try_run_size, good_run_size; unsigned good_nregs, good_mask_nelms, good_reg0_offset; unsigned try_nregs, try_mask_nelms, try_reg0_offset; - float max_ovrhd = RUN_MAX_OVRHD; assert(min_run_size >= pagesize); assert(min_run_size <= arena_maxclass); @@ -2486,7 +2492,7 @@ arena_bin_run_size_calc(arena_bin_t *bin, size_t min_run_size) */ try_run_size = min_run_size; try_nregs = ((try_run_size - sizeof(arena_run_t)) / bin->reg_size) - + 1; /* Counter-act the first line of the loop. */ + + 1; /* Counter-act try_nregs-- in loop. */ do { try_nregs--; try_mask_nelms = (try_nregs >> (SIZEOF_INT_2POW + 3)) + @@ -2519,9 +2525,8 @@ arena_bin_run_size_calc(arena_bin_t *bin, size_t min_run_size) } while (sizeof(arena_run_t) + (sizeof(unsigned) * (try_mask_nelms - 1)) > try_reg0_offset); } while (try_run_size <= arena_maxclass && try_run_size <= RUN_MAX_SMALL - && max_ovrhd > RUN_MAX_OVRHD_RELAX / ((float)(bin->reg_size << 3)) - && ((float)(try_reg0_offset)) / ((float)(try_run_size)) > - max_ovrhd); + && RUN_MAX_OVRHD * (bin->reg_size << 3) > RUN_MAX_OVRHD_RELAX + && (try_reg0_offset << RUN_BFP) > RUN_MAX_OVRHD * try_run_size); assert(sizeof(arena_run_t) + (sizeof(unsigned) * (good_mask_nelms - 1)) <= good_reg0_offset); -- cgit v1.1