diff options
Diffstat (limited to 'sys/contrib/octeon-sdk/cvmx-malloc/malloc.c')
-rw-r--r-- | sys/contrib/octeon-sdk/cvmx-malloc/malloc.c | 4106 |
1 files changed, 4106 insertions, 0 deletions
diff --git a/sys/contrib/octeon-sdk/cvmx-malloc/malloc.c b/sys/contrib/octeon-sdk/cvmx-malloc/malloc.c new file mode 100644 index 0000000..222ad5d --- /dev/null +++ b/sys/contrib/octeon-sdk/cvmx-malloc/malloc.c @@ -0,0 +1,4106 @@ +/* +Copyright (c) 2001 Wolfram Gloger +Copyright (c) 2006 Cavium networks + +Permission to use, copy, modify, distribute, and sell this software +and its documentation for any purpose is hereby granted without fee, +provided that (i) the above copyright notices and this permission +notice appear in all copies of the software and related documentation, +and (ii) the name of Wolfram Gloger may not be used in any advertising +or publicity relating to the software. + +THE SOFTWARE IS PROVIDED "AS-IS" AND WITHOUT WARRANTY OF ANY KIND, +EXPRESS, IMPLIED OR OTHERWISE, INCLUDING WITHOUT LIMITATION, ANY +WARRANTY OF MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE. + +IN NO EVENT SHALL WOLFRAM GLOGER BE LIABLE FOR ANY SPECIAL, +INCIDENTAL, INDIRECT OR CONSEQUENTIAL DAMAGES OF ANY KIND, OR ANY +DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, +WHETHER OR NOT ADVISED OF THE POSSIBILITY OF DAMAGE, AND ON ANY THEORY +OF LIABILITY, ARISING OUT OF OR IN CONNECTION WITH THE USE OR +PERFORMANCE OF THIS SOFTWARE. +*/ + +/* + This is a version (aka ptmalloc2) of malloc/free/realloc written by + Doug Lea and adapted to multiple threads/arenas by Wolfram Gloger. + +* Version ptmalloc2-20011215 + $Id: malloc.c 30481 2007-12-05 21:46:59Z rfranz $ + based on: + VERSION 2.7.1pre1 Sat May 12 07:41:21 2001 Doug Lea (dl at gee) + + Note: There may be an updated version of this malloc obtainable at + http://www.malloc.de/malloc/ptmalloc2.tar.gz + Check before installing! + +* Quickstart + + In order to compile this implementation, a Makefile is provided with + the ptmalloc2 distribution, which has pre-defined targets for some + popular systems (e.g. "make posix" for Posix threads). All that is + typically required with regard to compiler flags is the selection of + the thread package via defining one out of USE_PTHREADS, USE_THR or + USE_SPROC. Check the thread-m.h file for what effects this has. + Many/most systems will additionally require USE_TSD_DATA_HACK to be + defined, so this is the default for "make posix". + +* Why use this malloc? + + This is not the fastest, most space-conserving, most portable, or + most tunable malloc ever written. However it is among the fastest + while also being among the most space-conserving, portable and tunable. + Consistent balance across these factors results in a good general-purpose + allocator for malloc-intensive programs. + + The main properties of the algorithms are: + * For large (>= 512 bytes) requests, it is a pure best-fit allocator, + with ties normally decided via FIFO (i.e. least recently used). + * For small (<= 64 bytes by default) requests, it is a caching + allocator, that maintains pools of quickly recycled chunks. + * In between, and for combinations of large and small requests, it does + the best it can trying to meet both goals at once. + * For very large requests (>= 128KB by default), it relies on system + memory mapping facilities, if supported. + + For a longer but slightly out of date high-level description, see + http://gee.cs.oswego.edu/dl/html/malloc.html + + You may already by default be using a C library containing a malloc + that is based on some version of this malloc (for example in + linux). You might still want to use the one in this file in order to + customize settings or to avoid overheads associated with library + versions. + +* Contents, described in more detail in "description of public routines" below. + + Standard (ANSI/SVID/...) functions: + malloc(size_t n); + calloc(size_t n_elements, size_t element_size); + free(Void_t* p); + realloc(Void_t* p, size_t n); + memalign(size_t alignment, size_t n); + valloc(size_t n); + mallinfo() + mallopt(int parameter_number, int parameter_value) + + Additional functions: + independent_calloc(size_t n_elements, size_t size, Void_t* chunks[]); + independent_comalloc(size_t n_elements, size_t sizes[], Void_t* chunks[]); + pvalloc(size_t n); + cfree(Void_t* p); + malloc_trim(size_t pad); + malloc_usable_size(Void_t* p); + malloc_stats(); + +* Vital statistics: + + Supported pointer representation: 4 or 8 bytes + Supported size_t representation: 4 or 8 bytes + Note that size_t is allowed to be 4 bytes even if pointers are 8. + You can adjust this by defining INTERNAL_SIZE_T + + Alignment: 2 * sizeof(size_t) (default) + (i.e., 8 byte alignment with 4byte size_t). This suffices for + nearly all current machines and C compilers. However, you can + define MALLOC_ALIGNMENT to be wider than this if necessary. + + Minimum overhead per allocated chunk: 4 or 8 bytes + Each malloced chunk has a hidden word of overhead holding size + and status information. + + Minimum allocated size: 4-byte ptrs: 16 bytes (including 4 overhead) + 8-byte ptrs: 24/32 bytes (including, 4/8 overhead) + + When a chunk is freed, 12 (for 4byte ptrs) or 20 (for 8 byte + ptrs but 4 byte size) or 24 (for 8/8) additional bytes are + needed; 4 (8) for a trailing size field and 8 (16) bytes for + free list pointers. Thus, the minimum allocatable size is + 16/24/32 bytes. + + Even a request for zero bytes (i.e., malloc(0)) returns a + pointer to something of the minimum allocatable size. + + The maximum overhead wastage (i.e., number of extra bytes + allocated than were requested in malloc) is less than or equal + to the minimum size, except for requests >= mmap_threshold that + are serviced via mmap(), where the worst case wastage is 2 * + sizeof(size_t) bytes plus the remainder from a system page (the + minimal mmap unit); typically 4096 or 8192 bytes. + + Maximum allocated size: 4-byte size_t: 2^32 minus about two pages + 8-byte size_t: 2^64 minus about two pages + + It is assumed that (possibly signed) size_t values suffice to + represent chunk sizes. `Possibly signed' is due to the fact + that `size_t' may be defined on a system as either a signed or + an unsigned type. The ISO C standard says that it must be + unsigned, but a few systems are known not to adhere to this. + Additionally, even when size_t is unsigned, sbrk (which is by + default used to obtain memory from system) accepts signed + arguments, and may not be able to handle size_t-wide arguments + with negative sign bit. Generally, values that would + appear as negative after accounting for overhead and alignment + are supported only via mmap(), which does not have this + limitation. + + Requests for sizes outside the allowed range will perform an optional + failure action and then return null. (Requests may also + also fail because a system is out of memory.) + + Thread-safety: thread-safe unless NO_THREADS is defined + + Compliance: I believe it is compliant with the 1997 Single Unix Specification + (See http://www.opennc.org). Also SVID/XPG, ANSI C, and probably + others as well. + +* Synopsis of compile-time options: + + People have reported using previous versions of this malloc on all + versions of Unix, sometimes by tweaking some of the defines + below. It has been tested most extensively on Solaris and + Linux. It is also reported to work on WIN32 platforms. + People also report using it in stand-alone embedded systems. + + The implementation is in straight, hand-tuned ANSI C. It is not + at all modular. (Sorry!) It uses a lot of macros. To be at all + usable, this code should be compiled using an optimizing compiler + (for example gcc -O3) that can simplify expressions and control + paths. (FAQ: some macros import variables as arguments rather than + declare locals because people reported that some debuggers + otherwise get confused.) + + OPTION DEFAULT VALUE + + Compilation Environment options: + + __STD_C derived from C compiler defines + WIN32 NOT defined + HAVE_MEMCPY defined + USE_MEMCPY 1 if HAVE_MEMCPY is defined + HAVE_MMAP defined as 1 + MMAP_CLEARS 1 + HAVE_MREMAP 0 unless linux defined + USE_ARENAS the same as HAVE_MMAP + malloc_getpagesize derived from system #includes, or 4096 if not + HAVE_USR_INCLUDE_MALLOC_H NOT defined + LACKS_UNISTD_H NOT defined unless WIN32 + LACKS_SYS_PARAM_H NOT defined unless WIN32 + LACKS_SYS_MMAN_H NOT defined unless WIN32 + + Changing default word sizes: + + INTERNAL_SIZE_T size_t + MALLOC_ALIGNMENT 2 * sizeof(INTERNAL_SIZE_T) + + Configuration and functionality options: + + USE_DL_PREFIX NOT defined + USE_PUBLIC_MALLOC_WRAPPERS NOT defined + USE_MALLOC_LOCK NOT defined + MALLOC_DEBUG NOT defined + REALLOC_ZERO_BYTES_FREES 1 + MALLOC_FAILURE_ACTION errno = ENOMEM, if __STD_C defined, else no-op + TRIM_FASTBINS 0 + FIRST_SORTED_BIN_SIZE 512 + + Options for customizing MORECORE: + + MORECORE sbrk + MORECORE_FAILURE -1 + MORECORE_CONTIGUOUS 1 + MORECORE_CANNOT_TRIM NOT defined + MORECORE_CLEARS 1 + MMAP_AS_MORECORE_SIZE (1024 * 1024) + + Tuning options that are also dynamically changeable via mallopt: + + DEFAULT_MXFAST 64 + DEFAULT_TRIM_THRESHOLD 128 * 1024 + DEFAULT_TOP_PAD 0 + DEFAULT_MMAP_THRESHOLD 128 * 1024 + DEFAULT_MMAP_MAX 65536 + + There are several other #defined constants and macros that you + probably don't want to touch unless you are extending or adapting malloc. */ + +/* + __STD_C should be nonzero if using ANSI-standard C compiler, a C++ + compiler, or a C compiler sufficiently close to ANSI to get away + with it. +*/ + +#include "cvmx-config.h" +#include "cvmx.h" +#include "cvmx-spinlock.h" +#include "cvmx-malloc.h" + + +#ifndef __STD_C +#if defined(__STDC__) || defined(__cplusplus) +#define __STD_C 1 +#else +#define __STD_C 0 +#endif +#endif /*__STD_C*/ + + +/* + Void_t* is the pointer type that malloc should say it returns +*/ + +#ifndef Void_t +#if 1 +#define Void_t void +#else +#define Void_t char +#endif +#endif /*Void_t*/ + + +#ifdef __cplusplus +extern "C" { +#endif + +/* define LACKS_UNISTD_H if your system does not have a <unistd.h>. */ + +/* #define LACKS_UNISTD_H */ + +#ifndef LACKS_UNISTD_H +#include <unistd.h> +#endif + +/* define LACKS_SYS_PARAM_H if your system does not have a <sys/param.h>. */ + +/* #define LACKS_SYS_PARAM_H */ + + +#include <stdio.h> /* needed for malloc_stats */ +#include <errno.h> /* needed for optional MALLOC_FAILURE_ACTION */ + + +/* + Debugging: + + Because freed chunks may be overwritten with bookkeeping fields, this + malloc will often die when freed memory is overwritten by user + programs. This can be very effective (albeit in an annoying way) + in helping track down dangling pointers. + + If you compile with -DMALLOC_DEBUG, a number of assertion checks are + enabled that will catch more memory errors. You probably won't be + able to make much sense of the actual assertion errors, but they + should help you locate incorrectly overwritten memory. The checking + is fairly extensive, and will slow down execution + noticeably. Calling malloc_stats or mallinfo with MALLOC_DEBUG set + will attempt to check every non-mmapped allocated and free chunk in + the course of computing the summmaries. (By nature, mmapped regions + cannot be checked very much automatically.) + + Setting MALLOC_DEBUG may also be helpful if you are trying to modify + this code. The assertions in the check routines spell out in more + detail the assumptions and invariants underlying the algorithms. + + Setting MALLOC_DEBUG does NOT provide an automated mechanism for + checking that all accesses to malloced memory stay within their + bounds. However, there are several add-ons and adaptations of this + or other mallocs available that do this. +*/ + +#define MALLOC_DEBUG 1 +#if MALLOC_DEBUG +#include <assert.h> +#else +#define assert(x) ((void)0) +#endif + + +/* + INTERNAL_SIZE_T is the word-size used for internal bookkeeping + of chunk sizes. + + The default version is the same as size_t. + + While not strictly necessary, it is best to define this as an + unsigned type, even if size_t is a signed type. This may avoid some + artificial size limitations on some systems. + + On a 64-bit machine, you may be able to reduce malloc overhead by + defining INTERNAL_SIZE_T to be a 32 bit `unsigned int' at the + expense of not being able to handle more than 2^32 of malloced + space. If this limitation is acceptable, you are encouraged to set + this unless you are on a platform requiring 16byte alignments. In + this case the alignment requirements turn out to negate any + potential advantages of decreasing size_t word size. + + Implementors: Beware of the possible combinations of: + - INTERNAL_SIZE_T might be signed or unsigned, might be 32 or 64 bits, + and might be the same width as int or as long + - size_t might have different width and signedness as INTERNAL_SIZE_T + - int and long might be 32 or 64 bits, and might be the same width + To deal with this, most comparisons and difference computations + among INTERNAL_SIZE_Ts should cast them to unsigned long, being + aware of the fact that casting an unsigned int to a wider long does + not sign-extend. (This also makes checking for negative numbers + awkward.) Some of these casts result in harmless compiler warnings + on some systems. +*/ + +#ifndef INTERNAL_SIZE_T +#define INTERNAL_SIZE_T size_t +#endif + +/* The corresponding word size */ +#define SIZE_SZ (sizeof(INTERNAL_SIZE_T)) + + +/* + MALLOC_ALIGNMENT is the minimum alignment for malloc'ed chunks. + It must be a power of two at least 2 * SIZE_SZ, even on machines + for which smaller alignments would suffice. It may be defined as + larger than this though. Note however that code and data structures + are optimized for the case of 8-byte alignment. +*/ + + +#ifndef MALLOC_ALIGNMENT +#define MALLOC_ALIGNMENT (2 * SIZE_SZ) +#endif + +/* The corresponding bit mask value */ +#define MALLOC_ALIGN_MASK (MALLOC_ALIGNMENT - 1) + + + +/* + REALLOC_ZERO_BYTES_FREES should be set if a call to + realloc with zero bytes should be the same as a call to free. + This is required by the C standard. Otherwise, since this malloc + returns a unique pointer for malloc(0), so does realloc(p, 0). +*/ + +#ifndef REALLOC_ZERO_BYTES_FREES +#define REALLOC_ZERO_BYTES_FREES 1 +#endif + +/* + TRIM_FASTBINS controls whether free() of a very small chunk can + immediately lead to trimming. Setting to true (1) can reduce memory + footprint, but will almost always slow down programs that use a lot + of small chunks. + + Define this only if you are willing to give up some speed to more + aggressively reduce system-level memory footprint when releasing + memory in programs that use many small chunks. You can get + essentially the same effect by setting MXFAST to 0, but this can + lead to even greater slowdowns in programs using many small chunks. + TRIM_FASTBINS is an in-between compile-time option, that disables + only those chunks bordering topmost memory from being placed in + fastbins. +*/ + +#ifndef TRIM_FASTBINS +#define TRIM_FASTBINS 0 +#endif + + +/* + USE_DL_PREFIX will prefix all public routines with the string 'dl'. + This is necessary when you only want to use this malloc in one part + of a program, using your regular system malloc elsewhere. +*/ + +#define USE_DL_PREFIX + + +/* + Two-phase name translation. + All of the actual routines are given mangled names. + When wrappers are used, they become the public callable versions. + When DL_PREFIX is used, the callable names are prefixed. +*/ + +#ifdef USE_DL_PREFIX +#define public_cALLOc cvmx_calloc +#define public_fREe cvmx_free +#define public_cFREe dlcfree +#define public_mALLOc cvmx_malloc +#define public_mEMALIGn cvmx_memalign +#define public_rEALLOc cvmx_realloc +#define public_vALLOc dlvalloc +#define public_pVALLOc dlpvalloc +#define public_mALLINFo dlmallinfo +#define public_mALLOPt dlmallopt +#define public_mTRIm dlmalloc_trim +#define public_mSTATs dlmalloc_stats +#define public_mUSABLe dlmalloc_usable_size +#define public_iCALLOc dlindependent_calloc +#define public_iCOMALLOc dlindependent_comalloc +#define public_gET_STATe dlget_state +#define public_sET_STATe dlset_state +#else /* USE_DL_PREFIX */ +#ifdef _LIBC +#error _LIBC defined and should not be +/* Special defines for the GNU C library. */ +#define public_cALLOc __libc_calloc +#define public_fREe __libc_free +#define public_cFREe __libc_cfree +#define public_mALLOc __libc_malloc +#define public_mEMALIGn __libc_memalign +#define public_rEALLOc __libc_realloc +#define public_vALLOc __libc_valloc +#define public_pVALLOc __libc_pvalloc +#define public_mALLINFo __libc_mallinfo +#define public_mALLOPt __libc_mallopt +#define public_mTRIm __malloc_trim +#define public_mSTATs __malloc_stats +#define public_mUSABLe __malloc_usable_size +#define public_iCALLOc __libc_independent_calloc +#define public_iCOMALLOc __libc_independent_comalloc +#define public_gET_STATe __malloc_get_state +#define public_sET_STATe __malloc_set_state +#define malloc_getpagesize __getpagesize() +#define open __open +#define mmap __mmap +#define munmap __munmap +#define mremap __mremap +#define mprotect __mprotect +#define MORECORE (*__morecore) +#define MORECORE_FAILURE 0 + +Void_t * __default_morecore (ptrdiff_t); +Void_t *(*__morecore)(ptrdiff_t) = __default_morecore; + +#else /* !_LIBC */ +#define public_cALLOc calloc +#define public_fREe free +#define public_cFREe cfree +#define public_mALLOc malloc +#define public_mEMALIGn memalign +#define public_rEALLOc realloc +#define public_vALLOc valloc +#define public_pVALLOc pvalloc +#define public_mALLINFo mallinfo +#define public_mALLOPt mallopt +#define public_mTRIm malloc_trim +#define public_mSTATs malloc_stats +#define public_mUSABLe malloc_usable_size +#define public_iCALLOc independent_calloc +#define public_iCOMALLOc independent_comalloc +#define public_gET_STATe malloc_get_state +#define public_sET_STATe malloc_set_state +#endif /* _LIBC */ +#endif /* USE_DL_PREFIX */ + + +/* + HAVE_MEMCPY should be defined if you are not otherwise using + ANSI STD C, but still have memcpy and memset in your C library + and want to use them in calloc and realloc. Otherwise simple + macro versions are defined below. + + USE_MEMCPY should be defined as 1 if you actually want to + have memset and memcpy called. People report that the macro + versions are faster than libc versions on some systems. + + Even if USE_MEMCPY is set to 1, loops to copy/clear small chunks + (of <= 36 bytes) are manually unrolled in realloc and calloc. +*/ + +#define HAVE_MEMCPY + +#ifndef USE_MEMCPY +#ifdef HAVE_MEMCPY +#define USE_MEMCPY 1 +#else +#define USE_MEMCPY 0 +#endif +#endif + + +#if (__STD_C || defined(HAVE_MEMCPY)) + +#ifdef WIN32 +/* On Win32 memset and memcpy are already declared in windows.h */ +#else +#if __STD_C +void* memset(void*, int, size_t); +void* memcpy(void*, const void*, size_t); +#else +Void_t* memset(); +Void_t* memcpy(); +#endif +#endif +#endif + +/* + MALLOC_FAILURE_ACTION is the action to take before "return 0" when + malloc fails to be able to return memory, either because memory is + exhausted or because of illegal arguments. + + By default, sets errno if running on STD_C platform, else does nothing. +*/ + +#ifndef MALLOC_FAILURE_ACTION +#if __STD_C +#define MALLOC_FAILURE_ACTION \ + errno = ENOMEM; + +#else +#define MALLOC_FAILURE_ACTION +#endif +#endif + +/* + MORECORE-related declarations. By default, rely on sbrk +*/ + + +#ifdef LACKS_UNISTD_H +#if !defined(__FreeBSD__) && !defined(__OpenBSD__) && !defined(__NetBSD__) +#if __STD_C +extern Void_t* sbrk(ptrdiff_t); +#else +extern Void_t* sbrk(); +#endif +#endif +#endif + +/* + MORECORE is the name of the routine to call to obtain more memory + from the system. See below for general guidance on writing + alternative MORECORE functions, as well as a version for WIN32 and a + sample version for pre-OSX macos. +*/ +#undef MORECORE // not supported +#ifndef MORECORE +#define MORECORE notsupported +#endif + +/* + MORECORE_FAILURE is the value returned upon failure of MORECORE + as well as mmap. Since it cannot be an otherwise valid memory address, + and must reflect values of standard sys calls, you probably ought not + try to redefine it. +*/ + +#ifndef MORECORE_FAILURE +#define MORECORE_FAILURE (-1) +#endif + +/* + If MORECORE_CONTIGUOUS is true, take advantage of fact that + consecutive calls to MORECORE with positive arguments always return + contiguous increasing addresses. This is true of unix sbrk. Even + if not defined, when regions happen to be contiguous, malloc will + permit allocations spanning regions obtained from different + calls. But defining this when applicable enables some stronger + consistency checks and space efficiencies. +*/ + +#ifndef MORECORE_CONTIGUOUS +#define MORECORE_CONTIGUOUS 0 +#endif + +/* + Define MORECORE_CANNOT_TRIM if your version of MORECORE + cannot release space back to the system when given negative + arguments. This is generally necessary only if you are using + a hand-crafted MORECORE function that cannot handle negative arguments. +*/ + +#define MORECORE_CANNOT_TRIM 1 + +/* MORECORE_CLEARS (default 1) + The degree to which the routine mapped to MORECORE zeroes out + memory: never (0), only for newly allocated space (1) or always + (2). The distinction between (1) and (2) is necessary because on + some systems, if the application first decrements and then + increments the break value, the contents of the reallocated space + are unspecified. +*/ + +#ifndef MORECORE_CLEARS +#define MORECORE_CLEARS 0 +#endif + + +/* + Define HAVE_MMAP as true to optionally make malloc() use mmap() to + allocate very large blocks. These will be returned to the + operating system immediately after a free(). Also, if mmap + is available, it is used as a backup strategy in cases where + MORECORE fails to provide space from system. + + This malloc is best tuned to work with mmap for large requests. + If you do not have mmap, operations involving very large chunks (1MB + or so) may be slower than you'd like. +*/ + +#undef HAVE_MMAP +#ifndef HAVE_MMAP +#define HAVE_MMAP 0 + +/* + Standard unix mmap using /dev/zero clears memory so calloc doesn't + need to. +*/ + +#ifndef MMAP_CLEARS +#define MMAP_CLEARS 0 +#endif + +#else /* no mmap */ +#ifndef MMAP_CLEARS +#define MMAP_CLEARS 0 +#endif +#endif + + +/* + MMAP_AS_MORECORE_SIZE is the minimum mmap size argument to use if + sbrk fails, and mmap is used as a backup (which is done only if + HAVE_MMAP). The value must be a multiple of page size. This + backup strategy generally applies only when systems have "holes" in + address space, so sbrk cannot perform contiguous expansion, but + there is still space available on system. On systems for which + this is known to be useful (i.e. most linux kernels), this occurs + only when programs allocate huge amounts of memory. Between this, + and the fact that mmap regions tend to be limited, the size should + be large, to avoid too many mmap calls and thus avoid running out + of kernel resources. +*/ + +#ifndef MMAP_AS_MORECORE_SIZE +#define MMAP_AS_MORECORE_SIZE (1024 * 1024) +#endif + +/* + Define HAVE_MREMAP to make realloc() use mremap() to re-allocate + large blocks. This is currently only possible on Linux with + kernel versions newer than 1.3.77. +*/ +#undef linux +#ifndef HAVE_MREMAP +#ifdef linux +#define HAVE_MREMAP 1 +#else +#define HAVE_MREMAP 0 +#endif + +#endif /* HAVE_MMAP */ + +/* Define USE_ARENAS to enable support for multiple `arenas'. These + are allocated using mmap(), are necessary for threads and + occasionally useful to overcome address space limitations affecting + sbrk(). */ + +#ifndef USE_ARENAS +#define USE_ARENAS 1 // we 'manually' mmap the arenas..... +#endif + + +/* + The system page size. To the extent possible, this malloc manages + memory from the system in page-size units. Note that this value is + cached during initialization into a field of malloc_state. So even + if malloc_getpagesize is a function, it is only called once. + + The following mechanics for getpagesize were adapted from bsd/gnu + getpagesize.h. If none of the system-probes here apply, a value of + 4096 is used, which should be OK: If they don't apply, then using + the actual value probably doesn't impact performance. +*/ + + +#define malloc_getpagesize (4096) +#ifndef malloc_getpagesize + +#ifndef LACKS_UNISTD_H +# include <unistd.h> +#endif + +# ifdef _SC_PAGESIZE /* some SVR4 systems omit an underscore */ +# ifndef _SC_PAGE_SIZE +# define _SC_PAGE_SIZE _SC_PAGESIZE +# endif +# endif + +# ifdef _SC_PAGE_SIZE +# define malloc_getpagesize sysconf(_SC_PAGE_SIZE) +# else +# if defined(BSD) || defined(DGUX) || defined(HAVE_GETPAGESIZE) + extern size_t getpagesize(); +# define malloc_getpagesize getpagesize() +# else +# ifdef WIN32 /* use supplied emulation of getpagesize */ +# define malloc_getpagesize getpagesize() +# else +# ifndef LACKS_SYS_PARAM_H +# include <sys/param.h> +# endif +# ifdef EXEC_PAGESIZE +# define malloc_getpagesize EXEC_PAGESIZE +# else +# ifdef NBPG +# ifndef CLSIZE +# define malloc_getpagesize NBPG +# else +# define malloc_getpagesize (NBPG * CLSIZE) +# endif +# else +# ifdef NBPC +# define malloc_getpagesize NBPC +# else +# ifdef PAGESIZE +# define malloc_getpagesize PAGESIZE +# else /* just guess */ +# define malloc_getpagesize (4096) +# endif +# endif +# endif +# endif +# endif +# endif +# endif +#endif + +/* + This version of malloc supports the standard SVID/XPG mallinfo + routine that returns a struct containing usage properties and + statistics. It should work on any SVID/XPG compliant system that has + a /usr/include/malloc.h defining struct mallinfo. (If you'd like to + install such a thing yourself, cut out the preliminary declarations + as described above and below and save them in a malloc.h file. But + there's no compelling reason to bother to do this.) + + The main declaration needed is the mallinfo struct that is returned + (by-copy) by mallinfo(). The SVID/XPG malloinfo struct contains a + bunch of fields that are not even meaningful in this version of + malloc. These fields are are instead filled by mallinfo() with + other numbers that might be of interest. + + HAVE_USR_INCLUDE_MALLOC_H should be set if you have a + /usr/include/malloc.h file that includes a declaration of struct + mallinfo. If so, it is included; else an SVID2/XPG2 compliant + version is declared below. These must be precisely the same for + mallinfo() to work. The original SVID version of this struct, + defined on most systems with mallinfo, declares all fields as + ints. But some others define as unsigned long. If your system + defines the fields using a type of different width than listed here, + you must #include your system version and #define + HAVE_USR_INCLUDE_MALLOC_H. +*/ + +/* #define HAVE_USR_INCLUDE_MALLOC_H */ + +#ifdef HAVE_USR_INCLUDE_MALLOC_H +#include "/usr/include/malloc.h" +#endif + + +/* ---------- description of public routines ------------ */ + +/* + malloc(size_t n) + Returns a pointer to a newly allocated chunk of at least n bytes, or null + if no space is available. Additionally, on failure, errno is + set to ENOMEM on ANSI C systems. + + If n is zero, malloc returns a minumum-sized chunk. (The minimum + size is 16 bytes on most 32bit systems, and 24 or 32 bytes on 64bit + systems.) On most systems, size_t is an unsigned type, so calls + with negative arguments are interpreted as requests for huge amounts + of space, which will often fail. The maximum supported value of n + differs across systems, but is in all cases less than the maximum + representable value of a size_t. +*/ +#if __STD_C +Void_t* public_mALLOc(cvmx_arena_list_t arena_list, size_t); +#else +Void_t* public_mALLOc(); +#endif + +/* + free(Void_t* p) + Releases the chunk of memory pointed to by p, that had been previously + allocated using malloc or a related routine such as realloc. + It has no effect if p is null. It can have arbitrary (i.e., bad!) + effects if p has already been freed. + + Unless disabled (using mallopt), freeing very large spaces will + when possible, automatically trigger operations that give + back unused memory to the system, thus reducing program footprint. +*/ +#if __STD_C +void public_fREe(Void_t*); +#else +void public_fREe(); +#endif + +/* + calloc(size_t n_elements, size_t element_size); + Returns a pointer to n_elements * element_size bytes, with all locations + set to zero. +*/ +#if __STD_C +Void_t* public_cALLOc(cvmx_arena_list_t arena_list, size_t, size_t); +#else +Void_t* public_cALLOc(); +#endif + +/* + realloc(Void_t* p, size_t n) + Returns a pointer to a chunk of size n that contains the same data + as does chunk p up to the minimum of (n, p's size) bytes, or null + if no space is available. + + The returned pointer may or may not be the same as p. The algorithm + prefers extending p when possible, otherwise it employs the + equivalent of a malloc-copy-free sequence. + + If p is null, realloc is equivalent to malloc. + + If space is not available, realloc returns null, errno is set (if on + ANSI) and p is NOT freed. + + if n is for fewer bytes than already held by p, the newly unused + space is lopped off and freed if possible. Unless the #define + REALLOC_ZERO_BYTES_FREES is set, realloc with a size argument of + zero (re)allocates a minimum-sized chunk. + + Large chunks that were internally obtained via mmap will always + be reallocated using malloc-copy-free sequences unless + the system supports MREMAP (currently only linux). + + The old unix realloc convention of allowing the last-free'd chunk + to be used as an argument to realloc is not supported. +*/ +#if __STD_C +Void_t* public_rEALLOc(cvmx_arena_list_t arena_list, Void_t*, size_t); +#else +Void_t* public_rEALLOc(); +#endif + +/* + memalign(size_t alignment, size_t n); + Returns a pointer to a newly allocated chunk of n bytes, aligned + in accord with the alignment argument. + + The alignment argument should be a power of two. If the argument is + not a power of two, the nearest greater power is used. + 8-byte alignment is guaranteed by normal malloc calls, so don't + bother calling memalign with an argument of 8 or less. + + Overreliance on memalign is a sure way to fragment space. +*/ +#if __STD_C +Void_t* public_mEMALIGn(cvmx_arena_list_t arena_list, size_t, size_t); +#else +Void_t* public_mEMALIGn(); +#endif + +/* + valloc(size_t n); + Equivalent to memalign(pagesize, n), where pagesize is the page + size of the system. If the pagesize is unknown, 4096 is used. +*/ +#if __STD_C +Void_t* public_vALLOc(size_t); +#else +Void_t* public_vALLOc(); +#endif + + + +/* + mallopt(int parameter_number, int parameter_value) + Sets tunable parameters The format is to provide a + (parameter-number, parameter-value) pair. mallopt then sets the + corresponding parameter to the argument value if it can (i.e., so + long as the value is meaningful), and returns 1 if successful else + 0. SVID/XPG/ANSI defines four standard param numbers for mallopt, + normally defined in malloc.h. Only one of these (M_MXFAST) is used + in this malloc. The others (M_NLBLKS, M_GRAIN, M_KEEP) don't apply, + so setting them has no effect. But this malloc also supports four + other options in mallopt. See below for details. Briefly, supported + parameters are as follows (listed defaults are for "typical" + configurations). + + Symbol param # default allowed param values + M_MXFAST 1 64 0-80 (0 disables fastbins) + M_TRIM_THRESHOLD -1 128*1024 any (-1U disables trimming) + M_TOP_PAD -2 0 any + M_MMAP_THRESHOLD -3 128*1024 any (or 0 if no MMAP support) + M_MMAP_MAX -4 65536 any (0 disables use of mmap) +*/ +#if __STD_C +int public_mALLOPt(int, int); +#else +int public_mALLOPt(); +#endif + + +/* + mallinfo() + Returns (by copy) a struct containing various summary statistics: + + arena: current total non-mmapped bytes allocated from system + ordblks: the number of free chunks + smblks: the number of fastbin blocks (i.e., small chunks that + have been freed but not use resused or consolidated) + hblks: current number of mmapped regions + hblkhd: total bytes held in mmapped regions + usmblks: the maximum total allocated space. This will be greater + than current total if trimming has occurred. + fsmblks: total bytes held in fastbin blocks + uordblks: current total allocated space (normal or mmapped) + fordblks: total free space + keepcost: the maximum number of bytes that could ideally be released + back to system via malloc_trim. ("ideally" means that + it ignores page restrictions etc.) + + Because these fields are ints, but internal bookkeeping may + be kept as longs, the reported values may wrap around zero and + thus be inaccurate. +*/ +#if __STD_C +struct mallinfo public_mALLINFo(void); +#else +struct mallinfo public_mALLINFo(); +#endif + +/* + independent_calloc(size_t n_elements, size_t element_size, Void_t* chunks[]); + + independent_calloc is similar to calloc, but instead of returning a + single cleared space, it returns an array of pointers to n_elements + independent elements that can hold contents of size elem_size, each + of which starts out cleared, and can be independently freed, + realloc'ed etc. The elements are guaranteed to be adjacently + allocated (this is not guaranteed to occur with multiple callocs or + mallocs), which may also improve cache locality in some + applications. + + The "chunks" argument is optional (i.e., may be null, which is + probably the most typical usage). If it is null, the returned array + is itself dynamically allocated and should also be freed when it is + no longer needed. Otherwise, the chunks array must be of at least + n_elements in length. It is filled in with the pointers to the + chunks. + + In either case, independent_calloc returns this pointer array, or + null if the allocation failed. If n_elements is zero and "chunks" + is null, it returns a chunk representing an array with zero elements + (which should be freed if not wanted). + + Each element must be individually freed when it is no longer + needed. If you'd like to instead be able to free all at once, you + should instead use regular calloc and assign pointers into this + space to represent elements. (In this case though, you cannot + independently free elements.) + + independent_calloc simplifies and speeds up implementations of many + kinds of pools. It may also be useful when constructing large data + structures that initially have a fixed number of fixed-sized nodes, + but the number is not known at compile time, and some of the nodes + may later need to be freed. For example: + + struct Node { int item; struct Node* next; }; + + struct Node* build_list() { + struct Node** pool; + int n = read_number_of_nodes_needed(); + if (n <= 0) return 0; + pool = (struct Node**)(independent_calloc(n, sizeof(struct Node), 0); + if (pool == 0) die(); + // organize into a linked list... + struct Node* first = pool[0]; + for (i = 0; i < n-1; ++i) + pool[i]->next = pool[i+1]; + free(pool); // Can now free the array (or not, if it is needed later) + return first; + } +*/ +#if __STD_C +Void_t** public_iCALLOc(size_t, size_t, Void_t**); +#else +Void_t** public_iCALLOc(); +#endif + +/* + independent_comalloc(size_t n_elements, size_t sizes[], Void_t* chunks[]); + + independent_comalloc allocates, all at once, a set of n_elements + chunks with sizes indicated in the "sizes" array. It returns + an array of pointers to these elements, each of which can be + independently freed, realloc'ed etc. The elements are guaranteed to + be adjacently allocated (this is not guaranteed to occur with + multiple callocs or mallocs), which may also improve cache locality + in some applications. + + The "chunks" argument is optional (i.e., may be null). If it is null + the returned array is itself dynamically allocated and should also + be freed when it is no longer needed. Otherwise, the chunks array + must be of at least n_elements in length. It is filled in with the + pointers to the chunks. + + In either case, independent_comalloc returns this pointer array, or + null if the allocation failed. If n_elements is zero and chunks is + null, it returns a chunk representing an array with zero elements + (which should be freed if not wanted). + + Each element must be individually freed when it is no longer + needed. If you'd like to instead be able to free all at once, you + should instead use a single regular malloc, and assign pointers at + particular offsets in the aggregate space. (In this case though, you + cannot independently free elements.) + + independent_comallac differs from independent_calloc in that each + element may have a different size, and also that it does not + automatically clear elements. + + independent_comalloc can be used to speed up allocation in cases + where several structs or objects must always be allocated at the + same time. For example: + + struct Head { ... } + struct Foot { ... } + + void send_message(char* msg) { + int msglen = strlen(msg); + size_t sizes[3] = { sizeof(struct Head), msglen, sizeof(struct Foot) }; + void* chunks[3]; + if (independent_comalloc(3, sizes, chunks) == 0) + die(); + struct Head* head = (struct Head*)(chunks[0]); + char* body = (char*)(chunks[1]); + struct Foot* foot = (struct Foot*)(chunks[2]); + // ... + } + + In general though, independent_comalloc is worth using only for + larger values of n_elements. For small values, you probably won't + detect enough difference from series of malloc calls to bother. + + Overuse of independent_comalloc can increase overall memory usage, + since it cannot reuse existing noncontiguous small chunks that + might be available for some of the elements. +*/ +#if __STD_C +Void_t** public_iCOMALLOc(size_t, size_t*, Void_t**); +#else +Void_t** public_iCOMALLOc(); +#endif + + +/* + pvalloc(size_t n); + Equivalent to valloc(minimum-page-that-holds(n)), that is, + round up n to nearest pagesize. + */ +#if __STD_C +Void_t* public_pVALLOc(size_t); +#else +Void_t* public_pVALLOc(); +#endif + +/* + cfree(Void_t* p); + Equivalent to free(p). + + cfree is needed/defined on some systems that pair it with calloc, + for odd historical reasons (such as: cfree is used in example + code in the first edition of K&R). +*/ +#if __STD_C +void public_cFREe(Void_t*); +#else +void public_cFREe(); +#endif + +/* + malloc_trim(size_t pad); + + If possible, gives memory back to the system (via negative + arguments to sbrk) if there is unused memory at the `high' end of + the malloc pool. You can call this after freeing large blocks of + memory to potentially reduce the system-level memory requirements + of a program. However, it cannot guarantee to reduce memory. Under + some allocation patterns, some large free blocks of memory will be + locked between two used chunks, so they cannot be given back to + the system. + + The `pad' argument to malloc_trim represents the amount of free + trailing space to leave untrimmed. If this argument is zero, + only the minimum amount of memory to maintain internal data + structures will be left (one page or less). Non-zero arguments + can be supplied to maintain enough trailing space to service + future expected allocations without having to re-obtain memory + from the system. + + Malloc_trim returns 1 if it actually released any memory, else 0. + On systems that do not support "negative sbrks", it will always + rreturn 0. +*/ +#if __STD_C +int public_mTRIm(size_t); +#else +int public_mTRIm(); +#endif + +/* + malloc_usable_size(Void_t* p); + + Returns the number of bytes you can actually use in + an allocated chunk, which may be more than you requested (although + often not) due to alignment and minimum size constraints. + You can use this many bytes without worrying about + overwriting other allocated objects. This is not a particularly great + programming practice. malloc_usable_size can be more useful in + debugging and assertions, for example: + + p = malloc(n); + assert(malloc_usable_size(p) >= 256); + +*/ +#if __STD_C +size_t public_mUSABLe(Void_t*); +#else +size_t public_mUSABLe(); +#endif + +/* + malloc_stats(); + Prints on stderr the amount of space obtained from the system (both + via sbrk and mmap), the maximum amount (which may be more than + current if malloc_trim and/or munmap got called), and the current + number of bytes allocated via malloc (or realloc, etc) but not yet + freed. Note that this is the number of bytes allocated, not the + number requested. It will be larger than the number requested + because of alignment and bookkeeping overhead. Because it includes + alignment wastage as being in use, this figure may be greater than + zero even when no user-level chunks are allocated. + + The reported current and maximum system memory can be inaccurate if + a program makes other calls to system memory allocation functions + (normally sbrk) outside of malloc. + + malloc_stats prints only the most commonly interesting statistics. + More information can be obtained by calling mallinfo. + +*/ +#if __STD_C +void public_mSTATs(void); +#else +void public_mSTATs(); +#endif + +/* + malloc_get_state(void); + + Returns the state of all malloc variables in an opaque data + structure. +*/ +#if __STD_C +Void_t* public_gET_STATe(void); +#else +Void_t* public_gET_STATe(); +#endif + +/* + malloc_set_state(Void_t* state); + + Restore the state of all malloc variables from data obtained with + malloc_get_state(). +*/ +#if __STD_C +int public_sET_STATe(Void_t*); +#else +int public_sET_STATe(); +#endif + +#ifdef _LIBC +/* + posix_memalign(void **memptr, size_t alignment, size_t size); + + POSIX wrapper like memalign(), checking for validity of size. +*/ +int __posix_memalign(void **, size_t, size_t); +#endif + +/* mallopt tuning options */ + +/* + M_MXFAST is the maximum request size used for "fastbins", special bins + that hold returned chunks without consolidating their spaces. This + enables future requests for chunks of the same size to be handled + very quickly, but can increase fragmentation, and thus increase the + overall memory footprint of a program. + + This malloc manages fastbins very conservatively yet still + efficiently, so fragmentation is rarely a problem for values less + than or equal to the default. The maximum supported value of MXFAST + is 80. You wouldn't want it any higher than this anyway. Fastbins + are designed especially for use with many small structs, objects or + strings -- the default handles structs/objects/arrays with sizes up + to 8 4byte fields, or small strings representing words, tokens, + etc. Using fastbins for larger objects normally worsens + fragmentation without improving speed. + + M_MXFAST is set in REQUEST size units. It is internally used in + chunksize units, which adds padding and alignment. You can reduce + M_MXFAST to 0 to disable all use of fastbins. This causes the malloc + algorithm to be a closer approximation of fifo-best-fit in all cases, + not just for larger requests, but will generally cause it to be + slower. +*/ + + +/* M_MXFAST is a standard SVID/XPG tuning option, usually listed in malloc.h */ +#ifndef M_MXFAST +#define M_MXFAST 1 +#endif + +#ifndef DEFAULT_MXFAST +#define DEFAULT_MXFAST 64 +#endif + + +/* + M_TRIM_THRESHOLD is the maximum amount of unused top-most memory + to keep before releasing via malloc_trim in free(). + + Automatic trimming is mainly useful in long-lived programs. + Because trimming via sbrk can be slow on some systems, and can + sometimes be wasteful (in cases where programs immediately + afterward allocate more large chunks) the value should be high + enough so that your overall system performance would improve by + releasing this much memory. + + The trim threshold and the mmap control parameters (see below) + can be traded off with one another. Trimming and mmapping are + two different ways of releasing unused memory back to the + system. Between these two, it is often possible to keep + system-level demands of a long-lived program down to a bare + minimum. For example, in one test suite of sessions measuring + the XF86 X server on Linux, using a trim threshold of 128K and a + mmap threshold of 192K led to near-minimal long term resource + consumption. + + If you are using this malloc in a long-lived program, it should + pay to experiment with these values. As a rough guide, you + might set to a value close to the average size of a process + (program) running on your system. Releasing this much memory + would allow such a process to run in memory. Generally, it's + worth it to tune for trimming rather tham memory mapping when a + program undergoes phases where several large chunks are + allocated and released in ways that can reuse each other's + storage, perhaps mixed with phases where there are no such + chunks at all. And in well-behaved long-lived programs, + controlling release of large blocks via trimming versus mapping + is usually faster. + + However, in most programs, these parameters serve mainly as + protection against the system-level effects of carrying around + massive amounts of unneeded memory. Since frequent calls to + sbrk, mmap, and munmap otherwise degrade performance, the default + parameters are set to relatively high values that serve only as + safeguards. + + The trim value It must be greater than page size to have any useful + effect. To disable trimming completely, you can set to + (unsigned long)(-1) + + Trim settings interact with fastbin (MXFAST) settings: Unless + TRIM_FASTBINS is defined, automatic trimming never takes place upon + freeing a chunk with size less than or equal to MXFAST. Trimming is + instead delayed until subsequent freeing of larger chunks. However, + you can still force an attempted trim by calling malloc_trim. + + Also, trimming is not generally possible in cases where + the main arena is obtained via mmap. + + Note that the trick some people use of mallocing a huge space and + then freeing it at program startup, in an attempt to reserve system + memory, doesn't have the intended effect under automatic trimming, + since that memory will immediately be returned to the system. +*/ + +#define M_TRIM_THRESHOLD -1 + +#ifndef DEFAULT_TRIM_THRESHOLD +#define DEFAULT_TRIM_THRESHOLD (128 * 1024) +#endif + +/* + M_TOP_PAD is the amount of extra `padding' space to allocate or + retain whenever sbrk is called. It is used in two ways internally: + + * When sbrk is called to extend the top of the arena to satisfy + a new malloc request, this much padding is added to the sbrk + request. + + * When malloc_trim is called automatically from free(), + it is used as the `pad' argument. + + In both cases, the actual amount of padding is rounded + so that the end of the arena is always a system page boundary. + + The main reason for using padding is to avoid calling sbrk so + often. Having even a small pad greatly reduces the likelihood + that nearly every malloc request during program start-up (or + after trimming) will invoke sbrk, which needlessly wastes + time. + + Automatic rounding-up to page-size units is normally sufficient + to avoid measurable overhead, so the default is 0. However, in + systems where sbrk is relatively slow, it can pay to increase + this value, at the expense of carrying around more memory than + the program needs. +*/ + +#define M_TOP_PAD -2 + +#ifndef DEFAULT_TOP_PAD +#define DEFAULT_TOP_PAD (0) +#endif + +/* + M_MMAP_THRESHOLD is the request size threshold for using mmap() + to service a request. Requests of at least this size that cannot + be allocated using already-existing space will be serviced via mmap. + (If enough normal freed space already exists it is used instead.) + + Using mmap segregates relatively large chunks of memory so that + they can be individually obtained and released from the host + system. A request serviced through mmap is never reused by any + other request (at least not directly; the system may just so + happen to remap successive requests to the same locations). + + Segregating space in this way has the benefits that: + + 1. Mmapped space can ALWAYS be individually released back + to the system, which helps keep the system level memory + demands of a long-lived program low. + 2. Mapped memory can never become `locked' between + other chunks, as can happen with normally allocated chunks, which + means that even trimming via malloc_trim would not release them. + 3. On some systems with "holes" in address spaces, mmap can obtain + memory that sbrk cannot. + + However, it has the disadvantages that: + + 1. The space cannot be reclaimed, consolidated, and then + used to service later requests, as happens with normal chunks. + 2. It can lead to more wastage because of mmap page alignment + requirements + 3. It causes malloc performance to be more dependent on host + system memory management support routines which may vary in + implementation quality and may impose arbitrary + limitations. Generally, servicing a request via normal + malloc steps is faster than going through a system's mmap. + + The advantages of mmap nearly always outweigh disadvantages for + "large" chunks, but the value of "large" varies across systems. The + default is an empirically derived value that works well in most + systems. +*/ + +#define M_MMAP_THRESHOLD -3 + +#ifndef DEFAULT_MMAP_THRESHOLD +#define DEFAULT_MMAP_THRESHOLD (128 * 1024) +#endif + +/* + M_MMAP_MAX is the maximum number of requests to simultaneously + service using mmap. This parameter exists because + some systems have a limited number of internal tables for + use by mmap, and using more than a few of them may degrade + performance. + + The default is set to a value that serves only as a safeguard. + Setting to 0 disables use of mmap for servicing large requests. If + HAVE_MMAP is not set, the default value is 0, and attempts to set it + to non-zero values in mallopt will fail. +*/ + +#define M_MMAP_MAX -4 + +#ifndef DEFAULT_MMAP_MAX +#if HAVE_MMAP +#define DEFAULT_MMAP_MAX (65536) +#else +#define DEFAULT_MMAP_MAX (0) +#endif +#endif + +#ifdef __cplusplus +}; /* end of extern "C" */ +#endif + +#include <cvmx-spinlock.h> +#include "malloc.h" +#include "thread-m.h" + +#ifdef DEBUG_PRINTS +#define debug_printf printf +#else +#define debug_printf(format, args...) +#endif + +#ifndef BOUNDED_N +#define BOUNDED_N(ptr, sz) (ptr) +#endif +#ifndef RETURN_ADDRESS +#define RETURN_ADDRESS(X_) (NULL) +#endif + +/* On some platforms we can compile internal, not exported functions better. + Let the environment provide a macro and define it to be empty if it + is not available. */ +#ifndef internal_function +# define internal_function +#endif + +/* Forward declarations. */ +struct malloc_chunk; +typedef struct malloc_chunk* mchunkptr; + +/* Internal routines. */ + +#if __STD_C + +static Void_t* _int_malloc(mstate, size_t); +static void _int_free(mstate, Void_t*); +static Void_t* _int_realloc(mstate, Void_t*, size_t); +static Void_t* _int_memalign(mstate, size_t, size_t); +static Void_t* _int_valloc(mstate, size_t); +static Void_t* _int_pvalloc(mstate, size_t); +static Void_t* cALLOc(cvmx_arena_list_t arena_list, size_t, size_t); +static Void_t** _int_icalloc(mstate, size_t, size_t, Void_t**); +static Void_t** _int_icomalloc(mstate, size_t, size_t*, Void_t**); +static int mTRIm(size_t); +static size_t mUSABLe(Void_t*); +static void mSTATs(void); +static int mALLOPt(int, int); +static struct mallinfo mALLINFo(mstate); + +static Void_t* internal_function mem2mem_check(Void_t *p, size_t sz); +static int internal_function top_check(void); +static void internal_function munmap_chunk(mchunkptr p); +#if HAVE_MREMAP +static mchunkptr internal_function mremap_chunk(mchunkptr p, size_t new_size); +#endif + +static Void_t* malloc_check(size_t sz, const Void_t *caller); +static void free_check(Void_t* mem, const Void_t *caller); +static Void_t* realloc_check(Void_t* oldmem, size_t bytes, + const Void_t *caller); +static Void_t* memalign_check(size_t alignment, size_t bytes, + const Void_t *caller); +#ifndef NO_THREADS +static Void_t* malloc_starter(size_t sz, const Void_t *caller); +static void free_starter(Void_t* mem, const Void_t *caller); +static Void_t* malloc_atfork(size_t sz, const Void_t *caller); +static void free_atfork(Void_t* mem, const Void_t *caller); +#endif + +#else + +Void_t* _int_malloc(); +void _int_free(); +Void_t* _int_realloc(); +Void_t* _int_memalign(); +Void_t* _int_valloc(); +Void_t* _int_pvalloc(); +/*static Void_t* cALLOc();*/ +static Void_t** _int_icalloc(); +static Void_t** _int_icomalloc(); +static int mTRIm(); +static size_t mUSABLe(); +static void mSTATs(); +static int mALLOPt(); +static struct mallinfo mALLINFo(); + +#endif + + + + +/* ------------- Optional versions of memcopy ---------------- */ + + +#if USE_MEMCPY + +/* + Note: memcpy is ONLY invoked with non-overlapping regions, + so the (usually slower) memmove is not needed. +*/ + +#define MALLOC_COPY(dest, src, nbytes) memcpy(dest, src, nbytes) +#define MALLOC_ZERO(dest, nbytes) memset(dest, 0, nbytes) + +#else /* !USE_MEMCPY */ + +/* Use Duff's device for good zeroing/copying performance. */ + +#define MALLOC_ZERO(charp, nbytes) \ +do { \ + INTERNAL_SIZE_T* mzp = (INTERNAL_SIZE_T*)(charp); \ + unsigned long mctmp = (nbytes)/sizeof(INTERNAL_SIZE_T); \ + long mcn; \ + if (mctmp < 8) mcn = 0; else { mcn = (mctmp-1)/8; mctmp %= 8; } \ + switch (mctmp) { \ + case 0: for(;;) { *mzp++ = 0; \ + case 7: *mzp++ = 0; \ + case 6: *mzp++ = 0; \ + case 5: *mzp++ = 0; \ + case 4: *mzp++ = 0; \ + case 3: *mzp++ = 0; \ + case 2: *mzp++ = 0; \ + case 1: *mzp++ = 0; if(mcn <= 0) break; mcn--; } \ + } \ +} while(0) + +#define MALLOC_COPY(dest,src,nbytes) \ +do { \ + INTERNAL_SIZE_T* mcsrc = (INTERNAL_SIZE_T*) src; \ + INTERNAL_SIZE_T* mcdst = (INTERNAL_SIZE_T*) dest; \ + unsigned long mctmp = (nbytes)/sizeof(INTERNAL_SIZE_T); \ + long mcn; \ + if (mctmp < 8) mcn = 0; else { mcn = (mctmp-1)/8; mctmp %= 8; } \ + switch (mctmp) { \ + case 0: for(;;) { *mcdst++ = *mcsrc++; \ + case 7: *mcdst++ = *mcsrc++; \ + case 6: *mcdst++ = *mcsrc++; \ + case 5: *mcdst++ = *mcsrc++; \ + case 4: *mcdst++ = *mcsrc++; \ + case 3: *mcdst++ = *mcsrc++; \ + case 2: *mcdst++ = *mcsrc++; \ + case 1: *mcdst++ = *mcsrc++; if(mcn <= 0) break; mcn--; } \ + } \ +} while(0) + +#endif + +/* ------------------ MMAP support ------------------ */ + + +#if HAVE_MMAP + +#include <fcntl.h> +#ifndef LACKS_SYS_MMAN_H +#include <sys/mman.h> +#endif + +#if !defined(MAP_ANONYMOUS) && defined(MAP_ANON) +# define MAP_ANONYMOUS MAP_ANON +#endif +#if !defined(MAP_FAILED) +# define MAP_FAILED ((char*)-1) +#endif + +#ifndef MAP_NORESERVE +# ifdef MAP_AUTORESRV +# define MAP_NORESERVE MAP_AUTORESRV +# else +# define MAP_NORESERVE 0 +# endif +#endif + +/* + Nearly all versions of mmap support MAP_ANONYMOUS, + so the following is unlikely to be needed, but is + supplied just in case. +*/ + +#ifndef MAP_ANONYMOUS + +static int dev_zero_fd = -1; /* Cached file descriptor for /dev/zero. */ + +#define MMAP(addr, size, prot, flags) ((dev_zero_fd < 0) ? \ + (dev_zero_fd = open("/dev/zero", O_RDWR), \ + mmap((addr), (size), (prot), (flags), dev_zero_fd, 0)) : \ + mmap((addr), (size), (prot), (flags), dev_zero_fd, 0)) + +#else + +#define MMAP(addr, size, prot, flags) \ + (mmap((addr), (size), (prot), (flags)|MAP_ANONYMOUS, -1, 0)) + +#endif + + +#endif /* HAVE_MMAP */ + + +/* + ----------------------- Chunk representations ----------------------- +*/ + + +/* + This struct declaration is misleading (but accurate and necessary). + It declares a "view" into memory allowing access to necessary + fields at known offsets from a given base. See explanation below. +*/ +struct malloc_chunk { + + INTERNAL_SIZE_T prev_size; /* Size of previous chunk (if free). */ + INTERNAL_SIZE_T size; /* Size in bytes, including overhead. */ + mstate arena_ptr; /* ptr to arena chunk belongs to */ + + struct malloc_chunk* fd; /* double links -- used only if free. */ + struct malloc_chunk* bk; +}; + + +/* + malloc_chunk details: + + (The following includes lightly edited explanations by Colin Plumb.) + + Chunks of memory are maintained using a `boundary tag' method as + described in e.g., Knuth or Standish. (See the paper by Paul + Wilson ftp://ftp.cs.utexas.edu/pub/garbage/allocsrv.ps for a + survey of such techniques.) Sizes of free chunks are stored both + in the front of each chunk and at the end. This makes + consolidating fragmented chunks into bigger chunks very fast. The + size fields also hold bits representing whether chunks are free or + in use. + + An allocated chunk looks like this: + + + chunk-> +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | Size of previous chunk, if allocated | | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | Size of chunk, in bytes |P| + mem-> +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | User data starts here... . + . . + . (malloc_usable_space() bytes) . + . | +nextchunk-> +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | Size of chunk | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + + + Where "chunk" is the front of the chunk for the purpose of most of + the malloc code, but "mem" is the pointer that is returned to the + user. "Nextchunk" is the beginning of the next contiguous chunk. + + Chunks always begin on even word boundries, so the mem portion + (which is returned to the user) is also on an even word boundary, and + thus at least double-word aligned. + + Free chunks are stored in circular doubly-linked lists, and look like this: + + chunk-> +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | Size of previous chunk | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + `head:' | Size of chunk, in bytes |P| + mem-> +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | Forward pointer to next chunk in list | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | Back pointer to previous chunk in list | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + | Unused space (may be 0 bytes long) . + . . + . | +nextchunk-> +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + `foot:' | Size of chunk, in bytes | + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + + The P (PREV_INUSE) bit, stored in the unused low-order bit of the + chunk size (which is always a multiple of two words), is an in-use + bit for the *previous* chunk. If that bit is *clear*, then the + word before the current chunk size contains the previous chunk + size, and can be used to find the front of the previous chunk. + The very first chunk allocated always has this bit set, + preventing access to non-existent (or non-owned) memory. If + prev_inuse is set for any given chunk, then you CANNOT determine + the size of the previous chunk, and might even get a memory + addressing fault when trying to do so. + + Note that the `foot' of the current chunk is actually represented + as the prev_size of the NEXT chunk. This makes it easier to + deal with alignments etc but can be very confusing when trying + to extend or adapt this code. + + The two exceptions to all this are + + 1. The special chunk `top' doesn't bother using the + trailing size field since there is no next contiguous chunk + that would have to index off it. After initialization, `top' + is forced to always exist. If it would become less than + MINSIZE bytes long, it is replenished. + + 2. Chunks allocated via mmap, which have the second-lowest-order + bit (IS_MMAPPED) set in their size fields. Because they are + allocated one-by-one, each must contain its own trailing size field. + +*/ + +/* + ---------- Size and alignment checks and conversions ---------- +*/ + +/* conversion from malloc headers to user pointers, and back */ +/* Added size for pointer to make room for arena_ptr */ +#define chunk2mem(p) ((Void_t*)((char*)(p) + 2*SIZE_SZ + sizeof(void *))) +#define mem2chunk(mem) ((mchunkptr)((char*)(mem) - 2*SIZE_SZ - sizeof(void *))) + +/* The smallest possible chunk */ +#define MIN_CHUNK_SIZE (sizeof(struct malloc_chunk)) + +/* The smallest size we can malloc is an aligned minimal chunk */ + +#define MINSIZE \ + (unsigned long)(((MIN_CHUNK_SIZE+MALLOC_ALIGN_MASK) & ~MALLOC_ALIGN_MASK)) + +/* Check if m has acceptable alignment */ + +#define aligned_OK(m) (((unsigned long)((m)) & (MALLOC_ALIGN_MASK)) == 0) + + +/* + Check if a request is so large that it would wrap around zero when + padded and aligned. To simplify some other code, the bound is made + low enough so that adding MINSIZE will also not wrap around zero. +*/ + +#define REQUEST_OUT_OF_RANGE(req) \ + ((unsigned long)(req) >= \ + (unsigned long)(INTERNAL_SIZE_T)(-2 * MINSIZE)) + +/* pad request bytes into a usable size -- internal version */ + + +/* prev_size field of next chunk is overwritten with data +** when in use. NOTE - last SIZE_SZ of arena must be left +** unused for last chunk to use +*/ +/* Added sizeof(void *) to make room for arena_ptr */ +#define request2size(req) \ + (((req) + sizeof(void *) + SIZE_SZ + MALLOC_ALIGN_MASK < MINSIZE) ? \ + MINSIZE : \ + ((req) + sizeof(void *) + SIZE_SZ + MALLOC_ALIGN_MASK) & ~MALLOC_ALIGN_MASK) + +/* Same, except also perform argument check */ + +#define checked_request2size(req, sz) \ + if (REQUEST_OUT_OF_RANGE(req)) { \ + MALLOC_FAILURE_ACTION; \ + return 0; \ + } \ + (sz) = request2size(req); + +/* + --------------- Physical chunk operations --------------- +*/ + + +/* size field is or'ed with PREV_INUSE when previous adjacent chunk in use */ +#define PREV_INUSE 0x1 + +/* extract inuse bit of previous chunk */ +#define prev_inuse(p) ((p)->size & PREV_INUSE) + + +/* size field is or'ed with IS_MMAPPED if the chunk was obtained with mmap() */ +#define IS_MMAPPED 0x2 + +/* check for mmap()'ed chunk */ +#define chunk_is_mmapped(p) ((p)->size & IS_MMAPPED) + + + +/* + Bits to mask off when extracting size + + Note: IS_MMAPPED is intentionally not masked off from size field in + macros for which mmapped chunks should never be seen. This should + cause helpful core dumps to occur if it is tried by accident by + people extending or adapting this malloc. +*/ +#define SIZE_BITS (PREV_INUSE|IS_MMAPPED) + +/* Get size, ignoring use bits */ +#define chunksize(p) ((p)->size & ~(SIZE_BITS)) + + +/* Ptr to next physical malloc_chunk. */ +#define next_chunk(p) ((mchunkptr)( ((char*)(p)) + ((p)->size & ~SIZE_BITS) )) + +/* Ptr to previous physical malloc_chunk */ +#define prev_chunk(p) ((mchunkptr)( ((char*)(p)) - ((p)->prev_size) )) + +/* Treat space at ptr + offset as a chunk */ +#define chunk_at_offset(p, s) ((mchunkptr)(((char*)(p)) + (s))) + +/* extract p's inuse bit */ +#define inuse(p)\ +((((mchunkptr)(((char*)(p))+((p)->size & ~SIZE_BITS)))->size) & PREV_INUSE) + +/* set/clear chunk as being inuse without otherwise disturbing */ +#define set_inuse(p)\ +((mchunkptr)(((char*)(p)) + ((p)->size & ~SIZE_BITS)))->size |= PREV_INUSE + +#define clear_inuse(p)\ +((mchunkptr)(((char*)(p)) + ((p)->size & ~SIZE_BITS)))->size &= ~(PREV_INUSE) + + +/* check/set/clear inuse bits in known places */ +#define inuse_bit_at_offset(p, s)\ + (((mchunkptr)(((char*)(p)) + (s)))->size & PREV_INUSE) + +#define set_inuse_bit_at_offset(p, s)\ + (((mchunkptr)(((char*)(p)) + (s)))->size |= PREV_INUSE) + +#define clear_inuse_bit_at_offset(p, s)\ + (((mchunkptr)(((char*)(p)) + (s)))->size &= ~(PREV_INUSE)) + + +/* Set size at head, without disturbing its use bit */ +#define set_head_size(p, s) ((p)->size = (((p)->size & SIZE_BITS) | (s))) + +/* Set size/use field */ +#define set_head(p, s) ((p)->size = (s)) + +/* Set size at footer (only when chunk is not in use) */ +#define set_foot(p, s) (((mchunkptr)((char*)(p) + (s)))->prev_size = (s)) + + +/* + -------------------- Internal data structures -------------------- + + All internal state is held in an instance of malloc_state defined + below. There are no other static variables, except in two optional + cases: + * If USE_MALLOC_LOCK is defined, the mALLOC_MUTEx declared above. + * If HAVE_MMAP is true, but mmap doesn't support + MAP_ANONYMOUS, a dummy file descriptor for mmap. + + Beware of lots of tricks that minimize the total bookkeeping space + requirements. The result is a little over 1K bytes (for 4byte + pointers and size_t.) +*/ + +/* + Bins + + An array of bin headers for free chunks. Each bin is doubly + linked. The bins are approximately proportionally (log) spaced. + There are a lot of these bins (128). This may look excessive, but + works very well in practice. Most bins hold sizes that are + unusual as malloc request sizes, but are more usual for fragments + and consolidated sets of chunks, which is what these bins hold, so + they can be found quickly. All procedures maintain the invariant + that no consolidated chunk physically borders another one, so each + chunk in a list is known to be preceeded and followed by either + inuse chunks or the ends of memory. + + Chunks in bins are kept in size order, with ties going to the + approximately least recently used chunk. Ordering isn't needed + for the small bins, which all contain the same-sized chunks, but + facilitates best-fit allocation for larger chunks. These lists + are just sequential. Keeping them in order almost never requires + enough traversal to warrant using fancier ordered data + structures. + + Chunks of the same size are linked with the most + recently freed at the front, and allocations are taken from the + back. This results in LRU (FIFO) allocation order, which tends + to give each chunk an equal opportunity to be consolidated with + adjacent freed chunks, resulting in larger free chunks and less + fragmentation. + + To simplify use in double-linked lists, each bin header acts + as a malloc_chunk. This avoids special-casing for headers. + But to conserve space and improve locality, we allocate + only the fd/bk pointers of bins, and then use repositioning tricks + to treat these as the fields of a malloc_chunk*. +*/ + +typedef struct malloc_chunk* mbinptr; + +/* addressing -- note that bin_at(0) does not exist */ +#define bin_at(m, i) ((mbinptr)((char*)&((m)->bins[(i)<<1]) - (SIZE_SZ<<1))) + +/* analog of ++bin */ +#define next_bin(b) ((mbinptr)((char*)(b) + (sizeof(mchunkptr)<<1))) + +/* Reminders about list directionality within bins */ +#define first(b) ((b)->fd) +#define last(b) ((b)->bk) + +/* Take a chunk off a bin list */ +#define unlink(P, BK, FD) { \ + FD = P->fd; \ + BK = P->bk; \ + FD->bk = BK; \ + BK->fd = FD; \ +} + +/* + Indexing + + Bins for sizes < 512 bytes contain chunks of all the same size, spaced + 8 bytes apart. Larger bins are approximately logarithmically spaced: + + 64 bins of size 8 + 32 bins of size 64 + 16 bins of size 512 + 8 bins of size 4096 + 4 bins of size 32768 + 2 bins of size 262144 + 1 bin of size what's left + + There is actually a little bit of slop in the numbers in bin_index + for the sake of speed. This makes no difference elsewhere. + + The bins top out around 1MB because we expect to service large + requests via mmap. +*/ + +#define NBINS 128 +#define NSMALLBINS 64 +#define SMALLBIN_WIDTH 8 +#define MIN_LARGE_SIZE 512 + +#define in_smallbin_range(sz) \ + ((unsigned long)(sz) < (unsigned long)MIN_LARGE_SIZE) + +#define smallbin_index(sz) (((unsigned)(sz)) >> 3) + +#define largebin_index(sz) \ +(((((unsigned long)(sz)) >> 6) <= 32)? 56 + (((unsigned long)(sz)) >> 6): \ + ((((unsigned long)(sz)) >> 9) <= 20)? 91 + (((unsigned long)(sz)) >> 9): \ + ((((unsigned long)(sz)) >> 12) <= 10)? 110 + (((unsigned long)(sz)) >> 12): \ + ((((unsigned long)(sz)) >> 15) <= 4)? 119 + (((unsigned long)(sz)) >> 15): \ + ((((unsigned long)(sz)) >> 18) <= 2)? 124 + (((unsigned long)(sz)) >> 18): \ + 126) + +#define bin_index(sz) \ + ((in_smallbin_range(sz)) ? smallbin_index(sz) : largebin_index(sz)) + +/* + FIRST_SORTED_BIN_SIZE is the chunk size corresponding to the + first bin that is maintained in sorted order. This must + be the smallest size corresponding to a given bin. + + Normally, this should be MIN_LARGE_SIZE. But you can weaken + best fit guarantees to sometimes speed up malloc by increasing value. + Doing this means that malloc may choose a chunk that is + non-best-fitting by up to the width of the bin. + + Some useful cutoff values: + 512 - all bins sorted + 2560 - leaves bins <= 64 bytes wide unsorted + 12288 - leaves bins <= 512 bytes wide unsorted + 65536 - leaves bins <= 4096 bytes wide unsorted + 262144 - leaves bins <= 32768 bytes wide unsorted + -1 - no bins sorted (not recommended!) +*/ + +#define FIRST_SORTED_BIN_SIZE MIN_LARGE_SIZE +/* #define FIRST_SORTED_BIN_SIZE 65536 */ + +/* + Unsorted chunks + + All remainders from chunk splits, as well as all returned chunks, + are first placed in the "unsorted" bin. They are then placed + in regular bins after malloc gives them ONE chance to be used before + binning. So, basically, the unsorted_chunks list acts as a queue, + with chunks being placed on it in free (and malloc_consolidate), + and taken off (to be either used or placed in bins) in malloc. + + The NON_MAIN_ARENA flag is never set for unsorted chunks, so it + does not have to be taken into account in size comparisons. +*/ + +/* The otherwise unindexable 1-bin is used to hold unsorted chunks. */ +#define unsorted_chunks(M) (bin_at(M, 1)) + +/* + Top + + The top-most available chunk (i.e., the one bordering the end of + available memory) is treated specially. It is never included in + any bin, is used only if no other chunk is available, and is + released back to the system if it is very large (see + M_TRIM_THRESHOLD). Because top initially + points to its own bin with initial zero size, thus forcing + extension on the first malloc request, we avoid having any special + code in malloc to check whether it even exists yet. But we still + need to do so when getting memory from system, so we make + initial_top treat the bin as a legal but unusable chunk during the + interval between initialization and the first call to + sYSMALLOc. (This is somewhat delicate, since it relies on + the 2 preceding words to be zero during this interval as well.) +*/ + +/* Conveniently, the unsorted bin can be used as dummy top on first call */ +#define initial_top(M) (unsorted_chunks(M)) + +/* + Binmap + + To help compensate for the large number of bins, a one-level index + structure is used for bin-by-bin searching. `binmap' is a + bitvector recording whether bins are definitely empty so they can + be skipped over during during traversals. The bits are NOT always + cleared as soon as bins are empty, but instead only + when they are noticed to be empty during traversal in malloc. +*/ + +/* Conservatively use 32 bits per map word, even if on 64bit system */ +#define BINMAPSHIFT 5 +#define BITSPERMAP (1U << BINMAPSHIFT) +#define BINMAPSIZE (NBINS / BITSPERMAP) + +#define idx2block(i) ((i) >> BINMAPSHIFT) +#define idx2bit(i) ((1U << ((i) & ((1U << BINMAPSHIFT)-1)))) + +#define mark_bin(m,i) ((m)->binmap[idx2block(i)] |= idx2bit(i)) +#define unmark_bin(m,i) ((m)->binmap[idx2block(i)] &= ~(idx2bit(i))) +#define get_binmap(m,i) ((m)->binmap[idx2block(i)] & idx2bit(i)) + +/* + Fastbins + + An array of lists holding recently freed small chunks. Fastbins + are not doubly linked. It is faster to single-link them, and + since chunks are never removed from the middles of these lists, + double linking is not necessary. Also, unlike regular bins, they + are not even processed in FIFO order (they use faster LIFO) since + ordering doesn't much matter in the transient contexts in which + fastbins are normally used. + + Chunks in fastbins keep their inuse bit set, so they cannot + be consolidated with other free chunks. malloc_consolidate + releases all chunks in fastbins and consolidates them with + other free chunks. +*/ + +typedef struct malloc_chunk* mfastbinptr; + +/* offset 2 to use otherwise unindexable first 2 bins */ +#define fastbin_index(sz) ((int)((((unsigned int)(sz)) >> 3) - 2)) + +/* The maximum fastbin request size we support */ +#define MAX_FAST_SIZE 80 + +#define NFASTBINS (fastbin_index(request2size(MAX_FAST_SIZE))+1) + +/* + FASTBIN_CONSOLIDATION_THRESHOLD is the size of a chunk in free() + that triggers automatic consolidation of possibly-surrounding + fastbin chunks. This is a heuristic, so the exact value should not + matter too much. It is defined at half the default trim threshold as a + compromise heuristic to only attempt consolidation if it is likely + to lead to trimming. However, it is not dynamically tunable, since + consolidation reduces fragmentation surrounding large chunks even + if trimming is not used. +*/ + +#define FASTBIN_CONSOLIDATION_THRESHOLD (65536UL) + +/* + Since the lowest 2 bits in max_fast don't matter in size comparisons, + they are used as flags. +*/ + +/* + FASTCHUNKS_BIT held in max_fast indicates that there are probably + some fastbin chunks. It is set true on entering a chunk into any + fastbin, and cleared only in malloc_consolidate. + + The truth value is inverted so that have_fastchunks will be true + upon startup (since statics are zero-filled), simplifying + initialization checks. +*/ + +#define FASTCHUNKS_BIT (1U) + +#define have_fastchunks(M) (((M)->max_fast & FASTCHUNKS_BIT) == 0) +#define clear_fastchunks(M) ((M)->max_fast |= FASTCHUNKS_BIT) +#define set_fastchunks(M) ((M)->max_fast &= ~FASTCHUNKS_BIT) + +/* + NONCONTIGUOUS_BIT indicates that MORECORE does not return contiguous + regions. Otherwise, contiguity is exploited in merging together, + when possible, results from consecutive MORECORE calls. + + The initial value comes from MORECORE_CONTIGUOUS, but is + changed dynamically if mmap is ever used as an sbrk substitute. +*/ + +#define NONCONTIGUOUS_BIT (2U) + +#define contiguous(M) (((M)->max_fast & NONCONTIGUOUS_BIT) == 0) +#define noncontiguous(M) (((M)->max_fast & NONCONTIGUOUS_BIT) != 0) +#define set_noncontiguous(M) ((M)->max_fast |= NONCONTIGUOUS_BIT) +#define set_contiguous(M) ((M)->max_fast &= ~NONCONTIGUOUS_BIT) + +/* + Set value of max_fast. + Use impossibly small value if 0. + Precondition: there are no existing fastbin chunks. + Setting the value clears fastchunk bit but preserves noncontiguous bit. +*/ + +#define set_max_fast(M, s) \ + (M)->max_fast = (((s) == 0)? SMALLBIN_WIDTH: request2size(s)) | \ + FASTCHUNKS_BIT | \ + ((M)->max_fast & NONCONTIGUOUS_BIT) + + +/* + ----------- Internal state representation and initialization ----------- +*/ + +struct malloc_state { + /* Serialize access. */ + mutex_t mutex; + + /* Statistics for locking. Only used if THREAD_STATS is defined. */ + long stat_lock_direct, stat_lock_loop, stat_lock_wait; + long pad0_[1]; /* try to give the mutex its own cacheline */ + + /* The maximum chunk size to be eligible for fastbin */ + INTERNAL_SIZE_T max_fast; /* low 2 bits used as flags */ + + /* Fastbins */ + mfastbinptr fastbins[NFASTBINS]; + + /* Base of the topmost chunk -- not otherwise kept in a bin */ + mchunkptr top; + + /* The remainder from the most recent split of a small request */ + mchunkptr last_remainder; + + /* Normal bins packed as described above */ + mchunkptr bins[NBINS * 2]; + + /* Bitmap of bins */ + unsigned int binmap[BINMAPSIZE]; + + /* Linked list */ + struct malloc_state *next; + + /* Memory allocated from the system in this arena. */ + INTERNAL_SIZE_T system_mem; + INTERNAL_SIZE_T max_system_mem; +}; + +struct malloc_par { + /* Tunable parameters */ + unsigned long trim_threshold; + INTERNAL_SIZE_T top_pad; + INTERNAL_SIZE_T mmap_threshold; + + /* Memory map support */ + int n_mmaps; + int n_mmaps_max; + int max_n_mmaps; + + /* Cache malloc_getpagesize */ + unsigned int pagesize; + + /* Statistics */ + INTERNAL_SIZE_T mmapped_mem; + /*INTERNAL_SIZE_T sbrked_mem;*/ + /*INTERNAL_SIZE_T max_sbrked_mem;*/ + INTERNAL_SIZE_T max_mmapped_mem; + INTERNAL_SIZE_T max_total_mem; /* only kept for NO_THREADS */ + + /* First address handed out by MORECORE/sbrk. */ + char* sbrk_base; +}; + +/* There are several instances of this struct ("arenas") in this + malloc. If you are adapting this malloc in a way that does NOT use + a static or mmapped malloc_state, you MUST explicitly zero-fill it + before using. This malloc relies on the property that malloc_state + is initialized to all zeroes (as is true of C statics). */ + + + +/* + Initialize a malloc_state struct. + + This is called only from within malloc_consolidate, which needs + be called in the same contexts anyway. It is never called directly + outside of malloc_consolidate because some optimizing compilers try + to inline it at all call points, which turns out not to be an + optimization at all. (Inlining it in malloc_consolidate is fine though.) +*/ + +#if __STD_C +static void malloc_init_state(mstate av) +#else +static void malloc_init_state(av) mstate av; +#endif +{ + int i; + mbinptr bin; + + /* Establish circular links for normal bins */ + for (i = 1; i < NBINS; ++i) { + bin = bin_at(av,i); + bin->fd = bin->bk = bin; + } + + set_noncontiguous(av); + + set_max_fast(av, DEFAULT_MXFAST); + + av->top = initial_top(av); +} + +/* + Other internal utilities operating on mstates +*/ + +#if __STD_C +static Void_t* sYSMALLOc(INTERNAL_SIZE_T, mstate); +static void malloc_consolidate(mstate); +//static Void_t** iALLOc(mstate, size_t, size_t*, int, Void_t**); +#else +static Void_t* sYSMALLOc(); +static void malloc_consolidate(); +static Void_t** iALLOc(); +#endif + +/* ------------------- Support for multiple arenas -------------------- */ +#include "arena.c" + +/* + Debugging support + + These routines make a number of assertions about the states + of data structures that should be true at all times. If any + are not true, it's very likely that a user program has somehow + trashed memory. (It's also possible that there is a coding error + in malloc. In which case, please report it!) +*/ + +#if ! MALLOC_DEBUG + +#define check_chunk(A,P) +#define check_free_chunk(A,P) +#define check_inuse_chunk(A,P) +#define check_remalloced_chunk(A,P,N) +#define check_malloced_chunk(A,P,N) +#define check_malloc_state(A) + +#else + +#define check_chunk(A,P) do_check_chunk(A,P) +#define check_free_chunk(A,P) do_check_free_chunk(A,P) +#define check_inuse_chunk(A,P) do_check_inuse_chunk(A,P) +#define check_remalloced_chunk(A,P,N) do_check_remalloced_chunk(A,P,N) +#define check_malloced_chunk(A,P,N) do_check_malloced_chunk(A,P,N) +#define check_malloc_state(A) do_check_malloc_state(A) + +/* + Properties of all chunks +*/ + +#if __STD_C +static void do_check_chunk(mstate av, mchunkptr p) +#else +static void do_check_chunk(av, p) mstate av; mchunkptr p; +#endif +{ + unsigned long sz = chunksize(p); + /* min and max possible addresses assuming contiguous allocation */ + char* max_address = (char*)(av->top) + chunksize(av->top); + char* min_address = max_address - av->system_mem; + + if (!chunk_is_mmapped(p)) { + + /* Has legal address ... */ + if (p != av->top) { + if (contiguous(av)) { + assert(((char*)p) >= min_address); + assert(((char*)p + sz) <= ((char*)(av->top))); + } + } + else { + /* top size is always at least MINSIZE */ + assert((unsigned long)(sz) >= MINSIZE); + /* top predecessor always marked inuse */ + assert(prev_inuse(p)); + } + + } + else { +#if HAVE_MMAP + /* address is outside main heap */ + if (contiguous(av) && av->top != initial_top(av)) { + assert(((char*)p) < min_address || ((char*)p) > max_address); + } + /* chunk is page-aligned */ + assert(((p->prev_size + sz) & (mp_.pagesize-1)) == 0); + /* mem is aligned */ + assert(aligned_OK(chunk2mem(p))); +#else + /* force an appropriate assert violation if debug set */ + assert(!chunk_is_mmapped(p)); +#endif + } +} + +/* + Properties of free chunks +*/ + +#if __STD_C +static void do_check_free_chunk(mstate av, mchunkptr p) +#else +static void do_check_free_chunk(av, p) mstate av; mchunkptr p; +#endif +{ + INTERNAL_SIZE_T sz = p->size & ~(PREV_INUSE); + mchunkptr next = chunk_at_offset(p, sz); + + do_check_chunk(av, p); + + /* Chunk must claim to be free ... */ + assert(!inuse(p)); + assert (!chunk_is_mmapped(p)); + + /* Unless a special marker, must have OK fields */ + if ((unsigned long)(sz) >= MINSIZE) + { + assert((sz & MALLOC_ALIGN_MASK) == 0); + assert(aligned_OK(chunk2mem(p))); + /* ... matching footer field */ + assert(next->prev_size == sz); + /* ... and is fully consolidated */ + assert(prev_inuse(p)); + assert (next == av->top || inuse(next)); + + /* ... and has minimally sane links */ + assert(p->fd->bk == p); + assert(p->bk->fd == p); + } + else /* markers are always of size SIZE_SZ */ + assert(sz == SIZE_SZ); +} + +/* + Properties of inuse chunks +*/ + +#if __STD_C +static void do_check_inuse_chunk(mstate av, mchunkptr p) +#else +static void do_check_inuse_chunk(av, p) mstate av; mchunkptr p; +#endif +{ + mchunkptr next; + + do_check_chunk(av, p); + + assert(av == arena_for_chunk(p)); + if (chunk_is_mmapped(p)) + return; /* mmapped chunks have no next/prev */ + + /* Check whether it claims to be in use ... */ + assert(inuse(p)); + + next = next_chunk(p); + + /* ... and is surrounded by OK chunks. + Since more things can be checked with free chunks than inuse ones, + if an inuse chunk borders them and debug is on, it's worth doing them. + */ + if (!prev_inuse(p)) { + /* Note that we cannot even look at prev unless it is not inuse */ + mchunkptr prv = prev_chunk(p); + assert(next_chunk(prv) == p); + do_check_free_chunk(av, prv); + } + + if (next == av->top) { + assert(prev_inuse(next)); + assert(chunksize(next) >= MINSIZE); + } + else if (!inuse(next)) + do_check_free_chunk(av, next); +} + +/* + Properties of chunks recycled from fastbins +*/ + +#if __STD_C +static void do_check_remalloced_chunk(mstate av, mchunkptr p, INTERNAL_SIZE_T s) +#else +static void do_check_remalloced_chunk(av, p, s) +mstate av; mchunkptr p; INTERNAL_SIZE_T s; +#endif +{ + INTERNAL_SIZE_T sz = p->size & ~(PREV_INUSE); + + if (!chunk_is_mmapped(p)) { + assert(av == arena_for_chunk(p)); + } + + do_check_inuse_chunk(av, p); + + /* Legal size ... */ + assert((sz & MALLOC_ALIGN_MASK) == 0); + assert((unsigned long)(sz) >= MINSIZE); + /* ... and alignment */ + assert(aligned_OK(chunk2mem(p))); + /* chunk is less than MINSIZE more than request */ + assert((long)(sz) - (long)(s) >= 0); + assert((long)(sz) - (long)(s + MINSIZE) < 0); +} + +/* + Properties of nonrecycled chunks at the point they are malloced +*/ + +#if __STD_C +static void do_check_malloced_chunk(mstate av, mchunkptr p, INTERNAL_SIZE_T s) +#else +static void do_check_malloced_chunk(av, p, s) +mstate av; mchunkptr p; INTERNAL_SIZE_T s; +#endif +{ + /* same as recycled case ... */ + do_check_remalloced_chunk(av, p, s); + + /* + ... plus, must obey implementation invariant that prev_inuse is + always true of any allocated chunk; i.e., that each allocated + chunk borders either a previously allocated and still in-use + chunk, or the base of its memory arena. This is ensured + by making all allocations from the the `lowest' part of any found + chunk. This does not necessarily hold however for chunks + recycled via fastbins. + */ + + assert(prev_inuse(p)); +} + + +/* + Properties of malloc_state. + + This may be useful for debugging malloc, as well as detecting user + programmer errors that somehow write into malloc_state. + + If you are extending or experimenting with this malloc, you can + probably figure out how to hack this routine to print out or + display chunk addresses, sizes, bins, and other instrumentation. +*/ + +static void do_check_malloc_state(mstate av) +{ + int i; + mchunkptr p; + mchunkptr q; + mbinptr b; + unsigned int binbit; + int empty; + unsigned int idx; + INTERNAL_SIZE_T size; + unsigned long total = 0; + int max_fast_bin; + + /* internal size_t must be no wider than pointer type */ + assert(sizeof(INTERNAL_SIZE_T) <= sizeof(char*)); + + /* alignment is a power of 2 */ + assert((MALLOC_ALIGNMENT & (MALLOC_ALIGNMENT-1)) == 0); + + /* cannot run remaining checks until fully initialized */ + if (av->top == 0 || av->top == initial_top(av)) + return; + + + /* properties of fastbins */ + + /* max_fast is in allowed range */ + assert((av->max_fast & ~1) <= request2size(MAX_FAST_SIZE)); + + max_fast_bin = fastbin_index(av->max_fast); + + for (i = 0; i < NFASTBINS; ++i) { + p = av->fastbins[i]; + + /* all bins past max_fast are empty */ + if (i > max_fast_bin) + assert(p == 0); + + while (p != 0) { + /* each chunk claims to be inuse */ + do_check_inuse_chunk(av, p); + total += chunksize(p); + /* chunk belongs in this bin */ + assert(fastbin_index(chunksize(p)) == i); + p = p->fd; + } + } + + if (total != 0) + assert(have_fastchunks(av)); + else if (!have_fastchunks(av)) + assert(total == 0); + + /* check normal bins */ + for (i = 1; i < NBINS; ++i) { + b = bin_at(av,i); + + /* binmap is accurate (except for bin 1 == unsorted_chunks) */ + if (i >= 2) { + binbit = get_binmap(av,i); + empty = last(b) == b; + if (!binbit) + assert(empty); + else if (!empty) + assert(binbit); + } + + for (p = last(b); p != b; p = p->bk) { + /* each chunk claims to be free */ + do_check_free_chunk(av, p); + size = chunksize(p); + total += size; + if (i >= 2) { + /* chunk belongs in bin */ + idx = bin_index(size); + assert(idx == (unsigned int)i); + /* lists are sorted */ + if ((unsigned long) size >= (unsigned long)(FIRST_SORTED_BIN_SIZE)) { + assert(p->bk == b || + (unsigned long)chunksize(p->bk) >= + (unsigned long)chunksize(p)); + } + } + /* chunk is followed by a legal chain of inuse chunks */ + for (q = next_chunk(p); + (q != av->top && inuse(q) && + (unsigned long)(chunksize(q)) >= MINSIZE); + q = next_chunk(q)) + do_check_inuse_chunk(av, q); + } + } + + /* top chunk is OK */ + check_chunk(av, av->top); + + /* sanity checks for statistics */ + + + assert((unsigned long)(av->system_mem) <= + (unsigned long)(av->max_system_mem)); + + +} +#endif + + + +/* ----------- Routines dealing with system allocation -------------- */ + +/* No system allocation routines supported */ + + +/*------------------------ Public wrappers. --------------------------------*/ + + + +#undef DEBUG_MALLOC +Void_t* +public_mALLOc(cvmx_arena_list_t arena_list, size_t bytes) +{ + mstate ar_ptr, orig_ar_ptr; + Void_t *victim = NULL; + static mstate debug_prev_ar; // debug only! +#ifdef DEBUG_MALLOC + int arena_cnt=0; +#endif + + ar_ptr = arena_list; + + if (!ar_ptr) + { + return(NULL); + } + + if (debug_prev_ar != ar_ptr) + { + debug_printf("New arena: %p\n", ar_ptr); +#ifdef CVMX_SPINLOCK_DEBUG + cvmx_dprintf("lock wait count for arena: %p is %ld\n", ar_ptr, ar_ptr->mutex.wait_cnt); +#endif + debug_prev_ar = ar_ptr; + } + orig_ar_ptr = ar_ptr; + + // try to get an arena without contention + do + { +#ifdef DEBUG_MALLOC + arena_cnt++; +#endif + if (!mutex_trylock(&ar_ptr->mutex)) + { + // we locked it + victim = _int_malloc(ar_ptr, bytes); + (void)mutex_unlock(&ar_ptr->mutex); + if(victim) + { + break; + } + } + ar_ptr = ar_ptr->next; + } while (ar_ptr != orig_ar_ptr); + + // we couldn't get the memory without contention, so try all + // arenas. SLOW! + if (!victim) + { + ar_ptr = orig_ar_ptr; + do + { +#ifdef DEBUG_MALLOC + arena_cnt++; +#endif + mutex_lock(&ar_ptr->mutex); + victim = _int_malloc(ar_ptr, bytes); + (void)mutex_unlock(&ar_ptr->mutex); + if(victim) + { + break; + } + ar_ptr = ar_ptr->next; + } while (ar_ptr != orig_ar_ptr); + } + + + assert(!victim || chunk_is_mmapped(mem2chunk(victim)) || + ar_ptr == arena_for_chunk(mem2chunk(victim))); + +#ifdef DEBUG_MALLOC + if (!victim) + { + cvmx_dprintf("Malloc failed: size: %ld, arena_cnt: %d\n", bytes, arena_cnt); + } +#endif + + debug_printf("cvmx_malloc(%ld) = %p\n", bytes, victim); + + // remember which arena we last used..... + tsd_setspecific(arena_key, (Void_t *)ar_ptr); + return victim; +} + + + +void +public_fREe(Void_t* mem) +{ + mstate ar_ptr; + mchunkptr p; /* chunk corresponding to mem */ + + debug_printf("cvmx_free(%p)\n", mem); + + + if (mem == 0) /* free(0) has no effect */ + return; + + p = mem2chunk(mem); + + + ar_ptr = arena_for_chunk(p); + assert(ar_ptr); +#if THREAD_STATS + if(!mutex_trylock(&ar_ptr->mutex)) + ++(ar_ptr->stat_lock_direct); + else { + (void)mutex_lock(&ar_ptr->mutex); + ++(ar_ptr->stat_lock_wait); + } +#else + (void)mutex_lock(&ar_ptr->mutex); +#endif + _int_free(ar_ptr, mem); + (void)mutex_unlock(&ar_ptr->mutex); +} + +Void_t* +public_rEALLOc(cvmx_arena_list_t arena_list, Void_t* oldmem, size_t bytes) +{ + mstate ar_ptr; + INTERNAL_SIZE_T nb; /* padded request size */ + + mchunkptr oldp; /* chunk corresponding to oldmem */ + INTERNAL_SIZE_T oldsize; /* its size */ + + Void_t* newp; /* chunk to return */ + + +#if REALLOC_ZERO_BYTES_FREES + if (bytes == 0 && oldmem != NULL) { public_fREe(oldmem); return 0; } +#endif + + /* realloc of null is supposed to be same as malloc */ + if (oldmem == 0) return public_mALLOc(arena_list, bytes); + + oldp = mem2chunk(oldmem); + oldsize = chunksize(oldp); + + checked_request2size(bytes, nb); + + + ar_ptr = arena_for_chunk(oldp); + (void)mutex_lock(&ar_ptr->mutex); + + + newp = _int_realloc(ar_ptr, oldmem, bytes); + + (void)mutex_unlock(&ar_ptr->mutex); + assert(!newp || chunk_is_mmapped(mem2chunk(newp)) || + ar_ptr == arena_for_chunk(mem2chunk(newp))); + return newp; +} + +#undef DEBUG_MEMALIGN +Void_t* +public_mEMALIGn(cvmx_arena_list_t arena_list, size_t alignment, size_t bytes) +{ + mstate ar_ptr, orig_ar_ptr; + Void_t *p = NULL; +#ifdef DEBUG_MEMALIGN + int arena_cnt=0; +#endif + + + /* If need less alignment than we give anyway, just relay to malloc */ + if (alignment <= MALLOC_ALIGNMENT) return public_mALLOc(arena_list, bytes); + + /* Otherwise, ensure that it is at least a minimum chunk size */ + if (alignment < MINSIZE) alignment = MINSIZE; + + + ar_ptr = arena_list; + + if (!ar_ptr) + { + return(NULL); + } + + orig_ar_ptr = ar_ptr; + + + // try to get an arena without contention + do + { + +#ifdef DEBUG_MEMALIGN + arena_cnt++; +#endif + if (!mutex_trylock(&ar_ptr->mutex)) + { + // we locked it + p = _int_memalign(ar_ptr, alignment, bytes); + (void)mutex_unlock(&ar_ptr->mutex); + if(p) + { + break; + } + } + ar_ptr = ar_ptr->next; + } while (ar_ptr != orig_ar_ptr); + + + // we couldn't get the memory without contention, so try all + // arenas. SLOW! + if (!p) + { +#ifdef DEBUG_MEMALIGN + arena_cnt++; +#endif + ar_ptr = orig_ar_ptr; + do + { + mutex_lock(&ar_ptr->mutex); + p = _int_memalign(ar_ptr, alignment, bytes); + (void)mutex_unlock(&ar_ptr->mutex); + if(p) + { + break; + } + ar_ptr = ar_ptr->next; + } while (ar_ptr != orig_ar_ptr); + } + + + if (p) + { + assert(ar_ptr == arena_for_chunk(mem2chunk(p))); + } + else + { +#ifdef DEBUG_MEMALIGN + cvmx_dprintf("Memalign failed: align: 0x%x, size: %ld, arena_cnt: %ld\n", alignment, bytes, arena_cnt); +#endif + } + + assert(!p || ar_ptr == arena_for_chunk(mem2chunk(p))); + return p; +} + + + +Void_t* +public_cALLOc(cvmx_arena_list_t arena_list, size_t n, size_t elem_size) +{ + mstate av; + mchunkptr oldtop, p; + INTERNAL_SIZE_T sz, csz, oldtopsize; + Void_t* mem; + unsigned long clearsize; + unsigned long nclears; + INTERNAL_SIZE_T* d; + + + /* FIXME: check for overflow on multiplication. */ + sz = n * elem_size; + + mem = public_mALLOc(arena_list, sz); + if (mem) + { + memset(mem, 0, sz); + } + + return mem; +} + + +#ifndef _LIBC + +void +public_cFREe(Void_t* m) +{ + public_fREe(m); +} + +#endif /* _LIBC */ + +/* + ------------------------------ malloc ------------------------------ +*/ + +static Void_t* +_int_malloc(mstate av, size_t bytes) +{ + INTERNAL_SIZE_T nb; /* normalized request size */ + unsigned int idx; /* associated bin index */ + mbinptr bin; /* associated bin */ + mfastbinptr* fb; /* associated fastbin */ + + mchunkptr victim; /* inspected/selected chunk */ + INTERNAL_SIZE_T size; /* its size */ + int victim_index; /* its bin index */ + + mchunkptr remainder; /* remainder from a split */ + unsigned long remainder_size; /* its size */ + + unsigned int block; /* bit map traverser */ + unsigned int bit; /* bit map traverser */ + unsigned int map; /* current word of binmap */ + + mchunkptr fwd; /* misc temp for linking */ + mchunkptr bck; /* misc temp for linking */ + + /* + Convert request size to internal form by adding SIZE_SZ bytes + overhead plus possibly more to obtain necessary alignment and/or + to obtain a size of at least MINSIZE, the smallest allocatable + size. Also, checked_request2size traps (returning 0) request sizes + that are so large that they wrap around zero when padded and + aligned. + */ + + + checked_request2size(bytes, nb); + + /* + If the size qualifies as a fastbin, first check corresponding bin. + This code is safe to execute even if av is not yet initialized, so we + can try it without checking, which saves some time on this fast path. + */ + + if ((unsigned long)(nb) <= (unsigned long)(av->max_fast)) { + fb = &(av->fastbins[(fastbin_index(nb))]); + if ( (victim = *fb) != 0) { + *fb = victim->fd; + check_remalloced_chunk(av, victim, nb); + set_arena_for_chunk(victim, av); + return chunk2mem(victim); + } + } + + /* + If a small request, check regular bin. Since these "smallbins" + hold one size each, no searching within bins is necessary. + (For a large request, we need to wait until unsorted chunks are + processed to find best fit. But for small ones, fits are exact + anyway, so we can check now, which is faster.) + */ + + if (in_smallbin_range(nb)) { + idx = smallbin_index(nb); + bin = bin_at(av,idx); + + if ( (victim = last(bin)) != bin) { + if (victim == 0) /* initialization check */ + malloc_consolidate(av); + else { + bck = victim->bk; + set_inuse_bit_at_offset(victim, nb); + bin->bk = bck; + bck->fd = bin; + + set_arena_for_chunk(victim, av); + check_malloced_chunk(av, victim, nb); + return chunk2mem(victim); + } + } + } + + /* + If this is a large request, consolidate fastbins before continuing. + While it might look excessive to kill all fastbins before + even seeing if there is space available, this avoids + fragmentation problems normally associated with fastbins. + Also, in practice, programs tend to have runs of either small or + large requests, but less often mixtures, so consolidation is not + invoked all that often in most programs. And the programs that + it is called frequently in otherwise tend to fragment. + */ + + else { + idx = largebin_index(nb); + if (have_fastchunks(av)) + malloc_consolidate(av); + } + + /* + Process recently freed or remaindered chunks, taking one only if + it is exact fit, or, if this a small request, the chunk is remainder from + the most recent non-exact fit. Place other traversed chunks in + bins. Note that this step is the only place in any routine where + chunks are placed in bins. + + The outer loop here is needed because we might not realize until + near the end of malloc that we should have consolidated, so must + do so and retry. This happens at most once, and only when we would + otherwise need to expand memory to service a "small" request. + */ + + for(;;) { + + while ( (victim = unsorted_chunks(av)->bk) != unsorted_chunks(av)) { + bck = victim->bk; + size = chunksize(victim); + + /* + If a small request, try to use last remainder if it is the + only chunk in unsorted bin. This helps promote locality for + runs of consecutive small requests. This is the only + exception to best-fit, and applies only when there is + no exact fit for a small chunk. + */ + + if (in_smallbin_range(nb) && + bck == unsorted_chunks(av) && + victim == av->last_remainder && + (unsigned long)(size) > (unsigned long)(nb + MINSIZE)) { + + /* split and reattach remainder */ + remainder_size = size - nb; + remainder = chunk_at_offset(victim, nb); + unsorted_chunks(av)->bk = unsorted_chunks(av)->fd = remainder; + av->last_remainder = remainder; + remainder->bk = remainder->fd = unsorted_chunks(av); + + set_head(victim, nb | PREV_INUSE); + set_head(remainder, remainder_size | PREV_INUSE); + set_foot(remainder, remainder_size); + + set_arena_for_chunk(victim, av); + check_malloced_chunk(av, victim, nb); + return chunk2mem(victim); + } + + /* remove from unsorted list */ + unsorted_chunks(av)->bk = bck; + bck->fd = unsorted_chunks(av); + + /* Take now instead of binning if exact fit */ + + if (size == nb) { + set_inuse_bit_at_offset(victim, size); + set_arena_for_chunk(victim, av); + check_malloced_chunk(av, victim, nb); + return chunk2mem(victim); + } + + /* place chunk in bin */ + + if (in_smallbin_range(size)) { + victim_index = smallbin_index(size); + bck = bin_at(av, victim_index); + fwd = bck->fd; + } + else { + victim_index = largebin_index(size); + bck = bin_at(av, victim_index); + fwd = bck->fd; + + if (fwd != bck) { + /* if smaller than smallest, place first */ + if ((unsigned long)(size) < (unsigned long)(bck->bk->size)) { + fwd = bck; + bck = bck->bk; + } + else if ((unsigned long)(size) >= + (unsigned long)(FIRST_SORTED_BIN_SIZE)) { + + /* maintain large bins in sorted order */ + size |= PREV_INUSE; /* Or with inuse bit to speed comparisons */ + while ((unsigned long)(size) < (unsigned long)(fwd->size)) { + fwd = fwd->fd; + } + bck = fwd->bk; + } + } + } + + mark_bin(av, victim_index); + victim->bk = bck; + victim->fd = fwd; + fwd->bk = victim; + bck->fd = victim; + } + + /* + If a large request, scan through the chunks of current bin in + sorted order to find smallest that fits. This is the only step + where an unbounded number of chunks might be scanned without doing + anything useful with them. However the lists tend to be short. + */ + + if (!in_smallbin_range(nb)) { + bin = bin_at(av, idx); + + for (victim = last(bin); victim != bin; victim = victim->bk) { + size = chunksize(victim); + + if ((unsigned long)(size) >= (unsigned long)(nb)) { + remainder_size = size - nb; + unlink(victim, bck, fwd); + + /* Exhaust */ + if (remainder_size < MINSIZE) { + set_inuse_bit_at_offset(victim, size); + set_arena_for_chunk(victim, av); + check_malloced_chunk(av, victim, nb); + return chunk2mem(victim); + } + /* Split */ + else { + remainder = chunk_at_offset(victim, nb); + unsorted_chunks(av)->bk = unsorted_chunks(av)->fd = remainder; + remainder->bk = remainder->fd = unsorted_chunks(av); + set_head(victim, nb | PREV_INUSE); + set_head(remainder, remainder_size | PREV_INUSE); + set_foot(remainder, remainder_size); + set_arena_for_chunk(victim, av); + check_malloced_chunk(av, victim, nb); + return chunk2mem(victim); + } + } + } + } + + /* + Search for a chunk by scanning bins, starting with next largest + bin. This search is strictly by best-fit; i.e., the smallest + (with ties going to approximately the least recently used) chunk + that fits is selected. + + The bitmap avoids needing to check that most blocks are nonempty. + The particular case of skipping all bins during warm-up phases + when no chunks have been returned yet is faster than it might look. + */ + + ++idx; + bin = bin_at(av,idx); + block = idx2block(idx); + map = av->binmap[block]; + bit = idx2bit(idx); + + for (;;) { + + /* Skip rest of block if there are no more set bits in this block. */ + if (bit > map || bit == 0) { + do { + if (++block >= BINMAPSIZE) /* out of bins */ + goto use_top; + } while ( (map = av->binmap[block]) == 0); + + bin = bin_at(av, (block << BINMAPSHIFT)); + bit = 1; + } + + /* Advance to bin with set bit. There must be one. */ + while ((bit & map) == 0) { + bin = next_bin(bin); + bit <<= 1; + assert(bit != 0); + } + + /* Inspect the bin. It is likely to be non-empty */ + victim = last(bin); + + /* If a false alarm (empty bin), clear the bit. */ + if (victim == bin) { + av->binmap[block] = map &= ~bit; /* Write through */ + bin = next_bin(bin); + bit <<= 1; + } + + else { + size = chunksize(victim); + + /* We know the first chunk in this bin is big enough to use. */ + assert((unsigned long)(size) >= (unsigned long)(nb)); + + remainder_size = size - nb; + + /* unlink */ + bck = victim->bk; + bin->bk = bck; + bck->fd = bin; + + /* Exhaust */ + if (remainder_size < MINSIZE) { + set_inuse_bit_at_offset(victim, size); + set_arena_for_chunk(victim, av); + check_malloced_chunk(av, victim, nb); + return chunk2mem(victim); + } + + /* Split */ + else { + remainder = chunk_at_offset(victim, nb); + + unsorted_chunks(av)->bk = unsorted_chunks(av)->fd = remainder; + remainder->bk = remainder->fd = unsorted_chunks(av); + /* advertise as last remainder */ + if (in_smallbin_range(nb)) + av->last_remainder = remainder; + + set_head(victim, nb | PREV_INUSE); + set_head(remainder, remainder_size | PREV_INUSE); + set_foot(remainder, remainder_size); + set_arena_for_chunk(victim, av); + check_malloced_chunk(av, victim, nb); + return chunk2mem(victim); + } + } + } + + use_top: + /* + If large enough, split off the chunk bordering the end of memory + (held in av->top). Note that this is in accord with the best-fit + search rule. In effect, av->top is treated as larger (and thus + less well fitting) than any other available chunk since it can + be extended to be as large as necessary (up to system + limitations). + + We require that av->top always exists (i.e., has size >= + MINSIZE) after initialization, so if it would otherwise be + exhuasted by current request, it is replenished. (The main + reason for ensuring it exists is that we may need MINSIZE space + to put in fenceposts in sysmalloc.) + */ + + victim = av->top; + size = chunksize(victim); + + if ((unsigned long)(size) >= (unsigned long)(nb + MINSIZE)) { + remainder_size = size - nb; + remainder = chunk_at_offset(victim, nb); + av->top = remainder; + set_head(victim, nb | PREV_INUSE); + set_head(remainder, remainder_size | PREV_INUSE); + + set_arena_for_chunk(victim, av); + check_malloced_chunk(av, victim, nb); + return chunk2mem(victim); + } + + /* + If there is space available in fastbins, consolidate and retry, + to possibly avoid expanding memory. This can occur only if nb is + in smallbin range so we didn't consolidate upon entry. + */ + + else if (have_fastchunks(av)) { + assert(in_smallbin_range(nb)); + malloc_consolidate(av); + idx = smallbin_index(nb); /* restore original bin index */ + } + + /* + Otherwise, relay to handle system-dependent cases + */ + else + return(NULL); // sysmalloc not supported + } +} + +/* + ------------------------------ free ------------------------------ +*/ + +static void +_int_free(mstate av, Void_t* mem) +{ + mchunkptr p; /* chunk corresponding to mem */ + INTERNAL_SIZE_T size; /* its size */ + mfastbinptr* fb; /* associated fastbin */ + mchunkptr nextchunk; /* next contiguous chunk */ + INTERNAL_SIZE_T nextsize; /* its size */ + int nextinuse; /* true if nextchunk is used */ + INTERNAL_SIZE_T prevsize; /* size of previous contiguous chunk */ + mchunkptr bck; /* misc temp for linking */ + mchunkptr fwd; /* misc temp for linking */ + + + /* free(0) has no effect */ + if (mem != 0) { + p = mem2chunk(mem); + size = chunksize(p); + + check_inuse_chunk(av, p); + + /* + If eligible, place chunk on a fastbin so it can be found + and used quickly in malloc. + */ + + if ((unsigned long)(size) <= (unsigned long)(av->max_fast) + +#if TRIM_FASTBINS + /* + If TRIM_FASTBINS set, don't place chunks + bordering top into fastbins + */ + && (chunk_at_offset(p, size) != av->top) +#endif + ) { + + set_fastchunks(av); + fb = &(av->fastbins[fastbin_index(size)]); + p->fd = *fb; + *fb = p; + } + + /* + Consolidate other non-mmapped chunks as they arrive. + */ + + else if (!chunk_is_mmapped(p)) { + nextchunk = chunk_at_offset(p, size); + nextsize = chunksize(nextchunk); + assert(nextsize > 0); + + /* consolidate backward */ + if (!prev_inuse(p)) { + prevsize = p->prev_size; + size += prevsize; + p = chunk_at_offset(p, -((long) prevsize)); + unlink(p, bck, fwd); + } + + if (nextchunk != av->top) { + /* get and clear inuse bit */ + nextinuse = inuse_bit_at_offset(nextchunk, nextsize); + + /* consolidate forward */ + if (!nextinuse) { + unlink(nextchunk, bck, fwd); + size += nextsize; + } else + clear_inuse_bit_at_offset(nextchunk, 0); + + /* + Place the chunk in unsorted chunk list. Chunks are + not placed into regular bins until after they have + been given one chance to be used in malloc. + */ + + bck = unsorted_chunks(av); + fwd = bck->fd; + p->bk = bck; + p->fd = fwd; + bck->fd = p; + fwd->bk = p; + + set_head(p, size | PREV_INUSE); + set_foot(p, size); + + check_free_chunk(av, p); + } + + /* + If the chunk borders the current high end of memory, + consolidate into top + */ + + else { + size += nextsize; + set_head(p, size | PREV_INUSE); + av->top = p; + check_chunk(av, p); + } + + /* + If freeing a large space, consolidate possibly-surrounding + chunks. Then, if the total unused topmost memory exceeds trim + threshold, ask malloc_trim to reduce top. + + Unless max_fast is 0, we don't know if there are fastbins + bordering top, so we cannot tell for sure whether threshold + has been reached unless fastbins are consolidated. But we + don't want to consolidate on each free. As a compromise, + consolidation is performed if FASTBIN_CONSOLIDATION_THRESHOLD + is reached. + */ + + if ((unsigned long)(size) >= FASTBIN_CONSOLIDATION_THRESHOLD) { + if (have_fastchunks(av)) + malloc_consolidate(av); + } + } + } +} + +/* + ------------------------- malloc_consolidate ------------------------- + + malloc_consolidate is a specialized version of free() that tears + down chunks held in fastbins. Free itself cannot be used for this + purpose since, among other things, it might place chunks back onto + fastbins. So, instead, we need to use a minor variant of the same + code. + + Also, because this routine needs to be called the first time through + malloc anyway, it turns out to be the perfect place to trigger + initialization code. +*/ + +#if __STD_C +static void malloc_consolidate(mstate av) +#else +static void malloc_consolidate(av) mstate av; +#endif +{ + mfastbinptr* fb; /* current fastbin being consolidated */ + mfastbinptr* maxfb; /* last fastbin (for loop control) */ + mchunkptr p; /* current chunk being consolidated */ + mchunkptr nextp; /* next chunk to consolidate */ + mchunkptr unsorted_bin; /* bin header */ + mchunkptr first_unsorted; /* chunk to link to */ + + /* These have same use as in free() */ + mchunkptr nextchunk; + INTERNAL_SIZE_T size; + INTERNAL_SIZE_T nextsize; + INTERNAL_SIZE_T prevsize; + int nextinuse; + mchunkptr bck; + mchunkptr fwd; + + /* + If max_fast is 0, we know that av hasn't + yet been initialized, in which case do so below + */ + + if (av->max_fast != 0) { + clear_fastchunks(av); + + unsorted_bin = unsorted_chunks(av); + + /* + Remove each chunk from fast bin and consolidate it, placing it + then in unsorted bin. Among other reasons for doing this, + placing in unsorted bin avoids needing to calculate actual bins + until malloc is sure that chunks aren't immediately going to be + reused anyway. + */ + + maxfb = &(av->fastbins[fastbin_index(av->max_fast)]); + fb = &(av->fastbins[0]); + do { + if ( (p = *fb) != 0) { + *fb = 0; + + do { + check_inuse_chunk(av, p); + nextp = p->fd; + + /* Slightly streamlined version of consolidation code in free() */ + size = p->size & ~(PREV_INUSE); + nextchunk = chunk_at_offset(p, size); + nextsize = chunksize(nextchunk); + + if (!prev_inuse(p)) { + prevsize = p->prev_size; + size += prevsize; + p = chunk_at_offset(p, -((long) prevsize)); + unlink(p, bck, fwd); + } + + if (nextchunk != av->top) { + nextinuse = inuse_bit_at_offset(nextchunk, nextsize); + + if (!nextinuse) { + size += nextsize; + unlink(nextchunk, bck, fwd); + } else + clear_inuse_bit_at_offset(nextchunk, 0); + + first_unsorted = unsorted_bin->fd; + unsorted_bin->fd = p; + first_unsorted->bk = p; + + set_head(p, size | PREV_INUSE); + p->bk = unsorted_bin; + p->fd = first_unsorted; + set_foot(p, size); + } + + else { + size += nextsize; + set_head(p, size | PREV_INUSE); + av->top = p; + } + + } while ( (p = nextp) != 0); + + } + } while (fb++ != maxfb); + } + else { + malloc_init_state(av); + check_malloc_state(av); + } +} + +/* + ------------------------------ realloc ------------------------------ +*/ + +static Void_t* +_int_realloc(mstate av, Void_t* oldmem, size_t bytes) +{ + INTERNAL_SIZE_T nb; /* padded request size */ + + mchunkptr oldp; /* chunk corresponding to oldmem */ + INTERNAL_SIZE_T oldsize; /* its size */ + + mchunkptr newp; /* chunk to return */ + INTERNAL_SIZE_T newsize; /* its size */ + Void_t* newmem; /* corresponding user mem */ + + mchunkptr next; /* next contiguous chunk after oldp */ + + mchunkptr remainder; /* extra space at end of newp */ + unsigned long remainder_size; /* its size */ + + mchunkptr bck; /* misc temp for linking */ + mchunkptr fwd; /* misc temp for linking */ + + unsigned long copysize; /* bytes to copy */ + unsigned int ncopies; /* INTERNAL_SIZE_T words to copy */ + INTERNAL_SIZE_T* s; /* copy source */ + INTERNAL_SIZE_T* d; /* copy destination */ + + +#if REALLOC_ZERO_BYTES_FREES + if (bytes == 0) { + _int_free(av, oldmem); + return 0; + } +#endif + + /* realloc of null is supposed to be same as malloc */ + if (oldmem == 0) return _int_malloc(av, bytes); + + checked_request2size(bytes, nb); + + oldp = mem2chunk(oldmem); + oldsize = chunksize(oldp); + + check_inuse_chunk(av, oldp); + + // force to act like not mmapped + if (1) { + + if ((unsigned long)(oldsize) >= (unsigned long)(nb)) { + /* already big enough; split below */ + newp = oldp; + newsize = oldsize; + } + + else { + next = chunk_at_offset(oldp, oldsize); + + /* Try to expand forward into top */ + if (next == av->top && + (unsigned long)(newsize = oldsize + chunksize(next)) >= + (unsigned long)(nb + MINSIZE)) { + set_head_size(oldp, nb ); + av->top = chunk_at_offset(oldp, nb); + set_head(av->top, (newsize - nb) | PREV_INUSE); + check_inuse_chunk(av, oldp); + set_arena_for_chunk(oldp, av); + return chunk2mem(oldp); + } + + /* Try to expand forward into next chunk; split off remainder below */ + else if (next != av->top && + !inuse(next) && + (unsigned long)(newsize = oldsize + chunksize(next)) >= + (unsigned long)(nb)) { + newp = oldp; + unlink(next, bck, fwd); + } + + /* allocate, copy, free */ + else { + newmem = _int_malloc(av, nb - MALLOC_ALIGN_MASK); + if (newmem == 0) + return 0; /* propagate failure */ + + newp = mem2chunk(newmem); + newsize = chunksize(newp); + + /* + Avoid copy if newp is next chunk after oldp. + */ + if (newp == next) { + newsize += oldsize; + newp = oldp; + } + else { + /* + Unroll copy of <= 36 bytes (72 if 8byte sizes) + We know that contents have an odd number of + INTERNAL_SIZE_T-sized words; minimally 3. + */ + + copysize = oldsize - SIZE_SZ; + s = (INTERNAL_SIZE_T*)(oldmem); + d = (INTERNAL_SIZE_T*)(newmem); + ncopies = copysize / sizeof(INTERNAL_SIZE_T); + assert(ncopies >= 3); + + if (ncopies > 9) + MALLOC_COPY(d, s, copysize); + + else { + *(d+0) = *(s+0); + *(d+1) = *(s+1); + *(d+2) = *(s+2); + if (ncopies > 4) { + *(d+3) = *(s+3); + *(d+4) = *(s+4); + if (ncopies > 6) { + *(d+5) = *(s+5); + *(d+6) = *(s+6); + if (ncopies > 8) { + *(d+7) = *(s+7); + *(d+8) = *(s+8); + } + } + } + } + + _int_free(av, oldmem); + set_arena_for_chunk(newp, av); + check_inuse_chunk(av, newp); + return chunk2mem(newp); + } + } + } + + /* If possible, free extra space in old or extended chunk */ + + assert((unsigned long)(newsize) >= (unsigned long)(nb)); + + remainder_size = newsize - nb; + + if (remainder_size < MINSIZE) { /* not enough extra to split off */ + set_head_size(newp, newsize); + set_inuse_bit_at_offset(newp, newsize); + } + else { /* split remainder */ + remainder = chunk_at_offset(newp, nb); + set_head_size(newp, nb ); + set_head(remainder, remainder_size | PREV_INUSE ); + /* Mark remainder as inuse so free() won't complain */ + set_inuse_bit_at_offset(remainder, remainder_size); + set_arena_for_chunk(remainder, av); + _int_free(av, chunk2mem(remainder)); + } + + set_arena_for_chunk(newp, av); + check_inuse_chunk(av, newp); + return chunk2mem(newp); + } + + /* + Handle mmap cases + */ + + else { + /* If !HAVE_MMAP, but chunk_is_mmapped, user must have overwritten mem */ + check_malloc_state(av); + MALLOC_FAILURE_ACTION; + return 0; + } +} + +/* + ------------------------------ memalign ------------------------------ +*/ + +static Void_t* +_int_memalign(mstate av, size_t alignment, size_t bytes) +{ + INTERNAL_SIZE_T nb; /* padded request size */ + char* m; /* memory returned by malloc call */ + mchunkptr p; /* corresponding chunk */ + char* brk; /* alignment point within p */ + mchunkptr newp; /* chunk to return */ + INTERNAL_SIZE_T newsize; /* its size */ + INTERNAL_SIZE_T leadsize; /* leading space before alignment point */ + mchunkptr remainder; /* spare room at end to split off */ + unsigned long remainder_size; /* its size */ + INTERNAL_SIZE_T size; + + /* If need less alignment than we give anyway, just relay to malloc */ + + if (alignment <= MALLOC_ALIGNMENT) return _int_malloc(av, bytes); + + /* Otherwise, ensure that it is at least a minimum chunk size */ + + if (alignment < MINSIZE) alignment = MINSIZE; + + /* Make sure alignment is power of 2 (in case MINSIZE is not). */ + if ((alignment & (alignment - 1)) != 0) { + size_t a = MALLOC_ALIGNMENT * 2; + while ((unsigned long)a < (unsigned long)alignment) a <<= 1; + alignment = a; + } + + checked_request2size(bytes, nb); + + /* + Strategy: find a spot within that chunk that meets the alignment + request, and then possibly free the leading and trailing space. + */ + + + /* Call malloc with worst case padding to hit alignment. */ + + m = (char*)(_int_malloc(av, nb + alignment + MINSIZE)); + + if (m == 0) return 0; /* propagate failure */ + + p = mem2chunk(m); + + if ((((unsigned long)(m)) % alignment) != 0) { /* misaligned */ + + /* + Find an aligned spot inside chunk. Since we need to give back + leading space in a chunk of at least MINSIZE, if the first + calculation places us at a spot with less than MINSIZE leader, + we can move to the next aligned spot -- we've allocated enough + total room so that this is always possible. + */ + + brk = (char*)mem2chunk(((unsigned long)(m + alignment - 1)) & + -((signed long) alignment)); + if ((unsigned long)(brk - (char*)(p)) < MINSIZE) + brk += alignment; + + newp = (mchunkptr)brk; + leadsize = brk - (char*)(p); + newsize = chunksize(p) - leadsize; + + /* For mmapped chunks, just adjust offset */ + if (chunk_is_mmapped(p)) { + newp->prev_size = p->prev_size + leadsize; + set_head(newp, newsize|IS_MMAPPED); + set_arena_for_chunk(newp, av); + return chunk2mem(newp); + } + + /* Otherwise, give back leader, use the rest */ + set_head(newp, newsize | PREV_INUSE ); + set_inuse_bit_at_offset(newp, newsize); + set_head_size(p, leadsize); + set_arena_for_chunk(p, av); + _int_free(av, chunk2mem(p)); + p = newp; + + assert (newsize >= nb && + (((unsigned long)(chunk2mem(p))) % alignment) == 0); + } + + /* Also give back spare room at the end */ + if (!chunk_is_mmapped(p)) { + size = chunksize(p); + if ((unsigned long)(size) > (unsigned long)(nb + MINSIZE)) { + remainder_size = size - nb; + remainder = chunk_at_offset(p, nb); + set_head(remainder, remainder_size | PREV_INUSE ); + set_head_size(p, nb); + set_arena_for_chunk(remainder, av); + _int_free(av, chunk2mem(remainder)); + } + } + + set_arena_for_chunk(p, av); + check_inuse_chunk(av, p); + return chunk2mem(p); +} + +#if 1 +/* + ------------------------------ calloc ------------------------------ +*/ + +#if __STD_C +Void_t* cALLOc(cvmx_arena_list_t arena_list, size_t n_elements, size_t elem_size) +#else +Void_t* cALLOc(n_elements, elem_size) size_t n_elements; size_t elem_size; +#endif +{ + mchunkptr p; + unsigned long clearsize; + unsigned long nclears; + INTERNAL_SIZE_T* d; + + Void_t* mem = public_mALLOc(arena_list, n_elements * elem_size); + + if (mem != 0) { + p = mem2chunk(mem); + + { + /* + Unroll clear of <= 36 bytes (72 if 8byte sizes) + We know that contents have an odd number of + INTERNAL_SIZE_T-sized words; minimally 3. + */ + + d = (INTERNAL_SIZE_T*)mem; + clearsize = chunksize(p) - SIZE_SZ; + nclears = clearsize / sizeof(INTERNAL_SIZE_T); + assert(nclears >= 3); + + if (nclears > 9) + MALLOC_ZERO(d, clearsize); + + else { + *(d+0) = 0; + *(d+1) = 0; + *(d+2) = 0; + if (nclears > 4) { + *(d+3) = 0; + *(d+4) = 0; + if (nclears > 6) { + *(d+5) = 0; + *(d+6) = 0; + if (nclears > 8) { + *(d+7) = 0; + *(d+8) = 0; + } + } + } + } + } + } + return mem; +} +#endif + + +/* + ------------------------- malloc_usable_size ------------------------- +*/ + +#if __STD_C +size_t mUSABLe(Void_t* mem) +#else +size_t mUSABLe(mem) Void_t* mem; +#endif +{ + mchunkptr p; + if (mem != 0) { + p = mem2chunk(mem); + if (chunk_is_mmapped(p)) + return chunksize(p) - 3*SIZE_SZ; /* updated size for adding arena_ptr */ + else if (inuse(p)) + return chunksize(p) - 2*SIZE_SZ; /* updated size for adding arena_ptr */ + } + return 0; +} + +/* + ------------------------------ mallinfo ------------------------------ +*/ + +struct mallinfo mALLINFo(mstate av) +{ + struct mallinfo mi; + int i; + mbinptr b; + mchunkptr p; + INTERNAL_SIZE_T avail; + INTERNAL_SIZE_T fastavail; + int nblocks; + int nfastblocks; + + /* Ensure initialization */ + if (av->top == 0) malloc_consolidate(av); + + check_malloc_state(av); + + /* Account for top */ + avail = chunksize(av->top); + nblocks = 1; /* top always exists */ + + /* traverse fastbins */ + nfastblocks = 0; + fastavail = 0; + + for (i = 0; i < NFASTBINS; ++i) { + for (p = av->fastbins[i]; p != 0; p = p->fd) { + ++nfastblocks; + fastavail += chunksize(p); + } + } + + avail += fastavail; + + /* traverse regular bins */ + for (i = 1; i < NBINS; ++i) { + b = bin_at(av, i); + for (p = last(b); p != b; p = p->bk) { + ++nblocks; + avail += chunksize(p); + } + } + + mi.smblks = nfastblocks; + mi.ordblks = nblocks; + mi.fordblks = avail; + mi.uordblks = av->system_mem - avail; + mi.arena = av->system_mem; + mi.fsmblks = fastavail; + mi.keepcost = chunksize(av->top); + return mi; +} + +/* + ------------------------------ malloc_stats ------------------------------ +*/ + +void mSTATs() +{ +} + + +/* + ------------------------------ mallopt ------------------------------ +*/ + +#if 0 +#if __STD_C +int mALLOPt(int param_number, int value) +#else +int mALLOPt(param_number, value) int param_number; int value; +#endif +{ +} +#endif + + +/* + -------------------- Alternative MORECORE functions -------------------- +*/ + + +/* + General Requirements for MORECORE. + + The MORECORE function must have the following properties: + + If MORECORE_CONTIGUOUS is false: + + * MORECORE must allocate in multiples of pagesize. It will + only be called with arguments that are multiples of pagesize. + + * MORECORE(0) must return an address that is at least + MALLOC_ALIGNMENT aligned. (Page-aligning always suffices.) + + else (i.e. If MORECORE_CONTIGUOUS is true): + + * Consecutive calls to MORECORE with positive arguments + return increasing addresses, indicating that space has been + contiguously extended. + + * MORECORE need not allocate in multiples of pagesize. + Calls to MORECORE need not have args of multiples of pagesize. + + * MORECORE need not page-align. + + In either case: + + * MORECORE may allocate more memory than requested. (Or even less, + but this will generally result in a malloc failure.) + + * MORECORE must not allocate memory when given argument zero, but + instead return one past the end address of memory from previous + nonzero call. This malloc does NOT call MORECORE(0) + until at least one call with positive arguments is made, so + the initial value returned is not important. + + * Even though consecutive calls to MORECORE need not return contiguous + addresses, it must be OK for malloc'ed chunks to span multiple + regions in those cases where they do happen to be contiguous. + + * MORECORE need not handle negative arguments -- it may instead + just return MORECORE_FAILURE when given negative arguments. + Negative arguments are always multiples of pagesize. MORECORE + must not misinterpret negative args as large positive unsigned + args. You can suppress all such calls from even occurring by defining + MORECORE_CANNOT_TRIM, + + There is some variation across systems about the type of the + argument to sbrk/MORECORE. If size_t is unsigned, then it cannot + actually be size_t, because sbrk supports negative args, so it is + normally the signed type of the same width as size_t (sometimes + declared as "intptr_t", and sometimes "ptrdiff_t"). It doesn't much + matter though. Internally, we use "long" as arguments, which should + work across all reasonable possibilities. + + Additionally, if MORECORE ever returns failure for a positive + request, and HAVE_MMAP is true, then mmap is used as a noncontiguous + system allocator. This is a useful backup strategy for systems with + holes in address spaces -- in this case sbrk cannot contiguously + expand the heap, but mmap may be able to map noncontiguous space. + + If you'd like mmap to ALWAYS be used, you can define MORECORE to be + a function that always returns MORECORE_FAILURE. + + If you are using this malloc with something other than sbrk (or its + emulation) to supply memory regions, you probably want to set + MORECORE_CONTIGUOUS as false. As an example, here is a custom + allocator kindly contributed for pre-OSX macOS. It uses virtually + but not necessarily physically contiguous non-paged memory (locked + in, present and won't get swapped out). You can use it by + uncommenting this section, adding some #includes, and setting up the + appropriate defines above: + + #define MORECORE osMoreCore + #define MORECORE_CONTIGUOUS 0 + + There is also a shutdown routine that should somehow be called for + cleanup upon program exit. + + #define MAX_POOL_ENTRIES 100 + #define MINIMUM_MORECORE_SIZE (64 * 1024) + static int next_os_pool; + void *our_os_pools[MAX_POOL_ENTRIES]; + + void *osMoreCore(int size) + { + void *ptr = 0; + static void *sbrk_top = 0; + + if (size > 0) + { + if (size < MINIMUM_MORECORE_SIZE) + size = MINIMUM_MORECORE_SIZE; + if (CurrentExecutionLevel() == kTaskLevel) + ptr = PoolAllocateResident(size + RM_PAGE_SIZE, 0); + if (ptr == 0) + { + return (void *) MORECORE_FAILURE; + } + // save ptrs so they can be freed during cleanup + our_os_pools[next_os_pool] = ptr; + next_os_pool++; + ptr = (void *) ((((unsigned long) ptr) + RM_PAGE_MASK) & ~RM_PAGE_MASK); + sbrk_top = (char *) ptr + size; + return ptr; + } + else if (size < 0) + { + // we don't currently support shrink behavior + return (void *) MORECORE_FAILURE; + } + else + { + return sbrk_top; + } + } + + // cleanup any allocated memory pools + // called as last thing before shutting down driver + + void osCleanupMem(void) + { + void **ptr; + + for (ptr = our_os_pools; ptr < &our_os_pools[MAX_POOL_ENTRIES]; ptr++) + if (*ptr) + { + PoolDeallocate(*ptr); + *ptr = 0; + } + } + +*/ + + + +/* ------------------------------------------------------------ +History: + +[see ftp://g.oswego.edu/pub/misc/malloc.c for the history of dlmalloc] + +*/ |