| field | value |
|---|---|
| author | jeff <jeff@FreeBSD.org>, 2002-03-19 09:11:49 +0000 |
| committer | jeff <jeff@FreeBSD.org>, 2002-03-19 09:11:49 +0000 |
| commit | 2923687da3c046deea227e675d5af075b9fa52d4 |
| tree | 9added529dcba41e3e9f6e15e334a8a06d6cb0f2 /sys/vm |
| parent | d95a4801fc26e963b0da94ad73f00ce63c5ed657 |
This is the first part of the new kernel memory allocator. This replaces
malloc(9) and vm_zone with a slab-like allocator.
Reviewed by: arch@
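
Since this commit introduces the UMA interface, a minimal caller-side sketch may help orient the diff below. It is illustrative only: the calls, flags, and wait semantics follow the prototypes documented in the new sys/vm/uma.h, but `struct foo`, `foo_zone`, and the surrounding functions are hypothetical.

```c
#include <vm/uma.h>		/* new header added by this commit */

struct foo {			/* hypothetical client structure */
	int	f_refcnt;
};

static uma_zone_t foo_zone;

static void
foo_init(void)
{
	/* No ctor/dtor/init/fini; default page backend, pointer alignment. */
	foo_zone = uma_zcreate("foo", sizeof(struct foo),
	    NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0);
}

static struct foo *
foo_alloc(void)
{
	/* Per the uma.h comments, M_WAITOK guarantees a non-NULL item. */
	return (uma_zalloc(foo_zone, M_WAITOK));
}

static void
foo_free(struct foo *fp)
{
	uma_zfree(foo_zone, fp);
}
```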
Diffstat (limited to 'sys/vm')
| mode | file | lines changed |
|---|---|---|
| -rw-r--r-- | sys/vm/device_pager.c | 5 |
| -rw-r--r-- | sys/vm/swap_pager.c | 16 |
| -rw-r--r-- | sys/vm/uma.h | 420 |
| -rw-r--r-- | sys/vm/uma_core.c | 1900 |
| -rw-r--r-- | sys/vm/uma_int.h | 328 |
| -rw-r--r-- | sys/vm/vm_init.c | 1 |
| -rw-r--r-- | sys/vm/vm_map.c | 165 |
| -rw-r--r-- | sys/vm/vm_map.h | 1 |
| -rw-r--r-- | sys/vm/vm_object.c | 56 |
| -rw-r--r-- | sys/vm/vm_page.c | 16 |
| -rw-r--r-- | sys/vm/vm_pageout.c | 1 |
| -rw-r--r-- | sys/vm/vm_zone.h | 50 |
12 files changed, 2865 insertions, 94 deletions
diff --git a/sys/vm/device_pager.c b/sys/vm/device_pager.c index af52cd9..bf6b3d9 100644 --- a/sys/vm/device_pager.c +++ b/sys/vm/device_pager.c @@ -73,7 +73,9 @@ static struct mtx dev_pager_mtx; static vm_zone_t fakepg_zone; +#if 0 static struct vm_zone fakepg_zone_store; +#endif static vm_page_t dev_pager_getfake __P((vm_offset_t)); static void dev_pager_putfake __P((vm_page_t)); @@ -94,8 +96,11 @@ dev_pager_init() TAILQ_INIT(&dev_pager_object_list); sx_init(&dev_pager_sx, "dev_pager create"); mtx_init(&dev_pager_mtx, "dev_pager list", MTX_DEF); +#if 0 fakepg_zone = &fakepg_zone_store; zinitna(fakepg_zone, NULL, "DP fakepg", sizeof(struct vm_page), 0, 0, 2); +#endif + fakepg_zone = zinit("DP fakepg", sizeof(struct vm_page), 0, 0, 0); } static vm_object_t diff --git a/sys/vm/swap_pager.c b/sys/vm/swap_pager.c index 5e7bf2c..7b9c49a 100644 --- a/sys/vm/swap_pager.c +++ b/sys/vm/swap_pager.c @@ -320,15 +320,15 @@ swap_pager_swap_init() if (maxswzone && n > maxswzone / sizeof(struct swblock)) n = maxswzone / sizeof(struct swblock); n2 = n; + swap_zone = zinit( + "SWAPMETA", + sizeof(struct swblock), + n, + ZONE_INTERRUPT, + 1 + ); do { - swap_zone = zinit( - "SWAPMETA", - sizeof(struct swblock), - n, - ZONE_INTERRUPT, - 1 - ); - if (swap_zone != NULL) + if (uma_zone_set_obj(swap_zone, NULL, n)) break; /* * if the allocation failed, try a zone two thirds the diff --git a/sys/vm/uma.h b/sys/vm/uma.h new file mode 100644 index 0000000..be2c90b --- /dev/null +++ b/sys/vm/uma.h @@ -0,0 +1,420 @@ +/* + * Copyright (c) 2002, Jeffrey Roberson <jroberson@chesapeake.net> + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice unmodified, this list of conditions, and the following + * disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * $FreeBSD$ + * + */ + +/* + * uma.h - External definitions for the Universal Memory Allocator + * + * Jeff Roberson <jroberson@chesapeake.net> +*/ + +#ifndef VM_UMA_H +#define VM_UMA_H + +#include <sys/param.h> /* For NULL */ +#include <sys/malloc.h> /* For M_* */ + +/* User visable parameters */ +#define UMA_SMALLEST_UNIT (PAGE_SIZE / 256) /* Smallest item allocated */ + +/* Types and type defs */ + +struct uma_zone; +/* Opaque type used as a handle to the zone */ +typedef struct uma_zone * uma_zone_t; + +/* + * Item constructor + * + * Arguments: + * item A pointer to the memory which has been allocated. 
+ * arg The arg field passed to uma_zalloc_arg + * size The size of the allocated item + * + * Returns: + * Nothing + * + * Discussion: + * The constructor is called just before the memory is returned + * to the user. It may block if neccisary. + */ +typedef void (*uma_ctor)(void *mem, int size, void *arg); + +/* + * Item destructor + * + * Arguments: + * item A pointer to the memory which has been allocated. + * size The size of the item being destructed. + * arg Argument passed through uma_zfree_arg + * + * Returns: + * Nothing + * + * Discussion: + * The destructor may perform operations that differ from those performed + * by the initializer, but it must leave the object in the same state. + * This IS type stable storage. This is called after EVERY zfree call. + */ +typedef void (*uma_dtor)(void *mem, int size, void *arg); + +/* + * Item initializer + * + * Arguments: + * item A pointer to the memory which has been allocated. + * size The size of the item being initialized. + * + * Returns: + * Nothing + * + * Discussion: + * The initializer is called when the memory is cached in the uma zone. + * this should be the same state that the destructor leaves the object in. + */ +typedef void (*uma_init)(void *mem, int size); + +/* + * Item discard function + * + * Arguments: + * item A pointer to memory which has been 'freed' but has not left the + * zone's cache. + * size The size of the item being discarded. + * + * Returns: + * Nothing + * + * Discussion: + * This routine is called when memory leaves a zone and is returned to the + * system for other uses. It is the counter part to the init function. + */ +typedef void (*uma_fini)(void *mem, int size); + +/* + * What's the difference between initializing and constructing? + * + * The item is initialized when it is cached, and this is the state that the + * object should be in when returned to the allocator. The purpose of this is + * to remove some code which would otherwise be called on each allocation by + * utilizing a known, stable state. This differs from the constructor which + * will be called on EVERY allocation. + * + * For example, in the initializer you may want to initialize embeded locks, + * NULL list pointers, set up initial states, magic numbers, etc. This way if + * the object is held in the allocator and re-used it won't be neccisary to + * re-initialize it. + * + * The constructor may be used to lock a data structure, link it on to lists, + * bump reference counts or total counts of outstanding structures, etc. + * + */ + + +/* Function proto types */ + +/* + * Create a new uma zone + * + * Arguments: + * name The text name of the zone for debugging and stats, this memory + * should not be freed until the zone has been deallocated. + * size The size of the object that is being created. + * ctor The constructor that is called when the object is allocated + * dtor The destructor that is called when the object is freed. + * init An initializer that sets up the initial state of the memory. + * fini A discard function that undoes initialization done by init. + * ctor/dtor/init/fini may all be null, see notes above. + * align A bitmask that corisponds to the requested alignment + * eg 4 would be 0x3 + * flags A set of parameters that control the behavior of the zone + * + * Returns: + * A pointer to a structure which is intended to be opaque to users of + * the interface. The value may be null if the wait flag is not set. 
+ */ + +uma_zone_t uma_zcreate(char *name, int size, uma_ctor ctor, uma_dtor dtor, + uma_init uminit, uma_fini fini, int align, + u_int16_t flags); + +/* Definitions for uma_zcreate flags */ +#define UMA_ZONE_PAGEABLE 0x0001 /* Return items not fully backed by + physical memory XXX Not yet */ +#define UMA_ZONE_ZINIT 0x0002 /* Initialize with zeros */ +#define UMA_ZONE_STATIC 0x0004 /* Staticly sized zone */ +#define UMA_ZONE_OFFPAGE 0x0008 /* Force the slab structure allocation + off of the real memory */ +#define UMA_ZONE_MALLOC 0x0010 /* For use by malloc(9) only! */ +#define UMA_ZONE_NOFREE 0x0020 /* Do not free slabs of this type! */ + +/* Definitions for align */ +#define UMA_ALIGN_PTR (sizeof(void *) - 1) /* Alignment fit for ptr */ +#define UMA_ALIGN_LONG (sizeof(long) - 1) /* "" long */ +#define UMA_ALIGN_INT (sizeof(int) - 1) /* "" int */ +#define UMA_ALIGN_SHORT (sizeof(short) - 1) /* "" short */ +#define UMA_ALIGN_CHAR (sizeof(char) - 1) /* "" char */ +#define UMA_ALIGN_CACHE (16 - 1) /* Cache line size align */ + +/* + * Destroys a uma zone + * + * Arguments: + * zone The zone we want to destroy. + * wait This flag indicates whether or not we should wait for all + * allocations to free, or return an errno on outstanding memory. + * + * Returns: + * 0 on successful completion, or EWOULDBLOCK if there are outstanding + * allocations and the wait flag is M_NOWAIT + */ + +int uma_zdestroy(uma_zone_t zone, int wait); + +/* + * Allocates an item out of a zone + * + * Arguments: + * zone The zone we are allocating from + * arg This data is passed to the ctor function + * wait This flag indicates whether or not we are allowed to block while + * allocating memory for this zone should we run out. + * + * Returns: + * A non null pointer to an initialized element from the zone is + * garanteed if the wait flag is M_WAITOK, otherwise a null pointer may be + * returned if the zone is empty or the ctor failed. + */ + +void *uma_zalloc_arg(uma_zone_t zone, void *arg, int wait); + +/* + * Allocates an item out of a zone without supplying an argument + * + * This is just a wrapper for uma_zalloc_arg for convenience. + * + */ +static __inline void *uma_zalloc(uma_zone_t zone, int wait); + +static __inline void * +uma_zalloc(uma_zone_t zone, int wait) +{ + return uma_zalloc_arg(zone, NULL, wait); +} + +/* + * Frees an item back into the specified zone. + * + * Arguments: + * zone The zone the item was originally allocated out of. + * item The memory to be freed. + * arg Argument passed to the destructor + * + * Returns: + * Nothing. + */ + +void uma_zfree_arg(uma_zone_t zone, void *item, void *arg); + +/* + * Frees an item back to a zone without supplying an argument + * + * This is just a wrapper for uma_zfree_arg for convenience. + * + */ +static __inline void uma_zfree(uma_zone_t zone, void *item); + +static __inline void +uma_zfree(uma_zone_t zone, void *item) +{ + return uma_zfree_arg(zone, item, NULL); +} + +/* + * XXX The rest of the prototypes in this header are h0h0 magic for the VM. + * If you think you need to use it for a normal zone you're probably incorrect. + */ + +/* + * Backend page supplier routines + * + * Arguments: + * zone The zone that is requesting pages + * size The number of bytes being requested + * pflag Flags for these memory pages, see below. + * wait Indicates our willingness to block. + * + * Returns: + * A pointer to the alloced memory or NULL on failure. 
+ */ + +typedef void *(*uma_alloc)(uma_zone_t zone, int size, u_int8_t *pflag, int wait); + +/* + * Backend page free routines + * + * Arguments: + * item A pointer to the previously allocated pages + * size The original size of the allocation + * pflag The flags for the slab. See UMA_SLAB_* below + * + * Returns: + * None + */ +typedef void (*uma_free)(void *item, int size, u_int8_t pflag); + + + +/* + * Sets up the uma allocator. (Called by vm_mem_init) + * + * Arguments: + * bootmem A pointer to memory used to bootstrap the system. + * + * Returns: + * Nothing + * + * Discussion: + * This memory is used for zones which allocate things before the + * backend page supplier can give us pages. It should be + * UMA_SLAB_SIZE * UMA_BOOT_PAGES bytes. (see uma_int.h) + * + */ + +void uma_startup(void *bootmem); + +/* + * Finishes starting up the allocator. This should + * be called when kva is ready for normal allocs. + * + * Arguments: + * hash An area of memory that will become the malloc hash + * elems The number of elements in this array + * + * Returns: + * Nothing + * + * Discussion: + * uma_startup2 is called by kmeminit() to prepare the malloc + * hash bucket, and enable use of uma for malloc ops. + */ + +void uma_startup2(void *hash, u_long elems); + +/* + * Reclaims unused memory for all zones + * + * Arguments: + * None + * Returns: + * None + * + * This should only be called by the page out daemon. + */ + +void uma_reclaim(void); + +/* + * Switches the backing object of a zone + * + * Arguments: + * zone The zone to update + * obj The obj to use for future allocations + * size The size of the object to allocate + * + * Returns: + * 0 if kva space can not be allocated + * 1 if successful + * + * Discussion: + * A NULL object can be used and uma will allocate one for you. Setting + * the size will limit the amount of memory allocated to this zone. + * + */ +struct vm_object; +int uma_zone_set_obj(uma_zone_t zone, struct vm_object *obj, int size); + + +/* + * Replaces the standard page_alloc or obj_alloc functions for this zone + * + * Arguments: + * zone The zone whos back end allocator is being changed. + * allocf A pointer to the allocation function + * + * Returns: + * Nothing + * + * Discussion: + * This could be used to implement pageable allocation, or perhaps + * even DMA allocators if used in conjunction with the OFFPAGE + * zone flag. + */ + +void uma_zone_set_allocf(uma_zone_t zone, uma_alloc allocf); + +/* + * Used for freeing memory provided by the allocf above + * + * Arguments: + * zone The zone that intends to use this free routine. + * freef The page freeing routine. + * + * Returns: + * Nothing + */ + +void uma_zone_set_freef(uma_zone_t zone, uma_free freef); + +/* + * These flags are setable in the allocf and visable in the freef. 
+ */ +#define UMA_SLAB_BOOT 0x01 /* Slab alloced from boot pages */ +#define UMA_SLAB_KMEM 0x02 /* Slab alloced from kmem_map */ +#define UMA_SLAB_KMAP 0x04 /* Slab alloced from kernel_map */ +#define UMA_SLAB_PRIV 0x08 /* Slab alloced from priv allocator */ +#define UMA_SLAB_OFFP 0x10 /* Slab is managed seperately */ +#define UMA_SLAB_MALLOC 0x20 /* Slab is a large malloc slab */ +/* 0x40 and 0x80 are available */ + +/* + * Used to pre-fill a zone with some number of items + * + * Arguments: + * zone The zone to fill + * itemcnt The number of items to reserve + * + * Returns: + * Nothing + * + * NOTE: This is blocking and should only be done at startup + */ +void uma_prealloc(uma_zone_t zone, int itemcnt); + + +#endif diff --git a/sys/vm/uma_core.c b/sys/vm/uma_core.c new file mode 100644 index 0000000..5b4be4b --- /dev/null +++ b/sys/vm/uma_core.c @@ -0,0 +1,1900 @@ +/* + * Copyright (c) 2002, Jeffrey Roberson <jroberson@chesapeake.net> + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice unmodified, this list of conditions, and the following + * disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * $FreeBSD$ + * + */ + +/* + * uma_core.c Implementation of the Universal Memory allocator + * + * This allocator is intended to replace the multitude of similar object caches + * in the standard FreeBSD kernel. The intent is to be flexible as well as + * effecient. A primary design goal is to return unused memory to the rest of + * the system. This will make the system as a whole more flexible due to the + * ability to move memory to subsystems which most need it instead of leaving + * pools of reserved memory unused. + * + * The basic ideas stem from similar slab/zone based allocators whose algorithms + * are well known. + * + */ + +/* + * TODO: + * - Improve memory usage for large allocations + * - Improve INVARIANTS (0xdeadc0de write out) + * - Investigate cache size adjustments + */ + +/* I should really use ktr.. 
*/ +/* +#define UMA_DEBUG 1 +#define UMA_DEBUG_ALLOC 1 +#define UMA_DEBUG_ALLOC_1 1 +*/ + + +#include "opt_param.h" +#include <sys/param.h> +#include <sys/systm.h> +#include <sys/kernel.h> +#include <sys/types.h> +#include <sys/queue.h> +#include <sys/malloc.h> +#include <sys/lock.h> +#include <sys/sysctl.h> +#include <machine/types.h> +#include <sys/mutex.h> +#include <sys/smp.h> + +#include <vm/vm.h> +#include <vm/vm_object.h> +#include <vm/vm_page.h> +#include <vm/vm_param.h> +#include <vm/vm_map.h> +#include <vm/vm_kern.h> +#include <vm/vm_extern.h> +#include <vm/uma.h> +#include <vm/uma_int.h> + +/* + * This is the zone from which all zones are spawned. The idea is that even + * the zone heads are allocated from the allocator, so we use the bss section + * to bootstrap us. + */ +static struct uma_zone master_zone; +static uma_zone_t zones = &master_zone; + +/* This is the zone from which all of uma_slab_t's are allocated. */ +static uma_zone_t slabzone; + +/* + * The initial hash tables come out of this zone so they can be allocated + * prior to malloc coming up. + */ +static uma_zone_t hashzone; + +/* + * Zone that buckets come from. + */ +static uma_zone_t bucketzone; + +/* Linked list of all zones in the system */ +static LIST_HEAD(,uma_zone) uma_zones = LIST_HEAD_INITIALIZER(&uma_zones); + +/* This mutex protects the zone list */ +static struct mtx uma_mtx; + +/* Linked list of boot time pages */ +static LIST_HEAD(,uma_slab) uma_boot_pages = + LIST_HEAD_INITIALIZER(&uma_boot_pages); + +/* Count of free boottime pages */ +static int uma_boot_free = 0; + +/* Is the VM done starting up? */ +static int booted = 0; + +/* This is the handle used to schedule our working set calculator */ +static struct callout uma_callout; + +/* This is mp_maxid + 1, for use while looping over each cpu */ +static int maxcpu; + +/* + * This structure is passed as the zone ctor arg so that I don't have to create + * a special allocation function just for zones. + */ +struct uma_zctor_args { + char *name; + int size; + uma_ctor ctor; + uma_dtor dtor; + uma_init uminit; + uma_fini fini; + int align; + u_int16_t flags; +}; + +/* + * This is the malloc hash table which is used to find the zone that a + * malloc allocation came from. It is not currently resizeable. The + * memory for the actual hash bucket is allocated in kmeminit. + */ +struct uma_hash mhash; +struct uma_hash *mallochash = &mhash; + +/* Prototypes.. 
*/ + +static void *obj_alloc(uma_zone_t, int, u_int8_t *, int); +static void *page_alloc(uma_zone_t, int, u_int8_t *, int); +static void page_free(void *, int, u_int8_t); +static uma_slab_t slab_zalloc(uma_zone_t, int); +static void cache_drain(uma_zone_t); +static void bucket_drain(uma_zone_t, uma_bucket_t); +static void zone_drain(uma_zone_t); +static void zone_ctor(void *, int, void *); +static void zero_init(void *, int); +static void zone_small_init(uma_zone_t zone); +static void zone_large_init(uma_zone_t zone); +static void zone_foreach(void (*zfunc)(uma_zone_t)); +static void zone_timeout(uma_zone_t zone); +static void hash_expand(struct uma_hash *); +static void uma_timeout(void *); +static void uma_startup3(void); +static void *uma_zalloc_internal(uma_zone_t, void *, int, int *, int); +static void uma_zfree_internal(uma_zone_t, + void *, void *, int); +void uma_print_zone(uma_zone_t); +void uma_print_stats(void); +static int sysctl_vm_zone(SYSCTL_HANDLER_ARGS); + +SYSCTL_OID(_vm, OID_AUTO, zone, CTLTYPE_STRING|CTLFLAG_RD, + NULL, 0, sysctl_vm_zone, "A", "Zone Info"); +SYSINIT(uma_startup3, SI_SUB_VM_CONF, SI_ORDER_SECOND, uma_startup3, NULL); + + +/* + * Routine called by timeout which is used to fire off some time interval + * based calculations. (working set, stats, etc.) + * + * Arguments: + * arg Unused + * + * Returns: + * Nothing + */ +static void +uma_timeout(void *unused) +{ + zone_foreach(zone_timeout); + + /* Reschedule this event */ + callout_reset(&uma_callout, UMA_WORKING_TIME * hz, uma_timeout, NULL); +} + +/* + * Routine to perform timeout driven calculations. This does the working set + * as well as hash expanding, and per cpu statistics aggregation. + * + * Arguments: + * zone The zone to operate on + * + * Returns: + * Nothing + */ +static void +zone_timeout(uma_zone_t zone) +{ + uma_cache_t cache; + u_int64_t alloc; + int free; + int cpu; + + alloc = 0; + free = 0; + + /* + * Aggregate per cpu cache statistics back to the zone. + * + * I may rewrite this to set a flag in the per cpu cache instead of + * locking. If the flag is not cleared on the next round I will have + * to lock and do it here instead so that the statistics don't get too + * far out of sync. + */ + if (!(zone->uz_flags & UMA_ZFLAG_INTERNAL)) { + for (cpu = 0; cpu < maxcpu; cpu++) { + if (CPU_ABSENT(cpu)) + continue; + CPU_LOCK(zone, cpu); + cache = &zone->uz_cpu[cpu]; + /* Add them up, and reset */ + alloc += cache->uc_allocs; + cache->uc_allocs = 0; + if (cache->uc_allocbucket) + free += cache->uc_allocbucket->ub_ptr + 1; + if (cache->uc_freebucket) + free += cache->uc_freebucket->ub_ptr + 1; + CPU_UNLOCK(zone, cpu); + } + } + + /* Now push these stats back into the zone.. */ + ZONE_LOCK(zone); + zone->uz_allocs += alloc; + + /* + * cachefree is an instantanious snapshot of what is in the per cpu + * caches, not an accurate counter + */ + zone->uz_cachefree = free; + + /* + * Expand the zone hash table. + * + * This is done if the number of slabs is larger than the hash size. + * What I'm trying to do here is completely reduce collisions. This + * may be a little aggressive. Should I allow for two collisions max? + */ + + if ((zone->uz_flags & UMA_ZFLAG_OFFPAGE) && + !(zone->uz_flags & UMA_ZFLAG_MALLOC)) { + if (zone->uz_pages / zone->uz_ppera + >= zone->uz_hash.uh_hashsize) + hash_expand(&zone->uz_hash); + } + + /* + * Here we compute the working set size as the total number of items + * left outstanding since the last time interval. This is slightly + * suboptimal. 
What we really want is the highest number of outstanding + * items during the last time quantum. This should be close enough. + * + * The working set size is used to throttle the zone_drain function. + * We don't want to return memory that we may need again immediately. + */ + alloc = zone->uz_allocs - zone->uz_oallocs; + zone->uz_oallocs = zone->uz_allocs; + zone->uz_wssize = alloc; + + ZONE_UNLOCK(zone); +} + +/* + * Expands the hash table for OFFPAGE zones. This is done from zone_timeout + * to reduce collisions. This must not be done in the regular allocation path, + * otherwise, we can recurse on the vm while allocating pages. + * + * Arguments: + * hash The hash you want to expand by a factor of two. + * + * Returns: + * Nothing + * + * Discussion: + */ +static void +hash_expand(struct uma_hash *hash) +{ + struct slabhead *newhash; + struct slabhead *oldhash; + uma_slab_t slab; + int hzonefree; + int hashsize; + int alloc; + int hval; + int i; + + + /* + * Remember the old hash size and see if it has to go back to the + * hash zone, or malloc. The hash zone is used for the initial hash + */ + + hashsize = hash->uh_hashsize; + oldhash = hash->uh_slab_hash; + + if (hashsize == UMA_HASH_SIZE_INIT) + hzonefree = 1; + else + hzonefree = 0; + + + /* We're just going to go to a power of two greater */ + if (hash->uh_hashsize) { + alloc = sizeof(hash->uh_slab_hash[0]) * (hash->uh_hashsize * 2); + /* XXX Shouldn't be abusing DEVBUF here */ + newhash = (struct slabhead *)malloc(alloc, M_DEVBUF, M_NOWAIT); + if (newhash == NULL) { + return; + } + hash->uh_hashsize *= 2; + } else { + alloc = sizeof(hash->uh_slab_hash[0]) * UMA_HASH_SIZE_INIT; + newhash = uma_zalloc_internal(hashzone, NULL, M_WAITOK, NULL, -1); + hash->uh_hashsize = UMA_HASH_SIZE_INIT; + } + + bzero(newhash, alloc); + + hash->uh_hashmask = hash->uh_hashsize - 1; + + /* + * I need to investigate hash algorithms for resizing without a + * full rehash. + */ + + for (i = 0; i < hashsize; i++) + while (!SLIST_EMPTY(&hash->uh_slab_hash[i])) { + slab = SLIST_FIRST(&hash->uh_slab_hash[i]); + SLIST_REMOVE_HEAD(&hash->uh_slab_hash[i], us_hlink); + hval = UMA_HASH(hash, slab->us_data); + SLIST_INSERT_HEAD(&newhash[hval], slab, us_hlink); + } + + if (hash->uh_slab_hash) { + if (hzonefree) + uma_zfree_internal(hashzone, + hash->uh_slab_hash, NULL, 0); + else + free(hash->uh_slab_hash, M_DEVBUF); + } + hash->uh_slab_hash = newhash; + + return; +} + +/* + * Frees all outstanding items in a bucket + * + * Arguments: + * zone The zone to free to, must be unlocked. + * bucket The free/alloc bucket with items, cpu queue must be locked. + * + * Returns: + * Nothing + */ + +static void +bucket_drain(uma_zone_t zone, uma_bucket_t bucket) +{ + uma_slab_t slab; + int mzone; + void *item; + + if (bucket == NULL) + return; + + slab = NULL; + mzone = 0; + + /* We have to lookup the slab again for malloc.. */ + if (zone->uz_flags & UMA_ZFLAG_MALLOC) + mzone = 1; + + while (bucket->ub_ptr > -1) { + item = bucket->ub_bucket[bucket->ub_ptr]; +#ifdef INVARIANTS + bucket->ub_bucket[bucket->ub_ptr] = NULL; + KASSERT(item != NULL, + ("bucket_drain: botched ptr, item is NULL")); +#endif + bucket->ub_ptr--; + /* + * This is extremely inefficient. The slab pointer was passed + * to uma_zfree_arg, but we lost it because the buckets don't + * hold them. This will go away when free() gets a size passed + * to it. 
+ */ + if (mzone) + slab = hash_sfind(mallochash, + (u_int8_t *)((unsigned long)item & + (~UMA_SLAB_MASK))); + uma_zfree_internal(zone, item, slab, 1); + } +} + +/* + * Drains the per cpu caches for a zone. + * + * Arguments: + * zone The zone to drain, must be unlocked. + * + * Returns: + * Nothing + * + * This function returns with the zone locked so that the per cpu queues can + * not be filled until zone_drain is finished. + * + */ +static void +cache_drain(uma_zone_t zone) +{ + uma_bucket_t bucket; + uma_cache_t cache; + int cpu; + + /* + * Flush out the per cpu queues. + * + * XXX This causes unneccisary thrashing due to immediately having + * empty per cpu queues. I need to improve this. + */ + + /* + * We have to lock each cpu cache before locking the zone + */ + ZONE_UNLOCK(zone); + + for (cpu = 0; cpu < maxcpu; cpu++) { + if (CPU_ABSENT(cpu)) + continue; + CPU_LOCK(zone, cpu); + cache = &zone->uz_cpu[cpu]; + bucket_drain(zone, cache->uc_allocbucket); + bucket_drain(zone, cache->uc_freebucket); + } + + /* + * Drain the bucket queues and free the buckets, we just keep two per + * cpu (alloc/free). + */ + ZONE_LOCK(zone); + while ((bucket = LIST_FIRST(&zone->uz_full_bucket)) != NULL) { + LIST_REMOVE(bucket, ub_link); + ZONE_UNLOCK(zone); + bucket_drain(zone, bucket); + uma_zfree_internal(bucketzone, bucket, NULL, 0); + ZONE_LOCK(zone); + } + + /* Now we do the free queue.. */ + while ((bucket = LIST_FIRST(&zone->uz_free_bucket)) != NULL) { + LIST_REMOVE(bucket, ub_link); + uma_zfree_internal(bucketzone, bucket, NULL, 0); + } + + /* We unlock here, but they will all block until the zone is unlocked */ + for (cpu = 0; cpu < maxcpu; cpu++) { + if (CPU_ABSENT(cpu)) + continue; + CPU_UNLOCK(zone, cpu); + } +} + +/* + * Frees pages from a zone back to the system. This is done on demand from + * the pageout daemon. + * + * Arguments: + * zone The zone to free pages from + * + * Returns: + * Nothing. 
+ */ +static void +zone_drain(uma_zone_t zone) +{ + uma_slab_t slab; + uma_slab_t n; + u_int64_t extra; + u_int8_t flags; + u_int8_t *mem; + int i; + + /* + * We don't want to take pages from staticly allocated zones at this + * time + */ + if (zone->uz_flags & UMA_ZFLAG_NOFREE || zone->uz_freef == NULL) + return; + + ZONE_LOCK(zone); + + if (!(zone->uz_flags & UMA_ZFLAG_INTERNAL)) + cache_drain(zone); + + if (zone->uz_free < zone->uz_wssize) + goto finished; +#ifdef UMA_DEBUG + printf("%s working set size: %llu free items: %u\n", + zone->uz_name, (unsigned long long)zone->uz_wssize, zone->uz_free); +#endif + extra = zone->uz_wssize - zone->uz_free; + extra /= zone->uz_ipers; + + /* extra is now the number of extra slabs that we can free */ + + if (extra == 0) + goto finished; + + slab = LIST_FIRST(&zone->uz_free_slab); + while (slab && extra) { + n = LIST_NEXT(slab, us_link); + + /* We have no where to free these to */ + if (slab->us_flags & UMA_SLAB_BOOT) { + slab = n; + continue; + } + + LIST_REMOVE(slab, us_link); + zone->uz_pages -= zone->uz_ppera; + zone->uz_free -= zone->uz_ipers; + if (zone->uz_fini) + for (i = 0; i < zone->uz_ipers; i++) + zone->uz_fini( + slab->us_data + (zone->uz_rsize * i), + zone->uz_size); + flags = slab->us_flags; + mem = slab->us_data; + if (zone->uz_flags & UMA_ZFLAG_OFFPAGE) { + if (zone->uz_flags & UMA_ZFLAG_MALLOC) { + UMA_HASH_REMOVE(mallochash, + slab, slab->us_data); + } else { + UMA_HASH_REMOVE(&zone->uz_hash, + slab, slab->us_data); + } + uma_zfree_internal(slabzone, slab, NULL, 0); + } else if (zone->uz_flags & UMA_ZFLAG_MALLOC) + UMA_HASH_REMOVE(mallochash, slab, slab->us_data); +#ifdef UMA_DEBUG + printf("%s: Returning %d bytes.\n", + zone->uz_name, UMA_SLAB_SIZE * zone->uz_ppera); +#endif + zone->uz_freef(mem, UMA_SLAB_SIZE * zone->uz_ppera, flags); + + slab = n; + extra--; + } + +finished: + ZONE_UNLOCK(zone); +} + +/* + * Allocate a new slab for a zone. This does not insert the slab onto a list. + * + * Arguments: + * zone The zone to allocate slabs for + * wait Shall we wait? + * + * Returns: + * The slab that was allocated or NULL if there is no memory and the + * caller specified M_NOWAIT. 
+ * + */ +static uma_slab_t +slab_zalloc(uma_zone_t zone, int wait) +{ + uma_slab_t slab; /* Starting slab */ + u_int8_t *mem; + u_int8_t flags; + int i; + +#ifdef UMA_DEBUG + printf("slab_zalloc: Allocating a new slab for %s\n", zone->uz_name); +#endif + + if (booted || (zone->uz_flags & UMA_ZFLAG_PRIVALLOC)) { + ZONE_UNLOCK(zone); + mtx_lock(&Giant); + slab = (uma_slab_t )zone->uz_allocf(zone, + zone->uz_ppera * UMA_SLAB_SIZE, &flags, wait); + mtx_unlock(&Giant); + ZONE_LOCK(zone); + if (slab != NULL) + slab->us_data = (u_int8_t *)slab; + else + return (NULL); + } else { + + if (zone->uz_ppera > 1) + panic("UMA: Attemping to allocate multiple pages before vm has started.\n"); + if (zone->uz_flags & UMA_ZFLAG_MALLOC) + panic("Mallocing before uma_startup2 has been called.\n"); + if (uma_boot_free == 0) + panic("UMA: Ran out of pre init pages, increase UMA_BOOT_PAGES\n"); + slab = LIST_FIRST(&uma_boot_pages); + LIST_REMOVE(slab, us_link); + uma_boot_free--; + } + + mem = slab->us_data; + + /* Alloc slab structure for offpage, otherwise adjust it's position */ + if (!(zone->uz_flags & UMA_ZFLAG_OFFPAGE)) { + slab = (uma_slab_t )(mem + zone->uz_pgoff); + } else { + slab = uma_zalloc_internal(slabzone, NULL, wait, NULL, -1); + if (slab == NULL) /* XXX This should go away */ + panic("UMA: No free slab structures"); + if (!(zone->uz_flags & UMA_ZFLAG_MALLOC)) + UMA_HASH_INSERT(&zone->uz_hash, slab, mem); + } + if (zone->uz_flags & UMA_ZFLAG_MALLOC) { +#ifdef UMA_DEBUG + printf("Inserting %p into malloc hash from slab %p\n", + mem, slab); +#endif + UMA_HASH_INSERT(mallochash, slab, mem); + } + + slab->us_zone = zone; + slab->us_data = mem; + + /* + * This is intended to spread data out across cache lines. + * + * This code doesn't seem to work properly on x86, and on alpha + * it makes absolutely no performance difference. I'm sure it could + * use some tuning, but sun makes outrageous claims about it's + * performance. + */ +#if 0 + if (zone->uz_cachemax) { + slab->us_data += zone->uz_cacheoff; + zone->uz_cacheoff += UMA_CACHE_INC; + if (zone->uz_cacheoff > zone->uz_cachemax) + zone->uz_cacheoff = 0; + } +#endif + + slab->us_freecount = zone->uz_ipers; + slab->us_firstfree = 0; + slab->us_flags = flags; + for (i = 0; i < zone->uz_ipers; i++) + slab->us_freelist[i] = i+1; + + if (zone->uz_init) + for (i = 0; i < zone->uz_ipers; i++) + zone->uz_init(slab->us_data + (zone->uz_rsize * i), + zone->uz_size); + + zone->uz_pages += zone->uz_ppera; + zone->uz_free += zone->uz_ipers; + + return (slab); +} + +/* + * Allocates a number of pages from the system + * + * Arguments: + * zone Unused + * bytes The number of bytes requested + * wait Shall we wait? + * + * Returns: + * A pointer to the alloced memory or possibly + * NULL if M_NOWAIT is set. + */ +static void * +page_alloc(uma_zone_t zone, int bytes, u_int8_t *pflag, int wait) +{ + void *p; /* Returned page */ + + /* + * XXX The original zone allocator did this, but I don't think it's + * neccisary in current. + */ + + if (lockstatus(&kernel_map->lock, NULL)) { + *pflag = UMA_SLAB_KMEM; + p = (void *) kmem_malloc(kmem_map, bytes, wait); + } else { + *pflag = UMA_SLAB_KMAP; + p = (void *) kmem_alloc(kernel_map, bytes); + } + + return (p); +} + +/* + * Allocates a number of pages from within an object + * + * Arguments: + * zone Unused + * bytes The number of bytes requested + * wait Shall we wait? + * + * Returns: + * A pointer to the alloced memory or possibly + * NULL if M_NOWAIT is set. 
+ */ +static void * +obj_alloc(uma_zone_t zone, int bytes, u_int8_t *flags, int wait) +{ + vm_offset_t zkva; + vm_offset_t retkva; + vm_page_t p; + int pages; + + + if (zone->uz_pages + zone->uz_ppera > zone->uz_maxpages) + return (NULL); + + retkva = NULL; + pages = zone->uz_pages; + + /* + * This looks a little weird since we're getting one page at a time + */ + while (bytes > 0) { + p = vm_page_alloc(zone->uz_obj, pages, + VM_ALLOC_INTERRUPT); + if (p == NULL) + return (NULL); + + zkva = zone->uz_kva + pages * PAGE_SIZE; + if (retkva == NULL) + retkva = zkva; + pmap_qenter(zkva, &p, 1); + bytes -= PAGE_SIZE; + pages += 1; + } + + *flags = UMA_SLAB_PRIV; + + return ((void *)retkva); +} + +/* + * Frees a number of pages to the system + * + * Arguments: + * mem A pointer to the memory to be freed + * size The size of the memory being freed + * flags The original p->us_flags field + * + * Returns: + * Nothing + * + */ +static void +page_free(void *mem, int size, u_int8_t flags) +{ + vm_map_t map; + if (flags & UMA_SLAB_KMEM) + map = kmem_map; + else if (flags & UMA_SLAB_KMAP) + map = kernel_map; + else + panic("UMA: page_free used with invalid flags %d\n", flags); + + kmem_free(map, (vm_offset_t)mem, size); +} + +/* + * Zero fill initializer + * + * Arguments/Returns follow uma_init specifications + * + */ +static void +zero_init(void *mem, int size) +{ + bzero(mem, size); +} + +/* + * Finish creating a small uma zone. This calculates ipers, and the zone size. + * + * Arguments + * zone The zone we should initialize + * + * Returns + * Nothing + */ +static void +zone_small_init(uma_zone_t zone) +{ + int rsize; + int memused; + int ipers; + + rsize = zone->uz_size; + + if (rsize < UMA_SMALLEST_UNIT) + rsize = UMA_SMALLEST_UNIT; + + if (rsize & zone->uz_align) + rsize = (rsize & ~zone->uz_align) + (zone->uz_align + 1); + + zone->uz_rsize = rsize; + + rsize += 1; /* Account for the byte of linkage */ + zone->uz_ipers = (UMA_SLAB_SIZE - sizeof(struct uma_slab)) / rsize; + zone->uz_ppera = 1; + + memused = zone->uz_ipers * zone->uz_rsize; + + /* Can we do any better? */ + if ((UMA_SLAB_SIZE - memused) >= UMA_MAX_WASTE) { + if (zone->uz_flags & UMA_ZFLAG_INTERNAL) + return; + ipers = UMA_SLAB_SIZE / zone->uz_rsize; + if (ipers > zone->uz_ipers) { + zone->uz_flags |= UMA_ZFLAG_OFFPAGE; + zone->uz_ipers = ipers; + } + } + +} + +/* + * Finish creating a large (> UMA_SLAB_SIZE) uma zone. Just give in and do + * OFFPAGE for now. When I can allow for more dynamic slab sizes this will be + * more complicated. + * + * Arguments + * zone The zone we should initialize + * + * Returns + * Nothing + */ +static void +zone_large_init(uma_zone_t zone) +{ + int pages; + + pages = zone->uz_size / UMA_SLAB_SIZE; + + /* Account for remainder */ + if ((pages * UMA_SLAB_SIZE) < zone->uz_size) + pages++; + + zone->uz_ppera = pages; + zone->uz_ipers = 1; + + zone->uz_flags |= UMA_ZFLAG_OFFPAGE; + zone->uz_rsize = zone->uz_size; +} + +/* + * Zone header ctor. This initializes all fields, locks, etc. And inserts + * the zone onto the global zone list. 
+ * + * Arguments/Returns follow uma_ctor specifications + * udata Actually uma_zcreat_args + * + */ + +static void +zone_ctor(void *mem, int size, void *udata) +{ + struct uma_zctor_args *arg = udata; + uma_zone_t zone = mem; + int cplen; + int cpu; + + bzero(zone, size); + zone->uz_name = arg->name; + zone->uz_size = arg->size; + zone->uz_ctor = arg->ctor; + zone->uz_dtor = arg->dtor; + zone->uz_init = arg->uminit; + zone->uz_align = arg->align; + zone->uz_free = 0; + zone->uz_pages = 0; + zone->uz_flags = 0; + zone->uz_allocf = page_alloc; + zone->uz_freef = page_free; + + if (arg->flags & UMA_ZONE_ZINIT) + zone->uz_init = zero_init; + + if (arg->flags & UMA_ZONE_INTERNAL) + zone->uz_flags |= UMA_ZFLAG_INTERNAL; + + if (arg->flags & UMA_ZONE_MALLOC) + zone->uz_flags |= UMA_ZFLAG_MALLOC; + + if (arg->flags & UMA_ZONE_NOFREE) + zone->uz_flags |= UMA_ZFLAG_NOFREE; + + if (zone->uz_size > UMA_SLAB_SIZE) + zone_large_init(zone); + else + zone_small_init(zone); + + /* We do this so that the per cpu lock name is unique for each zone */ + memcpy(zone->uz_lname, "PCPU ", 5); + cplen = min(strlen(zone->uz_name) + 1, LOCKNAME_LEN - 6); + memcpy(zone->uz_lname+5, zone->uz_name, cplen); + zone->uz_lname[LOCKNAME_LEN - 1] = '\0'; + + /* + * If we're putting the slab header in the actual page we need to + * figure out where in each page it goes. This calculates a right + * justified offset into the memory on a ALIGN_PTR boundary. + */ + if (!(zone->uz_flags & UMA_ZFLAG_OFFPAGE)) { + int totsize; + int waste; + + /* Size of the slab struct and free list */ + totsize = sizeof(struct uma_slab) + zone->uz_ipers; + if (totsize & UMA_ALIGN_PTR) + totsize = (totsize & ~UMA_ALIGN_PTR) + + (UMA_ALIGN_PTR + 1); + zone->uz_pgoff = UMA_SLAB_SIZE - totsize; + + waste = zone->uz_pgoff; + waste -= (zone->uz_ipers * zone->uz_rsize); + + /* + * This calculates how much space we have for cache line size + * optimizations. It works by offseting each slab slightly. + * Currently it breaks on x86, and so it is disabled. + */ + + if (zone->uz_align < UMA_CACHE_INC && waste > UMA_CACHE_INC) { + zone->uz_cachemax = waste - UMA_CACHE_INC; + zone->uz_cacheoff = 0; + } + + totsize = zone->uz_pgoff + sizeof(struct uma_slab) + + zone->uz_ipers; + /* I don't think it's possible, but I'll make sure anyway */ + if (totsize > UMA_SLAB_SIZE) { + printf("zone %s ipers %d rsize %d size %d\n", + zone->uz_name, zone->uz_ipers, zone->uz_rsize, + zone->uz_size); + panic("UMA slab won't fit.\n"); + } + } else { + /* hash_expand here to allocate the initial hash table */ + hash_expand(&zone->uz_hash); + zone->uz_pgoff = 0; + } + +#ifdef UMA_DEBUG + printf("%s(%p) size = %d ipers = %d ppera = %d pgoff = %d\n", + zone->uz_name, zone, + zone->uz_size, zone->uz_ipers, + zone->uz_ppera, zone->uz_pgoff); +#endif + ZONE_LOCK_INIT(zone); + + mtx_lock(&uma_mtx); + LIST_INSERT_HEAD(&uma_zones, zone, uz_link); + mtx_unlock(&uma_mtx); + + /* + * Some internal zones don't have room allocated for the per cpu + * caches. If we're internal, bail out here. + */ + + if (zone->uz_flags & UMA_ZFLAG_INTERNAL) + return; + + for (cpu = 0; cpu < maxcpu; cpu++) { + if (zone->uz_ipers < UMA_BUCKET_SIZE) + zone->uz_cpu[cpu].uc_count = zone->uz_ipers - 1; + else + zone->uz_cpu[cpu].uc_count = UMA_BUCKET_SIZE - 1; + CPU_LOCK_INIT(zone, cpu); + } +} + +/* + * Traverses every zone in the system and calls a callback + * + * Arguments: + * zfunc A pointer to a function which accepts a zone + * as an argument. 
+ * + * Returns: + * Nothing + */ +static void +zone_foreach(void (*zfunc)(uma_zone_t)) +{ + uma_zone_t zone; + + mtx_lock(&uma_mtx); + LIST_FOREACH(zone, &uma_zones, uz_link) { + zfunc(zone); + } + mtx_unlock(&uma_mtx); +} + +/* Public functions */ +/* See uma.h */ +void +uma_startup(void *bootmem) +{ + struct uma_zctor_args args; + uma_slab_t slab; + int slabsize; + int i; + +#ifdef UMA_DEBUG + printf("Creating uma zone headers zone.\n"); +#endif +#ifdef SMP + maxcpu = mp_maxid + 1; +#else + maxcpu = 1; +#endif +#ifdef UMA_DEBUG + printf("Max cpu = %d, mp_maxid = %d\n", maxcpu, mp_maxid); + Debugger("stop"); +#endif + mtx_init(&uma_mtx, "UMA lock", MTX_DEF); + /* "manually" Create the initial zone */ + args.name = "UMA Zones"; + args.size = sizeof(struct uma_zone) + + (sizeof(struct uma_cache) * (maxcpu - 1)); + args.ctor = zone_ctor; + args.dtor = NULL; + args.uminit = zero_init; + args.fini = NULL; + args.align = 32 - 1; + args.flags = UMA_ZONE_INTERNAL; + /* The initial zone has no Per cpu queues so it's smaller */ + zone_ctor(zones, sizeof(struct uma_zone), &args); + +#ifdef UMA_DEBUG + printf("Filling boot free list.\n"); +#endif + for (i = 0; i < UMA_BOOT_PAGES; i++) { + slab = (uma_slab_t)((u_int8_t *)bootmem + (i * UMA_SLAB_SIZE)); + slab->us_data = (u_int8_t *)slab; + slab->us_flags = UMA_SLAB_BOOT; + LIST_INSERT_HEAD(&uma_boot_pages, slab, us_link); + uma_boot_free++; + } + +#ifdef UMA_DEBUG + printf("Creating slab zone.\n"); +#endif + + /* + * This is the max number of free list items we'll have with + * offpage slabs. + */ + + slabsize = UMA_SLAB_SIZE - sizeof(struct uma_slab); + slabsize /= UMA_MAX_WASTE; + slabsize++; /* In case there it's rounded */ + slabsize += sizeof(struct uma_slab); + + /* Now make a zone for slab headers */ + slabzone = uma_zcreate("UMA Slabs", + slabsize, + NULL, NULL, NULL, NULL, + UMA_ALIGN_PTR, UMA_ZONE_INTERNAL); + + hashzone = uma_zcreate("UMA Hash", + sizeof(struct slabhead *) * UMA_HASH_SIZE_INIT, + NULL, NULL, NULL, NULL, + UMA_ALIGN_PTR, UMA_ZONE_INTERNAL); + + bucketzone = uma_zcreate("UMA Buckets", sizeof(struct uma_bucket), + NULL, NULL, NULL, NULL, + UMA_ALIGN_PTR, UMA_ZONE_INTERNAL); + + +#ifdef UMA_DEBUG + printf("UMA startup complete.\n"); +#endif +} + +/* see uma.h */ +void +uma_startup2(void *hashmem, u_long elems) +{ + bzero(hashmem, elems * sizeof(void *)); + mallochash->uh_slab_hash = hashmem; + mallochash->uh_hashsize = elems; + mallochash->uh_hashmask = elems - 1; + booted = 1; +#ifdef UMA_DEBUG + printf("UMA startup2 complete.\n"); +#endif +} + +/* + * Initialize our callout handle + * + */ + +static void +uma_startup3(void) +{ +#ifdef UMA_DEBUG + printf("Starting callout.\n"); +#endif + /* We'll be mpsafe once the vm is locked. 
*/ + callout_init(&uma_callout, 0); + callout_reset(&uma_callout, UMA_WORKING_TIME * hz, uma_timeout, NULL); +#ifdef UMA_DEBUG + printf("UMA startup3 complete.\n"); +#endif +} + +/* See uma.h */ +uma_zone_t +uma_zcreate(char *name, int size, uma_ctor ctor, uma_dtor dtor, uma_init uminit, + uma_fini fini, int align, u_int16_t flags) + +{ + struct uma_zctor_args args; + + /* This stuff is essential for the zone ctor */ + args.name = name; + args.size = size; + args.ctor = ctor; + args.dtor = dtor; + args.uminit = uminit; + args.fini = fini; + args.align = align; + args.flags = flags; + + return (uma_zalloc_internal(zones, &args, M_WAITOK, NULL, -1)); +} + +/* See uma.h */ +void * +uma_zalloc_arg(uma_zone_t zone, void *udata, int wait) +{ + void *item; + uma_cache_t cache; + uma_bucket_t bucket; + int isitem; + int cpu; + + /* This is the fast path allocation */ +#ifdef UMA_DEBUG_ALLOC_1 + printf("Allocating one item from %s(%p)\n", zone->uz_name, zone); +#endif + cpu = PCPU_GET(cpuid); + CPU_LOCK(zone, cpu); + cache = &zone->uz_cpu[cpu]; + cache->uc_allocs++; + +zalloc_start: + bucket = cache->uc_allocbucket; + + if (bucket) { + if (bucket->ub_ptr > -1) { + item = bucket->ub_bucket[bucket->ub_ptr]; +#ifdef INVARIANTS + bucket->ub_bucket[bucket->ub_ptr] = NULL; +#endif + bucket->ub_ptr--; + KASSERT(item != NULL, + ("uma_zalloc: Bucket pointer mangled.")); + cache->uc_allocs++; + CPU_UNLOCK(zone, cpu); + if (zone->uz_ctor) + zone->uz_ctor(item, zone->uz_size, udata); + return (item); + } else if (cache->uc_freebucket) { + /* + * We have run out of items in our allocbucket. + * See if we can switch with our free bucket. + */ + if (cache->uc_freebucket->ub_ptr > -1) { + uma_bucket_t swap; + +#ifdef UMA_DEBUG_ALLOC + printf("uma_zalloc: Swapping empty with alloc.\n"); +#endif + swap = cache->uc_freebucket; + cache->uc_freebucket = cache->uc_allocbucket; + cache->uc_allocbucket = swap; + + goto zalloc_start; + } + } + } + /* + * We can get here for three reasons: + * + * 1) The buckets are NULL + * 2) The zone is INTERNAL, and so it has no buckets. + * 3) The alloc and free buckets are both empty. + * + * Just handoff to uma_zalloc_internal to do the hard stuff + * + */ +#ifdef UMA_DEBUG_ALLOC + printf("uma_zalloc: Falling back to zalloc_internal.\n"); +#endif + + item = uma_zalloc_internal(zone, udata, wait, &isitem, cpu); + +#ifdef UMA_DEBUG + printf("uma_zalloc: zalloc_internal completed.\n"); +#endif + + if (item && isitem == 0) + goto zalloc_start; + + /* + * If isitem is set then we should just return it. The cpu lock + * was unlocked when we couldn't get a bucket. + */ + +#ifdef INVARIANTS + if (wait == M_WAITOK) + KASSERT(item != NULL, + ("uma_zalloc: WAITOK set but we're returning NULL")); +#endif + return item; +} + +/* + * Allocates an item for an internal zone OR fills a bucket + * + * Arguments + * zone The zone to alloc for. + * udata The data to be passed to the constructor. + * wait M_WAITOK or M_NOWAIT. + * isitem The returned value is an item if this is true. + * cpu The cpu # of the cache that we should use, or -1. + * + * Returns + * NULL if there is no memory and M_NOWAIT is set + * An item if called on an interal zone + * Non NULL if called to fill a bucket and it was successful. + * + * Discussion: + * This was much cleaner before it had to do per cpu caches. It is + * complicated now because it has to handle the simple internal case, and + * the more involved bucket filling and allocation. The isitem is there + * to remove a failure case. 
You shouldn't fail on allocating from a zone + * because there were no buckets. This allows the exported zalloc to just + * return the item. + * + */ + +static void * +uma_zalloc_internal(uma_zone_t zone, void *udata, int wait, int *isitem, int cpu) +{ + uma_bucket_t bucket; + uma_cache_t cache; + uma_slab_t slab; + u_int8_t freei; + void *item; + + bucket = NULL; + cache = NULL; + item = NULL; + + /* + * This is to stop us from allocating per cpu buckets while we're running + * out of UMA_BOOT_PAGES. Otherwise, we would exhaust the boot pages. + */ + + if (!booted && zone == bucketzone) + return (NULL); + +#ifdef UMA_DEBUG_ALLOC + printf("INTERNAL: Allocating one item from %s(%p)\n", zone->uz_name, zone); +#endif + if (isitem != NULL) + *isitem = 0; + + ZONE_LOCK(zone); + + /* We got here because we need to fill some buckets */ + if (cpu != -1) { + cache = &zone->uz_cpu[cpu]; + + zone->uz_allocs += cache->uc_allocs; + /* Check the free list */ + bucket = LIST_FIRST(&zone->uz_full_bucket); + if (bucket) { + LIST_REMOVE(bucket, ub_link); + /* Our old one is now a free bucket */ + if (cache->uc_allocbucket) { + KASSERT(cache->uc_allocbucket->ub_ptr == -1, + ("uma_zalloc_internal: Freeing a non free bucket.")); + LIST_INSERT_HEAD(&zone->uz_free_bucket, + cache->uc_allocbucket, ub_link); + } + KASSERT(bucket->ub_ptr != -1, + ("uma_zalloc_internal: Returning an empty bucket.")); + /*zone->uz_free -= bucket->ub_ptr + 1;*/ + cache->uc_allocbucket = bucket; + ZONE_UNLOCK(zone); + return (bucket); + } + /* Bump up our uc_count so we get here less */ + if (cache->uc_count < UMA_BUCKET_SIZE - 1) + cache->uc_count++; + /* Nothing on the free list, try to re-use the old one */ + bucket = cache->uc_allocbucket; + if (bucket == NULL) { + /* Nope, we need a new one */ + CPU_UNLOCK(zone, cpu); + ZONE_UNLOCK(zone); + bucket = uma_zalloc_internal(bucketzone, + NULL, wait, NULL, -1); + CPU_LOCK(zone, cpu); + ZONE_LOCK(zone); + /* Did we lose the race? */ + if (cache->uc_allocbucket) { +#ifdef UMA_DEBUG + printf("uma_zalloc_internal: Lost race with another CPU.\n"); +#endif + if (bucket) + uma_zfree_internal(bucketzone, + bucket, NULL, 0); + ZONE_UNLOCK(zone); + return (cache->uc_allocbucket); + } + cache->uc_allocbucket = bucket; + + if (bucket) { +#ifdef INVARIANTS + bzero(bucket, bucketzone->uz_size); +#endif + bucket->ub_ptr = -1; + } else { + /* + * We may not get a bucket if we recurse, so + * return an actual item. The rest of this code + * does the right thing if the cache is NULL. + */ +#ifdef UMA_DEBUG + printf("uma_zalloc_internal: Bucketzone returned NULL\n"); +#endif + CPU_UNLOCK(zone, cpu); + cache = NULL; + cpu = -1; + } + } + } + +new_slab: + + /* Find a slab with some space */ + if (zone->uz_free) { + if (!LIST_EMPTY(&zone->uz_part_slab)) { + slab = LIST_FIRST(&zone->uz_part_slab); + } else { + slab = LIST_FIRST(&zone->uz_free_slab); + LIST_REMOVE(slab, us_link); + LIST_INSERT_HEAD(&zone->uz_part_slab, slab, us_link); + } + } else { + /* + * This is to prevent us from recursively trying to allocate + * buckets. The problem is that if an allocation forces us to + * grab a new bucket we will call page_alloc, which will go off + * and cause the vm to allocate vm_map_entries. If we need new + * buckets there too we will recurse in kmem_alloc and bad + * things happen. 
So instead we return a NULL bucket, and make + * the code that allocates buckets smart enough to deal with it */ + if (zone == bucketzone && zone->uz_recurse != 0) { + ZONE_UNLOCK(zone); + return (NULL); + } + zone->uz_recurse++; + slab = slab_zalloc(zone, wait); + zone->uz_recurse--; + if (slab) { + LIST_INSERT_HEAD(&zone->uz_part_slab, slab, us_link); + /* + * We might not have been able to get a page, but another cpu + * could have while we were unlocked. + */ + } else if (zone->uz_free == 0) { + ZONE_UNLOCK(zone); + /* If we're filling a bucket return what we have */ + if (bucket != NULL && bucket->ub_ptr != -1) { + return (bucket); + } else + return (NULL); + } else { + /* Another cpu must have succeeded */ + if ((slab = LIST_FIRST(&zone->uz_part_slab)) == NULL) { + slab = LIST_FIRST(&zone->uz_free_slab); + LIST_REMOVE(slab, us_link); + LIST_INSERT_HEAD(&zone->uz_part_slab, + slab, us_link); + } + } + } + + while (slab->us_freecount) { + freei = slab->us_firstfree; + slab->us_firstfree = slab->us_freelist[freei]; +#ifdef INVARIANTS + slab->us_freelist[freei] = 255; +#endif + slab->us_freecount--; + zone->uz_free--; + item = slab->us_data + (zone->uz_rsize * freei); + + if (cache == NULL) { + zone->uz_allocs++; + break; + } + + bucket->ub_bucket[++bucket->ub_ptr] = item; + + /* Don't overfill the bucket! */ + if (bucket->ub_ptr == cache->uc_count) + break; + } + + /* Move this slab to the full list */ + if (slab->us_freecount == 0) { + LIST_REMOVE(slab, us_link); + LIST_INSERT_HEAD(&zone->uz_full_slab, slab, us_link); + } + + if (cache != NULL) { + /* Try to keep the buckets totally full, but don't block */ + if (bucket->ub_ptr < cache->uc_count) { + wait = M_NOWAIT; + goto new_slab; + } + } + + ZONE_UNLOCK(zone); + + /* Only construct at this time if we're not filling a bucket */ + if (cache == NULL) { + if (zone->uz_ctor) + zone->uz_ctor(item, zone->uz_size, udata); + + if (isitem != NULL) + *isitem = 1; + } + + return (item); +} + +/* See uma.h */ +void +uma_zfree_arg(uma_zone_t zone, void *item, void *udata) +{ + uma_cache_t cache; + uma_bucket_t bucket; + int cpu; + + /* This is the fast path free */ +#ifdef UMA_DEBUG_ALLOC_1 + printf("Freeing item %p to %s(%p)\n", item, zone->uz_name, zone); +#endif + cpu = PCPU_GET(cpuid); + CPU_LOCK(zone, cpu); + cache = &zone->uz_cpu[cpu]; + +zfree_start: + bucket = cache->uc_freebucket; + + if (bucket) { + /* Do we have room in our bucket? */ + if (bucket->ub_ptr < cache->uc_count) { + bucket->ub_ptr++; + KASSERT(bucket->ub_bucket[bucket->ub_ptr] == NULL, + ("uma_zfree: Freeing to non free bucket index.")); + bucket->ub_bucket[bucket->ub_ptr] = item; + CPU_UNLOCK(zone, cpu); + if (zone->uz_dtor) + zone->uz_dtor(item, zone->uz_size, udata); + return; + } else if (cache->uc_allocbucket) { +#ifdef UMA_DEBUG_ALLOC + printf("uma_zfree: Swapping buckets.\n"); +#endif + /* + * We have run out of space in our freebucket. + * See if we can switch with our alloc bucket. + */ + if (cache->uc_allocbucket->ub_ptr < + cache->uc_freebucket->ub_ptr) { + uma_bucket_t swap; + + swap = cache->uc_freebucket; + cache->uc_freebucket = cache->uc_allocbucket; + cache->uc_allocbucket = swap; + + goto zfree_start; + } + } + } + + /* + * We can get here for three reasons: + * + * 1) The buckets are NULL + * 2) The zone is INTERNAL, and so it has no buckets. + * 3) The alloc and free buckets are both somewhat full. 
+ * + */ + + ZONE_LOCK(zone); + + if (!(zone->uz_flags & UMA_ZFLAG_INTERNAL)) { + bucket = cache->uc_freebucket; + cache->uc_freebucket = NULL; + + /* Can we throw this on the zone full list? */ + if (bucket != NULL) { +#ifdef UMA_DEBUG_ALLOC + printf("uma_zfree: Putting old bucket on the free list.\n"); +#endif + /* ub_ptr is pointing to the last free item */ + KASSERT(bucket->ub_ptr != -1, + ("uma_zfree: Attempting to insert an empty bucket onto the full list.\n")); + /*zone->uz_free += bucket->ub_ptr + 1;*/ + LIST_INSERT_HEAD(&zone->uz_full_bucket, + bucket, ub_link); + bucket = LIST_FIRST(&zone->uz_free_bucket); + if (bucket) + LIST_REMOVE(bucket, ub_link); + } + /* + * Do we need to alloc one? Either the freebucket was NULL + * or the free_bucket list was empty. + */ + if (bucket == NULL) { +#ifdef UMA_DEBUG_ALLOC + printf("uma_zfree: Allocating new free bucket.\n"); +#endif + /* This has to be done so we don't recurse on a lock */ + ZONE_UNLOCK(zone); + CPU_UNLOCK(zone, cpu); + bucket = uma_zalloc_internal(bucketzone, + NULL, M_NOWAIT, NULL, -1); + CPU_LOCK(zone, cpu); + ZONE_LOCK(zone); + if (bucket) { +#ifdef INVARIANTS + bzero(bucket, bucketzone->uz_size); +#endif + bucket->ub_ptr = -1; + } + /* Did we lose the race? */ + if (cache->uc_freebucket != NULL) { + if (bucket) + uma_zfree_internal(bucketzone, + bucket, NULL, 0); + ZONE_UNLOCK(zone); + goto zfree_start; + } + /* If we couldn't get one just free directly */ + if (bucket == NULL) + goto zfree_internal; + } + cache->uc_freebucket = bucket; + ZONE_UNLOCK(zone); + goto zfree_start; + } + +zfree_internal: + + CPU_UNLOCK(zone, cpu); + ZONE_UNLOCK(zone); + uma_zfree_internal(zone, item, udata, 0); + + return; + +} + +/* + * Frees an item to an INTERNAL zone or allocates a free bucket + * + * Arguments: + * zone The zone to free to + * item The item we're freeing + * udata User supplied data for the dtor + * skip Skip the dtor, it was done in uma_zfree_arg + */ + +static void +uma_zfree_internal(uma_zone_t zone, void *item, void *udata, int skip) +{ + uma_slab_t slab; + u_int8_t *mem; + u_int8_t freei; + + ZONE_LOCK(zone); + + if (!(zone->uz_flags & UMA_ZFLAG_MALLOC)) { + mem = (u_int8_t *)((unsigned long)item & (~UMA_SLAB_MASK)); + if (zone->uz_flags & UMA_ZFLAG_OFFPAGE) + slab = hash_sfind(&zone->uz_hash, mem); + else { + mem += zone->uz_pgoff; + slab = (uma_slab_t)mem; + } + } else { + slab = (uma_slab_t)udata; + } + + /* Do we need to remove from any lists? 
*/ + if (slab->us_freecount+1 == zone->uz_ipers) { + LIST_REMOVE(slab, us_link); + LIST_INSERT_HEAD(&zone->uz_free_slab, slab, us_link); + } else if (slab->us_freecount == 0) { + LIST_REMOVE(slab, us_link); + LIST_INSERT_HEAD(&zone->uz_part_slab, slab, us_link); + } + + /* Slab management stuff */ + freei = ((unsigned long)item - (unsigned long)slab->us_data) + / zone->uz_rsize; +#ifdef INVARIANTS + if (((freei * zone->uz_rsize) + slab->us_data) != item) + panic("zone: %s(%p) slab %p freed address %p unaligned.\n", + zone->uz_name, zone, slab, item); + if (freei >= zone->uz_ipers) + panic("zone: %s(%p) slab %p freelist %i out of range 0-%d\n", + zone->uz_name, zone, slab, freei, zone->uz_ipers-1); + + if (slab->us_freelist[freei] != 255) { + printf("Slab at %p, freei %d = %d.\n", + slab, freei, slab->us_freelist[freei]); + panic("Duplicate free of item %p from zone %p(%s)\n", + item, zone, zone->uz_name); + } +#endif + slab->us_freelist[freei] = slab->us_firstfree; + slab->us_firstfree = freei; + slab->us_freecount++; + + /* Zone statistics */ + zone->uz_free++; + + ZONE_UNLOCK(zone); + + if (!skip && zone->uz_dtor) + zone->uz_dtor(item, zone->uz_size, udata); +} + +/* See uma.h */ +void +uma_zone_set_freef(uma_zone_t zone, uma_free freef) +{ + ZONE_LOCK(zone); + + zone->uz_freef = freef; + + ZONE_UNLOCK(zone); +} + +/* See uma.h */ +void +uma_zone_set_allocf(uma_zone_t zone, uma_alloc allocf) +{ + ZONE_LOCK(zone); + + zone->uz_flags |= UMA_ZFLAG_PRIVALLOC; + zone->uz_allocf = allocf; + + ZONE_UNLOCK(zone); +} + +/* See uma.h */ +int +uma_zone_set_obj(uma_zone_t zone, struct vm_object *obj, int count) +{ + int pages; + vm_offset_t kva; + + ZONE_LOCK(zone); + mtx_lock(&Giant); + + zone->uz_obj = obj; + pages = count / zone->uz_ipers; + + if (pages * zone->uz_ipers < count) + pages++; + zone->uz_kva = NULL; + ZONE_UNLOCK(zone); + kva = kmem_alloc_pageable(kernel_map, pages * UMA_SLAB_SIZE); + ZONE_LOCK(zone); + + zone->uz_kva = kva; + + if (zone->uz_kva == 0) { + ZONE_UNLOCK(zone); + return (0); + } + + zone->uz_maxpages = pages; + + if (zone->uz_obj == NULL) + zone->uz_obj = vm_object_allocate(OBJT_DEFAULT, + zone->uz_maxpages); + else + _vm_object_allocate(OBJT_DEFAULT, + zone->uz_maxpages, zone->uz_obj); + + zone->uz_allocf = obj_alloc; + zone->uz_flags |= UMA_ZFLAG_NOFREE | UMA_ZFLAG_PRIVALLOC; + + mtx_unlock(&Giant); + ZONE_UNLOCK(zone); + + return (1); +} + +/* See uma.h */ +void +uma_prealloc(uma_zone_t zone, int items) +{ + int slabs; + uma_slab_t slab; + + ZONE_LOCK(zone); + slabs = items / zone->uz_ipers; + if (slabs * zone->uz_ipers < items) + slabs++; + + while (slabs > 0) { + slab = slab_zalloc(zone, M_WAITOK); + LIST_INSERT_HEAD(&zone->uz_free_slab, slab, us_link); + slabs--; + } + ZONE_UNLOCK(zone); +} + +/* See uma.h */ +void +uma_reclaim(void) +{ + /* + * You might think that the delay below would improve performance since + * the allocator will give away memory that it may ask for immediately. + * Really, it makes things worse, since cpu cycles are so much cheaper + * than disk activity. + */ +#if 0 + static struct timeval tv = {0}; + struct timeval now; + getmicrouptime(&now); + if (now.tv_sec > tv.tv_sec + 30) + tv = now; + else + return; +#endif +#ifdef UMA_DEBUG + printf("UMA: vm asked us to release pages!\n"); +#endif + zone_foreach(zone_drain); + + /* + * Some slabs may have been freed but this zone will be visited early + * we visit again so that we can free pages that are empty once other + * zones are drained. We have to do the same for buckets. 
+ */ + zone_drain(slabzone); + zone_drain(bucketzone); +} + +void * +uma_large_malloc(int size, int wait) +{ + void *mem; + uma_slab_t slab; + u_int8_t flags; + + slab = uma_zalloc_internal(slabzone, NULL, wait, NULL, -1); + if (slab == NULL) + return (NULL); + + mem = page_alloc(NULL, size, &flags, wait); + if (mem) { + slab->us_data = mem; + slab->us_flags = flags | UMA_SLAB_MALLOC; + slab->us_size = size; + UMA_HASH_INSERT(mallochash, slab, mem); + } else { + uma_zfree_internal(slabzone, slab, NULL, 0); + } + + + return (mem); +} + +void +uma_large_free(uma_slab_t slab) +{ + UMA_HASH_REMOVE(mallochash, slab, slab->us_data); + page_free(slab->us_data, slab->us_size, slab->us_flags); + uma_zfree_internal(slabzone, slab, NULL, 0); +} + +void +uma_print_stats(void) +{ + zone_foreach(uma_print_zone); +} + +void +uma_print_zone(uma_zone_t zone) +{ + printf("%s(%p) size %d(%d) flags %d ipers %d ppera %d out %d free %d\n", + zone->uz_name, zone, zone->uz_size, zone->uz_rsize, zone->uz_flags, + zone->uz_ipers, zone->uz_ppera, + (zone->uz_ipers * zone->uz_pages) - zone->uz_free, zone->uz_free); +} + +/* + * Sysctl handler for vm.zone + * + * stolen from vm_zone.c + */ +static int +sysctl_vm_zone(SYSCTL_HANDLER_ARGS) +{ + int error, len, cnt; + const int linesize = 128; /* conservative */ + int totalfree; + char *tmpbuf, *offset; + uma_zone_t z; + char *p; + + cnt = 0; + LIST_FOREACH(z, &uma_zones, uz_link) + cnt++; + MALLOC(tmpbuf, char *, (cnt == 0 ? 1 : cnt) * linesize, + M_TEMP, M_WAITOK); + len = snprintf(tmpbuf, linesize, + "\nITEM SIZE LIMIT USED FREE REQUESTS\n\n"); + if (cnt == 0) + tmpbuf[len - 1] = '\0'; + error = SYSCTL_OUT(req, tmpbuf, cnt == 0 ? len-1 : len); + if (error || cnt == 0) + goto out; + offset = tmpbuf; + LIST_FOREACH(z, &uma_zones, uz_link) { + if (cnt == 0) /* list may have changed size */ + break; + ZONE_LOCK(z); + totalfree = z->uz_free + z->uz_cachefree; + len = snprintf(offset, linesize, + "%-12.12s %6.6u, %8.8u, %6.6u, %6.6u, %8.8llu\n", + z->uz_name, z->uz_size, + z->uz_maxpages * z->uz_ipers, + (z->uz_ipers * (z->uz_pages / z->uz_ppera)) - totalfree, + totalfree, + (unsigned long long)z->uz_allocs); + ZONE_UNLOCK(z); + for (p = offset + 12; p > offset && *p == ' '; --p) + /* nothing */ ; + p[1] = ':'; + cnt--; + offset += len; + } + *offset++ = '\0'; + error = SYSCTL_OUT(req, tmpbuf, offset - tmpbuf); +out: + FREE(tmpbuf, M_TEMP); + return (error); +} diff --git a/sys/vm/uma_int.h b/sys/vm/uma_int.h new file mode 100644 index 0000000..77e7c38 --- /dev/null +++ b/sys/vm/uma_int.h @@ -0,0 +1,328 @@ +/* + * Copyright (c) 2002, Jeffrey Roberson <jroberson@chesapeake.net> + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice unmodified, this list of conditions, and the following + * disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ *
+ */
+
+/*
+ *
+ * Jeff Roberson <jroberson@chesapeake.net>
+ *
+ * This file includes definitions, structures, prototypes, and inlines that
+ * should not be used outside of the actual implementation of UMA.
+ *
+ */
+
+/*
+ * Here's a quick description of the relationship between the objects:
+ *
+ * Zones contain lists of slabs, which are kept on either the full, empty,
+ * or partially allocated list to reduce fragmentation. They also contain
+ * the user supplied size, which is adjusted for alignment purposes;
+ * rsize is the result of that adjustment. The zone also stores information
+ * for managing a hash of page addresses that maps pages to uma_slab_t
+ * structures for pages that don't have embedded uma_slab_t's.
+ *
+ * The uma_slab_t may be embedded in a UMA_SLAB_SIZE chunk of memory or it may
+ * be allocated off the page from a special slab zone. The free list within a
+ * slab is managed with a linked list of indexes, which are 8 bit values. If
+ * UMA_SLAB_SIZE is defined to be too large I will have to switch to 16 bit
+ * values. Currently on alpha you can get 250 or so 32 byte items and on x86
+ * you can get 250 or so 16 byte items. For item sizes that would yield more
+ * than 10% memory waste we potentially allocate a separate uma_slab_t if this
+ * will improve the number of items per slab that will fit.
+ *
+ * Another potential space optimization is storing the 8 bits of linkage in
+ * the space wasted between items due to alignment problems. This may yield a
+ * much better memory footprint for certain sizes of objects. Another
+ * alternative is to increase the UMA_SLAB_SIZE, or allow for dynamic slab
+ * sizes. I prefer dynamic slab sizes because we could stick with 8 bit
+ * indexes and only use large slab sizes for zones with a lot of waste per
+ * slab. This may create inefficiencies in the vm subsystem due to
+ * fragmentation in the address space.
+ *
+ * The only really gross cases, with regard to memory waste, are for those
+ * items that are just over half the page size. You can get nearly 50% waste,
+ * so you fall back to the memory footprint of the power of two allocator. I
+ * have looked at memory allocation sizes on many of the machines available to
+ * me, and there does not seem to be an abundance of allocations at this range
+ * so at this time it may not make sense to optimize for it. This can, of
+ * course, be solved with dynamic slab sizes.
+ *
+ */
+
+/*
+ * This is the layout of a normal (non-OFFPAGE) slab:
+ *
+ * i == item
+ * s == slab pointer
+ *
+ * <---------------- Page (UMA_SLAB_SIZE) ------------------>
+ * ___________________________________________________________
+ * | _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ ___________ |
+ * ||i||i||i||i||i||i||i||i||i||i||i||i||i||i||i| |slab header||
+ * ||_||_||_||_||_||_||_||_||_||_||_||_||_||_||_| |___________||
+ * |___________________________________________________________|
+ *
+ *
+ * This is an OFFPAGE slab. These can be larger than UMA_SLAB_SIZE.
+ *
+ * ___________________________________________________________
+ * | _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ |
+ * ||i||i||i||i||i||i||i||i||i||i||i||i||i||i||i||i||i||i||i| |
+ * ||_||_||_||_||_||_||_||_||_||_||_||_||_||_||_||_||_||_||_| |
+ * |___________________________________________________________|
+ * ___________ ^
+ * |slab header| |
+ * |___________|---*
+ *
+ */
+
+#ifndef VM_UMA_INT_H
+#define VM_UMA_INT_H
+
+#include <sys/mutex.h>
+
+#define UMA_SLAB_SIZE PAGE_SIZE /* How big are our slabs? */
+#define UMA_SLAB_MASK (PAGE_SIZE - 1) /* Mask to get back to the page */
+#define UMA_SLAB_SHIFT PAGE_SHIFT /* Number of bits in PAGE_MASK */
+
+#define UMA_BOOT_PAGES 15 /* Number of pages allocated for startup */
+#define UMA_WORKING_TIME 20 /* Seconds worth of items to keep */
+
+
+/* Max waste before going to off page slab management */
+#define UMA_MAX_WASTE (UMA_SLAB_SIZE / 10)
+
+/*
+ * This is the initial size of the hash table for uma_slabs that are managed
+ * off page. I doubt there will be many cases where it is exceeded. The hash
+ * expands by powers of two. Currently it doesn't get smaller.
+ */
+#define UMA_HASH_SIZE_INIT 32
+
+
+/*
+ * I should investigate other hashing algorithms. This should yield a low
+ * number of collisions if the pages are relatively contiguous.
+ *
+ * This is the same algorithm that most processor caches use.
+ *
+ * I'm shifting and masking instead of % because it should be faster.
+ */
+
+#define UMA_HASH(h, s) ((((unsigned long)s) >> UMA_SLAB_SHIFT) & \
+ (h)->uh_hashmask)
+
+#define UMA_HASH_INSERT(h, s, mem) \
+ SLIST_INSERT_HEAD(&(h)->uh_slab_hash[UMA_HASH((h), \
+ (mem))], (s), us_hlink);
+#define UMA_HASH_REMOVE(h, s, mem) \
+ SLIST_REMOVE(&(h)->uh_slab_hash[UMA_HASH((h), \
+ (mem))], (s), uma_slab, us_hlink);
+
+/* Page management structure */
+
+/* Sorry for the union, but space efficiency is important */
+struct uma_slab {
+ uma_zone_t us_zone; /* Zone we live in */
+ union {
+ LIST_ENTRY(uma_slab) us_link; /* slabs in zone */
+ unsigned long us_size; /* Size of allocation */
+ } us_type;
+ SLIST_ENTRY(uma_slab) us_hlink; /* Link for hash table */
+ u_int8_t *us_data; /* First item */
+ u_int8_t us_flags; /* Page flags, see uma.h */
+ u_int8_t us_freecount; /* How many are free? */
+ u_int8_t us_firstfree; /* First free item index */
+ u_int8_t us_freelist[1]; /* Free list (actually larger) */
+};
+
+#define us_link us_type.us_link
+#define us_size us_type.us_size
+
+typedef struct uma_slab * uma_slab_t;
+
+/* Hash table for freed address -> slab translation */
+
+SLIST_HEAD(slabhead, uma_slab);
+
+struct uma_hash {
+ struct slabhead *uh_slab_hash; /* Hash table for slabs */
+ int uh_hashsize; /* Current size of the hash table */
+ int uh_hashmask; /* Mask used during hashing */
+};
+
+extern struct uma_hash *mallochash;
+
+/*
+ * Structures for per cpu queues.
+ */
+
+/*
+ * This size was chosen so that the struct bucket size is roughly
+ * 128 * sizeof(void *). This is exactly true for x86, and on alpha
+ * it would be 32 bits smaller if it didn't have alignment adjustments.
+ */ + +#define UMA_BUCKET_SIZE 125 + +struct uma_bucket { + LIST_ENTRY(uma_bucket) ub_link; /* Link into the zone */ + int16_t ub_ptr; /* Pointer to current item */ + void *ub_bucket[UMA_BUCKET_SIZE]; /* actual allocation storage */ +}; + +typedef struct uma_bucket * uma_bucket_t; + +struct uma_cache { + struct mtx uc_lock; /* Spin lock on this cpu's bucket */ + int uc_count; /* Highest value ub_ptr can have */ + uma_bucket_t uc_freebucket; /* Bucket we're freeing to */ + uma_bucket_t uc_allocbucket; /* Bucket to allocate from */ + u_int64_t uc_allocs; /* Count of allocations */ +}; + +typedef struct uma_cache * uma_cache_t; + +#define LOCKNAME_LEN 16 /* Length of the name for cpu locks */ + +/* + * Zone management structure + * + * TODO: Optimize for cache line size + * + */ +struct uma_zone { + char uz_lname[LOCKNAME_LEN]; /* Text name for the cpu lock */ + char *uz_name; /* Text name of the zone */ + LIST_ENTRY(uma_zone) uz_link; /* List of all zones */ + u_int32_t uz_align; /* Alignment mask */ + u_int32_t uz_pages; /* Total page count */ + +/* Used during alloc / free */ + struct mtx uz_lock; /* Lock for the zone */ + u_int32_t uz_free; /* Count of items free in slabs */ + u_int16_t uz_ipers; /* Items per slab */ + u_int16_t uz_flags; /* Internal flags */ + + LIST_HEAD(,uma_slab) uz_part_slab; /* partially allocated slabs */ + LIST_HEAD(,uma_slab) uz_free_slab; /* empty slab list */ + LIST_HEAD(,uma_slab) uz_full_slab; /* full slabs */ + LIST_HEAD(,uma_bucket) uz_full_bucket; /* full buckets */ + LIST_HEAD(,uma_bucket) uz_free_bucket; /* Buckets for frees */ + u_int32_t uz_size; /* Requested size of each item */ + u_int32_t uz_rsize; /* Real size of each item */ + + struct uma_hash uz_hash; + u_int16_t uz_pgoff; /* Offset to uma_slab struct */ + u_int16_t uz_ppera; /* pages per allocation from backend */ + u_int16_t uz_cacheoff; /* Next cache offset */ + u_int16_t uz_cachemax; /* Max cache offset */ + + uma_ctor uz_ctor; /* Constructor for each allocation */ + uma_dtor uz_dtor; /* Destructor */ + u_int64_t uz_allocs; /* Total number of allocations */ + + uma_init uz_init; /* Initializer for each item */ + uma_fini uz_fini; /* Discards memory */ + uma_alloc uz_allocf; /* Allocation function */ + uma_free uz_freef; /* Free routine */ + struct vm_object *uz_obj; /* Zone specific object */ + vm_offset_t uz_kva; /* Base kva for zones with objs */ + u_int32_t uz_maxpages; /* Maximum number of pages to alloc */ + u_int32_t uz_cachefree; /* Last count of items free in caches */ + u_int64_t uz_oallocs; /* old allocs count */ + u_int64_t uz_wssize; /* Working set size */ + int uz_recurse; /* Allocation recursion count */ + /* + * This HAS to be the last item because we adjust the zone size + * based on NCPU and then allocate the space for the zones. 
+ */ + struct uma_cache uz_cpu[1]; /* Per cpu caches */ +}; + +#define UMA_CACHE_INC 16 /* How much will we move data */ + +#define UMA_ZFLAG_OFFPAGE 0x0001 /* Struct slab/freelist off page */ +#define UMA_ZFLAG_PRIVALLOC 0x0002 /* Zone has supplied it's own alloc */ +#define UMA_ZFLAG_INTERNAL 0x0004 /* Internal zone, no offpage no PCPU */ +#define UMA_ZFLAG_MALLOC 0x0008 /* Zone created by malloc */ +#define UMA_ZFLAG_NOFREE 0x0010 /* Don't free data from this zone */ +/* This lives in uflags */ +#define UMA_ZONE_INTERNAL 0x1000 /* Internal zone for uflags */ + +/* Internal prototypes */ +static __inline uma_slab_t hash_sfind(struct uma_hash *hash, u_int8_t *data); +void *uma_large_malloc(int size, int wait); +void uma_large_free(uma_slab_t slab); + +/* Lock Macros */ + +#define ZONE_LOCK_INIT(z) mtx_init(&(z)->uz_lock, (z)->uz_name, MTX_DEF) +#define ZONE_LOCK_FINI(z) mtx_destroy(&(z)->uz_lock) +#define ZONE_LOCK(z) mtx_lock(&(z)->uz_lock) +#define ZONE_UNLOCK(z) mtx_unlock(&(z)->uz_lock) + +#define CPU_LOCK_INIT(z, cpu) \ + mtx_init(&(z)->uz_cpu[(cpu)].uc_lock, (z)->uz_lname, MTX_DEF) + +#define CPU_LOCK_FINI(z, cpu) \ + mtx_destroy(&(z)->uz_cpu[(cpu)].uc_lock) + +#define CPU_LOCK(z, cpu) \ + mtx_lock(&(z)->uz_cpu[(cpu)].uc_lock) + +#define CPU_UNLOCK(z, cpu) \ + mtx_unlock(&(z)->uz_cpu[(cpu)].uc_lock) + +/* + * Find a slab within a hash table. This is used for OFFPAGE zones to lookup + * the slab structure. + * + * Arguments: + * hash The hash table to search. + * data The base page of the item. + * + * Returns: + * A pointer to a slab if successful, else NULL. + */ +static __inline uma_slab_t +hash_sfind(struct uma_hash *hash, u_int8_t *data) +{ + uma_slab_t slab; + int hval; + + hval = UMA_HASH(hash, data); + + SLIST_FOREACH(slab, &hash->uh_slab_hash[hval], us_hlink) { + if ((u_int8_t *)slab->us_data == data) + return (slab); + } + return (NULL); +} + + +#endif /* VM_UMA_INT_H */ diff --git a/sys/vm/vm_init.c b/sys/vm/vm_init.c index 54e0c13..5114470 100644 --- a/sys/vm/vm_init.c +++ b/sys/vm/vm_init.c @@ -114,7 +114,6 @@ vm_mem_init(dummy) /* * Initialize other VM packages */ - vm_zone_init(); vm_object_init(); vm_map_startup(); kmem_init(virtual_avail, virtual_end); diff --git a/sys/vm/vm_map.c b/sys/vm/vm_map.c index 12c6d62..8eadaa1 100644 --- a/sys/vm/vm_map.c +++ b/sys/vm/vm_map.c @@ -88,7 +88,6 @@ #include <vm/vm_pager.h> #include <vm/vm_kern.h> #include <vm/vm_extern.h> -#include <vm/vm_zone.h> #include <vm/swap_pager.h> /* @@ -131,28 +130,111 @@ * maps and requires map entries. 
*/ -static struct vm_zone kmapentzone_store, mapentzone_store, mapzone_store; -static vm_zone_t mapentzone, kmapentzone, mapzone, vmspace_zone; -static struct vm_object kmapentobj, mapentobj, mapobj; - -static struct vm_map_entry map_entry_init[MAX_MAPENT]; -static struct vm_map_entry kmap_entry_init[MAX_KMAPENT]; -static struct vm_map map_init[MAX_KMAP]; +static uma_zone_t mapentzone; +static uma_zone_t kmapentzone; +static uma_zone_t mapzone; +static uma_zone_t vmspace_zone; +static struct vm_object kmapentobj; +static void vmspace_zinit(void *mem, int size); +static void vmspace_zfini(void *mem, int size); +static void vm_map_zinit(void *mem, int size); +static void vm_map_zfini(void *mem, int size); +static void _vm_map_init(vm_map_t map, vm_offset_t min, vm_offset_t max); + +#ifdef INVARIANTS +static void vm_map_zdtor(void *mem, int size, void *arg); +static void vmspace_zdtor(void *mem, int size, void *arg); +#endif void vm_map_startup(void) { - mapzone = &mapzone_store; - zbootinit(mapzone, "MAP", sizeof (struct vm_map), - map_init, MAX_KMAP); - kmapentzone = &kmapentzone_store; - zbootinit(kmapentzone, "KMAP ENTRY", sizeof (struct vm_map_entry), - kmap_entry_init, MAX_KMAPENT); - mapentzone = &mapentzone_store; - zbootinit(mapentzone, "MAP ENTRY", sizeof (struct vm_map_entry), - map_entry_init, MAX_MAPENT); + mapzone = uma_zcreate("MAP", sizeof(struct vm_map), NULL, +#ifdef INVARIANTS + vm_map_zdtor, +#else + NULL, +#endif + vm_map_zinit, vm_map_zfini, UMA_ALIGN_PTR, UMA_ZONE_NOFREE); + uma_prealloc(mapzone, MAX_KMAP); + kmapentzone = zinit("KMAP ENTRY", sizeof(struct vm_map_entry), 0, 0, 0); uma_prealloc(kmapentzone, MAX_KMAPENT); + mapentzone = zinit("MAP ENTRY", sizeof(struct vm_map_entry), 0, 0, 0); + uma_prealloc(mapentzone, MAX_MAPENT); +} + +static void +vmspace_zfini(void *mem, int size) +{ + struct vmspace *vm; + + vm = (struct vmspace *)mem; + + vm_map_zfini(&vm->vm_map, sizeof(vm->vm_map)); +} + +static void +vmspace_zinit(void *mem, int size) +{ + struct vmspace *vm; + + vm = (struct vmspace *)mem; + + vm_map_zinit(&vm->vm_map, sizeof(vm->vm_map)); +} + +static void +vm_map_zfini(void *mem, int size) +{ + vm_map_t map; + + GIANT_REQUIRED; + map = (vm_map_t)mem; + + lockdestroy(&map->lock); } +static void +vm_map_zinit(void *mem, int size) +{ + vm_map_t map; + + GIANT_REQUIRED; + + map = (vm_map_t)mem; + map->nentries = 0; + map->size = 0; + map->infork = 0; + lockinit(&map->lock, PVM, "thrd_sleep", 0, LK_NOPAUSE); +} + +#ifdef INVARIANTS +static void +vmspace_zdtor(void *mem, int size, void *arg) +{ + struct vmspace *vm; + + vm = (struct vmspace *)mem; + + vm_map_zdtor(&vm->vm_map, sizeof(vm->vm_map), arg); +} +static void +vm_map_zdtor(void *mem, int size, void *arg) +{ + vm_map_t map; + + map = (vm_map_t)mem; + KASSERT(map->nentries == 0, + ("map %p nentries == %d on free.", + map, map->nentries)); + KASSERT(map->size == 0, + ("map %p size == %lu on free.", + map, map->size)); + KASSERT(map->infork == 0, + ("map %p infork == %d on free.", + map, map->infork)); +} +#endif /* INVARIANTS */ + /* * Allocate a vmspace structure, including a vm_map and pmap, * and initialize those structures. The refcnt is set to 1. 
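
The ctor/dtor/init/fini pattern used in the vm_map conversion above generalizes to any subsystem that wants per-item setup kept out of the hot alloc/free path. Below is a minimal sketch, not part of this commit: the "foo" structure, its fields, and the preallocation count are made up purely to illustrate the callback signatures (init/fini take (void *, int); ctor/dtor take (void *, int, void *)) and the uma_zcreate()/uma_prealloc() calls shown in the hunk above. Here init/fini set up and tear down per-item state (a mutex, like the map lock), while the dtor is compiled in only under INVARIANTS for cheap sanity checks.

/* A minimal sketch, assuming a hypothetical "foo" subsystem. */
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/lock.h>
#include <sys/mutex.h>
#include <vm/uma.h>

struct foo {
	struct mtx	f_lock;		/* Per-item lock, set up by the init hook */
	int		f_refs;
};

static uma_zone_t foozone;

static void
foo_zinit(void *mem, int size)
{
	struct foo *f = mem;

	mtx_init(&f->f_lock, "foo", MTX_DEF);
}

static void
foo_zfini(void *mem, int size)
{
	struct foo *f = mem;

	mtx_destroy(&f->f_lock);
}

#ifdef INVARIANTS
static void
foo_zdtor(void *mem, int size, void *arg)
{
	struct foo *f = mem;

	KASSERT(f->f_refs == 0, ("foo %p freed with %d refs", f, f->f_refs));
}
#endif

static void
foo_startup(void)
{
	foozone = uma_zcreate("FOO", sizeof(struct foo), NULL,
#ifdef INVARIANTS
	    foo_zdtor,
#else
	    NULL,
#endif
	    foo_zinit, foo_zfini, UMA_ALIGN_PTR, UMA_ZONE_NOFREE);
	uma_prealloc(foozone, 32);	/* illustrative count */
}

Items would then come from uma_zalloc(foozone, M_WAITOK) and return via uma_zfree(foozone, f), exactly as the converted vm_map code does below.
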
@@ -165,9 +247,9 @@ vmspace_alloc(min, max) struct vmspace *vm; GIANT_REQUIRED; - vm = zalloc(vmspace_zone); + vm = uma_zalloc(vmspace_zone, M_WAITOK); CTR1(KTR_VM, "vmspace_alloc: %p", vm); - vm_map_init(&vm->vm_map, min, max); + _vm_map_init(&vm->vm_map, min, max); pmap_pinit(vmspace_pmap(vm)); vm->vm_map.pmap = vmspace_pmap(vm); /* XXX */ vm->vm_refcnt = 1; @@ -179,13 +261,14 @@ vmspace_alloc(min, max) void vm_init2(void) { - zinitna(kmapentzone, &kmapentobj, - NULL, 0, cnt.v_page_count / 4, ZONE_INTERRUPT, 1); - zinitna(mapentzone, &mapentobj, - NULL, 0, 0, 0, 1); - zinitna(mapzone, &mapobj, - NULL, 0, 0, 0, 1); - vmspace_zone = zinit("VMSPACE", sizeof (struct vmspace), 0, 0, 3); + uma_zone_set_obj(kmapentzone, &kmapentobj, cnt.v_page_count / 4); + vmspace_zone = uma_zcreate("VMSPACE", sizeof(struct vmspace), NULL, +#ifdef INVARIANTS + vmspace_zdtor, +#else + NULL, +#endif + vmspace_zinit, vmspace_zfini, UMA_ALIGN_PTR, UMA_ZONE_NOFREE); pmap_init2(); vm_object_init2(); } @@ -203,9 +286,9 @@ vmspace_dofree(struct vmspace *vm) (void) vm_map_delete(&vm->vm_map, vm->vm_map.min_offset, vm->vm_map.max_offset); vm_map_unlock(&vm->vm_map); + pmap_release(vmspace_pmap(vm)); - vm_map_destroy(&vm->vm_map); - zfree(vmspace_zone, vm); + uma_zfree(vmspace_zone, vm); } void @@ -390,9 +473,9 @@ vm_map_create(pmap_t pmap, vm_offset_t min, vm_offset_t max) GIANT_REQUIRED; - result = zalloc(mapzone); + result = uma_zalloc(mapzone, M_WAITOK); CTR1(KTR_VM, "vm_map_create: %p", result); - vm_map_init(result, min, max); + _vm_map_init(result, min, max); result->pmap = pmap; return (result); } @@ -402,30 +485,25 @@ vm_map_create(pmap_t pmap, vm_offset_t min, vm_offset_t max) * such as that in the vmspace structure. * The pmap is set elsewhere. */ -void -vm_map_init(vm_map_t map, vm_offset_t min, vm_offset_t max) +static void +_vm_map_init(vm_map_t map, vm_offset_t min, vm_offset_t max) { GIANT_REQUIRED; map->header.next = map->header.prev = &map->header; - map->nentries = 0; - map->size = 0; map->system_map = 0; - map->infork = 0; map->min_offset = min; map->max_offset = max; map->first_free = &map->header; map->hint = &map->header; map->timestamp = 0; - lockinit(&map->lock, PVM, "thrd_sleep", 0, LK_NOPAUSE); } void -vm_map_destroy(map) - struct vm_map *map; +vm_map_init(vm_map_t map, vm_offset_t min, vm_offset_t max) { - GIANT_REQUIRED; - lockdestroy(&map->lock); + _vm_map_init(map, min, max); + lockinit(&map->lock, PVM, "thrd_sleep", 0, LK_NOPAUSE); } /* @@ -436,7 +514,8 @@ vm_map_destroy(map) static void vm_map_entry_dispose(vm_map_t map, vm_map_entry_t entry) { - zfree((map->system_map || !mapentzone) ? kmapentzone : mapentzone, entry); + uma_zfree((map->system_map || !mapentzone) + ? kmapentzone : mapentzone, entry); } /* @@ -450,8 +529,8 @@ vm_map_entry_create(vm_map_t map) { vm_map_entry_t new_entry; - new_entry = zalloc((map->system_map || !mapentzone) ? - kmapentzone : mapentzone); + new_entry = uma_zalloc((map->system_map || !mapentzone) ? 
+ kmapentzone : mapentzone, M_WAITOK); if (new_entry == NULL) panic("vm_map_entry_create: kernel resources exhausted"); return (new_entry); diff --git a/sys/vm/vm_map.h b/sys/vm/vm_map.h index b23af37..eefff35 100644 --- a/sys/vm/vm_map.h +++ b/sys/vm/vm_map.h @@ -267,7 +267,6 @@ int vm_map_find (vm_map_t, vm_object_t, vm_ooffset_t, vm_offset_t *, vm_size_t, int vm_map_findspace (vm_map_t, vm_offset_t, vm_size_t, vm_offset_t *); int vm_map_inherit (vm_map_t, vm_offset_t, vm_offset_t, vm_inherit_t); void vm_map_init (struct vm_map *, vm_offset_t, vm_offset_t); -void vm_map_destroy (struct vm_map *); int vm_map_insert (vm_map_t, vm_object_t, vm_ooffset_t, vm_offset_t, vm_offset_t, vm_prot_t, vm_prot_t, int); int vm_map_lookup (vm_map_t *, vm_offset_t, vm_prot_t, vm_map_entry_t *, vm_object_t *, vm_pindex_t *, vm_prot_t *, boolean_t *); diff --git a/sys/vm/vm_object.c b/sys/vm/vm_object.c index a561c7c..e6f1ad5 100644 --- a/sys/vm/vm_object.c +++ b/sys/vm/vm_object.c @@ -147,11 +147,45 @@ extern int vm_pageout_page_count; static long object_collapses; static long object_bypasses; static int next_index; -static vm_zone_t obj_zone; -static struct vm_zone obj_zone_store; static int object_hash_rand; +static vm_zone_t obj_zone; #define VM_OBJECTS_INIT 256 -static struct vm_object vm_objects_init[VM_OBJECTS_INIT]; + +static void vm_object_zinit(void *mem, int size); + +#ifdef INVARIANTS +static void vm_object_zdtor(void *mem, int size, void *arg); + +static void +vm_object_zdtor(void *mem, int size, void *arg) +{ + vm_object_t object; + + object = (vm_object_t)mem; + KASSERT(object->paging_in_progress == 0, + ("object %p paging_in_progress = %d", + object, object->paging_in_progress)); + KASSERT(object->resident_page_count == 0, + ("object %p resident_page_count = %d", + object, object->resident_page_count)); + KASSERT(object->shadow_count == 0, + ("object %p shadow_count = %d", + object, object->shadow_count)); +} +#endif + +static void +vm_object_zinit(void *mem, int size) +{ + vm_object_t object; + + object = (vm_object_t)mem; + + /* These are true for any object that has been freed */ + object->paging_in_progress = 0; + object->resident_page_count = 0; + object->shadow_count = 0; +} void _vm_object_allocate(objtype_t type, vm_size_t size, vm_object_t object) @@ -169,9 +203,6 @@ _vm_object_allocate(objtype_t type, vm_size_t size, vm_object_t object) object->flags = 0; if ((object->type == OBJT_DEFAULT) || (object->type == OBJT_SWAP)) vm_object_set_flag(object, OBJ_ONEMAPPING); - object->paging_in_progress = 0; - object->resident_page_count = 0; - object->shadow_count = 0; object->pg_color = next_index; if (size > (PQ_L2_SIZE / 3 + PQ_PRIME1)) incr = PQ_L2_SIZE / 3 + PQ_PRIME1; @@ -216,16 +247,19 @@ vm_object_init(void) kmem_object = &kmem_object_store; _vm_object_allocate(OBJT_DEFAULT, OFF_TO_IDX(VM_MAX_KERNEL_ADDRESS - VM_MIN_KERNEL_ADDRESS), kmem_object); - - obj_zone = &obj_zone_store; - zbootinit(obj_zone, "VM OBJECT", sizeof (struct vm_object), - vm_objects_init, VM_OBJECTS_INIT); + obj_zone = uma_zcreate("VM OBJECT", sizeof (struct vm_object), NULL, +#ifdef INVARIANTS + vm_object_zdtor, +#else + NULL, +#endif + vm_object_zinit, NULL, UMA_ALIGN_PTR, UMA_ZONE_NOFREE); + uma_prealloc(obj_zone, VM_OBJECTS_INIT); } void vm_object_init2(void) { - zinitna(obj_zone, NULL, NULL, 0, 0, 0, 1); } void diff --git a/sys/vm/vm_page.c b/sys/vm/vm_page.c index 3022b73..706929a 100644 --- a/sys/vm/vm_page.c +++ b/sys/vm/vm_page.c @@ -118,6 +118,8 @@ #include <vm/vm_pageout.h> #include <vm/vm_pager.h> 
#include <vm/vm_extern.h> +#include <vm/uma.h> +#include <vm/uma_int.h> /* * Associated with page of user-allocatable memory is a @@ -176,6 +178,7 @@ vm_page_startup(vm_offset_t starta, vm_offset_t enda, vm_offset_t vaddr) vm_offset_t biggestone, biggestsize; vm_offset_t total; + vm_size_t bootpages; total = 0; biggestsize = 0; @@ -208,6 +211,19 @@ vm_page_startup(vm_offset_t starta, vm_offset_t enda, vm_offset_t vaddr) vm_pageq_init(); /* + * Allocate memory for use when boot strapping the kernel memory allocator + */ + bootpages = UMA_BOOT_PAGES * UMA_SLAB_SIZE; + new_end = end - bootpages; + new_end = trunc_page(new_end); + mapped = pmap_map(&vaddr, new_end, end, + VM_PROT_READ | VM_PROT_WRITE); + bzero((caddr_t) mapped, end - new_end); + uma_startup((caddr_t)mapped); + + end = new_end; + + /* * Allocate (and initialize) the hash table buckets. * * The number of buckets MUST BE a power of 2, and the actual value is diff --git a/sys/vm/vm_pageout.c b/sys/vm/vm_pageout.c index 5567628..c4b94de 100644 --- a/sys/vm/vm_pageout.c +++ b/sys/vm/vm_pageout.c @@ -649,6 +649,7 @@ vm_pageout_scan(int pass) * Do whatever cleanup that the pmap code can. */ pmap_collect(); + uma_reclaim(); addl_page_shortage_init = vm_pageout_deficit; vm_pageout_deficit = 0; diff --git a/sys/vm/vm_zone.h b/sys/vm/vm_zone.h index 83d7914..a355051 100644 --- a/sys/vm/vm_zone.h +++ b/sys/vm/vm_zone.h @@ -23,40 +23,30 @@ #include <sys/_lock.h> #include <sys/_mutex.h> +#include <vm/uma.h> -typedef struct vm_zone { - struct mtx zmtx; /* lock for data structure */ - void *zitems; /* linked list of items */ - int zfreecnt; /* free entries */ - int zfreemin; /* minimum number of free entries */ - int znalloc; /* number of allocations */ - vm_offset_t zkva; /* Base kva of zone */ - int zpagecount; /* Total # of allocated pages */ - int zpagemax; /* Max address space */ - int zmax; /* Max number of entries allocated */ - int ztotal; /* Total entries allocated now */ - int zsize; /* size of each entry */ - int zalloc; /* hint for # of pages to alloc */ - int zflags; /* flags for zone */ - int zallocflag; /* flag for allocation */ - struct vm_object *zobj; /* object to hold zone */ - char *zname; /* name for diags */ - /* NOTE: zent is protected by the subsystem lock, *not* by zmtx */ - SLIST_ENTRY(vm_zone) zent; /* singly-linked list of zones */ -} *vm_zone_t; - - -void vm_zone_init(void); -void vm_zone_init2(void); +typedef uma_zone_t vm_zone_t; +#if 0 +static void vm_zone_init(void); +static void vm_zone_init2(void); + +static vm_zone_t zinit(char *name, int size, int nentries, + int flags, int zalloc); int zinitna(vm_zone_t z, struct vm_object *obj, char *name, int size, int nentries, int flags, int zalloc); -vm_zone_t zinit(char *name, int size, int nentries, - int flags, int zalloc); void zbootinit(vm_zone_t z, char *name, int size, void *item, int nitems); -void zdestroy(vm_zone_t z); -void *zalloc(vm_zone_t z); -void zfree(vm_zone_t z, void *item); - +static void zdestroy(vm_zone_t z); +static void *zalloc(vm_zone_t z); +static void zfree(vm_zone_t z, void *item); +#endif + +#define vm_zone_init2() uma_startup2() + +#define zinit(name, size, nentries, flags, zalloc) \ + uma_zcreate((name), (size), NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_NOFREE) +#define zdestroy() +#define zalloc(z) uma_zalloc((z), M_WAITOK) +#define zfree(z, item) uma_zfree((z), (item)) #endif /* _SYS_ZONE_H */ |
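
Taken together, the vm_zone.h compatibility macros above let most existing zinit()/zalloc()/zfree() consumers keep compiling, while new code can call UMA directly. The following sketch is illustrative only and not part of this commit; "mything" and its members are invented to show how a simple legacy consumer maps onto the new interface.

/* A minimal sketch, assuming a hypothetical "mything" consumer. */
#include <sys/param.h>
#include <sys/malloc.h>		/* For M_WAITOK */
#include <vm/uma.h>

struct mything {
	int	m_state;
};

static uma_zone_t mything_zone;

static void
mything_setup(void)
{
	/* What the old zinit(name, size, 0, 0, 0) expands to via the macro. */
	mything_zone = uma_zcreate("MYTHING", sizeof(struct mything),
	    NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_NOFREE);
}

static struct mything *
mything_get(void)
{
	return (uma_zalloc(mything_zone, M_WAITOK));	/* was zalloc(zone) */
}

static void
mything_put(struct mything *m)
{
	uma_zfree(mything_zone, m);			/* was zfree(zone, m) */
}

Zones that previously passed a backing object with ZONE_INTERRUPT instead call uma_zone_set_obj() on the created zone, as vm_init2() does for kmapentzone above.
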