35 files changed, 3181 insertions, 489 deletions
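Before the per-file hunks, it helps to see the shape of the interface this commit introduces (the prototypes live in the new sys/vm/uma.h further down). The following is a minimal sketch built around a hypothetical "foo" subsystem, not code from the diff, showing the pattern the converted zones (PIPE, NAMEI, Files, the pcb zones) follow; uma_zalloc() and uma_zfree() are inline wrappers that pass a NULL argument to uma_zalloc_arg() and uma_zfree_arg().

#include <sys/param.h>
#include <sys/kernel.h>
#include <sys/systm.h>
#include <vm/uma.h>

/* Hypothetical item type and zone; not part of this commit. */
struct foo {
	int	f_refs;
};

static uma_zone_t foo_zone;

static void
foo_zinit(void *dummy __unused)
{
	/* No ctor/dtor/init/fini, pointer alignment, no special flags. */
	foo_zone = uma_zcreate("foo", sizeof(struct foo), NULL, NULL,
	    NULL, NULL, UMA_ALIGN_PTR, 0);
}
SYSINIT(foozone, SI_SUB_VFS, SI_ORDER_ANY, foo_zinit, NULL)

static struct foo *
foo_alloc(void)
{
	return (uma_zalloc(foo_zone, M_WAITOK));	/* may sleep */
}

static void
foo_free(struct foo *fp)
{
	uma_zfree(foo_zone, fp);
}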
diff --git a/sys/alpha/alpha/pmap.c b/sys/alpha/alpha/pmap.c index 374adc1..1575a81 100644 --- a/sys/alpha/alpha/pmap.c +++ b/sys/alpha/alpha/pmap.c @@ -322,11 +322,9 @@ static struct mtx allpmaps_lock; * Data for the pv entry allocation mechanism */ static vm_zone_t pvzone; -static struct vm_zone pvzone_store; static struct vm_object pvzone_obj; static int pv_entry_count = 0, pv_entry_max = 0, pv_entry_high_water = 0; static int pmap_pagedaemon_waken = 0; -static struct pv_entry *pvinit; static PMAP_INLINE void free_pv_entry __P((pv_entry_t pv)); static pv_entry_t get_pv_entry __P((void)); @@ -349,6 +347,7 @@ static int pmap_release_free_page __P((pmap_t pmap, vm_page_t p)); static vm_page_t _pmap_allocpte __P((pmap_t pmap, unsigned ptepindex)); static vm_page_t pmap_page_lookup __P((vm_object_t object, vm_pindex_t pindex)); static int pmap_unuse_pt __P((pmap_t, vm_offset_t, vm_page_t)); +static void *pmap_allocf(uma_zone_t zone, int bytes, u_int8_t *flags, int wait); #ifdef SMP static void pmap_invalidate_page_action __P((void *arg)); static void pmap_invalidate_all_action __P((void *arg)); @@ -575,6 +574,13 @@ pmap_uses_prom_console() return 0; } +static void * +pmap_allocf(uma_zone_t zone, int bytes, u_int8_t *flags, int wait) +{ + *flags = UMA_SLAB_PRIV; + return (void *)kmem_alloc(kernel_map, bytes); +} + /* * Initialize the pmap module. * Called by vm_init, to initialize any structures that the pmap @@ -609,11 +615,16 @@ pmap_init(phys_start, phys_end) initial_pvs = vm_page_array_size; if (initial_pvs < MINPV) initial_pvs = MINPV; +#if 0 pvzone = &pvzone_store; pvinit = (struct pv_entry *) kmem_alloc(kernel_map, initial_pvs * sizeof (struct pv_entry)); zbootinit(pvzone, "PV ENTRY", sizeof (struct pv_entry), pvinit, vm_page_array_size); +#endif + pvzone = zinit("PV ENTRY", sizeof (struct pv_entry), 0, 0, 0); + uma_zone_set_allocf(pvzone, pmap_allocf); + uma_prealloc(pvzone, initial_pvs); /* * object for kernel page table pages */ @@ -638,7 +649,10 @@ pmap_init2() TUNABLE_INT_FETCH("vm.pmap.shpgperproc", &shpgperproc); pv_entry_max = shpgperproc * maxproc + vm_page_array_size; pv_entry_high_water = 9 * (pv_entry_max / 10); +#if 0 zinitna(pvzone, &pvzone_obj, NULL, 0, pv_entry_max, ZONE_INTERRUPT, 1); +#endif + uma_zone_set_obj(pvzone, &pvzone_obj, pv_entry_max); } diff --git a/sys/amd64/amd64/pmap.c b/sys/amd64/amd64/pmap.c index 319715d..59192c5 100644 --- a/sys/amd64/amd64/pmap.c +++ b/sys/amd64/amd64/pmap.c @@ -168,11 +168,9 @@ vm_offset_t kernel_vm_end; * Data for the pv entry allocation mechanism */ static vm_zone_t pvzone; -static struct vm_zone pvzone_store; static struct vm_object pvzone_obj; static int pv_entry_count = 0, pv_entry_max = 0, pv_entry_high_water = 0; static int pmap_pagedaemon_waken = 0; -static struct pv_entry *pvinit; /* * All those kernel PT submaps that BSD is so fond of @@ -221,6 +219,7 @@ static pt_entry_t *pmap_pte_quick __P((pmap_t pmap, vm_offset_t va)); static vm_page_t pmap_page_lookup __P((vm_object_t object, vm_pindex_t pindex)); static int pmap_unuse_pt __P((pmap_t, vm_offset_t, vm_page_t)); static vm_offset_t pmap_kmem_choose(vm_offset_t addr); +static void *pmap_allocf(uma_zone_t zone, int bytes, u_int8_t *flags, int wait); static pd_entry_t pdir4mb; @@ -446,6 +445,13 @@ pmap_set_opt(void) } #endif +void * +pmap_allocf(uma_zone_t zone, int bytes, u_int8_t *flags, int wait) +{ + *flags = UMA_SLAB_PRIV; + return (void *)kmem_alloc(kernel_map, bytes); +} + /* * Initialize the pmap module. 
* Called by vm_init, to initialize any structures that the pmap @@ -484,11 +490,16 @@ pmap_init(phys_start, phys_end) initial_pvs = vm_page_array_size; if (initial_pvs < MINPV) initial_pvs = MINPV; +#if 0 pvzone = &pvzone_store; pvinit = (struct pv_entry *) kmem_alloc(kernel_map, initial_pvs * sizeof (struct pv_entry)); zbootinit(pvzone, "PV ENTRY", sizeof (struct pv_entry), pvinit, vm_page_array_size); +#endif + pvzone = zinit("PV ENTRY", sizeof (struct pv_entry), 0, 0, 0); + uma_zone_set_allocf(pvzone, pmap_allocf); + uma_prealloc(pvzone, initial_pvs); /* * Now it is safe to enable pv_table recording. @@ -510,7 +521,10 @@ pmap_init2() pv_entry_max = shpgperproc * maxproc + vm_page_array_size; TUNABLE_INT_FETCH("vm.pmap.pv_entries", &pv_entry_max); pv_entry_high_water = 9 * (pv_entry_max / 10); +#if 0 zinitna(pvzone, &pvzone_obj, NULL, 0, pv_entry_max, ZONE_INTERRUPT, 1); +#endif + uma_zone_set_obj(pvzone, &pvzone_obj, pv_entry_max); } diff --git a/sys/conf/files b/sys/conf/files index 5e41c63..c6c73a6 100644 --- a/sys/conf/files +++ b/sys/conf/files @@ -1339,5 +1339,5 @@ vm/vm_pageout.c standard vm/vm_pager.c standard vm/vm_swap.c standard vm/vm_unix.c standard -vm/vm_zone.c standard +vm/uma_core.c standard vm/vnode_pager.c standard diff --git a/sys/i386/i386/pmap.c b/sys/i386/i386/pmap.c index 319715d..59192c5 100644 --- a/sys/i386/i386/pmap.c +++ b/sys/i386/i386/pmap.c @@ -168,11 +168,9 @@ vm_offset_t kernel_vm_end; * Data for the pv entry allocation mechanism */ static vm_zone_t pvzone; -static struct vm_zone pvzone_store; static struct vm_object pvzone_obj; static int pv_entry_count = 0, pv_entry_max = 0, pv_entry_high_water = 0; static int pmap_pagedaemon_waken = 0; -static struct pv_entry *pvinit; /* * All those kernel PT submaps that BSD is so fond of @@ -221,6 +219,7 @@ static pt_entry_t *pmap_pte_quick __P((pmap_t pmap, vm_offset_t va)); static vm_page_t pmap_page_lookup __P((vm_object_t object, vm_pindex_t pindex)); static int pmap_unuse_pt __P((pmap_t, vm_offset_t, vm_page_t)); static vm_offset_t pmap_kmem_choose(vm_offset_t addr); +static void *pmap_allocf(uma_zone_t zone, int bytes, u_int8_t *flags, int wait); static pd_entry_t pdir4mb; @@ -446,6 +445,13 @@ pmap_set_opt(void) } #endif +void * +pmap_allocf(uma_zone_t zone, int bytes, u_int8_t *flags, int wait) +{ + *flags = UMA_SLAB_PRIV; + return (void *)kmem_alloc(kernel_map, bytes); +} + /* * Initialize the pmap module. * Called by vm_init, to initialize any structures that the pmap @@ -484,11 +490,16 @@ pmap_init(phys_start, phys_end) initial_pvs = vm_page_array_size; if (initial_pvs < MINPV) initial_pvs = MINPV; +#if 0 pvzone = &pvzone_store; pvinit = (struct pv_entry *) kmem_alloc(kernel_map, initial_pvs * sizeof (struct pv_entry)); zbootinit(pvzone, "PV ENTRY", sizeof (struct pv_entry), pvinit, vm_page_array_size); +#endif + pvzone = zinit("PV ENTRY", sizeof (struct pv_entry), 0, 0, 0); + uma_zone_set_allocf(pvzone, pmap_allocf); + uma_prealloc(pvzone, initial_pvs); /* * Now it is safe to enable pv_table recording. 
@@ -510,7 +521,10 @@ pmap_init2() pv_entry_max = shpgperproc * maxproc + vm_page_array_size; TUNABLE_INT_FETCH("vm.pmap.pv_entries", &pv_entry_max); pv_entry_high_water = 9 * (pv_entry_max / 10); +#if 0 zinitna(pvzone, &pvzone_obj, NULL, 0, pv_entry_max, ZONE_INTERRUPT, 1); +#endif + uma_zone_set_obj(pvzone, &pvzone_obj, pv_entry_max); } diff --git a/sys/ia64/ia64/pmap.c b/sys/ia64/ia64/pmap.c index 4ebd351..cca7807 100644 --- a/sys/ia64/ia64/pmap.c +++ b/sys/ia64/ia64/pmap.c @@ -226,7 +226,6 @@ struct mtx pmap_ridmutex; * Data for the pv entry allocation mechanism */ static vm_zone_t pvzone; -static struct vm_zone pvzone_store; static struct vm_object pvzone_obj; static int pv_entry_count = 0, pv_entry_max = 0, pv_entry_high_water = 0; static int pmap_pagedaemon_waken = 0; @@ -238,7 +237,6 @@ static int pvbootnext, pvbootmax; * Data for allocating PTEs for user processes. */ static vm_zone_t ptezone; -static struct vm_zone ptezone_store; static struct vm_object ptezone_obj; static struct ia64_lpte *pteinit; @@ -264,6 +262,7 @@ static void ia64_protection_init __P((void)); static void pmap_invalidate_all __P((pmap_t pmap)); static void pmap_remove_all __P((vm_page_t m)); static void pmap_enter_quick __P((pmap_t pmap, vm_offset_t va, vm_page_t m)); +static void *pmap_allocf(uma_zone_t zone, int bytes, u_int8_t *flags, int wait); vm_offset_t pmap_steal_memory(vm_size_t size) @@ -486,6 +485,13 @@ pmap_bootstrap() pmap_invalidate_all(kernel_pmap); } +static void * +pmap_allocf(uma_zone_t zone, int bytes, u_int8_t *flags, int wait) +{ + *flags = UMA_SLAB_PRIV; + return (void *)kmem_alloc(kernel_map, bytes); +} + /* * Initialize the pmap module. * Called by vm_init, to initialize any structures that the pmap @@ -518,6 +524,7 @@ pmap_init(vm_offset_t phys_start, vm_offset_t phys_end) initial_pvs = vm_page_array_size; if (initial_pvs < MINPV) initial_pvs = MINPV; +#if 0 pvzone = &pvzone_store; pvinit = (struct pv_entry *) kmem_alloc(kernel_map, initial_pvs * sizeof (struct pv_entry)); @@ -529,6 +536,14 @@ pmap_init(vm_offset_t phys_start, vm_offset_t phys_end) initial_pvs * sizeof (struct ia64_lpte)); zbootinit(ptezone, "PT ENTRY", sizeof (struct ia64_lpte), pteinit, vm_page_array_size); +#endif + pvzone = zinit("PV ENTRY", sizeof (struct pv_entry), 0, 0, 0); + uma_zone_set_allocf(pvzone, pmap_allocf); + uma_prealloc(pvzone, initial_pvs); + + ptezone = zinit("PT ENTRY", sizeof (struct ia64_lpte), 0, 0, 0); + uma_zone_set_allocf(ptezone, pmap_allocf); + uma_prealloc(ptezone, initial_pvs); /* * Create the object for the kernel's page tables. 
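The machine-dependent pmaps converted above (alpha, amd64/i386, ia64; sparc64 follows the same pattern later in the diff) all treat their PV-entry zones the same way, so the three steps are worth seeing together. This is a condensed recap of the hunks above, not new code: create the zone through the zinit() compatibility wrapper, hook in a private back-end page allocator that tags its slabs UMA_SLAB_PRIV and takes wired pages from kernel_map, pre-fill the zone while it is still safe to block, and finally give it a backing object and limit in pmap_init2() once pv_entry_max is known.

static void *
pmap_allocf(uma_zone_t zone, int bytes, u_int8_t *flags, int wait)
{
	*flags = UMA_SLAB_PRIV;		/* pages come from a private source */
	return ((void *)kmem_alloc(kernel_map, bytes));
}

	/* pmap_init(): create, hook the allocator, pre-fill at boot. */
	pvzone = zinit("PV ENTRY", sizeof(struct pv_entry), 0, 0, 0);
	uma_zone_set_allocf(pvzone, pmap_allocf);
	uma_prealloc(pvzone, initial_pvs);

	/* pmap_init2(): back the zone with its object and cap it. */
	uma_zone_set_obj(pvzone, &pvzone_obj, pv_entry_max);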
@@ -554,8 +569,12 @@ pmap_init2() TUNABLE_INT_FETCH("vm.pmap.shpgperproc", &shpgperproc); pv_entry_max = shpgperproc * maxproc + vm_page_array_size; pv_entry_high_water = 9 * (pv_entry_max / 10); +#if 0 zinitna(pvzone, &pvzone_obj, NULL, 0, pv_entry_max, ZONE_INTERRUPT, 1); zinitna(ptezone, &ptezone_obj, NULL, 0, pv_entry_max, ZONE_INTERRUPT, 1); +#endif + uma_zone_set_obj(pvzone, &pvzone_obj, pv_entry_max); + uma_zone_set_obj(ptezone, &ptezone_obj, pv_entry_max); } diff --git a/sys/kern/kern_descrip.c b/sys/kern/kern_descrip.c index 313a397..4e6d283 100644 --- a/sys/kern/kern_descrip.c +++ b/sys/kern/kern_descrip.c @@ -67,11 +67,13 @@ #include <vm/vm.h> #include <vm/vm_extern.h> +#include <vm/vm_zone.h> static MALLOC_DEFINE(M_FILEDESC, "file desc", "Open file descriptor table"); -MALLOC_DEFINE(M_FILE, "file", "Open file structure"); static MALLOC_DEFINE(M_SIGIO, "sigio", "sigio structures"); +uma_zone_t file_zone; + static d_open_t fdopen; #define NUMFDESC 64 @@ -1095,7 +1097,8 @@ falloc(td, resultfp, resultfd) * of open files at that point, otherwise put it at the front of * the list of open files. */ - MALLOC(fp, struct file *, sizeof(struct file), M_FILE, M_WAITOK | M_ZERO); + fp = uma_zalloc(file_zone, M_WAITOK); + bzero(fp, sizeof(*fp)); /* * wait until after malloc (which may have blocked) returns before @@ -1108,7 +1111,7 @@ falloc(td, resultfp, resultfd) sx_xlock(&filelist_lock); nfiles--; sx_xunlock(&filelist_lock); - FREE(fp, M_FILE); + uma_zfree(file_zone, fp); return (error); } fp->f_mtxp = mtx_pool_alloc(); @@ -1149,7 +1152,7 @@ ffree(fp) nfiles--; sx_xunlock(&filelist_lock); crfree(fp->f_cred); - FREE(fp, M_FILE); + uma_zfree(file_zone, fp); } /* @@ -2111,5 +2114,8 @@ static void filelistinit(dummy) void *dummy; { + file_zone = uma_zcreate("Files", sizeof(struct file), NULL, NULL, + NULL, NULL, UMA_ALIGN_PTR, 0); + sx_init(&filelist_lock, "filelist lock"); } diff --git a/sys/kern/kern_malloc.c b/sys/kern/kern_malloc.c index a8eec9c..12cd4d3 100644 --- a/sys/kern/kern_malloc.c +++ b/sys/kern/kern_malloc.c @@ -52,6 +52,8 @@ #include <vm/vm_extern.h> #include <vm/pmap.h> #include <vm/vm_map.h> +#include <vm/uma.h> +#include <vm/uma_int.h> #if defined(INVARIANTS) && defined(__i386__) #include <machine/cpu.h> @@ -80,50 +82,42 @@ SYSINIT(kmem, SI_SUB_KMEM, SI_ORDER_FIRST, kmeminit, NULL) static MALLOC_DEFINE(M_FREE, "free", "should be on free list"); static struct malloc_type *kmemstatistics; -static struct kmembuckets bucket[MINBUCKET + 16]; -static struct kmemusage *kmemusage; static char *kmembase; static char *kmemlimit; -static struct mtx malloc_mtx; - -u_int vm_kmem_size; - -#ifdef INVARIANTS -/* - * This structure provides a set of masks to catch unaligned frees. 
- */ -static long addrmask[] = { 0, - 0x00000001, 0x00000003, 0x00000007, 0x0000000f, - 0x0000001f, 0x0000003f, 0x0000007f, 0x000000ff, - 0x000001ff, 0x000003ff, 0x000007ff, 0x00000fff, - 0x00001fff, 0x00003fff, 0x00007fff, 0x0000ffff, +#define KMEM_ZSHIFT 4 +#define KMEM_ZBASE 16 +#define KMEM_ZMASK (KMEM_ZBASE - 1) + +#define KMEM_ZMAX 65536 +#define KMEM_ZSIZE (KMEM_ZMAX >> KMEM_ZSHIFT) +static uma_zone_t kmemzones[KMEM_ZSIZE + 1]; + + +/* These won't be powers of two for long */ +struct { + int size; + char *name; +} kmemsizes[] = { + {16, "16"}, + {32, "32"}, + {64, "64"}, + {128, "128"}, + {256, "256"}, + {512, "512"}, + {1024, "1024"}, + {2048, "2048"}, + {4096, "4096"}, + {8192, "8192"}, + {16384, "16384"}, + {32768, "32768"}, + {65536, "65536"}, + {0, NULL}, }; -/* - * The WEIRD_ADDR is used as known text to copy into free objects so - * that modifications after frees can be detected. - */ -#define WEIRD_ADDR 0xdeadc0de -#define MAX_COPY 64 +static struct mtx malloc_mtx; -/* - * Normally the first word of the structure is used to hold the list - * pointer for free objects. However, when running with diagnostics, - * we use the third and fourth fields, so as to catch modifications - * in the most commonly trashed first two words. - */ -struct freelist { - long spare0; - struct malloc_type *type; - long spare1; - caddr_t next; -}; -#else /* !INVARIANTS */ -struct freelist { - caddr_t next; -}; -#endif /* INVARIANTS */ +u_int vm_kmem_size; /* * malloc: @@ -139,17 +133,10 @@ malloc(size, type, flags) struct malloc_type *type; int flags; { - register struct kmembuckets *kbp; - register struct kmemusage *kup; - register struct freelist *freep; - long indx, npg, allocsize; int s; - caddr_t va, cp, savedlist; -#ifdef INVARIANTS - long *end, *lp; - int copysize; - const char *savedtype; -#endif + long indx; + caddr_t va; + uma_zone_t zone; register struct malloc_type *ksp = type; #if defined(INVARIANTS) @@ -157,124 +144,52 @@ malloc(size, type, flags) KASSERT(curthread->td_intr_nesting_level == 0, ("malloc(M_WAITOK) in interrupt context")); #endif - indx = BUCKETINDX(size); - kbp = &bucket[indx]; s = splmem(); - mtx_lock(&malloc_mtx); + /* mtx_lock(&malloc_mtx); XXX */ while (ksp->ks_memuse >= ksp->ks_limit) { if (flags & M_NOWAIT) { splx(s); - mtx_unlock(&malloc_mtx); + /* mtx_unlock(&malloc_mtx); XXX */ return ((void *) NULL); } if (ksp->ks_limblocks < 65535) ksp->ks_limblocks++; - msleep((caddr_t)ksp, &malloc_mtx, PSWP+2, type->ks_shortdesc, + msleep((caddr_t)ksp, /* &malloc_mtx */ NULL, PSWP+2, type->ks_shortdesc, 0); } - ksp->ks_size |= 1 << indx; -#ifdef INVARIANTS - copysize = 1 << indx < MAX_COPY ? 1 << indx : MAX_COPY; -#endif - if (kbp->kb_next == NULL) { - kbp->kb_last = NULL; - if (size > MAXALLOCSAVE) - allocsize = roundup(size, PAGE_SIZE); - else - allocsize = 1 << indx; - npg = btoc(allocsize); - - mtx_unlock(&malloc_mtx); - va = (caddr_t) kmem_malloc(kmem_map, (vm_size_t)ctob(npg), flags); - + /* mtx_unlock(&malloc_mtx); XXX */ + + if (size <= KMEM_ZMAX) { + indx = size; + if (indx & KMEM_ZMASK) + indx = (indx & ~KMEM_ZMASK) + KMEM_ZBASE; + zone = kmemzones[indx >> KMEM_ZSHIFT]; + indx = zone->uz_size; + va = uma_zalloc(zone, flags); if (va == NULL) { - splx(s); - return ((void *) NULL); - } - /* - * Enter malloc_mtx after the error check to avoid having to - * immediately exit it again if there is an error. 
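The size-to-zone mapping in the new malloc() above deserves a worked example, using the constants defined at the top of this file (KMEM_ZBASE = 16, KMEM_ZMASK = 15, KMEM_ZSHIFT = 4, KMEM_ZMAX = 65536) and the kmemzones[] table that kmeminit() populates further down:

/*
 * malloc(100, ...):
 *	indx = 100;			100 & KMEM_ZMASK is non-zero, so
 *	indx = (100 & ~15) + 16;	round up to 112, a multiple of 16
 *	zone = kmemzones[112 >> 4];	kmemzones[7], which kmeminit()
 *					points at the "128" zone
 *	indx = zone->uz_size;		128 bytes charged to ks_memuse
 *
 * Requests larger than KMEM_ZMAX (64K) skip the size-class zones entirely
 * and go through uma_large_malloc(), rounded up to whole pages.
 */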
- */ - mtx_lock(&malloc_mtx); - - kbp->kb_total += kbp->kb_elmpercl; - kup = btokup(va); - kup->ku_indx = indx; - if (allocsize > MAXALLOCSAVE) { - if (npg > 65535) - panic("malloc: allocation too large"); - kup->ku_pagecnt = npg; - ksp->ks_memuse += allocsize; + /* mtx_lock(&malloc_mtx); XXX */ goto out; } - kup->ku_freecnt = kbp->kb_elmpercl; - kbp->kb_totalfree += kbp->kb_elmpercl; - /* - * Just in case we blocked while allocating memory, - * and someone else also allocated memory for this - * bucket, don't assume the list is still empty. - */ - savedlist = kbp->kb_next; - kbp->kb_next = cp = va + (npg * PAGE_SIZE) - allocsize; - for (;;) { - freep = (struct freelist *)cp; -#ifdef INVARIANTS - /* - * Copy in known text to detect modification - * after freeing. - */ - end = (long *)&cp[copysize]; - for (lp = (long *)cp; lp < end; lp++) - *lp = WEIRD_ADDR; - freep->type = M_FREE; -#endif /* INVARIANTS */ - if (cp <= va) - break; - cp -= allocsize; - freep->next = cp; + ksp->ks_size |= indx; + } else { + /* XXX This is not the next power of two so this will break ks_size */ + indx = roundup(size, PAGE_SIZE); + zone = NULL; + va = uma_large_malloc(size, flags); + if (va == NULL) { + /* mtx_lock(&malloc_mtx); XXX */ + goto out; } - freep->next = savedlist; - if (kbp->kb_last == NULL) - kbp->kb_last = (caddr_t)freep; - } - va = kbp->kb_next; - kbp->kb_next = ((struct freelist *)va)->next; -#ifdef INVARIANTS - freep = (struct freelist *)va; - savedtype = (const char *) freep->type->ks_shortdesc; - freep->type = (struct malloc_type *)WEIRD_ADDR; - if ((intptr_t)(void *)&freep->next & 0x2) - freep->next = (caddr_t)((WEIRD_ADDR >> 16)|(WEIRD_ADDR << 16)); - else - freep->next = (caddr_t)WEIRD_ADDR; - end = (long *)&va[copysize]; - for (lp = (long *)va; lp < end; lp++) { - if (*lp == WEIRD_ADDR) - continue; - printf("%s %ld of object %p size %lu %s %s (0x%lx != 0x%lx)\n", - "Data modified on freelist: word", - (long)(lp - (long *)va), (void *)va, size, - "previous type", savedtype, *lp, (u_long)WEIRD_ADDR); - break; } - freep->spare0 = 0; -#endif /* INVARIANTS */ - kup = btokup(va); - if (kup->ku_indx != indx) - panic("malloc: wrong bucket"); - if (kup->ku_freecnt == 0) - panic("malloc: lost data"); - kup->ku_freecnt--; - kbp->kb_totalfree--; - ksp->ks_memuse += 1 << indx; -out: - kbp->kb_calls++; + /* mtx_lock(&malloc_mtx); XXX */ + ksp->ks_memuse += indx; ksp->ks_inuse++; +out: ksp->ks_calls++; if (ksp->ks_memuse > ksp->ks_maxused) ksp->ks_maxused = ksp->ks_memuse; splx(s); - mtx_unlock(&malloc_mtx); + /* mtx_unlock(&malloc_mtx); XXX */ /* XXX: Do idle pre-zeroing. */ if (va != NULL && (flags & M_ZERO)) bzero(va, size); @@ -293,124 +208,41 @@ free(addr, type) void *addr; struct malloc_type *type; { - register struct kmembuckets *kbp; - register struct kmemusage *kup; - register struct freelist *freep; - long size; + uma_slab_t slab; + void *mem; + u_long size; int s; -#ifdef INVARIANTS - struct freelist *fp; - long *end, *lp, alloc, copysize; -#endif register struct malloc_type *ksp = type; /* free(NULL, ...) does nothing */ if (addr == NULL) return; - KASSERT(kmembase <= (char *)addr && (char *)addr < kmemlimit, - ("free: address %p out of range", (void *)addr)); - kup = btokup(addr); - size = 1 << kup->ku_indx; - kbp = &bucket[kup->ku_indx]; + size = 0; s = splmem(); - mtx_lock(&malloc_mtx); -#ifdef INVARIANTS - /* - * Check for returns of data that do not point to the - * beginning of the allocation. 
- */ - if (size > PAGE_SIZE) - alloc = addrmask[BUCKETINDX(PAGE_SIZE)]; - else - alloc = addrmask[kup->ku_indx]; - if (((uintptr_t)(void *)addr & alloc) != 0) - panic("free: unaligned addr %p, size %ld, type %s, mask %ld", - (void *)addr, size, type->ks_shortdesc, alloc); -#endif /* INVARIANTS */ - if (size > MAXALLOCSAVE) { - mtx_unlock(&malloc_mtx); - kmem_free(kmem_map, (vm_offset_t)addr, ctob(kup->ku_pagecnt)); - mtx_lock(&malloc_mtx); - - size = kup->ku_pagecnt << PAGE_SHIFT; - ksp->ks_memuse -= size; - kup->ku_indx = 0; - kup->ku_pagecnt = 0; - if (ksp->ks_memuse + size >= ksp->ks_limit && - ksp->ks_memuse < ksp->ks_limit) - wakeup((caddr_t)ksp); - ksp->ks_inuse--; - kbp->kb_total -= 1; - splx(s); - mtx_unlock(&malloc_mtx); - return; - } - freep = (struct freelist *)addr; -#ifdef INVARIANTS - /* - * Check for multiple frees. Use a quick check to see if - * it looks free before laboriously searching the freelist. - */ - if (freep->spare0 == WEIRD_ADDR) { - fp = (struct freelist *)kbp->kb_next; - while (fp) { - if (fp->spare0 != WEIRD_ADDR) - panic("free: free item %p modified", fp); - else if (addr == (caddr_t)fp) - panic("free: multiple freed item %p", addr); - fp = (struct freelist *)fp->next; - } - } - /* - * Copy in known text to detect modification after freeing - * and to make it look free. Also, save the type being freed - * so we can list likely culprit if modification is detected - * when the object is reallocated. - */ - copysize = size < MAX_COPY ? size : MAX_COPY; - end = (long *)&((caddr_t)addr)[copysize]; - for (lp = (long *)addr; lp < end; lp++) - *lp = WEIRD_ADDR; - freep->type = type; -#endif /* INVARIANTS */ - kup->ku_freecnt++; - if (kup->ku_freecnt >= kbp->kb_elmpercl) { - if (kup->ku_freecnt > kbp->kb_elmpercl) - panic("free: multiple frees"); - else if (kbp->kb_totalfree > kbp->kb_highwat) - kbp->kb_couldfree++; + + mem = (void *)((u_long)addr & (~UMA_SLAB_MASK)); + slab = hash_sfind(mallochash, mem); + + if (slab == NULL) + panic("free: address %p(%p) has not been allocated.\n", addr, mem); + + if (!(slab->us_flags & UMA_SLAB_MALLOC)) { + size = slab->us_zone->uz_size; + uma_zfree_arg(slab->us_zone, addr, slab); + } else { + size = slab->us_size; + uma_large_free(slab); } - kbp->kb_totalfree++; + /* mtx_lock(&malloc_mtx); XXX */ + ksp->ks_memuse -= size; if (ksp->ks_memuse + size >= ksp->ks_limit && ksp->ks_memuse < ksp->ks_limit) wakeup((caddr_t)ksp); ksp->ks_inuse--; -#ifdef OLD_MALLOC_MEMORY_POLICY - if (kbp->kb_next == NULL) - kbp->kb_next = addr; - else - ((struct freelist *)kbp->kb_last)->next = addr; - freep->next = NULL; - kbp->kb_last = addr; -#else - /* - * Return memory to the head of the queue for quick reuse. This - * can improve performance by improving the probability of the - * item being in the cache when it is reused. 
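With the kmemusage[] array gone, free() above (and realloc() just below) recovers everything it needs from the slab header: the pointer is masked down to its slab base and looked up in the malloc hash that kmeminit() sizes later in this file. A sketch of that lookup, assuming UMA_SLAB_MASK (defined in uma_int.h, which is not among the hunks shown) covers one slab-sized region:

	mem = (void *)((u_long)addr & ~UMA_SLAB_MASK);	/* slab base */
	slab = hash_sfind(mallochash, mem);		/* its slab header */
	if (slab->us_flags & UMA_SLAB_MALLOC)
		uma_large_free(slab);			/* page-granular allocation */
	else
		uma_zfree_arg(slab->us_zone, addr, slab); /* back to its size class */

realloc() performs the same lookup only to learn the original allocation size: slab->us_zone->uz_size for zone-backed memory, slab->us_size for large allocations.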
- */ - if (kbp->kb_next == NULL) { - kbp->kb_next = addr; - kbp->kb_last = addr; - freep->next = NULL; - } else { - freep->next = kbp->kb_next; - kbp->kb_next = addr; - } -#endif splx(s); - mtx_unlock(&malloc_mtx); + /* mtx_unlock(&malloc_mtx); XXX */ } /* @@ -423,7 +255,7 @@ realloc(addr, size, type, flags) struct malloc_type *type; int flags; { - struct kmemusage *kup; + uma_slab_t slab; unsigned long alloc; void *newaddr; @@ -431,15 +263,18 @@ realloc(addr, size, type, flags) if (addr == NULL) return (malloc(size, type, flags)); + slab = hash_sfind(mallochash, + (void *)((u_long)addr & ~(UMA_SLAB_MASK))); + /* Sanity check */ - KASSERT(kmembase <= (char *)addr && (char *)addr < kmemlimit, + KASSERT(slab != NULL, ("realloc: address %p out of range", (void *)addr)); /* Get the size of the original block */ - kup = btokup(addr); - alloc = 1 << kup->ku_indx; - if (alloc > MAXALLOCSAVE) - alloc = kup->ku_pagecnt << PAGE_SHIFT; + if (slab->us_zone) + alloc = slab->us_zone->uz_size; + else + alloc = slab->us_size; /* Reuse the original block if appropriate */ if (size <= alloc @@ -484,16 +319,11 @@ kmeminit(dummy) register long indx; u_long npg; u_long mem_size; - -#if ((MAXALLOCSAVE & (MAXALLOCSAVE - 1)) != 0) -#error "kmeminit: MAXALLOCSAVE not power of 2" -#endif -#if (MAXALLOCSAVE > MINALLOCSIZE * 32768) -#error "kmeminit: MAXALLOCSAVE too big" -#endif -#if (MAXALLOCSAVE < PAGE_SIZE) -#error "kmeminit: MAXALLOCSAVE too small" -#endif + void *hashmem; + u_long hashsize; + int highbit; + int bits; + int i; mtx_init(&malloc_mtx, "malloc", MTX_DEF); @@ -544,17 +374,36 @@ kmeminit(dummy) npg = (nmbufs * MSIZE + nmbclusters * MCLBYTES + nmbcnt * sizeof(u_int) + vm_kmem_size) / PAGE_SIZE; - kmemusage = (struct kmemusage *) kmem_alloc(kernel_map, - (vm_size_t)(npg * sizeof(struct kmemusage))); kmem_map = kmem_suballoc(kernel_map, (vm_offset_t *)&kmembase, (vm_offset_t *)&kmemlimit, (vm_size_t)(npg * PAGE_SIZE)); kmem_map->system_map = 1; - for (indx = 0; indx < MINBUCKET + 16; indx++) { - if (1 << indx >= PAGE_SIZE) - bucket[indx].kb_elmpercl = 1; - else - bucket[indx].kb_elmpercl = PAGE_SIZE / (1 << indx); - bucket[indx].kb_highwat = 5 * bucket[indx].kb_elmpercl; + + hashsize = npg * sizeof(void *); + + highbit = 0; + bits = 0; + /* The hash size must be a power of two */ + for (i = 0; i < 8 * sizeof(hashsize); i++) + if (hashsize & (1 << i)) { + highbit = i; + bits++; + } + if (bits > 1) + hashsize = 1 << (highbit); + + hashmem = (void *)kmem_alloc(kernel_map, (vm_size_t)hashsize); + uma_startup2(hashmem, hashsize / sizeof(void *)); + + for (i = 0, indx = 0; kmemsizes[indx].size != 0; indx++) { + uma_zone_t zone; + int size = kmemsizes[indx].size; + char *name = kmemsizes[indx].name; + + zone = uma_zcreate(name, size, NULL, NULL, NULL, NULL, + UMA_ALIGN_PTR, UMA_ZONE_MALLOC); + for (;i <= size; i+= KMEM_ZBASE) + kmemzones[i >> KMEM_ZSHIFT] = zone; + } } @@ -588,12 +437,6 @@ malloc_uninit(data) { struct malloc_type *type = (struct malloc_type *)data; struct malloc_type *t; -#ifdef INVARIANTS - struct kmembuckets *kbp; - struct freelist *freep; - long indx; - int s; -#endif if (type->ks_magic != M_MAGIC) panic("malloc type lacks magic"); @@ -604,26 +447,6 @@ malloc_uninit(data) if (type->ks_limit == 0) panic("malloc_uninit on uninitialized type"); -#ifdef INVARIANTS - s = splmem(); - mtx_lock(&malloc_mtx); - for (indx = 0; indx < MINBUCKET + 16; indx++) { - kbp = bucket + indx; - freep = (struct freelist*)kbp->kb_next; - while (freep) { - if (freep->type == type) - freep->type = M_FREE; - freep = 
(struct freelist*)freep->next; - } - } - splx(s); - mtx_unlock(&malloc_mtx); - - if (type->ks_memuse != 0) - printf("malloc_uninit: %ld bytes of '%s' still allocated\n", - type->ks_memuse, type->ks_shortdesc); -#endif - if (type == kmemstatistics) kmemstatistics = type->ks_next; else { diff --git a/sys/kern/sys_pipe.c b/sys/kern/sys_pipe.c index 70a6a38..1b85e3a 100644 --- a/sys/kern/sys_pipe.c +++ b/sys/kern/sys_pipe.c @@ -182,8 +182,8 @@ SYSINIT(vfs, SI_SUB_VFS, SI_ORDER_ANY, pipeinit, NULL); static void pipeinit(void *dummy __unused) { - - pipe_zone = zinit("PIPE", sizeof(struct pipe), 0, 0, 4); + pipe_zone = uma_zcreate("PIPE", sizeof(struct pipe), NULL, + NULL, NULL, NULL, UMA_ALIGN_PTR, 0); } /* diff --git a/sys/kern/uipc_socket.c b/sys/kern/uipc_socket.c index b9ff2de..31bb9c0 100644 --- a/sys/kern/uipc_socket.c +++ b/sys/kern/uipc_socket.c @@ -80,7 +80,7 @@ static struct filterops soread_filtops = static struct filterops sowrite_filtops = { 1, NULL, filt_sowdetach, filt_sowrite }; -struct vm_zone *socket_zone; +vm_zone_t socket_zone; so_gen_t so_gencnt; /* generation count for sockets */ MALLOC_DEFINE(M_SONAME, "soname", "socket name"); diff --git a/sys/kern/uipc_usrreq.c b/sys/kern/uipc_usrreq.c index 5f46c89..7246d16 100644 --- a/sys/kern/uipc_usrreq.c +++ b/sys/kern/uipc_usrreq.c @@ -61,7 +61,7 @@ #include <vm/vm_zone.h> -static struct vm_zone *unp_zone; +static vm_zone_t unp_zone; static unp_gen_t unp_gencnt; static u_int unp_count; @@ -1363,7 +1363,7 @@ unp_gc() * * 91/09/19, bsy@cs.cmu.edu */ - extra_ref = malloc(nfiles * sizeof(struct file *), M_FILE, M_WAITOK); + extra_ref = malloc(nfiles * sizeof(struct file *), M_TEMP, M_WAITOK); sx_slock(&filelist_lock); for (nunref = 0, fp = LIST_FIRST(&filehead), fpp = extra_ref; fp != 0; fp = nextfp) { @@ -1404,7 +1404,7 @@ unp_gc() } for (i = nunref, fpp = extra_ref; --i >= 0; ++fpp) closef(*fpp, (struct thread *) NULL); - free((caddr_t)extra_ref, M_FILE); + free((caddr_t)extra_ref, M_TEMP); unp_gcing = 0; } diff --git a/sys/kern/vfs_lookup.c b/sys/kern/vfs_lookup.c index e1e6cc0..eab474e 100644 --- a/sys/kern/vfs_lookup.c +++ b/sys/kern/vfs_lookup.c @@ -60,13 +60,14 @@ /* * Allocation zone for namei */ -struct vm_zone *namei_zone; +vm_zone_t namei_zone; static void nameiinit(void *dummy __unused) { + namei_zone = uma_zcreate("NAMEI", MAXPATHLEN, NULL, NULL, NULL, NULL, + UMA_ALIGN_PTR, 0); - namei_zone = zinit("NAMEI", MAXPATHLEN, 0, 0, 2); } SYSINIT(vfs, SI_SUB_VFS, SI_ORDER_SECOND, nameiinit, NULL) diff --git a/sys/netinet/in_pcb.h b/sys/netinet/in_pcb.h index 00392b0..187c677 100644 --- a/sys/netinet/in_pcb.h +++ b/sys/netinet/in_pcb.h @@ -41,6 +41,7 @@ #include <net/route.h> #include <netinet6/ipsec.h> /* for IPSEC */ +#include <vm/vm_zone.h> #define in6pcb inpcb /* for KAME src sync over BSD*'s */ #define in6p_sp inp_sp /* for KAME src sync over BSD*'s */ @@ -241,7 +242,7 @@ struct inpcbinfo { /* XXX documentation, prefixes */ u_short lastport; u_short lastlow; u_short lasthi; - struct vm_zone *ipi_zone; /* zone to allocate pcbs from */ + vm_zone_t ipi_zone; /* zone to allocate pcbs from */ u_int ipi_count; /* number of pcbs in this list */ u_quad_t ipi_gencnt; /* current generation count */ }; diff --git a/sys/netinet/tcp_syncache.c b/sys/netinet/tcp_syncache.c index 9ec0804..3459fa1 100644 --- a/sys/netinet/tcp_syncache.c +++ b/sys/netinet/tcp_syncache.c @@ -114,7 +114,7 @@ static struct syncache *syncookie_lookup(struct in_conninfo *, struct tcp_syncache { struct syncache_head *hashbase; - struct vm_zone *zone; + vm_zone_t 
zone; u_int hashsize; u_int hashmask; u_int bucket_limit; diff --git a/sys/powerpc/aim/mmu_oea.c b/sys/powerpc/aim/mmu_oea.c index 6346311..c12b0a7 100644 --- a/sys/powerpc/aim/mmu_oea.c +++ b/sys/powerpc/aim/mmu_oea.c @@ -249,6 +249,8 @@ vm_zone_t pmap_upvo_zone; /* zone for pvo entries for unmanaged pages */ vm_zone_t pmap_mpvo_zone; /* zone for pvo entries for managed pages */ struct vm_object pmap_upvo_zone_obj; struct vm_object pmap_mpvo_zone_obj; +static vm_object_t pmap_pvo_obj; +static u_int pmap_pvo_count; #define PMAP_PVO_SIZE 1024 static struct pvo_entry *pmap_bpvo_pool; @@ -312,6 +314,7 @@ static struct pte *pmap_pvo_to_pte(const struct pvo_entry *, int); /* * Utility routines. */ +static void * pmap_pvo_allocf(uma_zone_t, int, u_int8_t *, int); static struct pvo_entry *pmap_rkva_alloc(void); static void pmap_pa_map(struct pvo_entry *, vm_offset_t, struct pte *, int *); @@ -934,10 +937,14 @@ pmap_init2(void) CTR(KTR_PMAP, "pmap_init2"); + pmap_pvo_obj = vm_object_allocate(OBJT_PHYS, 16); + pmap_pvo_count = 0; pmap_upvo_zone = zinit("UPVO entry", sizeof (struct pvo_entry), 0, 0, 0); + uma_zone_set_allocf(pmap_upvo_zone, pmap_pvo_allocf); pmap_mpvo_zone = zinit("MPVO entry", sizeof(struct pvo_entry), PMAP_PVO_SIZE, ZONE_INTERRUPT, 1); + uma_zone_set_allocf(pmap_mpvo_zone, pmap_pvo_allocf); pmap_initialized = TRUE; } @@ -1854,6 +1861,22 @@ pmap_pvo_to_pte(const struct pvo_entry *pvo, int pteidx) return (NULL); } +static void * +pmap_pvo_allocf(uma_zone_t zone, int bytes, u_int8_t *flags, int wait) +{ + vm_page_t m; + + if (bytes != PAGE_SIZE) + panic("pmap_pvo_allocf: benno was shortsighted. hit him."); + + *flags = UMA_SLAB_PRIV; + m = vm_page_alloc(pmap_pvo_obj, pmap_pvo_count, VM_ALLOC_SYSTEM); + pmap_pvo_count++; + if (m == NULL) + return (NULL); + return ((void *)VM_PAGE_TO_PHYS(m)); +} + /* * XXX: THIS STUFF SHOULD BE IN pte.c? */ diff --git a/sys/powerpc/powerpc/mmu_oea.c b/sys/powerpc/powerpc/mmu_oea.c index 6346311..c12b0a7 100644 --- a/sys/powerpc/powerpc/mmu_oea.c +++ b/sys/powerpc/powerpc/mmu_oea.c @@ -249,6 +249,8 @@ vm_zone_t pmap_upvo_zone; /* zone for pvo entries for unmanaged pages */ vm_zone_t pmap_mpvo_zone; /* zone for pvo entries for managed pages */ struct vm_object pmap_upvo_zone_obj; struct vm_object pmap_mpvo_zone_obj; +static vm_object_t pmap_pvo_obj; +static u_int pmap_pvo_count; #define PMAP_PVO_SIZE 1024 static struct pvo_entry *pmap_bpvo_pool; @@ -312,6 +314,7 @@ static struct pte *pmap_pvo_to_pte(const struct pvo_entry *, int); /* * Utility routines. */ +static void * pmap_pvo_allocf(uma_zone_t, int, u_int8_t *, int); static struct pvo_entry *pmap_rkva_alloc(void); static void pmap_pa_map(struct pvo_entry *, vm_offset_t, struct pte *, int *); @@ -934,10 +937,14 @@ pmap_init2(void) CTR(KTR_PMAP, "pmap_init2"); + pmap_pvo_obj = vm_object_allocate(OBJT_PHYS, 16); + pmap_pvo_count = 0; pmap_upvo_zone = zinit("UPVO entry", sizeof (struct pvo_entry), 0, 0, 0); + uma_zone_set_allocf(pmap_upvo_zone, pmap_pvo_allocf); pmap_mpvo_zone = zinit("MPVO entry", sizeof(struct pvo_entry), PMAP_PVO_SIZE, ZONE_INTERRUPT, 1); + uma_zone_set_allocf(pmap_mpvo_zone, pmap_pvo_allocf); pmap_initialized = TRUE; } @@ -1854,6 +1861,22 @@ pmap_pvo_to_pte(const struct pvo_entry *pvo, int pteidx) return (NULL); } +static void * +pmap_pvo_allocf(uma_zone_t zone, int bytes, u_int8_t *flags, int wait) +{ + vm_page_t m; + + if (bytes != PAGE_SIZE) + panic("pmap_pvo_allocf: benno was shortsighted. 
hit him."); + + *flags = UMA_SLAB_PRIV; + m = vm_page_alloc(pmap_pvo_obj, pmap_pvo_count, VM_ALLOC_SYSTEM); + pmap_pvo_count++; + if (m == NULL) + return (NULL); + return ((void *)VM_PAGE_TO_PHYS(m)); +} + /* * XXX: THIS STUFF SHOULD BE IN pte.c? */ diff --git a/sys/powerpc/powerpc/pmap.c b/sys/powerpc/powerpc/pmap.c index 6346311..c12b0a7 100644 --- a/sys/powerpc/powerpc/pmap.c +++ b/sys/powerpc/powerpc/pmap.c @@ -249,6 +249,8 @@ vm_zone_t pmap_upvo_zone; /* zone for pvo entries for unmanaged pages */ vm_zone_t pmap_mpvo_zone; /* zone for pvo entries for managed pages */ struct vm_object pmap_upvo_zone_obj; struct vm_object pmap_mpvo_zone_obj; +static vm_object_t pmap_pvo_obj; +static u_int pmap_pvo_count; #define PMAP_PVO_SIZE 1024 static struct pvo_entry *pmap_bpvo_pool; @@ -312,6 +314,7 @@ static struct pte *pmap_pvo_to_pte(const struct pvo_entry *, int); /* * Utility routines. */ +static void * pmap_pvo_allocf(uma_zone_t, int, u_int8_t *, int); static struct pvo_entry *pmap_rkva_alloc(void); static void pmap_pa_map(struct pvo_entry *, vm_offset_t, struct pte *, int *); @@ -934,10 +937,14 @@ pmap_init2(void) CTR(KTR_PMAP, "pmap_init2"); + pmap_pvo_obj = vm_object_allocate(OBJT_PHYS, 16); + pmap_pvo_count = 0; pmap_upvo_zone = zinit("UPVO entry", sizeof (struct pvo_entry), 0, 0, 0); + uma_zone_set_allocf(pmap_upvo_zone, pmap_pvo_allocf); pmap_mpvo_zone = zinit("MPVO entry", sizeof(struct pvo_entry), PMAP_PVO_SIZE, ZONE_INTERRUPT, 1); + uma_zone_set_allocf(pmap_mpvo_zone, pmap_pvo_allocf); pmap_initialized = TRUE; } @@ -1854,6 +1861,22 @@ pmap_pvo_to_pte(const struct pvo_entry *pvo, int pteidx) return (NULL); } +static void * +pmap_pvo_allocf(uma_zone_t zone, int bytes, u_int8_t *flags, int wait) +{ + vm_page_t m; + + if (bytes != PAGE_SIZE) + panic("pmap_pvo_allocf: benno was shortsighted. hit him."); + + *flags = UMA_SLAB_PRIV; + m = vm_page_alloc(pmap_pvo_obj, pmap_pvo_count, VM_ALLOC_SYSTEM); + pmap_pvo_count++; + if (m == NULL) + return (NULL); + return ((void *)VM_PAGE_TO_PHYS(m)); +} + /* * XXX: THIS STUFF SHOULD BE IN pte.c? 
*/ diff --git a/sys/sparc64/include/pv.h b/sys/sparc64/include/pv.h index 44262a5..732ef03 100644 --- a/sys/sparc64/include/pv.h +++ b/sys/sparc64/include/pv.h @@ -48,6 +48,7 @@ extern int pv_entry_max; extern int pv_entry_high_water; extern struct pv_entry *pvinit; +void *pv_allocf(uma_zone_t zone, int bytes, u_int8_t *flags, int wait); pv_entry_t pv_alloc(void); void pv_free(pv_entry_t pv); diff --git a/sys/sparc64/sparc64/pmap.c b/sys/sparc64/sparc64/pmap.c index 8efc097..3323861 100644 --- a/sys/sparc64/sparc64/pmap.c +++ b/sys/sparc64/sparc64/pmap.c @@ -543,11 +543,17 @@ pmap_init(vm_offset_t phys_start, vm_offset_t phys_end) panic("pmap_init: vm_map_find"); } +#if 0 pvzone = &pvzone_store; pvinit = (struct pv_entry *)kmem_alloc(kernel_map, vm_page_array_size * sizeof (struct pv_entry)); zbootinit(pvzone, "PV ENTRY", sizeof (struct pv_entry), pvinit, vm_page_array_size); +#else + pvzone = zinit("PV ENTRY", sizeof (struct pv_entry), 0, 0, 0); + uma_zone_set_allocf(pvzone, pv_allocf); + uma_prealloc(pvzone, vm_page_array_size); +#endif pmap_initialized = TRUE; } @@ -565,7 +571,11 @@ pmap_init2(void) TUNABLE_INT_FETCH("vm.pmap.shpgperproc", &shpgperproc); pv_entry_max = shpgperproc * maxproc + vm_page_array_size; pv_entry_high_water = 9 * (pv_entry_max / 10); +#if 0 zinitna(pvzone, &pvzone_obj, NULL, 0, pv_entry_max, ZONE_INTERRUPT, 1); +#else + uma_zone_set_obj(pvzone, &pvzone_obj, pv_entry_max); +#endif } /* diff --git a/sys/sparc64/sparc64/pv.c b/sys/sparc64/sparc64/pv.c index c970630..1b3b2ed 100644 --- a/sys/sparc64/sparc64/pv.c +++ b/sys/sparc64/sparc64/pv.c @@ -61,7 +61,9 @@ #include <machine/tsb.h> vm_zone_t pvzone; +#if 0 struct vm_zone pvzone_store; +#endif struct vm_object pvzone_obj; int pv_entry_count; int pv_entry_max; @@ -81,6 +83,14 @@ pv_alloc(void) return (zalloc(pvzone)); } +void * +pv_allocf(uma_zone_t zone, int bytes, u_int8_t *flags, int wait) +{ + + *flags = UMA_SLAB_PRIV; + return (void *)kmem_alloc(kernel_map, bytes); +} + void pv_free(pv_entry_t pv) { diff --git a/sys/sys/malloc.h b/sys/sys/malloc.h index 5f3ec3d..fa64f71 100644 --- a/sys/sys/malloc.h +++ b/sys/sys/malloc.h @@ -37,8 +37,12 @@ #ifndef _SYS_MALLOC_H_ #define _SYS_MALLOC_H_ +#include <vm/uma.h> + #define splmem splhigh +#define MINALLOCSIZE UMA_SMALLEST_UNIT + /* * flags to malloc. */ @@ -80,74 +84,6 @@ MALLOC_DECLARE(M_TEMP); MALLOC_DECLARE(M_IP6OPT); /* for INET6 */ MALLOC_DECLARE(M_IP6NDP); /* for INET6 */ -#endif /* _KERNEL */ - -/* - * Array of descriptors that describe the contents of each page - */ -struct kmemusage { - short ku_indx; /* bucket index */ - union { - u_short freecnt;/* for small allocations, free pieces in page */ - u_short pagecnt;/* for large allocations, pages alloced */ - } ku_un; -}; -#define ku_freecnt ku_un.freecnt -#define ku_pagecnt ku_un.pagecnt - -/* - * Set of buckets for each size of memory block that is retained - */ -struct kmembuckets { - caddr_t kb_next; /* list of free blocks */ - caddr_t kb_last; /* last free block */ - int64_t kb_calls; /* total calls to allocate this size */ - long kb_total; /* total number of blocks allocated */ - long kb_elmpercl; /* # of elements in this sized allocation */ - long kb_totalfree; /* # of free elements in this bucket */ - long kb_highwat; /* high water mark */ - long kb_couldfree; /* over high water mark and could free */ -}; - -#ifdef _KERNEL -#define MINALLOCSIZE (1 << MINBUCKET) -#define BUCKETINDX(size) \ - ((size) <= (MINALLOCSIZE * 128) \ - ? (size) <= (MINALLOCSIZE * 8) \ - ? (size) <= (MINALLOCSIZE * 2) \ - ? 
(size) <= (MINALLOCSIZE * 1) \ - ? (MINBUCKET + 0) \ - : (MINBUCKET + 1) \ - : (size) <= (MINALLOCSIZE * 4) \ - ? (MINBUCKET + 2) \ - : (MINBUCKET + 3) \ - : (size) <= (MINALLOCSIZE* 32) \ - ? (size) <= (MINALLOCSIZE * 16) \ - ? (MINBUCKET + 4) \ - : (MINBUCKET + 5) \ - : (size) <= (MINALLOCSIZE * 64) \ - ? (MINBUCKET + 6) \ - : (MINBUCKET + 7) \ - : (size) <= (MINALLOCSIZE * 2048) \ - ? (size) <= (MINALLOCSIZE * 512) \ - ? (size) <= (MINALLOCSIZE * 256) \ - ? (MINBUCKET + 8) \ - : (MINBUCKET + 9) \ - : (size) <= (MINALLOCSIZE * 1024) \ - ? (MINBUCKET + 10) \ - : (MINBUCKET + 11) \ - : (size) <= (MINALLOCSIZE * 8192) \ - ? (size) <= (MINALLOCSIZE * 4096) \ - ? (MINBUCKET + 12) \ - : (MINBUCKET + 13) \ - : (size) <= (MINALLOCSIZE * 16384) \ - ? (MINBUCKET + 14) \ - : (MINBUCKET + 15)) - -/* - * Turn virtual addresses into kmemusage pointers. - */ -#define btokup(addr) (&kmemusage[((caddr_t)(addr) - kmembase) >> PAGE_SHIFT]) /* * Deprecated macro versions of not-quite-malloc() and free(). diff --git a/sys/sys/proc.h b/sys/sys/proc.h index a458f66..9250e31 100644 --- a/sys/sys/proc.h +++ b/sys/sys/proc.h @@ -57,6 +57,7 @@ #endif #include <sys/ucred.h> #include <machine/proc.h> /* Machine-dependent proc substruct. */ +#include <vm/vm_zone.h> /* * One structure allocated per session. @@ -408,6 +409,7 @@ struct proc { struct proc *p_pptr; /* (c + e) Pointer to parent process. */ LIST_ENTRY(proc) p_sibling; /* (e) List of sibling processes. */ LIST_HEAD(, proc) p_children; /* (e) Pointer to list of children. */ + struct mtx p_mtx; /* (k) Lock for this struct. */ /* The following fields are all zeroed upon creation in fork. */ #define p_startzero p_oppid @@ -420,7 +422,6 @@ struct proc { struct vnode *p_tracep; /* (j?) Trace to vnode. */ sigset_t p_siglist; /* (c) Sigs arrived, not delivered. */ struct vnode *p_textvp; /* (b) Vnode of executable. */ - struct mtx p_mtx; /* (k) Lock for this struct. */ char p_lock; /* (c) Proclock (prevent swap) count. */ struct klist p_klist; /* (c) Knotes attached to this proc. */ struct sigiolst p_sigiolst; /* (c) List of sigio sources. */ @@ -701,7 +702,7 @@ extern struct proclist zombproc; /* List of zombie processes. */ extern struct proc *initproc, *pageproc; /* Process slots for init, pager. */ extern struct proc *updateproc; /* Process slot for syncer (sic). */ -extern struct vm_zone *proc_zone; +extern vm_zone_t proc_zone; extern int lastpid; diff --git a/sys/sys/socketvar.h b/sys/sys/socketvar.h index c99153f..940c325 100644 --- a/sys/sys/socketvar.h +++ b/sys/sys/socketvar.h @@ -40,6 +40,7 @@ #include <sys/queue.h> /* for TAILQ macros */ #include <sys/sx.h> /* SX locks */ #include <sys/selinfo.h> /* for struct selinfo */ +#include <vm/vm_zone.h> /* * Kernel structure per socket. 
@@ -52,7 +53,7 @@ typedef u_quad_t so_gen_t; struct accept_filter; struct socket { - struct vm_zone *so_zone; /* zone we were allocated from */ + vm_zone_t so_zone; /* zone we were allocated from */ int so_count; /* reference count */ short so_type; /* generic type, see socket.h */ short so_options; /* from socket call, see socket.h */ @@ -319,7 +320,7 @@ MALLOC_DECLARE(M_ACCF); extern int maxsockets; extern u_long sb_max; -extern struct vm_zone *socket_zone; +extern vm_zone_t socket_zone; extern so_gen_t so_gencnt; struct file; diff --git a/sys/sys/vnode.h b/sys/sys/vnode.h index 49473d5..48ddec2 100644 --- a/sys/sys/vnode.h +++ b/sys/sys/vnode.h @@ -49,6 +49,7 @@ #include <sys/selinfo.h> #include <sys/uio.h> #include <sys/acl.h> +#include <vm/vm_zone.h> /* * The vnode is the focus of all file activity in UNIX. There is a @@ -302,7 +303,7 @@ extern int vttoif_tab[]; */ extern struct vnode *rootvnode; /* root (i.e. "/") vnode */ extern int desiredvnodes; /* number of vnodes desired */ -extern struct vm_zone *namei_zone; +extern vm_zone_t namei_zone; extern int prtactive; /* nonzero to call vprint() */ extern struct vattr va_null; /* predefined null vattr structure */ extern int vfs_ioopt; diff --git a/sys/vm/device_pager.c b/sys/vm/device_pager.c index af52cd9..bf6b3d9 100644 --- a/sys/vm/device_pager.c +++ b/sys/vm/device_pager.c @@ -73,7 +73,9 @@ static struct mtx dev_pager_mtx; static vm_zone_t fakepg_zone; +#if 0 static struct vm_zone fakepg_zone_store; +#endif static vm_page_t dev_pager_getfake __P((vm_offset_t)); static void dev_pager_putfake __P((vm_page_t)); @@ -94,8 +96,11 @@ dev_pager_init() TAILQ_INIT(&dev_pager_object_list); sx_init(&dev_pager_sx, "dev_pager create"); mtx_init(&dev_pager_mtx, "dev_pager list", MTX_DEF); +#if 0 fakepg_zone = &fakepg_zone_store; zinitna(fakepg_zone, NULL, "DP fakepg", sizeof(struct vm_page), 0, 0, 2); +#endif + fakepg_zone = zinit("DP fakepg", sizeof(struct vm_page), 0, 0, 0); } static vm_object_t diff --git a/sys/vm/swap_pager.c b/sys/vm/swap_pager.c index 5e7bf2c..7b9c49a 100644 --- a/sys/vm/swap_pager.c +++ b/sys/vm/swap_pager.c @@ -320,15 +320,15 @@ swap_pager_swap_init() if (maxswzone && n > maxswzone / sizeof(struct swblock)) n = maxswzone / sizeof(struct swblock); n2 = n; + swap_zone = zinit( + "SWAPMETA", + sizeof(struct swblock), + n, + ZONE_INTERRUPT, + 1 + ); do { - swap_zone = zinit( - "SWAPMETA", - sizeof(struct swblock), - n, - ZONE_INTERRUPT, - 1 - ); - if (swap_zone != NULL) + if (uma_zone_set_obj(swap_zone, NULL, n)) break; /* * if the allocation failed, try a zone two thirds the diff --git a/sys/vm/uma.h b/sys/vm/uma.h new file mode 100644 index 0000000..be2c90b --- /dev/null +++ b/sys/vm/uma.h @@ -0,0 +1,420 @@ +/* + * Copyright (c) 2002, Jeffrey Roberson <jroberson@chesapeake.net> + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice unmodified, this list of conditions, and the following + * disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * $FreeBSD$ + * + */ + +/* + * uma.h - External definitions for the Universal Memory Allocator + * + * Jeff Roberson <jroberson@chesapeake.net> +*/ + +#ifndef VM_UMA_H +#define VM_UMA_H + +#include <sys/param.h> /* For NULL */ +#include <sys/malloc.h> /* For M_* */ + +/* User visable parameters */ +#define UMA_SMALLEST_UNIT (PAGE_SIZE / 256) /* Smallest item allocated */ + +/* Types and type defs */ + +struct uma_zone; +/* Opaque type used as a handle to the zone */ +typedef struct uma_zone * uma_zone_t; + +/* + * Item constructor + * + * Arguments: + * item A pointer to the memory which has been allocated. + * arg The arg field passed to uma_zalloc_arg + * size The size of the allocated item + * + * Returns: + * Nothing + * + * Discussion: + * The constructor is called just before the memory is returned + * to the user. It may block if neccisary. + */ +typedef void (*uma_ctor)(void *mem, int size, void *arg); + +/* + * Item destructor + * + * Arguments: + * item A pointer to the memory which has been allocated. + * size The size of the item being destructed. + * arg Argument passed through uma_zfree_arg + * + * Returns: + * Nothing + * + * Discussion: + * The destructor may perform operations that differ from those performed + * by the initializer, but it must leave the object in the same state. + * This IS type stable storage. This is called after EVERY zfree call. + */ +typedef void (*uma_dtor)(void *mem, int size, void *arg); + +/* + * Item initializer + * + * Arguments: + * item A pointer to the memory which has been allocated. + * size The size of the item being initialized. + * + * Returns: + * Nothing + * + * Discussion: + * The initializer is called when the memory is cached in the uma zone. + * this should be the same state that the destructor leaves the object in. + */ +typedef void (*uma_init)(void *mem, int size); + +/* + * Item discard function + * + * Arguments: + * item A pointer to memory which has been 'freed' but has not left the + * zone's cache. + * size The size of the item being discarded. + * + * Returns: + * Nothing + * + * Discussion: + * This routine is called when memory leaves a zone and is returned to the + * system for other uses. It is the counter part to the init function. + */ +typedef void (*uma_fini)(void *mem, int size); + +/* + * What's the difference between initializing and constructing? + * + * The item is initialized when it is cached, and this is the state that the + * object should be in when returned to the allocator. The purpose of this is + * to remove some code which would otherwise be called on each allocation by + * utilizing a known, stable state. This differs from the constructor which + * will be called on EVERY allocation. 
+ * + * For example, in the initializer you may want to initialize embeded locks, + * NULL list pointers, set up initial states, magic numbers, etc. This way if + * the object is held in the allocator and re-used it won't be neccisary to + * re-initialize it. + * + * The constructor may be used to lock a data structure, link it on to lists, + * bump reference counts or total counts of outstanding structures, etc. + * + */ + + +/* Function proto types */ + +/* + * Create a new uma zone + * + * Arguments: + * name The text name of the zone for debugging and stats, this memory + * should not be freed until the zone has been deallocated. + * size The size of the object that is being created. + * ctor The constructor that is called when the object is allocated + * dtor The destructor that is called when the object is freed. + * init An initializer that sets up the initial state of the memory. + * fini A discard function that undoes initialization done by init. + * ctor/dtor/init/fini may all be null, see notes above. + * align A bitmask that corisponds to the requested alignment + * eg 4 would be 0x3 + * flags A set of parameters that control the behavior of the zone + * + * Returns: + * A pointer to a structure which is intended to be opaque to users of + * the interface. The value may be null if the wait flag is not set. + */ + +uma_zone_t uma_zcreate(char *name, int size, uma_ctor ctor, uma_dtor dtor, + uma_init uminit, uma_fini fini, int align, + u_int16_t flags); + +/* Definitions for uma_zcreate flags */ +#define UMA_ZONE_PAGEABLE 0x0001 /* Return items not fully backed by + physical memory XXX Not yet */ +#define UMA_ZONE_ZINIT 0x0002 /* Initialize with zeros */ +#define UMA_ZONE_STATIC 0x0004 /* Staticly sized zone */ +#define UMA_ZONE_OFFPAGE 0x0008 /* Force the slab structure allocation + off of the real memory */ +#define UMA_ZONE_MALLOC 0x0010 /* For use by malloc(9) only! */ +#define UMA_ZONE_NOFREE 0x0020 /* Do not free slabs of this type! */ + +/* Definitions for align */ +#define UMA_ALIGN_PTR (sizeof(void *) - 1) /* Alignment fit for ptr */ +#define UMA_ALIGN_LONG (sizeof(long) - 1) /* "" long */ +#define UMA_ALIGN_INT (sizeof(int) - 1) /* "" int */ +#define UMA_ALIGN_SHORT (sizeof(short) - 1) /* "" short */ +#define UMA_ALIGN_CHAR (sizeof(char) - 1) /* "" char */ +#define UMA_ALIGN_CACHE (16 - 1) /* Cache line size align */ + +/* + * Destroys a uma zone + * + * Arguments: + * zone The zone we want to destroy. + * wait This flag indicates whether or not we should wait for all + * allocations to free, or return an errno on outstanding memory. + * + * Returns: + * 0 on successful completion, or EWOULDBLOCK if there are outstanding + * allocations and the wait flag is M_NOWAIT + */ + +int uma_zdestroy(uma_zone_t zone, int wait); + +/* + * Allocates an item out of a zone + * + * Arguments: + * zone The zone we are allocating from + * arg This data is passed to the ctor function + * wait This flag indicates whether or not we are allowed to block while + * allocating memory for this zone should we run out. + * + * Returns: + * A non null pointer to an initialized element from the zone is + * garanteed if the wait flag is M_WAITOK, otherwise a null pointer may be + * returned if the zone is empty or the ctor failed. + */ + +void *uma_zalloc_arg(uma_zone_t zone, void *arg, int wait); + +/* + * Allocates an item out of a zone without supplying an argument + * + * This is just a wrapper for uma_zalloc_arg for convenience. 
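The init/fini versus ctor/dtor split described above is easiest to see in an example. The zone below is hypothetical (none of the conversions in this diff pass a non-NULL init yet): the embedded mutex is set up once when the memory enters the zone and torn down only when it leaves, while the reference count is reset on every allocation.

struct conn {				/* hypothetical cached object */
	struct mtx	c_lock;		/* persists across alloc/free cycles */
	int		c_refs;		/* meaningful only while allocated */
};

static void
conn_init(void *mem, int size)
{
	mtx_init(&((struct conn *)mem)->c_lock, "conn", MTX_DEF);
}

static void
conn_fini(void *mem, int size)
{
	mtx_destroy(&((struct conn *)mem)->c_lock);
}

static void
conn_ctor(void *mem, int size, void *arg)
{
	((struct conn *)mem)->c_refs = 1;	/* runs on every uma_zalloc() */
}

static uma_zone_t conn_zone;

static void
conn_zone_setup(void)
{
	conn_zone = uma_zcreate("conn", sizeof(struct conn), conn_ctor, NULL,
	    conn_init, conn_fini, UMA_ALIGN_PTR, 0);
}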
+ * + */ +static __inline void *uma_zalloc(uma_zone_t zone, int wait); + +static __inline void * +uma_zalloc(uma_zone_t zone, int wait) +{ + return uma_zalloc_arg(zone, NULL, wait); +} + +/* + * Frees an item back into the specified zone. + * + * Arguments: + * zone The zone the item was originally allocated out of. + * item The memory to be freed. + * arg Argument passed to the destructor + * + * Returns: + * Nothing. + */ + +void uma_zfree_arg(uma_zone_t zone, void *item, void *arg); + +/* + * Frees an item back to a zone without supplying an argument + * + * This is just a wrapper for uma_zfree_arg for convenience. + * + */ +static __inline void uma_zfree(uma_zone_t zone, void *item); + +static __inline void +uma_zfree(uma_zone_t zone, void *item) +{ + return uma_zfree_arg(zone, item, NULL); +} + +/* + * XXX The rest of the prototypes in this header are h0h0 magic for the VM. + * If you think you need to use it for a normal zone you're probably incorrect. + */ + +/* + * Backend page supplier routines + * + * Arguments: + * zone The zone that is requesting pages + * size The number of bytes being requested + * pflag Flags for these memory pages, see below. + * wait Indicates our willingness to block. + * + * Returns: + * A pointer to the alloced memory or NULL on failure. + */ + +typedef void *(*uma_alloc)(uma_zone_t zone, int size, u_int8_t *pflag, int wait); + +/* + * Backend page free routines + * + * Arguments: + * item A pointer to the previously allocated pages + * size The original size of the allocation + * pflag The flags for the slab. See UMA_SLAB_* below + * + * Returns: + * None + */ +typedef void (*uma_free)(void *item, int size, u_int8_t pflag); + + + +/* + * Sets up the uma allocator. (Called by vm_mem_init) + * + * Arguments: + * bootmem A pointer to memory used to bootstrap the system. + * + * Returns: + * Nothing + * + * Discussion: + * This memory is used for zones which allocate things before the + * backend page supplier can give us pages. It should be + * UMA_SLAB_SIZE * UMA_BOOT_PAGES bytes. (see uma_int.h) + * + */ + +void uma_startup(void *bootmem); + +/* + * Finishes starting up the allocator. This should + * be called when kva is ready for normal allocs. + * + * Arguments: + * hash An area of memory that will become the malloc hash + * elems The number of elements in this array + * + * Returns: + * Nothing + * + * Discussion: + * uma_startup2 is called by kmeminit() to prepare the malloc + * hash bucket, and enable use of uma for malloc ops. + */ + +void uma_startup2(void *hash, u_long elems); + +/* + * Reclaims unused memory for all zones + * + * Arguments: + * None + * Returns: + * None + * + * This should only be called by the page out daemon. + */ + +void uma_reclaim(void); + +/* + * Switches the backing object of a zone + * + * Arguments: + * zone The zone to update + * obj The obj to use for future allocations + * size The size of the object to allocate + * + * Returns: + * 0 if kva space can not be allocated + * 1 if successful + * + * Discussion: + * A NULL object can be used and uma will allocate one for you. Setting + * the size will limit the amount of memory allocated to this zone. + * + */ +struct vm_object; +int uma_zone_set_obj(uma_zone_t zone, struct vm_object *obj, int size); + + +/* + * Replaces the standard page_alloc or obj_alloc functions for this zone + * + * Arguments: + * zone The zone whos back end allocator is being changed. 
+ * allocf A pointer to the allocation function + * + * Returns: + * Nothing + * + * Discussion: + * This could be used to implement pageable allocation, or perhaps + * even DMA allocators if used in conjunction with the OFFPAGE + * zone flag. + */ + +void uma_zone_set_allocf(uma_zone_t zone, uma_alloc allocf); + +/* + * Used for freeing memory provided by the allocf above + * + * Arguments: + * zone The zone that intends to use this free routine. + * freef The page freeing routine. + * + * Returns: + * Nothing + */ + +void uma_zone_set_freef(uma_zone_t zone, uma_free freef); + +/* + * These flags are setable in the allocf and visable in the freef. + */ +#define UMA_SLAB_BOOT 0x01 /* Slab alloced from boot pages */ +#define UMA_SLAB_KMEM 0x02 /* Slab alloced from kmem_map */ +#define UMA_SLAB_KMAP 0x04 /* Slab alloced from kernel_map */ +#define UMA_SLAB_PRIV 0x08 /* Slab alloced from priv allocator */ +#define UMA_SLAB_OFFP 0x10 /* Slab is managed seperately */ +#define UMA_SLAB_MALLOC 0x20 /* Slab is a large malloc slab */ +/* 0x40 and 0x80 are available */ + +/* + * Used to pre-fill a zone with some number of items + * + * Arguments: + * zone The zone to fill + * itemcnt The number of items to reserve + * + * Returns: + * Nothing + * + * NOTE: This is blocking and should only be done at startup + */ +void uma_prealloc(uma_zone_t zone, int itemcnt); + + +#endif diff --git a/sys/vm/uma_core.c b/sys/vm/uma_core.c new file mode 100644 index 0000000..5b4be4b --- /dev/null +++ b/sys/vm/uma_core.c @@ -0,0 +1,1900 @@ +/* + * Copyright (c) 2002, Jeffrey Roberson <jroberson@chesapeake.net> + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice unmodified, this list of conditions, and the following + * disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * $FreeBSD$ + * + */ + +/* + * uma_core.c Implementation of the Universal Memory allocator + * + * This allocator is intended to replace the multitude of similar object caches + * in the standard FreeBSD kernel. The intent is to be flexible as well as + * effecient. A primary design goal is to return unused memory to the rest of + * the system. This will make the system as a whole more flexible due to the + * ability to move memory to subsystems which most need it instead of leaving + * pools of reserved memory unused. + * + * The basic ideas stem from similar slab/zone based allocators whose algorithms + * are well known. 
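The UMA_SLAB_* flags above are the contract between a zone's back-end page allocator and its free routine: whatever the allocf stores through *pflag is handed back as the pflag argument of the freef, so the free routine knows where the pages came from. A hypothetical pair (not from this commit) serving slabs out of kernel_map; it would be wired up after uma_zcreate() with uma_zone_set_allocf() and uma_zone_set_freef():

static void *
myzone_slab_alloc(uma_zone_t zone, int bytes, u_int8_t *pflag, int wait)
{
	*pflag = UMA_SLAB_KMAP;		/* record the source for the freef */
	return ((void *)kmem_alloc(kernel_map, (vm_size_t)bytes));
}

static void
myzone_slab_free(void *mem, int size, u_int8_t pflag)
{
	KASSERT(pflag & UMA_SLAB_KMAP,
	    ("myzone_slab_free: unexpected slab flags %x", pflag));
	kmem_free(kernel_map, (vm_offset_t)mem, (vm_size_t)size);
}

The pmap and mmu_oea conversions earlier in the diff override only the allocf and tag their slabs UMA_SLAB_PRIV.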
+ * + */ + +/* + * TODO: + * - Improve memory usage for large allocations + * - Improve INVARIANTS (0xdeadc0de write out) + * - Investigate cache size adjustments + */ + +/* I should really use ktr.. */ +/* +#define UMA_DEBUG 1 +#define UMA_DEBUG_ALLOC 1 +#define UMA_DEBUG_ALLOC_1 1 +*/ + + +#include "opt_param.h" +#include <sys/param.h> +#include <sys/systm.h> +#include <sys/kernel.h> +#include <sys/types.h> +#include <sys/queue.h> +#include <sys/malloc.h> +#include <sys/lock.h> +#include <sys/sysctl.h> +#include <machine/types.h> +#include <sys/mutex.h> +#include <sys/smp.h> + +#include <vm/vm.h> +#include <vm/vm_object.h> +#include <vm/vm_page.h> +#include <vm/vm_param.h> +#include <vm/vm_map.h> +#include <vm/vm_kern.h> +#include <vm/vm_extern.h> +#include <vm/uma.h> +#include <vm/uma_int.h> + +/* + * This is the zone from which all zones are spawned. The idea is that even + * the zone heads are allocated from the allocator, so we use the bss section + * to bootstrap us. + */ +static struct uma_zone master_zone; +static uma_zone_t zones = &master_zone; + +/* This is the zone from which all of uma_slab_t's are allocated. */ +static uma_zone_t slabzone; + +/* + * The initial hash tables come out of this zone so they can be allocated + * prior to malloc coming up. + */ +static uma_zone_t hashzone; + +/* + * Zone that buckets come from. + */ +static uma_zone_t bucketzone; + +/* Linked list of all zones in the system */ +static LIST_HEAD(,uma_zone) uma_zones = LIST_HEAD_INITIALIZER(&uma_zones); + +/* This mutex protects the zone list */ +static struct mtx uma_mtx; + +/* Linked list of boot time pages */ +static LIST_HEAD(,uma_slab) uma_boot_pages = + LIST_HEAD_INITIALIZER(&uma_boot_pages); + +/* Count of free boottime pages */ +static int uma_boot_free = 0; + +/* Is the VM done starting up? */ +static int booted = 0; + +/* This is the handle used to schedule our working set calculator */ +static struct callout uma_callout; + +/* This is mp_maxid + 1, for use while looping over each cpu */ +static int maxcpu; + +/* + * This structure is passed as the zone ctor arg so that I don't have to create + * a special allocation function just for zones. + */ +struct uma_zctor_args { + char *name; + int size; + uma_ctor ctor; + uma_dtor dtor; + uma_init uminit; + uma_fini fini; + int align; + u_int16_t flags; +}; + +/* + * This is the malloc hash table which is used to find the zone that a + * malloc allocation came from. It is not currently resizeable. The + * memory for the actual hash bucket is allocated in kmeminit. + */ +struct uma_hash mhash; +struct uma_hash *mallochash = &mhash; + +/* Prototypes.. 
*/ + +static void *obj_alloc(uma_zone_t, int, u_int8_t *, int); +static void *page_alloc(uma_zone_t, int, u_int8_t *, int); +static void page_free(void *, int, u_int8_t); +static uma_slab_t slab_zalloc(uma_zone_t, int); +static void cache_drain(uma_zone_t); +static void bucket_drain(uma_zone_t, uma_bucket_t); +static void zone_drain(uma_zone_t); +static void zone_ctor(void *, int, void *); +static void zero_init(void *, int); +static void zone_small_init(uma_zone_t zone); +static void zone_large_init(uma_zone_t zone); +static void zone_foreach(void (*zfunc)(uma_zone_t)); +static void zone_timeout(uma_zone_t zone); +static void hash_expand(struct uma_hash *); +static void uma_timeout(void *); +static void uma_startup3(void); +static void *uma_zalloc_internal(uma_zone_t, void *, int, int *, int); +static void uma_zfree_internal(uma_zone_t, + void *, void *, int); +void uma_print_zone(uma_zone_t); +void uma_print_stats(void); +static int sysctl_vm_zone(SYSCTL_HANDLER_ARGS); + +SYSCTL_OID(_vm, OID_AUTO, zone, CTLTYPE_STRING|CTLFLAG_RD, + NULL, 0, sysctl_vm_zone, "A", "Zone Info"); +SYSINIT(uma_startup3, SI_SUB_VM_CONF, SI_ORDER_SECOND, uma_startup3, NULL); + + +/* + * Routine called by timeout which is used to fire off some time interval + * based calculations. (working set, stats, etc.) + * + * Arguments: + * arg Unused + * + * Returns: + * Nothing + */ +static void +uma_timeout(void *unused) +{ + zone_foreach(zone_timeout); + + /* Reschedule this event */ + callout_reset(&uma_callout, UMA_WORKING_TIME * hz, uma_timeout, NULL); +} + +/* + * Routine to perform timeout driven calculations. This does the working set + * as well as hash expanding, and per cpu statistics aggregation. + * + * Arguments: + * zone The zone to operate on + * + * Returns: + * Nothing + */ +static void +zone_timeout(uma_zone_t zone) +{ + uma_cache_t cache; + u_int64_t alloc; + int free; + int cpu; + + alloc = 0; + free = 0; + + /* + * Aggregate per cpu cache statistics back to the zone. + * + * I may rewrite this to set a flag in the per cpu cache instead of + * locking. If the flag is not cleared on the next round I will have + * to lock and do it here instead so that the statistics don't get too + * far out of sync. + */ + if (!(zone->uz_flags & UMA_ZFLAG_INTERNAL)) { + for (cpu = 0; cpu < maxcpu; cpu++) { + if (CPU_ABSENT(cpu)) + continue; + CPU_LOCK(zone, cpu); + cache = &zone->uz_cpu[cpu]; + /* Add them up, and reset */ + alloc += cache->uc_allocs; + cache->uc_allocs = 0; + if (cache->uc_allocbucket) + free += cache->uc_allocbucket->ub_ptr + 1; + if (cache->uc_freebucket) + free += cache->uc_freebucket->ub_ptr + 1; + CPU_UNLOCK(zone, cpu); + } + } + + /* Now push these stats back into the zone.. */ + ZONE_LOCK(zone); + zone->uz_allocs += alloc; + + /* + * cachefree is an instantanious snapshot of what is in the per cpu + * caches, not an accurate counter + */ + zone->uz_cachefree = free; + + /* + * Expand the zone hash table. + * + * This is done if the number of slabs is larger than the hash size. + * What I'm trying to do here is completely reduce collisions. This + * may be a little aggressive. Should I allow for two collisions max? + */ + + if ((zone->uz_flags & UMA_ZFLAG_OFFPAGE) && + !(zone->uz_flags & UMA_ZFLAG_MALLOC)) { + if (zone->uz_pages / zone->uz_ppera + >= zone->uz_hash.uh_hashsize) + hash_expand(&zone->uz_hash); + } + + /* + * Here we compute the working set size as the total number of items + * left outstanding since the last time interval. This is slightly + * suboptimal. 
What we really want is the highest number of outstanding + * items during the last time quantum. This should be close enough. + * + * The working set size is used to throttle the zone_drain function. + * We don't want to return memory that we may need again immediately. + */ + alloc = zone->uz_allocs - zone->uz_oallocs; + zone->uz_oallocs = zone->uz_allocs; + zone->uz_wssize = alloc; + + ZONE_UNLOCK(zone); +} + +/* + * Expands the hash table for OFFPAGE zones. This is done from zone_timeout + * to reduce collisions. This must not be done in the regular allocation path, + * otherwise, we can recurse on the vm while allocating pages. + * + * Arguments: + * hash The hash you want to expand by a factor of two. + * + * Returns: + * Nothing + * + * Discussion: + */ +static void +hash_expand(struct uma_hash *hash) +{ + struct slabhead *newhash; + struct slabhead *oldhash; + uma_slab_t slab; + int hzonefree; + int hashsize; + int alloc; + int hval; + int i; + + + /* + * Remember the old hash size and see if it has to go back to the + * hash zone, or malloc. The hash zone is used for the initial hash + */ + + hashsize = hash->uh_hashsize; + oldhash = hash->uh_slab_hash; + + if (hashsize == UMA_HASH_SIZE_INIT) + hzonefree = 1; + else + hzonefree = 0; + + + /* We're just going to go to a power of two greater */ + if (hash->uh_hashsize) { + alloc = sizeof(hash->uh_slab_hash[0]) * (hash->uh_hashsize * 2); + /* XXX Shouldn't be abusing DEVBUF here */ + newhash = (struct slabhead *)malloc(alloc, M_DEVBUF, M_NOWAIT); + if (newhash == NULL) { + return; + } + hash->uh_hashsize *= 2; + } else { + alloc = sizeof(hash->uh_slab_hash[0]) * UMA_HASH_SIZE_INIT; + newhash = uma_zalloc_internal(hashzone, NULL, M_WAITOK, NULL, -1); + hash->uh_hashsize = UMA_HASH_SIZE_INIT; + } + + bzero(newhash, alloc); + + hash->uh_hashmask = hash->uh_hashsize - 1; + + /* + * I need to investigate hash algorithms for resizing without a + * full rehash. + */ + + for (i = 0; i < hashsize; i++) + while (!SLIST_EMPTY(&hash->uh_slab_hash[i])) { + slab = SLIST_FIRST(&hash->uh_slab_hash[i]); + SLIST_REMOVE_HEAD(&hash->uh_slab_hash[i], us_hlink); + hval = UMA_HASH(hash, slab->us_data); + SLIST_INSERT_HEAD(&newhash[hval], slab, us_hlink); + } + + if (hash->uh_slab_hash) { + if (hzonefree) + uma_zfree_internal(hashzone, + hash->uh_slab_hash, NULL, 0); + else + free(hash->uh_slab_hash, M_DEVBUF); + } + hash->uh_slab_hash = newhash; + + return; +} + +/* + * Frees all outstanding items in a bucket + * + * Arguments: + * zone The zone to free to, must be unlocked. + * bucket The free/alloc bucket with items, cpu queue must be locked. + * + * Returns: + * Nothing + */ + +static void +bucket_drain(uma_zone_t zone, uma_bucket_t bucket) +{ + uma_slab_t slab; + int mzone; + void *item; + + if (bucket == NULL) + return; + + slab = NULL; + mzone = 0; + + /* We have to lookup the slab again for malloc.. */ + if (zone->uz_flags & UMA_ZFLAG_MALLOC) + mzone = 1; + + while (bucket->ub_ptr > -1) { + item = bucket->ub_bucket[bucket->ub_ptr]; +#ifdef INVARIANTS + bucket->ub_bucket[bucket->ub_ptr] = NULL; + KASSERT(item != NULL, + ("bucket_drain: botched ptr, item is NULL")); +#endif + bucket->ub_ptr--; + /* + * This is extremely inefficient. The slab pointer was passed + * to uma_zfree_arg, but we lost it because the buckets don't + * hold them. This will go away when free() gets a size passed + * to it. 
+ */ + if (mzone) + slab = hash_sfind(mallochash, + (u_int8_t *)((unsigned long)item & + (~UMA_SLAB_MASK))); + uma_zfree_internal(zone, item, slab, 1); + } +} + +/* + * Drains the per cpu caches for a zone. + * + * Arguments: + * zone The zone to drain, must be unlocked. + * + * Returns: + * Nothing + * + * This function returns with the zone locked so that the per cpu queues can + * not be filled until zone_drain is finished. + * + */ +static void +cache_drain(uma_zone_t zone) +{ + uma_bucket_t bucket; + uma_cache_t cache; + int cpu; + + /* + * Flush out the per cpu queues. + * + * XXX This causes unneccisary thrashing due to immediately having + * empty per cpu queues. I need to improve this. + */ + + /* + * We have to lock each cpu cache before locking the zone + */ + ZONE_UNLOCK(zone); + + for (cpu = 0; cpu < maxcpu; cpu++) { + if (CPU_ABSENT(cpu)) + continue; + CPU_LOCK(zone, cpu); + cache = &zone->uz_cpu[cpu]; + bucket_drain(zone, cache->uc_allocbucket); + bucket_drain(zone, cache->uc_freebucket); + } + + /* + * Drain the bucket queues and free the buckets, we just keep two per + * cpu (alloc/free). + */ + ZONE_LOCK(zone); + while ((bucket = LIST_FIRST(&zone->uz_full_bucket)) != NULL) { + LIST_REMOVE(bucket, ub_link); + ZONE_UNLOCK(zone); + bucket_drain(zone, bucket); + uma_zfree_internal(bucketzone, bucket, NULL, 0); + ZONE_LOCK(zone); + } + + /* Now we do the free queue.. */ + while ((bucket = LIST_FIRST(&zone->uz_free_bucket)) != NULL) { + LIST_REMOVE(bucket, ub_link); + uma_zfree_internal(bucketzone, bucket, NULL, 0); + } + + /* We unlock here, but they will all block until the zone is unlocked */ + for (cpu = 0; cpu < maxcpu; cpu++) { + if (CPU_ABSENT(cpu)) + continue; + CPU_UNLOCK(zone, cpu); + } +} + +/* + * Frees pages from a zone back to the system. This is done on demand from + * the pageout daemon. + * + * Arguments: + * zone The zone to free pages from + * + * Returns: + * Nothing. 
+ */ +static void +zone_drain(uma_zone_t zone) +{ + uma_slab_t slab; + uma_slab_t n; + u_int64_t extra; + u_int8_t flags; + u_int8_t *mem; + int i; + + /* + * We don't want to take pages from staticly allocated zones at this + * time + */ + if (zone->uz_flags & UMA_ZFLAG_NOFREE || zone->uz_freef == NULL) + return; + + ZONE_LOCK(zone); + + if (!(zone->uz_flags & UMA_ZFLAG_INTERNAL)) + cache_drain(zone); + + if (zone->uz_free < zone->uz_wssize) + goto finished; +#ifdef UMA_DEBUG + printf("%s working set size: %llu free items: %u\n", + zone->uz_name, (unsigned long long)zone->uz_wssize, zone->uz_free); +#endif + extra = zone->uz_wssize - zone->uz_free; + extra /= zone->uz_ipers; + + /* extra is now the number of extra slabs that we can free */ + + if (extra == 0) + goto finished; + + slab = LIST_FIRST(&zone->uz_free_slab); + while (slab && extra) { + n = LIST_NEXT(slab, us_link); + + /* We have no where to free these to */ + if (slab->us_flags & UMA_SLAB_BOOT) { + slab = n; + continue; + } + + LIST_REMOVE(slab, us_link); + zone->uz_pages -= zone->uz_ppera; + zone->uz_free -= zone->uz_ipers; + if (zone->uz_fini) + for (i = 0; i < zone->uz_ipers; i++) + zone->uz_fini( + slab->us_data + (zone->uz_rsize * i), + zone->uz_size); + flags = slab->us_flags; + mem = slab->us_data; + if (zone->uz_flags & UMA_ZFLAG_OFFPAGE) { + if (zone->uz_flags & UMA_ZFLAG_MALLOC) { + UMA_HASH_REMOVE(mallochash, + slab, slab->us_data); + } else { + UMA_HASH_REMOVE(&zone->uz_hash, + slab, slab->us_data); + } + uma_zfree_internal(slabzone, slab, NULL, 0); + } else if (zone->uz_flags & UMA_ZFLAG_MALLOC) + UMA_HASH_REMOVE(mallochash, slab, slab->us_data); +#ifdef UMA_DEBUG + printf("%s: Returning %d bytes.\n", + zone->uz_name, UMA_SLAB_SIZE * zone->uz_ppera); +#endif + zone->uz_freef(mem, UMA_SLAB_SIZE * zone->uz_ppera, flags); + + slab = n; + extra--; + } + +finished: + ZONE_UNLOCK(zone); +} + +/* + * Allocate a new slab for a zone. This does not insert the slab onto a list. + * + * Arguments: + * zone The zone to allocate slabs for + * wait Shall we wait? + * + * Returns: + * The slab that was allocated or NULL if there is no memory and the + * caller specified M_NOWAIT. 
+ * + */ +static uma_slab_t +slab_zalloc(uma_zone_t zone, int wait) +{ + uma_slab_t slab; /* Starting slab */ + u_int8_t *mem; + u_int8_t flags; + int i; + +#ifdef UMA_DEBUG + printf("slab_zalloc: Allocating a new slab for %s\n", zone->uz_name); +#endif + + if (booted || (zone->uz_flags & UMA_ZFLAG_PRIVALLOC)) { + ZONE_UNLOCK(zone); + mtx_lock(&Giant); + slab = (uma_slab_t )zone->uz_allocf(zone, + zone->uz_ppera * UMA_SLAB_SIZE, &flags, wait); + mtx_unlock(&Giant); + ZONE_LOCK(zone); + if (slab != NULL) + slab->us_data = (u_int8_t *)slab; + else + return (NULL); + } else { + + if (zone->uz_ppera > 1) + panic("UMA: Attemping to allocate multiple pages before vm has started.\n"); + if (zone->uz_flags & UMA_ZFLAG_MALLOC) + panic("Mallocing before uma_startup2 has been called.\n"); + if (uma_boot_free == 0) + panic("UMA: Ran out of pre init pages, increase UMA_BOOT_PAGES\n"); + slab = LIST_FIRST(&uma_boot_pages); + LIST_REMOVE(slab, us_link); + uma_boot_free--; + } + + mem = slab->us_data; + + /* Alloc slab structure for offpage, otherwise adjust it's position */ + if (!(zone->uz_flags & UMA_ZFLAG_OFFPAGE)) { + slab = (uma_slab_t )(mem + zone->uz_pgoff); + } else { + slab = uma_zalloc_internal(slabzone, NULL, wait, NULL, -1); + if (slab == NULL) /* XXX This should go away */ + panic("UMA: No free slab structures"); + if (!(zone->uz_flags & UMA_ZFLAG_MALLOC)) + UMA_HASH_INSERT(&zone->uz_hash, slab, mem); + } + if (zone->uz_flags & UMA_ZFLAG_MALLOC) { +#ifdef UMA_DEBUG + printf("Inserting %p into malloc hash from slab %p\n", + mem, slab); +#endif + UMA_HASH_INSERT(mallochash, slab, mem); + } + + slab->us_zone = zone; + slab->us_data = mem; + + /* + * This is intended to spread data out across cache lines. + * + * This code doesn't seem to work properly on x86, and on alpha + * it makes absolutely no performance difference. I'm sure it could + * use some tuning, but sun makes outrageous claims about it's + * performance. + */ +#if 0 + if (zone->uz_cachemax) { + slab->us_data += zone->uz_cacheoff; + zone->uz_cacheoff += UMA_CACHE_INC; + if (zone->uz_cacheoff > zone->uz_cachemax) + zone->uz_cacheoff = 0; + } +#endif + + slab->us_freecount = zone->uz_ipers; + slab->us_firstfree = 0; + slab->us_flags = flags; + for (i = 0; i < zone->uz_ipers; i++) + slab->us_freelist[i] = i+1; + + if (zone->uz_init) + for (i = 0; i < zone->uz_ipers; i++) + zone->uz_init(slab->us_data + (zone->uz_rsize * i), + zone->uz_size); + + zone->uz_pages += zone->uz_ppera; + zone->uz_free += zone->uz_ipers; + + return (slab); +} + +/* + * Allocates a number of pages from the system + * + * Arguments: + * zone Unused + * bytes The number of bytes requested + * wait Shall we wait? + * + * Returns: + * A pointer to the alloced memory or possibly + * NULL if M_NOWAIT is set. + */ +static void * +page_alloc(uma_zone_t zone, int bytes, u_int8_t *pflag, int wait) +{ + void *p; /* Returned page */ + + /* + * XXX The original zone allocator did this, but I don't think it's + * neccisary in current. + */ + + if (lockstatus(&kernel_map->lock, NULL)) { + *pflag = UMA_SLAB_KMEM; + p = (void *) kmem_malloc(kmem_map, bytes, wait); + } else { + *pflag = UMA_SLAB_KMAP; + p = (void *) kmem_alloc(kernel_map, bytes); + } + + return (p); +} + +/* + * Allocates a number of pages from within an object + * + * Arguments: + * zone Unused + * bytes The number of bytes requested + * wait Shall we wait? + * + * Returns: + * A pointer to the alloced memory or possibly + * NULL if M_NOWAIT is set. 
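page_alloc() above is the stock backend that zone_ctor() installs; uma_zone_set_allocf() and uma_zone_set_freef() let a zone substitute its own pair, which is what the UMA_SLAB_PRIV flag exists for. A hedged sketch of such a pair, assuming a zone that wants its pages straight out of kernel_map; the names are invented, and the stock page_free() below would panic on one of these slabs, which is why both hooks are installed together.

#include <vm/vm.h>
#include <vm/vm_kern.h>
#include <vm/vm_extern.h>
#include <vm/uma.h>

static void *
example_allocf(uma_zone_t zone, int bytes, u_int8_t *pflag, int wait)
{
        /*
         * Tag the slab so only example_freef() releases it.  kmem_alloc()
         * may sleep, so the wait argument is not honored in this sketch.
         */
        *pflag = UMA_SLAB_PRIV;
        return ((void *)kmem_alloc(kernel_map, bytes));
}

static void
example_freef(void *item, int size, u_int8_t pflag)
{
        if (pflag & UMA_SLAB_PRIV)
                kmem_free(kernel_map, (vm_offset_t)item, size);
}

static void
example_set_backend(uma_zone_t zone)
{
        uma_zone_set_allocf(zone, example_allocf);
        uma_zone_set_freef(zone, example_freef);
}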
+ */ +static void * +obj_alloc(uma_zone_t zone, int bytes, u_int8_t *flags, int wait) +{ + vm_offset_t zkva; + vm_offset_t retkva; + vm_page_t p; + int pages; + + + if (zone->uz_pages + zone->uz_ppera > zone->uz_maxpages) + return (NULL); + + retkva = NULL; + pages = zone->uz_pages; + + /* + * This looks a little weird since we're getting one page at a time + */ + while (bytes > 0) { + p = vm_page_alloc(zone->uz_obj, pages, + VM_ALLOC_INTERRUPT); + if (p == NULL) + return (NULL); + + zkva = zone->uz_kva + pages * PAGE_SIZE; + if (retkva == NULL) + retkva = zkva; + pmap_qenter(zkva, &p, 1); + bytes -= PAGE_SIZE; + pages += 1; + } + + *flags = UMA_SLAB_PRIV; + + return ((void *)retkva); +} + +/* + * Frees a number of pages to the system + * + * Arguments: + * mem A pointer to the memory to be freed + * size The size of the memory being freed + * flags The original p->us_flags field + * + * Returns: + * Nothing + * + */ +static void +page_free(void *mem, int size, u_int8_t flags) +{ + vm_map_t map; + if (flags & UMA_SLAB_KMEM) + map = kmem_map; + else if (flags & UMA_SLAB_KMAP) + map = kernel_map; + else + panic("UMA: page_free used with invalid flags %d\n", flags); + + kmem_free(map, (vm_offset_t)mem, size); +} + +/* + * Zero fill initializer + * + * Arguments/Returns follow uma_init specifications + * + */ +static void +zero_init(void *mem, int size) +{ + bzero(mem, size); +} + +/* + * Finish creating a small uma zone. This calculates ipers, and the zone size. + * + * Arguments + * zone The zone we should initialize + * + * Returns + * Nothing + */ +static void +zone_small_init(uma_zone_t zone) +{ + int rsize; + int memused; + int ipers; + + rsize = zone->uz_size; + + if (rsize < UMA_SMALLEST_UNIT) + rsize = UMA_SMALLEST_UNIT; + + if (rsize & zone->uz_align) + rsize = (rsize & ~zone->uz_align) + (zone->uz_align + 1); + + zone->uz_rsize = rsize; + + rsize += 1; /* Account for the byte of linkage */ + zone->uz_ipers = (UMA_SLAB_SIZE - sizeof(struct uma_slab)) / rsize; + zone->uz_ppera = 1; + + memused = zone->uz_ipers * zone->uz_rsize; + + /* Can we do any better? */ + if ((UMA_SLAB_SIZE - memused) >= UMA_MAX_WASTE) { + if (zone->uz_flags & UMA_ZFLAG_INTERNAL) + return; + ipers = UMA_SLAB_SIZE / zone->uz_rsize; + if (ipers > zone->uz_ipers) { + zone->uz_flags |= UMA_ZFLAG_OFFPAGE; + zone->uz_ipers = ipers; + } + } + +} + +/* + * Finish creating a large (> UMA_SLAB_SIZE) uma zone. Just give in and do + * OFFPAGE for now. When I can allow for more dynamic slab sizes this will be + * more complicated. + * + * Arguments + * zone The zone we should initialize + * + * Returns + * Nothing + */ +static void +zone_large_init(uma_zone_t zone) +{ + int pages; + + pages = zone->uz_size / UMA_SLAB_SIZE; + + /* Account for remainder */ + if ((pages * UMA_SLAB_SIZE) < zone->uz_size) + pages++; + + zone->uz_ppera = pages; + zone->uz_ipers = 1; + + zone->uz_flags |= UMA_ZFLAG_OFFPAGE; + zone->uz_rsize = zone->uz_size; +} + +/* + * Zone header ctor. This initializes all fields, locks, etc. And inserts + * the zone onto the global zone list. 
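To make the sizing rules in zone_small_init() concrete, here is a worked example with assumed numbers: a 4096-byte UMA_SLAB_SIZE and a 40-byte struct uma_slab. The real header size is machine dependent, so the figures are illustrative only.

/*
 * 1024-byte items, 8-byte alignment (uz_align == 7):
 *      rsize = 1024                            (already aligned)
 *      embedded header: ipers = (4096 - 40) / (1024 + 1) = 3
 *      memused = 3 * 1024 = 3072
 *      waste   = 4096 - 3072 = 1024 >= UMA_MAX_WASTE (4096 / 10 = 409)
 *      offpage: ipers = 4096 / 1024 = 4 > 3, so UMA_ZFLAG_OFFPAGE is set
 *      and the zone packs four items per slab with the header elsewhere.
 *
 * 100-byte items, 8-byte alignment:
 *      rsize = 104, ipers = (4096 - 40) / 105 = 38, memused = 3952
 *      waste = 144 < 409, so the header stays embedded in the page.
 */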
+ * + * Arguments/Returns follow uma_ctor specifications + * udata Actually uma_zcreat_args + * + */ + +static void +zone_ctor(void *mem, int size, void *udata) +{ + struct uma_zctor_args *arg = udata; + uma_zone_t zone = mem; + int cplen; + int cpu; + + bzero(zone, size); + zone->uz_name = arg->name; + zone->uz_size = arg->size; + zone->uz_ctor = arg->ctor; + zone->uz_dtor = arg->dtor; + zone->uz_init = arg->uminit; + zone->uz_align = arg->align; + zone->uz_free = 0; + zone->uz_pages = 0; + zone->uz_flags = 0; + zone->uz_allocf = page_alloc; + zone->uz_freef = page_free; + + if (arg->flags & UMA_ZONE_ZINIT) + zone->uz_init = zero_init; + + if (arg->flags & UMA_ZONE_INTERNAL) + zone->uz_flags |= UMA_ZFLAG_INTERNAL; + + if (arg->flags & UMA_ZONE_MALLOC) + zone->uz_flags |= UMA_ZFLAG_MALLOC; + + if (arg->flags & UMA_ZONE_NOFREE) + zone->uz_flags |= UMA_ZFLAG_NOFREE; + + if (zone->uz_size > UMA_SLAB_SIZE) + zone_large_init(zone); + else + zone_small_init(zone); + + /* We do this so that the per cpu lock name is unique for each zone */ + memcpy(zone->uz_lname, "PCPU ", 5); + cplen = min(strlen(zone->uz_name) + 1, LOCKNAME_LEN - 6); + memcpy(zone->uz_lname+5, zone->uz_name, cplen); + zone->uz_lname[LOCKNAME_LEN - 1] = '\0'; + + /* + * If we're putting the slab header in the actual page we need to + * figure out where in each page it goes. This calculates a right + * justified offset into the memory on a ALIGN_PTR boundary. + */ + if (!(zone->uz_flags & UMA_ZFLAG_OFFPAGE)) { + int totsize; + int waste; + + /* Size of the slab struct and free list */ + totsize = sizeof(struct uma_slab) + zone->uz_ipers; + if (totsize & UMA_ALIGN_PTR) + totsize = (totsize & ~UMA_ALIGN_PTR) + + (UMA_ALIGN_PTR + 1); + zone->uz_pgoff = UMA_SLAB_SIZE - totsize; + + waste = zone->uz_pgoff; + waste -= (zone->uz_ipers * zone->uz_rsize); + + /* + * This calculates how much space we have for cache line size + * optimizations. It works by offseting each slab slightly. + * Currently it breaks on x86, and so it is disabled. + */ + + if (zone->uz_align < UMA_CACHE_INC && waste > UMA_CACHE_INC) { + zone->uz_cachemax = waste - UMA_CACHE_INC; + zone->uz_cacheoff = 0; + } + + totsize = zone->uz_pgoff + sizeof(struct uma_slab) + + zone->uz_ipers; + /* I don't think it's possible, but I'll make sure anyway */ + if (totsize > UMA_SLAB_SIZE) { + printf("zone %s ipers %d rsize %d size %d\n", + zone->uz_name, zone->uz_ipers, zone->uz_rsize, + zone->uz_size); + panic("UMA slab won't fit.\n"); + } + } else { + /* hash_expand here to allocate the initial hash table */ + hash_expand(&zone->uz_hash); + zone->uz_pgoff = 0; + } + +#ifdef UMA_DEBUG + printf("%s(%p) size = %d ipers = %d ppera = %d pgoff = %d\n", + zone->uz_name, zone, + zone->uz_size, zone->uz_ipers, + zone->uz_ppera, zone->uz_pgoff); +#endif + ZONE_LOCK_INIT(zone); + + mtx_lock(&uma_mtx); + LIST_INSERT_HEAD(&uma_zones, zone, uz_link); + mtx_unlock(&uma_mtx); + + /* + * Some internal zones don't have room allocated for the per cpu + * caches. If we're internal, bail out here. + */ + + if (zone->uz_flags & UMA_ZFLAG_INTERNAL) + return; + + for (cpu = 0; cpu < maxcpu; cpu++) { + if (zone->uz_ipers < UMA_BUCKET_SIZE) + zone->uz_cpu[cpu].uc_count = zone->uz_ipers - 1; + else + zone->uz_cpu[cpu].uc_count = UMA_BUCKET_SIZE - 1; + CPU_LOCK_INIT(zone, cpu); + } +} + +/* + * Traverses every zone in the system and calls a callback + * + * Arguments: + * zfunc A pointer to a function which accepts a zone + * as an argument. 
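With the same illustrative numbers (4096-byte slabs, a 40-byte header, a pointer alignment mask of 7), the right-justified header offset computed by zone_ctor() for an embedded-header zone works out as follows.

/*
 * 100-byte items, rsize 104, ipers 38:
 *      totsize  = 40 + 38 = 78         (header plus one free-list byte/item)
 *      rounded up to a pointer boundary: (78 & ~7) + 8 = 80
 *      uz_pgoff = 4096 - 80 = 4016
 *
 * Items occupy bytes [0, 3952) of the page and the struct uma_slab sits
 * right-justified at offset 4016, which is the layout the first diagram
 * in uma_int.h below depicts.
 */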
+ * + * Returns: + * Nothing + */ +static void +zone_foreach(void (*zfunc)(uma_zone_t)) +{ + uma_zone_t zone; + + mtx_lock(&uma_mtx); + LIST_FOREACH(zone, &uma_zones, uz_link) { + zfunc(zone); + } + mtx_unlock(&uma_mtx); +} + +/* Public functions */ +/* See uma.h */ +void +uma_startup(void *bootmem) +{ + struct uma_zctor_args args; + uma_slab_t slab; + int slabsize; + int i; + +#ifdef UMA_DEBUG + printf("Creating uma zone headers zone.\n"); +#endif +#ifdef SMP + maxcpu = mp_maxid + 1; +#else + maxcpu = 1; +#endif +#ifdef UMA_DEBUG + printf("Max cpu = %d, mp_maxid = %d\n", maxcpu, mp_maxid); + Debugger("stop"); +#endif + mtx_init(&uma_mtx, "UMA lock", MTX_DEF); + /* "manually" Create the initial zone */ + args.name = "UMA Zones"; + args.size = sizeof(struct uma_zone) + + (sizeof(struct uma_cache) * (maxcpu - 1)); + args.ctor = zone_ctor; + args.dtor = NULL; + args.uminit = zero_init; + args.fini = NULL; + args.align = 32 - 1; + args.flags = UMA_ZONE_INTERNAL; + /* The initial zone has no Per cpu queues so it's smaller */ + zone_ctor(zones, sizeof(struct uma_zone), &args); + +#ifdef UMA_DEBUG + printf("Filling boot free list.\n"); +#endif + for (i = 0; i < UMA_BOOT_PAGES; i++) { + slab = (uma_slab_t)((u_int8_t *)bootmem + (i * UMA_SLAB_SIZE)); + slab->us_data = (u_int8_t *)slab; + slab->us_flags = UMA_SLAB_BOOT; + LIST_INSERT_HEAD(&uma_boot_pages, slab, us_link); + uma_boot_free++; + } + +#ifdef UMA_DEBUG + printf("Creating slab zone.\n"); +#endif + + /* + * This is the max number of free list items we'll have with + * offpage slabs. + */ + + slabsize = UMA_SLAB_SIZE - sizeof(struct uma_slab); + slabsize /= UMA_MAX_WASTE; + slabsize++; /* In case there it's rounded */ + slabsize += sizeof(struct uma_slab); + + /* Now make a zone for slab headers */ + slabzone = uma_zcreate("UMA Slabs", + slabsize, + NULL, NULL, NULL, NULL, + UMA_ALIGN_PTR, UMA_ZONE_INTERNAL); + + hashzone = uma_zcreate("UMA Hash", + sizeof(struct slabhead *) * UMA_HASH_SIZE_INIT, + NULL, NULL, NULL, NULL, + UMA_ALIGN_PTR, UMA_ZONE_INTERNAL); + + bucketzone = uma_zcreate("UMA Buckets", sizeof(struct uma_bucket), + NULL, NULL, NULL, NULL, + UMA_ALIGN_PTR, UMA_ZONE_INTERNAL); + + +#ifdef UMA_DEBUG + printf("UMA startup complete.\n"); +#endif +} + +/* see uma.h */ +void +uma_startup2(void *hashmem, u_long elems) +{ + bzero(hashmem, elems * sizeof(void *)); + mallochash->uh_slab_hash = hashmem; + mallochash->uh_hashsize = elems; + mallochash->uh_hashmask = elems - 1; + booted = 1; +#ifdef UMA_DEBUG + printf("UMA startup2 complete.\n"); +#endif +} + +/* + * Initialize our callout handle + * + */ + +static void +uma_startup3(void) +{ +#ifdef UMA_DEBUG + printf("Starting callout.\n"); +#endif + /* We'll be mpsafe once the vm is locked. 
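uma_startup() computes args.size by hand because struct uma_zone (in uma_int.h below) ends in a one-element uz_cpu[] array that is deliberately over-allocated to hold one uma_cache per cpu. A small sketch of that sizing rule, for illustration only; UMA itself only ever does this for the zone-of-zones.

#include <sys/param.h>
#include <vm/vm.h>
#include <vm/uma.h>
#include <vm/uma_int.h>

/*
 * A zone header with room for maxcpu per-cpu caches needs space for
 * (maxcpu - 1) caches beyond the one declared inside struct uma_zone.
 */
static size_t
example_zone_header_size(int maxcpu)
{
        return (sizeof(struct uma_zone) +
            sizeof(struct uma_cache) * (maxcpu - 1));
}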
*/ + callout_init(&uma_callout, 0); + callout_reset(&uma_callout, UMA_WORKING_TIME * hz, uma_timeout, NULL); +#ifdef UMA_DEBUG + printf("UMA startup3 complete.\n"); +#endif +} + +/* See uma.h */ +uma_zone_t +uma_zcreate(char *name, int size, uma_ctor ctor, uma_dtor dtor, uma_init uminit, + uma_fini fini, int align, u_int16_t flags) + +{ + struct uma_zctor_args args; + + /* This stuff is essential for the zone ctor */ + args.name = name; + args.size = size; + args.ctor = ctor; + args.dtor = dtor; + args.uminit = uminit; + args.fini = fini; + args.align = align; + args.flags = flags; + + return (uma_zalloc_internal(zones, &args, M_WAITOK, NULL, -1)); +} + +/* See uma.h */ +void * +uma_zalloc_arg(uma_zone_t zone, void *udata, int wait) +{ + void *item; + uma_cache_t cache; + uma_bucket_t bucket; + int isitem; + int cpu; + + /* This is the fast path allocation */ +#ifdef UMA_DEBUG_ALLOC_1 + printf("Allocating one item from %s(%p)\n", zone->uz_name, zone); +#endif + cpu = PCPU_GET(cpuid); + CPU_LOCK(zone, cpu); + cache = &zone->uz_cpu[cpu]; + cache->uc_allocs++; + +zalloc_start: + bucket = cache->uc_allocbucket; + + if (bucket) { + if (bucket->ub_ptr > -1) { + item = bucket->ub_bucket[bucket->ub_ptr]; +#ifdef INVARIANTS + bucket->ub_bucket[bucket->ub_ptr] = NULL; +#endif + bucket->ub_ptr--; + KASSERT(item != NULL, + ("uma_zalloc: Bucket pointer mangled.")); + cache->uc_allocs++; + CPU_UNLOCK(zone, cpu); + if (zone->uz_ctor) + zone->uz_ctor(item, zone->uz_size, udata); + return (item); + } else if (cache->uc_freebucket) { + /* + * We have run out of items in our allocbucket. + * See if we can switch with our free bucket. + */ + if (cache->uc_freebucket->ub_ptr > -1) { + uma_bucket_t swap; + +#ifdef UMA_DEBUG_ALLOC + printf("uma_zalloc: Swapping empty with alloc.\n"); +#endif + swap = cache->uc_freebucket; + cache->uc_freebucket = cache->uc_allocbucket; + cache->uc_allocbucket = swap; + + goto zalloc_start; + } + } + } + /* + * We can get here for three reasons: + * + * 1) The buckets are NULL + * 2) The zone is INTERNAL, and so it has no buckets. + * 3) The alloc and free buckets are both empty. + * + * Just handoff to uma_zalloc_internal to do the hard stuff + * + */ +#ifdef UMA_DEBUG_ALLOC + printf("uma_zalloc: Falling back to zalloc_internal.\n"); +#endif + + item = uma_zalloc_internal(zone, udata, wait, &isitem, cpu); + +#ifdef UMA_DEBUG + printf("uma_zalloc: zalloc_internal completed.\n"); +#endif + + if (item && isitem == 0) + goto zalloc_start; + + /* + * If isitem is set then we should just return it. The cpu lock + * was unlocked when we couldn't get a bucket. + */ + +#ifdef INVARIANTS + if (wait == M_WAITOK) + KASSERT(item != NULL, + ("uma_zalloc: WAITOK set but we're returning NULL")); +#endif + return item; +} + +/* + * Allocates an item for an internal zone OR fills a bucket + * + * Arguments + * zone The zone to alloc for. + * udata The data to be passed to the constructor. + * wait M_WAITOK or M_NOWAIT. + * isitem The returned value is an item if this is true. + * cpu The cpu # of the cache that we should use, or -1. + * + * Returns + * NULL if there is no memory and M_NOWAIT is set + * An item if called on an interal zone + * Non NULL if called to fill a bucket and it was successful. + * + * Discussion: + * This was much cleaner before it had to do per cpu caches. It is + * complicated now because it has to handle the simple internal case, and + * the more involved bucket filling and allocation. The isitem is there + * to remove a failure case. 
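In the fast path above the constructor runs only after the per-cpu lock has been dropped, and the udata pointer from uma_zalloc_arg() is handed to it untouched; uma_zfree_arg() does the same for the destructor. A sketch of a ctor/dtor pair written against those typedefs; the structure, the zone, and the protocol value are invented for the example.

struct conn {
        int     c_proto;
        int     c_refs;
};

/* uma_ctor: the item, the zone's item size, and the caller's udata. */
static void
conn_ctor(void *mem, int size, void *udata)
{
        struct conn *c = mem;

        c->c_proto = *(int *)udata;     /* from uma_zalloc_arg() */
        c->c_refs = 1;
}

/* uma_dtor: runs when the item is handed back to the zone. */
static void
conn_dtor(void *mem, int size, void *udata)
{
        struct conn *c = mem;

        KASSERT(c->c_refs == 0, ("conn_dtor: %d references held", c->c_refs));
}

/*
 * Usage, assuming conn_zone was created with conn_ctor/conn_dtor; callers
 * of such a zone must always pass a protocol pointer through the _arg form:
 *
 *      int proto = 6;
 *      struct conn *c = uma_zalloc_arg(conn_zone, &proto, M_WAITOK);
 *      ...
 *      uma_zfree_arg(conn_zone, c, NULL);
 */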
You shouldn't fail on allocating from a zone + * because there were no buckets. This allows the exported zalloc to just + * return the item. + * + */ + +static void * +uma_zalloc_internal(uma_zone_t zone, void *udata, int wait, int *isitem, int cpu) +{ + uma_bucket_t bucket; + uma_cache_t cache; + uma_slab_t slab; + u_int8_t freei; + void *item; + + bucket = NULL; + cache = NULL; + item = NULL; + + /* + * This is to stop us from allocating per cpu buckets while we're running + * out of UMA_BOOT_PAGES. Otherwise, we would exhaust the boot pages. + */ + + if (!booted && zone == bucketzone) + return (NULL); + +#ifdef UMA_DEBUG_ALLOC + printf("INTERNAL: Allocating one item from %s(%p)\n", zone->uz_name, zone); +#endif + if (isitem != NULL) + *isitem = 0; + + ZONE_LOCK(zone); + + /* We got here because we need to fill some buckets */ + if (cpu != -1) { + cache = &zone->uz_cpu[cpu]; + + zone->uz_allocs += cache->uc_allocs; + /* Check the free list */ + bucket = LIST_FIRST(&zone->uz_full_bucket); + if (bucket) { + LIST_REMOVE(bucket, ub_link); + /* Our old one is now a free bucket */ + if (cache->uc_allocbucket) { + KASSERT(cache->uc_allocbucket->ub_ptr == -1, + ("uma_zalloc_internal: Freeing a non free bucket.")); + LIST_INSERT_HEAD(&zone->uz_free_bucket, + cache->uc_allocbucket, ub_link); + } + KASSERT(bucket->ub_ptr != -1, + ("uma_zalloc_internal: Returning an empty bucket.")); + /*zone->uz_free -= bucket->ub_ptr + 1;*/ + cache->uc_allocbucket = bucket; + ZONE_UNLOCK(zone); + return (bucket); + } + /* Bump up our uc_count so we get here less */ + if (cache->uc_count < UMA_BUCKET_SIZE - 1) + cache->uc_count++; + /* Nothing on the free list, try to re-use the old one */ + bucket = cache->uc_allocbucket; + if (bucket == NULL) { + /* Nope, we need a new one */ + CPU_UNLOCK(zone, cpu); + ZONE_UNLOCK(zone); + bucket = uma_zalloc_internal(bucketzone, + NULL, wait, NULL, -1); + CPU_LOCK(zone, cpu); + ZONE_LOCK(zone); + /* Did we lose the race? */ + if (cache->uc_allocbucket) { +#ifdef UMA_DEBUG + printf("uma_zalloc_internal: Lost race with another CPU.\n"); +#endif + if (bucket) + uma_zfree_internal(bucketzone, + bucket, NULL, 0); + ZONE_UNLOCK(zone); + return (cache->uc_allocbucket); + } + cache->uc_allocbucket = bucket; + + if (bucket) { +#ifdef INVARIANTS + bzero(bucket, bucketzone->uz_size); +#endif + bucket->ub_ptr = -1; + } else { + /* + * We may not get a bucket if we recurse, so + * return an actual item. The rest of this code + * does the right thing if the cache is NULL. + */ +#ifdef UMA_DEBUG + printf("uma_zalloc_internal: Bucketzone returned NULL\n"); +#endif + CPU_UNLOCK(zone, cpu); + cache = NULL; + cpu = -1; + } + } + } + +new_slab: + + /* Find a slab with some space */ + if (zone->uz_free) { + if (!LIST_EMPTY(&zone->uz_part_slab)) { + slab = LIST_FIRST(&zone->uz_part_slab); + } else { + slab = LIST_FIRST(&zone->uz_free_slab); + LIST_REMOVE(slab, us_link); + LIST_INSERT_HEAD(&zone->uz_part_slab, slab, us_link); + } + } else { + /* + * This is to prevent us from recursively trying to allocate + * buckets. The problem is that if an allocation forces us to + * grab a new bucket we will call page_alloc, which will go off + * and cause the vm to allocate vm_map_entries. If we need new + * buckets there too we will recurse in kmem_alloc and bad + * things happen. 
So instead we return a NULL bucket, and make + * the code that allocates buckets smart enough to deal with it */ + if (zone == bucketzone && zone->uz_recurse != 0) { + ZONE_UNLOCK(zone); + return (NULL); + } + zone->uz_recurse++; + slab = slab_zalloc(zone, wait); + zone->uz_recurse--; + if (slab) { + LIST_INSERT_HEAD(&zone->uz_part_slab, slab, us_link); + /* + * We might not have been able to get a page, but another cpu + * could have while we were unlocked. + */ + } else if (zone->uz_free == 0) { + ZONE_UNLOCK(zone); + /* If we're filling a bucket return what we have */ + if (bucket != NULL && bucket->ub_ptr != -1) { + return (bucket); + } else + return (NULL); + } else { + /* Another cpu must have succeeded */ + if ((slab = LIST_FIRST(&zone->uz_part_slab)) == NULL) { + slab = LIST_FIRST(&zone->uz_free_slab); + LIST_REMOVE(slab, us_link); + LIST_INSERT_HEAD(&zone->uz_part_slab, + slab, us_link); + } + } + } + + while (slab->us_freecount) { + freei = slab->us_firstfree; + slab->us_firstfree = slab->us_freelist[freei]; +#ifdef INVARIANTS + slab->us_freelist[freei] = 255; +#endif + slab->us_freecount--; + zone->uz_free--; + item = slab->us_data + (zone->uz_rsize * freei); + + if (cache == NULL) { + zone->uz_allocs++; + break; + } + + bucket->ub_bucket[++bucket->ub_ptr] = item; + + /* Don't overfill the bucket! */ + if (bucket->ub_ptr == cache->uc_count) + break; + } + + /* Move this slab to the full list */ + if (slab->us_freecount == 0) { + LIST_REMOVE(slab, us_link); + LIST_INSERT_HEAD(&zone->uz_full_slab, slab, us_link); + } + + if (cache != NULL) { + /* Try to keep the buckets totally full, but don't block */ + if (bucket->ub_ptr < cache->uc_count) { + wait = M_NOWAIT; + goto new_slab; + } + } + + ZONE_UNLOCK(zone); + + /* Only construct at this time if we're not filling a bucket */ + if (cache == NULL) { + if (zone->uz_ctor) + zone->uz_ctor(item, zone->uz_size, udata); + + if (isitem != NULL) + *isitem = 1; + } + + return (item); +} + +/* See uma.h */ +void +uma_zfree_arg(uma_zone_t zone, void *item, void *udata) +{ + uma_cache_t cache; + uma_bucket_t bucket; + int cpu; + + /* This is the fast path free */ +#ifdef UMA_DEBUG_ALLOC_1 + printf("Freeing item %p to %s(%p)\n", item, zone->uz_name, zone); +#endif + cpu = PCPU_GET(cpuid); + CPU_LOCK(zone, cpu); + cache = &zone->uz_cpu[cpu]; + +zfree_start: + bucket = cache->uc_freebucket; + + if (bucket) { + /* Do we have room in our bucket? */ + if (bucket->ub_ptr < cache->uc_count) { + bucket->ub_ptr++; + KASSERT(bucket->ub_bucket[bucket->ub_ptr] == NULL, + ("uma_zfree: Freeing to non free bucket index.")); + bucket->ub_bucket[bucket->ub_ptr] = item; + CPU_UNLOCK(zone, cpu); + if (zone->uz_dtor) + zone->uz_dtor(item, zone->uz_size, udata); + return; + } else if (cache->uc_allocbucket) { +#ifdef UMA_DEBUG_ALLOC + printf("uma_zfree: Swapping buckets.\n"); +#endif + /* + * We have run out of space in our freebucket. + * See if we can switch with our alloc bucket. + */ + if (cache->uc_allocbucket->ub_ptr < + cache->uc_freebucket->ub_ptr) { + uma_bucket_t swap; + + swap = cache->uc_freebucket; + cache->uc_freebucket = cache->uc_allocbucket; + cache->uc_allocbucket = swap; + + goto zfree_start; + } + } + } + + /* + * We can get here for three reasons: + * + * 1) The buckets are NULL + * 2) The zone is INTERNAL, and so it has no buckets. + * 3) The alloc and free buckets are both somewhat full. 
+ * + */ + + ZONE_LOCK(zone); + + if (!(zone->uz_flags & UMA_ZFLAG_INTERNAL)) { + bucket = cache->uc_freebucket; + cache->uc_freebucket = NULL; + + /* Can we throw this on the zone full list? */ + if (bucket != NULL) { +#ifdef UMA_DEBUG_ALLOC + printf("uma_zfree: Putting old bucket on the free list.\n"); +#endif + /* ub_ptr is pointing to the last free item */ + KASSERT(bucket->ub_ptr != -1, + ("uma_zfree: Attempting to insert an empty bucket onto the full list.\n")); + /*zone->uz_free += bucket->ub_ptr + 1;*/ + LIST_INSERT_HEAD(&zone->uz_full_bucket, + bucket, ub_link); + bucket = LIST_FIRST(&zone->uz_free_bucket); + if (bucket) + LIST_REMOVE(bucket, ub_link); + } + /* + * Do we need to alloc one? Either the freebucket was NULL + * or the free_bucket list was empty. + */ + if (bucket == NULL) { +#ifdef UMA_DEBUG_ALLOC + printf("uma_zfree: Allocating new free bucket.\n"); +#endif + /* This has to be done so we don't recurse on a lock */ + ZONE_UNLOCK(zone); + CPU_UNLOCK(zone, cpu); + bucket = uma_zalloc_internal(bucketzone, + NULL, M_NOWAIT, NULL, -1); + CPU_LOCK(zone, cpu); + ZONE_LOCK(zone); + if (bucket) { +#ifdef INVARIANTS + bzero(bucket, bucketzone->uz_size); +#endif + bucket->ub_ptr = -1; + } + /* Did we lose the race? */ + if (cache->uc_freebucket != NULL) { + if (bucket) + uma_zfree_internal(bucketzone, + bucket, NULL, 0); + ZONE_UNLOCK(zone); + goto zfree_start; + } + /* If we couldn't get one just free directly */ + if (bucket == NULL) + goto zfree_internal; + } + cache->uc_freebucket = bucket; + ZONE_UNLOCK(zone); + goto zfree_start; + } + +zfree_internal: + + CPU_UNLOCK(zone, cpu); + ZONE_UNLOCK(zone); + uma_zfree_internal(zone, item, udata, 0); + + return; + +} + +/* + * Frees an item to an INTERNAL zone or allocates a free bucket + * + * Arguments: + * zone The zone to free to + * item The item we're freeing + * udata User supplied data for the dtor + * skip Skip the dtor, it was done in uma_zfree_arg + */ + +static void +uma_zfree_internal(uma_zone_t zone, void *item, void *udata, int skip) +{ + uma_slab_t slab; + u_int8_t *mem; + u_int8_t freei; + + ZONE_LOCK(zone); + + if (!(zone->uz_flags & UMA_ZFLAG_MALLOC)) { + mem = (u_int8_t *)((unsigned long)item & (~UMA_SLAB_MASK)); + if (zone->uz_flags & UMA_ZFLAG_OFFPAGE) + slab = hash_sfind(&zone->uz_hash, mem); + else { + mem += zone->uz_pgoff; + slab = (uma_slab_t)mem; + } + } else { + slab = (uma_slab_t)udata; + } + + /* Do we need to remove from any lists? 
*/ + if (slab->us_freecount+1 == zone->uz_ipers) { + LIST_REMOVE(slab, us_link); + LIST_INSERT_HEAD(&zone->uz_free_slab, slab, us_link); + } else if (slab->us_freecount == 0) { + LIST_REMOVE(slab, us_link); + LIST_INSERT_HEAD(&zone->uz_part_slab, slab, us_link); + } + + /* Slab management stuff */ + freei = ((unsigned long)item - (unsigned long)slab->us_data) + / zone->uz_rsize; +#ifdef INVARIANTS + if (((freei * zone->uz_rsize) + slab->us_data) != item) + panic("zone: %s(%p) slab %p freed address %p unaligned.\n", + zone->uz_name, zone, slab, item); + if (freei >= zone->uz_ipers) + panic("zone: %s(%p) slab %p freelist %i out of range 0-%d\n", + zone->uz_name, zone, slab, freei, zone->uz_ipers-1); + + if (slab->us_freelist[freei] != 255) { + printf("Slab at %p, freei %d = %d.\n", + slab, freei, slab->us_freelist[freei]); + panic("Duplicate free of item %p from zone %p(%s)\n", + item, zone, zone->uz_name); + } +#endif + slab->us_freelist[freei] = slab->us_firstfree; + slab->us_firstfree = freei; + slab->us_freecount++; + + /* Zone statistics */ + zone->uz_free++; + + ZONE_UNLOCK(zone); + + if (!skip && zone->uz_dtor) + zone->uz_dtor(item, zone->uz_size, udata); +} + +/* See uma.h */ +void +uma_zone_set_freef(uma_zone_t zone, uma_free freef) +{ + ZONE_LOCK(zone); + + zone->uz_freef = freef; + + ZONE_UNLOCK(zone); +} + +/* See uma.h */ +void +uma_zone_set_allocf(uma_zone_t zone, uma_alloc allocf) +{ + ZONE_LOCK(zone); + + zone->uz_flags |= UMA_ZFLAG_PRIVALLOC; + zone->uz_allocf = allocf; + + ZONE_UNLOCK(zone); +} + +/* See uma.h */ +int +uma_zone_set_obj(uma_zone_t zone, struct vm_object *obj, int count) +{ + int pages; + vm_offset_t kva; + + ZONE_LOCK(zone); + mtx_lock(&Giant); + + zone->uz_obj = obj; + pages = count / zone->uz_ipers; + + if (pages * zone->uz_ipers < count) + pages++; + zone->uz_kva = NULL; + ZONE_UNLOCK(zone); + kva = kmem_alloc_pageable(kernel_map, pages * UMA_SLAB_SIZE); + ZONE_LOCK(zone); + + zone->uz_kva = kva; + + if (zone->uz_kva == 0) { + ZONE_UNLOCK(zone); + return (0); + } + + zone->uz_maxpages = pages; + + if (zone->uz_obj == NULL) + zone->uz_obj = vm_object_allocate(OBJT_DEFAULT, + zone->uz_maxpages); + else + _vm_object_allocate(OBJT_DEFAULT, + zone->uz_maxpages, zone->uz_obj); + + zone->uz_allocf = obj_alloc; + zone->uz_flags |= UMA_ZFLAG_NOFREE | UMA_ZFLAG_PRIVALLOC; + + mtx_unlock(&Giant); + ZONE_UNLOCK(zone); + + return (1); +} + +/* See uma.h */ +void +uma_prealloc(uma_zone_t zone, int items) +{ + int slabs; + uma_slab_t slab; + + ZONE_LOCK(zone); + slabs = items / zone->uz_ipers; + if (slabs * zone->uz_ipers < items) + slabs++; + + while (slabs > 0) { + slab = slab_zalloc(zone, M_WAITOK); + LIST_INSERT_HEAD(&zone->uz_free_slab, slab, us_link); + slabs--; + } + ZONE_UNLOCK(zone); +} + +/* See uma.h */ +void +uma_reclaim(void) +{ + /* + * You might think that the delay below would improve performance since + * the allocator will give away memory that it may ask for immediately. + * Really, it makes things worse, since cpu cycles are so much cheaper + * than disk activity. + */ +#if 0 + static struct timeval tv = {0}; + struct timeval now; + getmicrouptime(&now); + if (now.tv_sec > tv.tv_sec + 30) + tv = now; + else + return; +#endif +#ifdef UMA_DEBUG + printf("UMA: vm asked us to release pages!\n"); +#endif + zone_foreach(zone_drain); + + /* + * Some slabs may have been freed but this zone will be visited early + * we visit again so that we can free pages that are empty once other + * zones are drained. We have to do the same for buckets. 
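uma_zone_set_obj() above takes either a caller-supplied vm_object or NULL (in which case one is allocated), reserves pageable kva for up to the requested number of items, and flips the zone over to obj_alloc() with UMA_ZFLAG_NOFREE and UMA_ZFLAG_PRIVALLOC set. A hedged sketch of a caller, in the same style as the vm_map.c change later in this diff; the zone, the object, and the limit are invented, and this has to run after the VM is up since kva is reserved here.

#include <sys/param.h>
#include <sys/systm.h>
#include <vm/vm.h>
#include <vm/vm_object.h>
#include <vm/uma.h>

struct bar {
        long    b_data[4];
};

static uma_zone_t bar_zone;
static struct vm_object bar_obj;

static void
bar_zone_setup(int maxitems)
{
        bar_zone = uma_zcreate("BAR", sizeof(struct bar),
            NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0);
        /*
         * Back the zone with its own object and cap it at maxitems items;
         * passing NULL instead of &bar_obj would let UMA allocate the
         * object.  A zero return means the kva reservation failed.
         */
        if (uma_zone_set_obj(bar_zone, &bar_obj, maxitems) == 0)
                panic("bar_zone_setup: could not reserve kva");
}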
+ */ + zone_drain(slabzone); + zone_drain(bucketzone); +} + +void * +uma_large_malloc(int size, int wait) +{ + void *mem; + uma_slab_t slab; + u_int8_t flags; + + slab = uma_zalloc_internal(slabzone, NULL, wait, NULL, -1); + if (slab == NULL) + return (NULL); + + mem = page_alloc(NULL, size, &flags, wait); + if (mem) { + slab->us_data = mem; + slab->us_flags = flags | UMA_SLAB_MALLOC; + slab->us_size = size; + UMA_HASH_INSERT(mallochash, slab, mem); + } else { + uma_zfree_internal(slabzone, slab, NULL, 0); + } + + + return (mem); +} + +void +uma_large_free(uma_slab_t slab) +{ + UMA_HASH_REMOVE(mallochash, slab, slab->us_data); + page_free(slab->us_data, slab->us_size, slab->us_flags); + uma_zfree_internal(slabzone, slab, NULL, 0); +} + +void +uma_print_stats(void) +{ + zone_foreach(uma_print_zone); +} + +void +uma_print_zone(uma_zone_t zone) +{ + printf("%s(%p) size %d(%d) flags %d ipers %d ppera %d out %d free %d\n", + zone->uz_name, zone, zone->uz_size, zone->uz_rsize, zone->uz_flags, + zone->uz_ipers, zone->uz_ppera, + (zone->uz_ipers * zone->uz_pages) - zone->uz_free, zone->uz_free); +} + +/* + * Sysctl handler for vm.zone + * + * stolen from vm_zone.c + */ +static int +sysctl_vm_zone(SYSCTL_HANDLER_ARGS) +{ + int error, len, cnt; + const int linesize = 128; /* conservative */ + int totalfree; + char *tmpbuf, *offset; + uma_zone_t z; + char *p; + + cnt = 0; + LIST_FOREACH(z, &uma_zones, uz_link) + cnt++; + MALLOC(tmpbuf, char *, (cnt == 0 ? 1 : cnt) * linesize, + M_TEMP, M_WAITOK); + len = snprintf(tmpbuf, linesize, + "\nITEM SIZE LIMIT USED FREE REQUESTS\n\n"); + if (cnt == 0) + tmpbuf[len - 1] = '\0'; + error = SYSCTL_OUT(req, tmpbuf, cnt == 0 ? len-1 : len); + if (error || cnt == 0) + goto out; + offset = tmpbuf; + LIST_FOREACH(z, &uma_zones, uz_link) { + if (cnt == 0) /* list may have changed size */ + break; + ZONE_LOCK(z); + totalfree = z->uz_free + z->uz_cachefree; + len = snprintf(offset, linesize, + "%-12.12s %6.6u, %8.8u, %6.6u, %6.6u, %8.8llu\n", + z->uz_name, z->uz_size, + z->uz_maxpages * z->uz_ipers, + (z->uz_ipers * (z->uz_pages / z->uz_ppera)) - totalfree, + totalfree, + (unsigned long long)z->uz_allocs); + ZONE_UNLOCK(z); + for (p = offset + 12; p > offset && *p == ' '; --p) + /* nothing */ ; + p[1] = ':'; + cnt--; + offset += len; + } + *offset++ = '\0'; + error = SYSCTL_OUT(req, tmpbuf, offset - tmpbuf); +out: + FREE(tmpbuf, M_TEMP); + return (error); +} diff --git a/sys/vm/uma_int.h b/sys/vm/uma_int.h new file mode 100644 index 0000000..77e7c38 --- /dev/null +++ b/sys/vm/uma_int.h @@ -0,0 +1,328 @@ +/* + * Copyright (c) 2002, Jeffrey Roberson <jroberson@chesapeake.net> + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice unmodified, this list of conditions, and the following + * disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * $FreeBSD$ + * + */ + +/* + * + * Jeff Roberson <jroberson@chesapeake.net> + * + * This file includes definitions, structures, prototypes, and inlines that + * should not be used outside of the actual implementation of UMA. + * + */ + +/* + * Here's a quick description of the relationship between the objects: + * + * Zones contain lists of slabs which are stored in either the full bin, empty + * bin, or partially allocated bin, to reduce fragmentation. They also contain + * the user supplied value for size, which is adjusted for alignment purposes + * and rsize is the result of that. The zone also stores information for + * managing a hash of page addresses that maps pages to uma_slab_t structures + * for pages that don't have embedded uma_slab_t's. + * + * The uma_slab_t may be embedded in a UMA_SLAB_SIZE chunk of memory or it may + * be allocated off the page from a special slab zone. The free list within a + * slab is managed with a linked list of indexes, which are 8 bit values. If + * UMA_SLAB_SIZE is defined to be too large I will have to switch to 16bit + * values. Currently on alpha you can get 250 or so 32 byte items and on x86 + * you can get 250 or so 16byte items. For item sizes that would yield more + * than 10% memory waste we potentially allocate a seperate uma_slab_t if this + * will improve the number of items per slab that will fit. + * + * Other potential space optimizations are storing the 8bit of linkage in space + * wasted between items due to alignment problems. This may yield a much better + * memory footprint for certain sizes of objects. Another alternative is to + * increase the UMA_SLAB_SIZE, or allow for dynamic slab sizes. I prefer + * dynamic slab sizes because we could stick with 8 bit indexes and only use + * large slab sizes for zones with a lot of waste per slab. This may create + * ineffeciencies in the vm subsystem due to fragmentation in the address space. + * + * The only really gross cases, with regards to memory waste, are for those + * items that are just over half the page size. You can get nearly 50% waste, + * so you fall back to the memory footprint of the power of two allocator. I + * have looked at memory allocation sizes on many of the machines available to + * me, and there does not seem to be an abundance of allocations at this range + * so at this time it may not make sense to optimize for it. This can, of + * course, be solved with dynamic slab sizes. + * + */ + +/* + * This is the representation for normal (Non OFFPAGE slab) + * + * i == item + * s == slab pointer + * + * <---------------- Page (UMA_SLAB_SIZE) ------------------> + * ___________________________________________________________ + * | _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ ___________ | + * ||i||i||i||i||i||i||i||i||i||i||i||i||i||i||i| |slab header|| + * ||_||_||_||_||_||_||_||_||_||_||_||_||_||_||_| |___________|| + * |___________________________________________________________| + * + * + * This is an OFFPAGE slab. These can be larger than UMA_SLAB_SIZE. 
+ * + * ___________________________________________________________ + * | _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ | + * ||i||i||i||i||i||i||i||i||i||i||i||i||i||i||i||i||i||i||i| | + * ||_||_||_||_||_||_||_||_||_||_||_||_||_||_||_||_||_||_||_| | + * |___________________________________________________________| + * ___________ ^ + * |slab header| | + * |___________|---* + * + */ + +#ifndef VM_UMA_INT_H +#define VM_UMA_INT_H + +#include <sys/mutex.h> + +#define UMA_SLAB_SIZE PAGE_SIZE /* How big are our slabs? */ +#define UMA_SLAB_MASK (PAGE_SIZE - 1) /* Mask to get back to the page */ +#define UMA_SLAB_SHIFT PAGE_SHIFT /* Number of bits PAGE_MASK */ + +#define UMA_BOOT_PAGES 15 /* Number of pages allocated for startup */ +#define UMA_WORKING_TIME 20 /* Seconds worth of items to keep */ + + +/* Max waste before going to off page slab management */ +#define UMA_MAX_WASTE (UMA_SLAB_SIZE / 10) + +/* + * I doubt there will be many cases where this is exceeded. This is the initial + * size of the hash table for uma_slabs that are managed off page. This hash + * does expand by powers of two. Currently it doesn't get smaller. + */ +#define UMA_HASH_SIZE_INIT 32 + + +/* + * I should investigate other hashing algorithms. This should yield a low + * number of collisions if the pages are relatively contiguous. + * + * This is the same algorithm that most processor caches use. + * + * I'm shifting and masking instead of % because it should be faster. + */ + +#define UMA_HASH(h, s) ((((unsigned long)s) >> UMA_SLAB_SHIFT) & \ + (h)->uh_hashmask) + +#define UMA_HASH_INSERT(h, s, mem) \ + SLIST_INSERT_HEAD(&(h)->uh_slab_hash[UMA_HASH((h), \ + (mem))], (s), us_hlink); +#define UMA_HASH_REMOVE(h, s, mem) \ + SLIST_REMOVE(&(h)->uh_slab_hash[UMA_HASH((h), \ + (mem))], (s), uma_slab, us_hlink); + +/* Page management structure */ + +/* Sorry for the union, but space efficiency is important */ +struct uma_slab { + uma_zone_t us_zone; /* Zone we live in */ + union { + LIST_ENTRY(uma_slab) us_link; /* slabs in zone */ + unsigned long us_size; /* Size of allocation */ + } us_type; + SLIST_ENTRY(uma_slab) us_hlink; /* Link for hash table */ + u_int8_t *us_data; /* First item */ + u_int8_t us_flags; /* Page flags see uma.h */ + u_int8_t us_freecount; /* How many are free? */ + u_int8_t us_firstfree; /* First free item index */ + u_int8_t us_freelist[1]; /* Free List (actually larger) */ +}; + +#define us_link us_type.us_link +#define us_size us_type.us_size + +typedef struct uma_slab * uma_slab_t; + +/* Hash table for freed address -> slab translation */ + +SLIST_HEAD(slabhead, uma_slab); + +struct uma_hash { + struct slabhead *uh_slab_hash; /* Hash table for slabs */ + int uh_hashsize; /* Current size of the hash table */ + int uh_hashmask; /* Mask used during hashing */ +}; + +extern struct uma_hash *mallochash; + +/* + * Structures for per cpu queues. + */ + +/* + * This size was chosen so that the struct bucket size is roughly + * 128 * sizeof(void *). This is exactly true for x86, and for alpha + * it will would be 32bits smaller if it didn't have alignment adjustments. 
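Because uh_hashsize is always a power of two, UMA_HASH() above reduces to a shift and a mask, and growing the table changes which bucket a slab hashes to, which is why hash_expand() in uma_core.c rehashes every slab it holds. A worked example with assumed numbers:

/*
 * PAGE_SHIFT == 12, so UMA_SLAB_SHIFT == 12, and a slab whose first
 * page sits at kva 0xc1234000:
 *
 *      0xc1234000 >> 12 = 0xc1234
 *      32-entry table: 0xc1234 & 0x1f = 0x14   -> bucket 20
 *      64-entry table: 0xc1234 & 0x3f = 0x34   -> bucket 52
 *
 * Frees recover the slab the same way: uma_zfree_internal() masks the
 * item address with ~UMA_SLAB_MASK to get this base page and then calls
 * hash_sfind() on the zone's hash (or on mallochash for malloc slabs).
 */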
+ */ + +#define UMA_BUCKET_SIZE 125 + +struct uma_bucket { + LIST_ENTRY(uma_bucket) ub_link; /* Link into the zone */ + int16_t ub_ptr; /* Pointer to current item */ + void *ub_bucket[UMA_BUCKET_SIZE]; /* actual allocation storage */ +}; + +typedef struct uma_bucket * uma_bucket_t; + +struct uma_cache { + struct mtx uc_lock; /* Spin lock on this cpu's bucket */ + int uc_count; /* Highest value ub_ptr can have */ + uma_bucket_t uc_freebucket; /* Bucket we're freeing to */ + uma_bucket_t uc_allocbucket; /* Bucket to allocate from */ + u_int64_t uc_allocs; /* Count of allocations */ +}; + +typedef struct uma_cache * uma_cache_t; + +#define LOCKNAME_LEN 16 /* Length of the name for cpu locks */ + +/* + * Zone management structure + * + * TODO: Optimize for cache line size + * + */ +struct uma_zone { + char uz_lname[LOCKNAME_LEN]; /* Text name for the cpu lock */ + char *uz_name; /* Text name of the zone */ + LIST_ENTRY(uma_zone) uz_link; /* List of all zones */ + u_int32_t uz_align; /* Alignment mask */ + u_int32_t uz_pages; /* Total page count */ + +/* Used during alloc / free */ + struct mtx uz_lock; /* Lock for the zone */ + u_int32_t uz_free; /* Count of items free in slabs */ + u_int16_t uz_ipers; /* Items per slab */ + u_int16_t uz_flags; /* Internal flags */ + + LIST_HEAD(,uma_slab) uz_part_slab; /* partially allocated slabs */ + LIST_HEAD(,uma_slab) uz_free_slab; /* empty slab list */ + LIST_HEAD(,uma_slab) uz_full_slab; /* full slabs */ + LIST_HEAD(,uma_bucket) uz_full_bucket; /* full buckets */ + LIST_HEAD(,uma_bucket) uz_free_bucket; /* Buckets for frees */ + u_int32_t uz_size; /* Requested size of each item */ + u_int32_t uz_rsize; /* Real size of each item */ + + struct uma_hash uz_hash; + u_int16_t uz_pgoff; /* Offset to uma_slab struct */ + u_int16_t uz_ppera; /* pages per allocation from backend */ + u_int16_t uz_cacheoff; /* Next cache offset */ + u_int16_t uz_cachemax; /* Max cache offset */ + + uma_ctor uz_ctor; /* Constructor for each allocation */ + uma_dtor uz_dtor; /* Destructor */ + u_int64_t uz_allocs; /* Total number of allocations */ + + uma_init uz_init; /* Initializer for each item */ + uma_fini uz_fini; /* Discards memory */ + uma_alloc uz_allocf; /* Allocation function */ + uma_free uz_freef; /* Free routine */ + struct vm_object *uz_obj; /* Zone specific object */ + vm_offset_t uz_kva; /* Base kva for zones with objs */ + u_int32_t uz_maxpages; /* Maximum number of pages to alloc */ + u_int32_t uz_cachefree; /* Last count of items free in caches */ + u_int64_t uz_oallocs; /* old allocs count */ + u_int64_t uz_wssize; /* Working set size */ + int uz_recurse; /* Allocation recursion count */ + /* + * This HAS to be the last item because we adjust the zone size + * based on NCPU and then allocate the space for the zones. 
+ */ + struct uma_cache uz_cpu[1]; /* Per cpu caches */ +}; + +#define UMA_CACHE_INC 16 /* How much will we move data */ + +#define UMA_ZFLAG_OFFPAGE 0x0001 /* Struct slab/freelist off page */ +#define UMA_ZFLAG_PRIVALLOC 0x0002 /* Zone has supplied it's own alloc */ +#define UMA_ZFLAG_INTERNAL 0x0004 /* Internal zone, no offpage no PCPU */ +#define UMA_ZFLAG_MALLOC 0x0008 /* Zone created by malloc */ +#define UMA_ZFLAG_NOFREE 0x0010 /* Don't free data from this zone */ +/* This lives in uflags */ +#define UMA_ZONE_INTERNAL 0x1000 /* Internal zone for uflags */ + +/* Internal prototypes */ +static __inline uma_slab_t hash_sfind(struct uma_hash *hash, u_int8_t *data); +void *uma_large_malloc(int size, int wait); +void uma_large_free(uma_slab_t slab); + +/* Lock Macros */ + +#define ZONE_LOCK_INIT(z) mtx_init(&(z)->uz_lock, (z)->uz_name, MTX_DEF) +#define ZONE_LOCK_FINI(z) mtx_destroy(&(z)->uz_lock) +#define ZONE_LOCK(z) mtx_lock(&(z)->uz_lock) +#define ZONE_UNLOCK(z) mtx_unlock(&(z)->uz_lock) + +#define CPU_LOCK_INIT(z, cpu) \ + mtx_init(&(z)->uz_cpu[(cpu)].uc_lock, (z)->uz_lname, MTX_DEF) + +#define CPU_LOCK_FINI(z, cpu) \ + mtx_destroy(&(z)->uz_cpu[(cpu)].uc_lock) + +#define CPU_LOCK(z, cpu) \ + mtx_lock(&(z)->uz_cpu[(cpu)].uc_lock) + +#define CPU_UNLOCK(z, cpu) \ + mtx_unlock(&(z)->uz_cpu[(cpu)].uc_lock) + +/* + * Find a slab within a hash table. This is used for OFFPAGE zones to lookup + * the slab structure. + * + * Arguments: + * hash The hash table to search. + * data The base page of the item. + * + * Returns: + * A pointer to a slab if successful, else NULL. + */ +static __inline uma_slab_t +hash_sfind(struct uma_hash *hash, u_int8_t *data) +{ + uma_slab_t slab; + int hval; + + hval = UMA_HASH(hash, data); + + SLIST_FOREACH(slab, &hash->uh_slab_hash[hval], us_hlink) { + if ((u_int8_t *)slab->us_data == data) + return (slab); + } + return (NULL); +} + + +#endif /* VM_UMA_INT_H */ diff --git a/sys/vm/vm_init.c b/sys/vm/vm_init.c index 54e0c13..5114470 100644 --- a/sys/vm/vm_init.c +++ b/sys/vm/vm_init.c @@ -114,7 +114,6 @@ vm_mem_init(dummy) /* * Initialize other VM packages */ - vm_zone_init(); vm_object_init(); vm_map_startup(); kmem_init(virtual_avail, virtual_end); diff --git a/sys/vm/vm_map.c b/sys/vm/vm_map.c index 12c6d62..8eadaa1 100644 --- a/sys/vm/vm_map.c +++ b/sys/vm/vm_map.c @@ -88,7 +88,6 @@ #include <vm/vm_pager.h> #include <vm/vm_kern.h> #include <vm/vm_extern.h> -#include <vm/vm_zone.h> #include <vm/swap_pager.h> /* @@ -131,28 +130,111 @@ * maps and requires map entries. 
*/ -static struct vm_zone kmapentzone_store, mapentzone_store, mapzone_store; -static vm_zone_t mapentzone, kmapentzone, mapzone, vmspace_zone; -static struct vm_object kmapentobj, mapentobj, mapobj; - -static struct vm_map_entry map_entry_init[MAX_MAPENT]; -static struct vm_map_entry kmap_entry_init[MAX_KMAPENT]; -static struct vm_map map_init[MAX_KMAP]; +static uma_zone_t mapentzone; +static uma_zone_t kmapentzone; +static uma_zone_t mapzone; +static uma_zone_t vmspace_zone; +static struct vm_object kmapentobj; +static void vmspace_zinit(void *mem, int size); +static void vmspace_zfini(void *mem, int size); +static void vm_map_zinit(void *mem, int size); +static void vm_map_zfini(void *mem, int size); +static void _vm_map_init(vm_map_t map, vm_offset_t min, vm_offset_t max); + +#ifdef INVARIANTS +static void vm_map_zdtor(void *mem, int size, void *arg); +static void vmspace_zdtor(void *mem, int size, void *arg); +#endif void vm_map_startup(void) { - mapzone = &mapzone_store; - zbootinit(mapzone, "MAP", sizeof (struct vm_map), - map_init, MAX_KMAP); - kmapentzone = &kmapentzone_store; - zbootinit(kmapentzone, "KMAP ENTRY", sizeof (struct vm_map_entry), - kmap_entry_init, MAX_KMAPENT); - mapentzone = &mapentzone_store; - zbootinit(mapentzone, "MAP ENTRY", sizeof (struct vm_map_entry), - map_entry_init, MAX_MAPENT); + mapzone = uma_zcreate("MAP", sizeof(struct vm_map), NULL, +#ifdef INVARIANTS + vm_map_zdtor, +#else + NULL, +#endif + vm_map_zinit, vm_map_zfini, UMA_ALIGN_PTR, UMA_ZONE_NOFREE); + uma_prealloc(mapzone, MAX_KMAP); + kmapentzone = zinit("KMAP ENTRY", sizeof(struct vm_map_entry), 0, 0, 0); uma_prealloc(kmapentzone, MAX_KMAPENT); + mapentzone = zinit("MAP ENTRY", sizeof(struct vm_map_entry), 0, 0, 0); + uma_prealloc(mapentzone, MAX_MAPENT); +} + +static void +vmspace_zfini(void *mem, int size) +{ + struct vmspace *vm; + + vm = (struct vmspace *)mem; + + vm_map_zfini(&vm->vm_map, sizeof(vm->vm_map)); +} + +static void +vmspace_zinit(void *mem, int size) +{ + struct vmspace *vm; + + vm = (struct vmspace *)mem; + + vm_map_zinit(&vm->vm_map, sizeof(vm->vm_map)); +} + +static void +vm_map_zfini(void *mem, int size) +{ + vm_map_t map; + + GIANT_REQUIRED; + map = (vm_map_t)mem; + + lockdestroy(&map->lock); } +static void +vm_map_zinit(void *mem, int size) +{ + vm_map_t map; + + GIANT_REQUIRED; + + map = (vm_map_t)mem; + map->nentries = 0; + map->size = 0; + map->infork = 0; + lockinit(&map->lock, PVM, "thrd_sleep", 0, LK_NOPAUSE); +} + +#ifdef INVARIANTS +static void +vmspace_zdtor(void *mem, int size, void *arg) +{ + struct vmspace *vm; + + vm = (struct vmspace *)mem; + + vm_map_zdtor(&vm->vm_map, sizeof(vm->vm_map), arg); +} +static void +vm_map_zdtor(void *mem, int size, void *arg) +{ + vm_map_t map; + + map = (vm_map_t)mem; + KASSERT(map->nentries == 0, + ("map %p nentries == %d on free.", + map, map->nentries)); + KASSERT(map->size == 0, + ("map %p size == %lu on free.", + map, map->size)); + KASSERT(map->infork == 0, + ("map %p infork == %d on free.", + map, map->infork)); +} +#endif /* INVARIANTS */ + /* * Allocate a vmspace structure, including a vm_map and pmap, * and initialize those structures. The refcnt is set to 1. 
@@ -165,9 +247,9 @@ vmspace_alloc(min, max) struct vmspace *vm; GIANT_REQUIRED; - vm = zalloc(vmspace_zone); + vm = uma_zalloc(vmspace_zone, M_WAITOK); CTR1(KTR_VM, "vmspace_alloc: %p", vm); - vm_map_init(&vm->vm_map, min, max); + _vm_map_init(&vm->vm_map, min, max); pmap_pinit(vmspace_pmap(vm)); vm->vm_map.pmap = vmspace_pmap(vm); /* XXX */ vm->vm_refcnt = 1; @@ -179,13 +261,14 @@ vmspace_alloc(min, max) void vm_init2(void) { - zinitna(kmapentzone, &kmapentobj, - NULL, 0, cnt.v_page_count / 4, ZONE_INTERRUPT, 1); - zinitna(mapentzone, &mapentobj, - NULL, 0, 0, 0, 1); - zinitna(mapzone, &mapobj, - NULL, 0, 0, 0, 1); - vmspace_zone = zinit("VMSPACE", sizeof (struct vmspace), 0, 0, 3); + uma_zone_set_obj(kmapentzone, &kmapentobj, cnt.v_page_count / 4); + vmspace_zone = uma_zcreate("VMSPACE", sizeof(struct vmspace), NULL, +#ifdef INVARIANTS + vmspace_zdtor, +#else + NULL, +#endif + vmspace_zinit, vmspace_zfini, UMA_ALIGN_PTR, UMA_ZONE_NOFREE); pmap_init2(); vm_object_init2(); } @@ -203,9 +286,9 @@ vmspace_dofree(struct vmspace *vm) (void) vm_map_delete(&vm->vm_map, vm->vm_map.min_offset, vm->vm_map.max_offset); vm_map_unlock(&vm->vm_map); + pmap_release(vmspace_pmap(vm)); - vm_map_destroy(&vm->vm_map); - zfree(vmspace_zone, vm); + uma_zfree(vmspace_zone, vm); } void @@ -390,9 +473,9 @@ vm_map_create(pmap_t pmap, vm_offset_t min, vm_offset_t max) GIANT_REQUIRED; - result = zalloc(mapzone); + result = uma_zalloc(mapzone, M_WAITOK); CTR1(KTR_VM, "vm_map_create: %p", result); - vm_map_init(result, min, max); + _vm_map_init(result, min, max); result->pmap = pmap; return (result); } @@ -402,30 +485,25 @@ vm_map_create(pmap_t pmap, vm_offset_t min, vm_offset_t max) * such as that in the vmspace structure. * The pmap is set elsewhere. */ -void -vm_map_init(vm_map_t map, vm_offset_t min, vm_offset_t max) +static void +_vm_map_init(vm_map_t map, vm_offset_t min, vm_offset_t max) { GIANT_REQUIRED; map->header.next = map->header.prev = &map->header; - map->nentries = 0; - map->size = 0; map->system_map = 0; - map->infork = 0; map->min_offset = min; map->max_offset = max; map->first_free = &map->header; map->hint = &map->header; map->timestamp = 0; - lockinit(&map->lock, PVM, "thrd_sleep", 0, LK_NOPAUSE); } void -vm_map_destroy(map) - struct vm_map *map; +vm_map_init(vm_map_t map, vm_offset_t min, vm_offset_t max) { - GIANT_REQUIRED; - lockdestroy(&map->lock); + _vm_map_init(map, min, max); + lockinit(&map->lock, PVM, "thrd_sleep", 0, LK_NOPAUSE); } /* @@ -436,7 +514,8 @@ vm_map_destroy(map) static void vm_map_entry_dispose(vm_map_t map, vm_map_entry_t entry) { - zfree((map->system_map || !mapentzone) ? kmapentzone : mapentzone, entry); + uma_zfree((map->system_map || !mapentzone) + ? kmapentzone : mapentzone, entry); } /* @@ -450,8 +529,8 @@ vm_map_entry_create(vm_map_t map) { vm_map_entry_t new_entry; - new_entry = zalloc((map->system_map || !mapentzone) ? - kmapentzone : mapentzone); + new_entry = uma_zalloc((map->system_map || !mapentzone) ? 
+ kmapentzone : mapentzone, M_WAITOK); if (new_entry == NULL) panic("vm_map_entry_create: kernel resources exhausted"); return (new_entry); diff --git a/sys/vm/vm_map.h b/sys/vm/vm_map.h index b23af37..eefff35 100644 --- a/sys/vm/vm_map.h +++ b/sys/vm/vm_map.h @@ -267,7 +267,6 @@ int vm_map_find (vm_map_t, vm_object_t, vm_ooffset_t, vm_offset_t *, vm_size_t, int vm_map_findspace (vm_map_t, vm_offset_t, vm_size_t, vm_offset_t *); int vm_map_inherit (vm_map_t, vm_offset_t, vm_offset_t, vm_inherit_t); void vm_map_init (struct vm_map *, vm_offset_t, vm_offset_t); -void vm_map_destroy (struct vm_map *); int vm_map_insert (vm_map_t, vm_object_t, vm_ooffset_t, vm_offset_t, vm_offset_t, vm_prot_t, vm_prot_t, int); int vm_map_lookup (vm_map_t *, vm_offset_t, vm_prot_t, vm_map_entry_t *, vm_object_t *, vm_pindex_t *, vm_prot_t *, boolean_t *); diff --git a/sys/vm/vm_object.c b/sys/vm/vm_object.c index a561c7c..e6f1ad5 100644 --- a/sys/vm/vm_object.c +++ b/sys/vm/vm_object.c @@ -147,11 +147,45 @@ extern int vm_pageout_page_count; static long object_collapses; static long object_bypasses; static int next_index; -static vm_zone_t obj_zone; -static struct vm_zone obj_zone_store; static int object_hash_rand; +static vm_zone_t obj_zone; #define VM_OBJECTS_INIT 256 -static struct vm_object vm_objects_init[VM_OBJECTS_INIT]; + +static void vm_object_zinit(void *mem, int size); + +#ifdef INVARIANTS +static void vm_object_zdtor(void *mem, int size, void *arg); + +static void +vm_object_zdtor(void *mem, int size, void *arg) +{ + vm_object_t object; + + object = (vm_object_t)mem; + KASSERT(object->paging_in_progress == 0, + ("object %p paging_in_progress = %d", + object, object->paging_in_progress)); + KASSERT(object->resident_page_count == 0, + ("object %p resident_page_count = %d", + object, object->resident_page_count)); + KASSERT(object->shadow_count == 0, + ("object %p shadow_count = %d", + object, object->shadow_count)); +} +#endif + +static void +vm_object_zinit(void *mem, int size) +{ + vm_object_t object; + + object = (vm_object_t)mem; + + /* These are true for any object that has been freed */ + object->paging_in_progress = 0; + object->resident_page_count = 0; + object->shadow_count = 0; +} void _vm_object_allocate(objtype_t type, vm_size_t size, vm_object_t object) @@ -169,9 +203,6 @@ _vm_object_allocate(objtype_t type, vm_size_t size, vm_object_t object) object->flags = 0; if ((object->type == OBJT_DEFAULT) || (object->type == OBJT_SWAP)) vm_object_set_flag(object, OBJ_ONEMAPPING); - object->paging_in_progress = 0; - object->resident_page_count = 0; - object->shadow_count = 0; object->pg_color = next_index; if (size > (PQ_L2_SIZE / 3 + PQ_PRIME1)) incr = PQ_L2_SIZE / 3 + PQ_PRIME1; @@ -216,16 +247,19 @@ vm_object_init(void) kmem_object = &kmem_object_store; _vm_object_allocate(OBJT_DEFAULT, OFF_TO_IDX(VM_MAX_KERNEL_ADDRESS - VM_MIN_KERNEL_ADDRESS), kmem_object); - - obj_zone = &obj_zone_store; - zbootinit(obj_zone, "VM OBJECT", sizeof (struct vm_object), - vm_objects_init, VM_OBJECTS_INIT); + obj_zone = uma_zcreate("VM OBJECT", sizeof (struct vm_object), NULL, +#ifdef INVARIANTS + vm_object_zdtor, +#else + NULL, +#endif + vm_object_zinit, NULL, UMA_ALIGN_PTR, UMA_ZONE_NOFREE); + uma_prealloc(obj_zone, VM_OBJECTS_INIT); } void vm_object_init2(void) { - zinitna(obj_zone, NULL, NULL, 0, 0, 0, 1); } void diff --git a/sys/vm/vm_page.c b/sys/vm/vm_page.c index 3022b73..706929a 100644 --- a/sys/vm/vm_page.c +++ b/sys/vm/vm_page.c @@ -118,6 +118,8 @@ #include <vm/vm_pageout.h> #include <vm/vm_pager.h> 
#include <vm/vm_extern.h> +#include <vm/uma.h> +#include <vm/uma_int.h> /* * Associated with page of user-allocatable memory is a @@ -176,6 +178,7 @@ vm_page_startup(vm_offset_t starta, vm_offset_t enda, vm_offset_t vaddr) vm_offset_t biggestone, biggestsize; vm_offset_t total; + vm_size_t bootpages; total = 0; biggestsize = 0; @@ -208,6 +211,19 @@ vm_page_startup(vm_offset_t starta, vm_offset_t enda, vm_offset_t vaddr) vm_pageq_init(); /* + * Allocate memory for use when bootstrapping the kernel memory allocator + */ + bootpages = UMA_BOOT_PAGES * UMA_SLAB_SIZE; + new_end = end - bootpages; + new_end = trunc_page(new_end); + mapped = pmap_map(&vaddr, new_end, end, + VM_PROT_READ | VM_PROT_WRITE); + bzero((caddr_t) mapped, end - new_end); + uma_startup((caddr_t)mapped); + + end = new_end; + + /* * Allocate (and initialize) the hash table buckets. * * The number of buckets MUST BE a power of 2, and the actual value is diff --git a/sys/vm/vm_pageout.c b/sys/vm/vm_pageout.c index 5567628..c4b94de 100644 --- a/sys/vm/vm_pageout.c +++ b/sys/vm/vm_pageout.c @@ -649,6 +649,7 @@ vm_pageout_scan(int pass) * Do whatever cleanup that the pmap code can. */ pmap_collect(); + uma_reclaim(); addl_page_shortage_init = vm_pageout_deficit; vm_pageout_deficit = 0; diff --git a/sys/vm/vm_zone.h b/sys/vm/vm_zone.h index 83d7914..a355051 100644 --- a/sys/vm/vm_zone.h +++ b/sys/vm/vm_zone.h @@ -23,40 +23,30 @@ #include <sys/_lock.h> #include <sys/_mutex.h> +#include <vm/uma.h> -typedef struct vm_zone { - struct mtx zmtx; /* lock for data structure */ - void *zitems; /* linked list of items */ - int zfreecnt; /* free entries */ - int zfreemin; /* minimum number of free entries */ - int znalloc; /* number of allocations */ - vm_offset_t zkva; /* Base kva of zone */ - int zpagecount; /* Total # of allocated pages */ - int zpagemax; /* Max address space */ - int zmax; /* Max number of entries allocated */ - int ztotal; /* Total entries allocated now */ - int zsize; /* size of each entry */ - int zalloc; /* hint for # of pages to alloc */ - int zflags; /* flags for zone */ - int zallocflag; /* flag for allocation */ - struct vm_object *zobj; /* object to hold zone */ - char *zname; /* name for diags */ - /* NOTE: zent is protected by the subsystem lock, *not* by zmtx */ - SLIST_ENTRY(vm_zone) zent; /* singly-linked list of zones */ -} *vm_zone_t; - - -void vm_zone_init(void); -void vm_zone_init2(void); +typedef uma_zone_t vm_zone_t; +#if 0 +static void vm_zone_init(void); +static void vm_zone_init2(void); + +static vm_zone_t zinit(char *name, int size, int nentries, + int flags, int zalloc); int zinitna(vm_zone_t z, struct vm_object *obj, char *name, int size, int nentries, int flags, int zalloc); -vm_zone_t zinit(char *name, int size, int nentries, - int flags, int zalloc); void zbootinit(vm_zone_t z, char *name, int size, void *item, int nitems); -static void zdestroy(vm_zone_t z); -void *zalloc(vm_zone_t z); -void zfree(vm_zone_t z, void *item); - +static void zdestroy(vm_zone_t z); +static void *zalloc(vm_zone_t z); +static void zfree(vm_zone_t z, void *item); +#endif + +#define vm_zone_init2() uma_startup2() + +#define zinit(name, size, nentries, flags, zalloc) \ + uma_zcreate((name), (size), NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_NOFREE) +#define zdestroy() +#define zalloc(z) uma_zalloc((z), M_WAITOK) +#define zfree(z, item) uma_zfree((z), (item)) #endif /* _SYS_ZONE_H */
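The per-subsystem conversions in this change all follow one pattern: create a zone with uma_zcreate() (optionally supplying item init/fini hooks and an INVARIANTS-only destructor), warm it with uma_prealloc() or back it with a VM object via uma_zone_set_obj(), and then replace zalloc()/zfree() calls with uma_zalloc()/uma_zfree(). The sketch below illustrates that pattern for a hypothetical "struct foo" cache; it is not part of this change, and the foo_* names are invented, but the UMA calls and hook signatures mirror the vm_map and vm_object zones converted above.

/*
 * Hypothetical sketch (not in this commit): a subsystem item cache
 * converted to UMA in the same style as the vm_map/vm_object zones.
 */
#include <sys/param.h>
#include <sys/lock.h>
#include <sys/malloc.h>
#include <sys/mutex.h>
#include <vm/uma.h>

struct foo {
	struct mtx	f_lock;
	int		f_refs;
};

static uma_zone_t foo_zone;

/* Runs when an item first becomes usable, like vm_map_zinit() above. */
static void
foo_zinit(void *mem, int size)
{
	struct foo *f;

	f = (struct foo *)mem;
	mtx_init(&f->f_lock, "foo", MTX_DEF);
	f->f_refs = 0;
}

/* Undoes foo_zinit() when memory is handed back, like vm_map_zfini(). */
static void
foo_zfini(void *mem, int size)
{
	struct foo *f;

	f = (struct foo *)mem;
	mtx_destroy(&f->f_lock);
}

static void
foo_zone_setup(int nitems)
{
	foo_zone = uma_zcreate("FOO", sizeof(struct foo), NULL, NULL,
	    foo_zinit, foo_zfini, UMA_ALIGN_PTR, UMA_ZONE_NOFREE);
	/* Items needed before the VM is fully up can be preallocated. */
	uma_prealloc(foo_zone, nitems);
}

static struct foo *
foo_alloc(void)
{
	return (uma_zalloc(foo_zone, M_WAITOK));
}

static void
foo_free(struct foo *f)
{
	uma_zfree(foo_zone, f);
}

Legacy callers that still use zinit()/zalloc()/zfree() keep compiling through the compatibility macros added to vm_zone.h above, which simply expand to uma_zcreate(), uma_zalloc(..., M_WAITOK), and uma_zfree().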