author     jeff <jeff@FreeBSD.org>    2013-08-07 06:21:20 +0000
committer  jeff <jeff@FreeBSD.org>    2013-08-07 06:21:20 +0000
commit     de4ecca21340ce4d0bf9182cac133c14e031218e (patch)
tree       950bad07f0aeeeae78036d82b9aa11ae998c3654 /sys/kern
parent     e141f5c0bac3839e4886a26e1ba796f4e46e6455 (diff)
Replace kernel virtual address space allocation with vmem. This provides
transparent layering and better fragmentation behavior.
- Normalize functions that allocate memory to use kmem_*
- Those that allocate address space are named kva_*
- Those that operate on maps are named kmap_* (see the usage sketch below)
- Implement recursive allocation handling for kmem_arena in vmem.
Reviewed by: alc
Tested by: pho
Sponsored by: EMC / Isilon Storage Division
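The renames follow a single convention; the fragment below is an illustrative sketch only (not part of this commit) showing how a caller chooses between the three prefixes after the change. It uses only interfaces that appear in the diff below (kva_alloc, kmem_alloc_contig, kmem_free, kmap_alloc_wait, kmap_free_wakeup) plus kva_free from the wider patch; the sizes and flags are arbitrary examples.

#include <sys/param.h>
#include <sys/malloc.h>
#include <vm/vm.h>
#include <vm/vm_kern.h>
#include <vm/vm_extern.h>

/* Illustrative sketch only, not part of the commit. */
static void
kva_naming_example(void)
{
	vm_offset_t kva, mem, buf;

	/* kva_*: raw kernel virtual address space, no backing pages. */
	kva = kva_alloc(PAGE_SIZE);

	/* kmem_*: backed kernel memory, now charged to a vmem arena. */
	mem = kmem_alloc_contig(kernel_arena, PAGE_SIZE, M_WAITOK,
	    0, ~(vm_paddr_t)0, 1, 0, VM_MEMATTR_DEFAULT);

	/* kmap_*: vm_map-based allocation with sleep/wakeup semantics. */
	buf = kmap_alloc_wait(exec_map, PAGE_SIZE);

	kmap_free_wakeup(exec_map, buf, PAGE_SIZE);
	kmem_free(kernel_arena, mem, PAGE_SIZE);
	kva_free(kva, PAGE_SIZE);
}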
Diffstat (limited to 'sys/kern')
-rw-r--r--  sys/kern/imgact_gzip.c            4
-rw-r--r--  sys/kern/init_main.c              5
-rw-r--r--  sys/kern/kern_exec.c              4
-rw-r--r--  sys/kern/kern_malloc.c           66
-rw-r--r--  sys/kern/kern_mbuf.c              5
-rw-r--r--  sys/kern/kern_sharedpage.c        2
-rw-r--r--  sys/kern/subr_busdma_bufalloc.c   6
-rw-r--r--  sys/kern/subr_vmem.c             99
-rw-r--r--  sys/kern/vfs_bio.c                2
9 files changed, 149 insertions, 44 deletions
diff --git a/sys/kern/imgact_gzip.c b/sys/kern/imgact_gzip.c
index 7c48ac6..230854b 100644
--- a/sys/kern/imgact_gzip.c
+++ b/sys/kern/imgact_gzip.c
@@ -137,7 +137,7 @@ exec_gzip_imgact(imgp)
 	}
 	if (igz.inbuf)
-		kmem_free_wakeup(exec_map, (vm_offset_t)igz.inbuf, PAGE_SIZE);
+		kmap_free_wakeup(exec_map, (vm_offset_t)igz.inbuf, PAGE_SIZE);
 	if (igz.error || error) {
 		printf("Output=%lu ", igz.output);
 		printf("Inflate_error=%d igz.error=%d where=%d\n",
@@ -310,7 +310,7 @@ NextByte(void *vp)
 		return igz->inbuf[(igz->idx++) - igz->offset];
 	}
 	if (igz->inbuf)
-		kmem_free_wakeup(exec_map, (vm_offset_t)igz->inbuf, PAGE_SIZE);
+		kmap_free_wakeup(exec_map, (vm_offset_t)igz->inbuf, PAGE_SIZE);
 	igz->offset = igz->idx & ~PAGE_MASK;
 	error = vm_mmap(exec_map,	/* map */
diff --git a/sys/kern/init_main.c b/sys/kern/init_main.c
index 1eb3647..247e431 100644
--- a/sys/kern/init_main.c
+++ b/sys/kern/init_main.c
@@ -461,11 +461,6 @@ proc0_init(void *dummy __unused)
 	sleepinit();
 
 	/*
-	 * additional VM structures
-	 */
-	vm_init2();
-
-	/*
 	 * Create process 0 (the swapper).
 	 */
 	LIST_INSERT_HEAD(&allproc, p, p_list);
diff --git a/sys/kern/kern_exec.c b/sys/kern/kern_exec.c
index c0e1435..156c80d 100644
--- a/sys/kern/kern_exec.c
+++ b/sys/kern/kern_exec.c
@@ -1192,7 +1192,7 @@ int
 exec_alloc_args(struct image_args *args)
 {
 
-	args->buf = (char *)kmem_alloc_wait(exec_map, PATH_MAX + ARG_MAX);
+	args->buf = (char *)kmap_alloc_wait(exec_map, PATH_MAX + ARG_MAX);
 	return (args->buf != NULL ? 0 : ENOMEM);
 }
 
@@ -1201,7 +1201,7 @@ exec_free_args(struct image_args *args)
 {
 
 	if (args->buf != NULL) {
-		kmem_free_wakeup(exec_map, (vm_offset_t)args->buf,
+		kmap_free_wakeup(exec_map, (vm_offset_t)args->buf,
 		    PATH_MAX + ARG_MAX);
 		args->buf = NULL;
 	}
diff --git a/sys/kern/kern_malloc.c b/sys/kern/kern_malloc.c
index 5e4a502..63d8386 100644
--- a/sys/kern/kern_malloc.c
+++ b/sys/kern/kern_malloc.c
@@ -62,9 +62,11 @@ __FBSDID("$FreeBSD$");
 #include <sys/sbuf.h>
 #include <sys/sysctl.h>
 #include <sys/time.h>
+#include <sys/vmem.h>
 
 #include <vm/vm.h>
 #include <vm/pmap.h>
+#include <vm/vm_pageout.h>
 #include <vm/vm_param.h>
 #include <vm/vm_kern.h>
 #include <vm/vm_extern.h>
@@ -113,12 +115,7 @@ MALLOC_DEFINE(M_TEMP, "temp", "misc temporary data buffers");
 MALLOC_DEFINE(M_IP6OPT, "ip6opt", "IPv6 options");
 MALLOC_DEFINE(M_IP6NDP, "ip6ndp", "IPv6 Neighbor Discovery");
 
-static void kmeminit(void *);
-SYSINIT(kmem, SI_SUB_KMEM, SI_ORDER_FIRST, kmeminit, NULL);
-
 static struct malloc_type *kmemstatistics;
-static vm_offset_t kmembase;
-static vm_offset_t kmemlimit;
 static int kmemcount;
 
 #define KMEM_ZSHIFT	4
@@ -203,12 +200,12 @@ SYSCTL_UINT(_vm, OID_AUTO, kmem_size_scale, CTLFLAG_RDTUN, &vm_kmem_size_scale,
 static int sysctl_kmem_map_size(SYSCTL_HANDLER_ARGS);
 SYSCTL_PROC(_vm, OID_AUTO, kmem_map_size, CTLFLAG_RD | CTLTYPE_ULONG |
     CTLFLAG_MPSAFE, NULL, 0,
-    sysctl_kmem_map_size, "LU", "Current kmem_map allocation size");
+    sysctl_kmem_map_size, "LU", "Current kmem allocation size");
 
 static int sysctl_kmem_map_free(SYSCTL_HANDLER_ARGS);
 SYSCTL_PROC(_vm, OID_AUTO, kmem_map_free, CTLFLAG_RD | CTLTYPE_ULONG |
     CTLFLAG_MPSAFE, NULL, 0,
-    sysctl_kmem_map_free, "LU", "Largest contiguous free range in kmem_map");
+    sysctl_kmem_map_free, "LU", "Free space in kmem");
 
 /*
  * The malloc_mtx protects the kmemstatistics linked list.
@@ -253,7 +250,7 @@ sysctl_kmem_map_size(SYSCTL_HANDLER_ARGS)
 {
 	u_long size;
 
-	size = kmem_map->size;
+	size = vmem_size(kmem_arena, VMEM_ALLOC);
 	return (sysctl_handle_long(oidp, &size, 0, req));
 }
 
@@ -262,10 +259,7 @@ sysctl_kmem_map_free(SYSCTL_HANDLER_ARGS)
 {
 	u_long size;
 
-	vm_map_lock_read(kmem_map);
-	size = kmem_map->root != NULL ? kmem_map->root->max_free :
-	    kmem_map->max_offset - kmem_map->min_offset;
-	vm_map_unlock_read(kmem_map);
+	size = vmem_size(kmem_arena, VMEM_FREE);
 	return (sysctl_handle_long(oidp, &size, 0, req));
 }
 
@@ -420,7 +414,7 @@ contigmalloc(unsigned long size, struct malloc_type *type, int flags,
 {
 	void *ret;
 
-	ret = (void *)kmem_alloc_contig(kernel_map, size, flags, low, high,
+	ret = (void *)kmem_alloc_contig(kernel_arena, size, flags, low, high,
 	    alignment, boundary, VM_MEMATTR_DEFAULT);
 	if (ret != NULL)
 		malloc_type_allocated(type, round_page(size));
@@ -438,7 +432,7 @@ void
 contigfree(void *addr, unsigned long size, struct malloc_type *type)
 {
 
-	kmem_free(kernel_map, (vm_offset_t)addr, size);
+	kmem_free(kernel_arena, (vm_offset_t)addr, size);
 	malloc_type_freed(type, round_page(size));
 }
 
@@ -681,18 +675,24 @@ reallocf(void *addr, unsigned long size, struct malloc_type *mtp, int flags)
 }
 
 /*
- * Initialize the kernel memory allocator
+ * Wake the page daemon when we exhaust KVA. It will call the lowmem handler
+ * and uma_reclaim() callbacks in a context that is safe.
  */
-/* ARGSUSED*/
 static void
-kmeminit(void *dummy)
+kmem_reclaim(vmem_t *vm, int flags)
+{
+
+	pagedaemon_wakeup();
+}
+
+/*
+ * Initialize the kernel memory arena.
+ */
+void
+kmeminit(void)
 {
-	uint8_t indx;
 	u_long mem_size, tmp;
-	int i;
 
-	mtx_init(&malloc_mtx, "malloc", NULL, MTX_DEF);
-
 	/*
 	 * Try to auto-tune the kernel memory size, so that it is
 	 * more applicable for a wider range of machine sizes. The
@@ -745,9 +745,9 @@ kmeminit(void *dummy)
 #else
 	tmp = vm_kmem_size;
 #endif
-	kmem_map = kmem_suballoc(kernel_map, &kmembase, &kmemlimit,
-	    tmp, TRUE);
-	kmem_map->system_map = 1;
+	vmem_init(kmem_arena, "kmem arena", kva_alloc(tmp), tmp, PAGE_SIZE,
+	    PAGE_SIZE * 16, 0);
+	vmem_set_reclaim(kmem_arena, kmem_reclaim);
 
 #ifdef DEBUG_MEMGUARD
 	/*
@@ -755,8 +755,23 @@ kmeminit(void *dummy)
 	 * replacement allocator used for detecting tamper-after-free
 	 * scenarios as they occur. It is only used for debugging.
 	 */
-	memguard_init(kmem_map);
+	memguard_init(kmem_arena);
 #endif
+}
+
+/*
+ * Initialize the kernel memory allocator
+ */
+/* ARGSUSED*/
+static void
+mallocinit(void *dummy)
+{
+	int i;
+	uint8_t indx;
+
+	mtx_init(&malloc_mtx, "malloc", NULL, MTX_DEF);
+
+	kmeminit();
 
 	uma_startup2();
 
@@ -787,6 +802,7 @@ kmeminit(void *dummy)
 	}
 }
+SYSINIT(kmem, SI_SUB_KMEM, SI_ORDER_FIRST, mallocinit, NULL);
 
 void
 malloc_init(void *data)
diff --git a/sys/kern/kern_mbuf.c b/sys/kern/kern_mbuf.c
index 9e85806..df9b854 100644
--- a/sys/kern/kern_mbuf.c
+++ b/sys/kern/kern_mbuf.c
@@ -121,8 +121,7 @@ tunable_mbinit(void *dummy)
 	 * available kernel memory (physical or kmem).
 	 * At most it can be 3/4 of available kernel memory.
 	 */
-	realmem = qmin((quad_t)physmem * PAGE_SIZE,
-	    vm_map_max(kmem_map) - vm_map_min(kmem_map));
+	realmem = qmin((quad_t)physmem * PAGE_SIZE, vm_kmem_size);
 	maxmbufmem = realmem / 2;
 	TUNABLE_QUAD_FETCH("kern.ipc.maxmbufmem", &maxmbufmem);
 	if (maxmbufmem > realmem / 4 * 3)
@@ -395,7 +394,7 @@ mbuf_jumbo_alloc(uma_zone_t zone, int bytes, uint8_t *flags, int wait)
 
 	/* Inform UMA that this allocator uses kernel_map/object. */
 	*flags = UMA_SLAB_KERNEL;
-	return ((void *)kmem_alloc_contig(kernel_map, bytes, wait,
+	return ((void *)kmem_alloc_contig(kernel_arena, bytes, wait,
 	    (vm_paddr_t)0, ~(vm_paddr_t)0, 1, 0, VM_MEMATTR_DEFAULT));
 }
diff --git a/sys/kern/kern_sharedpage.c b/sys/kern/kern_sharedpage.c
index 20b9038..622592b 100644
--- a/sys/kern/kern_sharedpage.c
+++ b/sys/kern/kern_sharedpage.c
@@ -112,7 +112,7 @@ shared_page_init(void *dummy __unused)
 	    VM_ALLOC_ZERO);
 	m->valid = VM_PAGE_BITS_ALL;
 	VM_OBJECT_WUNLOCK(shared_page_obj);
-	addr = kmem_alloc_nofault(kernel_map, PAGE_SIZE);
+	addr = kva_alloc(PAGE_SIZE);
 	pmap_qenter(addr, &m, 1);
 	shared_page_mapping = (char *)addr;
 }
diff --git a/sys/kern/subr_busdma_bufalloc.c b/sys/kern/subr_busdma_bufalloc.c
index 9406d95..a80a233 100644
--- a/sys/kern/subr_busdma_bufalloc.c
+++ b/sys/kern/subr_busdma_bufalloc.c
@@ -152,10 +152,10 @@ busdma_bufalloc_alloc_uncacheable(uma_zone_t zone, int size, u_int8_t *pflag,
 {
 #ifdef VM_MEMATTR_UNCACHEABLE
 
-	/* Inform UMA that this allocator uses kernel_map/object. */
+	/* Inform UMA that this allocator uses kernel_arena/object. */
 	*pflag = UMA_SLAB_KERNEL;
 
-	return ((void *)kmem_alloc_attr(kernel_map, size, wait, 0,
+	return ((void *)kmem_alloc_attr(kernel_arena, size, wait, 0,
 	    BUS_SPACE_MAXADDR, VM_MEMATTR_UNCACHEABLE));
 
 #else
@@ -169,6 +169,6 @@ void
 busdma_bufalloc_free_uncacheable(void *item, int size, u_int8_t pflag)
 {
 
-	kmem_free(kernel_map, (vm_offset_t)item, size);
+	kmem_free(kernel_arena, (vm_offset_t)item, size);
 }
diff --git a/sys/kern/subr_vmem.c b/sys/kern/subr_vmem.c
index 6aa8ad3..d3a758d 100644
--- a/sys/kern/subr_vmem.c
+++ b/sys/kern/subr_vmem.c
@@ -61,6 +61,7 @@ __FBSDID("$FreeBSD$");
 #include <vm/vm.h>
 #include <vm/pmap.h>
 #include <vm/vm_map.h>
+#include <vm/vm_object.h>
 #include <vm/vm_kern.h>
 #include <vm/vm_extern.h>
 #include <vm/vm_param.h>
@@ -213,8 +214,12 @@ static LIST_HEAD(, vmem) vmem_list = LIST_HEAD_INITIALIZER(vmem_list);
 static uma_zone_t vmem_bt_zone;
 
 /* boot time arena storage. */
+static struct vmem kernel_arena_storage;
+static struct vmem kmem_arena_storage;
 static struct vmem buffer_arena_storage;
 static struct vmem transient_arena_storage;
+vmem_t *kernel_arena = &kernel_arena_storage;
+vmem_t *kmem_arena = &kmem_arena_storage;
 vmem_t *buffer_arena = &buffer_arena_storage;
 vmem_t *transient_arena = &transient_arena_storage;
 
@@ -231,6 +236,14 @@ bt_fill(vmem_t *vm, int flags)
 	VMEM_ASSERT_LOCKED(vm);
 
 	/*
+	 * Only allow the kmem arena to dip into reserve tags. It is the
+	 * vmem where new tags come from.
+	 */
+	flags &= BT_FLAGS;
+	if (vm != kmem_arena)
+		flags &= ~M_USE_RESERVE;
+
+	/*
 	 * Loop until we meet the reserve. To minimize the lock shuffle
 	 * and prevent simultaneous fills we first try a NOWAIT regardless
 	 * of the caller's flags. Specify M_NOVM so we don't recurse while
@@ -545,6 +558,77 @@ qc_drain(vmem_t *vm)
 		zone_drain(vm->vm_qcache[i].qc_cache);
 }
 
+#ifndef UMA_MD_SMALL_ALLOC
+
+static struct mtx_padalign vmem_bt_lock;
+
+/*
+ * vmem_bt_alloc:  Allocate a new page of boundary tags.
+ *
+ * On architectures with uma_small_alloc there is no recursion; no address
+ * space need be allocated to allocate boundary tags. For the others, we
+ * must handle recursion. Boundary tags are necessary to allocate new
+ * boundary tags.
+ *
+ * UMA guarantees that enough tags are held in reserve to allocate a new
+ * page of kva. We dip into this reserve by specifying M_USE_RESERVE only
+ * when allocating the page to hold new boundary tags. In this way the
+ * reserve is automatically filled by the allocation that uses the reserve.
+ *
+ * We still have to guarantee that the new tags are allocated atomically since
+ * many threads may try concurrently. The bt_lock provides this guarantee.
+ * We convert WAITOK allocations to NOWAIT and then handle the blocking here
+ * on failure. It's ok to return NULL for a WAITOK allocation as UMA will
+ * loop again after checking to see if we lost the race to allocate.
+ *
+ * There is a small race between vmem_bt_alloc() returning the page and the
+ * zone lock being acquired to add the page to the zone. For WAITOK
+ * allocations we just pause briefly. NOWAIT may experience a transient
+ * failure. To alleviate this we permit a small number of simultaneous
+ * fills to proceed concurrently so NOWAIT is less likely to fail unless
+ * we are really out of KVA.
+ */
+static void *
+vmem_bt_alloc(uma_zone_t zone, int bytes, uint8_t *pflag, int wait)
+{
+	vmem_addr_t addr;
+
+	*pflag = UMA_SLAB_KMEM;
+
+	/*
+	 * Single thread boundary tag allocation so that the address space
+	 * and memory are added in one atomic operation.
+	 */
+	mtx_lock(&vmem_bt_lock);
+	if (vmem_xalloc(kmem_arena, bytes, 0, 0, 0, VMEM_ADDR_MIN,
+	    VMEM_ADDR_MAX, M_NOWAIT | M_NOVM | M_USE_RESERVE | M_BESTFIT,
+	    &addr) == 0) {
+		if (kmem_back(kmem_object, addr, bytes,
+		    M_NOWAIT | M_USE_RESERVE) == 0) {
+			mtx_unlock(&vmem_bt_lock);
+			return ((void *)addr);
+		}
+		vmem_xfree(kmem_arena, addr, bytes);
+		mtx_unlock(&vmem_bt_lock);
+		/*
+		 * Out of memory, not address space. This may not even be
+		 * possible due to M_USE_RESERVE page allocation.
+		 */
+		if (wait & M_WAITOK)
+			VM_WAIT;
+		return (NULL);
+	}
+	mtx_unlock(&vmem_bt_lock);
+	/*
+	 * We're either out of address space or lost a fill race.
+	 */
+	if (wait & M_WAITOK)
+		pause("btalloc", 1);
+
+	return (NULL);
+}
+#endif
+
 void
 vmem_startup(void)
 {
@@ -553,6 +637,17 @@ vmem_startup(void)
 
 	vmem_bt_zone = uma_zcreate("vmem btag", sizeof(struct vmem_btag),
 	    NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_VM);
+#ifndef UMA_MD_SMALL_ALLOC
+	mtx_init(&vmem_bt_lock, "btag lock", NULL, MTX_DEF);
+	uma_prealloc(vmem_bt_zone, BT_MAXALLOC);
+	/*
+	 * Reserve enough tags to allocate new tags. We allow multiple
+	 * CPUs to attempt to allocate new tags concurrently to limit
+	 * false restarts in UMA.
+	 */
+	uma_zone_reserve(vmem_bt_zone, BT_MAXALLOC * (mp_ncpus + 1) / 2);
+	uma_zone_set_allocf(vmem_bt_zone, vmem_bt_alloc);
+#endif
 }
 
 /* ---- rehash */
@@ -661,15 +756,15 @@ vmem_add1(vmem_t *vm, vmem_addr_t addr, vmem_size_t size, int type)
 	btspan->bt_type = type;
 	btspan->bt_start = addr;
 	btspan->bt_size = size;
+	bt_insseg_tail(vm, btspan);
 
 	btfree = bt_alloc(vm);
 	btfree->bt_type = BT_TYPE_FREE;
 	btfree->bt_start = addr;
 	btfree->bt_size = size;
-
-	bt_insseg_tail(vm, btspan);
 	bt_insseg(vm, btfree, btspan);
 	bt_insfree(vm, btfree);
+
 	vm->vm_size += size;
 }
diff --git a/sys/kern/vfs_bio.c b/sys/kern/vfs_bio.c
index 205e9b3..93fb27d 100644
--- a/sys/kern/vfs_bio.c
+++ b/sys/kern/vfs_bio.c
@@ -856,7 +856,7 @@ bufinit(void)
 	bogus_page = vm_page_alloc(NULL, 0, VM_ALLOC_NOOBJ |
 	    VM_ALLOC_NORMAL | VM_ALLOC_WIRED);
 
-	unmapped_buf = (caddr_t)kmem_alloc_nofault(kernel_map, MAXPHYS);
+	unmapped_buf = (caddr_t)kva_alloc(MAXPHYS);
 }
 
 #ifdef INVARIANTS
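As a closing usage note, the two rewritten sysctl handlers in kern_malloc.c above reduce to vmem_size() queries on kmem_arena. Below is a minimal sketch of the same pattern; the helper name kmem_arena_usage is hypothetical, while vmem_size(), VMEM_ALLOC, and VMEM_FREE are the interfaces this patch switches to.

#include <sys/param.h>
#include <sys/vmem.h>
#include <vm/vm.h>
#include <vm/vm_kern.h>

/* Hypothetical helper mirroring sysctl_kmem_map_size()/_free() above. */
static void
kmem_arena_usage(u_long *allocp, u_long *freep)
{
	*allocp = vmem_size(kmem_arena, VMEM_ALLOC);	/* bytes allocated */
	*freep = vmem_size(kmem_arena, VMEM_FREE);	/* bytes still free */
}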