author		jeff <jeff@FreeBSD.org>	2013-08-07 06:21:20 +0000
committer	jeff <jeff@FreeBSD.org>	2013-08-07 06:21:20 +0000
commit		de4ecca21340ce4d0bf9182cac133c14e031218e (patch)
tree		950bad07f0aeeeae78036d82b9aa11ae998c3654 /sys/kern
parent		e141f5c0bac3839e4886a26e1ba796f4e46e6455 (diff)
Replace kernel virtual address space allocation with vmem.  This provides
transparent layering and better fragmentation.

 - Normalize functions that allocate memory to use kmem_*
 - Those that allocate address space are named kva_*
 - Those that operate on maps are named kmap_*
 - Implement recursive allocation handling for kmem_arena in vmem.

Reviewed by:	alc
Tested by:	pho
Sponsored by:	EMC / Isilon Storage Division
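For orientation, a minimal sketch of how the three naming families divide the work, mirroring call sites visible in the hunks below. This is illustrative only, not part of the commit; kva_free() does not appear in this sys/kern diff and is assumed here as the counterpart of kva_alloc().

/*
 * Illustrative sketch only: kva_* hands out bare address space, kmem_*
 * hands out backed memory from a vmem arena, and kmap_* operates on a
 * vm_map such as exec_map.
 */
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/malloc.h>
#include <sys/vmem.h>

#include <vm/vm.h>
#include <vm/vm_kern.h>
#include <vm/vm_extern.h>
#include <vm/vm_map.h>

static void
kmem_naming_sketch(void)
{
	vm_offset_t kva, contig, args;

	/* kva_*: kernel virtual address space only, no backing pages. */
	kva = kva_alloc(PAGE_SIZE);

	/* kmem_*: wired memory allocated from an arena (here kernel_arena). */
	contig = kmem_alloc_contig(kernel_arena, PAGE_SIZE, M_WAITOK,
	    (vm_paddr_t)0, ~(vm_paddr_t)0, 1, 0, VM_MEMATTR_DEFAULT);

	/* kmap_*: operations on a vm_map, as the exec_map callers do. */
	args = kmap_alloc_wait(exec_map, PATH_MAX + ARG_MAX);

	kmap_free_wakeup(exec_map, args, PATH_MAX + ARG_MAX);
	kmem_free(kernel_arena, contig, PAGE_SIZE);
	kva_free(kva, PAGE_SIZE);	/* assumed kva_alloc() counterpart */
}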
Diffstat (limited to 'sys/kern')
-rw-r--r--	sys/kern/imgact_gzip.c		 4
-rw-r--r--	sys/kern/init_main.c		 5
-rw-r--r--	sys/kern/kern_exec.c		 4
-rw-r--r--	sys/kern/kern_malloc.c		66
-rw-r--r--	sys/kern/kern_mbuf.c		 5
-rw-r--r--	sys/kern/kern_sharedpage.c	 2
-rw-r--r--	sys/kern/subr_busdma_bufalloc.c	 6
-rw-r--r--	sys/kern/subr_vmem.c		99
-rw-r--r--	sys/kern/vfs_bio.c		 2
9 files changed, 149 insertions, 44 deletions
diff --git a/sys/kern/imgact_gzip.c b/sys/kern/imgact_gzip.c
index 7c48ac6..230854b 100644
--- a/sys/kern/imgact_gzip.c
+++ b/sys/kern/imgact_gzip.c
@@ -137,7 +137,7 @@ exec_gzip_imgact(imgp)
}
if (igz.inbuf)
- kmem_free_wakeup(exec_map, (vm_offset_t)igz.inbuf, PAGE_SIZE);
+ kmap_free_wakeup(exec_map, (vm_offset_t)igz.inbuf, PAGE_SIZE);
if (igz.error || error) {
printf("Output=%lu ", igz.output);
printf("Inflate_error=%d igz.error=%d where=%d\n",
@@ -310,7 +310,7 @@ NextByte(void *vp)
return igz->inbuf[(igz->idx++) - igz->offset];
}
if (igz->inbuf)
- kmem_free_wakeup(exec_map, (vm_offset_t)igz->inbuf, PAGE_SIZE);
+ kmap_free_wakeup(exec_map, (vm_offset_t)igz->inbuf, PAGE_SIZE);
igz->offset = igz->idx & ~PAGE_MASK;
error = vm_mmap(exec_map, /* map */
diff --git a/sys/kern/init_main.c b/sys/kern/init_main.c
index 1eb3647..247e431 100644
--- a/sys/kern/init_main.c
+++ b/sys/kern/init_main.c
@@ -461,11 +461,6 @@ proc0_init(void *dummy __unused)
sleepinit();
/*
- * additional VM structures
- */
- vm_init2();
-
- /*
* Create process 0 (the swapper).
*/
LIST_INSERT_HEAD(&allproc, p, p_list);
diff --git a/sys/kern/kern_exec.c b/sys/kern/kern_exec.c
index c0e1435..156c80d 100644
--- a/sys/kern/kern_exec.c
+++ b/sys/kern/kern_exec.c
@@ -1192,7 +1192,7 @@ int
exec_alloc_args(struct image_args *args)
{
- args->buf = (char *)kmem_alloc_wait(exec_map, PATH_MAX + ARG_MAX);
+ args->buf = (char *)kmap_alloc_wait(exec_map, PATH_MAX + ARG_MAX);
return (args->buf != NULL ? 0 : ENOMEM);
}
@@ -1201,7 +1201,7 @@ exec_free_args(struct image_args *args)
{
if (args->buf != NULL) {
- kmem_free_wakeup(exec_map, (vm_offset_t)args->buf,
+ kmap_free_wakeup(exec_map, (vm_offset_t)args->buf,
PATH_MAX + ARG_MAX);
args->buf = NULL;
}
diff --git a/sys/kern/kern_malloc.c b/sys/kern/kern_malloc.c
index 5e4a502..63d8386 100644
--- a/sys/kern/kern_malloc.c
+++ b/sys/kern/kern_malloc.c
@@ -62,9 +62,11 @@ __FBSDID("$FreeBSD$");
#include <sys/sbuf.h>
#include <sys/sysctl.h>
#include <sys/time.h>
+#include <sys/vmem.h>
#include <vm/vm.h>
#include <vm/pmap.h>
+#include <vm/vm_pageout.h>
#include <vm/vm_param.h>
#include <vm/vm_kern.h>
#include <vm/vm_extern.h>
@@ -113,12 +115,7 @@ MALLOC_DEFINE(M_TEMP, "temp", "misc temporary data buffers");
MALLOC_DEFINE(M_IP6OPT, "ip6opt", "IPv6 options");
MALLOC_DEFINE(M_IP6NDP, "ip6ndp", "IPv6 Neighbor Discovery");
-static void kmeminit(void *);
-SYSINIT(kmem, SI_SUB_KMEM, SI_ORDER_FIRST, kmeminit, NULL);
-
static struct malloc_type *kmemstatistics;
-static vm_offset_t kmembase;
-static vm_offset_t kmemlimit;
static int kmemcount;
#define KMEM_ZSHIFT 4
@@ -203,12 +200,12 @@ SYSCTL_UINT(_vm, OID_AUTO, kmem_size_scale, CTLFLAG_RDTUN, &vm_kmem_size_scale,
static int sysctl_kmem_map_size(SYSCTL_HANDLER_ARGS);
SYSCTL_PROC(_vm, OID_AUTO, kmem_map_size,
CTLFLAG_RD | CTLTYPE_ULONG | CTLFLAG_MPSAFE, NULL, 0,
- sysctl_kmem_map_size, "LU", "Current kmem_map allocation size");
+ sysctl_kmem_map_size, "LU", "Current kmem allocation size");
static int sysctl_kmem_map_free(SYSCTL_HANDLER_ARGS);
SYSCTL_PROC(_vm, OID_AUTO, kmem_map_free,
CTLFLAG_RD | CTLTYPE_ULONG | CTLFLAG_MPSAFE, NULL, 0,
- sysctl_kmem_map_free, "LU", "Largest contiguous free range in kmem_map");
+ sysctl_kmem_map_free, "LU", "Free space in kmem");
/*
* The malloc_mtx protects the kmemstatistics linked list.
@@ -253,7 +250,7 @@ sysctl_kmem_map_size(SYSCTL_HANDLER_ARGS)
{
u_long size;
- size = kmem_map->size;
+ size = vmem_size(kmem_arena, VMEM_ALLOC);
return (sysctl_handle_long(oidp, &size, 0, req));
}
@@ -262,10 +259,7 @@ sysctl_kmem_map_free(SYSCTL_HANDLER_ARGS)
{
u_long size;
- vm_map_lock_read(kmem_map);
- size = kmem_map->root != NULL ? kmem_map->root->max_free :
- kmem_map->max_offset - kmem_map->min_offset;
- vm_map_unlock_read(kmem_map);
+ size = vmem_size(kmem_arena, VMEM_FREE);
return (sysctl_handle_long(oidp, &size, 0, req));
}
@@ -420,7 +414,7 @@ contigmalloc(unsigned long size, struct malloc_type *type, int flags,
{
void *ret;
- ret = (void *)kmem_alloc_contig(kernel_map, size, flags, low, high,
+ ret = (void *)kmem_alloc_contig(kernel_arena, size, flags, low, high,
alignment, boundary, VM_MEMATTR_DEFAULT);
if (ret != NULL)
malloc_type_allocated(type, round_page(size));
@@ -438,7 +432,7 @@ void
contigfree(void *addr, unsigned long size, struct malloc_type *type)
{
- kmem_free(kernel_map, (vm_offset_t)addr, size);
+ kmem_free(kernel_arena, (vm_offset_t)addr, size);
malloc_type_freed(type, round_page(size));
}
@@ -681,18 +675,24 @@ reallocf(void *addr, unsigned long size, struct malloc_type *mtp, int flags)
}
/*
- * Initialize the kernel memory allocator
+ * Wake the page daemon when we exhaust KVA. It will call the lowmem handler
+ * and uma_reclaim() callbacks in a context that is safe.
*/
-/* ARGSUSED*/
static void
-kmeminit(void *dummy)
+kmem_reclaim(vmem_t *vm, int flags)
+{
+
+ pagedaemon_wakeup();
+}
+
+/*
+ * Initialize the kernel memory arena.
+ */
+void
+kmeminit(void)
{
- uint8_t indx;
u_long mem_size, tmp;
- int i;
- mtx_init(&malloc_mtx, "malloc", NULL, MTX_DEF);
-
/*
* Try to auto-tune the kernel memory size, so that it is
* more applicable for a wider range of machine sizes. The
@@ -745,9 +745,9 @@ kmeminit(void *dummy)
#else
tmp = vm_kmem_size;
#endif
- kmem_map = kmem_suballoc(kernel_map, &kmembase, &kmemlimit,
- tmp, TRUE);
- kmem_map->system_map = 1;
+ vmem_init(kmem_arena, "kmem arena", kva_alloc(tmp), tmp, PAGE_SIZE,
+ PAGE_SIZE * 16, 0);
+ vmem_set_reclaim(kmem_arena, kmem_reclaim);
#ifdef DEBUG_MEMGUARD
/*
@@ -755,8 +755,23 @@ kmeminit(void *dummy)
* replacement allocator used for detecting tamper-after-free
* scenarios as they occur. It is only used for debugging.
*/
- memguard_init(kmem_map);
+ memguard_init(kmem_arena);
#endif
+}
+
+/*
+ * Initialize the kernel memory allocator
+ */
+/* ARGSUSED*/
+static void
+mallocinit(void *dummy)
+{
+ int i;
+ uint8_t indx;
+
+ mtx_init(&malloc_mtx, "malloc", NULL, MTX_DEF);
+
+ kmeminit();
uma_startup2();
@@ -787,6 +802,7 @@ kmeminit(void *dummy)
}
}
+SYSINIT(kmem, SI_SUB_KMEM, SI_ORDER_FIRST, mallocinit, NULL);
void
malloc_init(void *data)
diff --git a/sys/kern/kern_mbuf.c b/sys/kern/kern_mbuf.c
index 9e85806..df9b854 100644
--- a/sys/kern/kern_mbuf.c
+++ b/sys/kern/kern_mbuf.c
@@ -121,8 +121,7 @@ tunable_mbinit(void *dummy)
* available kernel memory (physical or kmem).
* At most it can be 3/4 of available kernel memory.
*/
- realmem = qmin((quad_t)physmem * PAGE_SIZE,
- vm_map_max(kmem_map) - vm_map_min(kmem_map));
+ realmem = qmin((quad_t)physmem * PAGE_SIZE, vm_kmem_size);
maxmbufmem = realmem / 2;
TUNABLE_QUAD_FETCH("kern.ipc.maxmbufmem", &maxmbufmem);
if (maxmbufmem > realmem / 4 * 3)
@@ -395,7 +394,7 @@ mbuf_jumbo_alloc(uma_zone_t zone, int bytes, uint8_t *flags, int wait)
/* Inform UMA that this allocator uses kernel_map/object. */
*flags = UMA_SLAB_KERNEL;
- return ((void *)kmem_alloc_contig(kernel_map, bytes, wait,
+ return ((void *)kmem_alloc_contig(kernel_arena, bytes, wait,
(vm_paddr_t)0, ~(vm_paddr_t)0, 1, 0, VM_MEMATTR_DEFAULT));
}
diff --git a/sys/kern/kern_sharedpage.c b/sys/kern/kern_sharedpage.c
index 20b9038..622592b 100644
--- a/sys/kern/kern_sharedpage.c
+++ b/sys/kern/kern_sharedpage.c
@@ -112,7 +112,7 @@ shared_page_init(void *dummy __unused)
VM_ALLOC_ZERO);
m->valid = VM_PAGE_BITS_ALL;
VM_OBJECT_WUNLOCK(shared_page_obj);
- addr = kmem_alloc_nofault(kernel_map, PAGE_SIZE);
+ addr = kva_alloc(PAGE_SIZE);
pmap_qenter(addr, &m, 1);
shared_page_mapping = (char *)addr;
}
diff --git a/sys/kern/subr_busdma_bufalloc.c b/sys/kern/subr_busdma_bufalloc.c
index 9406d95..a80a233 100644
--- a/sys/kern/subr_busdma_bufalloc.c
+++ b/sys/kern/subr_busdma_bufalloc.c
@@ -152,10 +152,10 @@ busdma_bufalloc_alloc_uncacheable(uma_zone_t zone, int size, u_int8_t *pflag,
{
#ifdef VM_MEMATTR_UNCACHEABLE
- /* Inform UMA that this allocator uses kernel_map/object. */
+ /* Inform UMA that this allocator uses kernel_arena/object. */
*pflag = UMA_SLAB_KERNEL;
- return ((void *)kmem_alloc_attr(kernel_map, size, wait, 0,
+ return ((void *)kmem_alloc_attr(kernel_arena, size, wait, 0,
BUS_SPACE_MAXADDR, VM_MEMATTR_UNCACHEABLE));
#else
@@ -169,6 +169,6 @@ void
busdma_bufalloc_free_uncacheable(void *item, int size, u_int8_t pflag)
{
- kmem_free(kernel_map, (vm_offset_t)item, size);
+ kmem_free(kernel_arena, (vm_offset_t)item, size);
}
diff --git a/sys/kern/subr_vmem.c b/sys/kern/subr_vmem.c
index 6aa8ad3..d3a758d 100644
--- a/sys/kern/subr_vmem.c
+++ b/sys/kern/subr_vmem.c
@@ -61,6 +61,7 @@ __FBSDID("$FreeBSD$");
#include <vm/vm.h>
#include <vm/pmap.h>
#include <vm/vm_map.h>
+#include <vm/vm_object.h>
#include <vm/vm_kern.h>
#include <vm/vm_extern.h>
#include <vm/vm_param.h>
@@ -213,8 +214,12 @@ static LIST_HEAD(, vmem) vmem_list = LIST_HEAD_INITIALIZER(vmem_list);
static uma_zone_t vmem_bt_zone;
/* boot time arena storage. */
+static struct vmem kernel_arena_storage;
+static struct vmem kmem_arena_storage;
static struct vmem buffer_arena_storage;
static struct vmem transient_arena_storage;
+vmem_t *kernel_arena = &kernel_arena_storage;
+vmem_t *kmem_arena = &kmem_arena_storage;
vmem_t *buffer_arena = &buffer_arena_storage;
vmem_t *transient_arena = &transient_arena_storage;
@@ -231,6 +236,14 @@ bt_fill(vmem_t *vm, int flags)
VMEM_ASSERT_LOCKED(vm);
/*
+ * Only allow the kmem arena to dip into reserve tags. It is the
+ * vmem where new tags come from.
+ */
+ flags &= BT_FLAGS;
+ if (vm != kmem_arena)
+ flags &= ~M_USE_RESERVE;
+
+ /*
* Loop until we meet the reserve. To minimize the lock shuffle
* and prevent simultaneous fills we first try a NOWAIT regardless
* of the caller's flags. Specify M_NOVM so we don't recurse while
@@ -545,6 +558,77 @@ qc_drain(vmem_t *vm)
zone_drain(vm->vm_qcache[i].qc_cache);
}
+#ifndef UMA_MD_SMALL_ALLOC
+
+static struct mtx_padalign vmem_bt_lock;
+
+/*
+ * vmem_bt_alloc: Allocate a new page of boundary tags.
+ *
+ * On architectures with uma_small_alloc there is no recursion; no address
+ * space need be allocated to allocate boundary tags. For the others, we
+ * must handle recursion. Boundary tags are necessary to allocate new
+ * boundary tags.
+ *
+ * UMA guarantees that enough tags are held in reserve to allocate a new
+ * page of kva. We dip into this reserve by specifying M_USE_RESERVE only
+ * when allocating the page to hold new boundary tags. In this way the
+ * reserve is automatically filled by the allocation that uses the reserve.
+ *
+ * We still have to guarantee that the new tags are allocated atomically since
+ * many threads may try concurrently. The bt_lock provides this guarantee.
+ * We convert WAITOK allocations to NOWAIT and then handle the blocking here
+ * on failure. It's ok to return NULL for a WAITOK allocation as UMA will
+ * loop again after checking to see if we lost the race to allocate.
+ *
+ * There is a small race between vmem_bt_alloc() returning the page and the
+ * zone lock being acquired to add the page to the zone. For WAITOK
+ * allocations we just pause briefly. NOWAIT may experience a transient
+ * failure. To alleviate this we permit a small number of simultaneous
+ * fills to proceed concurrently so NOWAIT is less likely to fail unless
+ * we are really out of KVA.
+ */
+static void *
+vmem_bt_alloc(uma_zone_t zone, int bytes, uint8_t *pflag, int wait)
+{
+ vmem_addr_t addr;
+
+ *pflag = UMA_SLAB_KMEM;
+
+ /*
+ * Single thread boundary tag allocation so that the address space
+ * and memory are added in one atomic operation.
+ */
+ mtx_lock(&vmem_bt_lock);
+ if (vmem_xalloc(kmem_arena, bytes, 0, 0, 0, VMEM_ADDR_MIN,
+ VMEM_ADDR_MAX, M_NOWAIT | M_NOVM | M_USE_RESERVE | M_BESTFIT,
+ &addr) == 0) {
+ if (kmem_back(kmem_object, addr, bytes,
+ M_NOWAIT | M_USE_RESERVE) == 0) {
+ mtx_unlock(&vmem_bt_lock);
+ return ((void *)addr);
+ }
+ vmem_xfree(kmem_arena, addr, bytes);
+ mtx_unlock(&vmem_bt_lock);
+ /*
+ * Out of memory, not address space. This may not even be
+ * possible due to M_USE_RESERVE page allocation.
+ */
+ if (wait & M_WAITOK)
+ VM_WAIT;
+ return (NULL);
+ }
+ mtx_unlock(&vmem_bt_lock);
+ /*
+ * We're either out of address space or lost a fill race.
+ */
+ if (wait & M_WAITOK)
+ pause("btalloc", 1);
+
+ return (NULL);
+}
+#endif
+
void
vmem_startup(void)
{
@@ -553,6 +637,17 @@ vmem_startup(void)
vmem_bt_zone = uma_zcreate("vmem btag",
sizeof(struct vmem_btag), NULL, NULL, NULL, NULL,
UMA_ALIGN_PTR, UMA_ZONE_VM);
+#ifndef UMA_MD_SMALL_ALLOC
+ mtx_init(&vmem_bt_lock, "btag lock", NULL, MTX_DEF);
+ uma_prealloc(vmem_bt_zone, BT_MAXALLOC);
+ /*
+ * Reserve enough tags to allocate new tags. We allow multiple
+ * CPUs to attempt to allocate new tags concurrently to limit
+ * false restarts in UMA.
+ */
+ uma_zone_reserve(vmem_bt_zone, BT_MAXALLOC * (mp_ncpus + 1) / 2);
+ uma_zone_set_allocf(vmem_bt_zone, vmem_bt_alloc);
+#endif
}
/* ---- rehash */
@@ -661,15 +756,15 @@ vmem_add1(vmem_t *vm, vmem_addr_t addr, vmem_size_t size, int type)
btspan->bt_type = type;
btspan->bt_start = addr;
btspan->bt_size = size;
+ bt_insseg_tail(vm, btspan);
btfree = bt_alloc(vm);
btfree->bt_type = BT_TYPE_FREE;
btfree->bt_start = addr;
btfree->bt_size = size;
-
- bt_insseg_tail(vm, btspan);
bt_insseg(vm, btfree, btspan);
bt_insfree(vm, btfree);
+
vm->vm_size += size;
}
diff --git a/sys/kern/vfs_bio.c b/sys/kern/vfs_bio.c
index 205e9b3..93fb27d 100644
--- a/sys/kern/vfs_bio.c
+++ b/sys/kern/vfs_bio.c
@@ -856,7 +856,7 @@ bufinit(void)
bogus_page = vm_page_alloc(NULL, 0, VM_ALLOC_NOOBJ |
VM_ALLOC_NORMAL | VM_ALLOC_WIRED);
- unmapped_buf = (caddr_t)kmem_alloc_nofault(kernel_map, MAXPHYS);
+ unmapped_buf = (caddr_t)kva_alloc(MAXPHYS);
}
#ifdef INVARIANTS