diff options
author | alc <alc@FreeBSD.org> | 2005-12-16 18:34:14 +0000 |
---|---|---|
committer | alc <alc@FreeBSD.org> | 2005-12-16 18:34:14 +0000 |
commit | f69d4d5fa8d8f5a3f56bf9f07a20dbf05bdeeca4 (patch) | |
tree | aa11bb9f1f197a97830817243044264d50c17670 | |
parent | 4ea00e0984a3a4d6fcce90be6d9b56a67e0f8ad6 (diff) | |
download | FreeBSD-src-f69d4d5fa8d8f5a3f56bf9f07a20dbf05bdeeca4.zip FreeBSD-src-f69d4d5fa8d8f5a3f56bf9f07a20dbf05bdeeca4.tar.gz |
Use sf_buf_alloc() instead of vm_map_find() on exec_map to create the
ephemeral mappings that are used as the source for three copy
operations from kernel space to user space. There are two reasons for
making this change: (1) Under heavy load, exec_map can fill up, causing
vm_map_find() to fail. When it fails, the nascent process is aborted
(SIGABRT). In contrast, this reimplementation using sf_buf_alloc()
sleeps instead of failing. (2) Although it is possible to sleep on vm_map_find()'s
failure until address space becomes available (see kmem_alloc_wait()),
using sf_buf_alloc() is faster. Furthermore, the reimplementation
uses a CPU private mapping, avoiding a TLB shootdown on
multiprocessors.
Problem uncovered by: kris@
Reviewed by: tegge@
MFC after: 3 weeks
-rw-r--r-- | sys/kern/imgact_elf.c | 90 | ||||
-rw-r--r-- | sys/vm/vm_extern.h | 2 | ||||
-rw-r--r-- | sys/vm/vm_glue.c | 72 |
3 files changed, 103 insertions, 61 deletions
diff --git a/sys/kern/imgact_elf.c b/sys/kern/imgact_elf.c index 1901591..5356837 100644 --- a/sys/kern/imgact_elf.c +++ b/sys/kern/imgact_elf.c @@ -49,6 +49,7 @@ __FBSDID("$FreeBSD$"); #include <sys/proc.h> #include <sys/procfs.h> #include <sys/resourcevar.h> +#include <sys/sf_buf.h> #include <sys/systm.h> #include <sys/signalvar.h> #include <sys/stat.h> @@ -239,9 +240,9 @@ __elfN(map_partial)(vm_map_t map, vm_object_t object, vm_ooffset_t offset, vm_offset_t start, vm_offset_t end, vm_prot_t prot, vm_prot_t max) { - int error, rv; + struct sf_buf *sf; + int error; vm_offset_t off; - vm_offset_t data_buf = 0; /* * Create the page if it doesn't exist yet. Ignore errors. @@ -255,25 +256,13 @@ __elfN(map_partial)(vm_map_t map, vm_object_t object, vm_ooffset_t offset, * Find the page from the underlying object. */ if (object) { - vm_object_reference(object); - rv = vm_map_find(exec_map, - object, - trunc_page(offset), - &data_buf, - PAGE_SIZE, - TRUE, - VM_PROT_READ, - VM_PROT_ALL, - MAP_COPY_ON_WRITE | MAP_PREFAULT_PARTIAL); - if (rv != KERN_SUCCESS) { - vm_object_deallocate(object); - return (rv); - } - + sf = vm_imgact_map_page(object, offset); + if (sf == NULL) + return (KERN_FAILURE); off = offset - trunc_page(offset); - error = copyout((caddr_t)data_buf + off, (caddr_t)start, + error = copyout((caddr_t)sf_buf_kva(sf) + off, (caddr_t)start, end - start); - vm_map_remove(exec_map, data_buf, data_buf + PAGE_SIZE); + vm_imgact_unmap_page(sf); if (error) { return (KERN_FAILURE); } @@ -287,7 +276,8 @@ __elfN(map_insert)(vm_map_t map, vm_object_t object, vm_ooffset_t offset, vm_offset_t start, vm_offset_t end, vm_prot_t prot, vm_prot_t max, int cow) { - vm_offset_t data_buf, off; + struct sf_buf *sf; + vm_offset_t off; vm_size_t sz; int error, rv; @@ -316,35 +306,23 @@ __elfN(map_insert)(vm_map_t map, vm_object_t object, vm_ooffset_t offset, FALSE, prot, max, 0); if (rv) return (rv); - data_buf = 0; - while (start < end) { - vm_object_reference(object); - rv = 
vm_map_find(exec_map, - object, - trunc_page(offset), - &data_buf, - 2 * PAGE_SIZE, - TRUE, - VM_PROT_READ, - VM_PROT_ALL, - (MAP_COPY_ON_WRITE - | MAP_PREFAULT_PARTIAL)); - if (rv != KERN_SUCCESS) { - vm_object_deallocate(object); - return (rv); - } + if (object == NULL) + return (KERN_SUCCESS); + for (; start < end; start += sz) { + sf = vm_imgact_map_page(object, offset); + if (sf == NULL) + return (KERN_FAILURE); off = offset - trunc_page(offset); sz = end - start; - if (sz > PAGE_SIZE) - sz = PAGE_SIZE; - error = copyout((caddr_t)data_buf + off, + if (sz > PAGE_SIZE - off) + sz = PAGE_SIZE - off; + error = copyout((caddr_t)sf_buf_kva(sf) + off, (caddr_t)start, sz); - vm_map_remove(exec_map, data_buf, - data_buf + 2 * PAGE_SIZE); + vm_imgact_unmap_page(sf); if (error) { return (KERN_FAILURE); } - start += sz; + offset += sz; } rv = KERN_SUCCESS; } else { @@ -365,12 +343,12 @@ __elfN(load_section)(struct proc *p, struct vmspace *vmspace, caddr_t vmaddr, size_t memsz, size_t filsz, vm_prot_t prot, size_t pagesize) { + struct sf_buf *sf; size_t map_len; vm_offset_t map_addr; int error, rv, cow; size_t copy_len; vm_offset_t file_addr; - vm_offset_t data_buf = 0; error = 0; @@ -455,27 +433,17 @@ __elfN(load_section)(struct proc *p, struct vmspace *vmspace, if (copy_len != 0) { vm_offset_t off; - vm_object_reference(object); - rv = vm_map_find(exec_map, - object, - trunc_page(offset + filsz), - &data_buf, - PAGE_SIZE, - TRUE, - VM_PROT_READ, - VM_PROT_ALL, - MAP_COPY_ON_WRITE | MAP_PREFAULT_PARTIAL); - if (rv != KERN_SUCCESS) { - vm_object_deallocate(object); - return (EINVAL); - } + + sf = vm_imgact_map_page(object, offset + filsz); + if (sf == NULL) + return (EIO); /* send the page fragment to user space */ off = trunc_page_ps(offset + filsz, pagesize) - trunc_page(offset + filsz); - error = copyout((caddr_t)data_buf + off, (caddr_t)map_addr, - copy_len); - vm_map_remove(exec_map, data_buf, data_buf + PAGE_SIZE); + error = copyout((caddr_t)sf_buf_kva(sf) + off, + 
(caddr_t)map_addr, copy_len); + vm_imgact_unmap_page(sf); if (error) { return (error); } diff --git a/sys/vm/vm_extern.h b/sys/vm/vm_extern.h index 4fc8275..08dba00 100644 --- a/sys/vm/vm_extern.h +++ b/sys/vm/vm_extern.h @@ -86,6 +86,8 @@ void vsunlock(void *, size_t); void vm_object_print(/* db_expr_t */ long, boolean_t, /* db_expr_t */ long, char *); int vm_fault_quick(caddr_t v, int prot); +struct sf_buf *vm_imgact_map_page(vm_object_t object, vm_ooffset_t offset); +void vm_imgact_unmap_page(struct sf_buf *sf); void vm_thread_dispose(struct thread *td); void vm_thread_dispose_altkstack(struct thread *td); void vm_thread_new(struct thread *td, int pages); diff --git a/sys/vm/vm_glue.c b/sys/vm/vm_glue.c index 9deb363..3843ecd 100644 --- a/sys/vm/vm_glue.c +++ b/sys/vm/vm_glue.c @@ -70,6 +70,8 @@ __FBSDID("$FreeBSD$"); #include <sys/mutex.h> #include <sys/proc.h> #include <sys/resourcevar.h> +#include <sys/sched.h> +#include <sys/sf_buf.h> #include <sys/shm.h> #include <sys/vmmeter.h> #include <sys/sx.h> @@ -239,6 +241,76 @@ vsunlock(void *addr, size_t len) VM_MAP_WIRE_SYSTEM | VM_MAP_WIRE_NOHOLES); } +/* + * Pin the page contained within the given object at the given offset. If the + * page is not resident, allocate and load it using the given object's pager. + * Return the pinned page if successful; otherwise, return NULL. 
+ */ +static vm_page_t +vm_imgact_hold_page(vm_object_t object, vm_ooffset_t offset) +{ + vm_page_t m, ma[1]; + vm_pindex_t pindex; + int rv; + + VM_OBJECT_LOCK(object); + pindex = OFF_TO_IDX(offset); + m = vm_page_grab(object, pindex, VM_ALLOC_NORMAL | VM_ALLOC_RETRY); + if ((m->valid & VM_PAGE_BITS_ALL) != VM_PAGE_BITS_ALL) { + ma[0] = m; + rv = vm_pager_get_pages(object, ma, 1, 0); + m = vm_page_lookup(object, pindex); + if (m == NULL) + goto out; + if (m->valid == 0 || rv != VM_PAGER_OK) { + vm_page_lock_queues(); + vm_page_free(m); + vm_page_unlock_queues(); + m = NULL; + goto out; + } + } + vm_page_lock_queues(); + vm_page_hold(m); + vm_page_wakeup(m); + vm_page_unlock_queues(); +out: + VM_OBJECT_UNLOCK(object); + return (m); +} + +/* + * Return a CPU private mapping to the page at the given offset within the + * given object. The page is pinned before it is mapped. + */ +struct sf_buf * +vm_imgact_map_page(vm_object_t object, vm_ooffset_t offset) +{ + vm_page_t m; + + m = vm_imgact_hold_page(object, offset); + if (m == NULL) + return (NULL); + sched_pin(); + return (sf_buf_alloc(m, SFB_CPUPRIVATE)); +} + +/* + * Destroy the given CPU private mapping and unpin the page that it mapped. + */ +void +vm_imgact_unmap_page(struct sf_buf *sf) +{ + vm_page_t m; + + m = sf_buf_page(sf); + sf_buf_free(sf); + sched_unpin(); + vm_page_lock_queues(); + vm_page_unhold(m); + vm_page_unlock_queues(); +} + #ifndef KSTACK_MAX_PAGES #define KSTACK_MAX_PAGES 32 #endif |