author	alc <alc@FreeBSD.org>	2010-12-20 22:49:31 +0000
committer	alc <alc@FreeBSD.org>	2010-12-20 22:49:31 +0000
commit	be5201b0d1c733f2bfb8fa4bef0225259f9acb2c (patch)
tree	f6dd1a0aa7e3213a5adcdd9bdd3123f868ee9dea /sys/kern/sys_process.c
parent	04f9658f5ccf72df9c57e4e218f11574121fc70f (diff)
Introduce vm_fault_hold() and use it to (1) eliminate a long-standing race
condition in proc_rwmem() and to (2) simplify the implementation of the
cxgb driver's vm_fault_hold_user_pages().  Specifically, in proc_rwmem()
the requested read or write could fail because the targeted page could be
reclaimed between the calls to vm_fault() and vm_page_hold().

In collaboration with:	kib@
MFC after:	6 weeks
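The shape of the race, sketched in C (illustrative only, not the committed
code; all identifiers are taken from the diff below, with locking, types,
and error handling elided):

	/* Before: fault the page, then separately look it up and hold it. */
	error = vm_fault(map, pageno, reqprot, VM_FAULT_NORMAL);
	/* <-- window: the just-faulted page may be reclaimed here */
	tmap = map;
	error = vm_map_lookup(&tmap, pageno, reqprot, &out_entry,
	    &object, &pindex, &out_prot, &wired);
	m = vm_page_lookup(object, pindex);
	vm_page_lock(m);
	vm_page_hold(m);	/* too late if the page is already gone */
	vm_page_unlock(m);
	vm_map_lookup_done(tmap, out_entry);

	/*
	 * After: vm_fault_hold() returns the page already held, so the
	 * fault and the hold happen without an intervening window in
	 * which the page can be reclaimed.
	 */
	error = vm_fault_hold(map, pageno, reqprot, fault_flags, &m);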
Diffstat (limited to 'sys/kern/sys_process.c')
-rw-r--r--	sys/kern/sys_process.c | 80
1 file changed, 17 insertions(+), 63 deletions(-)
diff --git a/sys/kern/sys_process.c b/sys/kern/sys_process.c
index e5d9842..3733de3 100644
--- a/sys/kern/sys_process.c
+++ b/sys/kern/sys_process.c
@@ -237,10 +237,9 @@ int
proc_rwmem(struct proc *p, struct uio *uio)
{
vm_map_t map;
- vm_object_t backing_object, object;
vm_offset_t pageno; /* page number */
vm_prot_t reqprot;
- int error, writing;
+ int error, fault_flags, page_offset, writing;
/*
* Assert that someone has locked this vmspace. (Should be
@@ -255,26 +254,24 @@ proc_rwmem(struct proc *p, struct uio *uio)
*/
map = &p->p_vmspace->vm_map;
+ /*
+ * If we are writing, then we request vm_fault() to create a private
+ * copy of each page. Since these copies will not be writeable by the
+ * process, we must explicitly request that they be dirtied.
+ */
writing = uio->uio_rw == UIO_WRITE;
reqprot = writing ? VM_PROT_COPY | VM_PROT_READ : VM_PROT_READ;
+ fault_flags = writing ? VM_FAULT_DIRTY : VM_FAULT_NORMAL;
/*
* Only map in one page at a time. We don't have to, but it
* makes things easier. This way is trivial - right?
*/
do {
- vm_map_t tmap;
vm_offset_t uva;
- int page_offset; /* offset into page */
- vm_map_entry_t out_entry;
- vm_prot_t out_prot;
- boolean_t wired;
- vm_pindex_t pindex;
u_int len;
vm_page_t m;
- object = NULL;
-
uva = (vm_offset_t)uio->uio_offset;
/*
@@ -289,10 +286,10 @@ proc_rwmem(struct proc *p, struct uio *uio)
len = min(PAGE_SIZE - page_offset, uio->uio_resid);
/*
- * Fault the page on behalf of the process
+ * Fault and hold the page on behalf of the process.
*/
- error = vm_fault(map, pageno, reqprot, VM_FAULT_NORMAL);
- if (error) {
+ error = vm_fault_hold(map, pageno, reqprot, fault_flags, &m);
+ if (error != KERN_SUCCESS) {
if (error == KERN_RESOURCE_SHORTAGE)
error = ENOMEM;
else
@@ -301,61 +298,18 @@ proc_rwmem(struct proc *p, struct uio *uio)
}
/*
- * Now we need to get the page. out_entry and wired
- * aren't used. One would think the vm code
- * would be a *bit* nicer... We use tmap because
- * vm_map_lookup() can change the map argument.
- */
- tmap = map;
- error = vm_map_lookup(&tmap, pageno, reqprot, &out_entry,
- &object, &pindex, &out_prot, &wired);
- if (error) {
- error = EFAULT;
- break;
- }
- VM_OBJECT_LOCK(object);
- while ((m = vm_page_lookup(object, pindex)) == NULL &&
- !writing &&
- (backing_object = object->backing_object) != NULL) {
- /*
- * Allow fallback to backing objects if we are reading.
- */
- VM_OBJECT_LOCK(backing_object);
- pindex += OFF_TO_IDX(object->backing_object_offset);
- VM_OBJECT_UNLOCK(object);
- object = backing_object;
- }
- if (writing && m != NULL) {
- vm_page_dirty(m);
- vm_pager_page_unswapped(m);
- }
- VM_OBJECT_UNLOCK(object);
- if (m == NULL) {
- vm_map_lookup_done(tmap, out_entry);
- error = EFAULT;
- break;
- }
-
- /*
- * Hold the page in memory.
- */
- vm_page_lock(m);
- vm_page_hold(m);
- vm_page_unlock(m);
-
- /*
- * We're done with tmap now.
- */
- vm_map_lookup_done(tmap, out_entry);
-
- /*
* Now do the i/o move.
*/
error = uiomove_fromphys(&m, page_offset, len, uio);
/* Make the I-cache coherent for breakpoints. */
- if (!error && writing && (out_prot & VM_PROT_EXECUTE))
- vm_sync_icache(map, uva, len);
+ if (writing && error == 0) {
+ vm_map_lock_read(map);
+ if (vm_map_check_protection(map, pageno, pageno +
+ PAGE_SIZE, VM_PROT_EXECUTE))
+ vm_sync_icache(map, uva, len);
+ vm_map_unlock_read(map);
+ }
/*
* Release the page.