path: root/sys/vm
author		attilio <attilio@FreeBSD.org>	2013-08-09 11:11:11 +0000
committer	attilio <attilio@FreeBSD.org>	2013-08-09 11:11:11 +0000
commit		16c7563cf4ffce21e633bfb33a25ab16072fc75e (patch)
tree		e2e9382aea2fd37ac6a9eec19380892618e99f37	/sys/vm
parent		fcb31f05a939358e510816e46ff7729b2ffd94da (diff)
download	FreeBSD-src-16c7563cf4ffce21e633bfb33a25ab16072fc75e.zip
		FreeBSD-src-16c7563cf4ffce21e633bfb33a25ab16072fc75e.tar.gz
The soft and hard busy mechanism rely on the vm object lock to work.
Unify the 2 concept into a real, minimal, sxlock where the shared acquisition represent the soft busy and the exclusive acquisition represent the hard busy. The old VPO_WANTED mechanism becames the hard-path for this new lock and it becomes per-page rather than per-object. The vm_object lock becames an interlock for this functionality: it can be held in both read or write mode. However, if the vm_object lock is held in read mode while acquiring or releasing the busy state, the thread owner cannot make any assumption on the busy state unless it is also busying it. Also: - Add a new flag to directly shared busy pages while vm_page_alloc and vm_page_grab are being executed. This will be very helpful once these functions happen under a read object lock. - Move the swapping sleep into its own per-object flag The KPI is heavilly changed this is why the version is bumped. It is very likely that some VM ports users will need to change their own code. Sponsored by: EMC / Isilon storage division Discussed with: alc Reviewed by: jeff, kib Tested by: gavin, bapt (older version) Tested by: pho, scottl
Diffstat (limited to 'sys/vm')
-rw-r--r--	sys/vm/phys_pager.c	9
-rw-r--r--	sys/vm/swap_pager.c	40
-rw-r--r--	sys/vm/vm_fault.c	46
-rw-r--r--	sys/vm/vm_glue.c	20
-rw-r--r--	sys/vm/vm_object.c	48
-rw-r--r--	sys/vm/vm_page.c	332
-rw-r--r--	sys/vm/vm_page.h	117
-rw-r--r--	sys/vm/vm_pageout.c	36
-rw-r--r--	sys/vm/vm_phys.c	3
-rw-r--r--	sys/vm/vnode_pager.c	3
10 files changed, 421 insertions, 233 deletions
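
The vm_page.h hunk below introduces the busy_lock word and its VPB_* encoding. As a rough, self-contained illustration (plain userland C, not kernel code), the macro definitions from that hunk can be exercised as follows; the main() harness and the printed labels exist only for demonstration.

/* Macro definitions copied from the vm_page.h hunk below. */
#include <stdio.h>

#define VPB_BIT_SHARED		0x01
#define VPB_BIT_EXCLUSIVE	0x02
#define VPB_BIT_WAITERS		0x04
#define VPB_BIT_FLAGMASK \
	(VPB_BIT_SHARED | VPB_BIT_EXCLUSIVE | VPB_BIT_WAITERS)

#define VPB_SHARERS_SHIFT	3
#define VPB_SHARERS(x) \
	(((x) & ~VPB_BIT_FLAGMASK) >> VPB_SHARERS_SHIFT)
#define VPB_SHARERS_WORD(x)	((x) << VPB_SHARERS_SHIFT | VPB_BIT_SHARED)
#define VPB_ONE_SHARER		(1 << VPB_SHARERS_SHIFT)
#define VPB_SINGLE_EXCLUSIVER	VPB_BIT_EXCLUSIVE
#define VPB_UNBUSIED		VPB_SHARERS_WORD(0)

int
main(void)
{
	unsigned int x;

	printf("unbusied:              0x%02x\n", VPB_UNBUSIED);          /* 0x01 */
	printf("one shared holder:     0x%02x\n", VPB_SHARERS_WORD(1));   /* 0x09 */
	printf("exclusive holder:      0x%02x\n", VPB_SINGLE_EXCLUSIVER); /* 0x02 */
	x = VPB_SHARERS_WORD(2) | VPB_BIT_WAITERS;
	printf("two sharers + waiters: 0x%02x, sharers = %u\n", x, VPB_SHARERS(x));
	return (0);
}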
diff --git a/sys/vm/phys_pager.c b/sys/vm/phys_pager.c
index 7b9f7b2..9e98006 100644
--- a/sys/vm/phys_pager.c
+++ b/sys/vm/phys_pager.c
@@ -38,6 +38,7 @@ __FBSDID("$FreeBSD$");
#include <sys/sysctl.h>
#include <vm/vm.h>
+#include <vm/vm_param.h>
#include <vm/vm_object.h>
#include <vm/vm_page.h>
#include <vm/vm_pager.h>
@@ -152,10 +153,12 @@ phys_pager_getpages(vm_object_t object, vm_page_t *m, int count, int reqpage)
KASSERT(m[i]->dirty == 0,
("phys_pager_getpages: dirty page %p", m[i]));
/* The requested page must remain busy, the others not. */
- if (i == reqpage)
+ if (i == reqpage) {
+ vm_page_lock(m[i]);
vm_page_flash(m[i]);
- else
- vm_page_wakeup(m[i]);
+ vm_page_unlock(m[i]);
+ } else
+ vm_page_xunbusy(m[i]);
}
return (VM_PAGER_OK);
}
diff --git a/sys/vm/swap_pager.c b/sys/vm/swap_pager.c
index 319811a..f7ba736 100644
--- a/sys/vm/swap_pager.c
+++ b/sys/vm/swap_pager.c
@@ -1219,9 +1219,10 @@ swap_pager_getpages(vm_object_t object, vm_page_t *m, int count, int reqpage)
*/
VM_OBJECT_WLOCK(object);
while ((mreq->oflags & VPO_SWAPINPROG) != 0) {
- mreq->oflags |= VPO_WANTED;
+ mreq->oflags |= VPO_SWAPSLEEP;
PCPU_INC(cnt.v_intrans);
- if (VM_OBJECT_SLEEP(object, mreq, PSWP, "swread", hz * 20)) {
+ if (VM_OBJECT_SLEEP(object, &object->paging_in_progress, PSWP,
+ "swread", hz * 20)) {
printf(
"swap_pager: indefinite wait buffer: bufobj: %p, blkno: %jd, size: %ld\n",
bp->b_bufobj, (intmax_t)bp->b_blkno, bp->b_bcount);
@@ -1459,12 +1460,6 @@ swap_pager_putpages(vm_object_t object, vm_page_t *m, int count,
* Completion routine for asynchronous reads and writes from/to swap.
* Also called manually by synchronous code to finish up a bp.
*
- * For READ operations, the pages are VPO_BUSY'd. For WRITE operations,
- * the pages are vm_page_t->busy'd. For READ operations, we VPO_BUSY
- * unbusy all pages except the 'main' request page. For WRITE
- * operations, we vm_page_t->busy'd unbusy all pages ( we can do this
- * because we marked them all VM_PAGER_PEND on return from putpages ).
- *
* This routine may not sleep.
*/
static void
@@ -1514,6 +1509,10 @@ swp_pager_async_iodone(struct buf *bp)
vm_page_t m = bp->b_pages[i];
m->oflags &= ~VPO_SWAPINPROG;
+ if (m->oflags & VPO_SWAPSLEEP) {
+ m->oflags &= ~VPO_SWAPSLEEP;
+ wakeup(&object->paging_in_progress);
+ }
if (bp->b_ioflags & BIO_ERROR) {
/*
@@ -1542,8 +1541,11 @@ swp_pager_async_iodone(struct buf *bp)
m->valid = 0;
if (i != bp->b_pager.pg_reqpage)
swp_pager_free_nrpage(m);
- else
+ else {
+ vm_page_lock(m);
vm_page_flash(m);
+ vm_page_unlock(m);
+ }
/*
* If i == bp->b_pager.pg_reqpage, do not wake
* the page up. The caller needs to.
@@ -1558,7 +1560,7 @@ swp_pager_async_iodone(struct buf *bp)
vm_page_lock(m);
vm_page_activate(m);
vm_page_unlock(m);
- vm_page_io_finish(m);
+ vm_page_sunbusy(m);
}
} else if (bp->b_iocmd == BIO_READ) {
/*
@@ -1575,7 +1577,7 @@ swp_pager_async_iodone(struct buf *bp)
* Note that the requested page, reqpage, is left
* busied, but we still have to wake it up. The
* other pages are released (unbusied) by
- * vm_page_wakeup().
+ * vm_page_xunbusy().
*/
KASSERT(!pmap_page_is_mapped(m),
("swp_pager_async_iodone: page %p is mapped", m));
@@ -1595,9 +1597,12 @@ swp_pager_async_iodone(struct buf *bp)
vm_page_lock(m);
vm_page_deactivate(m);
vm_page_unlock(m);
- vm_page_wakeup(m);
- } else
+ vm_page_xunbusy(m);
+ } else {
+ vm_page_lock(m);
vm_page_flash(m);
+ vm_page_unlock(m);
+ }
} else {
/*
* For write success, clear the dirty
@@ -1608,7 +1613,7 @@ swp_pager_async_iodone(struct buf *bp)
("swp_pager_async_iodone: page %p is not write"
" protected", m));
vm_page_undirty(m);
- vm_page_io_finish(m);
+ vm_page_sunbusy(m);
if (vm_page_count_severe()) {
vm_page_lock(m);
vm_page_try_to_cache(m);
@@ -1706,19 +1711,18 @@ swp_pager_force_pagein(vm_object_t object, vm_pindex_t pindex)
vm_page_t m;
vm_object_pip_add(object, 1);
- m = vm_page_grab(object, pindex, VM_ALLOC_NORMAL | VM_ALLOC_RETRY |
- VM_ALLOC_NOBUSY);
+ m = vm_page_grab(object, pindex, VM_ALLOC_NORMAL | VM_ALLOC_RETRY);
if (m->valid == VM_PAGE_BITS_ALL) {
vm_object_pip_subtract(object, 1);
vm_page_dirty(m);
vm_page_lock(m);
vm_page_activate(m);
vm_page_unlock(m);
+ vm_page_xunbusy(m);
vm_pager_page_unswapped(m);
return;
}
- vm_page_busy(m);
if (swap_pager_getpages(object, &m, 1, 0) != VM_PAGER_OK)
panic("swap_pager_force_pagein: read from swap failed");/*XXX*/
vm_object_pip_subtract(object, 1);
@@ -1726,7 +1730,7 @@ swp_pager_force_pagein(vm_object_t object, vm_pindex_t pindex)
vm_page_lock(m);
vm_page_deactivate(m);
vm_page_unlock(m);
- vm_page_wakeup(m);
+ vm_page_xunbusy(m);
vm_pager_page_unswapped(m);
}
diff --git a/sys/vm/vm_fault.c b/sys/vm/vm_fault.c
index f7f1889..341932a 100644
--- a/sys/vm/vm_fault.c
+++ b/sys/vm/vm_fault.c
@@ -141,7 +141,7 @@ static inline void
release_page(struct faultstate *fs)
{
- vm_page_wakeup(fs->m);
+ vm_page_xunbusy(fs->m);
vm_page_lock(fs->m);
vm_page_deactivate(fs->m);
vm_page_unlock(fs->m);
@@ -353,21 +353,21 @@ RetryFault:;
/*
* Wait/Retry if the page is busy. We have to do this
- * if the page is busy via either VPO_BUSY or
- * vm_page_t->busy because the vm_pager may be using
- * vm_page_t->busy for pageouts ( and even pageins if
- * it is the vnode pager ), and we could end up trying
- * to pagein and pageout the same page simultaneously.
+ * if the page is either exclusive or shared busy
+ * because the vm_pager may be using read busy for
+ * pageouts (and even pageins if it is the vnode
+ * pager), and we could end up trying to pagein and
+ * pageout the same page simultaneously.
*
* We can theoretically allow the busy case on a read
* fault if the page is marked valid, but since such
* pages are typically already pmap'd, putting that
* special case in might be more effort then it is
* worth. We cannot under any circumstances mess
- * around with a vm_page_t->busy page except, perhaps,
+ * around with a shared busied page except, perhaps,
* to pmap it.
*/
- if ((fs.m->oflags & VPO_BUSY) || fs.m->busy) {
+ if (vm_page_busied(fs.m)) {
/*
* Reference the page before unlocking and
* sleeping so that the page daemon is less
@@ -392,8 +392,7 @@ RetryFault:;
unlock_map(&fs);
if (fs.m == vm_page_lookup(fs.object,
fs.pindex)) {
- vm_page_sleep_if_busy(fs.m, TRUE,
- "vmpfw");
+ vm_page_sleep_if_busy(fs.m, "vmpfw");
}
vm_object_pip_wakeup(fs.object);
VM_OBJECT_WUNLOCK(fs.object);
@@ -410,7 +409,7 @@ RetryFault:;
* (readable), jump to readrest, else break-out ( we
* found the page ).
*/
- vm_page_busy(fs.m);
+ vm_page_xbusy(fs.m);
if (fs.m->valid != VM_PAGE_BITS_ALL)
goto readrest;
break;
@@ -516,7 +515,7 @@ readrest:
/*
* Call the pager to retrieve the data, if any, after
* releasing the lock on the map. We hold a ref on
- * fs.object and the pages are VPO_BUSY'd.
+ * fs.object and the pages are exclusive busied.
*/
unlock_map(&fs);
@@ -565,7 +564,7 @@ vnode_locked:
* return value is the index into the marray for the
* vm_page_t passed to the routine.
*
- * fs.m plus the additional pages are VPO_BUSY'd.
+ * fs.m plus the additional pages are exclusive busied.
*/
faultcount = vm_fault_additional_pages(
fs.m, behind, ahead, marray, &reqpage);
@@ -691,8 +690,7 @@ vnode_locked:
}
}
- KASSERT((fs.m->oflags & VPO_BUSY) != 0,
- ("vm_fault: not busy after main loop"));
+ vm_page_assert_xbusied(fs.m);
/*
* PAGE HAS BEEN FOUND. [Loop invariant still holds -- the object lock
@@ -757,7 +755,7 @@ vnode_locked:
vm_page_lock(fs.m);
vm_page_rename(fs.m, fs.first_object, fs.first_pindex);
vm_page_unlock(fs.m);
- vm_page_busy(fs.m);
+ vm_page_xbusy(fs.m);
fs.first_m = fs.m;
fs.m = NULL;
PCPU_INC(cnt.v_cow_optim);
@@ -905,11 +903,8 @@ vnode_locked:
}
}
- /*
- * Page had better still be busy
- */
- KASSERT(fs.m->oflags & VPO_BUSY,
- ("vm_fault: page %p not busy!", fs.m));
+ vm_page_assert_xbusied(fs.m);
+
/*
* Page must be completely valid or it is not fit to
* map into user space. vm_pager_get_pages() ensures this.
@@ -946,7 +941,7 @@ vnode_locked:
vm_page_hold(fs.m);
}
vm_page_unlock(fs.m);
- vm_page_wakeup(fs.m);
+ vm_page_xunbusy(fs.m);
/*
* Unlock everything, and return
@@ -991,13 +986,12 @@ vm_fault_cache_behind(const struct faultstate *fs, int distance)
if (pindex < OFF_TO_IDX(fs->entry->offset))
pindex = OFF_TO_IDX(fs->entry->offset);
m = first_object != object ? fs->first_m : fs->m;
- KASSERT((m->oflags & VPO_BUSY) != 0,
- ("vm_fault_cache_behind: page %p is not busy", m));
+ vm_page_assert_xbusied(m);
m_prev = vm_page_prev(m);
while ((m = m_prev) != NULL && m->pindex >= pindex &&
m->valid == VM_PAGE_BITS_ALL) {
m_prev = vm_page_prev(m);
- if (m->busy != 0 || (m->oflags & VPO_BUSY) != 0)
+ if (vm_page_busied(m))
continue;
vm_page_lock(m);
if (m->hold_count == 0 && m->wire_count == 0) {
@@ -1378,7 +1372,7 @@ vm_fault_copy_entry(vm_map_t dst_map, vm_map_t src_map,
vm_page_activate(dst_m);
vm_page_unlock(dst_m);
}
- vm_page_wakeup(dst_m);
+ vm_page_xunbusy(dst_m);
}
VM_OBJECT_WUNLOCK(dst_object);
if (upgrade) {
diff --git a/sys/vm/vm_glue.c b/sys/vm/vm_glue.c
index 94e07f9..2d4186c 100644
--- a/sys/vm/vm_glue.c
+++ b/sys/vm/vm_glue.c
@@ -233,10 +233,8 @@ vm_imgact_hold_page(vm_object_t object, vm_ooffset_t offset)
VM_OBJECT_WLOCK(object);
pindex = OFF_TO_IDX(offset);
- m = vm_page_grab(object, pindex, VM_ALLOC_NORMAL | VM_ALLOC_RETRY |
- VM_ALLOC_NOBUSY);
+ m = vm_page_grab(object, pindex, VM_ALLOC_NORMAL | VM_ALLOC_RETRY);
if (m->valid != VM_PAGE_BITS_ALL) {
- vm_page_busy(m);
ma[0] = m;
rv = vm_pager_get_pages(object, ma, 1, 0);
m = vm_page_lookup(object, pindex);
@@ -249,8 +247,8 @@ vm_imgact_hold_page(vm_object_t object, vm_ooffset_t offset)
m = NULL;
goto out;
}
- vm_page_wakeup(m);
}
+ vm_page_xunbusy(m);
vm_page_lock(m);
vm_page_hold(m);
vm_page_unlock(m);
@@ -533,13 +531,11 @@ vm_thread_swapin(struct thread *td)
VM_ALLOC_WIRED);
for (i = 0; i < pages; i++) {
if (ma[i]->valid != VM_PAGE_BITS_ALL) {
- KASSERT(ma[i]->oflags & VPO_BUSY,
- ("lost busy 1"));
+ vm_page_assert_xbusied(ma[i]);
vm_object_pip_add(ksobj, 1);
for (j = i + 1; j < pages; j++) {
- KASSERT(ma[j]->valid == VM_PAGE_BITS_ALL ||
- (ma[j]->oflags & VPO_BUSY),
- ("lost busy 2"));
+ if (ma[j]->valid != VM_PAGE_BITS_ALL)
+ vm_page_assert_xbusied(ma[j]);
if (ma[j]->valid == VM_PAGE_BITS_ALL)
break;
}
@@ -550,9 +546,9 @@ vm_thread_swapin(struct thread *td)
vm_object_pip_wakeup(ksobj);
for (k = i; k < j; k++)
ma[k] = vm_page_lookup(ksobj, k);
- vm_page_wakeup(ma[i]);
- } else if (ma[i]->oflags & VPO_BUSY)
- vm_page_wakeup(ma[i]);
+ vm_page_xunbusy(ma[i]);
+ } else if (vm_page_xbusied(ma[i]))
+ vm_page_xunbusy(ma[i]);
}
VM_OBJECT_WUNLOCK(ksobj);
pmap_qenter(td->td_kstack, ma, pages);
diff --git a/sys/vm/vm_object.c b/sys/vm/vm_object.c
index 7c3cad4..74e580f 100644
--- a/sys/vm/vm_object.c
+++ b/sys/vm/vm_object.c
@@ -744,8 +744,7 @@ vm_object_terminate(vm_object_t object)
* the object, the page and object are reset to any empty state.
*/
TAILQ_FOREACH_SAFE(p, &object->memq, listq, p_next) {
- KASSERT(!p->busy && (p->oflags & VPO_BUSY) == 0,
- ("vm_object_terminate: freeing busy page %p", p));
+ vm_page_assert_unbusied(p);
vm_page_lock(p);
/*
* Optimize the page's removal from the object by resetting
@@ -871,7 +870,7 @@ rescan:
np = TAILQ_NEXT(p, listq);
if (p->valid == 0)
continue;
- if (vm_page_sleep_if_busy(p, TRUE, "vpcwai")) {
+ if (vm_page_sleep_if_busy(p, "vpcwai")) {
if (object->generation != curgeneration) {
if ((flags & OBJPC_SYNC) != 0)
goto rescan;
@@ -939,7 +938,7 @@ vm_object_page_collect_flush(vm_object_t object, vm_page_t p, int pagerflags,
for (tp = p; count < vm_pageout_page_count; count++) {
tp = vm_page_next(tp);
- if (tp == NULL || tp->busy != 0 || (tp->oflags & VPO_BUSY) != 0)
+ if (tp == NULL || vm_page_busied(tp))
break;
if (!vm_object_page_remove_write(tp, flags, clearobjflags))
break;
@@ -947,7 +946,7 @@ vm_object_page_collect_flush(vm_object_t object, vm_page_t p, int pagerflags,
for (p_first = p; count < vm_pageout_page_count; count++) {
tp = vm_page_prev(p_first);
- if (tp == NULL || tp->busy != 0 || (tp->oflags & VPO_BUSY) != 0)
+ if (tp == NULL || vm_page_busied(tp))
break;
if (!vm_object_page_remove_write(tp, flags, clearobjflags))
break;
@@ -1156,7 +1155,7 @@ shadowlookup:
("vm_object_madvise: page %p is fictitious", m));
KASSERT((m->oflags & VPO_UNMANAGED) == 0,
("vm_object_madvise: page %p is not managed", m));
- if ((m->oflags & VPO_BUSY) || m->busy) {
+ if (vm_page_busied(m)) {
if (advise == MADV_WILLNEED) {
/*
* Reference the page before unlocking and
@@ -1165,11 +1164,10 @@ shadowlookup:
*/
vm_page_aflag_set(m, PGA_REFERENCED);
}
- vm_page_unlock(m);
if (object != tobject)
VM_OBJECT_WUNLOCK(object);
- m->oflags |= VPO_WANTED;
- VM_OBJECT_SLEEP(tobject, m, PDROP | PVM, "madvpo", 0);
+ VM_OBJECT_WUNLOCK(tobject);
+ vm_page_busy_sleep(m, "madvpo");
VM_OBJECT_WLOCK(object);
goto relookup;
}
@@ -1344,10 +1342,12 @@ retry:
* We do not have to VM_PROT_NONE the page as mappings should
* not be changed by this operation.
*/
- if ((m->oflags & VPO_BUSY) || m->busy) {
+ if (vm_page_busied(m)) {
VM_OBJECT_WUNLOCK(new_object);
- m->oflags |= VPO_WANTED;
- VM_OBJECT_SLEEP(orig_object, m, PVM, "spltwt", 0);
+ vm_page_lock(m);
+ VM_OBJECT_WUNLOCK(orig_object);
+ vm_page_busy_sleep(m, "spltwt");
+ VM_OBJECT_WLOCK(orig_object);
VM_OBJECT_WLOCK(new_object);
goto retry;
}
@@ -1371,7 +1371,7 @@ retry:
vm_page_unlock(m);
/* page automatically made dirty by rename and cache handled */
if (orig_object->type == OBJT_SWAP)
- vm_page_busy(m);
+ vm_page_xbusy(m);
}
if (orig_object->type == OBJT_SWAP) {
/*
@@ -1380,7 +1380,7 @@ retry:
*/
swap_pager_copy(orig_object, new_object, offidxstart, 0);
TAILQ_FOREACH(m, &new_object->memq, listq)
- vm_page_wakeup(m);
+ vm_page_xunbusy(m);
/*
* Transfer any cached pages from orig_object to new_object.
@@ -1496,18 +1496,16 @@ vm_object_backing_scan(vm_object_t object, int op)
vm_page_t pp;
if (op & OBSC_COLLAPSE_NOWAIT) {
- if ((p->oflags & VPO_BUSY) ||
- !p->valid ||
- p->busy) {
+ if (!p->valid || vm_page_busied(p)) {
p = next;
continue;
}
} else if (op & OBSC_COLLAPSE_WAIT) {
- if ((p->oflags & VPO_BUSY) || p->busy) {
+ if (vm_page_busied(p)) {
VM_OBJECT_WUNLOCK(object);
- p->oflags |= VPO_WANTED;
- VM_OBJECT_SLEEP(backing_object, p,
- PDROP | PVM, "vmocol", 0);
+ vm_page_lock(p);
+ VM_OBJECT_WUNLOCK(backing_object);
+ vm_page_busy_sleep(p, "vmocol");
VM_OBJECT_WLOCK(object);
VM_OBJECT_WLOCK(backing_object);
/*
@@ -1905,8 +1903,12 @@ again:
}
goto next;
}
- if (vm_page_sleep_if_busy(p, TRUE, "vmopar"))
+ if (vm_page_busied(p)) {
+ VM_OBJECT_WUNLOCK(object);
+ vm_page_busy_sleep(p, "vmopar");
+ VM_OBJECT_WLOCK(object);
goto again;
+ }
KASSERT((p->flags & PG_FICTITIOUS) == 0,
("vm_object_page_remove: page %p is fictitious", p));
if ((options & OBJPR_CLEANONLY) != 0 && p->valid != 0) {
@@ -2033,7 +2035,7 @@ vm_object_populate(vm_object_t object, vm_pindex_t start, vm_pindex_t end)
if (pindex > start) {
m = vm_page_lookup(object, start);
while (m != NULL && m->pindex < pindex) {
- vm_page_wakeup(m);
+ vm_page_xunbusy(m);
m = TAILQ_NEXT(m, listq);
}
}
diff --git a/sys/vm/vm_page.c b/sys/vm/vm_page.c
index 36689f6..6fccf45 100644
--- a/sys/vm/vm_page.c
+++ b/sys/vm/vm_page.c
@@ -483,66 +483,170 @@ vm_page_reference(vm_page_t m)
vm_page_aflag_set(m, PGA_REFERENCED);
}
+/*
+ * vm_page_busy_downgrade:
+ *
+ * Downgrade an exclusive busy page into a single shared busy page.
+ */
void
-vm_page_busy(vm_page_t m)
+vm_page_busy_downgrade(vm_page_t m)
{
+ u_int x;
- VM_OBJECT_ASSERT_WLOCKED(m->object);
- KASSERT((m->oflags & VPO_BUSY) == 0,
- ("vm_page_busy: page already busy!!!"));
- m->oflags |= VPO_BUSY;
+ vm_page_assert_xbusied(m);
+
+ for (;;) {
+ x = m->busy_lock;
+ x &= VPB_BIT_WAITERS;
+ if (atomic_cmpset_rel_int(&m->busy_lock,
+ VPB_SINGLE_EXCLUSIVER | x, VPB_SHARERS_WORD(1) | x))
+ break;
+ }
+}
+
+/*
+ * vm_page_sbusied:
+ *
+ * Return a positive value if the page is shared busied, 0 otherwise.
+ */
+int
+vm_page_sbusied(vm_page_t m)
+{
+ u_int x;
+
+ x = m->busy_lock;
+ return ((x & VPB_BIT_SHARED) != 0 && x != VPB_UNBUSIED);
}
/*
- * vm_page_flash:
+ * vm_page_sunbusy:
*
- * wakeup anyone waiting for the page.
+ * Shared unbusy a page.
*/
void
-vm_page_flash(vm_page_t m)
+vm_page_sunbusy(vm_page_t m)
{
+ u_int x;
- VM_OBJECT_ASSERT_WLOCKED(m->object);
- if (m->oflags & VPO_WANTED) {
- m->oflags &= ~VPO_WANTED;
+ vm_page_assert_sbusied(m);
+
+ for (;;) {
+ x = m->busy_lock;
+ if (VPB_SHARERS(x) > 1) {
+ if (atomic_cmpset_int(&m->busy_lock, x,
+ x - VPB_ONE_SHARER))
+ break;
+ continue;
+ }
+ if ((x & VPB_BIT_WAITERS) == 0) {
+ KASSERT(x == VPB_SHARERS_WORD(1),
+ ("vm_page_sunbusy: invalid lock state"));
+ if (atomic_cmpset_int(&m->busy_lock,
+ VPB_SHARERS_WORD(1), VPB_UNBUSIED))
+ break;
+ continue;
+ }
+ KASSERT(x == (VPB_SHARERS_WORD(1) | VPB_BIT_WAITERS),
+ ("vm_page_sunbusy: invalid lock state for waiters"));
+
+ vm_page_lock(m);
+ if (!atomic_cmpset_int(&m->busy_lock, x, VPB_UNBUSIED)) {
+ vm_page_unlock(m);
+ continue;
+ }
wakeup(m);
+ vm_page_unlock(m);
+ break;
}
}
/*
- * vm_page_wakeup:
+ * vm_page_busy_sleep:
*
- * clear the VPO_BUSY flag and wakeup anyone waiting for the
- * page.
+ * Sleep and release the page lock, using the page pointer as wchan.
+ * This is used to implement the hard-path of busying mechanism.
*
+ * The given page must be locked.
*/
void
-vm_page_wakeup(vm_page_t m)
+vm_page_busy_sleep(vm_page_t m, const char *wmesg)
{
+ u_int x;
- VM_OBJECT_ASSERT_WLOCKED(m->object);
- KASSERT(m->oflags & VPO_BUSY, ("vm_page_wakeup: page not busy!!!"));
- m->oflags &= ~VPO_BUSY;
- vm_page_flash(m);
+ vm_page_lock_assert(m, MA_OWNED);
+
+ x = m->busy_lock;
+ if (x == VPB_UNBUSIED) {
+ vm_page_unlock(m);
+ return;
+ }
+ if ((x & VPB_BIT_WAITERS) == 0 &&
+ !atomic_cmpset_int(&m->busy_lock, x, x | VPB_BIT_WAITERS)) {
+ vm_page_unlock(m);
+ return;
+ }
+ msleep(m, vm_page_lockptr(m), PVM | PDROP, wmesg, 0);
+}
+
+/*
+ * vm_page_trysbusy:
+ *
+ * Try to shared busy a page.
+ * If the operation succeeds 1 is returned otherwise 0.
+ * The operation never sleeps.
+ */
+int
+vm_page_trysbusy(vm_page_t m)
+{
+ u_int x;
+
+ x = m->busy_lock;
+ return ((x & VPB_BIT_SHARED) != 0 &&
+ atomic_cmpset_acq_int(&m->busy_lock, x, x + VPB_ONE_SHARER));
}
+/*
+ * vm_page_xunbusy_hard:
+ *
+ * Called after the first try the exclusive unbusy of a page failed.
+ * It is assumed that the waiters bit is on.
+ */
void
-vm_page_io_start(vm_page_t m)
+vm_page_xunbusy_hard(vm_page_t m)
{
- VM_OBJECT_ASSERT_WLOCKED(m->object);
- m->busy++;
+ vm_page_assert_xbusied(m);
+
+ vm_page_lock(m);
+ atomic_store_rel_int(&m->busy_lock, VPB_UNBUSIED);
+ wakeup(m);
+ vm_page_unlock(m);
}
+/*
+ * vm_page_flash:
+ *
+ * Wakeup anyone waiting for the page.
+ * The ownership bits do not change.
+ *
+ * The given page must be locked.
+ */
void
-vm_page_io_finish(vm_page_t m)
+vm_page_flash(vm_page_t m)
{
+ u_int x;
- VM_OBJECT_ASSERT_WLOCKED(m->object);
- KASSERT(m->busy > 0, ("vm_page_io_finish: page %p is not busy", m));
- m->busy--;
- if (m->busy == 0)
- vm_page_flash(m);
+ vm_page_lock_assert(m, MA_OWNED);
+
+ for (;;) {
+ x = m->busy_lock;
+ if ((x & VPB_BIT_WAITERS) == 0)
+ return;
+ if (atomic_cmpset_int(&m->busy_lock, x,
+ x & (~VPB_BIT_WAITERS)))
+ break;
+ }
+ wakeup(m);
}
/*
@@ -657,7 +761,8 @@ vm_page_initfake(vm_page_t m, vm_paddr_t paddr, vm_memattr_t memattr)
/* Fictitious pages don't use "segind". */
m->flags = PG_FICTITIOUS;
/* Fictitious pages don't use "order" or "pool". */
- m->oflags = VPO_BUSY | VPO_UNMANAGED;
+ m->oflags = VPO_UNMANAGED;
+ m->busy_lock = VPB_SINGLE_EXCLUSIVER;
m->wire_count = 1;
pmap_page_init(m);
memattr:
@@ -737,16 +842,13 @@ vm_page_readahead_finish(vm_page_t m)
* deactivating the page is usually the best choice,
* unless the page is wanted by another thread.
*/
- if (m->oflags & VPO_WANTED) {
- vm_page_lock(m);
+ vm_page_lock(m);
+ if ((m->busy_lock & VPB_BIT_WAITERS) != 0)
vm_page_activate(m);
- vm_page_unlock(m);
- } else {
- vm_page_lock(m);
+ else
vm_page_deactivate(m);
- vm_page_unlock(m);
- }
- vm_page_wakeup(m);
+ vm_page_unlock(m);
+ vm_page_xunbusy(m);
} else {
/*
* Free the completely invalid page. Such page state
@@ -761,29 +863,38 @@ vm_page_readahead_finish(vm_page_t m)
}
/*
- * vm_page_sleep:
+ * vm_page_sleep_if_busy:
*
- * Sleep and release the page lock.
+ * Sleep and release the page queues lock if the page is busied.
+ * Returns TRUE if the thread slept.
*
- * The object containing the given page must be locked.
+ * The given page must be unlocked and object containing it must
+ * be locked.
*/
-void
-vm_page_sleep(vm_page_t m, const char *msg)
+int
+vm_page_sleep_if_busy(vm_page_t m, const char *msg)
{
+ vm_object_t obj;
+ vm_page_lock_assert(m, MA_NOTOWNED);
VM_OBJECT_ASSERT_WLOCKED(m->object);
- if (mtx_owned(vm_page_lockptr(m)))
- vm_page_unlock(m);
- /*
- * It's possible that while we sleep, the page will get
- * unbusied and freed. If we are holding the object
- * lock, we will assume we hold a reference to the object
- * such that even if m->object changes, we can re-lock
- * it.
- */
- m->oflags |= VPO_WANTED;
- VM_OBJECT_SLEEP(m->object, m, PVM, msg, 0);
+ if (vm_page_busied(m)) {
+ /*
+ * The page-specific object must be cached because page
+ * identity can change during the sleep, causing the
+ * re-lock of a different object.
+ * It is assumed that a reference to the object is already
+ * held by the callers.
+ */
+ obj = m->object;
+ vm_page_lock(m);
+ VM_OBJECT_WUNLOCK(obj);
+ vm_page_busy_sleep(m, msg);
+ VM_OBJECT_WLOCK(obj);
+ return (TRUE);
+ }
+ return (FALSE);
}
/*
@@ -908,15 +1019,24 @@ void
vm_page_remove(vm_page_t m)
{
vm_object_t object;
+ boolean_t lockacq;
if ((m->oflags & VPO_UNMANAGED) == 0)
vm_page_lock_assert(m, MA_OWNED);
if ((object = m->object) == NULL)
return;
VM_OBJECT_ASSERT_WLOCKED(object);
- if (m->oflags & VPO_BUSY) {
- m->oflags &= ~VPO_BUSY;
+ if (vm_page_xbusied(m)) {
+ lockacq = FALSE;
+ if ((m->oflags & VPO_UNMANAGED) != 0 &&
+ !mtx_owned(vm_page_lockptr(m))) {
+ lockacq = TRUE;
+ vm_page_lock(m);
+ }
vm_page_flash(m);
+ atomic_store_rel_int(&m->busy_lock, VPB_UNBUSIED);
+ if (lockacq)
+ vm_page_unlock(m);
}
/*
@@ -1185,8 +1305,7 @@ vm_page_is_cached(vm_object_t object, vm_pindex_t pindex)
* vm_page_alloc:
*
* Allocate and return a page that is associated with the specified
- * object and offset pair. By default, this page has the flag VPO_BUSY
- * set.
+ * object and offset pair. By default, this page is exclusive busied.
*
* The caller must always specify an allocation class.
*
@@ -1201,10 +1320,11 @@ vm_page_is_cached(vm_object_t object, vm_pindex_t pindex)
* VM_ALLOC_IFCACHED return page only if it is cached
* VM_ALLOC_IFNOTCACHED return NULL, do not reactivate if the page
* is cached
- * VM_ALLOC_NOBUSY do not set the flag VPO_BUSY on the page
+ * VM_ALLOC_NOBUSY do not exclusive busy the page
* VM_ALLOC_NODUMP do not include the page in a kernel core dump
* VM_ALLOC_NOOBJ page is not associated with an object and
- * should not have the flag VPO_BUSY set
+ * should not be exclusive busy
+ * VM_ALLOC_SBUSY shared busy the allocated page
* VM_ALLOC_WIRED wire the allocated page
* VM_ALLOC_ZERO prefer a zeroed page
*
@@ -1219,8 +1339,12 @@ vm_page_alloc(vm_object_t object, vm_pindex_t pindex, int req)
int flags, req_class;
mpred = 0; /* XXX: pacify gcc */
- KASSERT((object != NULL) == ((req & VM_ALLOC_NOOBJ) == 0),
- ("vm_page_alloc: inconsistent object/req"));
+ KASSERT((object != NULL) == ((req & VM_ALLOC_NOOBJ) == 0) &&
+ (object != NULL || (req & VM_ALLOC_SBUSY) == 0) &&
+ ((req & (VM_ALLOC_NOBUSY | VM_ALLOC_SBUSY)) !=
+ (VM_ALLOC_NOBUSY | VM_ALLOC_SBUSY)),
+ ("vm_page_alloc: inconsistent object(%p)/req(%x)", (void *)object,
+ req));
if (object != NULL)
VM_OBJECT_ASSERT_WLOCKED(object);
@@ -1301,7 +1425,8 @@ vm_page_alloc(vm_object_t object, vm_pindex_t pindex, int req)
("vm_page_alloc: page %p has unexpected queue %d", m, m->queue));
KASSERT(m->wire_count == 0, ("vm_page_alloc: page %p is wired", m));
KASSERT(m->hold_count == 0, ("vm_page_alloc: page %p is held", m));
- KASSERT(m->busy == 0, ("vm_page_alloc: page %p is busy", m));
+ KASSERT(!vm_page_sbusied(m),
+ ("vm_page_alloc: page %p is busy", m));
KASSERT(m->dirty == 0, ("vm_page_alloc: page %p is dirty", m));
KASSERT(pmap_page_get_memattr(m) == VM_MEMATTR_DEFAULT,
("vm_page_alloc: page %p has unexpected memattr %d", m,
@@ -1345,8 +1470,11 @@ vm_page_alloc(vm_object_t object, vm_pindex_t pindex, int req)
m->aflags = 0;
m->oflags = object == NULL || (object->flags & OBJ_UNMANAGED) != 0 ?
VPO_UNMANAGED : 0;
- if ((req & (VM_ALLOC_NOBUSY | VM_ALLOC_NOOBJ)) == 0)
- m->oflags |= VPO_BUSY;
+ m->busy_lock = VPB_UNBUSIED;
+ if ((req & (VM_ALLOC_NOBUSY | VM_ALLOC_NOOBJ | VM_ALLOC_SBUSY)) == 0)
+ m->busy_lock = VPB_SINGLE_EXCLUSIVER;
+ if ((req & VM_ALLOC_SBUSY) != 0)
+ m->busy_lock = VPB_SHARERS_WORD(1);
if (req & VM_ALLOC_WIRED) {
/*
* The page lock is not required for wiring a page until that
@@ -1414,9 +1542,10 @@ vm_page_alloc(vm_object_t object, vm_pindex_t pindex, int req)
* VM_ALLOC_INTERRUPT interrupt time request
*
* optional allocation flags:
- * VM_ALLOC_NOBUSY do not set the flag VPO_BUSY on the page
+ * VM_ALLOC_NOBUSY do not exclusive busy the page
* VM_ALLOC_NOOBJ page is not associated with an object and
- * should not have the flag VPO_BUSY set
+ * should not be exclusive busy
+ * VM_ALLOC_SBUSY shared busy the allocated page
* VM_ALLOC_WIRED wire the allocated page
* VM_ALLOC_ZERO prefer a zeroed page
*
@@ -1432,8 +1561,12 @@ vm_page_alloc_contig(vm_object_t object, vm_pindex_t pindex, int req,
u_int flags, oflags;
int req_class;
- KASSERT((object != NULL) == ((req & VM_ALLOC_NOOBJ) == 0),
- ("vm_page_alloc_contig: inconsistent object/req"));
+ KASSERT((object != NULL) == ((req & VM_ALLOC_NOOBJ) == 0) &&
+ (object != NULL || (req & VM_ALLOC_SBUSY) == 0) &&
+ ((req & (VM_ALLOC_NOBUSY | VM_ALLOC_SBUSY)) !=
+ (VM_ALLOC_NOBUSY | VM_ALLOC_SBUSY)),
+ ("vm_page_alloc: inconsistent object(%p)/req(%x)", (void *)object,
+ req));
if (object != NULL) {
VM_OBJECT_ASSERT_WLOCKED(object);
KASSERT(object->type == OBJT_PHYS,
@@ -1509,8 +1642,6 @@ retry:
atomic_add_int(&cnt.v_wire_count, npages);
oflags = VPO_UNMANAGED;
if (object != NULL) {
- if ((req & VM_ALLOC_NOBUSY) == 0)
- oflags |= VPO_BUSY;
if (object->memattr != VM_MEMATTR_DEFAULT &&
memattr == VM_MEMATTR_DEFAULT)
memattr = object->memattr;
@@ -1518,6 +1649,13 @@ retry:
for (m = m_ret; m < &m_ret[npages]; m++) {
m->aflags = 0;
m->flags = (m->flags | PG_NODUMP) & flags;
+ m->busy_lock = VPB_UNBUSIED;
+ if (object != NULL) {
+ if ((req & (VM_ALLOC_NOBUSY | VM_ALLOC_SBUSY)) == 0)
+ m->busy_lock = VPB_SINGLE_EXCLUSIVER;
+ if ((req & VM_ALLOC_SBUSY) != 0)
+ m->busy_lock = VPB_SHARERS_WORD(1);
+ }
if ((req & VM_ALLOC_WIRED) != 0)
m->wire_count = 1;
/* Unmanaged pages don't use "act_count". */
@@ -1560,7 +1698,7 @@ vm_page_alloc_init(vm_page_t m)
("vm_page_alloc_init: page %p is wired", m));
KASSERT(m->hold_count == 0,
("vm_page_alloc_init: page %p is held", m));
- KASSERT(m->busy == 0,
+ KASSERT(!vm_page_sbusied(m),
("vm_page_alloc_init: page %p is busy", m));
KASSERT(m->dirty == 0,
("vm_page_alloc_init: page %p is dirty", m));
@@ -1926,7 +2064,7 @@ vm_page_free_toq(vm_page_t m)
if (VM_PAGE_IS_FREE(m))
panic("vm_page_free: freeing free page %p", m);
- else if (m->busy != 0)
+ else if (vm_page_sbusied(m))
panic("vm_page_free: freeing busy page %p", m);
/*
@@ -2137,8 +2275,8 @@ vm_page_try_to_cache(vm_page_t m)
vm_page_lock_assert(m, MA_OWNED);
VM_OBJECT_ASSERT_WLOCKED(m->object);
- if (m->dirty || m->hold_count || m->busy || m->wire_count ||
- (m->oflags & (VPO_BUSY | VPO_UNMANAGED)) != 0)
+ if (m->dirty || m->hold_count || m->wire_count ||
+ (m->oflags & VPO_UNMANAGED) != 0 || vm_page_busied(m))
return (0);
pmap_remove_all(m);
if (m->dirty)
@@ -2160,8 +2298,8 @@ vm_page_try_to_free(vm_page_t m)
vm_page_lock_assert(m, MA_OWNED);
if (m->object != NULL)
VM_OBJECT_ASSERT_WLOCKED(m->object);
- if (m->dirty || m->hold_count || m->busy || m->wire_count ||
- (m->oflags & (VPO_BUSY | VPO_UNMANAGED)) != 0)
+ if (m->dirty || m->hold_count || m->wire_count ||
+ (m->oflags & VPO_UNMANAGED) != 0 || vm_page_busied(m))
return (0);
pmap_remove_all(m);
if (m->dirty)
@@ -2186,7 +2324,7 @@ vm_page_cache(vm_page_t m)
vm_page_lock_assert(m, MA_OWNED);
object = m->object;
VM_OBJECT_ASSERT_WLOCKED(object);
- if ((m->oflags & (VPO_UNMANAGED | VPO_BUSY)) || m->busy ||
+ if (vm_page_busied(m) || (m->oflags & VPO_UNMANAGED) ||
m->hold_count || m->wire_count)
panic("vm_page_cache: attempting to cache busy page");
KASSERT(!pmap_page_is_mapped(m),
@@ -2372,21 +2510,29 @@ vm_page_t
vm_page_grab(vm_object_t object, vm_pindex_t pindex, int allocflags)
{
vm_page_t m;
+ int sleep;
VM_OBJECT_ASSERT_WLOCKED(object);
KASSERT((allocflags & VM_ALLOC_RETRY) != 0,
("vm_page_grab: VM_ALLOC_RETRY is required"));
+ KASSERT((allocflags & VM_ALLOC_SBUSY) == 0 ||
+ (allocflags & VM_ALLOC_IGN_SBUSY) != 0,
+ ("vm_page_grab: VM_ALLOC_SBUSY/VM_ALLOC_IGN_SBUSY mismatch"));
retrylookup:
if ((m = vm_page_lookup(object, pindex)) != NULL) {
- if ((m->oflags & VPO_BUSY) != 0 ||
- ((allocflags & VM_ALLOC_IGN_SBUSY) == 0 && m->busy != 0)) {
+ sleep = (allocflags & VM_ALLOC_IGN_SBUSY) != 0 ?
+ vm_page_xbusied(m) : vm_page_busied(m);
+ if (sleep) {
/*
* Reference the page before unlocking and
* sleeping so that the page daemon is less
* likely to reclaim it.
*/
vm_page_aflag_set(m, PGA_REFERENCED);
- vm_page_sleep(m, "pgrbwt");
+ vm_page_lock(m);
+ VM_OBJECT_WUNLOCK(object);
+ vm_page_busy_sleep(m, "pgrbwt");
+ VM_OBJECT_WLOCK(object);
goto retrylookup;
} else {
if ((allocflags & VM_ALLOC_WIRED) != 0) {
@@ -2394,8 +2540,11 @@ retrylookup:
vm_page_wire(m);
vm_page_unlock(m);
}
- if ((allocflags & VM_ALLOC_NOBUSY) == 0)
- vm_page_busy(m);
+ if ((allocflags &
+ (VM_ALLOC_NOBUSY | VM_ALLOC_SBUSY)) == 0)
+ vm_page_xbusy(m);
+ if ((allocflags & VM_ALLOC_SBUSY) != 0)
+ vm_page_sbusy(m);
return (m);
}
}
@@ -2503,12 +2652,12 @@ vm_page_clear_dirty_mask(vm_page_t m, vm_page_bits_t pagebits)
#endif
/*
- * If the object is locked and the page is neither VPO_BUSY nor
+ * If the object is locked and the page is neither exclusive busy nor
* write mapped, then the page's dirty field cannot possibly be
* set by a concurrent pmap operation.
*/
VM_OBJECT_ASSERT_WLOCKED(m->object);
- if ((m->oflags & VPO_BUSY) == 0 && !pmap_page_is_write_mapped(m))
+ if (!vm_page_xbusied(m) && !pmap_page_is_write_mapped(m))
m->dirty &= ~pagebits;
else {
/*
@@ -2717,7 +2866,7 @@ vm_page_is_valid(vm_page_t m, int base, int size)
{
vm_page_bits_t bits;
- VM_OBJECT_ASSERT_WLOCKED(m->object);
+ VM_OBJECT_ASSERT_LOCKED(m->object);
bits = vm_page_bits(base, size);
return (m->valid != 0 && (m->valid & bits) == bits);
}
@@ -2877,12 +3026,11 @@ vm_page_object_lock_assert(vm_page_t m)
/*
* Certain of the page's fields may only be modified by the
- * holder of the containing object's lock or the setter of the
- * page's VPO_BUSY flag. Unfortunately, the setter of the
- * VPO_BUSY flag is not recorded, and thus cannot be checked
- * here.
+ * holder of the containing object's lock or the exclusive busy
+ * holder. Unfortunately, the holder of the write busy is
+ * not recorded, and thus cannot be checked here.
*/
- if (m->object != NULL && (m->oflags & VPO_BUSY) == 0)
+ if (m->object != NULL && !vm_page_xbusied(m))
VM_OBJECT_ASSERT_WLOCKED(m->object);
}
#endif
@@ -2942,9 +3090,9 @@ DB_SHOW_COMMAND(pginfo, vm_page_print_pginfo)
m = (vm_page_t)addr;
db_printf(
"page %p obj %p pidx 0x%jx phys 0x%jx q %d hold %d wire %d\n"
- " af 0x%x of 0x%x f 0x%x act %d busy %d valid 0x%x dirty 0x%x\n",
+ " af 0x%x of 0x%x f 0x%x act %d busy %x valid 0x%x dirty 0x%x\n",
m, m->object, (uintmax_t)m->pindex, (uintmax_t)m->phys_addr,
m->queue, m->hold_count, m->wire_count, m->aflags, m->oflags,
- m->flags, m->act_count, m->busy, m->valid, m->dirty);
+ m->flags, m->act_count, m->busy_lock, m->valid, m->dirty);
}
#endif /* DDB */
diff --git a/sys/vm/vm_page.h b/sys/vm/vm_page.h
index d8de0de..53cf449 100644
--- a/sys/vm/vm_page.h
+++ b/sys/vm/vm_page.h
@@ -144,11 +144,12 @@ struct vm_page {
uint8_t oflags; /* page VPO_* flags (O) */
uint16_t flags; /* page PG_* flags (P) */
u_char act_count; /* page usage count (P) */
- u_char busy; /* page busy count (O) */
+ u_char __pad0; /* unused padding */
/* NOTE that these must support one bit per DEV_BSIZE in a page!!! */
/* so, on normal X86 kernels, they must be at least 8 bits wide */
vm_page_bits_t valid; /* map of valid DEV_BSIZE chunks (O) */
vm_page_bits_t dirty; /* map of dirty DEV_BSIZE chunks (M) */
+ volatile u_int busy_lock; /* busy owners lock */
};
/*
@@ -165,12 +166,35 @@ struct vm_page {
* mappings, and such pages are also not on any PQ queue.
*
*/
-#define VPO_BUSY 0x01 /* page is in transit */
-#define VPO_WANTED 0x02 /* someone is waiting for page */
+#define VPO_UNUSED01 0x01 /* --available-- */
+#define VPO_SWAPSLEEP 0x02 /* waiting for swap to finish */
#define VPO_UNMANAGED 0x04 /* no PV management for page */
#define VPO_SWAPINPROG 0x08 /* swap I/O in progress on page */
#define VPO_NOSYNC 0x10 /* do not collect for syncer */
+/*
+ * Busy page implementation details.
+ * The algorithm is taken mostly by rwlock(9) and sx(9) locks implementation,
+ * even if the support for owner identity is removed because of size
+ * constraints. Checks on lock recursion are then not possible, while the
+ * lock assertions effectiveness is someway reduced.
+ */
+#define VPB_BIT_SHARED 0x01
+#define VPB_BIT_EXCLUSIVE 0x02
+#define VPB_BIT_WAITERS 0x04
+#define VPB_BIT_FLAGMASK \
+ (VPB_BIT_SHARED | VPB_BIT_EXCLUSIVE | VPB_BIT_WAITERS)
+
+#define VPB_SHARERS_SHIFT 3
+#define VPB_SHARERS(x) \
+ (((x) & ~VPB_BIT_FLAGMASK) >> VPB_SHARERS_SHIFT)
+#define VPB_SHARERS_WORD(x) ((x) << VPB_SHARERS_SHIFT | VPB_BIT_SHARED)
+#define VPB_ONE_SHARER (1 << VPB_SHARERS_SHIFT)
+
+#define VPB_SINGLE_EXCLUSIVER VPB_BIT_EXCLUSIVE
+
+#define VPB_UNBUSIED VPB_SHARERS_WORD(0)
+
#define PQ_NONE 255
#define PQ_INACTIVE 0
#define PQ_ACTIVE 1
@@ -274,8 +298,9 @@ extern struct mtx_padalign pa_lock[];
* directly set this flag. They should call vm_page_reference() instead.
*
* PGA_WRITEABLE is set exclusively on managed pages by pmap_enter(). When it
- * does so, the page must be VPO_BUSY. The MI VM layer must never access this
- * flag directly. Instead, it should call pmap_page_is_write_mapped().
+ * does so, the page must be exclusive busied. The MI VM layer must never
+ * access this flag directly. Instead, it should call
+ * pmap_page_is_write_mapped().
*
* PGA_EXECUTABLE may be set by pmap routines, and indicates that a page has
* at least one executable mapping. It is not consumed by the MI VM layer.
@@ -362,6 +387,7 @@ vm_page_t PHYS_TO_VM_PAGE(vm_paddr_t pa);
#define VM_ALLOC_IFNOTCACHED 0x0800 /* Fail if the page is cached */
#define VM_ALLOC_IGN_SBUSY 0x1000 /* vm_page_grab() only */
#define VM_ALLOC_NODUMP 0x2000 /* don't include in dump */
+#define VM_ALLOC_SBUSY 0x4000 /* Shared busy the page */
#define VM_ALLOC_COUNT_SHIFT 16
#define VM_ALLOC_COUNT(count) ((count) << VM_ALLOC_COUNT_SHIFT)
@@ -385,15 +411,13 @@ malloc2vm_flags(int malloc_flags)
}
#endif
-void vm_page_busy(vm_page_t m);
+void vm_page_busy_downgrade(vm_page_t m);
+void vm_page_busy_sleep(vm_page_t m, const char *msg);
void vm_page_flash(vm_page_t m);
-void vm_page_io_start(vm_page_t m);
-void vm_page_io_finish(vm_page_t m);
void vm_page_hold(vm_page_t mem);
void vm_page_unhold(vm_page_t mem);
void vm_page_free(vm_page_t m);
void vm_page_free_zero(vm_page_t m);
-void vm_page_wakeup(vm_page_t m);
void vm_page_activate (vm_page_t);
void vm_page_advise(vm_page_t m, int advice);
@@ -428,13 +452,17 @@ void vm_page_remove (vm_page_t);
void vm_page_rename (vm_page_t, vm_object_t, vm_pindex_t);
void vm_page_requeue(vm_page_t m);
void vm_page_requeue_locked(vm_page_t m);
+int vm_page_sbusied(vm_page_t m);
void vm_page_set_valid_range(vm_page_t m, int base, int size);
-void vm_page_sleep(vm_page_t m, const char *msg);
+int vm_page_sleep_if_busy(vm_page_t m, const char *msg);
vm_offset_t vm_page_startup(vm_offset_t vaddr);
+void vm_page_sunbusy(vm_page_t m);
+int vm_page_trysbusy(vm_page_t m);
void vm_page_unhold_pages(vm_page_t *ma, int count);
void vm_page_unwire (vm_page_t, int);
void vm_page_updatefake(vm_page_t m, vm_paddr_t paddr, vm_memattr_t memattr);
void vm_page_wire (vm_page_t);
+void vm_page_xunbusy_hard(vm_page_t m);
void vm_page_set_validclean (vm_page_t, int, int);
void vm_page_clear_dirty (vm_page_t, int, int);
void vm_page_set_invalid (vm_page_t, int, int);
@@ -457,6 +485,48 @@ void vm_page_assert_locked_KBI(vm_page_t m, const char *file, int line);
void vm_page_lock_assert_KBI(vm_page_t m, int a, const char *file, int line);
#endif
+#define vm_page_assert_sbusied(m) \
+ KASSERT(vm_page_sbusied(m), \
+ ("vm_page_assert_sbusied: page %p not shared busy @ %s:%d", \
+ (void *)m, __FILE__, __LINE__));
+
+#define vm_page_assert_unbusied(m) \
+ KASSERT(!vm_page_busied(m), \
+ ("vm_page_assert_unbusied: page %p busy @ %s:%d", \
+ (void *)m, __FILE__, __LINE__));
+
+#define vm_page_assert_xbusied(m) \
+ KASSERT(vm_page_xbusied(m), \
+ ("vm_page_assert_xbusied: page %p not exclusive busy @ %s:%d", \
+ (void *)m, __FILE__, __LINE__));
+
+#define vm_page_busied(m) \
+ ((m)->busy_lock != VPB_UNBUSIED)
+
+#define vm_page_sbusy(m) do { \
+ if (!vm_page_trysbusy(m)) \
+ panic("%s: page %p failed shared busing", __func__, m); \
+} while (0)
+
+#define vm_page_tryxbusy(m) \
+ (atomic_cmpset_acq_int(&m->busy_lock, VPB_UNBUSIED, \
+ VPB_SINGLE_EXCLUSIVER))
+
+#define vm_page_xbusied(m) \
+ ((m->busy_lock & VPB_SINGLE_EXCLUSIVER) != 0)
+
+#define vm_page_xbusy(m) do { \
+ if (!vm_page_tryxbusy(m)) \
+ panic("%s: page %p failed exclusive busing", __func__, \
+ m); \
+} while (0)
+
+#define vm_page_xunbusy(m) do { \
+ if (!atomic_cmpset_rel_int(&(m)->busy_lock, \
+ VPB_SINGLE_EXCLUSIVER, VPB_UNBUSIED)) \
+ vm_page_xunbusy_hard(m); \
+} while (0)
+
#ifdef INVARIANTS
void vm_page_object_lock_assert(vm_page_t m);
#define VM_PAGE_OBJECT_LOCK_ASSERT(m) vm_page_object_lock_assert(m)
@@ -511,11 +581,11 @@ vm_page_aflag_set(vm_page_t m, uint8_t bits)
/*
* The PGA_WRITEABLE flag can only be set if the page is managed and
- * VPO_BUSY. Currently, this flag is only set by pmap_enter().
+ * exclusive busied. Currently, this flag is only set by pmap_enter().
*/
KASSERT((bits & PGA_WRITEABLE) == 0 ||
- (m->oflags & (VPO_UNMANAGED | VPO_BUSY)) == VPO_BUSY,
- ("vm_page_aflag_set: PGA_WRITEABLE and !VPO_BUSY"));
+ ((m->oflags & VPO_UNMANAGED) == 0 && vm_page_xbusied(m)),
+ ("vm_page_aflag_set: PGA_WRITEABLE and not exclusive busy"));
/*
* Access the whole 32-bit word containing the aflags field with an
@@ -571,27 +641,6 @@ vm_page_remque(vm_page_t m)
}
/*
- * vm_page_sleep_if_busy:
- *
- * Sleep and release the page queues lock if VPO_BUSY is set or,
- * if also_m_busy is TRUE, busy is non-zero. Returns TRUE if the
- * thread slept and the page queues lock was released.
- * Otherwise, retains the page queues lock and returns FALSE.
- *
- * The object containing the given page must be locked.
- */
-static __inline int
-vm_page_sleep_if_busy(vm_page_t m, int also_m_busy, const char *msg)
-{
-
- if ((m->oflags & VPO_BUSY) || (also_m_busy && m->busy)) {
- vm_page_sleep(m, msg);
- return (TRUE);
- }
- return (FALSE);
-}
-
-/*
* vm_page_undirty:
*
* Set page to not be dirty. Note: does not clear pmap modify bits
diff --git a/sys/vm/vm_pageout.c b/sys/vm/vm_pageout.c
index 5bdc464..f801603 100644
--- a/sys/vm/vm_pageout.c
+++ b/sys/vm/vm_pageout.c
@@ -232,8 +232,8 @@ static void vm_pageout_page_stats(struct vm_domain *vmd);
/*
* Initialize a dummy page for marking the caller's place in the specified
* paging queue. In principle, this function only needs to set the flag
- * PG_MARKER. Nonetheless, it sets the flag VPO_BUSY and initializes the hold
- * count to one as safety precautions.
+ * PG_MARKER. Nonetheless, it write busies and initializes the hold count
+ * to one as safety precautions.
*/
static void
vm_pageout_init_marker(vm_page_t marker, u_short queue)
@@ -241,7 +241,7 @@ vm_pageout_init_marker(vm_page_t marker, u_short queue)
bzero(marker, sizeof(*marker));
marker->flags = PG_MARKER;
- marker->oflags = VPO_BUSY;
+ marker->busy_lock = VPB_SINGLE_EXCLUSIVER;
marker->queue = queue;
marker->hold_count = 1;
}
@@ -361,8 +361,7 @@ vm_pageout_clean(vm_page_t m)
/*
* Can't clean the page if it's busy or held.
*/
- KASSERT(m->busy == 0 && (m->oflags & VPO_BUSY) == 0,
- ("vm_pageout_clean: page %p is busy", m));
+ vm_page_assert_unbusied(m);
KASSERT(m->hold_count == 0, ("vm_pageout_clean: page %p is held", m));
vm_page_unlock(m);
@@ -400,8 +399,7 @@ more:
break;
}
- if ((p = vm_page_prev(pb)) == NULL ||
- (p->oflags & VPO_BUSY) != 0 || p->busy != 0) {
+ if ((p = vm_page_prev(pb)) == NULL || vm_page_busied(p)) {
ib = 0;
break;
}
@@ -430,8 +428,7 @@ more:
pindex + is < object->size) {
vm_page_t p;
- if ((p = vm_page_next(ps)) == NULL ||
- (p->oflags & VPO_BUSY) != 0 || p->busy != 0)
+ if ((p = vm_page_next(ps)) == NULL || vm_page_busied(p))
break;
vm_page_lock(p);
vm_page_test_dirty(p);
@@ -501,7 +498,7 @@ vm_pageout_flush(vm_page_t *mc, int count, int flags, int mreq, int *prunlen,
KASSERT(mc[i]->valid == VM_PAGE_BITS_ALL,
("vm_pageout_flush: partially invalid page %p index %d/%d",
mc[i], i, count));
- vm_page_io_start(mc[i]);
+ vm_page_sbusy(mc[i]);
pmap_remove_write(mc[i]);
}
vm_object_pip_add(object, count);
@@ -557,7 +554,7 @@ vm_pageout_flush(vm_page_t *mc, int count, int flags, int mreq, int *prunlen,
*/
if (pageout_status[i] != VM_PAGER_PEND) {
vm_object_pip_wakeup(object);
- vm_page_io_finish(mt);
+ vm_page_sunbusy(mt);
if (vm_page_count_severe()) {
vm_page_lock(mt);
vm_page_try_to_cache(mt);
@@ -594,8 +591,7 @@ vm_pageout_launder(struct vm_pagequeue *pq, int tries, vm_paddr_t low,
object = m->object;
if ((!VM_OBJECT_TRYWLOCK(object) &&
(!vm_pageout_fallback_object_lock(m, &next) ||
- m->hold_count != 0)) || (m->oflags & VPO_BUSY) != 0 ||
- m->busy != 0) {
+ m->hold_count != 0)) || vm_page_busied(m)) {
vm_page_unlock(m);
VM_OBJECT_WUNLOCK(object);
continue;
@@ -767,7 +763,7 @@ vm_pageout_object_deactivate_pages(pmap_t pmap, vm_object_t first_object,
TAILQ_FOREACH(p, &object->memq, listq) {
if (pmap_resident_count(pmap) <= desired)
goto unlock_return;
- if ((p->oflags & VPO_BUSY) != 0 || p->busy != 0)
+ if (vm_page_busied(p))
continue;
PCPU_INC(cnt.v_pdpages);
vm_page_lock(p);
@@ -1005,7 +1001,7 @@ vm_pageout_scan(struct vm_domain *vmd, int pass)
* pages, because they may leave the inactive queue
* shortly after page scan is finished.
*/
- if (m->busy != 0 || (m->oflags & VPO_BUSY) != 0) {
+ if (vm_page_busied(m)) {
vm_page_unlock(m);
VM_OBJECT_WUNLOCK(object);
addl_page_shortage++;
@@ -1224,7 +1220,7 @@ vm_pageout_scan(struct vm_domain *vmd, int pass)
* page back onto the end of the queue so that
* statistics are more correct if we don't.
*/
- if (m->busy || (m->oflags & VPO_BUSY)) {
+ if (vm_page_busied(m)) {
vm_page_unlock(m);
goto unlock_and_continue;
}
@@ -1334,9 +1330,7 @@ relock_queues:
/*
* Don't deactivate pages that are busy.
*/
- if ((m->busy != 0) ||
- (m->oflags & VPO_BUSY) ||
- (m->hold_count != 0)) {
+ if (vm_page_busied(m) || m->hold_count != 0) {
vm_page_unlock(m);
VM_OBJECT_WUNLOCK(object);
vm_page_requeue_locked(m);
@@ -1641,9 +1635,7 @@ vm_pageout_page_stats(struct vm_domain *vmd)
/*
* Don't deactivate pages that are busy or held.
*/
- if (m->busy != 0 ||
- (m->oflags & VPO_BUSY) != 0 ||
- m->hold_count != 0) {
+ if (vm_page_busied(m) || m->hold_count != 0) {
vm_page_unlock(m);
VM_OBJECT_WUNLOCK(object);
vm_page_requeue_locked(m);
diff --git a/sys/vm/vm_phys.c b/sys/vm/vm_phys.c
index 1fa223b..4c988db 100644
--- a/sys/vm/vm_phys.c
+++ b/sys/vm/vm_phys.c
@@ -568,7 +568,8 @@ vm_phys_fictitious_reg_range(vm_paddr_t start, vm_paddr_t end,
}
for (i = 0; i < page_count; i++) {
vm_page_initfake(&fp[i], start + PAGE_SIZE * i, memattr);
- fp[i].oflags &= ~(VPO_BUSY | VPO_UNMANAGED);
+ fp[i].oflags &= ~VPO_UNMANAGED;
+ fp[i].busy_lock = VPB_UNBUSIED;
}
mtx_lock(&vm_phys_fictitious_reg_mtx);
for (segind = 0; segind < VM_PHYS_FICTITIOUS_NSEGS; segind++) {
diff --git a/sys/vm/vnode_pager.c b/sys/vm/vnode_pager.c
index 4c7da16..2e3d2c0 100644
--- a/sys/vm/vnode_pager.c
+++ b/sys/vm/vnode_pager.c
@@ -1135,8 +1135,7 @@ vnode_pager_generic_putpages(struct vnode *vp, vm_page_t *ma, int bytecount,
* pmap operation.
*/
m = ma[ncount - 1];
- KASSERT(m->busy > 0,
- ("vnode_pager_generic_putpages: page %p is not busy", m));
+ vm_page_assert_sbusied(m);
KASSERT(!pmap_page_is_write_mapped(m),
("vnode_pager_generic_putpages: page %p is not read-only", m));
vm_page_clear_dirty(m, pgoff, PAGE_SIZE -
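
Several hunks above repeat the same new wait sequence: take the page lock, drop the object lock, then call vm_page_busy_sleep(), which releases the page lock itself. A minimal sketch of that pattern, assuming a hypothetical helper name, label and wait message:

static vm_page_t
example_wait_unbusied(vm_object_t object, vm_pindex_t pindex)
{
	vm_page_t m;

retry:
	VM_OBJECT_WLOCK(object);
	m = vm_page_lookup(object, pindex);
	if (m != NULL && vm_page_busied(m)) {
		vm_page_lock(m);		/* the page lock is the sleep interlock */
		VM_OBJECT_WUNLOCK(object);	/* never sleep with the object lock held */
		vm_page_busy_sleep(m, "exwait");/* drops the page lock (PDROP) */
		goto retry;			/* page identity may have changed */
	}
	/* The page, if any, was observed unbusied under the object lock. */
	return (m);				/* object still write-locked */
}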