summaryrefslogtreecommitdiffstats
path: root/sys/vm
diff options
context:
space:
mode:
authordas <das@FreeBSD.org>2004-11-05 05:36:56 +0000
committerdas <das@FreeBSD.org>2004-11-05 05:36:56 +0000
commit2beb616cedbf76aae405b2ab5ed44f18edd51ead (patch)
tree94c5d1589c4833b313bdb27920dd84d12e5f85b3 /sys/vm
parent343d17a8c0e316015939dafe222020c58a03302b (diff)
downloadFreeBSD-src-2beb616cedbf76aae405b2ab5ed44f18edd51ead.zip
FreeBSD-src-2beb616cedbf76aae405b2ab5ed44f18edd51ead.tar.gz
Close a race in swapoff(). Here are the gory details:
In order to avoid livelock, swapoff() skips over objects with a nonzero pip count and makes another pass if necessary. Since it is impossible to know which objects we care about, it would choose an arbitrary object with a nonzero pip count and wait for it before making another pass, the theory being that this object would finish paging about as quickly as the ones we care about. Unfortunately, we may have slept since we acquired a reference to this object. Hack around this problem by tsleep()ing on the pointer anyway, but timeout after a fixed interval. More elegant solutions are possible, but the ones I considered unnecessarily complicate this rare case.

Also, kill some nits that seem to have crept into the swapoff() code in the last 75 revisions or so:

- Don't pass both sp and sp->sw_used to swap_pager_swapoff(), since the latter can be derived from the former.

- Replace swp_pager_find_dev() with something simpler. There's no need to iterate over the entire list of swap devices just to determine if a given block is assigned to the one we're interested in.

- Expand the scope of the swhash_mtx in a couple of places so that it isn't released and reacquired once for every hash bucket.

- Don't drop the swhash_mtx while holding a reference to an object. We need to lock the object first. Unfortunately, doing so would violate the established lock order, so use VM_OBJECT_TRYLOCK() and try again on a subsequent pass if the object is already locked.

- Refactor swp_pager_force_pagein() and swap_pager_swapoff() a bit.
Diffstat (limited to 'sys/vm')
-rw-r--r--sys/vm/swap_pager.c123
1 file changed, 53 insertions, 70 deletions
diff --git a/sys/vm/swap_pager.c b/sys/vm/swap_pager.c
index b4c6a6a..da7f1c0 100644
--- a/sys/vm/swap_pager.c
+++ b/sys/vm/swap_pager.c
@@ -236,7 +236,7 @@ static boolean_t
swap_pager_haspage(vm_object_t object, vm_pindex_t pindex, int *before, int *after);
static void swap_pager_init(void);
static void swap_pager_unswapped(vm_page_t);
-static void swap_pager_swapoff(struct swdevt *sp, int *sw_used);
+static void swap_pager_swapoff(struct swdevt *sp);
struct pagerops swappagerops = {
.pgo_init = swap_pager_init, /* early system initialization of pager */
@@ -609,19 +609,11 @@ done:
return (blk);
}
-static struct swdevt *
-swp_pager_find_dev(daddr_t blk)
+static int
+swp_pager_isondev(daddr_t blk, struct swdevt *sp)
{
- struct swdevt *sp;
- mtx_lock(&sw_dev_mtx);
- TAILQ_FOREACH(sp, &swtailq, sw_list) {
- if (blk >= sp->sw_first && blk < sp->sw_end) {
- mtx_unlock(&sw_dev_mtx);
- return (sp);
- }
- }
- panic("Swapdev not found");
+ return (blk >= sp->sw_first && blk < sp->sw_end);
}
static void
@@ -1597,27 +1589,24 @@ swap_pager_isswapped(vm_object_t object, struct swdevt *sp)
if (object->type != OBJT_SWAP)
return (0);
+ mtx_lock(&swhash_mtx);
for (bcount = 0; bcount < object->un_pager.swp.swp_bcount; bcount++) {
struct swblock *swap;
- mtx_lock(&swhash_mtx);
if ((swap = *swp_pager_hash(object, index)) != NULL) {
for (i = 0; i < SWAP_META_PAGES; ++i) {
- daddr_t v = swap->swb_pages[i];
- if (v == SWAPBLK_NONE)
- continue;
- if (swp_pager_find_dev(v) == sp) {
+ if (swp_pager_isondev(swap->swb_pages[i], sp)) {
mtx_unlock(&swhash_mtx);
- return 1;
+ return (1);
}
}
}
- mtx_unlock(&swhash_mtx);
index += SWAP_META_PAGES;
if (index > 0x20000000)
panic("swap_pager_isswapped: failed to locate all swap meta blocks");
}
- return 0;
+ mtx_unlock(&swhash_mtx);
+ return (0);
}
/*
@@ -1635,19 +1624,12 @@ swap_pager_isswapped(vm_object_t object, struct swdevt *sp)
* revert to the one-by-one behavior for now. Sigh.
*/
static __inline void
-swp_pager_force_pagein(struct swblock *swap, int idx)
+swp_pager_force_pagein(vm_object_t object, vm_pindex_t pindex)
{
- vm_object_t object;
vm_page_t m;
- vm_pindex_t pindex;
- object = swap->swb_object;
- pindex = swap->swb_index;
- mtx_unlock(&swhash_mtx);
-
- VM_OBJECT_LOCK(object);
vm_object_pip_add(object, 1);
- m = vm_page_grab(object, pindex + idx, VM_ALLOC_NORMAL|VM_ALLOC_RETRY);
+ m = vm_page_grab(object, pindex, VM_ALLOC_NORMAL|VM_ALLOC_RETRY);
if (m->valid == VM_PAGE_BITS_ALL) {
vm_object_pip_subtract(object, 1);
vm_page_lock_queues();
@@ -1656,12 +1638,10 @@ swp_pager_force_pagein(struct swblock *swap, int idx)
vm_page_wakeup(m);
vm_page_unlock_queues();
vm_pager_page_unswapped(m);
- VM_OBJECT_UNLOCK(object);
return;
}
- if (swap_pager_getpages(object, &m, 1, 0) !=
- VM_PAGER_OK)
+ if (swap_pager_getpages(object, &m, 1, 0) != VM_PAGER_OK)
panic("swap_pager_force_pagein: read from swap failed");/*XXX*/
vm_object_pip_subtract(object, 1);
vm_page_lock_queues();
@@ -1670,10 +1650,8 @@ swp_pager_force_pagein(struct swblock *swap, int idx)
vm_page_wakeup(m);
vm_page_unlock_queues();
vm_pager_page_unswapped(m);
- VM_OBJECT_UNLOCK(object);
}
-
/*
* swap_pager_swapoff:
*
@@ -1682,60 +1660,65 @@ swp_pager_force_pagein(struct swblock *swap, int idx)
* marked as allocated and the device must be flagged SW_CLOSING.
* There may be no processes swapped out to the device.
*
- * The sw_used parameter points to the field in the swdev structure
- * that contains a count of the number of blocks still allocated
- * on the device. If we encounter objects with a nonzero pip count
- * in our scan, we use this number to determine if we're really done.
- *
* This routine may block.
*/
static void
-swap_pager_swapoff(struct swdevt *sp, int *sw_used)
+swap_pager_swapoff(struct swdevt *sp)
{
- struct swblock **pswap;
struct swblock *swap;
vm_object_t waitobj;
- daddr_t v;
int i, j;
GIANT_REQUIRED;
full_rescan:
waitobj = NULL;
+ mtx_lock(&swhash_mtx);
for (i = 0; i <= swhash_mask; i++) { /* '<=' is correct here */
restart:
- pswap = &swhash[i];
- mtx_lock(&swhash_mtx);
- while ((swap = *pswap) != NULL) {
+ for (swap = swhash[i]; swap != NULL; swap = swap->swb_hnext) {
+ vm_object_t object = swap->swb_object;
+ vm_pindex_t pindex = swap->swb_index;
for (j = 0; j < SWAP_META_PAGES; ++j) {
- v = swap->swb_pages[j];
- if (v != SWAPBLK_NONE &&
- swp_pager_find_dev(v) == sp)
- break;
+ if (swp_pager_isondev(swap->swb_pages[j], sp)) {
+ /* avoid deadlock */
+ if (!VM_OBJECT_TRYLOCK(object)) {
+ waitobj = object;
+ break;
+ } else {
+ mtx_unlock(&swhash_mtx);
+ swp_pager_force_pagein(object,
+ pindex + j);
+ VM_OBJECT_UNLOCK(object);
+ mtx_lock(&swhash_mtx);
+ goto restart;
+ }
+ }
}
- if (j < SWAP_META_PAGES) {
- swp_pager_force_pagein(swap, j);
- goto restart;
- } else if (swap->swb_object->paging_in_progress) {
- if (!waitobj)
- waitobj = swap->swb_object;
- }
- pswap = &swap->swb_hnext;
+ if (object->paging_in_progress)
+ waitobj = object;
}
- mtx_unlock(&swhash_mtx);
}
- if (waitobj && *sw_used) {
- /*
- * We wait on an arbitrary object to clock our rescans
- * to the rate of paging completion.
- */
- VM_OBJECT_LOCK(waitobj);
- vm_object_pip_wait(waitobj, "swpoff");
- VM_OBJECT_UNLOCK(waitobj);
- goto full_rescan;
+ mtx_unlock(&swhash_mtx);
+ if (waitobj && sp->sw_used) {
+ /*
+ * The most likely reason we will have to do another pass is
+ * that something is being paged out to the device being
+ * removed. This can't happen forever because new allocations
+ * will not be made on this device, but we still need to wait
+ * for the activity to finish. We have no way of knowing
+ * which objects we need to wait for, so we pick an arbitrary
+ * object that is paging and hope that it finishes paging at
+ * about the same time as one of the objects we care about.
+ *
+ * XXX Unfortunately, our waitobj reference might not be valid
+ * anymore, so we also retry after 50 ms.
+ */
+ tsleep(waitobj, PVM, "swpoff", hz / 20);
+ goto full_rescan;
}
- if (*sw_used)
- panic("swapoff: failed to locate %d swap blocks", *sw_used);
+ if (sp->sw_used)
+ panic("swapoff: failed to locate %d swap blocks", sp->sw_used);
}
/************************************************************************
@@ -2237,7 +2220,7 @@ found:
#ifndef NO_SWAPPING
vm_proc_swapin_all(sp);
#endif /* !NO_SWAPPING */
- swap_pager_swapoff(sp, &sp->sw_used);
+ swap_pager_swapoff(sp);
sp->sw_close(td, sp);
sp->sw_id = NULL;
OpenPOWER on IntegriCloud