diff options
author | dillon <dillon@FreeBSD.org> | 2002-12-15 19:17:57 +0000 |
---|---|---|
committer | dillon <dillon@FreeBSD.org> | 2002-12-15 19:17:57 +0000 |
commit | b43fb3e9200092f2885e909dc7ee85cb0871cfef (patch) | |
tree | fc6e3be9fa1b757f9ac0967a46494adcf0cc5682 /sys/vm | |
parent | 2925e70a14eb46bd10c8905fd619024bb19f7f9d (diff) | |
download | FreeBSD-src-b43fb3e9200092f2885e909dc7ee85cb0871cfef.zip FreeBSD-src-b43fb3e9200092f2885e909dc7ee85cb0871cfef.tar.gz |
This is David Schultz's swapoff code which I am finally able to commit.
This should be considered highly experimental for the moment.
Submitted by: David Schultz <dschultz@uclink.Berkeley.EDU>
MFC after: 3 weeks
Diffstat (limited to 'sys/vm')
-rw-r--r-- | sys/vm/swap_pager.c | 159 |
-rw-r--r-- | sys/vm/swap_pager.h | 2 |
-rw-r--r-- | sys/vm/vm_glue.c | 40 |
-rw-r--r-- | sys/vm/vm_pageout.h | 6 |
-rw-r--r-- | sys/vm/vm_swap.c | 140 |
5 files changed, 339 insertions, 8 deletions
diff --git a/sys/vm/swap_pager.c b/sys/vm/swap_pager.c index de203e2..2f43bc4 100644 --- a/sys/vm/swap_pager.c +++ b/sys/vm/swap_pager.c @@ -206,6 +206,8 @@ static __inline daddr_t swp_pager_getswapspace(int npages); /* * Metadata functions */ +static __inline struct swblock ** + swp_pager_hash(vm_object_t object, vm_pindex_t index); static void swp_pager_meta_build(vm_object_t, vm_pindex_t, daddr_t); static void swp_pager_meta_free(vm_object_t, vm_pindex_t, daddr_t); static void swp_pager_meta_free_all(vm_object_t); @@ -512,12 +514,22 @@ swp_pager_freeswapspace(blk, npages) daddr_t blk; int npages; { + struct swdevt *sp = &swdevt[BLK2DEVIDX(blk)]; + GIANT_REQUIRED; + /* per-swap area stats */ + sp->sw_used -= npages; + + /* + * If we are attempting to stop swapping on this device, we + * don't want to mark any blocks free lest they be reused. + */ + if (sp->sw_flags & SW_CLOSING) + return; + blist_free(swapblist, blk, npages); vm_swap_size += npages; - /* per-swap area stats */ - swdevt[BLK2DEVIDX(blk)].sw_used -= npages; swp_sizecheck(); } @@ -1624,6 +1636,149 @@ swp_pager_async_iodone(bp) splx(s); } +/* + * swap_pager_isswapped: + * + * Return 1 if at least one page in the given object is paged + * out to the given swap device. + * + * This routine may not block. 
+ */ +int swap_pager_isswapped(vm_object_t object, int devidx) { + daddr_t index = 0; + int bcount; + int i; + + for (bcount = 0; bcount < object->un_pager.swp.swp_bcount; bcount++) { + struct swblock *swap; + + if ((swap = *swp_pager_hash(object, index)) != NULL) { + for (i = 0; i < SWAP_META_PAGES; ++i) { + daddr_t v = swap->swb_pages[i]; + if (v != SWAPBLK_NONE && + BLK2DEVIDX(v) == devidx) + return 1; + } + } + + index += SWAP_META_PAGES; + if (index > 0x20000000) + panic("swap_pager_isswapped: failed to locate all swap meta blocks"); + } + return 0; +} + +/* + * SWP_PAGER_FORCE_PAGEIN() - force a swap block to be paged in + * + * This routine dissociates the page at the given index within a + * swap block from its backing store, paging it in if necessary. + * If the page is paged in, it is placed in the inactive queue, + * since it had its backing store ripped out from under it. + * We also attempt to swap in all other pages in the swap block, + * we only guarantee that the one at the specified index is + * paged in. + * + * XXX - The code to page the whole block in doesn't work, so we + * revert to the one-by-one behavior for now. Sigh. 
+ */ +static __inline void +swp_pager_force_pagein(struct swblock *swap, int idx) +{ + vm_object_t object; + vm_page_t m; + vm_pindex_t pindex; + + object = swap->swb_object; + pindex = swap->swb_index; + + vm_object_pip_add(object, 1); + m = vm_page_grab(object, pindex + idx, VM_ALLOC_NORMAL|VM_ALLOC_RETRY); + if (m->valid == VM_PAGE_BITS_ALL) { + vm_object_pip_subtract(object, 1); + vm_page_lock_queues(); + vm_page_activate(m); + vm_page_dirty(m); + vm_page_wakeup(m); + vm_page_unlock_queues(); + vm_pager_page_unswapped(m); + return; + } + + if (swap_pager_getpages(object, &m, 1, 0) != + VM_PAGER_OK) + panic("swap_pager_force_pagein: read from swap failed");/*XXX*/ + vm_object_pip_subtract(object, 1); + + vm_page_lock_queues(); + vm_page_dirty(m); + vm_page_dontneed(m); + vm_page_wakeup(m); + vm_page_unlock_queues(); + vm_pager_page_unswapped(m); +} + + +/* + * swap_pager_swapoff: + * + * Page in all of the pages that have been paged out to the + * given device. The corresponding blocks in the bitmap must be + * marked as allocated and the device must be flagged SW_CLOSING. + * There may be no processes swapped out to the device. + * + * The sw_used parameter points to the field in the swdev structure + * that contains a count of the number of blocks still allocated + * on the device. If we encounter objects with a nonzero pip count + * in our scan, we use this number to determine if we're really done. + * + * This routine may block. 
+ */ +void +swap_pager_swapoff(int devidx, int *sw_used) +{ + struct swblock **pswap; + struct swblock *swap; + vm_object_t waitobj; + daddr_t v; + int i, j; + + GIANT_REQUIRED; + +full_rescan: + waitobj = NULL; + for (i = 0; i <= swhash_mask; i++) { /* '<=' is correct here */ +restart: + pswap = &swhash[i]; + while ((swap = *pswap) != NULL) { + for (j = 0; j < SWAP_META_PAGES; ++j) { + v = swap->swb_pages[j]; + if (v != SWAPBLK_NONE && + BLK2DEVIDX(v) == devidx) + break; + } + if (j < SWAP_META_PAGES) { + swp_pager_force_pagein(swap, j); + goto restart; + } else if (swap->swb_object->paging_in_progress) { + if (!waitobj) + waitobj = swap->swb_object; + } + pswap = &swap->swb_hnext; + } + } + if (waitobj && *sw_used) { + /* + * We wait on an arbitrary object to clock our rescans + * to the rate of paging completion. + */ + vm_object_pip_wait(waitobj, "swpoff"); + goto full_rescan; + } + if (*sw_used) + panic("swapoff: failed to locate %d swap blocks", *sw_used); +} + /************************************************************************ * SWAP META DATA * ************************************************************************ diff --git a/sys/vm/swap_pager.h b/sys/vm/swap_pager.h index 97d50d3..4402284 100644 --- a/sys/vm/swap_pager.h +++ b/sys/vm/swap_pager.h @@ -83,9 +83,11 @@ extern struct pagerlst swap_pager_un_object_list; extern int swap_pager_full; extern struct blist *swapblist; extern struct uma_zone *swap_zone; +extern int nswap_lowat, nswap_hiwat; void swap_pager_putpages(vm_object_t, vm_page_t *, int, boolean_t, int *); boolean_t swap_pager_haspage(vm_object_t object, vm_pindex_t pindex, int *before, int *after); +void swap_pager_swapoff(int devidx, int *sw_used); int swap_pager_swp_alloc(vm_object_t, int); void swap_pager_copy(vm_object_t, vm_object_t, vm_pindex_t, int); diff --git a/sys/vm/vm_glue.c b/sys/vm/vm_glue.c index 6ac6a96..e38b3d3 100644 --- a/sys/vm/vm_glue.c +++ b/sys/vm/vm_glue.c @@ -91,6 +91,7 @@ #include <vm/vm_kern.h> #include 
<vm/vm_extern.h> #include <vm/vm_pager.h> +#include <vm/swap_pager.h> #include <sys/user.h> @@ -324,6 +325,45 @@ vm_proc_swapin(struct proc *p) up = (vm_offset_t)p->p_uarea; pmap_qenter(up, ma, UAREA_PAGES); } + +/* + * Swap in the UAREAs of all processes swapped out to the given device. + * The pages in the UAREA are marked dirty and their swap metadata is freed. + */ +void +vm_proc_swapin_all(int devidx) +{ + struct proc *p; + vm_object_t object; + vm_page_t m; + +retry: + sx_slock(&allproc_lock); + FOREACH_PROC_IN_SYSTEM(p) { + PROC_LOCK(p); + mtx_lock_spin(&sched_lock); + + object = p->p_upages_obj; + if (object != NULL && + swap_pager_isswapped(p->p_upages_obj, devidx)) { + sx_sunlock(&allproc_lock); + faultin(p); + mtx_unlock_spin(&sched_lock); + PROC_UNLOCK(p); + vm_page_lock_queues(); + TAILQ_FOREACH(m, &object->memq, listq) + vm_page_dirty(m); + vm_page_unlock_queues(); + swap_pager_freespace(object, 0, + object->un_pager.swp.swp_bcount); + goto retry; + } + + mtx_unlock_spin(&sched_lock); + PROC_UNLOCK(p); + } + sx_sunlock(&allproc_lock); +} #endif /* diff --git a/sys/vm/vm_pageout.h b/sys/vm/vm_pageout.h index c909c68..d68ec79 100644 --- a/sys/vm/vm_pageout.h +++ b/sys/vm/vm_pageout.h @@ -104,6 +104,12 @@ extern void pagedaemon_wakeup(void); extern void vm_wait(void); extern void vm_waitpfault(void); +/* XXX This is probably misplaced. 
*/ +#ifndef NO_SWAPPING +void vm_proc_swapin_all(int); +int swap_pager_isswapped(vm_object_t, int); +#endif /* !NO_SWAPPING */ + #ifdef _KERNEL void vm_pageout_page(vm_page_t, vm_object_t); void vm_pageout_cluster(vm_page_t, vm_object_t); diff --git a/sys/vm/vm_swap.c b/sys/vm/vm_swap.c index 1781182..0ec5220 100644 --- a/sys/vm/vm_swap.c +++ b/sys/vm/vm_swap.c @@ -36,6 +36,7 @@ #include "opt_mac.h" #include "opt_swap.h" +#include "opt_vm.h" #include <sys/param.h> #include <sys/systm.h> @@ -58,6 +59,7 @@ #include <vm/vm.h> #include <vm/vm_extern.h> #include <vm/vm_param.h> +#include <vm/vm_pageout.h> #include <vm/swap_pager.h> #include <vm/uma.h> @@ -73,6 +75,8 @@ struct swdevt *swdevt = should_be_malloced; static int nswap; /* first block after the interleaved devs */ int nswdev = NSWAPDEV; int vm_swap_size; +static int swdev_syscall_active = 0; /* serialize swap(on|off) */ + static int swapdev_strategy(struct vop_strategy_args *ap); struct vnode *swapdev_vp; @@ -165,11 +169,12 @@ swapdev_strategy(ap) /* * Create a special vnode op vector for swapdev_vp - we only use - * VOP_STRATEGY(), everything else returns an error. + * VOP_STRATEGY() and reclaim; everything else returns an error. */ vop_t **swapdev_vnodeop_p; static struct vnodeopv_entry_desc swapdev_vnodeop_entries[] = { { &vop_default_desc, (vop_t *) vop_defaultop }, + { &vop_reclaim_desc, (vop_t *) vop_null }, { &vop_strategy_desc, (vop_t *) swapdev_strategy }, { NULL, NULL } }; @@ -208,19 +213,23 @@ swapon(td, uap) if (error) goto done2; + while (swdev_syscall_active) + tsleep(&swdev_syscall_active, PUSER - 1, "swpon", 0); + swdev_syscall_active = 1; + /* * Swap metadata may not fit in the KVM if we have physical * memory of >1GB. 
*/ if (swap_zone == NULL) { error = ENOMEM; - goto done2; + goto done; } NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, uap->name, td); error = namei(&nd); if (error) - goto done2; + goto done; NDFREE(&nd, NDF_ONLY_PNBUF); vp = nd.ni_vp; @@ -239,6 +248,9 @@ swapon(td, uap) if (error) vrele(vp); +done: + swdev_syscall_active = 0; + wakeup_one(&swdev_syscall_active); done2: mtx_unlock(&Giant); return (error); @@ -252,8 +264,6 @@ done2: * * The new swap code uses page-sized blocks. The old swap code used * DEV_BSIZE'd chunks. - * - * XXX locking when multiple swapon's run in parallel */ int swaponvp(td, vp, dev, nblks) @@ -330,7 +340,7 @@ swaponvp(td, vp, dev, nblks) sp->sw_vp = vp; sp->sw_dev = dev2udev(dev); sp->sw_device = dev; - sp->sw_flags |= SW_FREED; + sp->sw_flags = SW_FREED; sp->sw_nblks = nblks; sp->sw_used = 0; @@ -356,9 +366,127 @@ swaponvp(td, vp, dev, nblks) vm_swap_size += blk; } + swap_pager_full = 0; + return (0); } +/* + * SYSCALL: swapoff(devname) + * + * Disable swapping on the given device. 
+ */ +#ifndef _SYS_SYSPROTO_H_ +struct swapoff_args { + char *name; +}; +#endif + +/* + * MPSAFE + */ +/* ARGSUSED */ +int +swapoff(td, uap) + struct thread *td; + struct swapoff_args *uap; +{ + struct vnode *vp; + struct nameidata nd; + struct swdevt *sp; + swblk_t dvbase, vsbase; + u_long nblks, aligned_nblks, blk; + int error, index; + + mtx_lock(&Giant); + + error = suser(td); + if (error) + goto done2; + + while (swdev_syscall_active) + tsleep(&swdev_syscall_active, PUSER - 1, "swpoff", 0); + swdev_syscall_active = 1; + + NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, uap->name, td); + error = namei(&nd); + if (error) + goto done; + NDFREE(&nd, NDF_ONLY_PNBUF); + vp = nd.ni_vp; + + for (sp = swdevt, index = 0 ; index < nswdev; index++, sp++) { + if (sp->sw_vp == vp) + goto found; + } + error = EINVAL; + goto done; +found: + nblks = sp->sw_nblks; + + /* + * We can turn off this swap device safely only if the + * available virtual memory in the system will fit the amount + * of data we will have to page back in, plus an epsilon so + * the system doesn't become critically low on swap space. + */ + if (cnt.v_free_count + cnt.v_cache_count + vm_swap_size < + nblks + nswap_lowat) { + error = ENOMEM; + goto done; + } + + /* + * Prevent further allocations on this device. + */ + sp->sw_flags |= SW_CLOSING; + for (dvbase = dmmax; dvbase < nblks; dvbase += dmmax) { + blk = min(nblks - dvbase, dmmax); + vsbase = index * dmmax + dvbase * nswdev; + vm_swap_size -= blist_fill(swapblist, vsbase, blk); + } + + /* + * Page in the contents of the device and close it. + */ +#ifndef NO_SWAPPING + vm_proc_swapin_all(index); +#endif /* !NO_SWAPPING */ + swap_pager_swapoff(index, &sp->sw_used); + + VOP_CLOSE(vp, FREAD | FWRITE, td->td_ucred, td); + vrele(vp); + sp->sw_vp = NULL; + + /* + * Resize the bitmap based on the new largest swap device, + * or free the bitmap if there are no more devices. 
+ */ + for (sp = swdevt, nblks = 0; sp < swdevt + nswdev; sp++) { + if (sp->sw_vp == NULL) + continue; + nblks = max(nblks, sp->sw_nblks); + } + + aligned_nblks = (nblks + (dmmax - 1)) & ~(u_long)(dmmax - 1); + nswap = aligned_nblks * nswdev; + + if (nswap == 0) { + blist_destroy(swapblist); + swapblist = NULL; + vrele(swapdev_vp); + swapdev_vp = NULL; + } else + blist_resize(&swapblist, nswap, 0); + +done: + swdev_syscall_active = 0; + wakeup_one(&swdev_syscall_active); +done2: + mtx_unlock(&Giant); + return (error); +} + static int sysctl_vm_swap_info(SYSCTL_HANDLER_ARGS) { |