diff options
Diffstat (limited to 'sys')
-rw-r--r-- | sys/vm/swap_pager.c | 392 |
1 files changed, 254 insertions, 138 deletions
diff --git a/sys/vm/swap_pager.c b/sys/vm/swap_pager.c index a3aa6a2..aa9cdb4 100644 --- a/sys/vm/swap_pager.c +++ b/sys/vm/swap_pager.c @@ -107,6 +107,8 @@ __FBSDID("$FreeBSD$"); #include <vm/vm_extern.h> #include <vm/uma.h> +#include <geom/geom.h> + /* * SWB_NPAGES must be a power of 2. It may be set to 1, 2, 4, 8, or 16 * pages per allocation. We recommend you stick with the default of 8. @@ -143,6 +145,7 @@ typedef int32_t swblk_t; /* struct swdevt; typedef void sw_strategy_t(struct buf *bp, struct swdevt *sw); +typedef void sw_close_t(struct thread *td, struct swdevt *sw); /* * Swap device table @@ -152,12 +155,14 @@ struct swdevt { int sw_nblks; int sw_used; udev_t sw_udev; + struct vnode *sw_vp; void *sw_id; swblk_t sw_first; swblk_t sw_end; struct blist *sw_blist; TAILQ_ENTRY(swdevt) sw_list; sw_strategy_t *sw_strategy; + sw_close_t *sw_close; }; #define SW_CLOSING 0x04 @@ -258,7 +263,7 @@ SYSCTL_INT(_vm, OID_AUTO, dmmax, static void swp_sizecheck(void); static void swp_pager_sync_iodone(struct buf *bp); static void swp_pager_async_iodone(struct buf *bp); -static int swapondev(struct thread *, struct vnode *); +static int swapongeom(struct thread *, struct vnode *); static int swaponvp(struct thread *, struct vnode *, u_long); /* @@ -2000,59 +2005,6 @@ swp_pager_meta_ctl(vm_object_t object, vm_pindex_t pindex, int flags) } /* - * swapdev_strategy: - * - * Perform swap strategy interleave device selection. - * - * The bp is expected to be locked and *not* B_DONE on call. - */ -static void -swapdev_strategy(struct buf *bp, struct swdevt *sp) -{ - int s; - struct vnode *vp, *vp2; - - bp->b_dev = NODEV; - bp->b_blkno = ctodb(bp->b_blkno - sp->sw_first); - - vp2 = sp->sw_id; - vhold(vp2); - s = splvm(); - if (bp->b_iocmd == BIO_WRITE) { - vp = bp->b_vp; - if (vp) { - VI_LOCK(vp); - vp->v_numoutput--; - if ((vp->v_iflag & VI_BWAIT) && vp->v_numoutput <= 0) { - vp->v_iflag &= ~VI_BWAIT; - wakeup(&vp->v_numoutput); - } - VI_UNLOCK(vp); - } - VI_LOCK(vp2); - vp2->v_numoutput++; - VI_UNLOCK(vp2); - } - bp->b_vp = vp2; - splx(s); - VOP_STRATEGY(vp2, bp); - return; -} - -static void -swapdev_devstrategy(struct buf *bp, struct swdevt *sp) -{ - struct vnode *vp; - - vp = sp->sw_id; - bp->b_dev = vp->v_rdev; - bp->b_blkno = ctodb(bp->b_blkno - sp->sw_first); - - VOP_SPECSTRATEGY(vp, bp); - return; -} - -/* * System call swapon(name) enables swapping on device name, * which must be in the swdevsw. Return EBUSY * if already swapping on this device. @@ -2102,7 +2054,7 @@ swapon(struct thread *td, struct swapon_args *uap) vp = nd.ni_vp; if (vn_isdisk(vp, &error)) { - error = swapondev(td, vp); + error = swapongeom(td, vp); } else if (vp->v_type == VREG && (vp->v_mount->mnt_vfc->vfc_flags & VFCF_NETWORK) != 0 && (error = VOP_GETATTR(vp, &attr, td->td_ucred, td)) == 0) { @@ -2124,7 +2076,7 @@ done2: } static void -swaponsomething(void *vp, u_long nblks, sw_strategy_t *strategy, udev_t udev) +swaponsomething(struct vnode *vp, void *id, u_long nblks, sw_strategy_t *strategy, sw_close_t *close, udev_t udev) { struct swdevt *sp; swblk_t dvbase; @@ -2164,7 +2116,8 @@ swaponsomething(void *vp, u_long nblks, sw_strategy_t *strategy, udev_t udev) nblks = dbtoc(nblks); sp = malloc(sizeof *sp, M_VMPGDATA, M_WAITOK | M_ZERO); - sp->sw_id = vp; + sp->sw_vp = vp; + sp->sw_id = id; sp->sw_udev = udev; sp->sw_flags = 0; sp->sw_nblks = nblks; @@ -2172,6 +2125,7 @@ swaponsomething(void *vp, u_long nblks, sw_strategy_t *strategy, udev_t udev) sp->sw_first = dvbase; sp->sw_end = dvbase + nblks; sp->sw_strategy = strategy; + sp->sw_close = close; sp->sw_blist = blist_create(nblks); /* @@ -2188,88 +2142,14 @@ swaponsomething(void *vp, u_long nblks, sw_strategy_t *strategy, udev_t udev) swap_pager_full = 0; } - -static int -swapondev(struct thread *td, struct vnode *vp) -{ - struct swdevt *sp; - int error; - off_t mediasize; - u_long nblks; - - mtx_lock(&sw_dev_mtx); - TAILQ_FOREACH(sp, &swtailq, sw_list) { - if (sp->sw_id == vp) { - mtx_unlock(&sw_dev_mtx); - return (EBUSY); - } - } - mtx_unlock(&sw_dev_mtx); - - (void) vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td); -#ifdef MAC - error = mac_check_system_swapon(td->td_ucred, vp); - if (error == 0) -#endif - error = VOP_OPEN(vp, FREAD | FWRITE, td->td_ucred, td, -1); - (void) VOP_UNLOCK(vp, 0, td); - if (error) - return (error); - - error = VOP_IOCTL(vp, DIOCGMEDIASIZE, (caddr_t)&mediasize, - FREAD, td->td_ucred, td); - if (error == 0) - nblks = mediasize / DEV_BSIZE; - else { - (void) VOP_CLOSE(vp, FREAD | FWRITE, td->td_ucred, td); - return (ENXIO); - } - - /* - * XXX: We should also check that the sectorsize makes sense - * XXX: it should be a power of two, no larger than the page size. - */ - - swaponsomething(vp, nblks, swapdev_devstrategy, dev2udev(vp->v_rdev)); - return (0); -} - - -static int -swaponvp(struct thread *td, struct vnode *vp, u_long nblks) -{ - struct swdevt *sp; - int error; - - if (nblks == 0) - return (ENXIO); - mtx_lock(&sw_dev_mtx); - TAILQ_FOREACH(sp, &swtailq, sw_list) { - if (sp->sw_id == vp) { - mtx_unlock(&sw_dev_mtx); - return (EBUSY); - } - } - mtx_unlock(&sw_dev_mtx); - - (void) vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td); -#ifdef MAC - error = mac_check_system_swapon(td->td_ucred, vp); - if (error == 0) -#endif - error = VOP_OPEN(vp, FREAD | FWRITE, td->td_ucred, td, -1); - (void) VOP_UNLOCK(vp, 0, td); - if (error) - return (error); - - swaponsomething(vp, nblks, swapdev_strategy, NOUDEV); - return (0); -} - /* * SYSCALL: swapoff(devname) * * Disable swapping on the given device. + * + * XXX: Badly designed system call: it should use a device index + * rather than filename as specification. We keep sw_vp around + * only to make this work. */ #ifndef _SYS_SYSPROTO_H_ struct swapoff_args { @@ -2309,7 +2189,7 @@ swapoff(struct thread *td, struct swapoff_args *uap) mtx_lock(&sw_dev_mtx); TAILQ_FOREACH(sp, &swtailq, sw_list) { - if (sp->sw_id == vp) + if (sp->sw_vp == vp) goto found; } mtx_unlock(&sw_dev_mtx); @@ -2356,8 +2236,7 @@ found: #endif /* !NO_SWAPPING */ swap_pager_swapoff(sp, &sp->sw_used); - VOP_CLOSE(vp, FREAD | FWRITE, td->td_ucred, td); - vrele(vp); + sp->sw_close(td, sp); sp->sw_id = NULL; mtx_lock(&sw_dev_mtx); TAILQ_REMOVE(&swtailq, sp, sw_list); @@ -2463,3 +2342,240 @@ vmspace_swap_count(struct vmspace *vmspace) } return (count); } + +/* + * GEOM backend + * + * Swapping onto disk devices. + * + */ + +static struct g_class g_swap_class = { + .name = "SWAP", +}; + +DECLARE_GEOM_CLASS(g_swap_class, g_class); + + +static void +swapgeom_done(struct bio *bp2) +{ + struct buf *bp; + + bp = bp2->bio_caller2; + if (bp2->bio_error) + bp->b_ioflags |= BIO_ERROR; + mtx_lock(&Giant); + bufdone(bp); + mtx_unlock(&Giant); + g_destroy_bio(bp2); +} + +static void +swapgeom_strategy(struct buf *bp, struct swdevt *sp) +{ + struct bio *bio; + struct g_consumer *cp; + + cp = sp->sw_id; + if (cp == NULL) { + bp->b_error = ENXIO; + bp->b_ioflags |= BIO_ERROR; + bufdone(bp); + return; + } + bio = g_clone_bio(&bp->b_io); + bio->bio_caller2 = bp; + bio->bio_offset = (bp->b_blkno - sp->sw_first) * PAGE_SIZE; + bio->bio_length = bp->b_bcount; + bio->bio_done = swapgeom_done; + g_io_request(bio, cp); + return; +} + +static void +swapgeom_orphan(struct g_consumer *cp) +{ + struct swdevt *sp; + + mtx_lock(&sw_dev_mtx); + TAILQ_FOREACH(sp, &swtailq, sw_list) + if (sp->sw_id == cp) + sp->sw_id = NULL; + mtx_unlock(&sw_dev_mtx); +} + +static void +swapgeom_close_ev(void *arg, int flags) +{ + struct g_consumer *cp; + + cp = arg; + g_access_rel(cp, -1, -1, -1); + g_detach(cp); + g_destroy_consumer(cp); +} + +static void +swapgeom_close(struct thread *td, struct swdevt *sw) +{ + + /* XXX: direct call when Giant untangled */ + g_waitfor_event(swapgeom_close_ev, sw->sw_id, M_WAITOK, NULL); +} + + +struct swh0h0 { + dev_t dev; + struct vnode *vp; + int error; +}; + +static void +swapongeom_ev(void *arg, int flags) +{ + struct swh0h0 *swh; + struct g_provider *pp; + struct g_consumer *cp; + static struct g_geom *gp; + struct swdevt *sp; + u_long nblks; + int error; + + swh = arg; + swh->error = 0; + pp = g_dev_getprovider(swh->dev); + if (pp == NULL) { + swh->error = ENODEV; + return; + } + mtx_lock(&sw_dev_mtx); + TAILQ_FOREACH(sp, &swtailq, sw_list) { + cp = sp->sw_id; + if (cp != NULL && cp->provider == pp) { + mtx_unlock(&sw_dev_mtx); + swh->error = EBUSY; + return; + } + } + mtx_unlock(&sw_dev_mtx); + if (gp == NULL) { + gp = g_new_geomf(&g_swap_class, "swap", NULL); + gp->orphan = swapgeom_orphan; + } + cp = g_new_consumer(gp); + g_attach(cp, pp); + error = g_access_rel(cp, 1, 1, 1); + if (error) { + g_detach(cp); + g_destroy_consumer(cp); + swh->error = error; + return; + } + nblks = pp->mediasize / DEV_BSIZE; + swaponsomething(swh->vp, cp, nblks, swapgeom_strategy, + swapgeom_close, dev2udev(swh->dev)); + swh->error = 0; + return; +} + +static int +swapongeom(struct thread *td, struct vnode *vp) +{ + int error; + struct swh0h0 swh; + + vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td); + + swh.dev = vp->v_rdev; + swh.vp = vp; + swh.error = 0; + /* XXX: direct call when Giant untangled */ + error = g_waitfor_event(swapongeom_ev, &swh, M_WAITOK, NULL); + if (!error) + error = swh.error; + VOP_UNLOCK(vp, 0, td); + return (error); +} + +/* + * VNODE backend + * + * This is used mainly for network filesystem (read: probably only tested + * with NFS) swapfiles. + * + */ + +static void +swapdev_strategy(struct buf *bp, struct swdevt *sp) +{ + int s; + struct vnode *vp, *vp2; + + bp->b_dev = NODEV; + bp->b_blkno = ctodb(bp->b_blkno - sp->sw_first); + + vp2 = sp->sw_id; + vhold(vp2); + s = splvm(); + if (bp->b_iocmd == BIO_WRITE) { + vp = bp->b_vp; + if (vp) { + VI_LOCK(vp); + vp->v_numoutput--; + if ((vp->v_iflag & VI_BWAIT) && vp->v_numoutput <= 0) { + vp->v_iflag &= ~VI_BWAIT; + wakeup(&vp->v_numoutput); + } + VI_UNLOCK(vp); + } + VI_LOCK(vp2); + vp2->v_numoutput++; + VI_UNLOCK(vp2); + } + bp->b_vp = vp2; + splx(s); + VOP_STRATEGY(vp2, bp); + return; +} + +static void +swapdev_close(struct thread *td, struct swdevt *sp) +{ + + VOP_CLOSE(sp->sw_vp, FREAD | FWRITE, td->td_ucred, td); + vrele(sp->sw_vp); +} + + +static int +swaponvp(struct thread *td, struct vnode *vp, u_long nblks) +{ + struct swdevt *sp; + int error; + + if (nblks == 0) + return (ENXIO); + mtx_lock(&sw_dev_mtx); + TAILQ_FOREACH(sp, &swtailq, sw_list) { + if (sp->sw_id == vp) { + mtx_unlock(&sw_dev_mtx); + return (EBUSY); + } + } + mtx_unlock(&sw_dev_mtx); + + (void) vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td); +#ifdef MAC + error = mac_check_system_swapon(td->td_ucred, vp); + if (error == 0) +#endif + error = VOP_OPEN(vp, FREAD | FWRITE, td->td_ucred, td, -1); + (void) VOP_UNLOCK(vp, 0, td); + if (error) + return (error); + + swaponsomething(vp, vp, nblks, swapdev_strategy, swapdev_close, + NOUDEV); + return (0); +} |