Add snapshots to the fast filesystem. Most of the changes support

the gating of system calls that cause modifications to the underlying filesystem. The gating can be enabled by any filesystem that needs to consistently suspend operations by adding the vop_stdgetwritemount to their set of vnops. Once gating is enabled, the function vfs_write_suspend stops all new write operations to a filesystem, allows any filesystem-modifying system calls already in progress to complete, then syncs the filesystem to disk and returns. The function vfs_write_resume allows the suspended write operations to begin again. Gating is not added by default for all filesystems, as for SMP systems it adds two extra locks to such critical kernel paths as the write system call. Thus, gating should only be added as needed. Details on the use and current status of snapshots in FFS can be found in /sys/ufs/ffs/README.snapshot, so for brevity and timeliness they are not included here. Unless and until you create a snapshot file, these changes should have no effect on your system (famous last words).
author: mckusick <mckusick@FreeBSD.org> 2000-07-11 22:07:57 +0000
committer: mckusick <mckusick@FreeBSD.org> 2000-07-11 22:07:57 +0000
commit: a3d0c189ea25a7af3dfab30112f5d8d65e214e1c (patch)
tree: c84458dcf49aaf90ff010ebc108cb3b6ca3c2f4a /sys
parent: c8c04452402a28eabd1ed8a1a06e0a14ac3d22c6 (diff)
download: FreeBSD-src-a3d0c189ea25a7af3dfab30112f5d8d65e214e1c.zip
FreeBSD-src-a3d0c189ea25a7af3dfab30112f5d8d65e214e1c.tar.gz
52 files changed, 2536 insertions, 446 deletions
diff --git a/sys/compat/svr4/svr4_fcntl.c b/sys/compat/svr4/svr4_fcntl.c
index 4040030..c65f345 100644
--- a/sys/compat/svr4/svr4_fcntl.c
+++ b/sys/compat/svr4/svr4_fcntl.c
@@ -247,6 +247,7 @@ fd_revoke(p, fd)
 	struct filedesc *fdp = p->p_fd;
 	struct file *fp;
 	struct vnode *vp;
+	struct mount *mp;
 	struct vattr vattr;
 	int error, *retval;
 
@@ -271,8 +272,11 @@ fd_revoke(p, fd)
 	    (error = suser(p)) != 0)
 		goto out;
 
+	if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
+		goto out;
 	if (vcount(vp) > 1)
 		VOP_REVOKE(vp, REVOKEALL);
+	vn_finished_write(mp);
 out:
 	vrele(vp);
 	return error;
diff --git a/sys/conf/files b/sys/conf/files
index 84130e1..01f088f 100644
--- a/sys/conf/files
+++ b/sys/conf/files
@@ -906,6 +906,8 @@ ufs/ffs/ffs_balloc.c	optional ffs
 ufs/ffs/ffs_balloc.c	optional mfs
 ufs/ffs/ffs_inode.c	optional ffs
 ufs/ffs/ffs_inode.c	optional mfs
+ufs/ffs/ffs_snapshot.c	optional ffs
+ufs/ffs/ffs_snapshot.c	optional mfs
 ufs/ffs/ffs_softdep.c	optional softupdates
 ufs/ffs/ffs_softdep_stub.c	standard
 ufs/ffs/ffs_subr.c	optional ffs
diff --git a/sys/dev/vn/vn.c b/sys/dev/vn/vn.c
index 88e3801..efbc437 100644
--- a/sys/dev/vn/vn.c
+++ b/sys/dev/vn/vn.c
@@ -276,7 +276,6 @@ vnstrategy(struct bio *bp)
 	int unit;
 	struct vn_softc *vn;
 	int error;
-	int isvplocked = 0;
 
 	unit = dkunit(bp->bio_dev);
 	vn = bp->bio_dev->si_drv1;
@@ -360,6 +359,7 @@ vnstrategy(struct bio *bp)
 		 */
 		struct uio auio;
 		struct iovec aiov;
+		struct mount *mp;
 
 		bzero(&auio, sizeof(auio));
 
@@ -375,18 +375,18 @@ vnstrategy(struct bio *bp)
 			auio.uio_rw = UIO_WRITE;
 		auio.uio_resid = bp->bio_bcount;
 		auio.uio_procp = curproc;
-		if (!VOP_ISLOCKED(vn->sc_vp, NULL)) {
-			isvplocked = 1;
+		if (VOP_ISLOCKED(vn->sc_vp, NULL))
+			vprint("unexpected vn driver lock", vn->sc_vp);
+		if (bp->bio_cmd == BIO_READ) {
 			vn_lock(vn->sc_vp, LK_EXCLUSIVE | LK_RETRY, curproc);
-		}
-		if(bp->bio_cmd == BIO_READ)
 			error = VOP_READ(vn->sc_vp, &auio, 0, vn->sc_cred);
-		else
+		} else {
+			(void) vn_start_write(vn->sc_vp, &mp, V_WAIT);
+			vn_lock(vn->sc_vp, LK_EXCLUSIVE | LK_RETRY, curproc);
 			error = VOP_WRITE(vn->sc_vp, &auio, 0, vn->sc_cred);
-		if (isvplocked) {
-			VOP_UNLOCK(vn->sc_vp, 0, curproc);
-			isvplocked = 0;
+			vn_finished_write(mp);
 		}
+		VOP_UNLOCK(vn->sc_vp, 0, curproc);
 		bp->bio_resid = auio.uio_resid;
 
 		if (error) {
diff --git a/sys/fs/fdescfs/fdesc_vnops.c b/sys/fs/fdescfs/fdesc_vnops.c
index 772a94c..72c7cae 100644
--- a/sys/fs/fdescfs/fdesc_vnops.c
+++ b/sys/fs/fdescfs/fdesc_vnops.c
@@ -383,6 +383,8 @@ fdesc_setattr(ap)
 {
 	struct filedesc *fdp = ap->a_p->p_fd;
 	struct vattr *vap = ap->a_vap;
+	struct vnode *vp;
+	struct mount *mp;
 	struct file *fp;
 	unsigned fd;
 	int error;
@@ -403,8 +405,11 @@ fdesc_setattr(ap)
 	switch (fp->f_type) {
 	case DTYPE_FIFO:
 	case DTYPE_VNODE:
-		error = VOP_SETATTR((struct vnode *) fp->f_data, ap->a_vap,
-		    ap->a_cred, ap->a_p);
+		vp = (struct vnode *)fp->f_data;
+		if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
+			return (error);
+		error = VOP_SETATTR(vp, ap->a_vap, ap->a_cred, ap->a_p);
+		vn_finished_write(mp);
 		break;
 
 	default:
diff --git a/sys/fs/fifofs/fifo_vnops.c b/sys/fs/fifofs/fifo_vnops.c
index 5bd13a7..03e3e37 100644
--- a/sys/fs/fifofs/fifo_vnops.c
+++ b/sys/fs/fifofs/fifo_vnops.c
@@ -107,6 +107,7 @@ static struct vnodeopv_entry_desc fifo_vnodeop_entries[] = {
 	{ &vop_open_desc,		(vop_t *) fifo_open },
 	{ &vop_pathconf_desc,		(vop_t *) fifo_pathconf },
 	{ &vop_poll_desc,		(vop_t *) fifo_poll },
+	{ &vop_getwritemount_desc, 	(vop_t *) vop_stdgetwritemount },
 	{ &vop_print_desc,		(vop_t *) fifo_print },
 	{ &vop_read_desc,		(vop_t *) fifo_read },
 	{ &vop_readdir_desc,		(vop_t *) fifo_badop },
diff --git a/sys/fs/specfs/spec_vnops.c b/sys/fs/specfs/spec_vnops.c
index cbe52f4..baf40c3 100644
--- a/sys/fs/specfs/spec_vnops.c
+++ b/sys/fs/specfs/spec_vnops.c
@@ -88,6 +88,7 @@ static struct vnodeopv_entry_desc spec_vnodeop_entries[] = {
 	{ &vop_open_desc,		(vop_t *) spec_open },
 	{ &vop_pathconf_desc,		(vop_t *) vop_stdpathconf },
 	{ &vop_poll_desc,		(vop_t *) spec_poll },
+	{ &vop_getwritemount_desc, 	(vop_t *) vop_stdgetwritemount },
 	{ &vop_print_desc,		(vop_t *) spec_print },
 	{ &vop_read_desc,		(vop_t *) spec_read },
 	{ &vop_readdir_desc,		(vop_t *) vop_panic },
@@ -415,16 +416,29 @@ spec_strategy(ap)
 	struct buf *bp;
 	struct vnode *vp;
 	struct mount *mp;
+	int error;
 
 	bp = ap->a_bp;
-	if ((bp->b_iocmd == BIO_WRITE) && (LIST_FIRST(&bp->b_dep)) != NULL)
-		buf_start(bp);
-
+	vp = ap->a_vp;
+	if ((bp->b_iocmd == BIO_WRITE)) {
+		if (vp->v_mount != NULL &&
+		    (vp->v_mount->mnt_kern_flag & MNTK_SUSPENDED) != 0)
+			panic("spec_strategy: bad I/O");
+		if (LIST_FIRST(&bp->b_dep) != NULL)
+			buf_start(bp);
+		if ((vp->v_flag & VCOPYONWRITE) &&
+		    (error = VOP_COPYONWRITE(vp, bp)) != 0 &&
+		    error != EOPNOTSUPP) {
+			bp->b_io.bio_error = error;
+			bp->b_io.bio_flags |= BIO_ERROR;
+			biodone(&bp->b_io);
+			return (0);
+		}
+	}
 	/*
 	 * Collect statistics on synchronous and asynchronous read
 	 * and write counts for disks that have associated filesystems.
 	 */
-	vp = ap->a_vp;
 	if (vn_isdisk(vp, NULL) && (mp = vp->v_specmountpoint) != NULL) {
 		if (bp->b_iocmd == BIO_WRITE) {
 			if (bp->b_lock.lk_lockholder == LK_KERNPROC)
diff --git a/sys/fs/unionfs/union_subr.c b/sys/fs/unionfs/union_subr.c
index 6b88bef..d1d6e31 100644
--- a/sys/fs/unionfs/union_subr.c
+++ b/sys/fs/unionfs/union_subr.c
@@ -747,6 +747,7 @@ union_copyup(un, docopy, cred, p)
 	struct proc *p;
 {
 	int error;
+	struct mount *mp;
 	struct vnode *lvp, *uvp;
 
 	/*
@@ -759,9 +760,12 @@ union_copyup(un, docopy, cred, p)
 	if (error)
 		return (error);
 
-	error = union_vn_create(&uvp, un, p);
-	if (error)
+	if ((error = vn_start_write(un->un_dirvp, &mp, V_WAIT | PCATCH)) != 0)
 		return (error);
+	if ((error = union_vn_create(&uvp, un, p)) != 0) {
+		vn_finished_write(mp);
+		return (error);
+	}
 
 	lvp = un->un_lowervp;
 
@@ -785,6 +789,7 @@ union_copyup(un, docopy, cred, p)
 
 	}
 	VOP_UNLOCK(uvp, 0, p);
+	vn_finished_write(mp);
 	union_newupper(un, uvp);
 	KASSERT(uvp->v_usecount > 0, ("copy: uvp refcount 0: %d", uvp->v_usecount));
 	union_vn_close(uvp, FWRITE, cred, p);
@@ -910,11 +915,15 @@ union_mkshadow(um, dvp, cnp, vpp)
 	struct vattr va;
 	struct proc *p = cnp->cn_proc;
 	struct componentname cn;
+	struct mount *mp;
 
-	error = union_relookup(um, dvp, vpp, cnp, &cn,
-			cnp->cn_nameptr, cnp->cn_namelen);
-	if (error)
+	if ((error = vn_start_write(dvp, &mp, V_WAIT | PCATCH)) != 0)
+		return (error);
+	if ((error = union_relookup(um, dvp, vpp, cnp, &cn,
+			cnp->cn_nameptr, cnp->cn_namelen)) != 0) {
+		vn_finished_write(mp);
 		return (error);
+	}
 
 	if (*vpp) {
 		if (cn.cn_flags & HASBUF) {
@@ -925,6 +934,7 @@ union_mkshadow(um, dvp, cnp, vpp)
 			vrele(*vpp);
 		else
 			vput(*vpp);
+		vn_finished_write(mp);
 		*vpp = NULLVP;
 		return (EEXIST);
 	}
@@ -950,6 +960,7 @@ union_mkshadow(um, dvp, cnp, vpp)
 		cn.cn_flags &= ~HASBUF;
 	}
 	/*vput(dvp);*/
+	vn_finished_write(mp);
 	return (error);
 }
 
@@ -973,10 +984,15 @@ union_mkwhiteout(um, dvp, cnp, path)
 	struct proc *p = cnp->cn_proc;
 	struct vnode *wvp;
 	struct componentname cn;
+	struct mount *mp;
 
+	if ((error = vn_start_write(dvp, &mp, V_WAIT | PCATCH)) != 0)
+		return (error);
 	error = union_relookup(um, dvp, &wvp, cnp, &cn, path, strlen(path));
-	if (error)
+	if (error) {
+		vn_finished_write(mp);
 		return (error);
+	}
 
 	if (wvp) {
 		if (cn.cn_flags & HASBUF) {
@@ -987,6 +1003,7 @@ union_mkwhiteout(um, dvp, cnp, path)
 			vrele(wvp);
 		else
 			vput(wvp);
+		vn_finished_write(mp);
 		return (EEXIST);
 	}
 
@@ -998,6 +1015,7 @@ union_mkwhiteout(um, dvp, cnp, path)
 		zfree(namei_zone, cn.cn_pnbuf);
 		cn.cn_flags &= ~HASBUF;
 	}
+	vn_finished_write(mp);
 	return (error);
 }
 
diff --git a/sys/fs/unionfs/union_vnops.c b/sys/fs/unionfs/union_vnops.c
index 1c5ed5d..d7b95f3 100644
--- a/sys/fs/unionfs/union_vnops.c
+++ b/sys/fs/unionfs/union_vnops.c
@@ -93,6 +93,7 @@ static int	union_print __P((struct vop_print_args *ap));
 static int	union_read __P((struct vop_read_args *ap));
 static int	union_readdir __P((struct vop_readdir_args *ap));
 static int	union_readlink __P((struct vop_readlink_args *ap));
+static int	union_getwritemount __P((struct vop_getwritemount_args *ap));
 static int	union_reclaim __P((struct vop_reclaim_args *ap));
 static int	union_remove __P((struct vop_remove_args *ap));
 static int	union_rename __P((struct vop_rename_args *ap));
@@ -1681,6 +1682,20 @@ union_readlink(ap)
 	return (error);
 }
 
+static int
+union_getwritemount(ap)
+	struct vop_getwritemount_args /* {
+		struct vnode *a_vp;
+		struct mount **a_mpp;
+	} */ *ap;
+{
+	struct vnode *vp = UPPERVP(ap->a_vp);	/* upper-layer vnode */
+
+	if (vp == NULL)		/* no upper vnode to forward to */
+		panic("union: missing upper layer in getwritemount");
+	return(VOP_GETWRITEMOUNT(vp, ap->a_mpp));	/* forward to it */
+}
+
 /*
  *	union_inactive:
  *
@@ -1963,6 +1978,7 @@ static struct vnodeopv_entry_desc union_vnodeop_entries[] = {
 	{ &vop_read_desc,		(vop_t *) union_read },
 	{ &vop_readdir_desc,		(vop_t *) union_readdir },
 	{ &vop_readlink_desc,		(vop_t *) union_readlink },
+	{ &vop_getwritemount_desc,	(vop_t *) union_getwritemount },
 	{ &vop_reclaim_desc,		(vop_t *) union_reclaim },
 	{ &vop_remove_desc,		(vop_t *) union_remove },
 	{ &vop_rename_desc,		(vop_t *) union_rename },
diff --git a/sys/gnu/ext2fs/ext2_bmap.c b/sys/gnu/ext2fs/ext2_bmap.c
index 9056340..ab4ac52 100644
--- a/sys/gnu/ext2fs/ext2_bmap.c
+++ b/sys/gnu/ext2fs/ext2_bmap.c
@@ -47,6 +47,7 @@
 #include <sys/vnode.h>
 #include <sys/mount.h>
 #include <sys/resourcevar.h>
+#include <sys/stat.h>
 
 #include <ufs/ufs/extattr.h>
 #include <ufs/ufs/quota.h>
@@ -115,7 +116,7 @@ ufs_bmaparray(vp, bn, bnp, ap, nump, runp, runb)
 	struct indir a[NIADDR+1], *xap;
 	ufs_daddr_t daddr;
 	long metalbn;
-	int error, maxrun, num;
+	int error, num, maxrun = 0;
 
 	ip = VTOI(vp);
 	mp = vp->v_mount;
@@ -127,6 +128,7 @@ ufs_bmaparray(vp, bn, bnp, ap, nump, runp, runb)
 #endif
 
 	if (runp) {
+		maxrun = mp->mnt_iosize_max / mp->mnt_stat.f_iosize - 1;
 		*runp = 0;
 	}
 
@@ -134,7 +136,6 @@ ufs_bmaparray(vp, bn, bnp, ap, nump, runp, runb)
 		*runb = 0;
 	}
 
-	maxrun = mp->mnt_iosize_max / mp->mnt_stat.f_iosize - 1;
 
 	xap = ap == NULL ? a : ap;
 	if (!nump)
@@ -146,9 +147,12 @@ ufs_bmaparray(vp, bn, bnp, ap, nump, runp, runb)
 	num = *nump;
 	if (num == 0) {
 		*bnp = blkptrtodb(ump, ip->i_db[bn]);
-		if (*bnp == 0)
-			*bnp = -1;
-		else if (runp) {
+		if (*bnp == 0) {
+			if (ip->i_flags & SF_SNAPSHOT)
+				*bnp = blkptrtodb(ump, bn * ump->um_seqinc);
+			else
+				*bnp = -1;
+		} else if (runp) {
 			daddr_t bnb = bn;
 			for (++bn; bn < NDADDR && *runp < maxrun &&
 			    is_sequential(ump, ip->i_db[bn - 1], ip->i_db[bn]);
@@ -226,8 +230,13 @@ ufs_bmaparray(vp, bn, bnp, ap, nump, runp, runb)
 	if (bp)
 		bqrelse(bp);
 
-	daddr = blkptrtodb(ump, daddr);
-	*bnp = daddr == 0 ? -1 : daddr;
+	*bnp = blkptrtodb(ump, daddr);
+	if (*bnp == 0) {
+		if (ip->i_flags & SF_SNAPSHOT)
+			*bnp = blkptrtodb(ump, bn * ump->um_seqinc);
+		else
+			*bnp = -1;
+	}
 	return (0);
 }
 
diff --git a/sys/gnu/ext2fs/inode.h b/sys/gnu/ext2fs/inode.h
index 83960b0..6417a10 100644
--- a/sys/gnu/ext2fs/inode.h
+++ b/sys/gnu/ext2fs/inode.h
@@ -84,6 +84,7 @@ struct inode {
 	struct	 dquot *i_dquot[MAXQUOTAS]; /* Dquot structures. */
 	u_quad_t i_modrev;	/* Revision level for NFS lease. */
 	struct	 lockf *i_lockf;/* Head of byte-level lock list. */
+	struct	 inode *i_copyonwrite; /* copy-on-write list */
 	/*
 	 * Side effects; used during directory lookup.
 	 */
diff --git a/sys/gnu/fs/ext2fs/ext2_bmap.c b/sys/gnu/fs/ext2fs/ext2_bmap.c
index 9056340..ab4ac52 100644
--- a/sys/gnu/fs/ext2fs/ext2_bmap.c
+++ b/sys/gnu/fs/ext2fs/ext2_bmap.c
@@ -47,6 +47,7 @@
 #include <sys/vnode.h>
 #include <sys/mount.h>
 #include <sys/resourcevar.h>
+#include <sys/stat.h>
 
 #include <ufs/ufs/extattr.h>
 #include <ufs/ufs/quota.h>
@@ -115,7 +116,7 @@ ufs_bmaparray(vp, bn, bnp, ap, nump, runp, runb)
 	struct indir a[NIADDR+1], *xap;
 	ufs_daddr_t daddr;
 	long metalbn;
-	int error, maxrun, num;
+	int error, num, maxrun = 0;
 
 	ip = VTOI(vp);
 	mp = vp->v_mount;
@@ -127,6 +128,7 @@ ufs_bmaparray(vp, bn, bnp, ap, nump, runp, runb)
 #endif
 
 	if (runp) {
+		maxrun = mp->mnt_iosize_max / mp->mnt_stat.f_iosize - 1;
 		*runp = 0;
 	}
 
@@ -134,7 +136,6 @@ ufs_bmaparray(vp, bn, bnp, ap, nump, runp, runb)
 		*runb = 0;
 	}
 
-	maxrun = mp->mnt_iosize_max / mp->mnt_stat.f_iosize - 1;
 
 	xap = ap == NULL ? a : ap;
 	if (!nump)
@@ -146,9 +147,12 @@ ufs_bmaparray(vp, bn, bnp, ap, nump, runp, runb)
 	num = *nump;
 	if (num == 0) {
 		*bnp = blkptrtodb(ump, ip->i_db[bn]);
-		if (*bnp == 0)
-			*bnp = -1;
-		else if (runp) {
+		if (*bnp == 0) {
+			if (ip->i_flags & SF_SNAPSHOT)
+				*bnp = blkptrtodb(ump, bn * ump->um_seqinc);
+			else
+				*bnp = -1;
+		} else if (runp) {
 			daddr_t bnb = bn;
 			for (++bn; bn < NDADDR && *runp < maxrun &&
 			    is_sequential(ump, ip->i_db[bn - 1], ip->i_db[bn]);
@@ -226,8 +230,13 @@ ufs_bmaparray(vp, bn, bnp, ap, nump, runp, runb)
 	if (bp)
 		bqrelse(bp);
 
-	daddr = blkptrtodb(ump, daddr);
-	*bnp = daddr == 0 ? -1 : daddr;
+	*bnp = blkptrtodb(ump, daddr);
+	if (*bnp == 0) {
+		if (ip->i_flags & SF_SNAPSHOT)
+			*bnp = blkptrtodb(ump, bn * ump->um_seqinc);
+		else
+			*bnp = -1;
+	}
 	return (0);
 }
 
diff --git a/sys/gnu/fs/ext2fs/inode.h b/sys/gnu/fs/ext2fs/inode.h
index 83960b0..6417a10 100644
--- a/sys/gnu/fs/ext2fs/inode.h
+++ b/sys/gnu/fs/ext2fs/inode.h
@@ -84,6 +84,7 @@ struct inode {
 	struct	 dquot *i_dquot[MAXQUOTAS]; /* Dquot structures. */
 	u_quad_t i_modrev;	/* Revision level for NFS lease. */
 	struct	 lockf *i_lockf;/* Head of byte-level lock list. */
+	struct	 inode *i_copyonwrite; /* copy-on-write list */
 	/*
 	 * Side effects; used during directory lookup.
 	 */
diff --git a/sys/kern/kern_ktrace.c b/sys/kern/kern_ktrace.c
index d914fc2..b0530f9 100644
--- a/sys/kern/kern_ktrace.c
+++ b/sys/kern/kern_ktrace.c
@@ -457,7 +457,8 @@ ktrwrite(vp, kth, uio)
 {
 	struct uio auio;
 	struct iovec aiov[2];
-	register struct proc *p = curproc;	/* XXX */
+	struct proc *p = curproc;	/* XXX */
+	struct mount *mp;
 	int error;
 
 	if (vp == NULL)
@@ -479,6 +480,7 @@ ktrwrite(vp, kth, uio)
 		if (uio != NULL)
 			kth->ktr_len += uio->uio_resid;
 	}
+	vn_start_write(vp, &mp, V_WAIT);
 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p);
 	(void)VOP_LEASE(vp, p, p->p_ucred, LEASE_WRITE);
 	error = VOP_WRITE(vp, &auio, IO_UNIT | IO_APPEND, p->p_ucred);
@@ -487,6 +489,7 @@ ktrwrite(vp, kth, uio)
 		error = VOP_WRITE(vp, uio, IO_UNIT | IO_APPEND, p->p_ucred);
 	}
 	VOP_UNLOCK(vp, 0, p);
+	vn_finished_write(mp);
 	if (!error)
 		return;
 	/*
diff --git a/sys/kern/kern_sig.c b/sys/kern/kern_sig.c
index e96f471..2d87b63 100644
--- a/sys/kern/kern_sig.c
+++ b/sys/kern/kern_sig.c
@@ -1599,6 +1599,7 @@ coredump(p)
 	struct nameidata nd;
 	struct vattr vattr;
 	int error, error1, flags;
+	struct mount *mp;
 	char *name;			/* name of corefile */
 	off_t limit;
 	
@@ -1619,6 +1620,7 @@ coredump(p)
 	if (limit == 0)
 		return 0;
 
+restart:
 	name = expand_name(p->p_comm, p->p_ucred->cr_uid, p->p_pid);
 	NDINIT(&nd, LOOKUP, NOFOLLOW, UIO_SYSSPACE, name, p);
 	flags = O_CREAT | FWRITE | O_NOFOLLOW;
@@ -1628,6 +1630,14 @@ coredump(p)
 		return (error);
 	NDFREE(&nd, NDF_ONLY_PNBUF);
 	vp = nd.ni_vp;
+	if (vn_start_write(vp, &mp, V_NOWAIT) != 0) {
+		VOP_UNLOCK(vp, 0, p);
+		if ((error = vn_close(vp, FWRITE, cred, p)) != 0)
+			return (error);
+		if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0)
+			return (error);
+		goto restart;
+	}
 
 	/* Don't dump to non-regular files or files with links. */
 	if (vp->v_type != VREG ||
@@ -1647,6 +1657,7 @@ coredump(p)
 
 out:
 	VOP_UNLOCK(vp, 0, p);
+	vn_finished_write(mp);
 	error1 = vn_close(vp, FWRITE, cred, p);
 	if (error == 0)
 		error = error1;
diff --git a/sys/kern/tty_tty.c b/sys/kern/tty_tty.c
index 2d15c70..66f7a7b 100644
--- a/sys/kern/tty_tty.c
+++ b/sys/kern/tty_tty.c
@@ -133,13 +133,19 @@ cttywrite(dev, uio, flag)
 {
 	struct proc *p = uio->uio_procp;
 	struct vnode *ttyvp = cttyvp(uio->uio_procp);
+	struct mount *mp;
 	int error;
 
 	if (ttyvp == NULL)
 		return (EIO);
+	mp = NULL;
+	if (ttyvp->v_type != VCHR &&
+	    (error = vn_start_write(ttyvp, &mp, V_WAIT | PCATCH)) != 0)
+		return (error);
 	vn_lock(ttyvp, LK_EXCLUSIVE | LK_RETRY, p);
 	error = VOP_WRITE(ttyvp, uio, flag, NOCRED);
 	VOP_UNLOCK(ttyvp, 0, p);
+	vn_finished_write(mp);
 	return (error);
 }
 
diff --git a/sys/kern/uipc_usrreq.c b/sys/kern/uipc_usrreq.c
index 0103877..a0b4072 100644
--- a/sys/kern/uipc_usrreq.c
+++ b/sys/kern/uipc_usrreq.c
@@ -538,7 +538,8 @@ unp_bind(unp, nam, p)
 	struct proc *p;
 {
 	struct sockaddr_un *soun = (struct sockaddr_un *)nam;
-	register struct vnode *vp;
+	struct vnode *vp;
+	struct mount *mp;
 	struct vattr vattr;
 	int error, namelen;
 	struct nameidata nd;
@@ -552,6 +553,7 @@ unp_bind(unp, nam, p)
 		return EINVAL;
 	strncpy(buf, soun->sun_path, namelen);
 	buf[namelen] = 0;	/* null-terminate the string */
+restart:
 	NDINIT(&nd, CREATE, NOFOLLOW | LOCKPARENT, UIO_SYSSPACE,
 	    buf, p);
 /* SHOULD BE ABLE TO ADOPT EXISTING AND wakeup() ALA FIFO's */
@@ -559,14 +561,19 @@ unp_bind(unp, nam, p)
 	if (error)
 		return (error);
 	vp = nd.ni_vp;
-	if (vp != NULL) {
+	if (vp != NULL || vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
 		NDFREE(&nd, NDF_ONLY_PNBUF);
 		if (nd.ni_dvp == vp)
 			vrele(nd.ni_dvp);
 		else
 			vput(nd.ni_dvp);
-		vrele(vp);
-		return (EADDRINUSE);
+		if (vp != NULL) {
+			vrele(vp);
+			return (EADDRINUSE);
+		}
+		if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0)
+			return (error);
+		goto restart;
 	}
 	VATTR_NULL(&vattr);
 	vattr.va_type = VSOCK;
@@ -582,6 +589,7 @@ unp_bind(unp, nam, p)
 	unp->unp_vnode = vp;
 	unp->unp_addr = (struct sockaddr_un *)dup_sockaddr(nam, 1);
 	VOP_UNLOCK(vp, 0, p);
+	vn_finished_write(mp);
 	return (0);
 }
 
diff --git a/sys/kern/vfs_bio.c b/sys/kern/vfs_bio.c
index dba2151..96fbd63 100644
--- a/sys/kern/vfs_bio.c
+++ b/sys/kern/vfs_bio.c
@@ -1165,6 +1165,8 @@ brelse(struct buf * bp)
 	BUF_UNLOCK(bp);
 	bp->b_flags &= ~(B_ASYNC | B_NOCACHE | B_AGE | B_RELBUF);
 	bp->b_ioflags &= ~BIO_ORDERED;
+	if ((bp->b_flags & B_DELWRI) == 0 && (bp->b_xflags & BX_VNDIRTY))
+		panic("brelse: not dirty");
 	splx(s);
 }
 
@@ -1225,6 +1227,8 @@ bqrelse(struct buf * bp)
 	BUF_UNLOCK(bp);
 	bp->b_flags &= ~(B_ASYNC | B_NOCACHE | B_AGE | B_RELBUF);
 	bp->b_ioflags &= ~BIO_ORDERED;
+	if ((bp->b_flags & B_DELWRI) == 0 && (bp->b_xflags & BX_VNDIRTY))
+		panic("bqrelse: not dirty");
 	splx(s);
 }
 
@@ -1420,7 +1424,7 @@ getnewbuf(int slpflag, int slptimeo, int size, int maxsize)
 	int isspecial;
 	static int flushingbufs;
 
-	if (curproc && (curproc->p_flag & P_BUFEXHAUST) == 0)
+	if (curproc && (curproc->p_flag & (P_COWINPROGRESS|P_BUFEXHAUST)) == 0)
 		isspecial = 0;
 	else
 		isspecial = 1;
diff --git a/sys/kern/vfs_default.c b/sys/kern/vfs_default.c
index f478aa2..00f9beb 100644
--- a/sys/kern/vfs_default.c
+++ b/sys/kern/vfs_default.c
@@ -500,6 +500,21 @@ vop_noislocked(ap)
 	return (lockstatus(vp->v_vnlock, ap->a_p));
 }
 
+/*
+ * Return our mount point (a_vp->v_mount) via a_mpp; we take charge of writes.
+ */
+int
+vop_stdgetwritemount(ap)
+	struct vop_getwritemount_args /* {
+		struct vnode *a_vp;
+		struct mount **a_mpp;
+	} */ *ap;
+{
+
+	*(ap->a_mpp) = ap->a_vp->v_mount;
+	return (0);
+}
+
 /* 
  * vfs default ops
  * used to fill the vfs fucntion table to get reasonable default return values.
diff --git a/sys/kern/vfs_export.c b/sys/kern/vfs_export.c
index 6483660..0e5ec3f 100644
--- a/sys/kern/vfs_export.c
+++ b/sys/kern/vfs_export.c
@@ -453,6 +453,7 @@ getnewvnode(tag, mp, vops, vpp)
 	int s, count;
 	struct proc *p = curproc;	/* XXX */
 	struct vnode *vp = NULL;
+	struct mount *vnmp;
 	vm_object_t object;
 
 	/*
@@ -491,7 +492,14 @@ getnewvnode(tag, mp, vops, vpp)
 			vp = NULL;
 			continue;
 		}
-		break;
+		/*
+		 * Skip over it if its filesystem is being suspended.
+		 */
+		if (vn_start_write(vp, &vnmp, V_NOWAIT) == 0)
+			break;
+		simple_unlock(&vp->v_interlock);
+		TAILQ_INSERT_TAIL(&vnode_free_list, vp, v_freelist);
+		vp = NULL;
 	}
 	if (vp) {
 		vp->v_flag |= VDOOMED;
@@ -504,6 +512,7 @@ getnewvnode(tag, mp, vops, vpp)
 		} else {
 			simple_unlock(&vp->v_interlock);
 		}
+		vn_finished_write(vnmp);
 
 #ifdef INVARIANTS
 		{
@@ -515,6 +524,8 @@ getnewvnode(tag, mp, vops, vpp)
 			if (vp->v_numoutput)
 				panic("Clean vnode has pending I/O's");
 			splx(s);
+			if (vp->v_writecount != 0)
+				panic("Non-zero write count");
 		}
 #endif
 		vp->v_flag = 0;
@@ -523,7 +534,6 @@ getnewvnode(tag, mp, vops, vpp)
 		vp->v_cstart = 0;
 		vp->v_clen = 0;
 		vp->v_socket = 0;
-		vp->v_writecount = 0;	/* XXX */
 	} else {
 		simple_unlock(&vnode_free_list_slock);
 		vp = (struct vnode *) zalloc(vnode_zone);
@@ -946,6 +956,7 @@ sched_sync(void)
 {
 	struct synclist *slp;
 	struct vnode *vp;
+	struct mount *mp;
 	long starttime;
 	int s;
 	struct proc *p = updateproc;
@@ -970,10 +981,12 @@ sched_sync(void)
 		splx(s);
 
 		while ((vp = LIST_FIRST(slp)) != NULL) {
-			if (VOP_ISLOCKED(vp, NULL) == 0) {
+			if (VOP_ISLOCKED(vp, NULL) == 0 &&
+			    vn_start_write(vp, &mp, V_NOWAIT) == 0) {
 				vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p);
 				(void) VOP_FSYNC(vp, p->p_ucred, MNT_LAZY, p);
 				VOP_UNLOCK(vp, 0, p);
+				vn_finished_write(mp);
 			}
 			s = splbio();
 			if (LIST_FIRST(slp) == vp) {
@@ -1386,6 +1399,7 @@ vrele(vp)
 	struct proc *p = curproc;	/* XXX */
 
 	KASSERT(vp != NULL, ("vrele: null vp"));
+	KASSERT(vp->v_writecount < vp->v_usecount, ("vrele: missed vn_close"));
 
 	simple_lock(&vp->v_interlock);
 
@@ -1427,6 +1441,7 @@ vput(vp)
 	struct proc *p = curproc;	/* XXX */
 
 	KASSERT(vp != NULL, ("vput: null vp"));
+	KASSERT(vp->v_writecount < vp->v_usecount, ("vput: missed vn_close"));
 
 	simple_lock(&vp->v_interlock);
 
@@ -1632,6 +1647,8 @@ vclean(vp, flags, p)
 	 * If the flush fails, just toss the buffers.
 	 */
 	if (flags & DOCLOSE) {
+		if (TAILQ_FIRST(&vp->v_dirtyblkhd) != NULL)
+			(void) vn_write_suspend_wait(vp, V_WAIT);
 		if (vinvalbuf(vp, V_SAVE, NOCRED, p, 0, 0) != 0)
 			vinvalbuf(vp, 0, NOCRED, p, 0, 0);
 	}
@@ -2785,12 +2802,18 @@ sync_fsync(ap)
 		simple_unlock(&mountlist_slock);
 		return (0);
 	}
+	if (vn_start_write(NULL, &mp, V_NOWAIT) != 0) {
+		vfs_unbusy(mp, p);
+		simple_unlock(&mountlist_slock);
+		return (0);
+	}
 	asyncflag = mp->mnt_flag & MNT_ASYNC;
 	mp->mnt_flag &= ~MNT_ASYNC;
 	vfs_msync(mp, MNT_NOWAIT);
 	VFS_SYNC(mp, MNT_LAZY, ap->a_cred, p);
 	if (asyncflag)
 		mp->mnt_flag |= MNT_ASYNC;
+	vn_finished_write(mp);
 	vfs_unbusy(mp, p);
 	return (0);
 }
diff --git a/sys/kern/vfs_extattr.c b/sys/kern/vfs_extattr.c
index 65a297ca..404114a 100644
--- a/sys/kern/vfs_extattr.c
+++ b/sys/kern/vfs_extattr.c
@@ -164,8 +164,8 @@ mount(p, uap)
 			vput(vp);
 			return (EOPNOTSUPP);	/* Needs translation */
 		}
-		mp->mnt_flag |=
-		    SCARG(uap, flags) & (MNT_RELOAD | MNT_FORCE | MNT_UPDATE);
+		mp->mnt_flag |= SCARG(uap, flags) &
+		    (MNT_RELOAD | MNT_FORCE | MNT_UPDATE | MNT_SNAPSHOT);
 		/*
 		 * Only root, or the user that did the original mount is
 		 * permitted to update it.
@@ -303,7 +303,8 @@ update:
 		vrele(vp);
 		if (mp->mnt_kern_flag & MNTK_WANTRDWR)
 			mp->mnt_flag &= ~MNT_RDONLY;
-		mp->mnt_flag &=~ (MNT_UPDATE | MNT_RELOAD | MNT_FORCE);
+		mp->mnt_flag &=~
+		    (MNT_UPDATE | MNT_RELOAD | MNT_FORCE | MNT_SNAPSHOT);
 		mp->mnt_kern_flag &=~ MNTK_WANTRDWR;
 		if (error) {
 			mp->mnt_flag = flag;
@@ -458,7 +459,7 @@ unmount(p, uap)
  */
 int
 dounmount(mp, flags, p)
-	register struct mount *mp;
+	struct mount *mp;
 	int flags;
 	struct proc *p;
 {
@@ -469,6 +470,7 @@ dounmount(mp, flags, p)
 	simple_lock(&mountlist_slock);
 	mp->mnt_kern_flag |= MNTK_UNMOUNT;
 	lockmgr(&mp->mnt_lock, LK_DRAIN | LK_INTERLOCK, &mountlist_slock, p);
+	vn_start_write(NULL, &mp, V_WAIT);
 
 	if (mp->mnt_flag & MNT_EXPUBLIC)
 		vfs_setpublicfs(NULL, NULL, NULL);
@@ -481,8 +483,10 @@ dounmount(mp, flags, p)
 		vrele(mp->mnt_syncer);
 	if (((mp->mnt_flag & MNT_RDONLY) ||
 	     (error = VFS_SYNC(mp, MNT_WAIT, p->p_ucred, p)) == 0) ||
-	    (flags & MNT_FORCE))
+	    (flags & MNT_FORCE)) {
 		error = VFS_UNMOUNT(mp, flags, p);
+	}
+	vn_finished_write(mp);
 	simple_lock(&mountlist_slock);
 	if (error) {
 		if ((mp->mnt_flag & MNT_RDONLY) == 0 && mp->mnt_syncer == NULL)
@@ -530,7 +534,7 @@ sync(p, uap)
 	struct proc *p;
 	struct sync_args *uap;
 {
-	register struct mount *mp, *nmp;
+	struct mount *mp, *nmp;
 	int asyncflag;
 
 	simple_lock(&mountlist_slock);
@@ -539,13 +543,15 @@ sync(p, uap)
 			nmp = TAILQ_NEXT(mp, mnt_list);
 			continue;
 		}
-		if ((mp->mnt_flag & MNT_RDONLY) == 0) {
+		if ((mp->mnt_flag & MNT_RDONLY) == 0 &&
+		    vn_start_write(NULL, &mp, V_NOWAIT) == 0) {
 			asyncflag = mp->mnt_flag & MNT_ASYNC;
 			mp->mnt_flag &= ~MNT_ASYNC;
 			vfs_msync(mp, MNT_NOWAIT);
 			VFS_SYNC(mp, MNT_NOWAIT,
-				((p != NULL) ? p->p_ucred : NOCRED), p);
+			    ((p != NULL) ? p->p_ucred : NOCRED), p);
 			mp->mnt_flag |= asyncflag;
+			vn_finished_write(mp);
 		}
 		simple_lock(&mountlist_slock);
 		nmp = TAILQ_NEXT(mp, mnt_list);
@@ -593,7 +599,7 @@ quotactl(p, uap)
 		syscallarg(caddr_t) arg;
 	} */ *uap;
 {
-	register struct mount *mp;
+	struct mount *mp;
 	int error;
 	struct nameidata nd;
 
@@ -602,11 +608,15 @@ quotactl(p, uap)
 	NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, SCARG(uap, path), p);
 	if ((error = namei(&nd)) != 0)
 		return (error);
-	mp = nd.ni_vp->v_mount;
 	NDFREE(&nd, NDF_ONLY_PNBUF);
+	error = vn_start_write(nd.ni_vp, &mp, V_WAIT | PCATCH);
 	vrele(nd.ni_vp);
-	return (VFS_QUOTACTL(mp, SCARG(uap, cmd), SCARG(uap, uid),
-	    SCARG(uap, arg), p));
+	if (error)
+		return (error);
+	error = VFS_QUOTACTL(mp, SCARG(uap, cmd), SCARG(uap, uid),
+	    SCARG(uap, arg), p);
+	vn_finished_write(mp);
+	return (error);
 }
 
 /*
@@ -972,6 +982,7 @@ open(p, uap)
 	struct file *fp;
 	struct vnode *vp;
 	struct vattr vat;
+	struct mount *mp;
 	int cmode, flags, oflags;
 	struct file *nfp;
 	int type, indx, error;
@@ -1029,12 +1040,15 @@ open(p, uap)
 		fp->f_flag |= FHASLOCK;
 	}
 	if (flags & O_TRUNC) {
+		if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
+			goto bad;
 		VOP_LEASE(vp, p, p->p_ucred, LEASE_WRITE);
 		VATTR_NULL(&vat);
 		vat.va_size = 0;
 		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p);
 		error = VOP_SETATTR(vp, &vat, p->p_ucred, p);
 		VOP_UNLOCK(vp, 0, p);
+		vn_finished_write(mp);
 		if (error)
 			goto bad;
 	}
@@ -1101,7 +1115,8 @@ mknod(p, uap)
 		syscallarg(int) dev;
 	} */ *uap;
 {
-	register struct vnode *vp;
+	struct vnode *vp;
+	struct mount *mp;
 	struct vattr vattr;
 	int error;
 	int whiteout = 0;
@@ -1118,14 +1133,16 @@ mknod(p, uap)
 	}
 	if (error)
 		return (error);
+restart:
 	bwillwrite();
 	NDINIT(&nd, CREATE, LOCKPARENT, UIO_USERSPACE, SCARG(uap, path), p);
 	if ((error = namei(&nd)) != 0)
 		return (error);
 	vp = nd.ni_vp;
-	if (vp != NULL)
+	if (vp != NULL) {
+		vrele(vp);
 		error = EEXIST;
-	else {
+	} else {
 		VATTR_NULL(&vattr);
 		vattr.va_mode = (SCARG(uap, mode) & ALLPERMS) &~ p->p_fd->fd_cmask;
 		vattr.va_rdev = SCARG(uap, dev);
@@ -1149,6 +1166,13 @@ mknod(p, uap)
 			break;
 		}
 	}
+	if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
+		NDFREE(&nd, NDF_ONLY_PNBUF);
+		vput(nd.ni_dvp);
+		if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0)
+			return (error);
+		goto restart;
+	}
 	if (!error) {
 		VOP_LEASE(nd.ni_dvp, p, p->p_ucred, LEASE_WRITE);
 		if (whiteout)
@@ -1159,17 +1183,10 @@ mknod(p, uap)
 			if (error == 0)
 				vput(nd.ni_vp);
 		}
-		NDFREE(&nd, NDF_ONLY_PNBUF);
-		vput(nd.ni_dvp);
-	} else {
-		NDFREE(&nd, NDF_ONLY_PNBUF);
-		if (nd.ni_dvp == vp)
-			vrele(nd.ni_dvp);
-		else
-			vput(nd.ni_dvp);
-		if (vp)
-			vrele(vp);
 	}
+	NDFREE(&nd, NDF_ONLY_PNBUF);
+	vput(nd.ni_dvp);
+	vn_finished_write(mp);
 	ASSERT_VOP_UNLOCKED(nd.ni_dvp, "mknod");
 	ASSERT_VOP_UNLOCKED(nd.ni_vp, "mknod");
 	return (error);
@@ -1193,23 +1210,29 @@ mkfifo(p, uap)
 		syscallarg(int) mode;
 	} */ *uap;
 {
+	struct mount *mp;
 	struct vattr vattr;
 	int error;
 	struct nameidata nd;
 
+restart:
 	bwillwrite();
 	NDINIT(&nd, CREATE, LOCKPARENT, UIO_USERSPACE, SCARG(uap, path), p);
 	if ((error = namei(&nd)) != 0)
 		return (error);
 	if (nd.ni_vp != NULL) {
 		NDFREE(&nd, NDF_ONLY_PNBUF);
-		if (nd.ni_dvp == nd.ni_vp)
-			vrele(nd.ni_dvp);
-		else
-			vput(nd.ni_dvp);
 		vrele(nd.ni_vp);
+		vput(nd.ni_dvp);
 		return (EEXIST);
 	}
+	if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
+		NDFREE(&nd, NDF_ONLY_PNBUF);
+		vput(nd.ni_dvp);
+		if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0)
+			return (error);
+		goto restart;
+	}
 	VATTR_NULL(&vattr);
 	vattr.va_type = VFIFO;
 	vattr.va_mode = (SCARG(uap, mode) & ALLPERMS) &~ p->p_fd->fd_cmask;
@@ -1219,6 +1242,7 @@ mkfifo(p, uap)
 		vput(nd.ni_vp);
 	NDFREE(&nd, NDF_ONLY_PNBUF);
 	vput(nd.ni_dvp);
+	vn_finished_write(mp);
 	return (error);
 }
 
@@ -1240,7 +1264,8 @@ link(p, uap)
 		syscallarg(char *) link;
 	} */ *uap;
 {
-	register struct vnode *vp;
+	struct vnode *vp;
+	struct mount *mp;
 	struct nameidata nd;
 	int error;
 
@@ -1250,30 +1275,29 @@ link(p, uap)
 		return (error);
 	NDFREE(&nd, NDF_ONLY_PNBUF);
 	vp = nd.ni_vp;
-	if (vp->v_type == VDIR)
-		error = EPERM;		/* POSIX */
-	else {
-		NDINIT(&nd, CREATE, LOCKPARENT|NOOBJ, UIO_USERSPACE, SCARG(uap, link), p);
-		error = namei(&nd);
-		if (!error) {
-			if (nd.ni_vp != NULL) {
-				if (nd.ni_vp)
-					vrele(nd.ni_vp);
-				error = EEXIST;
-			} else {
-				VOP_LEASE(nd.ni_dvp, p, p->p_ucred,
-				    LEASE_WRITE);
-				VOP_LEASE(vp, p, p->p_ucred, LEASE_WRITE);
-				error = VOP_LINK(nd.ni_dvp, vp, &nd.ni_cnd);
-			}
-			NDFREE(&nd, NDF_ONLY_PNBUF);
-			if (nd.ni_dvp == nd.ni_vp)
-				vrele(nd.ni_dvp);
-			else
-				vput(nd.ni_dvp);
+	if (vp->v_type == VDIR) {
+		vrele(vp);
+		return (EPERM);		/* POSIX */
+	}
+	if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0) {
+		vrele(vp);
+		return (error);
+	}
+	NDINIT(&nd, CREATE, LOCKPARENT|NOOBJ, UIO_USERSPACE, SCARG(uap, link), p);
+	if ((error = namei(&nd)) == 0) {
+		if (nd.ni_vp != NULL) {
+			vrele(nd.ni_vp);
+			error = EEXIST;
+		} else {
+			VOP_LEASE(nd.ni_dvp, p, p->p_ucred, LEASE_WRITE);
+			VOP_LEASE(vp, p, p->p_ucred, LEASE_WRITE);
+			error = VOP_LINK(nd.ni_dvp, vp, &nd.ni_cnd);
 		}
+		NDFREE(&nd, NDF_ONLY_PNBUF);
+		vput(nd.ni_dvp);
 	}
 	vrele(vp);
+	vn_finished_write(mp);
 	ASSERT_VOP_UNLOCKED(nd.ni_dvp, "link");
 	ASSERT_VOP_UNLOCKED(nd.ni_vp, "link");
 	return (error);
@@ -1297,6 +1321,7 @@ symlink(p, uap)
 		syscallarg(char *) link;
 	} */ *uap;
 {
+	struct mount *mp;
 	struct vattr vattr;
 	char *path;
 	int error;
@@ -1305,20 +1330,25 @@ symlink(p, uap)
 	path = zalloc(namei_zone);
 	if ((error = copyinstr(SCARG(uap, path), path, MAXPATHLEN, NULL)) != 0)
 		goto out;
+restart:
 	bwillwrite();
 	NDINIT(&nd, CREATE, LOCKPARENT|NOOBJ, UIO_USERSPACE, SCARG(uap, link), p);
 	if ((error = namei(&nd)) != 0)
 		goto out;
 	if (nd.ni_vp) {
 		NDFREE(&nd, NDF_ONLY_PNBUF);
-		if (nd.ni_dvp == nd.ni_vp)
-			vrele(nd.ni_dvp);
-		else
-			vput(nd.ni_dvp);
 		vrele(nd.ni_vp);
+		vput(nd.ni_dvp);
 		error = EEXIST;
 		goto out;
 	}
+	if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
+		NDFREE(&nd, NDF_ONLY_PNBUF);
+		vput(nd.ni_dvp);
+		if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0)
+			return (error);
+		goto restart;
+	}
 	VATTR_NULL(&vattr);
 	vattr.va_mode = ACCESSPERMS &~ p->p_fd->fd_cmask;
 	VOP_LEASE(nd.ni_dvp, p, p->p_ucred, LEASE_WRITE);
@@ -1327,6 +1357,7 @@ symlink(p, uap)
 	if (error == 0)
 		vput(nd.ni_vp);
 	vput(nd.ni_dvp);
+	vn_finished_write(mp);
 	ASSERT_VOP_UNLOCKED(nd.ni_dvp, "symlink");
 	ASSERT_VOP_UNLOCKED(nd.ni_vp, "symlink");
 out:
@@ -1346,8 +1377,10 @@ undelete(p, uap)
 	} */ *uap;
 {
 	int error;
+	struct mount *mp;
 	struct nameidata nd;
 
+restart:
 	bwillwrite();
 	NDINIT(&nd, DELETE, LOCKPARENT|DOWHITEOUT, UIO_USERSPACE,
 	    SCARG(uap, path), p);
@@ -1357,19 +1390,23 @@ undelete(p, uap)
 
 	if (nd.ni_vp != NULLVP || !(nd.ni_cnd.cn_flags & ISWHITEOUT)) {
 		NDFREE(&nd, NDF_ONLY_PNBUF);
-		if (nd.ni_dvp == nd.ni_vp)
-			vrele(nd.ni_dvp);
-		else
-			vput(nd.ni_dvp);
 		if (nd.ni_vp)
 			vrele(nd.ni_vp);
+		vput(nd.ni_dvp);
 		return (EEXIST);
 	}
-
+	if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
+		NDFREE(&nd, NDF_ONLY_PNBUF);
+		vput(nd.ni_dvp);
+		if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0)
+			return (error);
+		goto restart;
+	}
 	VOP_LEASE(nd.ni_dvp, p, p->p_ucred, LEASE_WRITE);
 	error = VOP_WHITEOUT(nd.ni_dvp, &nd.ni_cnd, DELETE);
 	NDFREE(&nd, NDF_ONLY_PNBUF);
 	vput(nd.ni_dvp);
+	vn_finished_write(mp);
 	ASSERT_VOP_UNLOCKED(nd.ni_dvp, "undelete");
 	ASSERT_VOP_UNLOCKED(nd.ni_vp, "undelete");
 	return (error);
@@ -1391,18 +1428,17 @@ unlink(p, uap)
 		syscallarg(char *) path;
 	} */ *uap;
 {
-	register struct vnode *vp;
+	struct mount *mp;
+	struct vnode *vp;
 	int error;
 	struct nameidata nd;
 
+restart:
 	bwillwrite();
 	NDINIT(&nd, DELETE, LOCKPARENT, UIO_USERSPACE, SCARG(uap, path), p);
 	if ((error = namei(&nd)) != 0)
 		return (error);
 	vp = nd.ni_vp;
-	VOP_LEASE(vp, p, p->p_ucred, LEASE_WRITE);
-	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p);
-
 	if (vp->v_type == VDIR)
 		error = EPERM;		/* POSIX */
 	else {
@@ -1414,18 +1450,24 @@ unlink(p, uap)
 		if (vp->v_flag & VROOT)
 			error = EBUSY;
 	}
-
+	if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
+		NDFREE(&nd, NDF_ONLY_PNBUF);
+		vrele(vp);
+		vput(nd.ni_dvp);
+		if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0)
+			return (error);
+		goto restart;
+	}
+	VOP_LEASE(vp, p, p->p_ucred, LEASE_WRITE);
+	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p);
 	if (!error) {
 		VOP_LEASE(nd.ni_dvp, p, p->p_ucred, LEASE_WRITE);
 		error = VOP_REMOVE(nd.ni_dvp, vp, &nd.ni_cnd);
 	}
 	NDFREE(&nd, NDF_ONLY_PNBUF);
-	if (nd.ni_dvp == vp)
-		vrele(nd.ni_dvp);
-	else
-		vput(nd.ni_dvp);
-	if (vp != NULLVP)
-		vput(vp);
+	vput(nd.ni_dvp);
+	vput(vp);
+	vn_finished_write(mp);
 	ASSERT_VOP_UNLOCKED(nd.ni_dvp, "unlink");
 	ASSERT_VOP_UNLOCKED(nd.ni_vp, "unlink");
 	return (error);
@@ -1936,6 +1978,7 @@ setfflags(p, vp, flags)
 	int flags;
 {
 	int error;
+	struct mount *mp;
 	struct vattr vattr;
 
 	/*
@@ -1948,12 +1991,15 @@ setfflags(p, vp, flags)
 	    ((error = suser_xxx(p->p_ucred, p, PRISON_ROOT)) != 0))
 		return (error);
 
+	if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
+		return (error);
 	VOP_LEASE(vp, p, p->p_ucred, LEASE_WRITE);
 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p);
 	VATTR_NULL(&vattr);
 	vattr.va_flags = flags;
 	error = VOP_SETATTR(vp, &vattr, p->p_ucred, p);
 	VOP_UNLOCK(vp, 0, p);
+	vn_finished_write(mp);
 	return (error);
 }
 
@@ -2020,14 +2066,18 @@ setfmode(p, vp, mode)
 	int mode;
 {
 	int error;
+	struct mount *mp;
 	struct vattr vattr;
 
+	if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
+		return (error);
 	VOP_LEASE(vp, p, p->p_ucred, LEASE_WRITE);
 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p);
 	VATTR_NULL(&vattr);
 	vattr.va_mode = mode & ALLPERMS;
 	error = VOP_SETATTR(vp, &vattr, p->p_ucred, p);
 	VOP_UNLOCK(vp, 0, p);
+	vn_finished_write(mp);
 	return error;
 }
 
@@ -2125,8 +2175,11 @@ setfown(p, vp, uid, gid)
 	gid_t gid;
 {
 	int error;
+	struct mount *mp;
 	struct vattr vattr;
 
+	if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
+		return (error);
 	VOP_LEASE(vp, p, p->p_ucred, LEASE_WRITE);
 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p);
 	VATTR_NULL(&vattr);
@@ -2134,6 +2187,7 @@ setfown(p, vp, uid, gid)
 	vattr.va_gid = gid;
 	error = VOP_SETATTR(vp, &vattr, p->p_ucred, p);
 	VOP_UNLOCK(vp, 0, p);
+	vn_finished_write(mp);
 	return error;
 }
 
@@ -2259,8 +2313,11 @@ setutimes(p, vp, ts, nullflag)
 	int nullflag;
 {
 	int error;
+	struct mount *mp;
 	struct vattr vattr;
 
+	if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
+		return (error);
 	VOP_LEASE(vp, p, p->p_ucred, LEASE_WRITE);
 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p);
 	VATTR_NULL(&vattr);
@@ -2270,6 +2327,7 @@ setutimes(p, vp, ts, nullflag)
 		vattr.va_vaflags |= VA_UTIMES_NULL;
 	error = VOP_SETATTR(vp, &vattr, p->p_ucred, p);
 	VOP_UNLOCK(vp, 0, p);
+	vn_finished_write(mp);
 	return error;
 }
 
@@ -2394,7 +2452,8 @@ truncate(p, uap)
 		syscallarg(off_t) length;
 	} */ *uap;
 {
-	register struct vnode *vp;
+	struct mount *mp;
+	struct vnode *vp;
 	struct vattr vattr;
 	int error;
 	struct nameidata nd;
@@ -2405,6 +2464,10 @@ truncate(p, uap)
 	if ((error = namei(&nd)) != 0)
 		return (error);
 	vp = nd.ni_vp;
+	if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0) {
+		vrele(vp);
+		return (error);
+	}
 	NDFREE(&nd, NDF_ONLY_PNBUF);
 	VOP_LEASE(vp, p, p->p_ucred, LEASE_WRITE);
 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p);
@@ -2417,6 +2480,7 @@ truncate(p, uap)
 		error = VOP_SETATTR(vp, &vattr, p->p_ucred, p);
 	}
 	vput(vp);
+	vn_finished_write(mp);
 	return (error);
 }
 
@@ -2440,6 +2504,7 @@ ftruncate(p, uap)
 		syscallarg(off_t) length;
 	} */ *uap;
 {
+	struct mount *mp;
 	struct vattr vattr;
 	struct vnode *vp;
 	struct file *fp;
@@ -2452,6 +2517,8 @@ ftruncate(p, uap)
 	if ((fp->f_flag & FWRITE) == 0)
 		return (EINVAL);
 	vp = (struct vnode *)fp->f_data;
+	if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
+		return (error);
 	VOP_LEASE(vp, p, p->p_ucred, LEASE_WRITE);
 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p);
 	if (vp->v_type == VDIR)
@@ -2462,6 +2529,7 @@ ftruncate(p, uap)
 		error = VOP_SETATTR(vp, &vattr, fp->f_cred, p);
 	}
 	VOP_UNLOCK(vp, 0, p);
+	vn_finished_write(mp);
 	return (error);
 }
 
@@ -2541,13 +2609,16 @@ fsync(p, uap)
 		syscallarg(int) fd;
 	} */ *uap;
 {
-	register struct vnode *vp;
+	struct vnode *vp;
+	struct mount *mp;
 	struct file *fp;
 	int error;
 
 	if ((error = getvnode(p->p_fd, SCARG(uap, fd), &fp)) != 0)
 		return (error);
 	vp = (struct vnode *)fp->f_data;
+	if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
+		return (error);
 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p);
 	if (vp->v_object)
 		vm_object_page_clean(vp->v_object, 0, 0, 0);
@@ -2558,6 +2629,7 @@ fsync(p, uap)
 #endif
 
 	VOP_UNLOCK(vp, 0, p);
+	vn_finished_write(mp);
 	return (error);
 }
 
@@ -2580,7 +2652,8 @@ rename(p, uap)
 		syscallarg(char *) to;
 	} */ *uap;
 {
-	register struct vnode *tvp, *fvp, *tdvp;
+	struct mount *mp;
+	struct vnode *tvp, *fvp, *tdvp;
 	struct nameidata fromnd, tond;
 	int error;
 
@@ -2590,6 +2663,12 @@ rename(p, uap)
 	if ((error = namei(&fromnd)) != 0)
 		return (error);
 	fvp = fromnd.ni_vp;
+	if ((error = vn_start_write(fvp, &mp, V_WAIT | PCATCH)) != 0) {
+		NDFREE(&fromnd, NDF_ONLY_PNBUF);
+		vrele(fromnd.ni_dvp);
+		vrele(fvp);
+		goto out1;
+	}
 	NDINIT(&tond, RENAME, LOCKPARENT | LOCKLEAF | NOCACHE | SAVESTART | NOOBJ,
 	    UIO_USERSPACE, SCARG(uap, to), p);
 	if (fromnd.ni_vp->v_type == VDIR)
@@ -2652,6 +2731,7 @@ out:
 		vrele(fvp);
 	}
 	vrele(tond.ni_startdir);
+	vn_finished_write(mp);
 	ASSERT_VOP_UNLOCKED(fromnd.ni_dvp, "rename");
 	ASSERT_VOP_UNLOCKED(fromnd.ni_vp, "rename");
 	ASSERT_VOP_UNLOCKED(tond.ni_dvp, "rename");
@@ -2682,11 +2762,13 @@ mkdir(p, uap)
 		syscallarg(int) mode;
 	} */ *uap;
 {
-	register struct vnode *vp;
+	struct mount *mp;
+	struct vnode *vp;
 	struct vattr vattr;
 	int error;
 	struct nameidata nd;
 
+restart:
 	bwillwrite();
 	NDINIT(&nd, CREATE, LOCKPARENT, UIO_USERSPACE, SCARG(uap, path), p);
 	nd.ni_cnd.cn_flags |= WILLBEDIR;
@@ -2695,13 +2777,17 @@ mkdir(p, uap)
 	vp = nd.ni_vp;
 	if (vp != NULL) {
 		NDFREE(&nd, NDF_ONLY_PNBUF);
-		if (nd.ni_dvp == vp)
-			vrele(nd.ni_dvp);
-		else
-			vput(nd.ni_dvp);
 		vrele(vp);
+		vput(nd.ni_dvp);
 		return (EEXIST);
 	}
+	if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
+		NDFREE(&nd, NDF_ONLY_PNBUF);
+		vput(nd.ni_dvp);
+		if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0)
+			return (error);
+		goto restart;
+	}
 	VATTR_NULL(&vattr);
 	vattr.va_type = VDIR;
 	vattr.va_mode = (SCARG(uap, mode) & ACCESSPERMS) &~ p->p_fd->fd_cmask;
@@ -2711,6 +2797,7 @@ mkdir(p, uap)
 	vput(nd.ni_dvp);
 	if (!error)
 		vput(nd.ni_vp);
+	vn_finished_write(mp);
 	ASSERT_VOP_UNLOCKED(nd.ni_dvp, "mkdir");
 	ASSERT_VOP_UNLOCKED(nd.ni_vp, "mkdir");
 	return (error);
@@ -2732,10 +2819,12 @@ rmdir(p, uap)
 		syscallarg(char *) path;
 	} */ *uap;
 {
-	register struct vnode *vp;
+	struct mount *mp;
+	struct vnode *vp;
 	int error;
 	struct nameidata nd;
 
+restart:
 	bwillwrite();
 	NDINIT(&nd, DELETE, LOCKPARENT | LOCKLEAF, UIO_USERSPACE,
 	    SCARG(uap, path), p);
@@ -2756,21 +2845,32 @@ rmdir(p, uap)
 	/*
 	 * The root of a mounted filesystem cannot be deleted.
 	 */
-	if (vp->v_flag & VROOT)
+	if (vp->v_flag & VROOT) {
 		error = EBUSY;
-	else {
-		VOP_LEASE(nd.ni_dvp, p, p->p_ucred, LEASE_WRITE);
-		VOP_LEASE(vp, p, p->p_ucred, LEASE_WRITE);
-		error = VOP_RMDIR(nd.ni_dvp, nd.ni_vp, &nd.ni_cnd);
+		goto out;
 	}
+	if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
+		NDFREE(&nd, NDF_ONLY_PNBUF);
+		if (nd.ni_dvp == vp)
+			vrele(nd.ni_dvp);
+		else
+			vput(nd.ni_dvp);
+		vput(vp);
+		if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0)
+			return (error);
+		goto restart;
+	}
+	VOP_LEASE(nd.ni_dvp, p, p->p_ucred, LEASE_WRITE);
+	VOP_LEASE(vp, p, p->p_ucred, LEASE_WRITE);
+	error = VOP_RMDIR(nd.ni_dvp, nd.ni_vp, &nd.ni_cnd);
+	vn_finished_write(mp);
 out:
 	NDFREE(&nd, NDF_ONLY_PNBUF);
 	if (nd.ni_dvp == vp)
 		vrele(nd.ni_dvp);
 	else
 		vput(nd.ni_dvp);
-	if (vp != NULLVP)
-		vput(vp);
+	vput(vp);
 	ASSERT_VOP_UNLOCKED(nd.ni_dvp, "rmdir");
 	ASSERT_VOP_UNLOCKED(nd.ni_vp, "rmdir");
 	return (error);
@@ -3049,7 +3149,8 @@ revoke(p, uap)
 		syscallarg(char *) path;
 	} */ *uap;
 {
-	register struct vnode *vp;
+	struct mount *mp;
+	struct vnode *vp;
 	struct vattr vattr;
 	int error;
 	struct nameidata nd;
@@ -3068,8 +3169,11 @@ revoke(p, uap)
 	if (p->p_ucred->cr_uid != vattr.va_uid &&
 	    (error = suser_xxx(0, p, PRISON_ROOT)))
 		goto out;
+	if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
+		goto out;
 	if (vcount(vp) > 1)
 		VOP_REVOKE(vp, REVOKEALL);
+	vn_finished_write(mp);
 out:
 	vrele(vp);
 	return (error);
@@ -3228,11 +3332,16 @@ fhopen(p, uap)
 	}
 	if (fmode & O_TRUNC) {
 		VOP_UNLOCK(vp, 0, p);				/* XXX */
+		if ((error = vn_start_write(NULL, &mp, V_WAIT | PCATCH)) != 0) {
+			vrele(vp);
+			return (error);
+		}
 		VOP_LEASE(vp, p, p->p_ucred, LEASE_WRITE);
 		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p);	/* XXX */
 		VATTR_NULL(vap);
 		vap->va_size = 0;
 		error = VOP_SETATTR(vp, vap, p->p_ucred, p);
+		vn_finished_write(mp);
 		if (error)
 			goto bad;
 	}
@@ -3407,10 +3516,15 @@ extattrctl(p, uap)
 	NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, SCARG(uap, path), p);
 	if ((error = namei(&nd)) != 0)
 		return (error);
-	mp = nd.ni_vp->v_mount;
+	error = vn_start_write(nd.ni_vp, &mp, V_WAIT | PCATCH);
 	NDFREE(&nd, 0);
-	return (VFS_EXTATTRCTL(mp, SCARG(uap, cmd), SCARG(uap, attrname),
-	    SCARG(uap, arg), p));
+	vrele(nd.ni_vp);
+	if (error)
+		return (error);
+	error = VFS_EXTATTRCTL(mp, SCARG(uap, cmd), SCARG(uap, attrname),
+	    SCARG(uap, arg), p);
+	vn_finished_write(mp);
+	return (error);
 }
 
 /*
@@ -3425,6 +3539,7 @@ extattr_set_file(p, uap)
 	struct extattr_set_file_args *uap;
 {
 	struct nameidata nd;
+	struct mount *mp;
 	struct uio auio;
 	struct iovec *iov, *needfree = NULL, aiov[UIO_SMALLIOV];
 	char attrname[EXTATTR_MAXNAMELEN];
@@ -3434,10 +3549,11 @@ extattr_set_file(p, uap)
 	error = copyin(SCARG(uap, attrname), attrname, EXTATTR_MAXNAMELEN);
 	if (error)
 		return (error);
-	NDINIT(&nd, LOOKUP, LOCKLEAF | FOLLOW, UIO_USERSPACE, SCARG(uap, path),
-	    p);
+	NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, SCARG(uap, path), p);
 	if ((error = namei(&nd)) != 0)
 		return(error);
+	if ((error = vn_start_write(nd.ni_vp, &mp, V_WAIT | PCATCH)) != 0)
+		goto done;
 	iovlen = uap->iovcnt * sizeof(struct iovec);
 	if (uap->iovcnt > UIO_SMALLIOV) {
 		if (uap->iovcnt > UIO_MAXIOV) {
@@ -3477,6 +3593,8 @@ done:
 	if (needfree)
 		FREE(needfree, M_IOV);
 	NDFREE(&nd, 0);
+	vrele(nd.ni_vp);
+	vn_finished_write(mp);
 	return (error);
 }
 
@@ -3508,6 +3626,7 @@ extattr_get_file(p, uap)
 	if (uap->iovcnt > UIO_SMALLIOV) {
 		if (uap->iovcnt > UIO_MAXIOV) {
 			NDFREE(&nd, 0);
+			vrele(nd.ni_vp);
 			return (EINVAL);
 		}
 		MALLOC(iov, struct iovec *, iovlen, M_IOV, M_WAITOK);
@@ -3545,6 +3664,7 @@ done:
 	if (needfree)
 		FREE(needfree, M_IOV);
 	NDFREE(&nd, 0);
+	vrele(nd.ni_vp);
 	return(error);
 }
 
@@ -3557,6 +3677,7 @@ extattr_delete_file(p, uap)
 	struct proc *p;
 	struct extattr_delete_file_args *uap;
 {
+	struct mount *mp;
 	struct nameidata nd;
 	char attrname[EXTATTR_MAXNAMELEN];
 	int	error;
@@ -3564,12 +3685,17 @@ extattr_delete_file(p, uap)
 	error = copyin(SCARG(uap, attrname), attrname, EXTATTR_MAXNAMELEN);
 	if (error)
 		return(error);
-	NDINIT(&nd, LOOKUP, LOCKLEAF | FOLLOW, UIO_USERSPACE, SCARG(uap, path),
-	    p);
+	NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, SCARG(uap, path), p);
 	if ((error = namei(&nd)) != 0)
 		return(error);
+	if ((error = vn_start_write(nd.ni_vp, &mp, V_WAIT | PCATCH)) != 0) {
+		vrele(nd.ni_vp);
+		return (error);
+	}
 	error = VOP_SETEXTATTR(nd.ni_vp, attrname, NULL, p->p_cred->pc_ucred,
 	    p);
 	NDFREE(&nd, 0);
+	vrele(nd.ni_vp);
+	vn_finished_write(mp);
 	return(error);
 }
diff --git a/sys/kern/vfs_subr.c b/sys/kern/vfs_subr.c
index 6483660..0e5ec3f 100644
--- a/sys/kern/vfs_subr.c
+++ b/sys/kern/vfs_subr.c
@@ -453,6 +453,7 @@ getnewvnode(tag, mp, vops, vpp)
 	int s, count;
 	struct proc *p = curproc;	/* XXX */
 	struct vnode *vp = NULL;
+	struct mount *vnmp;
 	vm_object_t object;
 
 	/*
@@ -491,7 +492,14 @@ getnewvnode(tag, mp, vops, vpp)
 			vp = NULL;
 			continue;
 		}
-		break;
+		/*
+		 * Skip over it if its filesystem is being suspended.
+		 */
+		if (vn_start_write(vp, &vnmp, V_NOWAIT) == 0)
+			break;
+		simple_unlock(&vp->v_interlock);
+		TAILQ_INSERT_TAIL(&vnode_free_list, vp, v_freelist);
+		vp = NULL;
 	}
 	if (vp) {
 		vp->v_flag |= VDOOMED;
@@ -504,6 +512,7 @@ getnewvnode(tag, mp, vops, vpp)
 		} else {
 			simple_unlock(&vp->v_interlock);
 		}
+		vn_finished_write(vnmp);
 
 #ifdef INVARIANTS
 		{
@@ -515,6 +524,8 @@ getnewvnode(tag, mp, vops, vpp)
 			if (vp->v_numoutput)
 				panic("Clean vnode has pending I/O's");
 			splx(s);
+			if (vp->v_writecount != 0)
+				panic("Non-zero write count");
 		}
 #endif
 		vp->v_flag = 0;
@@ -523,7 +534,6 @@ getnewvnode(tag, mp, vops, vpp)
 		vp->v_cstart = 0;
 		vp->v_clen = 0;
 		vp->v_socket = 0;
-		vp->v_writecount = 0;	/* XXX */
 	} else {
 		simple_unlock(&vnode_free_list_slock);
 		vp = (struct vnode *) zalloc(vnode_zone);
@@ -946,6 +956,7 @@ sched_sync(void)
 {
 	struct synclist *slp;
 	struct vnode *vp;
+	struct mount *mp;
 	long starttime;
 	int s;
 	struct proc *p = updateproc;
@@ -970,10 +981,12 @@ sched_sync(void)
 		splx(s);
 
 		while ((vp = LIST_FIRST(slp)) != NULL) {
-			if (VOP_ISLOCKED(vp, NULL) == 0) {
+			if (VOP_ISLOCKED(vp, NULL) == 0 &&
+			    vn_start_write(vp, &mp, V_NOWAIT) == 0) {
 				vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p);
 				(void) VOP_FSYNC(vp, p->p_ucred, MNT_LAZY, p);
 				VOP_UNLOCK(vp, 0, p);
+				vn_finished_write(mp);
 			}
 			s = splbio();
 			if (LIST_FIRST(slp) == vp) {
@@ -1386,6 +1399,7 @@ vrele(vp)
 	struct proc *p = curproc;	/* XXX */
 
 	KASSERT(vp != NULL, ("vrele: null vp"));
+	KASSERT(vp->v_writecount < vp->v_usecount, ("vrele: missed vn_close"));
 
 	simple_lock(&vp->v_interlock);
 
@@ -1427,6 +1441,7 @@ vput(vp)
 	struct proc *p = curproc;	/* XXX */
 
 	KASSERT(vp != NULL, ("vput: null vp"));
+	KASSERT(vp->v_writecount < vp->v_usecount, ("vput: missed vn_close"));
 
 	simple_lock(&vp->v_interlock);
 
@@ -1632,6 +1647,8 @@ vclean(vp, flags, p)
 	 * If the flush fails, just toss the buffers.
 	 */
 	if (flags & DOCLOSE) {
+		if (TAILQ_FIRST(&vp->v_dirtyblkhd) != NULL)
+			(void) vn_write_suspend_wait(vp, V_WAIT);
 		if (vinvalbuf(vp, V_SAVE, NOCRED, p, 0, 0) != 0)
 			vinvalbuf(vp, 0, NOCRED, p, 0, 0);
 	}
@@ -2785,12 +2802,18 @@ sync_fsync(ap)
 		simple_unlock(&mountlist_slock);
 		return (0);
 	}
+	if (vn_start_write(NULL, &mp, V_NOWAIT) != 0) {
+		vfs_unbusy(mp, p);
+		simple_unlock(&mountlist_slock);
+		return (0);
+	}
 	asyncflag = mp->mnt_flag & MNT_ASYNC;
 	mp->mnt_flag &= ~MNT_ASYNC;
 	vfs_msync(mp, MNT_NOWAIT);
 	VFS_SYNC(mp, MNT_LAZY, ap->a_cred, p);
 	if (asyncflag)
 		mp->mnt_flag |= MNT_ASYNC;
+	vn_finished_write(mp);
 	vfs_unbusy(mp, p);
 	return (0);
 }
diff --git a/sys/kern/vfs_syscalls.c b/sys/kern/vfs_syscalls.c
index 65a297ca..404114a 100644
--- a/sys/kern/vfs_syscalls.c
+++ b/sys/kern/vfs_syscalls.c
@@ -164,8 +164,8 @@ mount(p, uap)
 			vput(vp);
 			return (EOPNOTSUPP);	/* Needs translation */
 		}
-		mp->mnt_flag |=
-		    SCARG(uap, flags) & (MNT_RELOAD | MNT_FORCE | MNT_UPDATE);
+		mp->mnt_flag |= SCARG(uap, flags) &
+		    (MNT_RELOAD | MNT_FORCE | MNT_UPDATE | MNT_SNAPSHOT);
 		/*
 		 * Only root, or the user that did the original mount is
 		 * permitted to update it.
@@ -303,7 +303,8 @@ update:
 		vrele(vp);
 		if (mp->mnt_kern_flag & MNTK_WANTRDWR)
 			mp->mnt_flag &= ~MNT_RDONLY;
-		mp->mnt_flag &=~ (MNT_UPDATE | MNT_RELOAD | MNT_FORCE);
+		mp->mnt_flag &=~
+		    (MNT_UPDATE | MNT_RELOAD | MNT_FORCE | MNT_SNAPSHOT);
 		mp->mnt_kern_flag &=~ MNTK_WANTRDWR;
 		if (error) {
 			mp->mnt_flag = flag;
@@ -458,7 +459,7 @@ unmount(p, uap)
  */
 int
 dounmount(mp, flags, p)
-	register struct mount *mp;
+	struct mount *mp;
 	int flags;
 	struct proc *p;
 {
@@ -469,6 +470,7 @@ dounmount(mp, flags, p)
 	simple_lock(&mountlist_slock);
 	mp->mnt_kern_flag |= MNTK_UNMOUNT;
 	lockmgr(&mp->mnt_lock, LK_DRAIN | LK_INTERLOCK, &mountlist_slock, p);
+	vn_start_write(NULL, &mp, V_WAIT);
 
 	if (mp->mnt_flag & MNT_EXPUBLIC)
 		vfs_setpublicfs(NULL, NULL, NULL);
@@ -481,8 +483,10 @@ dounmount(mp, flags, p)
 		vrele(mp->mnt_syncer);
 	if (((mp->mnt_flag & MNT_RDONLY) ||
 	     (error = VFS_SYNC(mp, MNT_WAIT, p->p_ucred, p)) == 0) ||
-	    (flags & MNT_FORCE))
+	    (flags & MNT_FORCE)) {
 		error = VFS_UNMOUNT(mp, flags, p);
+	}
+	vn_finished_write(mp);
 	simple_lock(&mountlist_slock);
 	if (error) {
 		if ((mp->mnt_flag & MNT_RDONLY) == 0 && mp->mnt_syncer == NULL)
@@ -530,7 +534,7 @@ sync(p, uap)
 	struct proc *p;
 	struct sync_args *uap;
 {
-	register struct mount *mp, *nmp;
+	struct mount *mp, *nmp;
 	int asyncflag;
 
 	simple_lock(&mountlist_slock);
@@ -539,13 +543,15 @@ sync(p, uap)
 			nmp = TAILQ_NEXT(mp, mnt_list);
 			continue;
 		}
-		if ((mp->mnt_flag & MNT_RDONLY) == 0) {
+		if ((mp->mnt_flag & MNT_RDONLY) == 0 &&
+		    vn_start_write(NULL, &mp, V_NOWAIT) == 0) {
 			asyncflag = mp->mnt_flag & MNT_ASYNC;
 			mp->mnt_flag &= ~MNT_ASYNC;
 			vfs_msync(mp, MNT_NOWAIT);
 			VFS_SYNC(mp, MNT_NOWAIT,
-				((p != NULL) ? p->p_ucred : NOCRED), p);
+			    ((p != NULL) ? p->p_ucred : NOCRED), p);
 			mp->mnt_flag |= asyncflag;
+			vn_finished_write(mp);
 		}
 		simple_lock(&mountlist_slock);
 		nmp = TAILQ_NEXT(mp, mnt_list);
@@ -593,7 +599,7 @@ quotactl(p, uap)
 		syscallarg(caddr_t) arg;
 	} */ *uap;
 {
-	register struct mount *mp;
+	struct mount *mp;
 	int error;
 	struct nameidata nd;
 
@@ -602,11 +608,15 @@ quotactl(p, uap)
 	NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, SCARG(uap, path), p);
 	if ((error = namei(&nd)) != 0)
 		return (error);
-	mp = nd.ni_vp->v_mount;
 	NDFREE(&nd, NDF_ONLY_PNBUF);
+	error = vn_start_write(nd.ni_vp, &mp, V_WAIT | PCATCH);
 	vrele(nd.ni_vp);
-	return (VFS_QUOTACTL(mp, SCARG(uap, cmd), SCARG(uap, uid),
-	    SCARG(uap, arg), p));
+	if (error)
+		return (error);
+	error = VFS_QUOTACTL(mp, SCARG(uap, cmd), SCARG(uap, uid),
+	    SCARG(uap, arg), p);
+	vn_finished_write(mp);
+	return (error);
 }
 
 /*
@@ -972,6 +982,7 @@ open(p, uap)
 	struct file *fp;
 	struct vnode *vp;
 	struct vattr vat;
+	struct mount *mp;
 	int cmode, flags, oflags;
 	struct file *nfp;
 	int type, indx, error;
@@ -1029,12 +1040,15 @@ open(p, uap)
 		fp->f_flag |= FHASLOCK;
 	}
 	if (flags & O_TRUNC) {
+		if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
+			goto bad;
 		VOP_LEASE(vp, p, p->p_ucred, LEASE_WRITE);
 		VATTR_NULL(&vat);
 		vat.va_size = 0;
 		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p);
 		error = VOP_SETATTR(vp, &vat, p->p_ucred, p);
 		VOP_UNLOCK(vp, 0, p);
+		vn_finished_write(mp);
 		if (error)
 			goto bad;
 	}
@@ -1101,7 +1115,8 @@ mknod(p, uap)
 		syscallarg(int) dev;
 	} */ *uap;
 {
-	register struct vnode *vp;
+	struct vnode *vp;
+	struct mount *mp;
 	struct vattr vattr;
 	int error;
 	int whiteout = 0;
@@ -1118,14 +1133,16 @@ mknod(p, uap)
 	}
 	if (error)
 		return (error);
+restart:
 	bwillwrite();
 	NDINIT(&nd, CREATE, LOCKPARENT, UIO_USERSPACE, SCARG(uap, path), p);
 	if ((error = namei(&nd)) != 0)
 		return (error);
 	vp = nd.ni_vp;
-	if (vp != NULL)
+	if (vp != NULL) {
+		vrele(vp);
 		error = EEXIST;
-	else {
+	} else {
 		VATTR_NULL(&vattr);
 		vattr.va_mode = (SCARG(uap, mode) & ALLPERMS) &~ p->p_fd->fd_cmask;
 		vattr.va_rdev = SCARG(uap, dev);
@@ -1149,6 +1166,13 @@ mknod(p, uap)
 			break;
 		}
 	}
+	if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
+		NDFREE(&nd, NDF_ONLY_PNBUF);
+		vput(nd.ni_dvp);
+		if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0)
+			return (error);
+		goto restart;
+	}
 	if (!error) {
 		VOP_LEASE(nd.ni_dvp, p, p->p_ucred, LEASE_WRITE);
 		if (whiteout)
@@ -1159,17 +1183,10 @@ mknod(p, uap)
 			if (error == 0)
 				vput(nd.ni_vp);
 		}
-		NDFREE(&nd, NDF_ONLY_PNBUF);
-		vput(nd.ni_dvp);
-	} else {
-		NDFREE(&nd, NDF_ONLY_PNBUF);
-		if (nd.ni_dvp == vp)
-			vrele(nd.ni_dvp);
-		else
-			vput(nd.ni_dvp);
-		if (vp)
-			vrele(vp);
 	}
+	NDFREE(&nd, NDF_ONLY_PNBUF);
+	vput(nd.ni_dvp);
+	vn_finished_write(mp);
 	ASSERT_VOP_UNLOCKED(nd.ni_dvp, "mknod");
 	ASSERT_VOP_UNLOCKED(nd.ni_vp, "mknod");
 	return (error);
@@ -1193,23 +1210,29 @@ mkfifo(p, uap)
 		syscallarg(int) mode;
 	} */ *uap;
 {
+	struct mount *mp;
 	struct vattr vattr;
 	int error;
 	struct nameidata nd;
 
+restart:
 	bwillwrite();
 	NDINIT(&nd, CREATE, LOCKPARENT, UIO_USERSPACE, SCARG(uap, path), p);
 	if ((error = namei(&nd)) != 0)
 		return (error);
 	if (nd.ni_vp != NULL) {
 		NDFREE(&nd, NDF_ONLY_PNBUF);
-		if (nd.ni_dvp == nd.ni_vp)
-			vrele(nd.ni_dvp);
-		else
-			vput(nd.ni_dvp);
 		vrele(nd.ni_vp);
+		vput(nd.ni_dvp);
 		return (EEXIST);
 	}
+	if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
+		NDFREE(&nd, NDF_ONLY_PNBUF);
+		vput(nd.ni_dvp);
+		if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0)
+			return (error);
+		goto restart;
+	}
 	VATTR_NULL(&vattr);
 	vattr.va_type = VFIFO;
 	vattr.va_mode = (SCARG(uap, mode) & ALLPERMS) &~ p->p_fd->fd_cmask;
@@ -1219,6 +1242,7 @@ mkfifo(p, uap)
 		vput(nd.ni_vp);
 	NDFREE(&nd, NDF_ONLY_PNBUF);
 	vput(nd.ni_dvp);
+	vn_finished_write(mp);
 	return (error);
 }
 
@@ -1240,7 +1264,8 @@ link(p, uap)
 		syscallarg(char *) link;
 	} */ *uap;
 {
-	register struct vnode *vp;
+	struct vnode *vp;
+	struct mount *mp;
 	struct nameidata nd;
 	int error;
 
@@ -1250,30 +1275,29 @@ link(p, uap)
 		return (error);
 	NDFREE(&nd, NDF_ONLY_PNBUF);
 	vp = nd.ni_vp;
-	if (vp->v_type == VDIR)
-		error = EPERM;		/* POSIX */
-	else {
-		NDINIT(&nd, CREATE, LOCKPARENT|NOOBJ, UIO_USERSPACE, SCARG(uap, link), p);
-		error = namei(&nd);
-		if (!error) {
-			if (nd.ni_vp != NULL) {
-				if (nd.ni_vp)
-					vrele(nd.ni_vp);
-				error = EEXIST;
-			} else {
-				VOP_LEASE(nd.ni_dvp, p, p->p_ucred,
-				    LEASE_WRITE);
-				VOP_LEASE(vp, p, p->p_ucred, LEASE_WRITE);
-				error = VOP_LINK(nd.ni_dvp, vp, &nd.ni_cnd);
-			}
-			NDFREE(&nd, NDF_ONLY_PNBUF);
-			if (nd.ni_dvp == nd.ni_vp)
-				vrele(nd.ni_dvp);
-			else
-				vput(nd.ni_dvp);
+	if (vp->v_type == VDIR) {
+		vrele(vp);
+		return (EPERM);		/* POSIX */
+	}
+	if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0) {
+		vrele(vp);
+		return (error);
+	}
+	NDINIT(&nd, CREATE, LOCKPARENT|NOOBJ, UIO_USERSPACE, SCARG(uap, link), p);
+	if ((error = namei(&nd)) == 0) {
+		if (nd.ni_vp != NULL) {
+			vrele(nd.ni_vp);
+			error = EEXIST;
+		} else {
+			VOP_LEASE(nd.ni_dvp, p, p->p_ucred, LEASE_WRITE);
+			VOP_LEASE(vp, p, p->p_ucred, LEASE_WRITE);
+			error = VOP_LINK(nd.ni_dvp, vp, &nd.ni_cnd);
 		}
+		NDFREE(&nd, NDF_ONLY_PNBUF);
+		vput(nd.ni_dvp);
 	}
 	vrele(vp);
+	vn_finished_write(mp);
 	ASSERT_VOP_UNLOCKED(nd.ni_dvp, "link");
 	ASSERT_VOP_UNLOCKED(nd.ni_vp, "link");
 	return (error);
@@ -1297,6 +1321,7 @@ symlink(p, uap)
 		syscallarg(char *) link;
 	} */ *uap;
 {
+	struct mount *mp;
 	struct vattr vattr;
 	char *path;
 	int error;
@@ -1305,20 +1330,25 @@ symlink(p, uap)
 	path = zalloc(namei_zone);
 	if ((error = copyinstr(SCARG(uap, path), path, MAXPATHLEN, NULL)) != 0)
 		goto out;
+restart:
 	bwillwrite();
 	NDINIT(&nd, CREATE, LOCKPARENT|NOOBJ, UIO_USERSPACE, SCARG(uap, link), p);
 	if ((error = namei(&nd)) != 0)
 		goto out;
 	if (nd.ni_vp) {
 		NDFREE(&nd, NDF_ONLY_PNBUF);
-		if (nd.ni_dvp == nd.ni_vp)
-			vrele(nd.ni_dvp);
-		else
-			vput(nd.ni_dvp);
 		vrele(nd.ni_vp);
+		vput(nd.ni_dvp);
 		error = EEXIST;
 		goto out;
 	}
+	if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
+		NDFREE(&nd, NDF_ONLY_PNBUF);
+		vput(nd.ni_dvp);
+		if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0)
+			return (error);
+		goto restart;
+	}
 	VATTR_NULL(&vattr);
 	vattr.va_mode = ACCESSPERMS &~ p->p_fd->fd_cmask;
 	VOP_LEASE(nd.ni_dvp, p, p->p_ucred, LEASE_WRITE);
@@ -1327,6 +1357,7 @@ symlink(p, uap)
 	if (error == 0)
 		vput(nd.ni_vp);
 	vput(nd.ni_dvp);
+	vn_finished_write(mp);
 	ASSERT_VOP_UNLOCKED(nd.ni_dvp, "symlink");
 	ASSERT_VOP_UNLOCKED(nd.ni_vp, "symlink");
 out:
@@ -1346,8 +1377,10 @@ undelete(p, uap)
 	} */ *uap;
 {
 	int error;
+	struct mount *mp;
 	struct nameidata nd;
 
+restart:
 	bwillwrite();
 	NDINIT(&nd, DELETE, LOCKPARENT|DOWHITEOUT, UIO_USERSPACE,
 	    SCARG(uap, path), p);
@@ -1357,19 +1390,23 @@ undelete(p, uap)
 
 	if (nd.ni_vp != NULLVP || !(nd.ni_cnd.cn_flags & ISWHITEOUT)) {
 		NDFREE(&nd, NDF_ONLY_PNBUF);
-		if (nd.ni_dvp == nd.ni_vp)
-			vrele(nd.ni_dvp);
-		else
-			vput(nd.ni_dvp);
 		if (nd.ni_vp)
 			vrele(nd.ni_vp);
+		vput(nd.ni_dvp);
 		return (EEXIST);
 	}
-
+	if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
+		NDFREE(&nd, NDF_ONLY_PNBUF);
+		vput(nd.ni_dvp);
+		if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0)
+			return (error);
+		goto restart;
+	}
 	VOP_LEASE(nd.ni_dvp, p, p->p_ucred, LEASE_WRITE);
 	error = VOP_WHITEOUT(nd.ni_dvp, &nd.ni_cnd, DELETE);
 	NDFREE(&nd, NDF_ONLY_PNBUF);
 	vput(nd.ni_dvp);
+	vn_finished_write(mp);
 	ASSERT_VOP_UNLOCKED(nd.ni_dvp, "undelete");
 	ASSERT_VOP_UNLOCKED(nd.ni_vp, "undelete");
 	return (error);
@@ -1391,18 +1428,17 @@ unlink(p, uap)
 		syscallarg(char *) path;
 	} */ *uap;
 {
-	register struct vnode *vp;
+	struct mount *mp;
+	struct vnode *vp;
 	int error;
 	struct nameidata nd;
 
+restart:
 	bwillwrite();
 	NDINIT(&nd, DELETE, LOCKPARENT, UIO_USERSPACE, SCARG(uap, path), p);
 	if ((error = namei(&nd)) != 0)
 		return (error);
 	vp = nd.ni_vp;
-	VOP_LEASE(vp, p, p->p_ucred, LEASE_WRITE);
-	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p);
-
 	if (vp->v_type == VDIR)
 		error = EPERM;		/* POSIX */
 	else {
@@ -1414,18 +1450,24 @@ unlink(p, uap)
 		if (vp->v_flag & VROOT)
 			error = EBUSY;
 	}
-
+	if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
+		NDFREE(&nd, NDF_ONLY_PNBUF);
+		vrele(vp);
+		vput(nd.ni_dvp);
+		if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0)
+			return (error);
+		goto restart;
+	}
+	VOP_LEASE(vp, p, p->p_ucred, LEASE_WRITE);
+	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p);
 	if (!error) {
 		VOP_LEASE(nd.ni_dvp, p, p->p_ucred, LEASE_WRITE);
 		error = VOP_REMOVE(nd.ni_dvp, vp, &nd.ni_cnd);
 	}
 	NDFREE(&nd, NDF_ONLY_PNBUF);
-	if (nd.ni_dvp == vp)
-		vrele(nd.ni_dvp);
-	else
-		vput(nd.ni_dvp);
-	if (vp != NULLVP)
-		vput(vp);
+	vput(nd.ni_dvp);
+	vput(vp);
+	vn_finished_write(mp);
 	ASSERT_VOP_UNLOCKED(nd.ni_dvp, "unlink");
 	ASSERT_VOP_UNLOCKED(nd.ni_vp, "unlink");
 	return (error);
@@ -1936,6 +1978,7 @@ setfflags(p, vp, flags)
 	int flags;
 {
 	int error;
+	struct mount *mp;
 	struct vattr vattr;
 
 	/*
@@ -1948,12 +1991,15 @@ setfflags(p, vp, flags)
 	    ((error = suser_xxx(p->p_ucred, p, PRISON_ROOT)) != 0))
 		return (error);
 
+	if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
+		return (error);
 	VOP_LEASE(vp, p, p->p_ucred, LEASE_WRITE);
 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p);
 	VATTR_NULL(&vattr);
 	vattr.va_flags = flags;
 	error = VOP_SETATTR(vp, &vattr, p->p_ucred, p);
 	VOP_UNLOCK(vp, 0, p);
+	vn_finished_write(mp);
 	return (error);
 }
 
@@ -2020,14 +2066,18 @@ setfmode(p, vp, mode)
 	int mode;
 {
 	int error;
+	struct mount *mp;
 	struct vattr vattr;
 
+	if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
+		return (error);
 	VOP_LEASE(vp, p, p->p_ucred, LEASE_WRITE);
 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p);
 	VATTR_NULL(&vattr);
 	vattr.va_mode = mode & ALLPERMS;
 	error = VOP_SETATTR(vp, &vattr, p->p_ucred, p);
 	VOP_UNLOCK(vp, 0, p);
+	vn_finished_write(mp);
 	return error;
 }
 
@@ -2125,8 +2175,11 @@ setfown(p, vp, uid, gid)
 	gid_t gid;
 {
 	int error;
+	struct mount *mp;
 	struct vattr vattr;
 
+	if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
+		return (error);
 	VOP_LEASE(vp, p, p->p_ucred, LEASE_WRITE);
 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p);
 	VATTR_NULL(&vattr);
@@ -2134,6 +2187,7 @@ setfown(p, vp, uid, gid)
 	vattr.va_gid = gid;
 	error = VOP_SETATTR(vp, &vattr, p->p_ucred, p);
 	VOP_UNLOCK(vp, 0, p);
+	vn_finished_write(mp);
 	return error;
 }
 
@@ -2259,8 +2313,11 @@ setutimes(p, vp, ts, nullflag)
 	int nullflag;
 {
 	int error;
+	struct mount *mp;
 	struct vattr vattr;
 
+	if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
+		return (error);
 	VOP_LEASE(vp, p, p->p_ucred, LEASE_WRITE);
 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p);
 	VATTR_NULL(&vattr);
@@ -2270,6 +2327,7 @@ setutimes(p, vp, ts, nullflag)
 		vattr.va_vaflags |= VA_UTIMES_NULL;
 	error = VOP_SETATTR(vp, &vattr, p->p_ucred, p);
 	VOP_UNLOCK(vp, 0, p);
+	vn_finished_write(mp);
 	return error;
 }
 
@@ -2394,7 +2452,8 @@ truncate(p, uap)
 		syscallarg(off_t) length;
 	} */ *uap;
 {
-	register struct vnode *vp;
+	struct mount *mp;
+	struct vnode *vp;
 	struct vattr vattr;
 	int error;
 	struct nameidata nd;
@@ -2405,6 +2464,10 @@ truncate(p, uap)
 	if ((error = namei(&nd)) != 0)
 		return (error);
 	vp = nd.ni_vp;
+	if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0) {
+		vrele(vp);
+		return (error);
+	}
 	NDFREE(&nd, NDF_ONLY_PNBUF);
 	VOP_LEASE(vp, p, p->p_ucred, LEASE_WRITE);
 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p);
@@ -2417,6 +2480,7 @@ truncate(p, uap)
 		error = VOP_SETATTR(vp, &vattr, p->p_ucred, p);
 	}
 	vput(vp);
+	vn_finished_write(mp);
 	return (error);
 }
 
@@ -2440,6 +2504,7 @@ ftruncate(p, uap)
 		syscallarg(off_t) length;
 	} */ *uap;
 {
+	struct mount *mp;
 	struct vattr vattr;
 	struct vnode *vp;
 	struct file *fp;
@@ -2452,6 +2517,8 @@ ftruncate(p, uap)
 	if ((fp->f_flag & FWRITE) == 0)
 		return (EINVAL);
 	vp = (struct vnode *)fp->f_data;
+	if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
+		return (error);
 	VOP_LEASE(vp, p, p->p_ucred, LEASE_WRITE);
 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p);
 	if (vp->v_type == VDIR)
@@ -2462,6 +2529,7 @@ ftruncate(p, uap)
 		error = VOP_SETATTR(vp, &vattr, fp->f_cred, p);
 	}
 	VOP_UNLOCK(vp, 0, p);
+	vn_finished_write(mp);
 	return (error);
 }
 
@@ -2541,13 +2609,16 @@ fsync(p, uap)
 		syscallarg(int) fd;
 	} */ *uap;
 {
-	register struct vnode *vp;
+	struct vnode *vp;
+	struct mount *mp;
 	struct file *fp;
 	int error;
 
 	if ((error = getvnode(p->p_fd, SCARG(uap, fd), &fp)) != 0)
 		return (error);
 	vp = (struct vnode *)fp->f_data;
+	if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
+		return (error);
 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p);
 	if (vp->v_object)
 		vm_object_page_clean(vp->v_object, 0, 0, 0);
@@ -2558,6 +2629,7 @@ fsync(p, uap)
 #endif
 
 	VOP_UNLOCK(vp, 0, p);
+	vn_finished_write(mp);
 	return (error);
 }
 
@@ -2580,7 +2652,8 @@ rename(p, uap)
 		syscallarg(char *) to;
 	} */ *uap;
 {
-	register struct vnode *tvp, *fvp, *tdvp;
+	struct mount *mp;
+	struct vnode *tvp, *fvp, *tdvp;
 	struct nameidata fromnd, tond;
 	int error;
 
@@ -2590,6 +2663,12 @@ rename(p, uap)
 	if ((error = namei(&fromnd)) != 0)
 		return (error);
 	fvp = fromnd.ni_vp;
+	if ((error = vn_start_write(fvp, &mp, V_WAIT | PCATCH)) != 0) {
+		NDFREE(&fromnd, NDF_ONLY_PNBUF);
+		vrele(fromnd.ni_dvp);
+		vrele(fvp);
+		goto out1;
+	}
 	NDINIT(&tond, RENAME, LOCKPARENT | LOCKLEAF | NOCACHE | SAVESTART | NOOBJ,
 	    UIO_USERSPACE, SCARG(uap, to), p);
 	if (fromnd.ni_vp->v_type == VDIR)
@@ -2652,6 +2731,7 @@ out:
 		vrele(fvp);
 	}
 	vrele(tond.ni_startdir);
+	vn_finished_write(mp);
 	ASSERT_VOP_UNLOCKED(fromnd.ni_dvp, "rename");
 	ASSERT_VOP_UNLOCKED(fromnd.ni_vp, "rename");
 	ASSERT_VOP_UNLOCKED(tond.ni_dvp, "rename");
@@ -2682,11 +2762,13 @@ mkdir(p, uap)
 		syscallarg(int) mode;
 	} */ *uap;
 {
-	register struct vnode *vp;
+	struct mount *mp;
+	struct vnode *vp;
 	struct vattr vattr;
 	int error;
 	struct nameidata nd;
 
+restart:
 	bwillwrite();
 	NDINIT(&nd, CREATE, LOCKPARENT, UIO_USERSPACE, SCARG(uap, path), p);
 	nd.ni_cnd.cn_flags |= WILLBEDIR;
@@ -2695,13 +2777,17 @@ mkdir(p, uap)
 	vp = nd.ni_vp;
 	if (vp != NULL) {
 		NDFREE(&nd, NDF_ONLY_PNBUF);
-		if (nd.ni_dvp == vp)
-			vrele(nd.ni_dvp);
-		else
-			vput(nd.ni_dvp);
 		vrele(vp);
+		vput(nd.ni_dvp);
 		return (EEXIST);
 	}
+	if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
+		NDFREE(&nd, NDF_ONLY_PNBUF);
+		vput(nd.ni_dvp);
+		if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0)
+			return (error);
+		goto restart;
+	}
 	VATTR_NULL(&vattr);
 	vattr.va_type = VDIR;
 	vattr.va_mode = (SCARG(uap, mode) & ACCESSPERMS) &~ p->p_fd->fd_cmask;
@@ -2711,6 +2797,7 @@ mkdir(p, uap)
 	vput(nd.ni_dvp);
 	if (!error)
 		vput(nd.ni_vp);
+	vn_finished_write(mp);
 	ASSERT_VOP_UNLOCKED(nd.ni_dvp, "mkdir");
 	ASSERT_VOP_UNLOCKED(nd.ni_vp, "mkdir");
 	return (error);
@@ -2732,10 +2819,12 @@ rmdir(p, uap)
 		syscallarg(char *) path;
 	} */ *uap;
 {
-	register struct vnode *vp;
+	struct mount *mp;
+	struct vnode *vp;
 	int error;
 	struct nameidata nd;
 
+restart:
 	bwillwrite();
 	NDINIT(&nd, DELETE, LOCKPARENT | LOCKLEAF, UIO_USERSPACE,
 	    SCARG(uap, path), p);
@@ -2756,21 +2845,32 @@ rmdir(p, uap)
 	/*
 	 * The root of a mounted filesystem cannot be deleted.
 	 */
-	if (vp->v_flag & VROOT)
+	if (vp->v_flag & VROOT) {
 		error = EBUSY;
-	else {
-		VOP_LEASE(nd.ni_dvp, p, p->p_ucred, LEASE_WRITE);
-		VOP_LEASE(vp, p, p->p_ucred, LEASE_WRITE);
-		error = VOP_RMDIR(nd.ni_dvp, nd.ni_vp, &nd.ni_cnd);
+		goto out;
 	}
+	if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
+		NDFREE(&nd, NDF_ONLY_PNBUF);
+		if (nd.ni_dvp == vp)
+			vrele(nd.ni_dvp);
+		else
+			vput(nd.ni_dvp);
+		vput(vp);
+		if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0)
+			return (error);
+		goto restart;
+	}
+	VOP_LEASE(nd.ni_dvp, p, p->p_ucred, LEASE_WRITE);
+	VOP_LEASE(vp, p, p->p_ucred, LEASE_WRITE);
+	error = VOP_RMDIR(nd.ni_dvp, nd.ni_vp, &nd.ni_cnd);
+	vn_finished_write(mp);
 out:
 	NDFREE(&nd, NDF_ONLY_PNBUF);
 	if (nd.ni_dvp == vp)
 		vrele(nd.ni_dvp);
 	else
 		vput(nd.ni_dvp);
-	if (vp != NULLVP)
-		vput(vp);
+	vput(vp);
 	ASSERT_VOP_UNLOCKED(nd.ni_dvp, "rmdir");
 	ASSERT_VOP_UNLOCKED(nd.ni_vp, "rmdir");
 	return (error);
@@ -3049,7 +3149,8 @@ revoke(p, uap)
 		syscallarg(char *) path;
 	} */ *uap;
 {
-	register struct vnode *vp;
+	struct mount *mp;
+	struct vnode *vp;
 	struct vattr vattr;
 	int error;
 	struct nameidata nd;
@@ -3068,8 +3169,11 @@ revoke(p, uap)
 	if (p->p_ucred->cr_uid != vattr.va_uid &&
 	    (error = suser_xxx(0, p, PRISON_ROOT)))
 		goto out;
+	if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
+		goto out;
 	if (vcount(vp) > 1)
 		VOP_REVOKE(vp, REVOKEALL);
+	vn_finished_write(mp);
 out:
 	vrele(vp);
 	return (error);
@@ -3228,11 +3332,16 @@ fhopen(p, uap)
 	}
 	if (fmode & O_TRUNC) {
 		VOP_UNLOCK(vp, 0, p);				/* XXX */
+		if ((error = vn_start_write(NULL, &mp, V_WAIT | PCATCH)) != 0) {
+			vrele(vp);
+			return (error);
+		}
 		VOP_LEASE(vp, p, p->p_ucred, LEASE_WRITE);
 		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p);	/* XXX */
 		VATTR_NULL(vap);
 		vap->va_size = 0;
 		error = VOP_SETATTR(vp, vap, p->p_ucred, p);
+		vn_finished_write(mp);
 		if (error)
 			goto bad;
 	}
@@ -3407,10 +3516,15 @@ extattrctl(p, uap)
 	NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, SCARG(uap, path), p);
 	if ((error = namei(&nd)) != 0)
 		return (error);
-	mp = nd.ni_vp->v_mount;
+	error = vn_start_write(nd.ni_vp, &mp, V_WAIT | PCATCH);
 	NDFREE(&nd, 0);
-	return (VFS_EXTATTRCTL(mp, SCARG(uap, cmd), SCARG(uap, attrname),
-	    SCARG(uap, arg), p));
+	vrele(nd.ni_vp);
+	if (error)
+		return (error);
+	error = VFS_EXTATTRCTL(mp, SCARG(uap, cmd), SCARG(uap, attrname),
+	    SCARG(uap, arg), p);
+	vn_finished_write(mp);
+	return (error);
 }
 
 /*
@@ -3425,6 +3539,7 @@ extattr_set_file(p, uap)
 	struct extattr_set_file_args *uap;
 {
 	struct nameidata nd;
+	struct mount *mp;
 	struct uio auio;
 	struct iovec *iov, *needfree = NULL, aiov[UIO_SMALLIOV];
 	char attrname[EXTATTR_MAXNAMELEN];
@@ -3434,10 +3549,13 @@ extattr_set_file(p, uap)
 	error = copyin(SCARG(uap, attrname), attrname, EXTATTR_MAXNAMELEN);
 	if (error)
 		return (error);
-	NDINIT(&nd, LOOKUP, LOCKLEAF | FOLLOW, UIO_USERSPACE, SCARG(uap, path),
-	    p);
+	NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, SCARG(uap, path), p);
 	if ((error = namei(&nd)) != 0)
 		return(error);
+	if ((error = vn_start_write(nd.ni_vp, &mp, V_WAIT | PCATCH)) != 0) {
+		mp = NULL;	/* not counted; done: must not drop a write ref */
+		goto done;
+	}
 	iovlen = uap->iovcnt * sizeof(struct iovec);
 	if (uap->iovcnt > UIO_SMALLIOV) {
 		if (uap->iovcnt > UIO_MAXIOV) {
@@ -3477,6 +3593,8 @@ done:
 	if (needfree)
 		FREE(needfree, M_IOV);
 	NDFREE(&nd, 0);
+	vrele(nd.ni_vp);
+	vn_finished_write(mp);
 	return (error);
 }
 
@@ -3508,6 +3626,7 @@ extattr_get_file(p, uap)
 	if (uap->iovcnt > UIO_SMALLIOV) {
 		if (uap->iovcnt > UIO_MAXIOV) {
 			NDFREE(&nd, 0);
+			vrele(nd.ni_vp);
 			return (EINVAL);
 		}
 		MALLOC(iov, struct iovec *, iovlen, M_IOV, M_WAITOK);
@@ -3545,6 +3664,7 @@ done:
 	if (needfree)
 		FREE(needfree, M_IOV);
 	NDFREE(&nd, 0);
+	vrele(nd.ni_vp);
 	return(error);
 }
 
@@ -3557,6 +3677,7 @@ extattr_delete_file(p, uap)
 	struct proc *p;
 	struct extattr_delete_file_args *uap;
 {
+	struct mount *mp;
 	struct nameidata nd;
 	char attrname[EXTATTR_MAXNAMELEN];
 	int	error;
@@ -3564,12 +3685,17 @@ extattr_delete_file(p, uap)
 	error = copyin(SCARG(uap, attrname), attrname, EXTATTR_MAXNAMELEN);
 	if (error)
 		return(error);
-	NDINIT(&nd, LOOKUP, LOCKLEAF | FOLLOW, UIO_USERSPACE, SCARG(uap, path),
-	    p);
+	NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, SCARG(uap, path), p);
 	if ((error = namei(&nd)) != 0)
 		return(error);
+	if ((error = vn_start_write(nd.ni_vp, &mp, V_WAIT | PCATCH)) != 0) {
+		vrele(nd.ni_vp);
+		return (error);
+	}
 	error = VOP_SETEXTATTR(nd.ni_vp, attrname, NULL, p->p_cred->pc_ucred,
 	    p);
 	NDFREE(&nd, 0);
+	vrele(nd.ni_vp);
+	vn_finished_write(mp);
 	return(error);
 }
diff --git a/sys/kern/vfs_vnops.c b/sys/kern/vfs_vnops.c
index 0d0dc24..0708f7c 100644
--- a/sys/kern/vfs_vnops.c
+++ b/sys/kern/vfs_vnops.c
@@ -103,12 +103,14 @@ vn_open(ndp, flagp, cmode)
 	int *flagp, cmode;
 {
 	struct vnode *vp;
+	struct mount *mp;
 	struct proc *p = ndp->ni_cnd.cn_proc;
 	struct ucred *cred = p->p_ucred;
 	struct vattr vat;
 	struct vattr *vap = &vat;
 	int mode, fmode, error;
 
+restart:
 	fmode = *flagp;
 	if (fmode & O_CREAT) {
 		ndp->ni_cnd.cn_nameiop = CREATE;
@@ -124,10 +126,19 @@ vn_open(ndp, flagp, cmode)
 			vap->va_mode = cmode;
 			if (fmode & O_EXCL)
 				vap->va_vaflags |= VA_EXCLUSIVE;
+			if (vn_start_write(ndp->ni_dvp, &mp, V_NOWAIT) != 0) {
+				NDFREE(ndp, NDF_ONLY_PNBUF);
+				vput(ndp->ni_dvp);
+				if ((error = vn_start_write(NULL, &mp,
+				    V_XSLEEP | PCATCH)) != 0)
+					return (error);
+				goto restart;
+			}
 			VOP_LEASE(ndp->ni_dvp, p, cred, LEASE_WRITE);
 			error = VOP_CREATE(ndp->ni_dvp, &ndp->ni_vp,
 					   &ndp->ni_cnd, vap);
 			vput(ndp->ni_dvp);
+			vn_finished_write(mp);
 			if (error) {
 				NDFREE(ndp, NDF_ONLY_PNBUF);
 				return (error);
@@ -293,10 +304,17 @@ vn_rdwr(rw, vp, base, len, offset, segflg, ioflg, cred, aresid, p)
 {
 	struct uio auio;
 	struct iovec aiov;
+	struct mount *mp;
 	int error;
 
-	if ((ioflg & IO_NODELOCKED) == 0)
+	if ((ioflg & IO_NODELOCKED) == 0) {
+		mp = NULL;
+		if (rw == UIO_WRITE &&
+		    vp->v_type != VCHR && vp->v_type != VBLK &&
+		    (error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
+			return (error);
 		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p);
+	}
 	auio.uio_iov = &aiov;
 	auio.uio_iovcnt = 1;
 	aiov.iov_base = base;
@@ -316,8 +334,10 @@ vn_rdwr(rw, vp, base, len, offset, segflg, ioflg, cred, aresid, p)
 	else
 		if (auio.uio_resid && error == 0)
 			error = EIO;
-	if ((ioflg & IO_NODELOCKED) == 0)
+	if ((ioflg & IO_NODELOCKED) == 0) {
+		vn_finished_write(mp);
 		VOP_UNLOCK(vp, 0, p);
+	}
 	return (error);
 }
 
@@ -368,6 +388,7 @@ vn_write(fp, uio, cred, flags, p)
 	int flags;
 {
 	struct vnode *vp;
+	struct mount *mp;
 	int error, ioflag;
 
 	KASSERT(uio->uio_procp == p, ("uio_procp %p is not p %p",
@@ -384,6 +405,10 @@ vn_write(fp, uio, cred, flags, p)
 	if ((fp->f_flag & O_FSYNC) ||
 	    (vp->v_mount && (vp->v_mount->mnt_flag & MNT_SYNCHRONOUS)))
 		ioflag |= IO_SYNC;
+	mp = NULL;
+	if (vp->v_type != VCHR && vp->v_type != VBLK &&
+	    (error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
+		return (error);
 	VOP_LEASE(vp, p, cred, LEASE_WRITE);
 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p);
 	if ((flags & FOF_OFFSET) == 0)
@@ -394,6 +419,7 @@ vn_write(fp, uio, cred, flags, p)
 		fp->f_offset = uio->uio_offset;
 	fp->f_nextoff = uio->uio_offset;
 	VOP_UNLOCK(vp, 0, p);
+	vn_finished_write(mp);
 	return (error);
 }
 
@@ -649,6 +675,140 @@ vn_closefile(fp, p)
 		fp->f_cred, p));
 }
 
+/*
+ * Preparing to start a filesystem write operation. If the operation is
+ * permitted, then we bump the count of operations in progress and
+ * proceed. If a suspend request is in progress, we wait until the
+ * suspension is over, and then proceed.
+ */
+int
+vn_start_write(vp, mpp, flags)
+	struct vnode *vp;
+	struct mount **mpp;
+	int flags;
+{
+	struct mount *mp;
+	int error;
+
+	/*
+	 * If a vnode is provided, get and return the mount point
+	 * to which it will write.
+	 */
+	if (vp != NULL) {
+		if ((error = VOP_GETWRITEMOUNT(vp, mpp)) != 0) {
+			*mpp = NULL;
+			if (error != EOPNOTSUPP)
+				return (error);
+			return (0);
+		}
+	}
+	if ((mp = *mpp) == NULL)
+		return (0);
+	/*
+	 * Check on status of suspension.
+	 */
+	while ((mp->mnt_kern_flag & MNTK_SUSPEND) != 0) {
+		if (flags & V_NOWAIT)
+			return (EWOULDBLOCK);
+		error = tsleep(&mp->mnt_flag, (PUSER - 1) | (flags & PCATCH),
+		    "suspfs", 0);
+		if (error)
+			return (error);
+	}
+	if (flags & V_XSLEEP)
+		return (0);
+	mp->mnt_writeopcount++;
+	return (0);
+}
+
+/*
+ * Secondary suspension. Used by operations such as vop_inactive
+ * routines that are needed by the higher level functions. These
+ * are allowed to proceed until all the higher level functions have
+ * completed (indicated by mnt_writeopcount dropping to zero). At that
+ * time, these operations are halted until the suspension is over.
+ */
+int
+vn_write_suspend_wait(vp, flags)
+	struct vnode *vp;
+	int flags;
+{
+	struct mount *mp;
+	int error;
+
+	if ((error = VOP_GETWRITEMOUNT(vp, &mp)) != 0) {
+		if (error != EOPNOTSUPP)
+			return (error);
+		return (0);
+	}
+	/*
+	 * If we are not suspended or have not yet reached suspended
+	 * mode, then let the operation proceed.
+	 */
+	if (mp == NULL || (mp->mnt_kern_flag & MNTK_SUSPENDED) == 0)
+		return (0);
+	if (flags & V_NOWAIT)
+		return (EWOULDBLOCK);
+	/*
+	 * Wait for the suspension to finish.
+	 */
+	return (tsleep(&mp->mnt_flag, (PUSER - 1) | (flags & PCATCH),
+	    "suspfs", 0));
+}
+
+/*
+ * Filesystem write operation has completed. If we are suspending and this
+ * operation is the last one, notify the suspender that the suspension is
+ * now in effect.
+ */
+void
+vn_finished_write(mp)
+	struct mount *mp;
+{
+
+	if (mp == NULL)
+		return;
+	mp->mnt_writeopcount--;
+	if (mp->mnt_writeopcount < 0)
+		panic("vn_finished_write: neg cnt");
+	if ((mp->mnt_kern_flag & MNTK_SUSPEND) != 0 &&
+	    mp->mnt_writeopcount <= 0)
+		wakeup(&mp->mnt_writeopcount);
+}
+
+/*
+ * Request a filesystem to suspend write operations.
+ */
+void
+vfs_write_suspend(mp)
+	struct mount *mp;
+{
+	struct proc *p = curproc;
+
+	if (mp->mnt_kern_flag & MNTK_SUSPEND)
+		return;
+	mp->mnt_kern_flag |= MNTK_SUSPEND;
+	if (mp->mnt_writeopcount > 0)
+		(void) tsleep(&mp->mnt_writeopcount, PUSER - 1, "suspwt", 0);
+	VFS_SYNC(mp, MNT_WAIT, p->p_ucred, p);
+	mp->mnt_kern_flag |= MNTK_SUSPENDED;
+}
+
+/*
+ * Request a filesystem to resume write operations.
+ */
+void
+vfs_write_resume(mp)
+	struct mount *mp;
+{
+
+	if ((mp->mnt_kern_flag & MNTK_SUSPEND) == 0)
+		return;
+	mp->mnt_kern_flag &= ~(MNTK_SUSPEND | MNTK_SUSPENDED);
+	wakeup(&mp->mnt_writeopcount);
+	wakeup(&mp->mnt_flag);
+}
+
 static int
 filt_vnattach(struct knote *kn)
 {
diff --git a/sys/kern/vnode_if.src b/sys/kern/vnode_if.src
index 479cc92..bda7e98 100644
--- a/sys/kern/vnode_if.src
+++ b/sys/kern/vnode_if.src
@@ -394,6 +394,22 @@ vop_strategy {
 };
 
 #
+#% getwritemount vp	= = =
+#
+vop_getwritemount {
+	IN struct vnode *vp;
+	OUT struct mount **mpp;
+};
+
+#
+#% copyonwrite  vp	L L L
+#
+vop_copyonwrite {
+	IN struct vnode *vp;
+	IN struct buf *bp;
+};
+
+#
 #% print	vp	= = =
 #
 vop_print {
diff --git a/sys/miscfs/fdesc/fdesc_vnops.c b/sys/miscfs/fdesc/fdesc_vnops.c
index 772a94c..72c7cae 100644
--- a/sys/miscfs/fdesc/fdesc_vnops.c
+++ b/sys/miscfs/fdesc/fdesc_vnops.c
@@ -383,6 +383,8 @@ fdesc_setattr(ap)
 {
 	struct filedesc *fdp = ap->a_p->p_fd;
 	struct vattr *vap = ap->a_vap;
+	struct vnode *vp;
+	struct mount *mp;
 	struct file *fp;
 	unsigned fd;
 	int error;
@@ -403,8 +405,11 @@ fdesc_setattr(ap)
 	switch (fp->f_type) {
 	case DTYPE_FIFO:
 	case DTYPE_VNODE:
-		error = VOP_SETATTR((struct vnode *) fp->f_data, ap->a_vap,
-		    ap->a_cred, ap->a_p);
+		vp = (struct vnode *)fp->f_data;
+		if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
+			return (error);
+		error = VOP_SETATTR(vp, ap->a_vap, ap->a_cred, ap->a_p);
+		vn_finished_write(mp);
 		break;
 
 	default:
diff --git a/sys/miscfs/fifofs/fifo_vnops.c b/sys/miscfs/fifofs/fifo_vnops.c
index 5bd13a7..03e3e37 100644
--- a/sys/miscfs/fifofs/fifo_vnops.c
+++ b/sys/miscfs/fifofs/fifo_vnops.c
@@ -107,6 +107,7 @@ static struct vnodeopv_entry_desc fifo_vnodeop_entries[] = {
 	{ &vop_open_desc,		(vop_t *) fifo_open },
 	{ &vop_pathconf_desc,		(vop_t *) fifo_pathconf },
 	{ &vop_poll_desc,		(vop_t *) fifo_poll },
+	{ &vop_getwritemount_desc, 	(vop_t *) vop_stdgetwritemount },
 	{ &vop_print_desc,		(vop_t *) fifo_print },
 	{ &vop_read_desc,		(vop_t *) fifo_read },
 	{ &vop_readdir_desc,		(vop_t *) fifo_badop },
diff --git a/sys/miscfs/specfs/spec_vnops.c b/sys/miscfs/specfs/spec_vnops.c
index cbe52f4..baf40c3 100644
--- a/sys/miscfs/specfs/spec_vnops.c
+++ b/sys/miscfs/specfs/spec_vnops.c
@@ -88,6 +88,7 @@ static struct vnodeopv_entry_desc spec_vnodeop_entries[] = {
 	{ &vop_open_desc,		(vop_t *) spec_open },
 	{ &vop_pathconf_desc,		(vop_t *) vop_stdpathconf },
 	{ &vop_poll_desc,		(vop_t *) spec_poll },
+	{ &vop_getwritemount_desc, 	(vop_t *) vop_stdgetwritemount },
 	{ &vop_print_desc,		(vop_t *) spec_print },
 	{ &vop_read_desc,		(vop_t *) spec_read },
 	{ &vop_readdir_desc,		(vop_t *) vop_panic },
@@ -415,16 +416,29 @@ spec_strategy(ap)
 	struct buf *bp;
 	struct vnode *vp;
 	struct mount *mp;
+	int error;
 
 	bp = ap->a_bp;
-	if ((bp->b_iocmd == BIO_WRITE) && (LIST_FIRST(&bp->b_dep)) != NULL)
-		buf_start(bp);
-
+	vp = ap->a_vp;
+	if (bp->b_iocmd == BIO_WRITE) {
+		if (vp->v_mount != NULL &&
+		    (vp->v_mount->mnt_kern_flag & MNTK_SUSPENDED) != 0)
+			panic("spec_strategy: bad I/O");
+		if (LIST_FIRST(&bp->b_dep) != NULL)
+			buf_start(bp);
+		if ((vp->v_flag & VCOPYONWRITE) &&
+		    (error = VOP_COPYONWRITE(vp, bp)) != 0 &&
+		    error != EOPNOTSUPP) {
+			bp->b_io.bio_error = error;
+			bp->b_io.bio_flags |= BIO_ERROR;
+			biodone(&bp->b_io);
+			return (0);
+		}
+	}
 	/*
 	 * Collect statistics on synchronous and asynchronous read
 	 * and write counts for disks that have associated filesystems.
 	 */
-	vp = ap->a_vp;
 	if (vn_isdisk(vp, NULL) && (mp = vp->v_specmountpoint) != NULL) {
 		if (bp->b_iocmd == BIO_WRITE) {
 			if (bp->b_lock.lk_lockholder == LK_KERNPROC)
diff --git a/sys/miscfs/union/union_subr.c b/sys/miscfs/union/union_subr.c
index 6b88bef..d1d6e31 100644
--- a/sys/miscfs/union/union_subr.c
+++ b/sys/miscfs/union/union_subr.c
@@ -747,6 +747,7 @@ union_copyup(un, docopy, cred, p)
 	struct proc *p;
 {
 	int error;
+	struct mount *mp;
 	struct vnode *lvp, *uvp;
 
 	/*
@@ -759,9 +760,12 @@ union_copyup(un, docopy, cred, p)
 	if (error)
 		return (error);
 
-	error = union_vn_create(&uvp, un, p);
-	if (error)
+	if ((error = vn_start_write(un->un_dirvp, &mp, V_WAIT | PCATCH)) != 0)
 		return (error);
+	if ((error = union_vn_create(&uvp, un, p)) != 0) {
+		vn_finished_write(mp);
+		return (error);
+	}
 
 	lvp = un->un_lowervp;
 
@@ -785,6 +789,7 @@ union_copyup(un, docopy, cred, p)
 
 	}
 	VOP_UNLOCK(uvp, 0, p);
+	vn_finished_write(mp);
 	union_newupper(un, uvp);
 	KASSERT(uvp->v_usecount > 0, ("copy: uvp refcount 0: %d", uvp->v_usecount));
 	union_vn_close(uvp, FWRITE, cred, p);
@@ -910,11 +915,15 @@ union_mkshadow(um, dvp, cnp, vpp)
 	struct vattr va;
 	struct proc *p = cnp->cn_proc;
 	struct componentname cn;
+	struct mount *mp;
 
-	error = union_relookup(um, dvp, vpp, cnp, &cn,
-			cnp->cn_nameptr, cnp->cn_namelen);
-	if (error)
+	if ((error = vn_start_write(dvp, &mp, V_WAIT | PCATCH)) != 0)
+		return (error);
+	if ((error = union_relookup(um, dvp, vpp, cnp, &cn,
+			cnp->cn_nameptr, cnp->cn_namelen)) != 0) {
+		vn_finished_write(mp);
 		return (error);
+	}
 
 	if (*vpp) {
 		if (cn.cn_flags & HASBUF) {
@@ -925,6 +934,7 @@ union_mkshadow(um, dvp, cnp, vpp)
 			vrele(*vpp);
 		else
 			vput(*vpp);
+		vn_finished_write(mp);
 		*vpp = NULLVP;
 		return (EEXIST);
 	}
@@ -950,6 +960,7 @@ union_mkshadow(um, dvp, cnp, vpp)
 		cn.cn_flags &= ~HASBUF;
 	}
 	/*vput(dvp);*/
+	vn_finished_write(mp);
 	return (error);
 }
 
@@ -973,10 +984,15 @@ union_mkwhiteout(um, dvp, cnp, path)
 	struct proc *p = cnp->cn_proc;
 	struct vnode *wvp;
 	struct componentname cn;
+	struct mount *mp;
 
+	if ((error = vn_start_write(dvp, &mp, V_WAIT | PCATCH)) != 0)
+		return (error);
 	error = union_relookup(um, dvp, &wvp, cnp, &cn, path, strlen(path));
-	if (error)
+	if (error) {
+		vn_finished_write(mp);
 		return (error);
+	}
 
 	if (wvp) {
 		if (cn.cn_flags & HASBUF) {
@@ -987,6 +1003,7 @@ union_mkwhiteout(um, dvp, cnp, path)
 			vrele(wvp);
 		else
 			vput(wvp);
+		vn_finished_write(mp);
 		return (EEXIST);
 	}
 
@@ -998,6 +1015,7 @@ union_mkwhiteout(um, dvp, cnp, path)
 		zfree(namei_zone, cn.cn_pnbuf);
 		cn.cn_flags &= ~HASBUF;
 	}
+	vn_finished_write(mp);
 	return (error);
 }
 
diff --git a/sys/miscfs/union/union_vnops.c b/sys/miscfs/union/union_vnops.c
index 1c5ed5d..d7b95f3 100644
--- a/sys/miscfs/union/union_vnops.c
+++ b/sys/miscfs/union/union_vnops.c
@@ -93,6 +93,7 @@ static int	union_print __P((struct vop_print_args *ap));
 static int	union_read __P((struct vop_read_args *ap));
 static int	union_readdir __P((struct vop_readdir_args *ap));
 static int	union_readlink __P((struct vop_readlink_args *ap));
+static int	union_getwritemount __P((struct vop_getwritemount_args *ap));
 static int	union_reclaim __P((struct vop_reclaim_args *ap));
 static int	union_remove __P((struct vop_remove_args *ap));
 static int	union_rename __P((struct vop_rename_args *ap));
@@ -1681,6 +1682,20 @@ union_readlink(ap)
 	return (error);
 }
 
+static int
+union_getwritemount(ap)
+	struct vop_getwritemount_args /* {
+		struct vnode *a_vp;
+		struct mount **a_mpp;
+	} */ *ap;
+{
+	struct vnode *vp = UPPERVP(ap->a_vp);
+
+	if (vp == NULL)
+		panic("union: missing upper layer in getwritemount");
+	return(VOP_GETWRITEMOUNT(vp, ap->a_mpp));
+}
+
 /*
  *	union_inactive:
  *
@@ -1963,6 +1978,7 @@ static struct vnodeopv_entry_desc union_vnodeop_entries[] = {
 	{ &vop_read_desc,		(vop_t *) union_read },
 	{ &vop_readdir_desc,		(vop_t *) union_readdir },
 	{ &vop_readlink_desc,		(vop_t *) union_readlink },
+	{ &vop_getwritemount_desc,	(vop_t *) union_getwritemount },
 	{ &vop_reclaim_desc,		(vop_t *) union_reclaim },
 	{ &vop_remove_desc,		(vop_t *) union_remove },
 	{ &vop_rename_desc,		(vop_t *) union_rename },
diff --git a/sys/nfs/nfs_serv.c b/sys/nfs/nfs_serv.c
index 06ce9ed..0334f74 100644
--- a/sys/nfs/nfs_serv.c
+++ b/sys/nfs/nfs_serv.c
@@ -325,10 +325,18 @@ nfsrv_setattr(nfsd, slp, procp, mrq)
 	struct mbuf *mb, *mb2, *mreq;
 	u_quad_t frev;
 	struct timespec guard;
+	struct mount *mp = NULL;
 
 	nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
 	fhp = &nfh.fh_generic;
 	nfsm_srvmtofh(fhp);
+	if ((mp = vfs_getvfs(&fhp->fh_fsid)) == NULL)
+		return (ESTALE);
+	if ((error = VFS_FHTOVP(mp, &fhp->fh_fid, &vp)) != 0)
+		return (error);
+	(void) vn_start_write(vp, &mp, V_WAIT);
+	vput(vp);
+	vp = NULL;
 	VATTR_NULL(vap);
 	if (v3) {
 		nfsm_srvsattr(vap);
@@ -440,6 +448,7 @@ out:
 nfsmout:
 	if (vp)
 		vput(vp);
+	vn_finished_write(mp);
 	return(error);
 }
 
@@ -1039,6 +1048,7 @@ nfsrv_write(nfsd, slp, procp, mrq)
 	struct uio io, *uiop = &io;
 	off_t off;
 	u_quad_t frev;
+	struct mount *mntp = NULL;
 
 	nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
 	if (mrep == NULL) {
@@ -1048,6 +1058,13 @@ nfsrv_write(nfsd, slp, procp, mrq)
 	}
 	fhp = &nfh.fh_generic;
 	nfsm_srvmtofh(fhp);
+	if ((mntp = vfs_getvfs(&fhp->fh_fsid)) == NULL)
+		return (ESTALE);
+	if ((error = VFS_FHTOVP(mntp, &fhp->fh_fid, &vp)) != 0)
+		return (error);
+	(void) vn_start_write(vp, &mntp, V_WAIT);
+	vput(vp);
+	vp = NULL;
 	if (v3) {
 		nfsm_dissect(tl, u_int32_t *, 5 * NFSX_UNSIGNED);
 		off = fxdr_hyper(tl);
@@ -1205,6 +1222,7 @@ nfsrv_write(nfsd, slp, procp, mrq)
 nfsmout:
 	if (vp)
 		vput(vp);
+	vn_finished_write(mntp);
 	return(error);
 }
 
@@ -1241,6 +1259,7 @@ nfsrv_writegather(ndp, slp, procp, mrq)
 	struct vnode *vp = NULL;
 	struct uio io, *uiop = &io;
 	u_quad_t frev, cur_usec;
+	struct mount *mntp = NULL;
 
 	nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
 #ifndef nolint
@@ -1444,8 +1463,16 @@ loop1:
 			mp = mp->m_next;
 		    }
 		    if (!error) {
+			if (vn_start_write(vp, &mntp, V_NOWAIT) != 0) {
+			    VOP_UNLOCK(vp, 0, procp);
+			    error = vn_start_write(NULL, &mntp, V_WAIT);
+			    vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, procp);
+			}
+		    }
+		    if (!error) {
 			error = VOP_WRITE(vp, uiop, ioflags, cred);
 			nfsstats.srvvop_writes++;
+			vn_finished_write(mntp);
 		    }
 		    FREE((caddr_t)iov, M_TEMP);
 		}
@@ -1620,6 +1647,8 @@ nfsrv_create(nfsd, slp, procp, mrq)
 	fhandle_t *fhp;
 	u_quad_t frev, tempsize;
 	u_char cverf[NFSX_V3CREATEVERF];
+	struct mount *mp = NULL;
+	struct vnode *vp;
 
 	nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
 #ifndef nolint
@@ -1629,6 +1658,12 @@ nfsrv_create(nfsd, slp, procp, mrq)
 
 	fhp = &nfh.fh_generic;
 	nfsm_srvmtofh(fhp);
+	if ((mp = vfs_getvfs(&fhp->fh_fsid)) == NULL)
+		return (ESTALE);
+	if ((error = VFS_FHTOVP(mp, &fhp->fh_fid, &vp)) != 0)
+		return (error);
+	(void) vn_start_write(vp, &mp, V_WAIT);
+	vput(vp);
 	nfsm_srvnamesiz(len);
 
 	nd.ni_cnd.cn_cred = cred;
@@ -1869,6 +1904,7 @@ nfsmout:
 	}
 	if (nd.ni_vp)
 		vput(nd.ni_vp);
+	vn_finished_write(mp);
 	return (error);
 }
 
@@ -1901,12 +1937,20 @@ nfsrv_mknod(nfsd, slp, procp, mrq)
 	nfsfh_t nfh;
 	fhandle_t *fhp;
 	u_quad_t frev;
+	struct mount *mp = NULL;
 
 	nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
 	ndclear(&nd);
 
 	fhp = &nfh.fh_generic;
 	nfsm_srvmtofh(fhp);
+	if ((mp = vfs_getvfs(&fhp->fh_fsid)) == NULL)
+		return (ESTALE);
+	if ((error = VFS_FHTOVP(mp, &fhp->fh_fid, &vp)) != 0)
+		return (error);
+	(void) vn_start_write(vp, &mp, V_WAIT);
+	vput(vp);
+	vp = NULL;
 	nfsm_srvnamesiz(len);
 
 	nd.ni_cnd.cn_cred = cred;
@@ -2030,6 +2074,7 @@ out:
 		nfsm_srvpostop_attr(0, vap);
 	}
 	nfsm_srvwcc_data(dirfor_ret, &dirfor, diraft_ret, &diraft);
+	vn_finished_write(mp);
 	return (0);
 nfsmout:
 	if (dirp)
@@ -2045,6 +2090,7 @@ nfsmout:
 	}
 	if (nd.ni_vp)
 		vput(nd.ni_vp);
+	vn_finished_write(mp);
 	return (error);
 }
 
@@ -2075,12 +2121,21 @@ nfsrv_remove(nfsd, slp, procp, mrq)
 	nfsfh_t nfh;
 	fhandle_t *fhp;
 	u_quad_t frev;
+	struct mount *mp = NULL;
+	struct vnode *vp;
 
 	nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
 	ndclear(&nd);
 
 	fhp = &nfh.fh_generic;
 	nfsm_srvmtofh(fhp);
+	if ((mp = vfs_getvfs(&fhp->fh_fsid)) == NULL)
+		return (ESTALE);
+	if ((error = VFS_FHTOVP(mp, &fhp->fh_fid, &vp)) != 0)
+		return (error);
+	(void) vn_start_write(vp, &mp, V_WAIT);
+	vput(vp);
+	vp = NULL;
 	nfsm_srvnamesiz(len);
 
 	nd.ni_cnd.cn_cred = cred;
@@ -2137,6 +2192,7 @@ nfsmout:
 	}
 	if (nd.ni_vp)
 		vput(nd.ni_vp);
+	vn_finished_write(mp);
 	return(error);
 }
 
@@ -2170,6 +2226,8 @@ nfsrv_rename(nfsd, slp, procp, mrq)
 	fhandle_t *ffhp, *tfhp;
 	u_quad_t frev;
 	uid_t saved_uid;
+	struct mount *mp = NULL;
+	struct vnode *vp;
 
 	nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
 #ifndef nolint
@@ -2186,6 +2244,13 @@ nfsrv_rename(nfsd, slp, procp, mrq)
 	ndclear(&tond);
 
 	nfsm_srvmtofh(ffhp);
+	if ((mp = vfs_getvfs(&ffhp->fh_fsid)) == NULL)
+		return (ESTALE);
+	if ((error = VFS_FHTOVP(mp, &ffhp->fh_fid, &vp)) != 0)
+		return (error);
+	(void) vn_start_write(vp, &mp, V_WAIT);
+	vput(vp);
+	vp = NULL;
 	nfsm_srvnamesiz(len);
 	/*
 	 * Remember our original uid so that we can reset cr_uid before
@@ -2360,6 +2425,7 @@ nfsmout:
 	if (fromnd.ni_vp)
 		vrele(fromnd.ni_vp);
 
+	vn_finished_write(mp);
 	return (error);
 }
 
@@ -2390,6 +2456,7 @@ nfsrv_link(nfsd, slp, procp, mrq)
 	nfsfh_t nfh, dnfh;
 	fhandle_t *fhp, *dfhp;
 	u_quad_t frev;
+	struct mount *mp = NULL;
 
 	nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
 	ndclear(&nd);
@@ -2397,6 +2464,13 @@ nfsrv_link(nfsd, slp, procp, mrq)
 	fhp = &nfh.fh_generic;
 	dfhp = &dnfh.fh_generic;
 	nfsm_srvmtofh(fhp);
+	if ((mp = vfs_getvfs(&fhp->fh_fsid)) == NULL)
+		return (ESTALE);
+	if ((error = VFS_FHTOVP(mp, &fhp->fh_fid, &vp)) != 0)
+		return (error);
+	(void) vn_start_write(vp, &mp, V_WAIT);
+	vput(vp);
+	vp = NULL;
 	nfsm_srvmtofh(dfhp);
 	nfsm_srvnamesiz(len);
 
@@ -2475,6 +2549,7 @@ nfsmout:
 	}
 	if (nd.ni_vp)
 		vrele(nd.ni_vp);
+	vn_finished_write(mp);
 	return(error);
 }
 
@@ -2508,12 +2583,21 @@ nfsrv_symlink(nfsd, slp, procp, mrq)
 	nfsfh_t nfh;
 	fhandle_t *fhp;
 	u_quad_t frev;
+	struct mount *mp = NULL;
+	struct vnode *vp;
 
 	nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
 	ndclear(&nd);
 
 	fhp = &nfh.fh_generic;
 	nfsm_srvmtofh(fhp);
+	if ((mp = vfs_getvfs(&fhp->fh_fsid)) == NULL)
+		return (ESTALE);
+	if ((error = VFS_FHTOVP(mp, &fhp->fh_fid, &vp)) != 0)
+		return (error);
+	(void) vn_start_write(vp, &mp, V_WAIT);
+	vput(vp);
+	vp = NULL;
 	nfsm_srvnamesiz(len);
 	nd.ni_cnd.cn_cred = cred;
 	nd.ni_cnd.cn_nameiop = CREATE;
@@ -2651,6 +2735,7 @@ nfsmout:
 	if (pathcp)
 		FREE(pathcp, M_TEMP);
 
+	vn_finished_write(mp);
 	return (error);
 }
 
@@ -2685,12 +2770,21 @@ nfsrv_mkdir(nfsd, slp, procp, mrq)
 	nfsfh_t nfh;
 	fhandle_t *fhp;
 	u_quad_t frev;
+	struct mount *mp = NULL;
+	struct vnode *vp;
 
 	nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
 	ndclear(&nd);
 
 	fhp = &nfh.fh_generic;
 	nfsm_srvmtofh(fhp);
+	if ((mp = vfs_getvfs(&fhp->fh_fsid)) == NULL)
+		return (ESTALE);
+	if ((error = VFS_FHTOVP(mp, &fhp->fh_fid, &vp)) != 0)
+		return (error);
+	(void) vn_start_write(vp, &mp, V_WAIT);
+	vput(vp);
+	vp = NULL;
 	nfsm_srvnamesiz(len);
 	nd.ni_cnd.cn_cred = cred;
 	nd.ni_cnd.cn_nameiop = CREATE;
@@ -2787,6 +2881,7 @@ nfsmout:
 		else
 			vrele(nd.ni_vp);
 	}
+	vn_finished_write(mp);
 	return (error);
 }
 
@@ -2817,12 +2912,20 @@ nfsrv_rmdir(nfsd, slp, procp, mrq)
 	fhandle_t *fhp;
 	struct nameidata nd;
 	u_quad_t frev;
+	struct mount *mp = NULL;
 
 	nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
 	ndclear(&nd);
 
 	fhp = &nfh.fh_generic;
 	nfsm_srvmtofh(fhp);
+	if ((mp = vfs_getvfs(&fhp->fh_fsid)) == NULL)
+		return (ESTALE);
+	if ((error = VFS_FHTOVP(mp, &fhp->fh_fid, &vp)) != 0)
+		return (error);
+	(void) vn_start_write(vp, &mp, V_WAIT);
+	vput(vp);
+	vp = NULL;
 	nfsm_srvnamesiz(len);
 	nd.ni_cnd.cn_cred = cred;
 	nd.ni_cnd.cn_nameiop = DELETE;
@@ -2895,6 +2998,7 @@ nfsmout:
 	if (nd.ni_vp)
 		vput(nd.ni_vp);
 
+	vn_finished_write(mp);
 	return(error);
 }
 
@@ -3588,6 +3692,7 @@ nfsrv_commit(nfsd, slp, procp, mrq)
 	char *cp2;
 	struct mbuf *mb, *mb2, *mreq;
 	u_quad_t frev, off;
+	struct mount *mp = NULL;
 
 	nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
 #ifndef nolint
@@ -3595,6 +3700,13 @@ nfsrv_commit(nfsd, slp, procp, mrq)
 #endif
 	fhp = &nfh.fh_generic;
 	nfsm_srvmtofh(fhp);
+	if ((mp = vfs_getvfs(&fhp->fh_fsid)) == NULL)
+		return (ESTALE);
+	if ((error = VFS_FHTOVP(mp, &fhp->fh_fid, &vp)) != 0)
+		return (error);
+	(void) vn_start_write(vp, &mp, V_WAIT);
+	vput(vp);
+	vp = NULL;
 	nfsm_dissect(tl, u_int32_t *, 3 * NFSX_UNSIGNED);
 
 	/*
@@ -3697,6 +3809,7 @@ nfsrv_commit(nfsd, slp, procp, mrq)
 nfsmout:
 	if (vp)
 		vput(vp);
+	vn_finished_write(mp);
 	return(error);
 }
 
@@ -4065,4 +4178,3 @@ nfsrv_access(vp, flags, cred, rdonly, p, override)
 	return error;
 }
 #endif /* NFS_NOSERVER */
-
diff --git a/sys/nfsserver/nfs_serv.c b/sys/nfsserver/nfs_serv.c
index 06ce9ed..0334f74 100644
--- a/sys/nfsserver/nfs_serv.c
+++ b/sys/nfsserver/nfs_serv.c
@@ -325,10 +325,18 @@ nfsrv_setattr(nfsd, slp, procp, mrq)
 	struct mbuf *mb, *mb2, *mreq;
 	u_quad_t frev;
 	struct timespec guard;
+	struct mount *mp = NULL;
 
 	nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
 	fhp = &nfh.fh_generic;
 	nfsm_srvmtofh(fhp);
+	if ((mp = vfs_getvfs(&fhp->fh_fsid)) == NULL)
+		return (ESTALE);
+	if ((error = VFS_FHTOVP(mp, &fhp->fh_fid, &vp)) != 0)
+		return (error);
+	(void) vn_start_write(vp, &mp, V_WAIT);
+	vput(vp);
+	vp = NULL;
 	VATTR_NULL(vap);
 	if (v3) {
 		nfsm_srvsattr(vap);
@@ -440,6 +448,7 @@ out:
 nfsmout:
 	if (vp)
 		vput(vp);
+	vn_finished_write(mp);
 	return(error);
 }
 
@@ -1039,6 +1048,7 @@ nfsrv_write(nfsd, slp, procp, mrq)
 	struct uio io, *uiop = &io;
 	off_t off;
 	u_quad_t frev;
+	struct mount *mntp = NULL;
 
 	nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
 	if (mrep == NULL) {
@@ -1048,6 +1058,13 @@ nfsrv_write(nfsd, slp, procp, mrq)
 	}
 	fhp = &nfh.fh_generic;
 	nfsm_srvmtofh(fhp);
+	if ((mntp = vfs_getvfs(&fhp->fh_fsid)) == NULL)
+		return (ESTALE);
+	if ((error = VFS_FHTOVP(mntp, &fhp->fh_fid, &vp)) != NULL)
+		return (error);
+	(void) vn_start_write(vp, &mntp, V_WAIT);
+	vput(vp);
+	vp = NULL;
 	if (v3) {
 		nfsm_dissect(tl, u_int32_t *, 5 * NFSX_UNSIGNED);
 		off = fxdr_hyper(tl);
@@ -1205,6 +1222,7 @@ nfsrv_write(nfsd, slp, procp, mrq)
 nfsmout:
 	if (vp)
 		vput(vp);
+	vn_finished_write(mntp);
 	return(error);
 }
 
@@ -1241,6 +1259,7 @@ nfsrv_writegather(ndp, slp, procp, mrq)
 	struct vnode *vp = NULL;
 	struct uio io, *uiop = &io;
 	u_quad_t frev, cur_usec;
+	struct mount *mntp = NULL;
 
 	nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
 #ifndef nolint
@@ -1444,8 +1463,16 @@ loop1:
 			mp = mp->m_next;
 		    }
 		    if (!error) {
+			if (vn_start_write(vp, &mntp, V_NOWAIT) != 0) {
+			    VOP_UNLOCK(vp, 0, procp);
+			    error = vn_start_write(NULL, &mntp, V_WAIT);
+			    vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, procp);
+			}
+		    }
+		    if (!error) {
 			error = VOP_WRITE(vp, uiop, ioflags, cred);
 			nfsstats.srvvop_writes++;
+			vn_finished_write(mntp);
 		    }
 		    FREE((caddr_t)iov, M_TEMP);
 		}
@@ -1620,6 +1647,8 @@ nfsrv_create(nfsd, slp, procp, mrq)
 	fhandle_t *fhp;
 	u_quad_t frev, tempsize;
 	u_char cverf[NFSX_V3CREATEVERF];
+	struct mount *mp = NULL;
+	struct vnode *vp;
 
 	nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
 #ifndef nolint
@@ -1629,6 +1658,12 @@ nfsrv_create(nfsd, slp, procp, mrq)
 
 	fhp = &nfh.fh_generic;
 	nfsm_srvmtofh(fhp);
+	if ((mp = vfs_getvfs(&fhp->fh_fsid)) == NULL)
+		return (ESTALE);
+	if ((error = VFS_FHTOVP(mp, &fhp->fh_fid, &vp)) != NULL)
+		return (error);
+	(void) vn_start_write(vp, &mp, V_WAIT);
+	vput(vp);
 	nfsm_srvnamesiz(len);
 
 	nd.ni_cnd.cn_cred = cred;
@@ -1869,6 +1904,7 @@ nfsmout:
 	}
 	if (nd.ni_vp)
 		vput(nd.ni_vp);
+	vn_finished_write(mp);
 	return (error);
 }
 
@@ -1901,12 +1937,20 @@ nfsrv_mknod(nfsd, slp, procp, mrq)
 	nfsfh_t nfh;
 	fhandle_t *fhp;
 	u_quad_t frev;
+	struct mount *mp = NULL;
 
 	nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
 	ndclear(&nd);
 
 	fhp = &nfh.fh_generic;
 	nfsm_srvmtofh(fhp);
+	if ((mp = vfs_getvfs(&fhp->fh_fsid)) == NULL)
+		return (ESTALE);
+	if ((error = VFS_FHTOVP(mp, &fhp->fh_fid, &vp)) != NULL)
+		return (error);
+	(void) vn_start_write(vp, &mp, V_WAIT);
+	vput(vp);
+	vp = NULL;
 	nfsm_srvnamesiz(len);
 
 	nd.ni_cnd.cn_cred = cred;
@@ -2030,6 +2074,7 @@ out:
 		nfsm_srvpostop_attr(0, vap);
 	}
 	nfsm_srvwcc_data(dirfor_ret, &dirfor, diraft_ret, &diraft);
+	vn_finished_write(mp);
 	return (0);
 nfsmout:
 	if (dirp)
@@ -2045,6 +2090,7 @@ nfsmout:
 	}
 	if (nd.ni_vp)
 		vput(nd.ni_vp);
+	vn_finished_write(mp);
 	return (error);
 }
 
@@ -2075,12 +2121,21 @@ nfsrv_remove(nfsd, slp, procp, mrq)
 	nfsfh_t nfh;
 	fhandle_t *fhp;
 	u_quad_t frev;
+	struct mount *mp = NULL;
+	struct vnode *vp;
 
 	nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
 	ndclear(&nd);
 
 	fhp = &nfh.fh_generic;
 	nfsm_srvmtofh(fhp);
+	if ((mp = vfs_getvfs(&fhp->fh_fsid)) == NULL)
+		return (ESTALE);
+	if ((error = VFS_FHTOVP(mp, &fhp->fh_fid, &vp)) != NULL)
+		return (error);
+	(void) vn_start_write(vp, &mp, V_WAIT);
+	vput(vp);
+	vp = NULL;
 	nfsm_srvnamesiz(len);
 
 	nd.ni_cnd.cn_cred = cred;
@@ -2137,6 +2192,7 @@ nfsmout:
 	}
 	if (nd.ni_vp)
 		vput(nd.ni_vp);
+	vn_finished_write(mp);
 	return(error);
 }
 
@@ -2170,6 +2226,8 @@ nfsrv_rename(nfsd, slp, procp, mrq)
 	fhandle_t *ffhp, *tfhp;
 	u_quad_t frev;
 	uid_t saved_uid;
+	struct mount *mp = NULL;
+	struct vnode *vp;
 
 	nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
 #ifndef nolint
@@ -2186,6 +2244,13 @@ nfsrv_rename(nfsd, slp, procp, mrq)
 	ndclear(&tond);
 
 	nfsm_srvmtofh(ffhp);
+	if ((mp = vfs_getvfs(&ffhp->fh_fsid)) == NULL)
+		return (ESTALE);
+	if ((error = VFS_FHTOVP(mp, &ffhp->fh_fid, &vp)) != NULL)
+		return (error);
+	(void) vn_start_write(vp, &mp, V_WAIT);
+	vput(vp);
+	vp = NULL;
 	nfsm_srvnamesiz(len);
 	/*
 	 * Remember our original uid so that we can reset cr_uid before
@@ -2360,6 +2425,7 @@ nfsmout:
 	if (fromnd.ni_vp)
 		vrele(fromnd.ni_vp);
 
+	vn_finished_write(mp);
 	return (error);
 }
 
@@ -2390,6 +2456,7 @@ nfsrv_link(nfsd, slp, procp, mrq)
 	nfsfh_t nfh, dnfh;
 	fhandle_t *fhp, *dfhp;
 	u_quad_t frev;
+	struct mount *mp = NULL;
 
 	nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
 	ndclear(&nd);
@@ -2397,6 +2464,13 @@ nfsrv_link(nfsd, slp, procp, mrq)
 	fhp = &nfh.fh_generic;
 	dfhp = &dnfh.fh_generic;
 	nfsm_srvmtofh(fhp);
+	if ((mp = vfs_getvfs(&fhp->fh_fsid)) == NULL)
+		return (ESTALE);
+	if ((error = VFS_FHTOVP(mp, &fhp->fh_fid, &vp)) != NULL)
+		return (error);
+	(void) vn_start_write(vp, &mp, V_WAIT);
+	vput(vp);
+	vp = NULL;
 	nfsm_srvmtofh(dfhp);
 	nfsm_srvnamesiz(len);
 
@@ -2475,6 +2549,7 @@ nfsmout:
 	}
 	if (nd.ni_vp)
 		vrele(nd.ni_vp);
+	vn_finished_write(mp);
 	return(error);
 }
 
@@ -2508,12 +2583,21 @@ nfsrv_symlink(nfsd, slp, procp, mrq)
 	nfsfh_t nfh;
 	fhandle_t *fhp;
 	u_quad_t frev;
+	struct mount *mp = NULL;
+	struct vnode *vp;
 
 	nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
 	ndclear(&nd);
 
 	fhp = &nfh.fh_generic;
 	nfsm_srvmtofh(fhp);
+	if ((mp = vfs_getvfs(&fhp->fh_fsid)) == NULL)
+		return (ESTALE);
+	if ((error = VFS_FHTOVP(mp, &fhp->fh_fid, &vp)) != NULL)
+		return (error);
+	(void) vn_start_write(vp, &mp, V_WAIT);
+	vput(vp);
+	vp = NULL;
 	nfsm_srvnamesiz(len);
 	nd.ni_cnd.cn_cred = cred;
 	nd.ni_cnd.cn_nameiop = CREATE;
@@ -2651,6 +2735,7 @@ nfsmout:
 	if (pathcp)
 		FREE(pathcp, M_TEMP);
 
+	vn_finished_write(mp);
 	return (error);
 }
 
@@ -2685,12 +2770,21 @@ nfsrv_mkdir(nfsd, slp, procp, mrq)
 	nfsfh_t nfh;
 	fhandle_t *fhp;
 	u_quad_t frev;
+	struct mount *mp = NULL;
+	struct vnode *vp;
 
 	nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
 	ndclear(&nd);
 
 	fhp = &nfh.fh_generic;
 	nfsm_srvmtofh(fhp);
+	if ((mp = vfs_getvfs(&fhp->fh_fsid)) == NULL)
+		return (ESTALE);
+	if ((error = VFS_FHTOVP(mp, &fhp->fh_fid, &vp)) != NULL)
+		return (error);
+	(void) vn_start_write(vp, &mp, V_WAIT);
+	vput(vp);
+	vp = NULL;
 	nfsm_srvnamesiz(len);
 	nd.ni_cnd.cn_cred = cred;
 	nd.ni_cnd.cn_nameiop = CREATE;
@@ -2787,6 +2881,7 @@ nfsmout:
 		else
 			vrele(nd.ni_vp);
 	}
+	vn_finished_write(mp);
 	return (error);
 }
 
@@ -2817,12 +2912,20 @@ nfsrv_rmdir(nfsd, slp, procp, mrq)
 	fhandle_t *fhp;
 	struct nameidata nd;
 	u_quad_t frev;
+	struct mount *mp = NULL;
 
 	nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
 	ndclear(&nd);
 
 	fhp = &nfh.fh_generic;
 	nfsm_srvmtofh(fhp);
+	if ((mp = vfs_getvfs(&fhp->fh_fsid)) == NULL)
+		return (ESTALE);
+	if ((error = VFS_FHTOVP(mp, &fhp->fh_fid, &vp)) != NULL)
+		return (error);
+	(void) vn_start_write(vp, &mp, V_WAIT);
+	vput(vp);
+	vp = NULL;
 	nfsm_srvnamesiz(len);
 	nd.ni_cnd.cn_cred = cred;
 	nd.ni_cnd.cn_nameiop = DELETE;
@@ -2895,6 +2998,7 @@ nfsmout:
 	if (nd.ni_vp)
 		vput(nd.ni_vp);
 
+	vn_finished_write(mp);
 	return(error);
 }
 
@@ -3588,6 +3692,7 @@ nfsrv_commit(nfsd, slp, procp, mrq)
 	char *cp2;
 	struct mbuf *mb, *mb2, *mreq;
 	u_quad_t frev, off;
+	struct mount *mp = NULL;
 
 	nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
 #ifndef nolint
@@ -3595,6 +3700,13 @@ nfsrv_commit(nfsd, slp, procp, mrq)
 #endif
 	fhp = &nfh.fh_generic;
 	nfsm_srvmtofh(fhp);
+	if ((mp = vfs_getvfs(&fhp->fh_fsid)) == NULL)
+		return (ESTALE);
+	if ((error = VFS_FHTOVP(mp, &fhp->fh_fid, &vp)) != NULL)
+		return (error);
+	(void) vn_start_write(vp, &mp, V_WAIT);
+	vput(vp);
+	vp = NULL;
 	nfsm_dissect(tl, u_int32_t *, 3 * NFSX_UNSIGNED);
 
 	/*
@@ -3697,6 +3809,7 @@ nfsrv_commit(nfsd, slp, procp, mrq)
 nfsmout:
 	if (vp)
 		vput(vp);
+	vn_finished_write(mp);
 	return(error);
 }
 
@@ -4065,4 +4178,3 @@ nfsrv_access(vp, flags, cred, rdonly, p, override)
 	return error;
 }
 #endif /* NFS_NOSERVER */
-
diff --git a/sys/svr4/svr4_fcntl.c b/sys/svr4/svr4_fcntl.c
index 4040030..c65f345 100644
--- a/sys/svr4/svr4_fcntl.c
+++ b/sys/svr4/svr4_fcntl.c
@@ -247,6 +247,7 @@ fd_revoke(p, fd)
 	struct filedesc *fdp = p->p_fd;
 	struct file *fp;
 	struct vnode *vp;
+	struct mount *mp;
 	struct vattr vattr;
 	int error, *retval;
 
@@ -271,8 +272,11 @@ fd_revoke(p, fd)
 	    (error = suser(p)) != 0)
 		goto out;
 
+	if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
+		goto out;
 	if (vcount(vp) > 1)
 		VOP_REVOKE(vp, REVOKEALL);
+	vn_finished_write(mp);
 out:
 	vrele(vp);
 	return error;
diff --git a/sys/sys/buf.h b/sys/sys/buf.h
index bc8203f..116e011 100644
--- a/sys/sys/buf.h
+++ b/sys/sys/buf.h
@@ -466,6 +466,7 @@ buf_countdeps(struct buf *bp, int i)
 /* Flags to low-level allocation routines. */
 #define B_CLRBUF	0x01	/* Request allocated buffer be cleared. */
 #define B_SYNC		0x02	/* Do all allocations synchronously. */
+#define	B_METAONLY	0x04	/* Return indirect block buffer. */
 
 #ifdef _KERNEL
 extern int	nbuf;			/* The number of buffer headers */
diff --git a/sys/sys/proc.h b/sys/sys/proc.h
index d215351..fb80e5b 100644
--- a/sys/sys/proc.h
+++ b/sys/sys/proc.h
@@ -285,6 +285,7 @@ struct	proc {
 /* Marked a kernel thread */
 #define	P_BUFEXHAUST	0x100000 /* dirty buffers flush is in progress */
 #define	P_KTHREADP	0x200000 /* Process is really a kernel thread */
+#define	P_COWINPROGRESS	0x400000 /* Snapshot copy-on-write in progress */
 
 #define	P_DEADLKTREAT   0x800000 /* lock aquisition - deadlock treatment */
 
diff --git a/sys/sys/vnode.h b/sys/sys/vnode.h
index 5817855..3da7897 100644
--- a/sys/sys/vnode.h
+++ b/sys/sys/vnode.h
@@ -158,7 +158,7 @@ struct vnode {
 /* open for business    0x00800 */
 /* open for business    0x01000 */
 #define	VOBJBUF		0x02000	/* Allocate buffers in VM object */
-/* open for business    0x04000 */
+#define	VCOPYONWRITE    0x04000 /* vnode is doing copy-on-write */
 #define	VAGE		0x08000	/* Insert vnode at head of free list */
 #define	VOLOCK		0x10000	/* vnode is locked waiting for an object */
 #define	VOWANT		0x20000	/* a process is waiting for VOLOCK */
@@ -246,12 +246,15 @@ extern int		vttoif_tab[];
 /*
  * Flags to various vnode functions.
  */
-#define	SKIPSYSTEM	0x0001		/* vflush: skip vnodes marked VSYSTEM */
-#define	FORCECLOSE	0x0002		/* vflush: force file closure */
-#define	WRITECLOSE	0x0004		/* vflush: only close writable files */
-#define	DOCLOSE		0x0008		/* vclean: close active files */
-#define	V_SAVE		0x0001		/* vinvalbuf: sync file first */
-#define	REVOKEALL	0x0001		/* vop_revoke: revoke all aliases */
+#define	SKIPSYSTEM	0x0001	/* vflush: skip vnodes marked VSYSTEM */
+#define	FORCECLOSE	0x0002	/* vflush: force file closure */
+#define	WRITECLOSE	0x0004	/* vflush: only close writable files */
+#define	DOCLOSE		0x0008	/* vclean: close active files */
+#define	V_SAVE		0x0001	/* vinvalbuf: sync file first */
+#define	REVOKEALL	0x0001	/* vop_revoke: revoke all aliases */
+#define	V_WAIT		0x0001	/* vn_start_write: sleep for suspend */
+#define	V_NOWAIT	0x0002	/* vn_start_write: don't sleep for suspend */
+#define	V_XSLEEP	0x0004	/* vn_start_write: just return after sleep */
 
 #define	VREF(vp)	vref(vp)
 
@@ -572,6 +575,7 @@ int	vrecycle __P((struct vnode *vp, struct simplelock *inter_lkp,
 	    struct proc *p));
 int 	vn_close __P((struct vnode *vp,
 	    int flags, struct ucred *cred, struct proc *p));
+void	vn_finished_write __P((struct mount *mp));
 int	vn_isdisk __P((struct vnode *vp, int *errp));
 int	vn_lock __P((struct vnode *vp, int flags, struct proc *p));
 #ifdef	DEBUG_LOCKS
@@ -587,13 +591,18 @@ int 	vn_rdwr __P((enum uio_rw rw, struct vnode *vp, caddr_t base,
 	    int len, off_t offset, enum uio_seg segflg, int ioflg,
 	    struct ucred *cred, int *aresid, struct proc *p));
 int	vn_stat __P((struct vnode *vp, struct stat *sb, struct proc *p));
+int	vn_start_write __P((struct vnode *vp, struct mount **mpp, int flags));
 dev_t	vn_todev __P((struct vnode *vp));
+int	vn_write_suspend_wait __P((struct vnode *vp, int flags));
+int 	vn_writechk __P((struct vnode *vp));
 int	vfs_cache_lookup __P((struct vop_lookup_args *ap));
 int	vfs_object_create __P((struct vnode *vp, struct proc *p,
                 struct ucred *cred));
 void	vfs_timestamp __P((struct timespec *));
-int 	vn_writechk __P((struct vnode *vp));
+void	vfs_write_resume __P((struct mount *mp));
+void	vfs_write_suspend __P((struct mount *mp));
 int	vop_stdbwrite __P((struct vop_bwrite_args *ap));
+int	vop_stdgetwritemount __P((struct vop_getwritemount_args *));
 int	vop_stdislocked __P((struct vop_islocked_args *));
 int	vop_stdlock __P((struct vop_lock_args *));
 int	vop_stdunlock __P((struct vop_unlock_args *));
diff --git a/sys/ufs/ffs/README.snapshot b/sys/ufs/ffs/README.snapshot
new file mode 100644
index 0000000..f3177c3
--- /dev/null
+++ b/sys/ufs/ffs/README.snapshot
@@ -0,0 +1,112 @@
+$FreeBSD$
+
+Snapshot Status
+
+As is detailed in the operational information below, snapshots
+are definitely alpha-test code and are NOT yet ready for production
+use. Much remains to be done to make them really useful, but I
+wanted to let folks get a chance to try it out and start reporting
+bugs and other shortcomings. Such reports should be sent to
+Kirk McKusick <mckusick@mckusick.com>.
+
+
+Snapshot Copyright Restrictions
+
+Snapshots have been introduced to FreeBSD with a `Berkeley-style'
+copyright. The file implementing snapshots resides in the sys/ufs/ffs
+directory and is compiled into the generic kernel by default.
+
+
+Using Snapshots
+
+To create a snapshot of your /var filesystem, run the command:
+
+	mount -u -o snapshot /var/snapshot/snap1 /var
+
+This command will take a snapshot of your /var filesystem and
+leave it in the file /var/snapshot/snap1. Note that snapshot
+files must be created in the filesystem that is being snapshotted.
+I use the convention of putting a `snapshot' directory at the
+root of each filesystem into which I can place snapshots.
+You may create up to 20 snapshots per filesystem. Active snapshots
+are recorded in the superblock, so they persist across unmount
+and remount operations and across system reboots. When you
+are done with a snapshot, it can be removed with the `rm'
+command. Snapshots may be removed in any order; however, you
+may not get back all the space contained in the snapshot as
+another snapshot may claim some of the blocks that it is releasing.
+Note that the `schg' flag is set on snapshots to ensure that
+not even the root user can write to them. The unlink command
+makes an exception for snapshot files in that it allows them
+to be removed even though they have the `schg' flag set, so it
+is not necessary to clear the `schg' flag before removing a
+snapshot file.
+
+Once you have taken a snapshot, there are three interesting
+things that you can do with it:
+
+1) Run fsck on the snapshot file. Assuming that the filesystem
+   was clean when it was mounted, you should always get a clean
+   (and unchanging) result from running fsck on the snapshot.
+   If you are running with soft updates and rebooted after a
+   crash without cleaning up the filesystem, then fsck of the
+   snapshot may find missing blocks and inodes or inodes with
+   link counts that are too high. I have not yet added the
+   system calls to allow fsck to add these missing resources
+   back to the filesystem - that will be added once the basic
+   snapshot code is working properly. So, view those reports
+   as informational for now.
+
+2) Run dump on the snapshot. You will get a dump that is
+   consistent with the filesystem as of the timestamp of the
+   snapshot. Note that I have not yet changed dump to set the
+   dumpdates file correctly, so do not use this feature in
+   production until that fix is made.
+
+3) Mount the snapshot as a frozen image of the filesystem.
+   To mount the snapshot /var/snapshot/snap1:
+
+	vnconfig -c vn0c /var/snapshot/snap1
+	mount -r /dev/vn0c /mnt
+
+   You can now cruise around your frozen /var filesystem
+   at /mnt. Everything will be in the same state that it
+   was at the time the snapshot was taken. The one exception
+   is that any earlier snapshots will appear as zero length
+   files. When you are done with the mounted snapshot:
+
+	umount /mnt
+	vnconfig -u vn0c
+
+   Note that under some circumstances, the process accessing
+   the frozen filesystem may deadlock. I am aware of this
+   problem, but the solution is not simple. It requires
+   using buffer read locks rather than exclusive locks when
+   traversing the inode indirect blocks. Until this problem
+   is fixed, you should avoid putting mounted snapshots into
+   production.
+
+
+Performance
+
+It takes about 30 seconds to create a snapshot of an 8Gb filesystem.
+Of that time 25 seconds is spent in preparation; filesystem activity
+is only suspended for the final 5 seconds of that period. Snapshot
+removal of an 8Gb filesystem takes about two minutes. Filesystem
+activity is never suspended during snapshot removal.
+
+The suspend time may be expanded by several minutes if a process
+is in the midst of removing many files as all the soft updates
+backlog must be cleared. Generally snapshots do not slow the system
+down appreciably except when removing many small files (i.e., any
+file less than 96Kb whose last block is a fragment) that are claimed
+by a snapshot. Here, the snapshot code must make a copy of every
+released fragment which slows the rate of file removal to about
+twenty files per second once the soft updates backlog limit is
+reached.
+
+
+How Snapshots Work
+
+For more general information on snapshots, please see:
+	http://www.mckusick.com/softdep/
diff --git a/sys/ufs/ffs/ffs_alloc.c b/sys/ufs/ffs/ffs_alloc.c
index 1f24b2b..5efe0e7 100644
--- a/sys/ufs/ffs/ffs_alloc.c
+++ b/sys/ufs/ffs/ffs_alloc.c
@@ -186,6 +186,8 @@ ffs_realloccg(ip, lbprev, bpref, osize, nsize, cred, bpp)
 	*bpp = 0;
 	fs = ip->i_fs;
 #ifdef DIAGNOSTIC
+	if (ITOV(ip)->v_mount->mnt_kern_flag & MNTK_SUSPENDED)
+		panic("ffs_realloccg: allocation on suspended filesystem");
 	if ((u_int)osize > fs->fs_bsize || fragoff(fs, osize) != 0 ||
 	    (u_int)nsize > fs->fs_bsize || fragoff(fs, nsize) != 0) {
 		printf(
@@ -763,6 +765,10 @@ ffs_hashalloc(ip, cg, pref, size, allocator)
 	long result;	/* XXX why not same type as we return? */
 	int i, icg = cg;
 
+#ifdef DIAGNOSTIC
+	if (ITOV(ip)->v_mount->mnt_kern_flag & MNTK_SUSPENDED)
+		panic("ffs_hashalloc: allocation on suspended filesystem");
+#endif
 	fs = ip->i_fs;
 	/*
 	 * 1: preferred cylinder group
@@ -1311,9 +1317,13 @@ ffs_blkfree(ip, bno, size)
 	ufs_daddr_t blkno;
 	int i, error, cg, blk, frags, bbase;
 	u_int8_t *blksfree;
+	struct vnode *vp;
 
 	fs = ip->i_fs;
-	VOP_FREEBLKS(ip->i_devvp, fsbtodb(fs, bno), size);
+#ifdef DIAGNOSTIC
+	if ((vp = ITOV(ip)) != NULL && vp->v_mount != NULL &&
+	    (vp->v_mount->mnt_kern_flag & MNTK_SUSPENDED))
+		panic("ffs_blkfree: deallocation on suspended filesystem");
 	if ((u_int)size > fs->fs_bsize || fragoff(fs, size) != 0 ||
 	    fragnum(fs, bno) + numfrags(fs, size) > fs->fs_frag) {
 		printf("dev=%s, bno = %ld, bsize = %ld, size = %ld, fs = %s\n",
@@ -1321,6 +1331,11 @@ ffs_blkfree(ip, bno, size)
 		    fs->fs_fsmnt);
 		panic("ffs_blkfree: bad size");
 	}
+#endif
+	if ((ip->i_devvp->v_flag & VCOPYONWRITE) &&
+	    ffs_snapblkfree(ip, bno, size))
+		return;
+	VOP_FREEBLKS(ip->i_devvp, fsbtodb(fs, bno), size);
 	cg = dtog(fs, bno);
 	if ((u_int)bno >= fs->fs_size) {
 		printf("bad block %ld, ino %lu\n",
diff --git a/sys/ufs/ffs/ffs_balloc.c b/sys/ufs/ffs/ffs_balloc.c
index 28cc1ed..92fe379 100644
--- a/sys/ufs/ffs/ffs_balloc.c
+++ b/sys/ufs/ffs/ffs_balloc.c
@@ -125,6 +125,8 @@ ffs_balloc(ap)
 	 * The first NDADDR blocks are direct blocks
 	 */
 	if (lbn < NDADDR) {
+		if (flags & B_METAONLY)
+			panic("ffs_balloc: B_METAONLY for direct block");
 		nb = ip->i_db[lbn];
 		if (nb != 0 && ip->i_size >= smalllblktosize(fs, lbn + 1)) {
 			error = bread(vp, lbn, fs->fs_bsize, NOCRED, &bp);
@@ -289,6 +291,13 @@ ffs_balloc(ap)
 		}
 	}
 	/*
+	 * If asked only for the indirect block, then return it.
+	 */
+	if (flags & B_METAONLY) {
+		*ap->a_bpp = bp;
+		return (0);
+	}
+	/*
 	 * Get the data block, allocating if necessary.
 	 */
 	if (nb == 0) {
diff --git a/sys/ufs/ffs/ffs_extern.h b/sys/ufs/ffs/ffs_extern.h
index fe7391b..8e011bb 100644
--- a/sys/ufs/ffs/ffs_extern.h
+++ b/sys/ufs/ffs/ffs_extern.h
@@ -67,6 +67,7 @@ struct vop_balloc_args;
 struct vop_bmap_args;
 struct vop_fsync_args;
 struct vop_reallocblks_args;
+struct vop_copyonwrite_args;
 
 int	ffs_alloc __P((struct inode *,
 	    ufs_daddr_t, ufs_daddr_t, int, struct ucred *, ufs_daddr_t *));
@@ -76,6 +77,7 @@ void	ffs_blkfree __P((struct inode *, ufs_daddr_t, long));
 ufs_daddr_t ffs_blkpref __P((struct inode *, ufs_daddr_t, int, ufs_daddr_t *));
 int	ffs_bmap __P((struct vop_bmap_args *));
 void	ffs_clrblock __P((struct fs *, u_char *, ufs_daddr_t));
+int	ffs_copyonwrite __P((struct vop_copyonwrite_args *ap));
 int	ffs_fhtovp __P((struct mount *, struct fid *, struct vnode **));
 int	ffs_flushfiles __P((struct mount *, int, struct proc *));
 void	ffs_fragacct __P((struct fs *, int, int32_t [], int));
@@ -89,6 +91,10 @@ int	ffs_reallocblks __P((struct vop_reallocblks_args *));
 int	ffs_realloccg __P((struct inode *,
 	    ufs_daddr_t, ufs_daddr_t, int, int, struct ucred *, struct buf **));
 void	ffs_setblock __P((struct fs *, u_char *, ufs_daddr_t));
+int	ffs_snapblkfree __P((struct inode *freeip, ufs_daddr_t bno, long size));
+int	ffs_snapshot __P((struct mount *mp, char *snapfile));
+void	ffs_snapshot_mount __P((struct mount *mp));
+void	ffs_snapshot_unmount __P((struct mount *mp));
 int	ffs_statfs __P((struct mount *, struct statfs *, struct proc *));
 int	ffs_sync __P((struct mount *, int, struct ucred *, struct proc *));
 int	ffs_truncate __P((struct vnode *, off_t, int, struct ucred *, struct proc *));
diff --git a/sys/ufs/ffs/ffs_snapshot.c b/sys/ufs/ffs/ffs_snapshot.c
new file mode 100644
index 0000000..73da537
--- /dev/null
+++ b/sys/ufs/ffs/ffs_snapshot.c
@@ -0,0 +1,1028 @@
+/*
+ * Copyright 2000 Marshall Kirk McKusick. All Rights Reserved.
+ *
+ * Further information about snapshots can be obtained from:
+ *
+ *	Marshall Kirk McKusick		http://www.mckusick.com/softdep/
+ *	1614 Oxford Street		mckusick@mckusick.com
+ *	Berkeley, CA 94709-1608		+1-510-843-9542
+ *	USA
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY MARSHALL KIRK MCKUSICK ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED.  IN NO EVENT SHALL MARSHALL KIRK MCKUSICK BE LIABLE FOR
+ * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)ffs_snapshot.c	8.10 (McKusick) 7/11/00
+ * $FreeBSD$
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/bio.h>
+#include <sys/buf.h>
+#include <sys/proc.h>
+#include <sys/namei.h>
+#include <sys/stat.h>
+#include <sys/malloc.h>
+#include <sys/mount.h>
+#include <sys/resource.h>
+#include <sys/resourcevar.h>
+#include <sys/vnode.h>
+
+#include <ufs/ufs/extattr.h>
+#include <ufs/ufs/quota.h>
+#include <ufs/ufs/ufsmount.h>
+#include <ufs/ufs/inode.h>
+#include <ufs/ufs/ufs_extern.h>
+
+#include <ufs/ffs/fs.h>
+#include <ufs/ffs/ffs_extern.h>
+
+#define KERNCRED proc0.p_ucred
+#define CURPROC curproc
+#define DEBUG
+
+static int indiracct __P((struct vnode *, struct vnode *, int, ufs_daddr_t,
+	int, int, int, int));
+static int snapacct __P((struct vnode *, ufs_daddr_t *, ufs_daddr_t *));
+static int readblock __P((struct buf *, daddr_t));
+
+#ifdef DEBUG
+#include <sys/sysctl.h>
+int snapdebug = 0;
+SYSCTL_INT(_debug, OID_AUTO, snapdebug, CTLFLAG_RW, &snapdebug, 0, "");
+#endif /* DEBUG */
+
+/*
+ * Create a snapshot file and initialize it for the filesystem.
+ */
+int
+ffs_snapshot(mp, snapfile)
+	struct mount *mp;
+	char *snapfile;
+{
+	ufs_daddr_t rlbn;
+	ufs_daddr_t lbn, blkno, copyblkno, inoblks[FSMAXSNAP];
+	int error, cg, snaploc, indiroff, numblks;
+	int i, size, base, len, loc, inoblkcnt;
+	int blksperindir, flag = mp->mnt_flag;
+	struct fs *fs = VFSTOUFS(mp)->um_fs;
+	struct proc *p = CURPROC;
+	struct inode *devip, *ip, *xp;
+	struct buf *bp, *nbp, *ibp;
+	struct vnode *vp, *devvp;
+	struct nameidata nd;
+	struct mount *wrtmp;
+	struct dinode *dip;
+	struct vattr vat;
+	struct cg *cgp;
+
+	/*
+	 * Need to serialize access to snapshot code per filesystem.
+	 */
+	/*
+	 * Assign a snapshot slot in the superblock.
+	 */
+	for (snaploc = 0; snaploc < FSMAXSNAP; snaploc++)
+		if (fs->fs_snapinum[snaploc] == 0)
+			break;
+	if (snaploc == FSMAXSNAP)
+		return (ENOSPC);
+	/*
+	 * Create the snapshot file.
+	 */
+restart:
+	NDINIT(&nd, CREATE, LOCKPARENT | LOCKLEAF, UIO_USERSPACE, snapfile, p);
+	if ((error = namei(&nd)) != 0)
+		return (error);
+	if (nd.ni_vp != NULL) {
+		vput(nd.ni_vp);
+		error = EEXIST;
+	}
+	if (nd.ni_dvp->v_mount != mp)
+		error = EXDEV;
+	if (error) {
+		NDFREE(&nd, NDF_ONLY_PNBUF);
+		if (nd.ni_dvp == nd.ni_vp)
+			vrele(nd.ni_dvp);
+		else
+			vput(nd.ni_dvp);
+		return (error);
+	}
+	VATTR_NULL(&vat);
+	vat.va_type = VREG;
+	vat.va_mode = S_IRUSR;
+	vat.va_vaflags |= VA_EXCLUSIVE;
+	if (VOP_GETWRITEMOUNT(nd.ni_dvp, &wrtmp))
+		wrtmp = NULL;
+	if (wrtmp != mp)
+		panic("ffs_snapshot: mount mismatch");
+	if (vn_start_write(wrtmp, V_NOWAIT) != 0) {
+		NDFREE(&nd, NDF_ONLY_PNBUF);
+		vput(nd.ni_dvp);
+		if ((error = vn_start_write(wrtmp, V_XSLEEP | PCATCH)) != 0)
+			return (error);
+		goto restart;
+	}
+	VOP_LEASE(nd.ni_dvp, p, KERNCRED, LEASE_WRITE);
+	error = VOP_CREATE(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vat);
+	vput(nd.ni_dvp);
+	if (error) {
+		NDFREE(&nd, NDF_ONLY_PNBUF);
+		vn_finished_write(wrtmp);
+		return (error);
+	}
+	vp = nd.ni_vp;
+	ip = VTOI(vp);
+	devvp = ip->i_devvp;
+	devip = VTOI(devvp);
+	/*
+	 * Allocate and copy the last block contents so as to be able
+	 * to set size to that of the filesystem.
+	 */
+	numblks = howmany(fs->fs_size, fs->fs_frag);
+	error = VOP_BALLOC(vp, lblktosize(fs, (off_t)(numblks - 1)),
+	    fs->fs_bsize, KERNCRED, B_CLRBUF, &bp);
+	if (error)
+		goto out;
+	ip->i_size = lblktosize(fs, (off_t)numblks);
+	ip->i_flag |= IN_CHANGE | IN_UPDATE;
+	if ((error = readblock(bp, numblks - 1)) != 0)
+		goto out;
+	bawrite(bp);
+	/*
+	 * Preallocate critical data structures so that we can copy
+	 * them in without further allocation after we suspend all
+	 * operations on the filesystem. We would like to just release
+	 * the allocated buffers without writing them since they will
+	 * be filled in below once we are ready to go, but this upsets
+	 * the soft update code, so we go ahead and write the new buffers.
+	 *
+	 * Allocate all indirect blocks. Also allocate shadow copies
+	 * for each of the indirect blocks.
+	 */
+	for (blkno = NDADDR; blkno < numblks; blkno += NINDIR(fs)) {
+		error = VOP_BALLOC(vp, lblktosize(fs, (off_t)blkno),
+		    fs->fs_bsize, p->p_ucred, B_METAONLY, &ibp);
+		if (error)
+			goto out;
+		copyblkno = fragstoblks(fs, dbtofsb(fs, ibp->b_blkno));
+		bdwrite(ibp);
+		error = VOP_BALLOC(vp, lblktosize(fs, (off_t)copyblkno),
+		    fs->fs_bsize, p->p_ucred, 0, &nbp);
+		if (error)
+			goto out;
+		bawrite(nbp);
+	}
+	/*
+	 * Allocate shadow blocks to copy all of the other snapshot inodes
+	 * so that we will be able to expunge them from this snapshot.
+	 */
+	for (loc = 0, inoblkcnt = 0; loc < snaploc; loc++) {
+		blkno = fragstoblks(fs, ino_to_fsba(fs, fs->fs_snapinum[loc]));
+		for (i = 0; i < inoblkcnt; i++)
+			if (inoblks[i] == blkno)
+				break;
+		if (i == inoblkcnt) {
+			inoblks[inoblkcnt++] = blkno;
+			error = VOP_BALLOC(vp, lblktosize(fs, (off_t)blkno),
+			    fs->fs_bsize, KERNCRED, 0, &nbp);
+			if (error)
+				goto out;
+			bawrite(nbp);
+		}
+	}
+	/*
+	 * Allocate all cylinder group blocks.
+	 */
+	for (cg = 0; cg < fs->fs_ncg; cg++) {
+		error = VOP_BALLOC(vp, (off_t)(cgtod(fs, cg)) << fs->fs_fshift,
+		    fs->fs_bsize, KERNCRED, 0, &nbp);
+		if (error)
+			goto out;
+		bawrite(nbp);
+	}
+	/*
+	 * Allocate copies for the superblock and its summary information.
+	 */
+	error = VOP_BALLOC(vp, (off_t)(SBOFF), fs->fs_bsize, KERNCRED,
+	    0, &nbp);
+	if (error)
+		goto out;
+	bawrite(nbp);
+	blkno = fragstoblks(fs, fs->fs_csaddr);
+	len = howmany(fs->fs_cssize, fs->fs_bsize);
+	for (loc = 0; loc < len; loc++) {
+		error = VOP_BALLOC(vp, lblktosize(fs, (off_t)(blkno + loc)),
+		    fs->fs_bsize, KERNCRED, 0, &nbp);
+		if (error)
+			goto out;
+		bawrite(nbp);
+	}
+	/*
+	 * Change inode to snapshot type file.
+	 */
+	ip->i_flags |= SF_IMMUTABLE | SF_SNAPSHOT;
+	ip->i_flag |= IN_CHANGE | IN_UPDATE;
+	/*
+	 * Ensure that the snapshot is completely on disk.
+	 */
+	if ((error = VOP_FSYNC(vp, KERNCRED, MNT_WAIT, p)) != 0)
+		goto out;
+	/*
+	 * All allocations are done, so we can now snapshot the system.
+	 *
+	 * Suspend operation on filesystem.
+	 */
+	for (;;) {
+		vn_finished_write(wrtmp);
+		vfs_write_suspend(vp->v_mount);
+		if (mp->mnt_kern_flag & MNTK_SUSPENDED)
+			break;
+		vn_start_write(wrtmp, V_WAIT);
+	}
+	/*
+	 * First, copy all the cylinder group maps. All the unallocated
+	 * blocks are marked BLK_NOCOPY so that the snapshot knows that
+	 * it need not copy them if they are later written.
+	 */
+	len = howmany(fs->fs_fpg, fs->fs_frag);
+	for (cg = 0; cg < fs->fs_ncg; cg++) {
+		error = bread(ip->i_devvp, fsbtodb(fs, cgtod(fs, cg)),
+			(int)fs->fs_cgsize, KERNCRED, &bp);
+		if (error) {
+			brelse(bp);
+			goto out1;
+		}
+		cgp = (struct cg *)bp->b_data;
+		if (!cg_chkmagic(cgp)) {
+			brelse(bp);
+			error = EIO;
+			goto out1;
+		}
+		error = bread(vp, fragstoblks(fs, cgtod(fs, cg)), fs->fs_bsize,
+			KERNCRED, &nbp);
+		if (error) {
+			brelse(bp);
+			brelse(nbp);
+			goto out1;
+		}
+		bcopy(bp->b_data, nbp->b_data, fs->fs_cgsize);
+		if (fs->fs_cgsize < fs->fs_bsize)
+			bzero(&nbp->b_data[fs->fs_cgsize],
+			    fs->fs_bsize - fs->fs_cgsize);
+		bawrite(nbp);
+		base = cg * fs->fs_fpg / fs->fs_frag;
+		if (base + len > numblks)
+			len = numblks - base;
+		loc = 0;
+		if (base < NDADDR) {
+			for ( ; loc < NDADDR; loc++) {
+				if (!ffs_isblock(fs, cg_blksfree(cgp), loc))
+					continue;
+				ip->i_db[loc] = BLK_NOCOPY;
+			}
+		}
+		error = VOP_BALLOC(vp, lblktosize(fs, (off_t)(base + loc)),
+		    fs->fs_bsize, KERNCRED, B_METAONLY, &ibp);
+		if (error) {
+			brelse(bp);
+			goto out1;
+		}
+		indiroff = (base + loc - NDADDR) % NINDIR(fs);
+		for ( ; loc < len; loc++, indiroff++) {
+			if (indiroff >= NINDIR(fs)) {
+				bawrite(ibp);
+				error = VOP_BALLOC(vp,
+				    lblktosize(fs, (off_t)(base + loc)),
+				    fs->fs_bsize, KERNCRED, B_METAONLY, &ibp);
+				if (error) {
+					brelse(bp);
+					goto out1;
+				}
+				indiroff = 0;
+			}
+			if (!ffs_isblock(fs, cg_blksfree(cgp), loc))
+				continue;
+			((ufs_daddr_t *)(ibp->b_data))[indiroff] = BLK_NOCOPY;
+		}
+		brelse(bp);
+		bdwrite(ibp);
+	}
+	/*
+	 * Snapshot the superblock and its summary information.
+	 */
+	error = VOP_BALLOC(vp, (off_t)(SBOFF), fs->fs_bsize, KERNCRED,
+	    0, &nbp);
+	if (error)
+		goto out1;
+	bcopy(fs, nbp->b_data, fs->fs_sbsize);
+	((struct fs *)(nbp->b_data))->fs_clean = 1;
+	if (fs->fs_sbsize < fs->fs_bsize)
+		bzero(&nbp->b_data[fs->fs_sbsize],
+		    fs->fs_bsize - fs->fs_sbsize);
+	bawrite(nbp);
+	blkno = fragstoblks(fs, fs->fs_csaddr);
+	len = howmany(fs->fs_cssize, fs->fs_bsize) - 1;
+	size = fs->fs_bsize;
+	for (loc = 0; loc <= len; loc++) {
+		error = VOP_BALLOC(vp, lblktosize(fs, (off_t)(blkno + loc)),
+		    fs->fs_bsize, KERNCRED, 0, &nbp);
+		if (error)
+			goto out1;
+		if (loc == len) {
+			readblock(nbp, blkno + loc);
+			size = fs->fs_cssize % fs->fs_bsize;
+		}
+		bcopy(fs->fs_csp[loc], nbp->b_data, size);
+		bawrite(nbp);
+	}
+	/*
+	 * Copy the shadow blocks for the snapshot inodes so that
+	 * the copies can be expunged.
+	 */
+	for (loc = 0; loc < inoblkcnt; loc++) {
+		error = VOP_BALLOC(vp, lblktosize(fs, (off_t)inoblks[loc]),
+		    fs->fs_bsize, KERNCRED, 0, &nbp);
+		if (error)
+			goto out1;
+		readblock(nbp, inoblks[loc]);
+		bdwrite(nbp);
+	}
+	/*
+	 * Copy allocation information from other snapshots and then
+	 * expunge them from the view of the current snapshot.
+	 */
+	for (xp = devip->i_copyonwrite; xp; xp = xp->i_copyonwrite) {
+		/*
+		 * Before expunging a snapshot inode, note all the
+		 * blocks that it claims with BLK_SNAP so that fsck will
+		 * be able to account for those blocks properly and so
+		 * that this snapshot knows that it need not copy them
+		 * if the other snapshot holding them is freed.
+		 */
+		if ((error = snapacct(vp, &xp->i_db[0], &xp->i_ib[NIADDR])) !=0)
+			goto out1;
+		blksperindir = 1;
+		lbn = -NDADDR;
+		len = numblks - NDADDR;
+		rlbn = NDADDR;
+		for (i = 0; len > 0 && i < NIADDR; i++) {
+			error = indiracct(vp, ITOV(xp), i, xp->i_ib[i], lbn,
+			    rlbn, len, blksperindir);
+			if (error)
+				goto out1;
+			blksperindir *= NINDIR(fs);
+			lbn -= blksperindir + 1;
+			len -= blksperindir;
+			rlbn += blksperindir;
+		}
+		/*
+		 * Set copied snapshot inode to be a zero length file.
+		 */
+		blkno = fragstoblks(fs, ino_to_fsba(fs, xp->i_number));
+		error = VOP_BALLOC(vp, lblktosize(fs, (off_t)blkno),
+		    fs->fs_bsize, KERNCRED, 0, &nbp);
+		if (error)
+			goto out1;
+		dip = (struct dinode *)nbp->b_data +
+		    ino_to_fsbo(fs, xp->i_number);
+		dip->di_size = 0;
+		dip->di_blocks = 0;
+		dip->di_flags &= ~(SF_IMMUTABLE | SF_SNAPSHOT);
+		bzero(&dip->di_db[0], (NDADDR + NIADDR) * sizeof(ufs_daddr_t));
+		bdwrite(nbp);
+	}
+	/*
+	 * Copy all indirect blocks to their shadows (allocated above)
+	 * to avoid deadlock in ffs_copyonwrite.
+	 */
+	for (blkno = NDADDR; blkno < numblks; blkno += NINDIR(fs)) {
+		error = VOP_BALLOC(vp, lblktosize(fs, (off_t)blkno),
+		    fs->fs_bsize, p->p_ucred, B_METAONLY, &ibp);
+		if (error)
+			goto out1;
+		copyblkno = fragstoblks(fs, dbtofsb(fs, ibp->b_blkno));
+		brelse(ibp);
+		error = VOP_BALLOC(vp, lblktosize(fs, (off_t)copyblkno),
+		    fs->fs_bsize, p->p_ucred, 0, &nbp);
+		if (error)
+			goto out1;
+		error = VOP_BALLOC(vp, lblktosize(fs, (off_t)blkno),
+		    fs->fs_bsize, p->p_ucred, B_METAONLY, &ibp);
+		if (error) {
+			brelse(nbp);
+			goto out1;
+		}
+		bcopy(ibp->b_data, nbp->b_data, fs->fs_bsize);
+		brelse(ibp);
+		bawrite(nbp);
+	}
+	/*
+	 * Record snapshot inode. Since this is the newest snapshot,
+	 * it must be placed at the end of the list.
+	 */
+	fs->fs_snapinum[snaploc] = ip->i_number;
+	if (ip->i_copyonwrite != 0)
+		panic("ffs_snapshot: %d already on list", ip->i_number);
+	if (devip->i_copyonwrite == 0) {
+		devvp->v_flag |= VCOPYONWRITE;
+		devip->i_copyonwrite = ip;
+	} else {
+		for (xp = devip->i_copyonwrite; xp->i_copyonwrite != 0; )
+			xp = xp->i_copyonwrite;
+		xp->i_copyonwrite = ip;
+	}
+	vp->v_flag |= VSYSTEM;
+	/*
+	 * Resume operation on filesystem.
+	 */
+out1:
+	vfs_write_resume(vp->v_mount);
+	vn_start_write(wrtmp, V_WAIT);
+out:
+	mp->mnt_flag = flag;
+	(void) VOP_FSYNC(vp, KERNCRED, MNT_WAIT, p);
+	if (error)
+		vput(vp);
+	else
+		VOP_UNLOCK(vp, 0, p);
+	vn_finished_write(wrtmp);
+	return (error);
+}
+
+/*
+ * Descend an indirect block chain of vnode cancelvp, calling snapacct() on
+ * snapvp for each indirect block's pointers. Returns 0 or an error number.
+ */ 
+static int
+indiracct(snapvp, cancelvp, level, blkno, lbn, rlbn, remblks, blksperindir)
+	struct vnode *snapvp;	/* vnode handed to snapacct() for marking */
+	struct vnode *cancelvp;	/* vnode whose indirect blocks are walked */
+	int level;		/* 0 means do not descend any further */
+	ufs_daddr_t blkno;	/* address of this indirect block, never 0 */
+	int lbn;		/* its logical block number in cancelvp */
+	int rlbn;		/* block number given to ufs_getlbns() */
+	int remblks;		/* blocks left to cover; limits "last" */
+	int blksperindir;	/* blocks covered by each pointer here */
+{
+	int subblksperindir, error, last, num, i;
+	struct indir indirs[NIADDR + 2];
+	ufs_daddr_t *bap;
+	struct buf *bp;
+	struct fs *fs;
+
+	if ((error = ufs_getlbns(cancelvp, rlbn, indirs, &num)) != 0)
+		return (error);
+	if (lbn != indirs[num - 1 - level].in_lbn || blkno == 0 || num < 2)
+		panic("indiracct: botched params");
+	/*
+	 * We have to expand bread here since it will deadlock looking
+	 * up the block number for any blocks that are not in the cache.
+	 */
+	fs = VTOI(cancelvp)->i_fs;
+	bp = getblk(cancelvp, lbn, fs->fs_bsize, 0, 0);
+	bp->b_blkno = fsbtodb(fs, blkno);
+	if ((bp->b_flags & (B_DONE | B_DELWRI)) == 0 &&
+	    (error = readblock(bp, fragstoblks(fs, blkno)))) {
+		brelse(bp);
+		return (error);
+	}
+	/*
+	 * Account for the block pointers in this indirect block.
+	 */
+	last = howmany(remblks, blksperindir);
+	if (last > NINDIR(fs))
+		last = NINDIR(fs);
+	if (snapvp != cancelvp) {
+		bap = (ufs_daddr_t *)bp->b_data;
+	} else {
+		MALLOC(bap, ufs_daddr_t *, fs->fs_bsize, M_DEVBUF, M_WAITOK);
+		bcopy(bp->b_data, (caddr_t)bap, fs->fs_bsize);
+		brelse(bp);
+	}
+	error = snapacct(snapvp, &bap[0], &bap[last]);
+	if (error || level == 0)
+		goto out;
+	/*
+	 * Account for the block pointers in each of the indirect blocks
+	 * in the levels below us.
+	 */
+	subblksperindir = blksperindir / NINDIR(fs);
+	for (lbn++, level--, i = 0; i < last; i++) {
+		error = indiracct(snapvp, cancelvp, level, bap[i], lbn,
+		    rlbn, remblks, subblksperindir);
+		if (error)
+			goto out;
+		rlbn += blksperindir;
+		lbn -= blksperindir;
+		remblks -= blksperindir;
+	}
+out:
+	if (snapvp != cancelvp)
+		brelse(bp);
+	else
+		FREE(bap, M_DEVBUF);
+	return (error);
+}
+
+/*
+ * Mark every block listed in [oldblkp, lastblkp) as BLK_SNAP in vp's map.
+ */
+static int
+snapacct(vp, oldblkp, lastblkp)
+	struct vnode *vp;	/* vnode whose block map is updated */
+	ufs_daddr_t *oldblkp, *lastblkp;	/* first entry, one past last */
+{
+	struct inode *ip = VTOI(vp);
+	struct fs *fs = ip->i_fs;
+	ufs_daddr_t lbn, blkno, *blkp;
+	struct buf *ibp;
+	int error;
+
+	for ( ; oldblkp < lastblkp; oldblkp++) {
+		blkno = *oldblkp;
+		if (blkno == 0 || blkno == BLK_NOCOPY || blkno == BLK_SNAP)
+			continue;	/* empty or marker entry: nothing to do */
+		lbn = fragstoblks(fs, blkno);	/* slot for blkno in vp's map */
+		if (lbn < NDADDR) {
+			blkp = &ip->i_db[lbn];
+			ip->i_flag |= IN_CHANGE | IN_UPDATE;
+		} else {
+			error = VOP_BALLOC(vp, lblktosize(fs, (off_t)lbn),
+			    fs->fs_bsize, KERNCRED, B_METAONLY, &ibp);
+			if (error)
+				return (error);
+			blkp = &((ufs_daddr_t *)(ibp->b_data))
+			    [(lbn - NDADDR) % NINDIR(fs)];
+		}
+		if (*blkp != 0)		/* the slot must still be empty */
+			panic("snapacct: bad block");
+		*blkp = BLK_SNAP;
+		if (lbn >= NDADDR)	/* ibp was obtained only in this case */
+			bdwrite(ibp);
+	}
+	return (0);
+}
+
+/*
+ * Prepare snapshot vp for removal: unlist it, clear its markers, drop a ref.
+ */
+void
+ffs_snapremove(vp)
+	struct vnode *vp;	/* vnode of the snapshot file being removed */
+{
+	struct inode *ip, *xp;
+	struct vnode *devvp;
+	struct buf *ibp;
+	struct fs *fs;
+	ufs_daddr_t blkno, dblk;
+	int error, snaploc, loc, last;
+
+	ip = VTOI(vp);
+	fs = ip->i_fs;
+	/*
+	 * Delete snapshot inode from superblock. Keep list dense.
+	 */
+	for (snaploc = 0; snaploc < FSMAXSNAP; snaploc++)
+		if (fs->fs_snapinum[snaploc] == ip->i_number)
+			break;
+	if (snaploc < FSMAXSNAP) {
+		for (snaploc++; snaploc < FSMAXSNAP; snaploc++) {
+			if (fs->fs_snapinum[snaploc] == 0)
+				break;
+			fs->fs_snapinum[snaploc - 1] = fs->fs_snapinum[snaploc];
+		}
+		fs->fs_snapinum[snaploc - 1] = 0;	/* slot freed by the shift */
+	}
+	/*
+	 * Delete from incore list.
+	 * Clear copy-on-write flag if last snapshot.
+	 */
+	devvp = ip->i_devvp;
+	for (xp = VTOI(devvp); xp; xp = xp->i_copyonwrite) {
+		if (xp->i_copyonwrite != ip)
+			continue;
+		xp->i_copyonwrite = ip->i_copyonwrite;
+		ip->i_copyonwrite = 0;
+		break;
+	}
+	if (xp == 0) {
+		printf("ffs_snapremove: lost snapshot vnode %d\n",
+		    ip->i_number);
+		vref(vp);	/* presumably balances vrele() below */
+	}
+	if (VTOI(devvp)->i_copyonwrite == 0)
+		devvp->v_flag &= ~VCOPYONWRITE;
+	/*
+	 * Clear BLK_NOCOPY/BLK_SNAP and pass claimed blocks to other snapshots
+	 * (ffs_snapblkfree). NOTE(review): 2nd loop tests blkno, not blkno + loc.
+	 */
+	for (blkno = 1; blkno < NDADDR; blkno++) {	/* skips i_db[0] */
+		dblk = ip->i_db[blkno];
+		if (dblk == BLK_NOCOPY || dblk == BLK_SNAP ||
+		    (dblk == blkstofrags(fs, blkno) &&
+		     ffs_snapblkfree(ip, dblk, fs->fs_bsize)))
+			ip->i_db[blkno] = 0;
+	}
+	for (blkno = NDADDR; blkno < fs->fs_size; blkno += NINDIR(fs)) {
+		error = VOP_BALLOC(vp, lblktosize(fs, (off_t)blkno),
+		    fs->fs_bsize, KERNCRED, B_METAONLY, &ibp);
+		if (error)
+			continue;	/* failures are skipped silently */
+		if ((last = fs->fs_size - blkno) > NINDIR(fs))
+			last = NINDIR(fs);
+		for (loc = 0; loc < last; loc++) {
+			dblk = ((ufs_daddr_t *)(ibp->b_data))[loc];
+			if (dblk == BLK_NOCOPY || dblk == BLK_SNAP ||
+			    (dblk == blkstofrags(fs, blkno) &&
+			     ffs_snapblkfree(ip, dblk, fs->fs_bsize)))
+				((ufs_daddr_t *)(ibp->b_data))[loc] = 0;
+		}
+		bawrite(ibp);
+	}
+	/*
+	 * Clear snapshot flag and drop reference.
+	 */
+	ip->i_flags &= ~(SF_IMMUTABLE | SF_SNAPSHOT);
+	ip->i_flag |= IN_CHANGE | IN_UPDATE;
+	vrele(vp);
+}
+
+/*
+ * Notification that a block is being freed. Return zero if the free
+ * should be allowed to proceed. Return non-zero if the snapshot file
+ * wants to claim the block. The block will be claimed if it is an
+ * uncopied part of one of the snapshots. It will be freed if it is
+ * either a BLK_NOCOPY or has already been copied in all of the snapshots.
+ * If a fragment is being freed, then all snapshots that care about
+ * it must make a copy since a snapshot file can only claim full sized
+ * blocks. Note that if more than one snapshot file maps the block,
+ * we can pick one at random to claim it. Since none of the snapshots
+ * can change, we are assured that they will all see the same unmodified
+ * image. When deleting a snapshot file (see ffs_snapremove above), we
+ * must push any of these claimed blocks to one of the other snapshots
+ * that maps it. These claimed blocks are easily identified as they will
+ * have a block number equal to their logical block number within the
+ * snapshot. A copied block can never have this property because they
+ * must always have been allocated from a BLK_NOCOPY location.
+ */
+int
+ffs_snapblkfree(freeip, bno, size)
+	struct inode *freeip;	/* inode whose block is being freed */
+	ufs_daddr_t bno;	/* address of the block being freed */
+	long size;		/* bytes freed; fs_bsize means a full block */
+{
+	struct buf *ibp, *cbp, *savedcbp = 0;
+	struct fs *fs = freeip->i_fs;
+	struct proc *p = CURPROC;
+	struct inode *ip;
+	struct vnode *vp;
+	ufs_daddr_t lbn, blkno;
+	int indiroff = 0, error = 0, claimedblk = 0;
+
+	lbn = fragstoblks(fs, bno);
+	for (ip = VTOI(freeip->i_devvp)->i_copyonwrite; ip;
+	     ip = ip->i_copyonwrite) {
+		vp = ITOV(ip);
+		/*
+		 * Lookup block being freed.
+		 */
+		if (lbn < NDADDR) {
+			blkno = ip->i_db[lbn];
+		} else {
+			vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p);
+			p->p_flag |= P_COWINPROGRESS;
+			error = VOP_BALLOC(vp, lblktosize(fs, (off_t)lbn),
+			    fs->fs_bsize, KERNCRED, B_METAONLY, &ibp);
+			p->p_flag &= ~P_COWINPROGRESS;
+			VOP_UNLOCK(vp, 0, p);
+			if (error)
+				break;
+			indiroff = (lbn - NDADDR) % NINDIR(fs);
+			blkno = ((ufs_daddr_t *)(ibp->b_data))[indiroff];
+		}
+		/*
+		 * Check to see if block needs to be copied.
+		 */
+		switch (blkno) {
+		/*
+		 * If the snapshot has already copied the block (default),
+		 * or does not care about the block, it is not needed.
+		 */
+		default:
+		case BLK_NOCOPY:
+			if (lbn >= NDADDR)
+				brelse(ibp);
+			continue;
+		/*
+		 * No previous snapshot claimed the block, so it will be
+		 * freed and become a BLK_NOCOPY (don't care) for us.
+		 */
+		case BLK_SNAP:
+			if (claimedblk)
+				panic("snapblkfree: inconsistent block type");
+			vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p);
+			if (lbn < NDADDR) {
+				ip->i_db[lbn] = BLK_NOCOPY;
+				ip->i_flag |= IN_CHANGE | IN_UPDATE;
+			} else {
+				((ufs_daddr_t *)(ibp->b_data))[indiroff] =
+				    BLK_NOCOPY;
+				bdwrite(ibp);
+			}
+			VOP_UNLOCK(vp, 0, p);
+			continue;
+		/*
+		 * A block that we map is being freed. If it has not been
+		 * claimed yet, we will claim or copy it (below).
+		 */
+		case 0:
+			claimedblk = 1;
+			break;
+		}
+		/*
+		 * If this is a full size block, we will just grab it
+		 * and assign it to the snapshot inode. Otherwise we
+		 * will proceed to copy it. See explanation for this
+		 * routine as to why only a single snapshot needs to
+		 * claim this block.
+		 */
+		if (size == fs->fs_bsize) {
+#ifdef DEBUG
+			if (snapdebug)
+				printf("%s %d lbn %d from inum %d\n",
+				    "Grabonremove: snapino", ip->i_number, lbn,
+				    freeip->i_number);
+#endif
+			vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p);
+			if (lbn < NDADDR) {
+				ip->i_db[lbn] = bno;
+			} else {
+				((ufs_daddr_t *)(ibp->b_data))[indiroff] = bno;
+				bdwrite(ibp);
+			}
+			ip->i_blocks += btodb(size);
+			ip->i_flag |= IN_CHANGE | IN_UPDATE;
+			VOP_UNLOCK(vp, 0, p);
+			return (1);	/* claimed: caller must not free it */
+		}
+		if (lbn >= NDADDR)
+			brelse(ibp);
+		/*
+		 * Allocate the block into which to do the copy. Note that this
+		 * allocation will never require any additional allocations for
+		 * the snapshot inode.
+		 */
+		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p);
+		p->p_flag |= P_COWINPROGRESS;
+		error = VOP_BALLOC(vp, lblktosize(fs, (off_t)lbn),
+		    fs->fs_bsize, KERNCRED, 0, &cbp);
+		p->p_flag &= ~P_COWINPROGRESS;
+		VOP_UNLOCK(vp, 0, p);
+		if (error)
+			break;
+#ifdef DEBUG
+		if (snapdebug)
+			printf("%s%d lbn %d for inum %d size %ld to blkno %d\n",
+			    "Copyonremove: snapino ", ip->i_number, lbn,
+			    freeip->i_number, size, cbp->b_blkno);
+#endif
+		/*
+		 * If we have already read the old block contents, then
+		 * simply copy them to the new block.
+		 */
+		if (savedcbp != 0) {
+			bcopy(savedcbp->b_data, cbp->b_data, fs->fs_bsize);
+			bawrite(cbp);
+			continue;
+		}
+		/*
+		 * Otherwise, read the old block contents into the buffer.
+		 */
+		if ((error = readblock(cbp, lbn)) != 0)
+			break;	/* NOTE(review): cbp is not released here */
+		savedcbp = cbp;
+	}
+	if (savedcbp)
+		bawrite(savedcbp);
+	/*
+	 * If we have been unable to allocate a block in which to do
+	 * the copy, then return non-zero so that the fragment will
+	 * not be freed. Although space will be lost, the snapshot
+	 * will stay consistent.
+	 */
+	return (error);
+}
+
+/*
+ * Associate snapshot files when mounting: build the i_copyonwrite list.
+ */
+void
+ffs_snapshot_mount(mp)
+	struct mount *mp;	/* mount whose fs_snapinum[] list is scanned */
+{
+	struct ufsmount *ump = VFSTOUFS(mp);
+	struct fs *fs = ump->um_fs;
+	struct proc *p = CURPROC;
+	struct inode *ip, **listtailp;
+	struct vnode *vp;
+	int error, snaploc, loc;
+
+	listtailp = &VTOI(ump->um_devvp)->i_copyonwrite;
+	for (snaploc = 0; snaploc < FSMAXSNAP; snaploc++) {
+		if (fs->fs_snapinum[snaploc] == 0)
+			return;		/* first empty slot ends the scan */
+		if ((error = VFS_VGET(mp, fs->fs_snapinum[snaploc], &vp)) != 0){
+			printf("ffs_snapshot_mount: vget failed %d\n", error);
+			continue;
+		}
+		ip = VTOI(vp);
+		if ((ip->i_flags & SF_SNAPSHOT) == 0) {
+			printf("ffs_snapshot_mount: non-snapshot inode %d\n",
+			    fs->fs_snapinum[snaploc]);
+			vput(vp);
+			for (loc = snaploc + 1; loc < FSMAXSNAP; loc++) {
+				if (fs->fs_snapinum[loc] == 0)
+					break;
+				fs->fs_snapinum[loc - 1] = fs->fs_snapinum[loc];
+			}
+			fs->fs_snapinum[loc - 1] = 0;
+			snaploc--;	/* revisit the entry shifted into this slot */
+			continue;
+		}
+		if (ip->i_copyonwrite != 0)
+			panic("ffs_snapshot_mount: %d already on list",
+			    ip->i_number);
+		*listtailp = ip;	/* append, keeping fs_snapinum[] order */
+		listtailp = &ip->i_copyonwrite;
+		vp->v_flag |= VSYSTEM;
+		VOP_UNLOCK(vp, 0, p);
+		ump->um_devvp->v_flag |= VCOPYONWRITE;
+	}
+}
+
+/*
+ * Disassociate snapshot files when unmounting: empty the i_copyonwrite list.
+ */
+void
+ffs_snapshot_unmount(mp)
+	struct mount *mp;	/* mount whose device vnode list is emptied */
+{
+	struct ufsmount *ump = VFSTOUFS(mp);
+	struct inode *devip = VTOI(ump->um_devvp);
+	struct inode *xp;
+
+	while ((xp = devip->i_copyonwrite) != 0) {
+		devip->i_copyonwrite = xp->i_copyonwrite;	/* unlink head */
+		xp->i_copyonwrite = 0;
+		vrele(ITOV(xp));	/* one reference dropped per entry */
+	}
+	ump->um_devvp->v_flag &= ~VCOPYONWRITE;
+}
+
+/*
+ * Before buffer a_bp is written, copy the block's old contents into each
+ * snapshot on a_vp's list whose entry for it is still 0. Returns 0 or errno.
+ */
+int
+ffs_copyonwrite(ap)
+	struct vop_copyonwrite_args /* {
+		struct vnode *a_vp;
+		struct buf *a_bp;
+	} */ *ap;
+{
+	struct buf *ibp, *cbp, *savedcbp = 0, *bp = ap->a_bp;
+	struct fs *fs = VTOI(bp->b_vp)->i_fs;
+	struct proc *p = CURPROC;
+	struct inode *ip;
+	struct vnode *vp;
+	ufs_daddr_t lbn, blkno;
+	int indiroff, error = 0;
+
+	lbn = fragstoblks(fs, dbtofsb(fs, bp->b_blkno));
+	if (p->p_flag & P_COWINPROGRESS)
+		panic("ffs_copyonwrite: recursive call");
+	for (ip = VTOI(ap->a_vp)->i_copyonwrite; ip; ip = ip->i_copyonwrite) {
+		vp = ITOV(ip);
+		/*
+		 * We ensure that everything of our own that needs to be
+		 * copied will be done at the time that ffs_snapshot is
+		 * called. Thus we can skip the check here which can
+		 * deadlock in doing the lookup in VOP_BALLOC.
+		 */
+		if (bp->b_vp == vp)
+			continue;
+		/*
+		 * Check to see if block needs to be copied.
+		 */
+		if (lbn < NDADDR) {
+			blkno = ip->i_db[lbn];
+		} else {
+			vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p);
+			p->p_flag |= P_COWINPROGRESS;
+			error = VOP_BALLOC(vp, lblktosize(fs, (off_t)lbn),
+			    fs->fs_bsize, KERNCRED, B_METAONLY, &ibp);
+			p->p_flag &= ~P_COWINPROGRESS;
+			VOP_UNLOCK(vp, 0, p);
+			if (error)
+				break;
+			indiroff = (lbn - NDADDR) % NINDIR(fs);
+			blkno = ((ufs_daddr_t *)(ibp->b_data))[indiroff];
+			brelse(ibp);
+		}
+#ifdef DIAGNOSTIC
+		if (blkno == BLK_SNAP && bp->b_lblkno >= 0)
+			panic("ffs_copyonwrite: bad copy block");
+#endif
+		if (blkno != 0)		/* only an entry of 0 needs a copy */
+			continue;
+		/*
+		 * Allocate the block into which to do the copy. Note that this
+		 * allocation will never require any additional allocations for
+		 * the snapshot inode.
+		 */
+		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p);
+		p->p_flag |= P_COWINPROGRESS;
+		error = VOP_BALLOC(vp, lblktosize(fs, (off_t)lbn),
+		    fs->fs_bsize, KERNCRED, 0, &cbp);
+		p->p_flag &= ~P_COWINPROGRESS;
+		VOP_UNLOCK(vp, 0, p);
+		if (error)	/* test before the debug code touches cbp */
+			break;
+#ifdef DEBUG
+		if (snapdebug) {
+			printf("Copyonwrite: snapino %d lbn %d for ",
+			    ip->i_number, lbn);
+			if (bp->b_vp == ap->a_vp)
+				printf("fs metadata");
+			else
+				printf("inum %d", VTOI(bp->b_vp)->i_number);
+			printf(" lblkno %d to blkno %d\n", bp->b_lblkno,
+			    cbp->b_blkno);
+		}
+#endif
+		/*
+		 * If we have already read the old block contents, then
+		 * simply copy them to the new block.
+		 */
+		if (savedcbp != 0) {
+			bcopy(savedcbp->b_data, cbp->b_data, fs->fs_bsize);
+			bawrite(cbp);
+			continue;
+		}
+		/*
+		 * Otherwise, read the old block contents into the buffer.
+		 */
+		if ((error = readblock(cbp, lbn)) != 0)
+			break;	/* NOTE(review): cbp is not released here */
+		savedcbp = cbp;
+	}
+	if (savedcbp)
+		bawrite(savedcbp);
+	return (error);
+}
+
+/*
+ * Read filesystem block lbn from the device into bp via physio().
+ * Much of this boiler-plate comes from bwrite().
+ */
+static int
+readblock(bp, lbn)
+	struct buf *bp;		/* destination; b_bcount bytes are read */
+	daddr_t lbn;		/* block number, in filesystem blocks */
+{
+	struct uio auio;
+	struct iovec aiov;
+	struct proc *p = CURPROC;
+	struct inode *ip = VTOI(bp->b_vp);
+
+	aiov.iov_base = bp->b_data;
+	aiov.iov_len = bp->b_bcount;
+	auio.uio_iov = &aiov;
+	auio.uio_iovcnt = 1;
+	auio.uio_offset = dbtob(fsbtodb(ip->i_fs, blkstofrags(ip->i_fs, lbn)));
+	auio.uio_resid = bp->b_bcount;
+	auio.uio_rw = UIO_READ;
+	auio.uio_segflg = UIO_SYSSPACE;
+	auio.uio_procp = p;
+	return (physio(ip->i_devvp->v_rdev, &auio, 0));
+}
diff --git a/sys/ufs/ffs/ffs_softdep.c b/sys/ufs/ffs/ffs_softdep.c
index 40e9669..d9e6414 100644
--- a/sys/ufs/ffs/ffs_softdep.c
+++ b/sys/ufs/ffs/ffs_softdep.c
@@ -508,7 +508,7 @@ softdep_process_worklist(matchmnt)
 {
 	struct proc *p = CURPROC;
 	struct worklist *wk;
-	struct fs *matchfs;
+	struct mount *mp;
 	int matchcnt, loopcount;
 
 	/*
@@ -517,9 +517,6 @@ softdep_process_worklist(matchmnt)
 	 */
 	filesys_syncer = p;
 	matchcnt = 0;
-	matchfs = NULL;
-	if (matchmnt != NULL)
-		matchfs = VFSTOUFS(matchmnt)->um_fs;
 	/*
 	 * There is no danger of having multiple processes run this
 	 * code. It is single threaded solely so that softdep_flushfiles
@@ -550,30 +547,42 @@ softdep_process_worklist(matchmnt)
 
 		case D_DIRREM:
 			/* removal of a directory entry */
-			if (WK_DIRREM(wk)->dm_mnt == matchmnt)
+			mp = WK_DIRREM(wk)->dm_mnt;
+			if (mp == matchmnt)
 				matchcnt += 1;
+			vn_start_write(NULL, &mp, V_WAIT);
 			handle_workitem_remove(WK_DIRREM(wk));
+			vn_finished_write(mp);
 			break;
 
 		case D_FREEBLKS:
 			/* releasing blocks and/or fragments from a file */
-			if (WK_FREEBLKS(wk)->fb_fs == matchfs)
+			mp = WK_FREEBLKS(wk)->fb_mnt;
+			if (mp == matchmnt)
 				matchcnt += 1;
+			vn_start_write(NULL, &mp, V_WAIT);
 			handle_workitem_freeblocks(WK_FREEBLKS(wk));
+			vn_finished_write(mp);
 			break;
 
 		case D_FREEFRAG:
 			/* releasing a fragment when replaced as a file grows */
-			if (WK_FREEFRAG(wk)->ff_fs == matchfs)
+			mp = WK_FREEFRAG(wk)->ff_mnt;
+			if (mp == matchmnt)
 				matchcnt += 1;
+			vn_start_write(NULL, &mp, V_WAIT);
 			handle_workitem_freefrag(WK_FREEFRAG(wk));
+			vn_finished_write(mp);
 			break;
 
 		case D_FREEFILE:
 			/* releasing an inode when its link count drops to 0 */
-			if (WK_FREEFILE(wk)->fx_fs == matchfs)
+			mp = WK_FREEFILE(wk)->fx_mnt;
+			if (mp == matchmnt)
 				matchcnt += 1;
+			vn_start_write(NULL, &mp, V_WAIT);
 			handle_workitem_freefile(WK_FREEFILE(wk));
+			vn_finished_write(mp);
 			break;
 
 		default:
@@ -1316,7 +1325,7 @@ newfreefrag(ip, blkno, size)
 	freefrag->ff_list.wk_type = D_FREEFRAG;
 	freefrag->ff_state = ip->i_uid & ~ONWORKLIST;	/* XXX - used below */
 	freefrag->ff_inum = ip->i_number;
-	freefrag->ff_fs = fs;
+	freefrag->ff_mnt = ITOV(ip)->v_mount;
 	freefrag->ff_devvp = ip->i_devvp;
 	freefrag->ff_blkno = blkno;
 	freefrag->ff_fragsize = size;
@@ -1333,7 +1342,8 @@ handle_workitem_freefrag(freefrag)
 {
 	struct inode tip;
 
-	tip.i_fs = freefrag->ff_fs;
+	tip.i_vnode = NULL;
+	tip.i_fs = VFSTOUFS(freefrag->ff_mnt)->um_fs;
 	tip.i_devvp = freefrag->ff_devvp;
 	tip.i_dev = freefrag->ff_devvp->v_rdev;
 	tip.i_number = freefrag->ff_inum;
@@ -1601,7 +1611,7 @@ softdep_setup_freeblocks(ip, length)
 	freeblks->fb_uid = ip->i_uid;
 	freeblks->fb_previousinum = ip->i_number;
 	freeblks->fb_devvp = ip->i_devvp;
-	freeblks->fb_fs = fs;
+	freeblks->fb_mnt = ITOV(ip)->v_mount;
 	freeblks->fb_oldsize = ip->i_size;
 	freeblks->fb_newsize = length;
 	freeblks->fb_chkcnt = ip->i_blocks;
@@ -1845,7 +1855,7 @@ softdep_freefile(pvp, ino, mode)
 	freefile->fx_mode = mode;
 	freefile->fx_oldinum = ino;
 	freefile->fx_devvp = ip->i_devvp;
-	freefile->fx_fs = ip->i_fs;
+	freefile->fx_mnt = ITOV(ip)->v_mount;
 
 	/*
 	 * If the inodedep does not exist, then the zero'ed inode has
@@ -1949,13 +1959,13 @@ handle_workitem_freeblocks(freeblks)
 	int error, allerror = 0;
 	ufs_lbn_t baselbns[NIADDR], tmpval;
 
+	tip.i_fs = fs = VFSTOUFS(freeblks->fb_mnt)->um_fs;
 	tip.i_number = freeblks->fb_previousinum;
 	tip.i_devvp = freeblks->fb_devvp;
 	tip.i_dev = freeblks->fb_devvp->v_rdev;
-	tip.i_fs = freeblks->fb_fs;
 	tip.i_size = freeblks->fb_oldsize;
 	tip.i_uid = freeblks->fb_uid;
-	fs = freeblks->fb_fs;
+	tip.i_vnode = NULL;
 	tmpval = 1;
 	baselbns[0] = NDADDR;
 	for (i = 1; i < NIADDR; i++) {
@@ -2715,20 +2725,23 @@ static void
 handle_workitem_freefile(freefile)
 	struct freefile *freefile;
 {
+	struct fs *fs;
 	struct vnode vp;
 	struct inode tip;
 	struct inodedep *idp;
 	int error;
 
+	fs = VFSTOUFS(freefile->fx_mnt)->um_fs;
 #ifdef DEBUG
 	ACQUIRE_LOCK(&lk);
-	if (inodedep_lookup(freefile->fx_fs, freefile->fx_oldinum, 0, &idp))
+	if (inodedep_lookup(fs, freefile->fx_oldinum, 0, &idp))
 		panic("handle_workitem_freefile: inodedep survived");
 	FREE_LOCK(&lk);
 #endif
 	tip.i_devvp = freefile->fx_devvp;
 	tip.i_dev = freefile->fx_devvp->v_rdev;
-	tip.i_fs = freefile->fx_fs;
+	tip.i_fs = fs;
+	tip.i_vnode = &vp;
 	vp.v_data = &tip;
 	if ((error = ffs_freefile(&vp, freefile->fx_oldinum, freefile->fx_mode)) != 0)
 		softdep_error("handle_workitem_freefile", error);
@@ -4419,14 +4432,18 @@ clear_remove(p)
 			mp = pagedep->pd_mnt;
 			ino = pagedep->pd_ino;
 			FREE_LOCK(&lk);
+			if (vn_start_write(NULL, &mp, V_WAIT | PCATCH) != 0)
+				return;
 			if ((error = VFS_VGET(mp, ino, &vp)) != 0) {
 				softdep_error("clear_remove: vget", error);
+				vn_finished_write(mp);
 				return;
 			}
 			if ((error = VOP_FSYNC(vp, p->p_ucred, MNT_NOWAIT, p)))
 				softdep_error("clear_remove: fsync", error);
 			drain_output(vp, 0);
 			vput(vp);
+			vn_finished_write(mp);
 			return;
 		}
 	}
@@ -4486,8 +4503,11 @@ clear_inodedeps(p)
 		if (inodedep_lookup(fs, ino, 0, &inodedep) == 0)
 			continue;
 		FREE_LOCK(&lk);
+		if (vn_start_write(NULL, &mp, V_WAIT | PCATCH) != 0)
+			return;
 		if ((error = VFS_VGET(mp, ino, &vp)) != 0) {
 			softdep_error("clear_inodedeps: vget", error);
+			vn_finished_write(mp);
 			return;
 		}
 		if (ino == lastino) {
@@ -4499,6 +4519,7 @@ clear_inodedeps(p)
 			drain_output(vp, 0);
 		}
 		vput(vp);
+		vn_finished_write(mp);
 		ACQUIRE_LOCK(&lk);
 	}
 	FREE_LOCK(&lk);
diff --git a/sys/ufs/ffs/ffs_vfsops.c b/sys/ufs/ffs/ffs_vfsops.c
index 89ff6d3..5280181 100644
--- a/sys/ufs/ffs/ffs_vfsops.c
+++ b/sys/ufs/ffs/ffs_vfsops.c
@@ -133,7 +133,7 @@ VFS_SET(ufs_vfsops, ufs, 0);
  *		namei() if it is a genuine NULL from the user.
  */
 static int
-ffs_mount( mp, path, data, ndp, p)
+ffs_mount(mp, path, data, ndp, p)
         struct mount		*mp;	/* mount struct pointer*/
         char			*path;	/* path to mount point*/
         caddr_t			data;	/* arguments to FS specific mount*/
@@ -141,49 +141,34 @@ ffs_mount( mp, path, data, ndp, p)
         struct proc		*p;	/* process requesting mount*/
 {
 	size_t		size;
-	int		err = 0;
 	struct vnode	*devvp;
-
 	struct ufs_args args;
 	struct ufsmount *ump = 0;
 	register struct fs *fs;
-	int error, flags, ronly = 0;
+	int error, flags;
 	mode_t accessmode;
 
 	/*
-	 * Use NULL path to flag a root mount
+	 * Use NULL path to indicate we are mounting the root file system.
 	 */
-	if( path == NULL) {
-		/*
-		 ***
-		 * Mounting root file system
-		 ***
-		 */
-	
-		if ((err = bdevvp(rootdev, &rootvp))) {
+	if (path == NULL) {
+		if ((error = bdevvp(rootdev, &rootvp))) {
 			printf("ffs_mountroot: can't find rootvp\n");
-			return (err);
-		}
-
-		if( ( err = ffs_mountfs(rootvp, mp, p, M_FFSNODE)) != 0) {
-			/* fs specific cleanup (if any)*/
-			goto error_1;
+			return (error);
 		}
 
-		goto dostatfs;		/* success*/
+		if ((error = ffs_mountfs(rootvp, mp, p, M_FFSNODE)) != 0)
+			return (error);
 
+		(void)VFS_STATFS(mp, &mp->mnt_stat, p);
+		return (0);
 	}
 
 	/*
-	 ***
 	 * Mounting non-root file system or updating a file system
-	 ***
 	 */
-
-	/* copy in user arguments*/
-	err = copyin(data, (caddr_t)&args, sizeof (struct ufs_args));
-	if (err)
-		goto error_1;		/* can't get arguments*/
+	if ((error = copyin(data, (caddr_t)&args, sizeof(struct ufs_args)))!= 0)
+		return (error);
 
 	/*
 	 * If updating, check whether changing from read-only to
@@ -193,25 +178,36 @@ ffs_mount( mp, path, data, ndp, p)
 		ump = VFSTOUFS(mp);
 		fs = ump->um_fs;
 		devvp = ump->um_devvp;
-		err = 0;
-		ronly = fs->fs_ronly;	/* MNT_RELOAD might change this */
-		if (ronly == 0 && (mp->mnt_flag & MNT_RDONLY)) {
+		if (fs->fs_ronly == 0 && (mp->mnt_flag & MNT_RDONLY)) {
+			if ((error = vn_start_write(NULL, &mp, V_WAIT)) != 0)
+				return (error);
 			flags = WRITECLOSE;
 			if (mp->mnt_flag & MNT_FORCE)
 				flags |= FORCECLOSE;
 			if (mp->mnt_flag & MNT_SOFTDEP) {
-				err = softdep_flushfiles(mp, flags, p);
+				error = softdep_flushfiles(mp, flags, p);
 			} else {
-				err = ffs_flushfiles(mp, flags, p);
+				error = ffs_flushfiles(mp, flags, p);
 			}
-			ronly = 1;
-		}
-		if (!err && (mp->mnt_flag & MNT_RELOAD))
-			err = ffs_reload(mp, ndp->ni_cnd.cn_cred, p);
-		if (err) {
-			goto error_1;
+			if (error) {
+				vn_finished_write(mp);
+				return (error);
+			}
+			fs->fs_ronly = 1;
+			if ((fs->fs_flags & FS_UNCLEAN) == 0)
+				fs->fs_clean = 1;
+			if ((error = ffs_sbupdate(ump, MNT_WAIT)) != 0) {
+				fs->fs_ronly = 0;
+				fs->fs_clean = 0;
+				vn_finished_write(mp);
+				return (error);
+			}
+			vn_finished_write(mp);
 		}
-		if (ronly && (mp->mnt_kern_flag & MNTK_WANTRDWR)) {
+		if ((mp->mnt_flag & MNT_RELOAD) &&
+		    (error = ffs_reload(mp, ndp->ni_cnd.cn_cred, p)) != 0)
+			return (error);
+		if (fs->fs_ronly && (mp->mnt_kern_flag & MNTK_WANTRDWR)) {
 			/*
 			 * If upgrade to read-write by non-root, then verify
 			 * that user has necessary permissions on the device.
@@ -225,31 +221,36 @@ ffs_mount( mp, path, data, ndp, p)
 				}
 				VOP_UNLOCK(devvp, 0, p);
 			}
-
 			fs->fs_flags &= ~FS_UNCLEAN;
 			if (fs->fs_clean == 0) {
 				fs->fs_flags |= FS_UNCLEAN;
 				if (mp->mnt_flag & MNT_FORCE) {
-					printf(
-"WARNING: %s was not properly dismounted\n",
-					    fs->fs_fsmnt);
+					printf("WARNING: %s was not %s\n",
+					   fs->fs_fsmnt, "properly dismounted");
 				} else {
 					printf(
 "WARNING: R/W mount of %s denied.  Filesystem is not clean - run fsck\n",
 					    fs->fs_fsmnt);
-					err = EPERM;
-					goto error_1;
+					return (EPERM);
 				}
 			}
-
+			if ((error = vn_start_write(NULL, &mp, V_WAIT)) != 0)
+				return (error);
+			fs->fs_ronly = 0;
+			fs->fs_clean = 0;
+			if ((error = ffs_sbupdate(ump, MNT_WAIT)) != 0) {
+				vn_finished_write(mp);
+				return (error);
+			}
 			/* check to see if we need to start softdep */
-			if (fs->fs_flags & FS_DOSOFTDEP) {
-				err = softdep_mount(devvp, mp, fs, p->p_ucred);
-				if (err)
-					goto error_1;
+			if ((fs->fs_flags & FS_DOSOFTDEP) &&
+			    (error = softdep_mount(devvp, mp, fs, p->p_ucred))){
+				vn_finished_write(mp);
+				return (error);
 			}
-
-			ronly = 0;
+			if (fs->fs_snapinum[0] != 0)
+				ffs_snapshot_mount(mp);
+			vn_finished_write(mp);
 		}
 		/*
 		 * Soft updates is incompatible with "async",
@@ -258,18 +259,18 @@ ffs_mount( mp, path, data, ndp, p)
 		 * Softdep_mount() clears it in an initial mount 
 		 * or ro->rw remount.
 		 */
-		if (mp->mnt_flag & MNT_SOFTDEP) {
+		if (mp->mnt_flag & MNT_SOFTDEP)
 			mp->mnt_flag &= ~MNT_ASYNC;
-		}
-		/* if not updating name...*/
-		if (args.fspec == 0) {
-			/*
-			 * Process export requests.  Jumping to "success"
-			 * will return the vfs_export() error code.
-			 */
-			err = vfs_export(mp, &ump->um_export, &args.export);
-			goto success;
-		}
+		/*
+		 * If not updating name, process export requests.
+		 */
+		if (args.fspec == 0)
+			return (vfs_export(mp, &ump->um_export, &args.export));
+		/*
+		 * If this is a snapshot request, take the snapshot.
+		 */
+		if (mp->mnt_flag & MNT_SNAPSHOT)
+			return (ffs_snapshot(mp, args.fspec));
 	}
 
 	/*
@@ -277,17 +278,14 @@ ffs_mount( mp, path, data, ndp, p)
 	 * and verify that it refers to a sensible block device.
 	 */
 	NDINIT(ndp, LOOKUP, FOLLOW, UIO_USERSPACE, args.fspec, p);
-	err = namei(ndp);
-	if (err) {
-		/* can't get devvp!*/
-		goto error_1;
-	}
-
+	if ((error = namei(ndp)) != 0)
+		return (error);
 	NDFREE(ndp, NDF_ONLY_PNBUF);
 	devvp = ndp->ni_vp;
-
-	if (!vn_isdisk(devvp, &err))
-		goto error_2;
+	if (!vn_isdisk(devvp, &error)) {
+		vrele(devvp);
+		return (error);
+	}
 
 	/*
 	 * If mount by non-root, then verify that user has necessary
@@ -298,7 +296,7 @@ ffs_mount( mp, path, data, ndp, p)
 		if ((mp->mnt_flag & MNT_RDONLY) == 0)
 			accessmode |= VWRITE;
 		vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY, p);
-		if ((error = VOP_ACCESS(devvp, accessmode, p->p_ucred, p)) != 0) {
+		if ((error = VOP_ACCESS(devvp, accessmode, p->p_ucred, p))!= 0){
 			vput(devvp);
 			return (error);
 		}
@@ -307,96 +305,43 @@ ffs_mount( mp, path, data, ndp, p)
 
 	if (mp->mnt_flag & MNT_UPDATE) {
 		/*
-		 ********************
-		 * UPDATE
+		 * Update only
+		 *
 		 * If it's not the same vnode, or at least the same device
 		 * then it's not correct.
-		 ********************
 		 */
 
-		if (devvp != ump->um_devvp) {
-			if ( devvp->v_rdev == ump->um_devvp->v_rdev) {
-				vrele(devvp);
-			} else {
-				err = EINVAL;	/* needs translation */
-			}
-		} else
-			vrele(devvp);
-		/*
-		 * Update device name only on success
-		 */
-		if( !err) {
-			/* Save "mounted from" info for mount point (NULL pad)*/
-			copyinstr(	args.fspec,
-					mp->mnt_stat.f_mntfromname,
-					MNAMELEN - 1,
-					&size);
-			bzero( mp->mnt_stat.f_mntfromname + size, MNAMELEN - size);
-		}
+		if (devvp != ump->um_devvp &&
+		    devvp->v_rdev != ump->um_devvp->v_rdev)
+			error = EINVAL;	/* needs translation */
+		vrele(devvp);
+		if (error)
+			return (error);
 	} else {
 		/*
-		 ********************
-		 * NEW MOUNT
-		 ********************
+		 * New mount
+		 *
+		 * We need the name for the mount point (also used for
+		 * "last mounted on") copied in. If an error occurs,
+		 * the mount point is discarded by the upper level code.
 		 */
-
-		/*
-		 * Since this is a new mount, we want the names for
-		 * the device and the mount point copied in.  If an
-		 * error occurs,  the mountpoint is discarded by the
-		 * upper level code.
-		 */
-		/* Save "last mounted on" info for mount point (NULL pad)*/
-		copyinstr(	path,				/* mount point*/
-				mp->mnt_stat.f_mntonname,	/* save area*/
-				MNAMELEN - 1,			/* max size*/
-				&size);				/* real size*/
+		copyinstr(path, mp->mnt_stat.f_mntonname, MNAMELEN - 1, &size);
 		bzero( mp->mnt_stat.f_mntonname + size, MNAMELEN - size);
-
-		/* Save "mounted from" info for mount point (NULL pad)*/
-		copyinstr(	args.fspec,			/* device name*/
-				mp->mnt_stat.f_mntfromname,	/* save area*/
-				MNAMELEN - 1,			/* max size*/
-				&size);				/* real size*/
-		bzero( mp->mnt_stat.f_mntfromname + size, MNAMELEN - size);
-
-		err = ffs_mountfs(devvp, mp, p, M_FFSNODE);
-	}
-	if (err) {
-		goto error_2;
+		if ((error = ffs_mountfs(devvp, mp, p, M_FFSNODE)) != 0) {
+			vrele(devvp);
+			return (error);
+		}
 	}
-
-dostatfs:
 	/*
-	 * Initialize FS stat information in mount struct; uses both
-	 * mp->mnt_stat.f_mntonname and mp->mnt_stat.f_mntfromname
-	 *
-	 * This code is common to root and non-root mounts
+	 * Save "mounted from" device name info for mount point (NULL pad).
+	 */
+	copyinstr(args.fspec, mp->mnt_stat.f_mntfromname, MNAMELEN - 1, &size);
+	bzero( mp->mnt_stat.f_mntfromname + size, MNAMELEN - size);
+	/*
+	 * Initialize filesystem stat information in mount struct.
 	 */
 	(void)VFS_STATFS(mp, &mp->mnt_stat, p);
-
-	goto success;
-
-
-error_2:	/* error with devvp held*/
-
-	/* release devvp before failing*/
-	vrele(devvp);
-
-error_1:	/* no state to back out*/
-
-success:
-	if (!err && path && (mp->mnt_flag & MNT_UPDATE)) {
-		/* Update clean flag after changing read-onlyness. */
-		fs = ump->um_fs;
-		if (ronly != fs->fs_ronly) {
-			fs->fs_ronly = ronly;
-			fs->fs_clean = ronly &&
-			    (fs->fs_flags & FS_UNCLEAN) == 0 ? 1 : 0;
-			ffs_sbupdate(ump, MNT_WAIT);
-		}
-	}
-	return (err);
+	return (0);
 }
 
 /*
@@ -478,7 +423,7 @@ ffs_reload(mp, cred, p)
 	newfs->fs_maxcluster = fs->fs_maxcluster;
 	bcopy(newfs, fs, (u_int)fs->fs_sbsize);
 	if (fs->fs_sbsize < SBSIZE)
-		bp->b_flags |= B_INVAL;
+		bp->b_flags |= B_INVAL | B_NOCACHE;
 	brelse(bp);
 	mp->mnt_maxsymlinklen = fs->fs_maxsymlinklen;
 	ffs_oldfscompat(fs);
@@ -670,7 +615,7 @@ ffs_mountfs(devvp, mp, p, malloctype)
 	ump->um_vfree = ffs_vfree;
 	bcopy(bp->b_data, ump->um_fs, (u_int)fs->fs_sbsize);
 	if (fs->fs_sbsize < SBSIZE)
-		bp->b_flags |= B_INVAL;
+		bp->b_flags |= B_INVAL | B_NOCACHE;
 	brelse(bp);
 	bp = NULL;
 	fs = ump->um_fs;
@@ -750,6 +695,8 @@ ffs_mountfs(devvp, mp, p, malloctype)
 			free(base, M_UFSMNT);
 			goto out;
 		}
+		if (fs->fs_snapinum[0] != 0)
+			ffs_snapshot_mount(mp);
 		fs->fs_fmod = 1;
 		fs->fs_clean = 0;
 		(void) ffs_sbupdate(ump, MNT_WAIT);
@@ -886,6 +833,15 @@ ffs_flushfiles(mp, flags, p)
 		 */
 	}
 #endif
+	if (ump->um_devvp->v_flag & VCOPYONWRITE) {
+		if ((error = vflush(mp, NULL, SKIPSYSTEM | flags)) != 0)
+			return (error);
+		ffs_snapshot_unmount(mp);
+		/*
+		 * Here we fall through to vflush again to ensure
+		 * that we have gotten rid of all the system vnodes.
+		 */
+	}
         /*
 	 * Flush all the files.
 	 */
diff --git a/sys/ufs/ffs/ffs_vnops.c b/sys/ufs/ffs/ffs_vnops.c
index 539f302..eb6d621 100644
--- a/sys/ufs/ffs/ffs_vnops.c
+++ b/sys/ufs/ffs/ffs_vnops.c
@@ -95,6 +95,7 @@ vop_t **ffs_specop_p;
 static struct vnodeopv_entry_desc ffs_specop_entries[] = {
 	{ &vop_default_desc,		(vop_t *) ufs_vnoperatespec },
 	{ &vop_fsync_desc,		(vop_t *) ffs_fsync },
+	{ &vop_copyonwrite_desc,	(vop_t *) ffs_copyonwrite },
 	{ NULL, NULL }
 };
 static struct vnodeopv_desc ffs_specop_opv_desc =
@@ -129,11 +130,20 @@ ffs_fsync(ap)
 	} */ *ap;
 {
 	struct vnode *vp = ap->a_vp;
+	struct inode *ip = VTOI(vp);
 	struct buf *bp;
 	struct buf *nbp;
 	int s, error, wait, passes, skipmeta;
 	daddr_t lbn;
 
+	/*
+	 * Snapshots have to be unlocked so they do not deadlock
+	 * checking whether they need to copy their written buffers.
+	 * We always hold a reference, so they cannot be removed
+	 * out from underneath us.
+	 */
+	if (ip->i_flags & SF_SNAPSHOT)
+		VOP_UNLOCK(vp, 0, ap->a_p);
 	wait = (ap->a_waitfor == MNT_WAIT);
 	if (vn_isdisk(vp, NULL)) {
 		lbn = INT_MAX;
@@ -141,8 +151,6 @@ ffs_fsync(ap)
 		    (vp->v_specmountpoint->mnt_flag & MNT_SOFTDEP))
 			softdep_fsync_mountdev(vp);
 	} else {
-		struct inode *ip;
-		ip = VTOI(vp);
 		lbn = lblkno(ip->i_fs, (ip->i_size + ip->i_fs->fs_bsize - 1));
 	}
 
@@ -279,5 +287,7 @@ loop:
 	}
 	splx(s);
 	error = UFS_UPDATE(vp, wait);
+	if (ip->i_flags & SF_SNAPSHOT)
+		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, ap->a_p);
 	return (error);
 }
diff --git a/sys/ufs/ffs/softdep.h b/sys/ufs/ffs/softdep.h
index 1908a3e..cf9cac8 100644
--- a/sys/ufs/ffs/softdep.h
+++ b/sys/ufs/ffs/softdep.h
@@ -382,7 +382,7 @@ struct freefrag {
 	struct	worklist ff_list;	/* id_inowait or delayed worklist */
 #	define	ff_state ff_list.wk_state /* owning user; should be uid_t */
 	struct	vnode *ff_devvp;	/* filesystem device vnode */
-	struct	fs *ff_fs;		/* addr of superblock */
+	struct	mount *ff_mnt;		/* associated mount point */
 	ufs_daddr_t ff_blkno;		/* fragment physical block number */
 	long	ff_fragsize;		/* size of fragment being deleted */
 	ino_t	ff_inum;		/* owning inode number */
@@ -398,7 +398,7 @@ struct freeblks {
 	struct	worklist fb_list;	/* id_inowait or delayed worklist */
 	ino_t	fb_previousinum;	/* inode of previous owner of blocks */
 	struct	vnode *fb_devvp;	/* filesystem device vnode */
-	struct	fs *fb_fs;		/* addr of superblock */
+	struct	mount *fb_mnt;		/* associated mount point */
 	off_t	fb_oldsize;		/* previous file size */
 	off_t	fb_newsize;		/* new file size */
 	int	fb_chkcnt;		/* used to check cnt of blks released */
@@ -418,7 +418,7 @@ struct freefile {
 	mode_t	fx_mode;		/* mode of inode */
 	ino_t	fx_oldinum;		/* inum of the unlinked file */
 	struct	vnode *fx_devvp;	/* filesystem device vnode */
-	struct	fs *fx_fs;		/* addr of superblock */
+	struct	mount *fx_mnt;		/* associated mount point */
 };
 
 /*
diff --git a/sys/ufs/ufs/inode.h b/sys/ufs/ufs/inode.h
index 83960b0..6417a10 100644
--- a/sys/ufs/ufs/inode.h
+++ b/sys/ufs/ufs/inode.h
@@ -84,6 +84,7 @@ struct inode {
 	struct	 dquot *i_dquot[MAXQUOTAS]; /* Dquot structures. */
 	u_quad_t i_modrev;	/* Revision level for NFS lease. */
 	struct	 lockf *i_lockf;/* Head of byte-level lock list. */
+	struct	 inode *i_copyonwrite; /* copy-on-write list */
 	/*
 	 * Side effects; used during directory lookup.
 	 */
diff --git a/sys/ufs/ufs/ufs_bmap.c b/sys/ufs/ufs/ufs_bmap.c
index 9056340..ab4ac52 100644
--- a/sys/ufs/ufs/ufs_bmap.c
+++ b/sys/ufs/ufs/ufs_bmap.c
@@ -47,6 +47,7 @@
 #include <sys/vnode.h>
 #include <sys/mount.h>
 #include <sys/resourcevar.h>
+#include <sys/stat.h>
 
 #include <ufs/ufs/extattr.h>
 #include <ufs/ufs/quota.h>
@@ -115,7 +116,7 @@ ufs_bmaparray(vp, bn, bnp, ap, nump, runp, runb)
 	struct indir a[NIADDR+1], *xap;
 	ufs_daddr_t daddr;
 	long metalbn;
-	int error, maxrun, num;
+	int error, num, maxrun = 0;
 
 	ip = VTOI(vp);
 	mp = vp->v_mount;
@@ -127,6 +128,7 @@ ufs_bmaparray(vp, bn, bnp, ap, nump, runp, runb)
 #endif
 
 	if (runp) {
+		maxrun = mp->mnt_iosize_max / mp->mnt_stat.f_iosize - 1;
 		*runp = 0;
 	}
 
@@ -134,7 +136,6 @@ ufs_bmaparray(vp, bn, bnp, ap, nump, runp, runb)
 		*runb = 0;
 	}
 
-	maxrun = mp->mnt_iosize_max / mp->mnt_stat.f_iosize - 1;
 
 	xap = ap == NULL ? a : ap;
 	if (!nump)
@@ -146,9 +147,12 @@ ufs_bmaparray(vp, bn, bnp, ap, nump, runp, runb)
 	num = *nump;
 	if (num == 0) {
 		*bnp = blkptrtodb(ump, ip->i_db[bn]);
-		if (*bnp == 0)
-			*bnp = -1;
-		else if (runp) {
+		if (*bnp == 0) {
+			if (ip->i_flags & SF_SNAPSHOT)
+				*bnp = blkptrtodb(ump, bn * ump->um_seqinc);
+			else
+				*bnp = -1;
+		} else if (runp) {
 			daddr_t bnb = bn;
 			for (++bn; bn < NDADDR && *runp < maxrun &&
 			    is_sequential(ump, ip->i_db[bn - 1], ip->i_db[bn]);
@@ -226,8 +230,13 @@ ufs_bmaparray(vp, bn, bnp, ap, nump, runp, runb)
 	if (bp)
 		bqrelse(bp);
 
-	daddr = blkptrtodb(ump, daddr);
-	*bnp = daddr == 0 ? -1 : daddr;
+	*bnp = blkptrtodb(ump, daddr);
+	if (*bnp == 0) {
+		if (ip->i_flags & SF_SNAPSHOT)
+			*bnp = blkptrtodb(ump, bn * ump->um_seqinc);
+		else
+			*bnp = -1;
+	}
 	return (0);
 }
 
diff --git a/sys/ufs/ufs/ufs_extern.h b/sys/ufs/ufs/ufs_extern.h
index d576be9..b740792 100644
--- a/sys/ufs/ufs/ufs_extern.h
+++ b/sys/ufs/ufs/ufs_extern.h
@@ -87,6 +87,7 @@ int	 ufs_init __P((struct vfsconf *));
 void	 ufs_itimes __P((struct vnode *vp));
 int	 ufs_lookup __P((struct vop_cachedlookup_args *));
 int	 ufs_reclaim __P((struct vop_reclaim_args *));
+void	 ffs_snapremove __P((struct vnode *vp));
 int	 ufs_root __P((struct mount *, struct vnode **));
 int	 ufs_start __P((struct mount *, int, struct proc *));
 int	 ufs_vinit __P((struct mount *, vop_t **, vop_t **, struct vnode **));
diff --git a/sys/ufs/ufs/ufs_inode.c b/sys/ufs/ufs/ufs_inode.c
index 507e716..485a6d2 100644
--- a/sys/ufs/ufs/ufs_inode.c
+++ b/sys/ufs/ufs/ufs_inode.c
@@ -77,6 +77,7 @@ ufs_inactive(ap)
 	if (ip->i_mode == 0)
 		goto out;
 	if (ip->i_nlink <= 0 && (vp->v_mount->mnt_flag & MNT_RDONLY) == 0) {
+		(void) vn_write_suspend_wait(vp, V_WAIT);
 #ifdef QUOTA
 		if (!getinoquota(ip))
 			(void)chkiq(ip, -1, NOCRED, 0);
@@ -91,8 +92,15 @@ ufs_inactive(ap)
 		ip->i_flag |= IN_CHANGE | IN_UPDATE;
 		UFS_VFREE(vp, ip->i_number, mode);
 	}
-	if (ip->i_flag & (IN_ACCESS | IN_CHANGE | IN_MODIFIED | IN_UPDATE))
-		UFS_UPDATE(vp, 0);
+	if (ip->i_flag & (IN_ACCESS | IN_CHANGE | IN_MODIFIED | IN_UPDATE)) {
+		if ((ip->i_flag & (IN_CHANGE | IN_UPDATE | IN_MODIFIED)) == 0 &&
+		    vn_write_suspend_wait(vp, V_NOWAIT)) {
+			ip->i_flag &= ~IN_ACCESS;
+		} else {
+			(void) vn_write_suspend_wait(vp, V_WAIT);
+			UFS_UPDATE(vp, 0);
+		}
+	}
 out:
 	VOP_UNLOCK(vp, 0, p);
 	/*
diff --git a/sys/ufs/ufs/ufs_quota.c b/sys/ufs/ufs/ufs_quota.c
index 574a330..6396f67 100644
--- a/sys/ufs/ufs/ufs_quota.c
+++ b/sys/ufs/ufs/ufs_quota.c
@@ -889,6 +889,7 @@ dqsync(vp, dq)
 	struct vnode *dqvp;
 	struct iovec aiov;
 	struct uio auio;
+	struct mount *mp;
 	int error;
 
 	if (dq == NODQUOT)
@@ -897,6 +898,7 @@ dqsync(vp, dq)
 		return (0);
 	if ((dqvp = dq->dq_ump->um_quotas[dq->dq_type]) == NULLVP)
 		panic("dqsync: file");
+	(void) vn_write_suspend_wait(dqvp, V_WAIT);
 	if (vp != dqvp)
 		vn_lock(dqvp, LK_EXCLUSIVE | LK_RETRY, p);
 	while (dq->dq_flags & DQ_LOCK) {
diff --git a/sys/ufs/ufs/ufs_vnops.c b/sys/ufs/ufs/ufs_vnops.c
index e3b6e29..d97568c 100644
--- a/sys/ufs/ufs/ufs_vnops.c
+++ b/sys/ufs/ufs/ufs_vnops.c
@@ -702,6 +702,8 @@ ufs_remove(ap)
 	int error;
 
 	ip = VTOI(vp);
+	if ((ip->i_flags & SF_SNAPSHOT) != 0)
+		ffs_snapremove(vp);
 	if ((ip->i_flags & (NOUNLINK | IMMUTABLE | APPEND)) ||
 	    (VTOI(dvp)->i_flags & APPEND)) {
 		error = EPERM;
@@ -2215,6 +2217,7 @@ static struct vnodeopv_entry_desc ufs_vnodeop_entries[] = {
 	{ &vop_open_desc,		(vop_t *) ufs_open },
 	{ &vop_pathconf_desc,		(vop_t *) ufs_pathconf },
 	{ &vop_poll_desc,		(vop_t *) vop_stdpoll },
+	{ &vop_getwritemount_desc, 	(vop_t *) vop_stdgetwritemount },
 	{ &vop_print_desc,		(vop_t *) ufs_print },
 	{ &vop_readdir_desc,		(vop_t *) ufs_readdir },
 	{ &vop_readlink_desc,		(vop_t *) ufs_readlink },
diff --git a/sys/vm/vm_pageout.c b/sys/vm/vm_pageout.c
index 307dd0b..97b221e 100644
--- a/sys/vm/vm_pageout.c
+++ b/sys/vm/vm_pageout.c
@@ -777,6 +777,7 @@ rescan0:
 			int written;
 			int swap_pageouts_ok;
 			struct vnode *vp = NULL;
+			struct mount *mp;
 
 			object = m->object;
 
@@ -853,9 +854,13 @@ rescan0:
 			if (object->type == OBJT_VNODE) {
 				vp = object->handle;
 
+				mp = NULL;
+				if (vp->v_type == VREG)
+					vn_start_write(vp, &mp, V_NOWAIT);
 				if (VOP_ISLOCKED(vp, NULL) ||
 				    vp->v_data == NULL ||
 				    vget(vp, LK_EXCLUSIVE|LK_NOOBJ, curproc)) {
+					vn_finished_write(mp);
 					if ((m->queue == PQ_INACTIVE) &&
 						(m->hold_count == 0) &&
 						(m->busy == 0) &&
@@ -878,6 +883,7 @@ rescan0:
 					if (object->flags & OBJ_MIGHTBEDIRTY)
 						vnodes_skipped++;
 					vput(vp);
+					vn_finished_write(mp);
 					continue;
 				}
 	
@@ -888,6 +894,7 @@ rescan0:
 				 */
 				if (m->busy || (m->flags & PG_BUSY)) {
 					vput(vp);
+					vn_finished_write(mp);
 					continue;
 				}
 
@@ -902,6 +909,7 @@ rescan0:
 					if (object->flags & OBJ_MIGHTBEDIRTY)
 						vnodes_skipped++;
 					vput(vp);
+					vn_finished_write(mp);
 					continue;
 				}
 			}
@@ -913,8 +921,10 @@ rescan0:
 			 * start the cleaning operation.
 			 */
 			written = vm_pageout_clean(m);
-			if (vp)
+			if (vp) {
 				vput(vp);
+				vn_finished_write(mp);
+			}
 
 			maxlaunder -= written;
 		}
diff --git a/sys/vm/vnode_pager.c b/sys/vm/vnode_pager.c
index 2633426..3dd12ec 100644
--- a/sys/vm/vnode_pager.c
+++ b/sys/vm/vnode_pager.c
@@ -850,6 +850,7 @@ vnode_pager_putpages(object, m, count, sync, rtvals)
 {
 	int rtval;
 	struct vnode *vp;
+	struct mount *mp;
 	int bytes = count * PAGE_SIZE;
 
 	/*
@@ -872,11 +873,15 @@ vnode_pager_putpages(object, m, count, sync, rtvals)
 	 */
 
 	vp = object->handle;
+	if (vp->v_type != VREG)
+		mp = NULL;
+	(void)vn_start_write(vp, &mp, V_WAIT);
 	rtval = VOP_PUTPAGES(vp, m, bytes, sync, rtvals, 0);
 	if (rtval == EOPNOTSUPP) {
 	    printf("vnode_pager: *** WARNING *** stale FS putpages\n");
 	    rtval = vnode_pager_generic_putpages( vp, m, bytes, sync, rtvals);
 	}
+	vn_finished_write(mp);
 }
author	mckusick <mckusick@FreeBSD.org>	2000-07-11 22:07:57 +0000
committer	mckusick <mckusick@FreeBSD.org>	2000-07-11 22:07:57 +0000
commit	a3d0c189ea25a7af3dfab30112f5d8d65e214e1c (patch)
tree	c84458dcf49aaf90ff010ebc108cb3b6ca3c2f4a /sys
parent	c8c04452402a28eabd1ed8a1a06e0a14ac3d22c6 (diff)
download	FreeBSD-src-a3d0c189ea25a7af3dfab30112f5d8d65e214e1c.zip FreeBSD-src-a3d0c189ea25a7af3dfab30112f5d8d65e214e1c.tar.gz