52 files changed, 2536 insertions, 446 deletions
diff --git a/sys/compat/svr4/svr4_fcntl.c b/sys/compat/svr4/svr4_fcntl.c
index 4040030..c65f345 100644
--- a/sys/compat/svr4/svr4_fcntl.c
+++ b/sys/compat/svr4/svr4_fcntl.c
@@ -247,6 +247,7 @@ fd_revoke(p, fd)
 	struct filedesc *fdp = p->p_fd;
 	struct file *fp;
 	struct vnode *vp;
+	struct mount *mp;
 	struct vattr vattr;
 	int error, *retval;
 
@@ -271,8 +272,11 @@ fd_revoke(p, fd)
 	    (error = suser(p)) != 0)
 		goto out;
 
+	if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
+		goto out;
 	if (vcount(vp) > 1)
 		VOP_REVOKE(vp, REVOKEALL);
+	vn_finished_write(mp);
 out:
 	vrele(vp);
 	return error;
diff --git a/sys/conf/files b/sys/conf/files
index 84130e1..01f088f 100644
--- a/sys/conf/files
+++ b/sys/conf/files
@@ -906,6 +906,8 @@ ufs/ffs/ffs_balloc.c	optional ffs
 ufs/ffs/ffs_balloc.c	optional mfs
 ufs/ffs/ffs_inode.c	optional ffs
 ufs/ffs/ffs_inode.c	optional mfs
+ufs/ffs/ffs_snapshot.c	optional ffs
+ufs/ffs/ffs_snapshot.c	optional mfs
 ufs/ffs/ffs_softdep.c	optional softupdates
 ufs/ffs/ffs_softdep_stub.c	standard
 ufs/ffs/ffs_subr.c	optional ffs
diff --git a/sys/dev/vn/vn.c b/sys/dev/vn/vn.c
index 88e3801..efbc437 100644
--- a/sys/dev/vn/vn.c
+++ b/sys/dev/vn/vn.c
@@ -276,7 +276,6 @@ vnstrategy(struct bio *bp)
 	int unit;
 	struct vn_softc *vn;
 	int error;
-	int isvplocked = 0;
 
 	unit = dkunit(bp->bio_dev);
 	vn = bp->bio_dev->si_drv1;
@@ -360,6 +359,7 @@ vnstrategy(struct bio *bp)
 		 */
 		struct uio auio;
 		struct iovec aiov;
+		struct mount *mp;
 
 		bzero(&auio, sizeof(auio));
 
@@ -375,18 +375,18 @@ vnstrategy(struct bio *bp)
 			auio.uio_rw = UIO_WRITE;
 		auio.uio_resid = bp->bio_bcount;
 		auio.uio_procp = curproc;
-		if (!VOP_ISLOCKED(vn->sc_vp, NULL)) {
-			isvplocked = 1;
+		if (VOP_ISLOCKED(vn->sc_vp, NULL))
+			vprint("unexpected vn driver lock", vn->sc_vp);
+		if (bp->bio_cmd == BIO_READ) {
 			vn_lock(vn->sc_vp, LK_EXCLUSIVE | LK_RETRY, curproc);
-		}
-		if(bp->bio_cmd == BIO_READ)
 			error = VOP_READ(vn->sc_vp, &auio, 0, vn->sc_cred);
-		else
+		} else {
+			(void) vn_start_write(vn->sc_vp, &mp, V_WAIT);
+			vn_lock(vn->sc_vp, LK_EXCLUSIVE | LK_RETRY, curproc);
 			error = VOP_WRITE(vn->sc_vp, &auio, 0, vn->sc_cred);
-		if (isvplocked) {
-			VOP_UNLOCK(vn->sc_vp, 0, curproc);
-			isvplocked = 0;
+			vn_finished_write(mp);
 		}
+		VOP_UNLOCK(vn->sc_vp, 0, curproc);
 		bp->bio_resid = auio.uio_resid;
 
 		if (error) {
diff --git a/sys/fs/fdescfs/fdesc_vnops.c b/sys/fs/fdescfs/fdesc_vnops.c
index 772a94c..72c7cae 100644
--- a/sys/fs/fdescfs/fdesc_vnops.c
+++ b/sys/fs/fdescfs/fdesc_vnops.c
@@ -383,6 +383,8 @@ fdesc_setattr(ap)
 {
 	struct filedesc *fdp = ap->a_p->p_fd;
 	struct vattr *vap = ap->a_vap;
+	struct vnode *vp;
+	struct mount *mp;
 	struct file *fp;
 	unsigned fd;
 	int error;
@@ -403,8 +405,11 @@ fdesc_setattr(ap)
 	switch (fp->f_type) {
 	case DTYPE_FIFO:
 	case DTYPE_VNODE:
-		error = VOP_SETATTR((struct vnode *) fp->f_data, ap->a_vap,
-		    ap->a_cred, ap->a_p);
+		vp = (struct vnode *)fp->f_data;
+		if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
+			return (error);
+		error = VOP_SETATTR(vp, ap->a_vap, ap->a_cred, ap->a_p);
+		vn_finished_write(mp);
 		break;
 
 	default:
diff --git a/sys/fs/fifofs/fifo_vnops.c b/sys/fs/fifofs/fifo_vnops.c
index 5bd13a7..03e3e37 100644
--- a/sys/fs/fifofs/fifo_vnops.c
+++ b/sys/fs/fifofs/fifo_vnops.c
@@ -107,6 +107,7 @@ static struct vnodeopv_entry_desc fifo_vnodeop_entries[] = {
 	{ &vop_open_desc,		(vop_t *) fifo_open },
 	{ &vop_pathconf_desc,		(vop_t *) fifo_pathconf },
 	{ &vop_poll_desc,		(vop_t *) fifo_poll },
+	{ &vop_getwritemount_desc, 	(vop_t *) vop_stdgetwritemount },
 	{ &vop_print_desc,		(vop_t *) fifo_print },
 	{ &vop_read_desc,		(vop_t *) fifo_read },
 	{ &vop_readdir_desc,		(vop_t *) fifo_badop },
diff --git a/sys/fs/specfs/spec_vnops.c b/sys/fs/specfs/spec_vnops.c
index cbe52f4..baf40c3 100644
--- a/sys/fs/specfs/spec_vnops.c
+++ b/sys/fs/specfs/spec_vnops.c
@@ -88,6 +88,7 @@ static struct vnodeopv_entry_desc spec_vnodeop_entries[] = {
 	{ &vop_open_desc,		(vop_t *) spec_open },
 	{ &vop_pathconf_desc,		(vop_t *) vop_stdpathconf },
 	{ &vop_poll_desc,		(vop_t *) spec_poll },
+	{ &vop_getwritemount_desc, 	(vop_t *) vop_stdgetwritemount },
 	{ &vop_print_desc,		(vop_t *) spec_print },
 	{ &vop_read_desc,		(vop_t *) spec_read },
 	{ &vop_readdir_desc,		(vop_t *) vop_panic },
@@ -415,16 +416,29 @@ spec_strategy(ap)
 	struct buf *bp;
 	struct vnode *vp;
 	struct mount *mp;
+	int error;
 
 	bp = ap->a_bp;
-	if ((bp->b_iocmd == BIO_WRITE) && (LIST_FIRST(&bp->b_dep)) != NULL)
-		buf_start(bp);
-
+	vp = ap->a_vp;
+	if ((bp->b_iocmd == BIO_WRITE)) {
+		if (vp->v_mount != NULL &&
+		    (vp->v_mount->mnt_kern_flag & MNTK_SUSPENDED) != 0)
+			panic("spec_strategy: bad I/O");
+		if (LIST_FIRST(&bp->b_dep) != NULL)
+			buf_start(bp);
+		if ((vp->v_flag & VCOPYONWRITE) &&
+		    (error = VOP_COPYONWRITE(vp, bp)) != 0 &&
+		    error != EOPNOTSUPP) {
+			bp->b_io.bio_error = error;
+			bp->b_io.bio_flags |= BIO_ERROR;
+			biodone(&bp->b_io);
+			return (0);
+		}
+	}
 	/*
 	 * Collect statistics on synchronous and asynchronous read
 	 * and write counts for disks that have associated filesystems.
 	 */
-	vp = ap->a_vp;
 	if (vn_isdisk(vp, NULL) && (mp = vp->v_specmountpoint) != NULL) {
 		if (bp->b_iocmd == BIO_WRITE) {
 			if (bp->b_lock.lk_lockholder == LK_KERNPROC)
diff --git a/sys/fs/unionfs/union_subr.c b/sys/fs/unionfs/union_subr.c
index 6b88bef..d1d6e31 100644
--- a/sys/fs/unionfs/union_subr.c
+++ b/sys/fs/unionfs/union_subr.c
@@ -747,6 +747,7 @@ union_copyup(un, docopy, cred, p)
 	struct proc *p;
 {
 	int error;
+	struct mount *mp;
 	struct vnode *lvp, *uvp;
 
 	/*
@@ -759,9 +760,12 @@ union_copyup(un, docopy, cred, p)
 	if (error)
 		return (error);
 
-	error = union_vn_create(&uvp, un, p);
-	if (error)
+	if ((error = vn_start_write(un->un_dirvp, &mp, V_WAIT | PCATCH)) != 0)
 		return (error);
+	if ((error = union_vn_create(&uvp, un, p)) != 0) {
+		vn_finished_write(mp);
+		return (error);
+	}
 
 	lvp = un->un_lowervp;
 
@@ -785,6 +789,7 @@ union_copyup(un, docopy, cred, p)
 
 	}
 	VOP_UNLOCK(uvp, 0, p);
+	vn_finished_write(mp);
 	union_newupper(un, uvp);
 	KASSERT(uvp->v_usecount > 0, ("copy: uvp refcount 0: %d", uvp->v_usecount));
 	union_vn_close(uvp, FWRITE, cred, p);
@@ -910,11 +915,15 @@ union_mkshadow(um, dvp, cnp, vpp)
 	struct vattr va;
 	struct proc *p = cnp->cn_proc;
 	struct componentname cn;
+	struct mount *mp;
 
-	error = union_relookup(um, dvp, vpp, cnp, &cn,
-			cnp->cn_nameptr, cnp->cn_namelen);
-	if (error)
+	if ((error = vn_start_write(dvp, &mp, V_WAIT | PCATCH)) != 0)
+		return (error);
+	if ((error = union_relookup(um, dvp, vpp, cnp, &cn,
+			cnp->cn_nameptr, cnp->cn_namelen)) != 0) {
+		vn_finished_write(mp);
 		return (error);
+	}
 
 	if (*vpp) {
 		if (cn.cn_flags & HASBUF) {
@@ -925,6 +934,7 @@ union_mkshadow(um, dvp, cnp, vpp)
 			vrele(*vpp);
 		else
 			vput(*vpp);
+		vn_finished_write(mp);
 		*vpp = NULLVP;
 		return (EEXIST);
 	}
@@ -950,6 +960,7 @@ union_mkshadow(um, dvp, cnp, vpp)
 		cn.cn_flags &= ~HASBUF;
 	}
 	/*vput(dvp);*/
+	vn_finished_write(mp);
 	return (error);
 }
 
@@ -973,10 +984,15 @@ union_mkwhiteout(um, dvp, cnp, path)
 	struct proc *p = cnp->cn_proc;
 	struct vnode *wvp;
 	struct componentname cn;
+	struct mount *mp;
 
+	if ((error = vn_start_write(dvp, &mp, V_WAIT | PCATCH)) != 0)
+		return (error);
 	error = union_relookup(um, dvp, &wvp, cnp, &cn, path, strlen(path));
-	if (error)
+	if (error) {
+		vn_finished_write(mp);
 		return (error);
+	}
 
 	if (wvp) {
 		if (cn.cn_flags & HASBUF) {
@@ -987,6 +1003,7 @@ union_mkwhiteout(um, dvp, cnp, path)
 			vrele(wvp);
 		else
 			vput(wvp);
+		vn_finished_write(mp);
 		return (EEXIST);
 	}
 
@@ -998,6 +1015,7 @@ union_mkwhiteout(um, dvp, cnp, path)
 		zfree(namei_zone, cn.cn_pnbuf);
 		cn.cn_flags &= ~HASBUF;
 	}
+	vn_finished_write(mp);
 	return (error);
 }
 
diff --git a/sys/fs/unionfs/union_vnops.c b/sys/fs/unionfs/union_vnops.c
index 1c5ed5d..d7b95f3 100644
--- a/sys/fs/unionfs/union_vnops.c
+++ b/sys/fs/unionfs/union_vnops.c
@@ -93,6 +93,7 @@ static int	union_print __P((struct vop_print_args *ap));
 static int	union_read __P((struct vop_read_args *ap));
 static int	union_readdir __P((struct vop_readdir_args *ap));
 static int	union_readlink __P((struct vop_readlink_args *ap));
+static int	union_getwritemount __P((struct vop_getwritemount_args *ap));
 static int	union_reclaim __P((struct vop_reclaim_args *ap));
 static int	union_remove __P((struct vop_remove_args *ap));
 static int	union_rename __P((struct vop_rename_args *ap));
@@ -1681,6 +1682,20 @@ union_readlink(ap)
 	return (error);
 }
 
+static int
+union_getwritemount(ap)
+	struct vop_getwritemount_args /* {
+		struct vnode *a_vp;
+		struct mount **a_mpp;
+	} */ *ap;
+{
+	struct vnode *vp = UPPERVP(ap->a_vp);	/* upper-layer vnode */
+
+	if (vp == NULL)		/* nothing to forward the request to */
+		panic("union: missing upper layer in getwritemount");
+	return(VOP_GETWRITEMOUNT(vp, ap->a_mpp));	/* ask upper vnode */
+}
+
 /*
  *	union_inactive:
  *
@@ -1963,6 +1978,7 @@ static struct vnodeopv_entry_desc union_vnodeop_entries[] = {
 	{ &vop_read_desc,		(vop_t *) union_read },
 	{ &vop_readdir_desc,		(vop_t *) union_readdir },
 	{ &vop_readlink_desc,		(vop_t *) union_readlink },
+	{ &vop_getwritemount_desc,	(vop_t *) union_getwritemount },
 	{ &vop_reclaim_desc,		(vop_t *) union_reclaim },
 	{ &vop_remove_desc,		(vop_t *) union_remove },
 	{ &vop_rename_desc,		(vop_t *) union_rename },
diff --git a/sys/gnu/ext2fs/ext2_bmap.c b/sys/gnu/ext2fs/ext2_bmap.c
index 9056340..ab4ac52 100644
--- a/sys/gnu/ext2fs/ext2_bmap.c
+++ b/sys/gnu/ext2fs/ext2_bmap.c
@@ -47,6 +47,7 @@
 #include <sys/vnode.h>
 #include <sys/mount.h>
 #include <sys/resourcevar.h>
+#include <sys/stat.h>
 
 #include <ufs/ufs/extattr.h>
 #include <ufs/ufs/quota.h>
@@ -115,7 +116,7 @@ ufs_bmaparray(vp, bn, bnp, ap, nump, runp, runb)
 	struct indir a[NIADDR+1], *xap;
 	ufs_daddr_t daddr;
 	long metalbn;
-	int error, maxrun, num;
+	int error, num, maxrun = 0;
 
 	ip = VTOI(vp);
 	mp = vp->v_mount;
@@ -127,6 +128,7 @@ ufs_bmaparray(vp, bn, bnp, ap, nump, runp, runb)
 #endif
 
 	if (runp) {
+		maxrun = mp->mnt_iosize_max / mp->mnt_stat.f_iosize - 1;
 		*runp = 0;
 	}
 
@@ -134,7 +136,6 @@ ufs_bmaparray(vp, bn, bnp, ap, nump, runp, runb)
 		*runb = 0;
 	}
 
-	maxrun = mp->mnt_iosize_max / mp->mnt_stat.f_iosize - 1;
 
 	xap = ap == NULL ? a : ap;
 	if (!nump)
@@ -146,9 +147,12 @@ ufs_bmaparray(vp, bn, bnp, ap, nump, runp, runb)
 	num = *nump;
 	if (num == 0) {
 		*bnp = blkptrtodb(ump, ip->i_db[bn]);
-		if (*bnp == 0)
-			*bnp = -1;
-		else if (runp) {
+		if (*bnp == 0) {
+			if (ip->i_flags & SF_SNAPSHOT)
+				*bnp = blkptrtodb(ump, bn * ump->um_seqinc);
+			else
+				*bnp = -1;
+		} else if (runp) {
 			daddr_t bnb = bn;
 			for (++bn; bn < NDADDR && *runp < maxrun &&
 			    is_sequential(ump, ip->i_db[bn - 1], ip->i_db[bn]);
@@ -226,8 +230,13 @@ ufs_bmaparray(vp, bn, bnp, ap, nump, runp, runb)
 	if (bp)
 		bqrelse(bp);
 
-	daddr = blkptrtodb(ump, daddr);
-	*bnp = daddr == 0 ? -1 : daddr;
+	*bnp = blkptrtodb(ump, daddr);
+	if (*bnp == 0) {
+		if (ip->i_flags & SF_SNAPSHOT)
+			*bnp = blkptrtodb(ump, bn * ump->um_seqinc);
+		else
+			*bnp = -1;
+	}
 	return (0);
 }
 
diff --git a/sys/gnu/ext2fs/inode.h b/sys/gnu/ext2fs/inode.h
index 83960b0..6417a10 100644
--- a/sys/gnu/ext2fs/inode.h
+++ b/sys/gnu/ext2fs/inode.h
@@ -84,6 +84,7 @@ struct inode {
 	struct	 dquot *i_dquot[MAXQUOTAS]; /* Dquot structures. */
 	u_quad_t i_modrev;	/* Revision level for NFS lease. */
 	struct	 lockf *i_lockf;/* Head of byte-level lock list. */
+	struct	 inode *i_copyonwrite; /* copy-on-write list */
 	/*
 	 * Side effects; used during directory lookup.
 	 */
diff --git a/sys/gnu/fs/ext2fs/ext2_bmap.c b/sys/gnu/fs/ext2fs/ext2_bmap.c
index 9056340..ab4ac52 100644
--- a/sys/gnu/fs/ext2fs/ext2_bmap.c
+++ b/sys/gnu/fs/ext2fs/ext2_bmap.c
@@ -47,6 +47,7 @@
 #include <sys/vnode.h>
 #include <sys/mount.h>
 #include <sys/resourcevar.h>
+#include <sys/stat.h>
 
 #include <ufs/ufs/extattr.h>
 #include <ufs/ufs/quota.h>
@@ -115,7 +116,7 @@ ufs_bmaparray(vp, bn, bnp, ap, nump, runp, runb)
 	struct indir a[NIADDR+1], *xap;
 	ufs_daddr_t daddr;
 	long metalbn;
-	int error, maxrun, num;
+	int error, num, maxrun = 0;
 
 	ip = VTOI(vp);
 	mp = vp->v_mount;
@@ -127,6 +128,7 @@ ufs_bmaparray(vp, bn, bnp, ap, nump, runp, runb)
 #endif
 
 	if (runp) {
+		maxrun = mp->mnt_iosize_max / mp->mnt_stat.f_iosize - 1;
 		*runp = 0;
 	}
 
@@ -134,7 +136,6 @@ ufs_bmaparray(vp, bn, bnp, ap, nump, runp, runb)
 		*runb = 0;
 	}
 
-	maxrun = mp->mnt_iosize_max / mp->mnt_stat.f_iosize - 1;
 
 	xap = ap == NULL ? a : ap;
 	if (!nump)
@@ -146,9 +147,12 @@ ufs_bmaparray(vp, bn, bnp, ap, nump, runp, runb)
 	num = *nump;
 	if (num == 0) {
 		*bnp = blkptrtodb(ump, ip->i_db[bn]);
-		if (*bnp == 0)
-			*bnp = -1;
-		else if (runp) {
+		if (*bnp == 0) {
+			if (ip->i_flags & SF_SNAPSHOT)
+				*bnp = blkptrtodb(ump, bn * ump->um_seqinc);
+			else
+				*bnp = -1;
+		} else if (runp) {
 			daddr_t bnb = bn;
 			for (++bn; bn < NDADDR && *runp < maxrun &&
 			    is_sequential(ump, ip->i_db[bn - 1], ip->i_db[bn]);
@@ -226,8 +230,13 @@ ufs_bmaparray(vp, bn, bnp, ap, nump, runp, runb)
 	if (bp)
 		bqrelse(bp);
 
-	daddr = blkptrtodb(ump, daddr);
-	*bnp = daddr == 0 ? -1 : daddr;
+	*bnp = blkptrtodb(ump, daddr);
+	if (*bnp == 0) {
+		if (ip->i_flags & SF_SNAPSHOT)
+			*bnp = blkptrtodb(ump, bn * ump->um_seqinc);
+		else
+			*bnp = -1;
+	}
 	return (0);
 }
 
diff --git a/sys/gnu/fs/ext2fs/inode.h b/sys/gnu/fs/ext2fs/inode.h
index 83960b0..6417a10 100644
--- a/sys/gnu/fs/ext2fs/inode.h
+++ b/sys/gnu/fs/ext2fs/inode.h
@@ -84,6 +84,7 @@ struct inode {
 	struct	 dquot *i_dquot[MAXQUOTAS]; /* Dquot structures. */
 	u_quad_t i_modrev;	/* Revision level for NFS lease. */
 	struct	 lockf *i_lockf;/* Head of byte-level lock list. */
+	struct	 inode *i_copyonwrite; /* copy-on-write list */
 	/*
 	 * Side effects; used during directory lookup.
 	 */
diff --git a/sys/kern/kern_ktrace.c b/sys/kern/kern_ktrace.c
index d914fc2..b0530f9 100644
--- a/sys/kern/kern_ktrace.c
+++ b/sys/kern/kern_ktrace.c
@@ -457,7 +457,8 @@ ktrwrite(vp, kth, uio)
 {
 	struct uio auio;
 	struct iovec aiov[2];
-	register struct proc *p = curproc;	/* XXX */
+	struct proc *p = curproc;	/* XXX */
+	struct mount *mp;
 	int error;
 
 	if (vp == NULL)
@@ -479,6 +480,7 @@ ktrwrite(vp, kth, uio)
 		if (uio != NULL)
 			kth->ktr_len += uio->uio_resid;
 	}
+	vn_start_write(vp, &mp, V_WAIT);
 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p);
 	(void)VOP_LEASE(vp, p, p->p_ucred, LEASE_WRITE);
 	error = VOP_WRITE(vp, &auio, IO_UNIT | IO_APPEND, p->p_ucred);
@@ -487,6 +489,7 @@ ktrwrite(vp, kth, uio)
 		error = VOP_WRITE(vp, uio, IO_UNIT | IO_APPEND, p->p_ucred);
 	}
 	VOP_UNLOCK(vp, 0, p);
+	vn_finished_write(mp);
 	if (!error)
 		return;
 	/*
diff --git a/sys/kern/kern_sig.c b/sys/kern/kern_sig.c
index e96f471..2d87b63 100644
--- a/sys/kern/kern_sig.c
+++ b/sys/kern/kern_sig.c
@@ -1599,6 +1599,7 @@ coredump(p)
 	struct nameidata nd;
 	struct vattr vattr;
 	int error, error1, flags;
+	struct mount *mp;
 	char *name;			/* name of corefile */
 	off_t limit;
 	
@@ -1619,6 +1620,7 @@ coredump(p)
 	if (limit == 0)
 		return 0;
 
+restart:
 	name = expand_name(p->p_comm, p->p_ucred->cr_uid, p->p_pid);
 	NDINIT(&nd, LOOKUP, NOFOLLOW, UIO_SYSSPACE, name, p);
 	flags = O_CREAT | FWRITE | O_NOFOLLOW;
@@ -1628,6 +1630,14 @@ coredump(p)
 		return (error);
 	NDFREE(&nd, NDF_ONLY_PNBUF);
 	vp = nd.ni_vp;
+	if (vn_start_write(vp, &mp, V_NOWAIT) != 0) {
+		VOP_UNLOCK(vp, 0, p);
+		if ((error = vn_close(vp, FWRITE, cred, p)) != 0)
+			return (error);
+		if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0)
+			return (error);
+		goto restart;
+	}
 
 	/* Don't dump to non-regular files or files with links. */
 	if (vp->v_type != VREG ||
@@ -1647,6 +1657,7 @@ coredump(p)
 
 out:
 	VOP_UNLOCK(vp, 0, p);
+	vn_finished_write(mp);
 	error1 = vn_close(vp, FWRITE, cred, p);
 	if (error == 0)
 		error = error1;
diff --git a/sys/kern/tty_tty.c b/sys/kern/tty_tty.c
index 2d15c70..66f7a7b 100644
--- a/sys/kern/tty_tty.c
+++ b/sys/kern/tty_tty.c
@@ -133,13 +133,19 @@ cttywrite(dev, uio, flag)
 {
 	struct proc *p = uio->uio_procp;
 	struct vnode *ttyvp = cttyvp(uio->uio_procp);
+	struct mount *mp;
 	int error;
 
 	if (ttyvp == NULL)
 		return (EIO);
+	mp = NULL;
+	if (ttyvp->v_type != VCHR &&
+	    (error = vn_start_write(ttyvp, &mp, V_WAIT | PCATCH)) != 0)
+		return (error);
 	vn_lock(ttyvp, LK_EXCLUSIVE | LK_RETRY, p);
 	error = VOP_WRITE(ttyvp, uio, flag, NOCRED);
 	VOP_UNLOCK(ttyvp, 0, p);
+	vn_finished_write(mp);
 	return (error);
 }
 
diff --git a/sys/kern/uipc_usrreq.c b/sys/kern/uipc_usrreq.c
index 0103877..a0b4072 100644
--- a/sys/kern/uipc_usrreq.c
+++ b/sys/kern/uipc_usrreq.c
@@ -538,7 +538,8 @@ unp_bind(unp, nam, p)
 	struct proc *p;
 {
 	struct sockaddr_un *soun = (struct sockaddr_un *)nam;
-	register struct vnode *vp;
+	struct vnode *vp;
+	struct mount *mp;
 	struct vattr vattr;
 	int error, namelen;
 	struct nameidata nd;
@@ -552,6 +553,7 @@ unp_bind(unp, nam, p)
 		return EINVAL;
 	strncpy(buf, soun->sun_path, namelen);
 	buf[namelen] = 0;	/* null-terminate the string */
+restart:
 	NDINIT(&nd, CREATE, NOFOLLOW | LOCKPARENT, UIO_SYSSPACE,
 	    buf, p);
 /* SHOULD BE ABLE TO ADOPT EXISTING AND wakeup() ALA FIFO's */
@@ -559,14 +561,19 @@ unp_bind(unp, nam, p)
 	if (error)
 		return (error);
 	vp = nd.ni_vp;
-	if (vp != NULL) {
+	if (vp != NULL || vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
 		NDFREE(&nd, NDF_ONLY_PNBUF);
 		if (nd.ni_dvp == vp)
 			vrele(nd.ni_dvp);
 		else
 			vput(nd.ni_dvp);
-		vrele(vp);
-		return (EADDRINUSE);
+		if (vp != NULL) {
+			vrele(vp);
+			return (EADDRINUSE);
+		}
+		if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0)
+			return (error);
+		goto restart;
 	}
 	VATTR_NULL(&vattr);
 	vattr.va_type = VSOCK;
@@ -582,6 +589,7 @@ unp_bind(unp, nam, p)
 	unp->unp_vnode = vp;
 	unp->unp_addr = (struct sockaddr_un *)dup_sockaddr(nam, 1);
 	VOP_UNLOCK(vp, 0, p);
+	vn_finished_write(mp);
 	return (0);
 }
 
diff --git a/sys/kern/vfs_bio.c b/sys/kern/vfs_bio.c
index dba2151..96fbd63 100644
--- a/sys/kern/vfs_bio.c
+++ b/sys/kern/vfs_bio.c
@@ -1165,6 +1165,8 @@ brelse(struct buf * bp)
 	BUF_UNLOCK(bp);
 	bp->b_flags &= ~(B_ASYNC | B_NOCACHE | B_AGE | B_RELBUF);
 	bp->b_ioflags &= ~BIO_ORDERED;
+	if ((bp->b_flags & B_DELWRI) == 0 && (bp->b_xflags & BX_VNDIRTY))
+		panic("brelse: not dirty");
 	splx(s);
 }
 
@@ -1225,6 +1227,8 @@ bqrelse(struct buf * bp)
 	BUF_UNLOCK(bp);
 	bp->b_flags &= ~(B_ASYNC | B_NOCACHE | B_AGE | B_RELBUF);
 	bp->b_ioflags &= ~BIO_ORDERED;
+	if ((bp->b_flags & B_DELWRI) == 0 && (bp->b_xflags & BX_VNDIRTY))
+		panic("bqrelse: not dirty");
 	splx(s);
 }
 
@@ -1420,7 +1424,7 @@ getnewbuf(int slpflag, int slptimeo, int size, int maxsize)
 	int isspecial;
 	static int flushingbufs;
 
-	if (curproc && (curproc->p_flag & P_BUFEXHAUST) == 0)
+	if (curproc && (curproc->p_flag & (P_COWINPROGRESS|P_BUFEXHAUST)) == 0)
 		isspecial = 0;
 	else
 		isspecial = 1;
diff --git a/sys/kern/vfs_default.c b/sys/kern/vfs_default.c
index f478aa2..00f9beb 100644
--- a/sys/kern/vfs_default.c
+++ b/sys/kern/vfs_default.c
@@ -500,6 +500,21 @@ vop_noislocked(ap)
 	return (lockstatus(vp->v_vnlock, ap->a_p));
 }
 
+/*
+ * Return our mount point (v_mount) via a_mpp; we take charge of writes.
+ */
+int
+vop_stdgetwritemount(ap)
+	struct vop_getwritemount_args /* {
+		struct vnode *a_vp;
+		struct mount **a_mpp;
+	} */ *ap;
+{
+
+	*(ap->a_mpp) = ap->a_vp->v_mount;
+	return (0);
+}
+
 /* 
  * vfs default ops
  * used to fill the vfs fucntion table to get reasonable default return values.
diff --git a/sys/kern/vfs_export.c b/sys/kern/vfs_export.c
index 6483660..0e5ec3f 100644
--- a/sys/kern/vfs_export.c
+++ b/sys/kern/vfs_export.c
@@ -453,6 +453,7 @@ getnewvnode(tag, mp, vops, vpp)
 	int s, count;
 	struct proc *p = curproc;	/* XXX */
 	struct vnode *vp = NULL;
+	struct mount *vnmp;
 	vm_object_t object;
 
 	/*
@@ -491,7 +492,14 @@ getnewvnode(tag, mp, vops, vpp)
 			vp = NULL;
 			continue;
 		}
-		break;
+		/*
+		 * Skip over it if its filesystem is being suspended.
+		 */
+		if (vn_start_write(vp, &vnmp, V_NOWAIT) == 0)
+			break;
+		simple_unlock(&vp->v_interlock);
+		TAILQ_INSERT_TAIL(&vnode_free_list, vp, v_freelist);
+		vp = NULL;
 	}
 	if (vp) {
 		vp->v_flag |= VDOOMED;
@@ -504,6 +512,7 @@ getnewvnode(tag, mp, vops, vpp)
 		} else {
 			simple_unlock(&vp->v_interlock);
 		}
+		vn_finished_write(vnmp);
 
 #ifdef INVARIANTS
 		{
@@ -515,6 +524,8 @@ getnewvnode(tag, mp, vops, vpp)
 			if (vp->v_numoutput)
 				panic("Clean vnode has pending I/O's");
 			splx(s);
+			if (vp->v_writecount != 0)
+				panic("Non-zero write count");
 		}
 #endif
 		vp->v_flag = 0;
@@ -523,7 +534,6 @@ getnewvnode(tag, mp, vops, vpp)
 		vp->v_cstart = 0;
 		vp->v_clen = 0;
 		vp->v_socket = 0;
-		vp->v_writecount = 0;	/* XXX */
 	} else {
 		simple_unlock(&vnode_free_list_slock);
 		vp = (struct vnode *) zalloc(vnode_zone);
@@ -946,6 +956,7 @@ sched_sync(void)
 {
 	struct synclist *slp;
 	struct vnode *vp;
+	struct mount *mp;
 	long starttime;
 	int s;
 	struct proc *p = updateproc;
@@ -970,10 +981,12 @@ sched_sync(void)
 		splx(s);
 
 		while ((vp = LIST_FIRST(slp)) != NULL) {
-			if (VOP_ISLOCKED(vp, NULL) == 0) {
+			if (VOP_ISLOCKED(vp, NULL) == 0 &&
+			    vn_start_write(vp, &mp, V_NOWAIT) == 0) {
 				vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p);
 				(void) VOP_FSYNC(vp, p->p_ucred, MNT_LAZY, p);
 				VOP_UNLOCK(vp, 0, p);
+				vn_finished_write(mp);
 			}
 			s = splbio();
 			if (LIST_FIRST(slp) == vp) {
@@ -1386,6 +1399,7 @@ vrele(vp)
 	struct proc *p = curproc;	/* XXX */
 
 	KASSERT(vp != NULL, ("vrele: null vp"));
+	KASSERT(vp->v_writecount < vp->v_usecount, ("vrele: missed vn_close"));
 
 	simple_lock(&vp->v_interlock);
 
@@ -1427,6 +1441,7 @@ vput(vp)
 	struct proc *p = curproc;	/* XXX */
 
 	KASSERT(vp != NULL, ("vput: null vp"));
+	KASSERT(vp->v_writecount < vp->v_usecount, ("vput: missed vn_close"));
 
 	simple_lock(&vp->v_interlock);
 
@@ -1632,6 +1647,8 @@ vclean(vp, flags, p)
 	 * If the flush fails, just toss the buffers.
 	 */
 	if (flags & DOCLOSE) {
+		if (TAILQ_FIRST(&vp->v_dirtyblkhd) != NULL)
+			(void) vn_write_suspend_wait(vp, V_WAIT);
 		if (vinvalbuf(vp, V_SAVE, NOCRED, p, 0, 0) != 0)
 			vinvalbuf(vp, 0, NOCRED, p, 0, 0);
 	}
@@ -2785,12 +2802,18 @@ sync_fsync(ap)
 		simple_unlock(&mountlist_slock);
 		return (0);
 	}
+	if (vn_start_write(NULL, &mp, V_NOWAIT) != 0) {
+		/* mountlist_slock is not held here; only undo the busy. */
+		vfs_unbusy(mp, p);
+		return (0);
+	}
 	asyncflag = mp->mnt_flag & MNT_ASYNC;
 	mp->mnt_flag &= ~MNT_ASYNC;
 	vfs_msync(mp, MNT_NOWAIT);
 	VFS_SYNC(mp, MNT_LAZY, ap->a_cred, p);
 	if (asyncflag)
 		mp->mnt_flag |= MNT_ASYNC;
+	vn_finished_write(mp);
 	vfs_unbusy(mp, p);
 	return (0);
 }
diff --git a/sys/kern/vfs_extattr.c b/sys/kern/vfs_extattr.c
index 65a297ca..404114a 100644
--- a/sys/kern/vfs_extattr.c
+++ b/sys/kern/vfs_extattr.c
@@ -164,8 +164,8 @@ mount(p, uap)
 			vput(vp);
 			return (EOPNOTSUPP);	/* Needs translation */
 		}
-		mp->mnt_flag |=
-		    SCARG(uap, flags) & (MNT_RELOAD | MNT_FORCE | MNT_UPDATE);
+		mp->mnt_flag |= SCARG(uap, flags) &
+		    (MNT_RELOAD | MNT_FORCE | MNT_UPDATE | MNT_SNAPSHOT);
 		/*
 		 * Only root, or the user that did the original mount is
 		 * permitted to update it.
@@ -303,7 +303,8 @@ update:
 		vrele(vp);
 		if (mp->mnt_kern_flag & MNTK_WANTRDWR)
 			mp->mnt_flag &= ~MNT_RDONLY;
-		mp->mnt_flag &=~ (MNT_UPDATE | MNT_RELOAD | MNT_FORCE);
+		mp->mnt_flag &=~
+		    (MNT_UPDATE | MNT_RELOAD | MNT_FORCE | MNT_SNAPSHOT);
 		mp->mnt_kern_flag &=~ MNTK_WANTRDWR;
 		if (error) {
 			mp->mnt_flag = flag;
@@ -458,7 +459,7 @@ unmount(p, uap)
  */
 int
 dounmount(mp, flags, p)
-	register struct mount *mp;
+	struct mount *mp;
 	int flags;
 	struct proc *p;
 {
@@ -469,6 +470,7 @@ dounmount(mp, flags, p)
 	simple_lock(&mountlist_slock);
 	mp->mnt_kern_flag |= MNTK_UNMOUNT;
 	lockmgr(&mp->mnt_lock, LK_DRAIN | LK_INTERLOCK, &mountlist_slock, p);
+	vn_start_write(NULL, &mp, V_WAIT);
 
 	if (mp->mnt_flag & MNT_EXPUBLIC)
 		vfs_setpublicfs(NULL, NULL, NULL);
@@ -481,8 +483,10 @@ dounmount(mp, flags, p)
 		vrele(mp->mnt_syncer);
 	if (((mp->mnt_flag & MNT_RDONLY) ||
 	     (error = VFS_SYNC(mp, MNT_WAIT, p->p_ucred, p)) == 0) ||
-	    (flags & MNT_FORCE))
+	    (flags & MNT_FORCE)) {
 		error = VFS_UNMOUNT(mp, flags, p);
+	}
+	vn_finished_write(mp);
 	simple_lock(&mountlist_slock);
 	if (error) {
 		if ((mp->mnt_flag & MNT_RDONLY) == 0 && mp->mnt_syncer == NULL)
@@ -530,7 +534,7 @@ sync(p, uap)
 	struct proc *p;
 	struct sync_args *uap;
 {
-	register struct mount *mp, *nmp;
+	struct mount *mp, *nmp;
 	int asyncflag;
 
 	simple_lock(&mountlist_slock);
@@ -539,13 +543,15 @@ sync(p, uap)
 			nmp = TAILQ_NEXT(mp, mnt_list);
 			continue;
 		}
-		if ((mp->mnt_flag & MNT_RDONLY) == 0) {
+		if ((mp->mnt_flag & MNT_RDONLY) == 0 &&
+		    vn_start_write(NULL, &mp, V_NOWAIT) == 0) {
 			asyncflag = mp->mnt_flag & MNT_ASYNC;
 			mp->mnt_flag &= ~MNT_ASYNC;
 			vfs_msync(mp, MNT_NOWAIT);
 			VFS_SYNC(mp, MNT_NOWAIT,
-				((p != NULL) ? p->p_ucred : NOCRED), p);
+			    ((p != NULL) ? p->p_ucred : NOCRED), p);
 			mp->mnt_flag |= asyncflag;
+			vn_finished_write(mp);
 		}
 		simple_lock(&mountlist_slock);
 		nmp = TAILQ_NEXT(mp, mnt_list);
@@ -593,7 +599,7 @@ quotactl(p, uap)
 		syscallarg(caddr_t) arg;
 	} */ *uap;
 {
-	register struct mount *mp;
+	struct mount *mp;
 	int error;
 	struct nameidata nd;
 
@@ -602,11 +608,15 @@ quotactl(p, uap)
 	NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, SCARG(uap, path), p);
 	if ((error = namei(&nd)) != 0)
 		return (error);
-	mp = nd.ni_vp->v_mount;
 	NDFREE(&nd, NDF_ONLY_PNBUF);
+	error = vn_start_write(nd.ni_vp, &mp, V_WAIT | PCATCH);
 	vrele(nd.ni_vp);
-	return (VFS_QUOTACTL(mp, SCARG(uap, cmd), SCARG(uap, uid),
-	    SCARG(uap, arg), p));
+	if (error)
+		return (error);
+	error = VFS_QUOTACTL(mp, SCARG(uap, cmd), SCARG(uap, uid),
+	    SCARG(uap, arg), p);
+	vn_finished_write(mp);
+	return (error);
 }
 
 /*
@@ -972,6 +982,7 @@ open(p, uap)
 	struct file *fp;
 	struct vnode *vp;
 	struct vattr vat;
+	struct mount *mp;
 	int cmode, flags, oflags;
 	struct file *nfp;
 	int type, indx, error;
@@ -1029,12 +1040,15 @@ open(p, uap)
 		fp->f_flag |= FHASLOCK;
 	}
 	if (flags & O_TRUNC) {
+		if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
+			goto bad;
 		VOP_LEASE(vp, p, p->p_ucred, LEASE_WRITE);
 		VATTR_NULL(&vat);
 		vat.va_size = 0;
 		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p);
 		error = VOP_SETATTR(vp, &vat, p->p_ucred, p);
 		VOP_UNLOCK(vp, 0, p);
+		vn_finished_write(mp);
 		if (error)
 			goto bad;
 	}
@@ -1101,7 +1115,8 @@ mknod(p, uap)
 		syscallarg(int) dev;
 	} */ *uap;
 {
-	register struct vnode *vp;
+	struct vnode *vp;
+	struct mount *mp;
 	struct vattr vattr;
 	int error;
 	int whiteout = 0;
@@ -1118,14 +1133,16 @@ mknod(p, uap)
 	}
 	if (error)
 		return (error);
+restart:
 	bwillwrite();
 	NDINIT(&nd, CREATE, LOCKPARENT, UIO_USERSPACE, SCARG(uap, path), p);
 	if ((error = namei(&nd)) != 0)
 		return (error);
 	vp = nd.ni_vp;
-	if (vp != NULL)
+	if (vp != NULL) {
+		vrele(vp);
 		error = EEXIST;
-	else {
+	} else {
 		VATTR_NULL(&vattr);
 		vattr.va_mode = (SCARG(uap, mode) & ALLPERMS) &~ p->p_fd->fd_cmask;
 		vattr.va_rdev = SCARG(uap, dev);
@@ -1149,6 +1166,13 @@ mknod(p, uap)
 			break;
 		}
 	}
+	if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
+		NDFREE(&nd, NDF_ONLY_PNBUF);
+		vput(nd.ni_dvp);
+		if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0)
+			return (error);
+		goto restart;
+	}
 	if (!error) {
 		VOP_LEASE(nd.ni_dvp, p, p->p_ucred, LEASE_WRITE);
 		if (whiteout)
@@ -1159,17 +1183,10 @@ mknod(p, uap)
 			if (error == 0)
 				vput(nd.ni_vp);
 		}
-		NDFREE(&nd, NDF_ONLY_PNBUF);
-		vput(nd.ni_dvp);
-	} else {
-		NDFREE(&nd, NDF_ONLY_PNBUF);
-		if (nd.ni_dvp == vp)
-			vrele(nd.ni_dvp);
-		else
-			vput(nd.ni_dvp);
-		if (vp)
-			vrele(vp);
 	}
+	NDFREE(&nd, NDF_ONLY_PNBUF);
+	vput(nd.ni_dvp);
+	vn_finished_write(mp);
 	ASSERT_VOP_UNLOCKED(nd.ni_dvp, "mknod");
 	ASSERT_VOP_UNLOCKED(nd.ni_vp, "mknod");
 	return (error);
@@ -1193,23 +1210,29 @@ mkfifo(p, uap)
 		syscallarg(int) mode;
 	} */ *uap;
 {
+	struct mount *mp;
 	struct vattr vattr;
 	int error;
 	struct nameidata nd;
 
+restart:
 	bwillwrite();
 	NDINIT(&nd, CREATE, LOCKPARENT, UIO_USERSPACE, SCARG(uap, path), p);
 	if ((error = namei(&nd)) != 0)
 		return (error);
 	if (nd.ni_vp != NULL) {
 		NDFREE(&nd, NDF_ONLY_PNBUF);
-		if (nd.ni_dvp == nd.ni_vp)
-			vrele(nd.ni_dvp);
-		else
-			vput(nd.ni_dvp);
 		vrele(nd.ni_vp);
+		vput(nd.ni_dvp);
 		return (EEXIST);
 	}
+	if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
+		NDFREE(&nd, NDF_ONLY_PNBUF);
+		vput(nd.ni_dvp);
+		if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0)
+			return (error);
+		goto restart;
+	}
 	VATTR_NULL(&vattr);
 	vattr.va_type = VFIFO;
 	vattr.va_mode = (SCARG(uap, mode) & ALLPERMS) &~ p->p_fd->fd_cmask;
@@ -1219,6 +1242,7 @@ mkfifo(p, uap)
 		vput(nd.ni_vp);
 	NDFREE(&nd, NDF_ONLY_PNBUF);
 	vput(nd.ni_dvp);
+	vn_finished_write(mp);
 	return (error);
 }
 
@@ -1240,7 +1264,8 @@ link(p, uap)
 		syscallarg(char *) link;
 	} */ *uap;
 {
-	register struct vnode *vp;
+	struct vnode *vp;
+	struct mount *mp;
 	struct nameidata nd;
 	int error;
 
@@ -1250,30 +1275,29 @@ link(p, uap)
 		return (error);
 	NDFREE(&nd, NDF_ONLY_PNBUF);
 	vp = nd.ni_vp;
-	if (vp->v_type == VDIR)
-		error = EPERM;		/* POSIX */
-	else {
-		NDINIT(&nd, CREATE, LOCKPARENT|NOOBJ, UIO_USERSPACE, SCARG(uap, link), p);
-		error = namei(&nd);
-		if (!error) {
-			if (nd.ni_vp != NULL) {
-				if (nd.ni_vp)
-					vrele(nd.ni_vp);
-				error = EEXIST;
-			} else {
-				VOP_LEASE(nd.ni_dvp, p, p->p_ucred,
-				    LEASE_WRITE);
-				VOP_LEASE(vp, p, p->p_ucred, LEASE_WRITE);
-				error = VOP_LINK(nd.ni_dvp, vp, &nd.ni_cnd);
-			}
-			NDFREE(&nd, NDF_ONLY_PNBUF);
-			if (nd.ni_dvp == nd.ni_vp)
-				vrele(nd.ni_dvp);
-			else
-				vput(nd.ni_dvp);
+	if (vp->v_type == VDIR) {
+		vrele(vp);
+		return (EPERM);		/* POSIX */
+	}
+	if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0) {
+		vrele(vp);
+		return (error);
+	}
+	NDINIT(&nd, CREATE, LOCKPARENT|NOOBJ, UIO_USERSPACE, SCARG(uap, link), p);
+	if ((error = namei(&nd)) == 0) {
+		if (nd.ni_vp != NULL) {
+			vrele(nd.ni_vp);
+			error = EEXIST;
+		} else {
+			VOP_LEASE(nd.ni_dvp, p, p->p_ucred, LEASE_WRITE);
+			VOP_LEASE(vp, p, p->p_ucred, LEASE_WRITE);
+			error = VOP_LINK(nd.ni_dvp, vp, &nd.ni_cnd);
 		}
+		NDFREE(&nd, NDF_ONLY_PNBUF);
+		vput(nd.ni_dvp);
 	}
 	vrele(vp);
+	vn_finished_write(mp);
 	ASSERT_VOP_UNLOCKED(nd.ni_dvp, "link");
 	ASSERT_VOP_UNLOCKED(nd.ni_vp, "link");
 	return (error);
@@ -1297,6 +1321,7 @@ symlink(p, uap)
 		syscallarg(char *) link;
 	} */ *uap;
 {
+	struct mount *mp;
 	struct vattr vattr;
 	char *path;
 	int error;
@@ -1305,20 +1330,25 @@ symlink(p, uap)
 	path = zalloc(namei_zone);
 	if ((error = copyinstr(SCARG(uap, path), path, MAXPATHLEN, NULL)) != 0)
 		goto out;
+restart:
 	bwillwrite();
 	NDINIT(&nd, CREATE, LOCKPARENT|NOOBJ, UIO_USERSPACE, SCARG(uap, link), p);
 	if ((error = namei(&nd)) != 0)
 		goto out;
 	if (nd.ni_vp) {
 		NDFREE(&nd, NDF_ONLY_PNBUF);
-		if (nd.ni_dvp == nd.ni_vp)
-			vrele(nd.ni_dvp);
-		else
-			vput(nd.ni_dvp);
 		vrele(nd.ni_vp);
+		vput(nd.ni_dvp);
 		error = EEXIST;
 		goto out;
 	}
+	if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
+		NDFREE(&nd, NDF_ONLY_PNBUF);
+		vput(nd.ni_dvp);
+		if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0)
+			goto out;	/* path is still allocated */
+		goto restart;
+	}
 	VATTR_NULL(&vattr);
 	vattr.va_mode = ACCESSPERMS &~ p->p_fd->fd_cmask;
 	VOP_LEASE(nd.ni_dvp, p, p->p_ucred, LEASE_WRITE);
@@ -1327,6 +1357,7 @@ symlink(p, uap)
 	if (error == 0)
 		vput(nd.ni_vp);
 	vput(nd.ni_dvp);
+	vn_finished_write(mp);
 	ASSERT_VOP_UNLOCKED(nd.ni_dvp, "symlink");
 	ASSERT_VOP_UNLOCKED(nd.ni_vp, "symlink");
 out:
@@ -1346,8 +1377,10 @@ undelete(p, uap)
 	} */ *uap;
 {
 	int error;
+	struct mount *mp;
 	struct nameidata nd;
 
+restart:
 	bwillwrite();
 	NDINIT(&nd, DELETE, LOCKPARENT|DOWHITEOUT, UIO_USERSPACE,
 	    SCARG(uap, path), p);
@@ -1357,19 +1390,23 @@ undelete(p, uap)
 
 	if (nd.ni_vp != NULLVP || !(nd.ni_cnd.cn_flags & ISWHITEOUT)) {
 		NDFREE(&nd, NDF_ONLY_PNBUF);
-		if (nd.ni_dvp == nd.ni_vp)
-			vrele(nd.ni_dvp);
-		else
-			vput(nd.ni_dvp);
 		if (nd.ni_vp)
 			vrele(nd.ni_vp);
+		vput(nd.ni_dvp);
 		return (EEXIST);
 	}
-
+	if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
+		NDFREE(&nd, NDF_ONLY_PNBUF);
+		vput(nd.ni_dvp);
+		if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0)
+			return (error);
+		goto restart;
+	}
 	VOP_LEASE(nd.ni_dvp, p, p->p_ucred, LEASE_WRITE);
 	error = VOP_WHITEOUT(nd.ni_dvp, &nd.ni_cnd, DELETE);
 	NDFREE(&nd, NDF_ONLY_PNBUF);
 	vput(nd.ni_dvp);
+	vn_finished_write(mp);
 	ASSERT_VOP_UNLOCKED(nd.ni_dvp, "undelete");
 	ASSERT_VOP_UNLOCKED(nd.ni_vp, "undelete");
 	return (error);
@@ -1391,18 +1428,17 @@ unlink(p, uap)
 		syscallarg(char *) path;
 	} */ *uap;
 {
-	register struct vnode *vp;
+	struct mount *mp;
+	struct vnode *vp;
 	int error;
 	struct nameidata nd;
 
+restart:
 	bwillwrite();
 	NDINIT(&nd, DELETE, LOCKPARENT, UIO_USERSPACE, SCARG(uap, path), p);
 	if ((error = namei(&nd)) != 0)
 		return (error);
 	vp = nd.ni_vp;
-	VOP_LEASE(vp, p, p->p_ucred, LEASE_WRITE);
-	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p);
-
 	if (vp->v_type == VDIR)
 		error = EPERM;		/* POSIX */
 	else {
@@ -1414,18 +1450,24 @@ unlink(p, uap)
 		if (vp->v_flag & VROOT)
 			error = EBUSY;
 	}
-
+	if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
+		NDFREE(&nd, NDF_ONLY_PNBUF);
+		vrele(vp);
+		vput(nd.ni_dvp);
+		if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0)
+			return (error);
+		goto restart;
+	}
+	VOP_LEASE(vp, p, p->p_ucred, LEASE_WRITE);
+	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p);
 	if (!error) {
 		VOP_LEASE(nd.ni_dvp, p, p->p_ucred, LEASE_WRITE);
 		error = VOP_REMOVE(nd.ni_dvp, vp, &nd.ni_cnd);
 	}
 	NDFREE(&nd, NDF_ONLY_PNBUF);
-	if (nd.ni_dvp == vp)
-		vrele(nd.ni_dvp);
-	else
-		vput(nd.ni_dvp);
-	if (vp != NULLVP)
-		vput(vp);
+	vput(nd.ni_dvp);
+	vput(vp);
+	vn_finished_write(mp);
 	ASSERT_VOP_UNLOCKED(nd.ni_dvp, "unlink");
 	ASSERT_VOP_UNLOCKED(nd.ni_vp, "unlink");
 	return (error);
@@ -1936,6 +1978,7 @@ setfflags(p, vp, flags)
 	int flags;
 {
 	int error;
+	struct mount *mp;
 	struct vattr vattr;
 
 	/*
@@ -1948,12 +1991,15 @@ setfflags(p, vp, flags)
 	    ((error = suser_xxx(p->p_ucred, p, PRISON_ROOT)) != 0))
 		return (error);
 
+	if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
+		return (error);
 	VOP_LEASE(vp, p, p->p_ucred, LEASE_WRITE);
 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p);
 	VATTR_NULL(&vattr);
 	vattr.va_flags = flags;
 	error = VOP_SETATTR(vp, &vattr, p->p_ucred, p);
 	VOP_UNLOCK(vp, 0, p);
+	vn_finished_write(mp);
 	return (error);
 }
 
@@ -2020,14 +2066,18 @@ setfmode(p, vp, mode)
 	int mode;
 {
 	int error;
+	struct mount *mp;
 	struct vattr vattr;
 
+	if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
+		return (error);
 	VOP_LEASE(vp, p, p->p_ucred, LEASE_WRITE);
 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p);
 	VATTR_NULL(&vattr);
 	vattr.va_mode = mode & ALLPERMS;
 	error = VOP_SETATTR(vp, &vattr, p->p_ucred, p);
 	VOP_UNLOCK(vp, 0, p);
+	vn_finished_write(mp);
 	return error;
 }
 
@@ -2125,8 +2175,11 @@ setfown(p, vp, uid, gid)
 	gid_t gid;
 {
 	int error;
+	struct mount *mp;
 	struct vattr vattr;
 
+	if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
+		return (error);
 	VOP_LEASE(vp, p, p->p_ucred, LEASE_WRITE);
 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p);
 	VATTR_NULL(&vattr);
@@ -2134,6 +2187,7 @@ setfown(p, vp, uid, gid)
 	vattr.va_gid = gid;
 	error = VOP_SETATTR(vp, &vattr, p->p_ucred, p);
 	VOP_UNLOCK(vp, 0, p);
+	vn_finished_write(mp);
 	return error;
 }
 
@@ -2259,8 +2313,11 @@ setutimes(p, vp, ts, nullflag)
 	int nullflag;
 {
 	int error;
+	struct mount *mp;
 	struct vattr vattr;
 
+	if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
+		return (error);
 	VOP_LEASE(vp, p, p->p_ucred, LEASE_WRITE);
 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p);
 	VATTR_NULL(&vattr);
@@ -2270,6 +2327,7 @@ setutimes(p, vp, ts, nullflag)
 		vattr.va_vaflags |= VA_UTIMES_NULL;
 	error = VOP_SETATTR(vp, &vattr, p->p_ucred, p);
 	VOP_UNLOCK(vp, 0, p);
+	vn_finished_write(mp);
 	return error;
 }
 
@@ -2394,7 +2452,8 @@ truncate(p, uap)
 		syscallarg(off_t) length;
 	} */ *uap;
 {
-	register struct vnode *vp;
+	struct mount *mp;
+	struct vnode *vp;
 	struct vattr vattr;
 	int error;
 	struct nameidata nd;
@@ -2405,6 +2464,10 @@ truncate(p, uap)
 	if ((error = namei(&nd)) != 0)
 		return (error);
 	vp = nd.ni_vp;
+	if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0) {
+		vrele(vp);
+		return (error);
+	}
 	NDFREE(&nd, NDF_ONLY_PNBUF);
 	VOP_LEASE(vp, p, p->p_ucred, LEASE_WRITE);
 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p);
@@ -2417,6 +2480,7 @@ truncate(p, uap)
 		error = VOP_SETATTR(vp, &vattr, p->p_ucred, p);
 	}
 	vput(vp);
+	vn_finished_write(mp);
 	return (error);
 }
 
@@ -2440,6 +2504,7 @@ ftruncate(p, uap)
 		syscallarg(off_t) length;
 	} */ *uap;
 {
+	struct mount *mp;
 	struct vattr vattr;
 	struct vnode *vp;
 	struct file *fp;
@@ -2452,6 +2517,8 @@ ftruncate(p, uap)
 	if ((fp->f_flag & FWRITE) == 0)
 		return (EINVAL);
 	vp = (struct vnode *)fp->f_data;
+	if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
+		return (error);
 	VOP_LEASE(vp, p, p->p_ucred, LEASE_WRITE);
 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p);
 	if (vp->v_type == VDIR)
@@ -2462,6 +2529,7 @@ ftruncate(p, uap)
 		error = VOP_SETATTR(vp, &vattr, fp->f_cred, p);
 	}
 	VOP_UNLOCK(vp, 0, p);
+	vn_finished_write(mp);
 	return (error);
 }
 
@@ -2541,13 +2609,16 @@ fsync(p, uap)
 		syscallarg(int) fd;
 	} */ *uap;
 {
-	register struct vnode *vp;
+	struct vnode *vp;
+	struct mount *mp;
 	struct file *fp;
 	int error;
 
 	if ((error = getvnode(p->p_fd, SCARG(uap, fd), &fp)) != 0)
 		return (error);
 	vp = (struct vnode *)fp->f_data;
+	if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
+		return (error);
 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p);
 	if (vp->v_object)
 		vm_object_page_clean(vp->v_object, 0, 0, 0);
@@ -2558,6 +2629,7 @@ fsync(p, uap)
 #endif
 
 	VOP_UNLOCK(vp, 0, p);
+	vn_finished_write(mp);
 	return (error);
 }
 
@@ -2580,7 +2652,8 @@ rename(p, uap)
 		syscallarg(char *) to;
 	} */ *uap;
 {
-	register struct vnode *tvp, *fvp, *tdvp;
+	struct mount *mp;
+	struct vnode *tvp, *fvp, *tdvp;
 	struct nameidata fromnd, tond;
 	int error;
 
@@ -2590,6 +2663,12 @@ rename(p, uap)
 	if ((error = namei(&fromnd)) != 0)
 		return (error);
 	fvp = fromnd.ni_vp;
+	if ((error = vn_start_write(fvp, &mp, V_WAIT | PCATCH)) != 0) {
+		NDFREE(&fromnd, NDF_ONLY_PNBUF);
+		vrele(fromnd.ni_dvp);
+		vrele(fvp);
+		goto out1;
+	}
 	NDINIT(&tond, RENAME, LOCKPARENT | LOCKLEAF | NOCACHE | SAVESTART | NOOBJ,
 	    UIO_USERSPACE, SCARG(uap, to), p);
 	if (fromnd.ni_vp->v_type == VDIR)
@@ -2652,6 +2731,7 @@ out:
 		vrele(fvp);
 	}
 	vrele(tond.ni_startdir);
+	vn_finished_write(mp);
 	ASSERT_VOP_UNLOCKED(fromnd.ni_dvp, "rename");
 	ASSERT_VOP_UNLOCKED(fromnd.ni_vp, "rename");
 	ASSERT_VOP_UNLOCKED(tond.ni_dvp, "rename");
@@ -2682,11 +2762,13 @@ mkdir(p, uap)
 		syscallarg(int) mode;
 	} */ *uap;
 {
-	register struct vnode *vp;
+	struct mount *mp;
+	struct vnode *vp;
 	struct vattr vattr;
 	int error;
 	struct nameidata nd;
 
+restart:
 	bwillwrite();
 	NDINIT(&nd, CREATE, LOCKPARENT, UIO_USERSPACE, SCARG(uap, path), p);
 	nd.ni_cnd.cn_flags |= WILLBEDIR;
@@ -2695,13 +2777,17 @@ mkdir(p, uap)
 	vp = nd.ni_vp;
 	if (vp != NULL) {
 		NDFREE(&nd, NDF_ONLY_PNBUF);
-		if (nd.ni_dvp == vp)
-			vrele(nd.ni_dvp);
-		else
-			vput(nd.ni_dvp);
 		vrele(vp);
+		vput(nd.ni_dvp);
 		return (EEXIST);
 	}
+	if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
+		NDFREE(&nd, NDF_ONLY_PNBUF);
+		vput(nd.ni_dvp);
+		if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0)
+			return (error);
+		goto restart;
+	}
 	VATTR_NULL(&vattr);
 	vattr.va_type = VDIR;
 	vattr.va_mode = (SCARG(uap, mode) & ACCESSPERMS) &~ p->p_fd->fd_cmask;
@@ -2711,6 +2797,7 @@ mkdir(p, uap)
 	vput(nd.ni_dvp);
 	if (!error)
 		vput(nd.ni_vp);
+	vn_finished_write(mp);
 	ASSERT_VOP_UNLOCKED(nd.ni_dvp, "mkdir");
 	ASSERT_VOP_UNLOCKED(nd.ni_vp, "mkdir");
 	return (error);
@@ -2732,10 +2819,12 @@ rmdir(p, uap)
 		syscallarg(char *) path;
 	} */ *uap;
 {
-	register struct vnode *vp;
+	struct mount *mp;
+	struct vnode *vp;
 	int error;
 	struct nameidata nd;
 
+restart:
 	bwillwrite();
 	NDINIT(&nd, DELETE, LOCKPARENT | LOCKLEAF, UIO_USERSPACE,
 	    SCARG(uap, path), p);
@@ -2756,21 +2845,32 @@ rmdir(p, uap)
 	/*
 	 * The root of a mounted filesystem cannot be deleted.
 	 */
-	if (vp->v_flag & VROOT)
+	if (vp->v_flag & VROOT) {
 		error = EBUSY;
-	else {
-		VOP_LEASE(nd.ni_dvp, p, p->p_ucred, LEASE_WRITE);
-		VOP_LEASE(vp, p, p->p_ucred, LEASE_WRITE);
-		error = VOP_RMDIR(nd.ni_dvp, nd.ni_vp, &nd.ni_cnd);
+		goto out;
 	}
+	if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
+		NDFREE(&nd, NDF_ONLY_PNBUF);
+		if (nd.ni_dvp == vp)
+			vrele(nd.ni_dvp);
+		else
+			vput(nd.ni_dvp);
+		vput(vp);
+		if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0)
+			return (error);
+		goto restart;
+	}
+	VOP_LEASE(nd.ni_dvp, p, p->p_ucred, LEASE_WRITE);
+	VOP_LEASE(vp, p, p->p_ucred, LEASE_WRITE);
+	error = VOP_RMDIR(nd.ni_dvp, nd.ni_vp, &nd.ni_cnd);
+	vn_finished_write(mp);
 out:
 	NDFREE(&nd, NDF_ONLY_PNBUF);
 	if (nd.ni_dvp == vp)
 		vrele(nd.ni_dvp);
 	else
 		vput(nd.ni_dvp);
-	if (vp != NULLVP)
-		vput(vp);
+	vput(vp);
 	ASSERT_VOP_UNLOCKED(nd.ni_dvp, "rmdir");
 	ASSERT_VOP_UNLOCKED(nd.ni_vp, "rmdir");
 	return (error);
@@ -3049,7 +3149,8 @@ revoke(p, uap)
 		syscallarg(char *) path;
 	} */ *uap;
 {
-	register struct vnode *vp;
+	struct mount *mp;
+	struct vnode *vp;
 	struct vattr vattr;
 	int error;
 	struct nameidata nd;
@@ -3068,8 +3169,11 @@ revoke(p, uap)
 	if (p->p_ucred->cr_uid != vattr.va_uid &&
 	    (error = suser_xxx(0, p, PRISON_ROOT)))
 		goto out;
+	if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
+		goto out;
 	if (vcount(vp) > 1)
 		VOP_REVOKE(vp, REVOKEALL);
+	vn_finished_write(mp);
 out:
 	vrele(vp);
 	return (error);
@@ -3228,11 +3332,16 @@ fhopen(p, uap)
 	}
 	if (fmode & O_TRUNC) {
 		VOP_UNLOCK(vp, 0, p);				/* XXX */
+		if ((error = vn_start_write(NULL, &mp, V_WAIT | PCATCH)) != 0) {
+			vrele(vp);
+			return (error);
+		}
 		VOP_LEASE(vp, p, p->p_ucred, LEASE_WRITE);
 		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p);	/* XXX */
 		VATTR_NULL(vap);
 		vap->va_size = 0;
 		error = VOP_SETATTR(vp, vap, p->p_ucred, p);
+		vn_finished_write(mp);
 		if (error)
 			goto bad;
 	}
@@ -3407,10 +3516,15 @@ extattrctl(p, uap)
 	NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, SCARG(uap, path), p);
 	if ((error = namei(&nd)) != 0)
 		return (error);
-	mp = nd.ni_vp->v_mount;
+	error = vn_start_write(nd.ni_vp, &mp, V_WAIT | PCATCH);
 	NDFREE(&nd, 0);
-	return (VFS_EXTATTRCTL(mp, SCARG(uap, cmd), SCARG(uap, attrname),
-	    SCARG(uap, arg), p));
+	/* No vrele() here: NDFREE(&nd, 0) already released nd.ni_vp. */
+	if (error)
+		return (error);
+	error = VFS_EXTATTRCTL(mp, SCARG(uap, cmd), SCARG(uap, attrname),
+	    SCARG(uap, arg), p);
+	vn_finished_write(mp);
+	return (error);
 }
 
 /*
@@ -3425,6 +3539,7 @@ extattr_set_file(p, uap)
 	struct extattr_set_file_args *uap;
 {
 	struct nameidata nd;
+	struct mount *mp;
 	struct uio auio;
 	struct iovec *iov, *needfree = NULL, aiov[UIO_SMALLIOV];
 	char attrname[EXTATTR_MAXNAMELEN];
@@ -3434,10 +3549,11 @@ extattr_set_file(p, uap)
 	error = copyin(SCARG(uap, attrname), attrname, EXTATTR_MAXNAMELEN);
 	if (error)
 		return (error);
-	NDINIT(&nd, LOOKUP, LOCKLEAF | FOLLOW, UIO_USERSPACE, SCARG(uap, path),
-	    p);
+	NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, SCARG(uap, path), p);
 	if ((error = namei(&nd)) != 0)
 		return(error);
+	if ((error = vn_start_write(nd.ni_vp, &mp, V_WAIT | PCATCH)) != 0)
+		goto done;
 	iovlen = uap->iovcnt * sizeof(struct iovec);
 	if (uap->iovcnt > UIO_SMALLIOV) {
 		if (uap->iovcnt > UIO_MAXIOV) {
@@ -3477,6 +3593,8 @@ done:
 	if (needfree)
 		FREE(needfree, M_IOV);
 	NDFREE(&nd, 0);
+	/* No vrele() here: NDFREE(&nd, 0) already released nd.ni_vp. */
+	vn_finished_write(mp);
 	return (error);
 }
 
@@ -3508,6 +3626,7 @@ extattr_get_file(p, uap)
 	if (uap->iovcnt > UIO_SMALLIOV) {
 		if (uap->iovcnt > UIO_MAXIOV) {
 			NDFREE(&nd, 0);
+			/* NDFREE(&nd, 0) already released nd.ni_vp. */
 			return (EINVAL);
 		}
 		MALLOC(iov, struct iovec *, iovlen, M_IOV, M_WAITOK);
@@ -3545,6 +3664,7 @@ done:
 	if (needfree)
 		FREE(needfree, M_IOV);
 	NDFREE(&nd, 0);
+	/* No vrele() here: NDFREE(&nd, 0) already released nd.ni_vp. */
 	return(error);
 }
 
@@ -3557,6 +3677,7 @@ extattr_delete_file(p, uap)
 	struct proc *p;
 	struct extattr_delete_file_args *uap;
 {
+	struct mount *mp;
 	struct nameidata nd;
 	char attrname[EXTATTR_MAXNAMELEN];
 	int	error;
@@ -3564,12 +3685,17 @@ extattr_delete_file(p, uap)
 	error = copyin(SCARG(uap, attrname), attrname, EXTATTR_MAXNAMELEN);
 	if (error)
 		return(error);
-	NDINIT(&nd, LOOKUP, LOCKLEAF | FOLLOW, UIO_USERSPACE, SCARG(uap, path),
-	    p);
+	NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, SCARG(uap, path), p);
 	if ((error = namei(&nd)) != 0)
 		return(error);
+	if ((error = vn_start_write(nd.ni_vp, &mp, V_WAIT | PCATCH)) != 0) {
+		vrele(nd.ni_vp);
+		return (error);
+	}
 	error = VOP_SETEXTATTR(nd.ni_vp, attrname, NULL, p->p_cred->pc_ucred,
 	    p);
 	NDFREE(&nd, 0);
+	/* No vrele() here: NDFREE(&nd, 0) already released nd.ni_vp. */
+	vn_finished_write(mp);
 	return(error);
 }
diff --git a/sys/kern/vfs_subr.c b/sys/kern/vfs_subr.c
index 6483660..0e5ec3f 100644
--- a/sys/kern/vfs_subr.c
+++ b/sys/kern/vfs_subr.c
@@ -453,6 +453,7 @@ getnewvnode(tag, mp, vops, vpp)
 	int s, count;
 	struct proc *p = curproc;	/* XXX */
 	struct vnode *vp = NULL;
+	struct mount *vnmp;
 	vm_object_t object;
 
 	/*
@@ -491,7 +492,14 @@ getnewvnode(tag, mp, vops, vpp)
 			vp = NULL;
 			continue;
 		}
-		break;
+		/*
+		 * Skip over it if its filesystem is being suspended.
+		 */
+		if (vn_start_write(vp, &vnmp, V_NOWAIT) == 0)
+			break;
+		simple_unlock(&vp->v_interlock);
+		TAILQ_INSERT_TAIL(&vnode_free_list, vp, v_freelist);
+		vp = NULL;
 	}
 	if (vp) {
 		vp->v_flag |= VDOOMED;
@@ -504,6 +512,7 @@ getnewvnode(tag, mp, vops, vpp)
 		} else {
 			simple_unlock(&vp->v_interlock);
 		}
+		vn_finished_write(vnmp);
 
 #ifdef INVARIANTS
 		{
@@ -515,6 +524,8 @@ getnewvnode(tag, mp, vops, vpp)
 			if (vp->v_numoutput)
 				panic("Clean vnode has pending I/O's");
 			splx(s);
+			if (vp->v_writecount != 0)
+				panic("Non-zero write count");
 		}
 #endif
 		vp->v_flag = 0;
@@ -523,7 +534,6 @@ getnewvnode(tag, mp, vops, vpp)
 		vp->v_cstart = 0;
 		vp->v_clen = 0;
 		vp->v_socket = 0;
-		vp->v_writecount = 0;	/* XXX */
 	} else {
 		simple_unlock(&vnode_free_list_slock);
 		vp = (struct vnode *) zalloc(vnode_zone);
@@ -946,6 +956,7 @@ sched_sync(void)
 {
 	struct synclist *slp;
 	struct vnode *vp;
+	struct mount *mp;
 	long starttime;
 	int s;
 	struct proc *p = updateproc;
@@ -970,10 +981,12 @@ sched_sync(void)
 		splx(s);
 
 		while ((vp = LIST_FIRST(slp)) != NULL) {
-			if (VOP_ISLOCKED(vp, NULL) == 0) {
+			if (VOP_ISLOCKED(vp, NULL) == 0 &&
+			    vn_start_write(vp, &mp, V_NOWAIT) == 0) {
 				vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p);
 				(void) VOP_FSYNC(vp, p->p_ucred, MNT_LAZY, p);
 				VOP_UNLOCK(vp, 0, p);
+				vn_finished_write(mp);
 			}
 			s = splbio();
 			if (LIST_FIRST(slp) == vp) {
@@ -1386,6 +1399,7 @@ vrele(vp)
 	struct proc *p = curproc;	/* XXX */
 
 	KASSERT(vp != NULL, ("vrele: null vp"));
+	KASSERT(vp->v_writecount < vp->v_usecount, ("vrele: missed vn_close"));
 
 	simple_lock(&vp->v_interlock);
 
@@ -1427,6 +1441,7 @@ vput(vp)
 	struct proc *p = curproc;	/* XXX */
 
 	KASSERT(vp != NULL, ("vput: null vp"));
+	KASSERT(vp->v_writecount < vp->v_usecount, ("vput: missed vn_close"));
 
 	simple_lock(&vp->v_interlock);
 
@@ -1632,6 +1647,8 @@ vclean(vp, flags, p)
 	 * If the flush fails, just toss the buffers.
 	 */
 	if (flags & DOCLOSE) {
+		if (TAILQ_FIRST(&vp->v_dirtyblkhd) != NULL)
+			(void) vn_write_suspend_wait(vp, V_WAIT);
 		if (vinvalbuf(vp, V_SAVE, NOCRED, p, 0, 0) != 0)
 			vinvalbuf(vp, 0, NOCRED, p, 0, 0);
 	}
@@ -2785,12 +2802,18 @@ sync_fsync(ap)
 		simple_unlock(&mountlist_slock);
 		return (0);
 	}
+	if (vn_start_write(NULL, &mp, V_NOWAIT) != 0) {
+		/* Like the normal exit below, do not unlock mountlist_slock. */
+		vfs_unbusy(mp, p);
+		return (0);
+	}
 	asyncflag = mp->mnt_flag & MNT_ASYNC;
 	mp->mnt_flag &= ~MNT_ASYNC;
 	vfs_msync(mp, MNT_NOWAIT);
 	VFS_SYNC(mp, MNT_LAZY, ap->a_cred, p);
 	if (asyncflag)
 		mp->mnt_flag |= MNT_ASYNC;
+	vn_finished_write(mp);
 	vfs_unbusy(mp, p);
 	return (0);
 }
diff --git a/sys/kern/vfs_syscalls.c b/sys/kern/vfs_syscalls.c
index 65a297ca..404114a 100644
--- a/sys/kern/vfs_syscalls.c
+++ b/sys/kern/vfs_syscalls.c
@@ -164,8 +164,8 @@ mount(p, uap)
 			vput(vp);
 			return (EOPNOTSUPP);	/* Needs translation */
 		}
-		mp->mnt_flag |=
-		    SCARG(uap, flags) & (MNT_RELOAD | MNT_FORCE | MNT_UPDATE);
+		mp->mnt_flag |= SCARG(uap, flags) &
+		    (MNT_RELOAD | MNT_FORCE | MNT_UPDATE | MNT_SNAPSHOT);
 		/*
 		 * Only root, or the user that did the original mount is
 		 * permitted to update it.
@@ -303,7 +303,8 @@ update:
 		vrele(vp);
 		if (mp->mnt_kern_flag & MNTK_WANTRDWR)
 			mp->mnt_flag &= ~MNT_RDONLY;
-		mp->mnt_flag &=~ (MNT_UPDATE | MNT_RELOAD | MNT_FORCE);
+		mp->mnt_flag &=~
+		    (MNT_UPDATE | MNT_RELOAD | MNT_FORCE | MNT_SNAPSHOT);
 		mp->mnt_kern_flag &=~ MNTK_WANTRDWR;
 		if (error) {
 			mp->mnt_flag = flag;
@@ -458,7 +459,7 @@ unmount(p, uap)
  */
 int
 dounmount(mp, flags, p)
-	register struct mount *mp;
+	struct mount *mp;
 	int flags;
 	struct proc *p;
 {
@@ -469,6 +470,7 @@ dounmount(mp, flags, p)
 	simple_lock(&mountlist_slock);
 	mp->mnt_kern_flag |= MNTK_UNMOUNT;
 	lockmgr(&mp->mnt_lock, LK_DRAIN | LK_INTERLOCK, &mountlist_slock, p);
+	vn_start_write(NULL, &mp, V_WAIT);
 
 	if (mp->mnt_flag & MNT_EXPUBLIC)
 		vfs_setpublicfs(NULL, NULL, NULL);
@@ -481,8 +483,10 @@ dounmount(mp, flags, p)
 		vrele(mp->mnt_syncer);
 	if (((mp->mnt_flag & MNT_RDONLY) ||
 	     (error = VFS_SYNC(mp, MNT_WAIT, p->p_ucred, p)) == 0) ||
-	    (flags & MNT_FORCE))
+	    (flags & MNT_FORCE)) {
 		error = VFS_UNMOUNT(mp, flags, p);
+	}
+	vn_finished_write(mp);
 	simple_lock(&mountlist_slock);
 	if (error) {
 		if ((mp->mnt_flag & MNT_RDONLY) == 0 && mp->mnt_syncer == NULL)
@@ -530,7 +534,7 @@ sync(p, uap)
 	struct proc *p;
 	struct sync_args *uap;
 {
-	register struct mount *mp, *nmp;
+	struct mount *mp, *nmp;
 	int asyncflag;
 
 	simple_lock(&mountlist_slock);
@@ -539,13 +543,15 @@ sync(p, uap)
 			nmp = TAILQ_NEXT(mp, mnt_list);
 			continue;
 		}
-		if ((mp->mnt_flag & MNT_RDONLY) == 0) {
+		if ((mp->mnt_flag & MNT_RDONLY) == 0 &&
+		    vn_start_write(NULL, &mp, V_NOWAIT) == 0) {
 			asyncflag = mp->mnt_flag & MNT_ASYNC;
 			mp->mnt_flag &= ~MNT_ASYNC;
 			vfs_msync(mp, MNT_NOWAIT);
 			VFS_SYNC(mp, MNT_NOWAIT,
-				((p != NULL) ? p->p_ucred : NOCRED), p);
+			    ((p != NULL) ? p->p_ucred : NOCRED), p);
 			mp->mnt_flag |= asyncflag;
+			vn_finished_write(mp);
 		}
 		simple_lock(&mountlist_slock);
 		nmp = TAILQ_NEXT(mp, mnt_list);
@@ -593,7 +599,7 @@ quotactl(p, uap)
 		syscallarg(caddr_t) arg;
 	} */ *uap;
 {
-	register struct mount *mp;
+	struct mount *mp;
 	int error;
 	struct nameidata nd;
 
@@ -602,11 +608,15 @@ quotactl(p, uap)
 	NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, SCARG(uap, path), p);
 	if ((error = namei(&nd)) != 0)
 		return (error);
-	mp = nd.ni_vp->v_mount;
 	NDFREE(&nd, NDF_ONLY_PNBUF);
+	error = vn_start_write(nd.ni_vp, &mp, V_WAIT | PCATCH);
 	vrele(nd.ni_vp);
-	return (VFS_QUOTACTL(mp, SCARG(uap, cmd), SCARG(uap, uid),
-	    SCARG(uap, arg), p));
+	if (error)
+		return (error);
+	error = VFS_QUOTACTL(mp, SCARG(uap, cmd), SCARG(uap, uid),
+	    SCARG(uap, arg), p);
+	vn_finished_write(mp);
+	return (error);
 }
 
 /*
@@ -972,6 +982,7 @@ open(p, uap)
 	struct file *fp;
 	struct vnode *vp;
 	struct vattr vat;
+	struct mount *mp;
 	int cmode, flags, oflags;
 	struct file *nfp;
 	int type, indx, error;
@@ -1029,12 +1040,15 @@ open(p, uap)
 		fp->f_flag |= FHASLOCK;
 	}
 	if (flags & O_TRUNC) {
+		if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
+			goto bad;
 		VOP_LEASE(vp, p, p->p_ucred, LEASE_WRITE);
 		VATTR_NULL(&vat);
 		vat.va_size = 0;
 		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p);
 		error = VOP_SETATTR(vp, &vat, p->p_ucred, p);
 		VOP_UNLOCK(vp, 0, p);
+		vn_finished_write(mp);
 		if (error)
 			goto bad;
 	}
@@ -1101,7 +1115,8 @@ mknod(p, uap)
 		syscallarg(int) dev;
 	} */ *uap;
 {
-	register struct vnode *vp;
+	struct vnode *vp;
+	struct mount *mp;
 	struct vattr vattr;
 	int error;
 	int whiteout = 0;
@@ -1118,14 +1133,16 @@ mknod(p, uap)
 	}
 	if (error)
 		return (error);
+restart:
 	bwillwrite();
 	NDINIT(&nd, CREATE, LOCKPARENT, UIO_USERSPACE, SCARG(uap, path), p);
 	if ((error = namei(&nd)) != 0)
 		return (error);
 	vp = nd.ni_vp;
-	if (vp != NULL)
+	if (vp != NULL) {
+		vrele(vp);
 		error = EEXIST;
-	else {
+	} else {
 		VATTR_NULL(&vattr);
 		vattr.va_mode = (SCARG(uap, mode) & ALLPERMS) &~ p->p_fd->fd_cmask;
 		vattr.va_rdev = SCARG(uap, dev);
@@ -1149,6 +1166,13 @@ mknod(p, uap)
 			break;
 		}
 	}
+	if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
+		NDFREE(&nd, NDF_ONLY_PNBUF);
+		vput(nd.ni_dvp);
+		if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0)
+			return (error);
+		goto restart;
+	}
 	if (!error) {
 		VOP_LEASE(nd.ni_dvp, p, p->p_ucred, LEASE_WRITE);
 		if (whiteout)
@@ -1159,17 +1183,10 @@ mknod(p, uap)
 			if (error == 0)
 				vput(nd.ni_vp);
 		}
-		NDFREE(&nd, NDF_ONLY_PNBUF);
-		vput(nd.ni_dvp);
-	} else {
-		NDFREE(&nd, NDF_ONLY_PNBUF);
-		if (nd.ni_dvp == vp)
-			vrele(nd.ni_dvp);
-		else
-			vput(nd.ni_dvp);
-		if (vp)
-			vrele(vp);
 	}
+	NDFREE(&nd, NDF_ONLY_PNBUF);
+	vput(nd.ni_dvp);
+	vn_finished_write(mp);
 	ASSERT_VOP_UNLOCKED(nd.ni_dvp, "mknod");
 	ASSERT_VOP_UNLOCKED(nd.ni_vp, "mknod");
 	return (error);
@@ -1193,23 +1210,29 @@ mkfifo(p, uap)
 		syscallarg(int) mode;
 	} */ *uap;
 {
+	struct mount *mp;
 	struct vattr vattr;
 	int error;
 	struct nameidata nd;
 
+restart:
 	bwillwrite();
 	NDINIT(&nd, CREATE, LOCKPARENT, UIO_USERSPACE, SCARG(uap, path), p);
 	if ((error = namei(&nd)) != 0)
 		return (error);
 	if (nd.ni_vp != NULL) {
 		NDFREE(&nd, NDF_ONLY_PNBUF);
-		if (nd.ni_dvp == nd.ni_vp)
-			vrele(nd.ni_dvp);
-		else
-			vput(nd.ni_dvp);
 		vrele(nd.ni_vp);
+		vput(nd.ni_dvp);
 		return (EEXIST);
 	}
+	if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
+		NDFREE(&nd, NDF_ONLY_PNBUF);
+		vput(nd.ni_dvp);
+		if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0)
+			return (error);
+		goto restart;
+	}
 	VATTR_NULL(&vattr);
 	vattr.va_type = VFIFO;
 	vattr.va_mode = (SCARG(uap, mode) & ALLPERMS) &~ p->p_fd->fd_cmask;
@@ -1219,6 +1242,7 @@ mkfifo(p, uap)
 		vput(nd.ni_vp);
 	NDFREE(&nd, NDF_ONLY_PNBUF);
 	vput(nd.ni_dvp);
+	vn_finished_write(mp);
 	return (error);
 }
 
@@ -1240,7 +1264,8 @@ link(p, uap)
 		syscallarg(char *) link;
 	} */ *uap;
 {
-	register struct vnode *vp;
+	struct vnode *vp;
+	struct mount *mp;
 	struct nameidata nd;
 	int error;
 
@@ -1250,30 +1275,29 @@ link(p, uap)
 		return (error);
 	NDFREE(&nd, NDF_ONLY_PNBUF);
 	vp = nd.ni_vp;
-	if (vp->v_type == VDIR)
-		error = EPERM;		/* POSIX */
-	else {
-		NDINIT(&nd, CREATE, LOCKPARENT|NOOBJ, UIO_USERSPACE, SCARG(uap, link), p);
-		error = namei(&nd);
-		if (!error) {
-			if (nd.ni_vp != NULL) {
-				if (nd.ni_vp)
-					vrele(nd.ni_vp);
-				error = EEXIST;
-			} else {
-				VOP_LEASE(nd.ni_dvp, p, p->p_ucred,
-				    LEASE_WRITE);
-				VOP_LEASE(vp, p, p->p_ucred, LEASE_WRITE);
-				error = VOP_LINK(nd.ni_dvp, vp, &nd.ni_cnd);
-			}
-			NDFREE(&nd, NDF_ONLY_PNBUF);
-			if (nd.ni_dvp == nd.ni_vp)
-				vrele(nd.ni_dvp);
-			else
-				vput(nd.ni_dvp);
+	if (vp->v_type == VDIR) {
+		vrele(vp);
+		return (EPERM);		/* POSIX */
+	}
+	if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0) {
+		vrele(vp);
+		return (error);
+	}
+	NDINIT(&nd, CREATE, LOCKPARENT|NOOBJ, UIO_USERSPACE, SCARG(uap, link), p);
+	if ((error = namei(&nd)) == 0) {
+		if (nd.ni_vp != NULL) {
+			vrele(nd.ni_vp);
+			error = EEXIST;
+		} else {
+			VOP_LEASE(nd.ni_dvp, p, p->p_ucred, LEASE_WRITE);
+			VOP_LEASE(vp, p, p->p_ucred, LEASE_WRITE);
+			error = VOP_LINK(nd.ni_dvp, vp, &nd.ni_cnd);
 		}
+		NDFREE(&nd, NDF_ONLY_PNBUF);
+		vput(nd.ni_dvp);
 	}
 	vrele(vp);
+	vn_finished_write(mp);
 	ASSERT_VOP_UNLOCKED(nd.ni_dvp, "link");
 	ASSERT_VOP_UNLOCKED(nd.ni_vp, "link");
 	return (error);
@@ -1297,6 +1321,7 @@ symlink(p, uap)
 		syscallarg(char *) link;
 	} */ *uap;
 {
+	struct mount *mp;
 	struct vattr vattr;
 	char *path;
 	int error;
@@ -1305,20 +1330,25 @@ symlink(p, uap)
 	path = zalloc(namei_zone);
 	if ((error = copyinstr(SCARG(uap, path), path, MAXPATHLEN, NULL)) != 0)
 		goto out;
+restart:
 	bwillwrite();
 	NDINIT(&nd, CREATE, LOCKPARENT|NOOBJ, UIO_USERSPACE, SCARG(uap, link), p);
 	if ((error = namei(&nd)) != 0)
 		goto out;
 	if (nd.ni_vp) {
 		NDFREE(&nd, NDF_ONLY_PNBUF);
-		if (nd.ni_dvp == nd.ni_vp)
-			vrele(nd.ni_dvp);
-		else
-			vput(nd.ni_dvp);
 		vrele(nd.ni_vp);
+		vput(nd.ni_dvp);
 		error = EEXIST;
 		goto out;
 	}
+	if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
+		NDFREE(&nd, NDF_ONLY_PNBUF);
+		vput(nd.ni_dvp);
+		if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0)
+			goto out;	/* exit via out: like other failures */
+		goto restart;
+	}
 	VATTR_NULL(&vattr);
 	vattr.va_mode = ACCESSPERMS &~ p->p_fd->fd_cmask;
 	VOP_LEASE(nd.ni_dvp, p, p->p_ucred, LEASE_WRITE);
@@ -1327,6 +1357,7 @@ symlink(p, uap)
 	if (error == 0)
 		vput(nd.ni_vp);
 	vput(nd.ni_dvp);
+	vn_finished_write(mp);
 	ASSERT_VOP_UNLOCKED(nd.ni_dvp, "symlink");
 	ASSERT_VOP_UNLOCKED(nd.ni_vp, "symlink");
 out:
@@ -1346,8 +1377,10 @@ undelete(p, uap)
 	} */ *uap;
 {
 	int error;
+	struct mount *mp;
 	struct nameidata nd;
 
+restart:
 	bwillwrite();
 	NDINIT(&nd, DELETE, LOCKPARENT|DOWHITEOUT, UIO_USERSPACE,
 	    SCARG(uap, path), p);
@@ -1357,19 +1390,23 @@ undelete(p, uap)
 
 	if (nd.ni_vp != NULLVP || !(nd.ni_cnd.cn_flags & ISWHITEOUT)) {
 		NDFREE(&nd, NDF_ONLY_PNBUF);
-		if (nd.ni_dvp == nd.ni_vp)
-			vrele(nd.ni_dvp);
-		else
-			vput(nd.ni_dvp);
 		if (nd.ni_vp)
 			vrele(nd.ni_vp);
+		vput(nd.ni_dvp);
 		return (EEXIST);
 	}
-
+	if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
+		NDFREE(&nd, NDF_ONLY_PNBUF);
+		vput(nd.ni_dvp);
+		if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0)
+			return (error);
+		goto restart;
+	}
 	VOP_LEASE(nd.ni_dvp, p, p->p_ucred, LEASE_WRITE);
 	error = VOP_WHITEOUT(nd.ni_dvp, &nd.ni_cnd, DELETE);
 	NDFREE(&nd, NDF_ONLY_PNBUF);
 	vput(nd.ni_dvp);
+	vn_finished_write(mp);
 	ASSERT_VOP_UNLOCKED(nd.ni_dvp, "undelete");
 	ASSERT_VOP_UNLOCKED(nd.ni_vp, "undelete");
 	return (error);
@@ -1391,18 +1428,17 @@ unlink(p, uap)
 		syscallarg(char *) path;
 	} */ *uap;
 {
-	register struct vnode *vp;
+	struct mount *mp;
+	struct vnode *vp;
 	int error;
 	struct nameidata nd;
 
+restart:
 	bwillwrite();
 	NDINIT(&nd, DELETE, LOCKPARENT, UIO_USERSPACE, SCARG(uap, path), p);
 	if ((error = namei(&nd)) != 0)
 		return (error);
 	vp = nd.ni_vp;
-	VOP_LEASE(vp, p, p->p_ucred, LEASE_WRITE);
-	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p);
-
 	if (vp->v_type == VDIR)
 		error = EPERM;		/* POSIX */
 	else {
@@ -1414,18 +1450,24 @@ unlink(p, uap)
 		if (vp->v_flag & VROOT)
 			error = EBUSY;
 	}
-
+	if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
+		NDFREE(&nd, NDF_ONLY_PNBUF);
+		vrele(vp);
+		vput(nd.ni_dvp);
+		if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0)
+			return (error);
+		goto restart;
+	}
+	VOP_LEASE(vp, p, p->p_ucred, LEASE_WRITE);
+	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p);
 	if (!error) {
 		VOP_LEASE(nd.ni_dvp, p, p->p_ucred, LEASE_WRITE);
 		error = VOP_REMOVE(nd.ni_dvp, vp, &nd.ni_cnd);
 	}
 	NDFREE(&nd, NDF_ONLY_PNBUF);
-	if (nd.ni_dvp == vp)
-		vrele(nd.ni_dvp);
-	else
-		vput(nd.ni_dvp);
-	if (vp != NULLVP)
-		vput(vp);
+	vput(nd.ni_dvp);
+	vput(vp);
+	vn_finished_write(mp);
 	ASSERT_VOP_UNLOCKED(nd.ni_dvp, "unlink");
 	ASSERT_VOP_UNLOCKED(nd.ni_vp, "unlink");
 	return (error);
@@ -1936,6 +1978,7 @@ setfflags(p, vp, flags)
 	int flags;
 {
 	int error;
+	struct mount *mp;
 	struct vattr vattr;
 
 	/*
@@ -1948,12 +1991,15 @@ setfflags(p, vp, flags)
 	    ((error = suser_xxx(p->p_ucred, p, PRISON_ROOT)) != 0))
 		return (error);
 
+	if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
+		return (error);
 	VOP_LEASE(vp, p, p->p_ucred, LEASE_WRITE);
 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p);
 	VATTR_NULL(&vattr);
 	vattr.va_flags = flags;
 	error = VOP_SETATTR(vp, &vattr, p->p_ucred, p);
 	VOP_UNLOCK(vp, 0, p);
+	vn_finished_write(mp);
 	return (error);
 }
 
@@ -2020,14 +2066,18 @@ setfmode(p, vp, mode)
 	int mode;
 {
 	int error;
+	struct mount *mp;
 	struct vattr vattr;
 
+	if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
+		return (error);
 	VOP_LEASE(vp, p, p->p_ucred, LEASE_WRITE);
 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p);
 	VATTR_NULL(&vattr);
 	vattr.va_mode = mode & ALLPERMS;
 	error = VOP_SETATTR(vp, &vattr, p->p_ucred, p);
 	VOP_UNLOCK(vp, 0, p);
+	vn_finished_write(mp);
 	return error;
 }
 
@@ -2125,8 +2175,11 @@ setfown(p, vp, uid, gid)
 	gid_t gid;
 {
 	int error;
+	struct mount *mp;
 	struct vattr vattr;
 
+	if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
+		return (error);
 	VOP_LEASE(vp, p, p->p_ucred, LEASE_WRITE);
 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p);
 	VATTR_NULL(&vattr);
@@ -2134,6 +2187,7 @@ setfown(p, vp, uid, gid)
 	vattr.va_gid = gid;
 	error = VOP_SETATTR(vp, &vattr, p->p_ucred, p);
 	VOP_UNLOCK(vp, 0, p);
+	vn_finished_write(mp);
 	return error;
 }
 
@@ -2259,8 +2313,11 @@ setutimes(p, vp, ts, nullflag)
 	int nullflag;
 {
 	int error;
+	struct mount *mp;
 	struct vattr vattr;
 
+	if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
+		return (error);
 	VOP_LEASE(vp, p, p->p_ucred, LEASE_WRITE);
 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p);
 	VATTR_NULL(&vattr);
@@ -2270,6 +2327,7 @@ setutimes(p, vp, ts, nullflag)
 		vattr.va_vaflags |= VA_UTIMES_NULL;
 	error = VOP_SETATTR(vp, &vattr, p->p_ucred, p);
 	VOP_UNLOCK(vp, 0, p);
+	vn_finished_write(mp);
 	return error;
 }
 
@@ -2394,7 +2452,8 @@ truncate(p, uap)
 		syscallarg(off_t) length;
 	} */ *uap;
 {
-	register struct vnode *vp;
+	struct mount *mp;
+	struct vnode *vp;
 	struct vattr vattr;
 	int error;
 	struct nameidata nd;
@@ -2405,6 +2464,10 @@ truncate(p, uap)
 	if ((error = namei(&nd)) != 0)
 		return (error);
 	vp = nd.ni_vp;
+	if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0) {
+		vrele(vp);
+		return (error);
+	}
 	NDFREE(&nd, NDF_ONLY_PNBUF);
 	VOP_LEASE(vp, p, p->p_ucred, LEASE_WRITE);
 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p);
@@ -2417,6 +2480,7 @@ truncate(p, uap)
 		error = VOP_SETATTR(vp, &vattr, p->p_ucred, p);
 	}
 	vput(vp);
+	vn_finished_write(mp);
 	return (error);
 }
 
@@ -2440,6 +2504,7 @@ ftruncate(p, uap)
 		syscallarg(off_t) length;
 	} */ *uap;
 {
+	struct mount *mp;
 	struct vattr vattr;
 	struct vnode *vp;
 	struct file *fp;
@@ -2452,6 +2517,8 @@ ftruncate(p, uap)
 	if ((fp->f_flag & FWRITE) == 0)
 		return (EINVAL);
 	vp = (struct vnode *)fp->f_data;
+	if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
+		return (error);
 	VOP_LEASE(vp, p, p->p_ucred, LEASE_WRITE);
 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p);
 	if (vp->v_type == VDIR)
@@ -2462,6 +2529,7 @@ ftruncate(p, uap)
 		error = VOP_SETATTR(vp, &vattr, fp->f_cred, p);
 	}
 	VOP_UNLOCK(vp, 0, p);
+	vn_finished_write(mp);
 	return (error);
 }
 
@@ -2541,13 +2609,16 @@ fsync(p, uap)
 		syscallarg(int) fd;
 	} */ *uap;
 {
-	register struct vnode *vp;
+	struct vnode *vp;
+	struct mount *mp;
 	struct file *fp;
 	int error;
 
 	if ((error = getvnode(p->p_fd, SCARG(uap, fd), &fp)) != 0)
 		return (error);
 	vp = (struct vnode *)fp->f_data;
+	if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
+		return (error);
 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p);
 	if (vp->v_object)
 		vm_object_page_clean(vp->v_object, 0, 0, 0);
@@ -2558,6 +2629,7 @@ fsync(p, uap)
 #endif
 
 	VOP_UNLOCK(vp, 0, p);
+	vn_finished_write(mp);
 	return (error);
 }
 
@@ -2580,7 +2652,8 @@ rename(p, uap)
 		syscallarg(char *) to;
 	} */ *uap;
 {
-	register struct vnode *tvp, *fvp, *tdvp;
+	struct mount *mp;
+	struct vnode *tvp, *fvp, *tdvp;
 	struct nameidata fromnd, tond;
 	int error;
 
@@ -2590,6 +2663,12 @@ rename(p, uap)
 	if ((error = namei(&fromnd)) != 0)
 		return (error);
 	fvp = fromnd.ni_vp;
+	if ((error = vn_start_write(fvp, &mp, V_WAIT | PCATCH)) != 0) {
+		NDFREE(&fromnd, NDF_ONLY_PNBUF);
+		vrele(fromnd.ni_dvp);
+		vrele(fvp);
+		goto out1;
+	}
 	NDINIT(&tond, RENAME, LOCKPARENT | LOCKLEAF | NOCACHE | SAVESTART | NOOBJ,
 	    UIO_USERSPACE, SCARG(uap, to), p);
 	if (fromnd.ni_vp->v_type == VDIR)
@@ -2652,6 +2731,7 @@ out:
 		vrele(fvp);
 	}
 	vrele(tond.ni_startdir);
+	vn_finished_write(mp);
 	ASSERT_VOP_UNLOCKED(fromnd.ni_dvp, "rename");
 	ASSERT_VOP_UNLOCKED(fromnd.ni_vp, "rename");
 	ASSERT_VOP_UNLOCKED(tond.ni_dvp, "rename");
@@ -2682,11 +2762,13 @@ mkdir(p, uap)
 		syscallarg(int) mode;
 	} */ *uap;
 {
-	register struct vnode *vp;
+	struct mount *mp;
+	struct vnode *vp;
 	struct vattr vattr;
 	int error;
 	struct nameidata nd;
 
+restart:
 	bwillwrite();
 	NDINIT(&nd, CREATE, LOCKPARENT, UIO_USERSPACE, SCARG(uap, path), p);
 	nd.ni_cnd.cn_flags |= WILLBEDIR;
@@ -2695,13 +2777,17 @@ mkdir(p, uap)
 	vp = nd.ni_vp;
 	if (vp != NULL) {
 		NDFREE(&nd, NDF_ONLY_PNBUF);
-		if (nd.ni_dvp == vp)
-			vrele(nd.ni_dvp);
-		else
-			vput(nd.ni_dvp);
 		vrele(vp);
+		vput(nd.ni_dvp);
 		return (EEXIST);
 	}
+	if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
+		NDFREE(&nd, NDF_ONLY_PNBUF);
+		vput(nd.ni_dvp);
+		if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0)
+			return (error);
+		goto restart;
+	}
 	VATTR_NULL(&vattr);
 	vattr.va_type = VDIR;
 	vattr.va_mode = (SCARG(uap, mode) & ACCESSPERMS) &~ p->p_fd->fd_cmask;
@@ -2711,6 +2797,7 @@ mkdir(p, uap)
 	vput(nd.ni_dvp);
 	if (!error)
 		vput(nd.ni_vp);
+	vn_finished_write(mp);
 	ASSERT_VOP_UNLOCKED(nd.ni_dvp, "mkdir");
 	ASSERT_VOP_UNLOCKED(nd.ni_vp, "mkdir");
 	return (error);
@@ -2732,10 +2819,12 @@ rmdir(p, uap)
 		syscallarg(char *) path;
 	} */ *uap;
 {
-	register struct vnode *vp;
+	struct mount *mp;
+	struct vnode *vp;
 	int error;
 	struct nameidata nd;
 
+restart:
 	bwillwrite();
 	NDINIT(&nd, DELETE, LOCKPARENT | LOCKLEAF, UIO_USERSPACE,
 	    SCARG(uap, path), p);
@@ -2756,21 +2845,32 @@ rmdir(p, uap)
 	/*
 	 * The root of a mounted filesystem cannot be deleted.
 	 */
-	if (vp->v_flag & VROOT)
+	if (vp->v_flag & VROOT) {
 		error = EBUSY;
-	else {
-		VOP_LEASE(nd.ni_dvp, p, p->p_ucred, LEASE_WRITE);
-		VOP_LEASE(vp, p, p->p_ucred, LEASE_WRITE);
-		error = VOP_RMDIR(nd.ni_dvp, nd.ni_vp, &nd.ni_cnd);
+		goto out;
 	}
+	if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
+		NDFREE(&nd, NDF_ONLY_PNBUF);
+		if (nd.ni_dvp == vp)
+			vrele(nd.ni_dvp);
+		else
+			vput(nd.ni_dvp);
+		vput(vp);
+		if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0)
+			return (error);
+		goto restart;
+	}
+	VOP_LEASE(nd.ni_dvp, p, p->p_ucred, LEASE_WRITE);
+	VOP_LEASE(vp, p, p->p_ucred, LEASE_WRITE);
+	error = VOP_RMDIR(nd.ni_dvp, nd.ni_vp, &nd.ni_cnd);
+	vn_finished_write(mp);
 out:
 	NDFREE(&nd, NDF_ONLY_PNBUF);
 	if (nd.ni_dvp == vp)
 		vrele(nd.ni_dvp);
 	else
 		vput(nd.ni_dvp);
-	if (vp != NULLVP)
-		vput(vp);
+	vput(vp);
 	ASSERT_VOP_UNLOCKED(nd.ni_dvp, "rmdir");
 	ASSERT_VOP_UNLOCKED(nd.ni_vp, "rmdir");
 	return (error);
@@ -3049,7 +3149,8 @@ revoke(p, uap)
 		syscallarg(char *) path;
 	} */ *uap;
 {
-	register struct vnode *vp;
+	struct mount *mp;
+	struct vnode *vp;
 	struct vattr vattr;
 	int error;
 	struct nameidata nd;
@@ -3068,8 +3169,11 @@ revoke(p, uap)
 	if (p->p_ucred->cr_uid != vattr.va_uid &&
 	    (error = suser_xxx(0, p, PRISON_ROOT)))
 		goto out;
+	if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
+		goto out;
 	if (vcount(vp) > 1)
 		VOP_REVOKE(vp, REVOKEALL);
+	vn_finished_write(mp);
 out:
 	vrele(vp);
 	return (error);
@@ -3228,11 +3332,16 @@ fhopen(p, uap)
 	}
 	if (fmode & O_TRUNC) {
 		VOP_UNLOCK(vp, 0, p);				/* XXX */
+		if ((error = vn_start_write(NULL, &mp, V_WAIT | PCATCH)) != 0) {
+			vrele(vp);
+			return (error);
+		}
 		VOP_LEASE(vp, p, p->p_ucred, LEASE_WRITE);
 		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p);	/* XXX */
 		VATTR_NULL(vap);
 		vap->va_size = 0;
 		error = VOP_SETATTR(vp, vap, p->p_ucred, p);
+		vn_finished_write(mp);
 		if (error)
 			goto bad;
 	}
@@ -3407,10 +3516,15 @@ extattrctl(p, uap)
 	NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, SCARG(uap, path), p);
 	if ((error = namei(&nd)) != 0)
 		return (error);
-	mp = nd.ni_vp->v_mount;
+	error = vn_start_write(nd.ni_vp, &mp, V_WAIT | PCATCH);
 	NDFREE(&nd, 0);
-	return (VFS_EXTATTRCTL(mp, SCARG(uap, cmd), SCARG(uap, attrname),
-	    SCARG(uap, arg), p));
+	vrele(nd.ni_vp);
+	if (error)
+		return (error);
+	error = VFS_EXTATTRCTL(mp, SCARG(uap, cmd), SCARG(uap, attrname),
+	    SCARG(uap, arg), p);
+	vn_finished_write(mp);
+	return (error);
 }
 
 /*
@@ -3425,6 +3539,7 @@ extattr_set_file(p, uap)
 	struct extattr_set_file_args *uap;
 {
 	struct nameidata nd;
+	struct mount *mp;
 	struct uio auio;
 	struct iovec *iov, *needfree = NULL, aiov[UIO_SMALLIOV];
 	char attrname[EXTATTR_MAXNAMELEN];
@@ -3434,10 +3549,11 @@ extattr_set_file(p, uap)
 	error = copyin(SCARG(uap, attrname), attrname, EXTATTR_MAXNAMELEN);
 	if (error)
 		return (error);
-	NDINIT(&nd, LOOKUP, LOCKLEAF | FOLLOW, UIO_USERSPACE, SCARG(uap, path),
-	    p);
+	NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, SCARG(uap, path), p);
 	if ((error = namei(&nd)) != 0)
 		return(error);
+	if ((error = vn_start_write(nd.ni_vp, &mp, V_WAIT | PCATCH)) != 0)
+		goto done;
 	iovlen = uap->iovcnt * sizeof(struct iovec);
 	if (uap->iovcnt > UIO_SMALLIOV) {
 		if (uap->iovcnt > UIO_MAXIOV) {
@@ -3477,6 +3593,8 @@ done:
 	if (needfree)
 		FREE(needfree, M_IOV);
 	NDFREE(&nd, 0);
+	vrele(nd.ni_vp);
+	vn_finished_write(mp);
 	return (error);
 }
 
@@ -3508,6 +3626,7 @@ extattr_get_file(p, uap)
 	if (uap->iovcnt > UIO_SMALLIOV) {
 		if (uap->iovcnt > UIO_MAXIOV) {
 			NDFREE(&nd, 0);
+			vrele(nd.ni_vp);
 			return (EINVAL);
 		}
 		MALLOC(iov, struct iovec *, iovlen, M_IOV, M_WAITOK);
@@ -3545,6 +3664,7 @@ done:
 	if (needfree)
 		FREE(needfree, M_IOV);
 	NDFREE(&nd, 0);
+	vrele(nd.ni_vp);
 	return(error);
 }
 
@@ -3557,6 +3677,7 @@ extattr_delete_file(p, uap)
 	struct proc *p;
 	struct extattr_delete_file_args *uap;
 {
+	struct mount *mp;
 	struct nameidata nd;
 	char attrname[EXTATTR_MAXNAMELEN];
 	int	error;
@@ -3564,12 +3685,17 @@ extattr_delete_file(p, uap)
 	error = copyin(SCARG(uap, attrname), attrname, EXTATTR_MAXNAMELEN);
 	if (error)
 		return(error);
-	NDINIT(&nd, LOOKUP, LOCKLEAF | FOLLOW, UIO_USERSPACE, SCARG(uap, path),
-	    p);
+	NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, SCARG(uap, path), p);
 	if ((error = namei(&nd)) != 0)
 		return(error);
+	if ((error = vn_start_write(nd.ni_vp, &mp, V_WAIT | PCATCH)) != 0) {
+		vrele(nd.ni_vp);
+		return (error);
+	}
 	error = VOP_SETEXTATTR(nd.ni_vp, attrname, NULL, p->p_cred->pc_ucred,
 	    p);
 	NDFREE(&nd, 0);
+	vrele(nd.ni_vp);
+	vn_finished_write(mp);
 	return(error);
 }
diff --git a/sys/kern/vfs_vnops.c b/sys/kern/vfs_vnops.c
index 0d0dc24..0708f7c 100644
--- a/sys/kern/vfs_vnops.c
+++ b/sys/kern/vfs_vnops.c
@@ -103,12 +103,14 @@ vn_open(ndp, flagp, cmode)
 	int *flagp, cmode;
 {
 	struct vnode *vp;
+	struct mount *mp;
 	struct proc *p = ndp->ni_cnd.cn_proc;
 	struct ucred *cred = p->p_ucred;
 	struct vattr vat;
 	struct vattr *vap = &vat;
 	int mode, fmode, error;
 
+restart:
 	fmode = *flagp;
 	if (fmode & O_CREAT) {
 		ndp->ni_cnd.cn_nameiop = CREATE;
@@ -124,10 +126,19 @@ vn_open(ndp, flagp, cmode)
 			vap->va_mode = cmode;
 			if (fmode & O_EXCL)
 				vap->va_vaflags |= VA_EXCLUSIVE;
+			if (vn_start_write(ndp->ni_dvp, &mp, V_NOWAIT) != 0) {
+				NDFREE(ndp, NDF_ONLY_PNBUF);
+				vput(ndp->ni_dvp);
+				if ((error = vn_start_write(NULL, &mp,
+				    V_XSLEEP | PCATCH)) != 0)
+					return (error);
+				goto restart;
+			}
 			VOP_LEASE(ndp->ni_dvp, p, cred, LEASE_WRITE);
 			error = VOP_CREATE(ndp->ni_dvp, &ndp->ni_vp,
 					   &ndp->ni_cnd, vap);
 			vput(ndp->ni_dvp);
+			vn_finished_write(mp);
 			if (error) {
 				NDFREE(ndp, NDF_ONLY_PNBUF);
 				return (error);
@@ -293,10 +304,17 @@ vn_rdwr(rw, vp, base, len, offset, segflg, ioflg, cred, aresid, p)
 {
 	struct uio auio;
 	struct iovec aiov;
+	struct mount *mp;
 	int error;
 
-	if ((ioflg & IO_NODELOCKED) == 0)
+	if ((ioflg & IO_NODELOCKED) == 0) {
+		mp = NULL;
+		if (rw == UIO_WRITE &&
+		    vp->v_type != VCHR && vp->v_type != VBLK &&
+		    (error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
+			return (error);
 		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p);
+	}
 	auio.uio_iov = &aiov;
 	auio.uio_iovcnt = 1;
 	aiov.iov_base = base;
@@ -316,8 +334,10 @@ vn_rdwr(rw, vp, base, len, offset, segflg, ioflg, cred, aresid, p)
 	else
 		if (auio.uio_resid && error == 0)
 			error = EIO;
-	if ((ioflg & IO_NODELOCKED) == 0)
+	if ((ioflg & IO_NODELOCKED) == 0) {
+		vn_finished_write(mp);
 		VOP_UNLOCK(vp, 0, p);
+	}
 	return (error);
 }
 
@@ -368,6 +388,7 @@ vn_write(fp, uio, cred, flags, p)
 	int flags;
 {
 	struct vnode *vp;
+	struct mount *mp;
 	int error, ioflag;
 
 	KASSERT(uio->uio_procp == p, ("uio_procp %p is not p %p",
@@ -384,6 +405,10 @@ vn_write(fp, uio, cred, flags, p)
 	if ((fp->f_flag & O_FSYNC) ||
 	    (vp->v_mount && (vp->v_mount->mnt_flag & MNT_SYNCHRONOUS)))
 		ioflag |= IO_SYNC;
+	mp = NULL;
+	if (vp->v_type != VCHR && vp->v_type != VBLK &&
+	    (error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
+		return (error);
 	VOP_LEASE(vp, p, cred, LEASE_WRITE);
 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p);
 	if ((flags & FOF_OFFSET) == 0)
@@ -394,6 +419,7 @@ vn_write(fp, uio, cred, flags, p)
 		fp->f_offset = uio->uio_offset;
 	fp->f_nextoff = uio->uio_offset;
 	VOP_UNLOCK(vp, 0, p);
+	vn_finished_write(mp);
 	return (error);
 }
 
@@ -649,6 +675,140 @@ vn_closefile(fp, p)
 		fp->f_cred, p));
 }
 
+/*
+ * Prepare to start a filesystem write operation. While a suspend request
+ * is pending we sleep until it is lifted, or fail with EWOULDBLOCK if
+ * V_NOWAIT is set. We then bump the count of operations in progress,
+ * unless V_XSLEEP is set, in which case we return without counting.
+ */
+int
+vn_start_write(vp, mpp, flags)
+	struct vnode *vp;
+	struct mount **mpp;
+	int flags;
+{
+	struct mount *mp;
+	int error;
+
+	/*
+	 * If a vnode is provided, get and return the mount point to which
+	 * it will write. On EOPNOTSUPP, *mpp is NULL and we report success.
+	 */
+	if (vp != NULL) {
+		if ((error = VOP_GETWRITEMOUNT(vp, mpp)) != 0) {
+			*mpp = NULL;
+			if (error != EOPNOTSUPP)
+				return (error);
+			return (0);
+		}
+	}
+	if ((mp = *mpp) == NULL)
+		return (0);
+	/*
+	 * Check on status of suspension.
+	 */
+	while ((mp->mnt_kern_flag & MNTK_SUSPEND) != 0) {
+		if (flags & V_NOWAIT)
+			return (EWOULDBLOCK);
+		error = tsleep(&mp->mnt_flag, (PUSER - 1) | (flags & PCATCH),
+		    "suspfs", 0);
+		if (error)
+			return (error);
+	}
+	if (flags & V_XSLEEP)
+		return (0);
+	mp->mnt_writeopcount++;
+	return (0);
+}
+
+/*
+ * Secondary suspension. Used by operations such as vop_inactive
+ * routines that are needed by the higher level functions. These
+ * are allowed to proceed until all the higher level functions have
+ * completed (indicated by mnt_writeopcount dropping to zero). At that
+ * time, these operations are halted until the suspension is over.
+ */
+int
+vn_write_suspend_wait(vp, flags)
+	struct vnode *vp;
+	int flags;
+{
+	struct mount *mp;
+	int error;
+
+	if ((error = VOP_GETWRITEMOUNT(vp, &mp)) != 0) {
+		if (error != EOPNOTSUPP)
+			return (error);
+		return (0);
+	}
+	/*
+	 * If there is no mount point, or the filesystem has not yet reached
+	 * the MNTK_SUSPENDED state, then let the operation proceed.
+	 */
+	if (mp == NULL || (mp->mnt_kern_flag & MNTK_SUSPENDED) == 0)
+		return (0);
+	if (flags & V_NOWAIT)
+		return (EWOULDBLOCK);
+	/*
+	 * Wait for the suspension to finish (we sleep on &mp->mnt_flag).
+	 */
+	return (tsleep(&mp->mnt_flag, (PUSER - 1) | (flags & PCATCH),
+	    "suspfs", 0));
+}
+
+/*
+ * Filesystem write operation has completed; drop the in-progress count
+ * (a NULL mp is ignored). If a suspend is pending and this operation was
+ * the last one, wake the suspender sleeping on mnt_writeopcount.
+ */
+void
+vn_finished_write(mp)
+	struct mount *mp;
+{
+
+	if (mp == NULL)
+		return;
+	mp->mnt_writeopcount--;
+	if (mp->mnt_writeopcount < 0)
+		panic("vn_finished_write: neg cnt");
+	if ((mp->mnt_kern_flag & MNTK_SUSPEND) != 0 &&
+	    mp->mnt_writeopcount <= 0)
+		wakeup(&mp->mnt_writeopcount);
+}
+
+/*
+ * Suspend write operations on a filesystem: drain writers, then sync it.
+ */
+void
+vfs_write_suspend(mp)
+	struct mount *mp;
+{
+	struct proc *p = curproc;
+
+	if (mp->mnt_kern_flag & MNTK_SUSPEND)
+		return;
+	mp->mnt_kern_flag |= MNTK_SUSPEND;
+	if (mp->mnt_writeopcount > 0)
+		(void) tsleep(&mp->mnt_writeopcount, PUSER - 1, "suspwt", 0);
+	VFS_SYNC(mp, MNT_WAIT, p->p_ucred, p);
+	mp->mnt_kern_flag |= MNTK_SUSPENDED;
+}
+
+/*
+ * Resume write operations: clear the suspend flags and wake all sleepers.
+ */
+void
+vfs_write_resume(mp)
+	struct mount *mp;
+{
+
+	if ((mp->mnt_kern_flag & MNTK_SUSPEND) == 0)
+		return;
+	mp->mnt_kern_flag &= ~(MNTK_SUSPEND | MNTK_SUSPENDED);
+	wakeup(&mp->mnt_writeopcount);
+	wakeup(&mp->mnt_flag);
+}
+
 static int
 filt_vnattach(struct knote *kn)
 {
diff --git a/sys/kern/vnode_if.src b/sys/kern/vnode_if.src
index 479cc92..bda7e98 100644
--- a/sys/kern/vnode_if.src
+++ b/sys/kern/vnode_if.src
@@ -394,6 +394,22 @@ vop_strategy {
 };
 
 #
+#% getwritemount vp	= = =
+#
+vop_getwritemount {
+	IN struct vnode *vp;
+	OUT struct mount **mpp;
+};
+
+#
+#% copyonwrite  vp	L L L
+#
+vop_copyonwrite {
+	IN struct vnode *vp;
+	IN struct buf *bp;
+};
+
+#
 #% print	vp	= = =
 #
 vop_print {
diff --git a/sys/miscfs/fdesc/fdesc_vnops.c b/sys/miscfs/fdesc/fdesc_vnops.c
index 772a94c..72c7cae 100644
--- a/sys/miscfs/fdesc/fdesc_vnops.c
+++ b/sys/miscfs/fdesc/fdesc_vnops.c
@@ -383,6 +383,8 @@ fdesc_setattr(ap)
 {
 	struct filedesc *fdp = ap->a_p->p_fd;
 	struct vattr *vap = ap->a_vap;
+	struct vnode *vp;
+	struct mount *mp;
 	struct file *fp;
 	unsigned fd;
 	int error;
@@ -403,8 +405,11 @@ fdesc_setattr(ap)
 	switch (fp->f_type) {
 	case DTYPE_FIFO:
 	case DTYPE_VNODE:
-		error = VOP_SETATTR((struct vnode *) fp->f_data, ap->a_vap,
-		    ap->a_cred, ap->a_p);
+		vp = (struct vnode *)fp->f_data;
+		if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
+			return (error);
+		error = VOP_SETATTR(vp, ap->a_vap, ap->a_cred, ap->a_p);
+		vn_finished_write(mp);
 		break;
 
 	default:
diff --git a/sys/miscfs/fifofs/fifo_vnops.c b/sys/miscfs/fifofs/fifo_vnops.c
index 5bd13a7..03e3e37 100644
--- a/sys/miscfs/fifofs/fifo_vnops.c
+++ b/sys/miscfs/fifofs/fifo_vnops.c
@@ -107,6 +107,7 @@ static struct vnodeopv_entry_desc fifo_vnodeop_entries[] = {
 	{ &vop_open_desc,		(vop_t *) fifo_open },
 	{ &vop_pathconf_desc,		(vop_t *) fifo_pathconf },
 	{ &vop_poll_desc,		(vop_t *) fifo_poll },
+	{ &vop_getwritemount_desc, 	(vop_t *) vop_stdgetwritemount },
 	{ &vop_print_desc,		(vop_t *) fifo_print },
 	{ &vop_read_desc,		(vop_t *) fifo_read },
 	{ &vop_readdir_desc,		(vop_t *) fifo_badop },
diff --git a/sys/miscfs/specfs/spec_vnops.c b/sys/miscfs/specfs/spec_vnops.c
index cbe52f4..baf40c3 100644
--- a/sys/miscfs/specfs/spec_vnops.c
+++ b/sys/miscfs/specfs/spec_vnops.c
@@ -88,6 +88,7 @@ static struct vnodeopv_entry_desc spec_vnodeop_entries[] = {
 	{ &vop_open_desc,		(vop_t *) spec_open },
 	{ &vop_pathconf_desc,		(vop_t *) vop_stdpathconf },
 	{ &vop_poll_desc,		(vop_t *) spec_poll },
+	{ &vop_getwritemount_desc, 	(vop_t *) vop_stdgetwritemount },
 	{ &vop_print_desc,		(vop_t *) spec_print },
 	{ &vop_read_desc,		(vop_t *) spec_read },
 	{ &vop_readdir_desc,		(vop_t *) vop_panic },
@@ -415,16 +416,29 @@ spec_strategy(ap)
 	struct buf *bp;
 	struct vnode *vp;
 	struct mount *mp;
+	int error;
 
 	bp = ap->a_bp;
-	if ((bp->b_iocmd == BIO_WRITE) && (LIST_FIRST(&bp->b_dep)) != NULL)
-		buf_start(bp);
-
+	vp = ap->a_vp;
+	if ((bp->b_iocmd == BIO_WRITE)) {
+		if (vp->v_mount != NULL &&
+		    (vp->v_mount->mnt_kern_flag & MNTK_SUSPENDED) != 0)
+			panic("spec_strategy: bad I/O");
+		if (LIST_FIRST(&bp->b_dep) != NULL)
+			buf_start(bp);
+		if ((vp->v_flag & VCOPYONWRITE) &&
+		    (error = VOP_COPYONWRITE(vp, bp)) != 0 &&
+		    error != EOPNOTSUPP) {
+			bp->b_io.bio_error = error;
+			bp->b_io.bio_flags |= BIO_ERROR;
+			biodone(&bp->b_io);
+			return (0);
+		}
+	}
 	/*
 	 * Collect statistics on synchronous and asynchronous read
 	 * and write counts for disks that have associated filesystems.
 	 */
-	vp = ap->a_vp;
 	if (vn_isdisk(vp, NULL) && (mp = vp->v_specmountpoint) != NULL) {
 		if (bp->b_iocmd == BIO_WRITE) {
 			if (bp->b_lock.lk_lockholder == LK_KERNPROC)
diff --git a/sys/miscfs/union/union_subr.c b/sys/miscfs/union/union_subr.c
index 6b88bef..d1d6e31 100644
--- a/sys/miscfs/union/union_subr.c
+++ b/sys/miscfs/union/union_subr.c
@@ -747,6 +747,7 @@ union_copyup(un, docopy, cred, p)
 	struct proc *p;
 {
 	int error;
+	struct mount *mp;
 	struct vnode *lvp, *uvp;
 
 	/*
@@ -759,9 +760,12 @@ union_copyup(un, docopy, cred, p)
 	if (error)
 		return (error);
 
-	error = union_vn_create(&uvp, un, p);
-	if (error)
+	if ((error = vn_start_write(un->un_dirvp, &mp, V_WAIT | PCATCH)) != 0)
 		return (error);
+	if ((error = union_vn_create(&uvp, un, p)) != 0) {
+		vn_finished_write(mp);
+		return (error);
+	}
 
 	lvp = un->un_lowervp;
 
@@ -785,6 +789,7 @@ union_copyup(un, docopy, cred, p)
 
 	}
 	VOP_UNLOCK(uvp, 0, p);
+	vn_finished_write(mp);
 	union_newupper(un, uvp);
 	KASSERT(uvp->v_usecount > 0, ("copy: uvp refcount 0: %d", uvp->v_usecount));
 	union_vn_close(uvp, FWRITE, cred, p);
@@ -910,11 +915,15 @@ union_mkshadow(um, dvp, cnp, vpp)
 	struct vattr va;
 	struct proc *p = cnp->cn_proc;
 	struct componentname cn;
+	struct mount *mp;
 
-	error = union_relookup(um, dvp, vpp, cnp, &cn,
-			cnp->cn_nameptr, cnp->cn_namelen);
-	if (error)
+	if ((error = vn_start_write(dvp, &mp, V_WAIT | PCATCH)) != 0)
+		return (error);
+	if ((error = union_relookup(um, dvp, vpp, cnp, &cn,
+			cnp->cn_nameptr, cnp->cn_namelen)) != 0) {
+		vn_finished_write(mp);
 		return (error);
+	}
 
 	if (*vpp) {
 		if (cn.cn_flags & HASBUF) {
@@ -925,6 +934,7 @@ union_mkshadow(um, dvp, cnp, vpp)
 			vrele(*vpp);
 		else
 			vput(*vpp);
+		vn_finished_write(mp);
 		*vpp = NULLVP;
 		return (EEXIST);
 	}
@@ -950,6 +960,7 @@ union_mkshadow(um, dvp, cnp, vpp)
 		cn.cn_flags &= ~HASBUF;
 	}
 	/*vput(dvp);*/
+	vn_finished_write(mp);
 	return (error);
 }
 
@@ -973,10 +984,15 @@ union_mkwhiteout(um, dvp, cnp, path)
 	struct proc *p = cnp->cn_proc;
 	struct vnode *wvp;
 	struct componentname cn;
+	struct mount *mp;
 
+	if ((error = vn_start_write(dvp, &mp, V_WAIT | PCATCH)) != 0)
+		return (error);
 	error = union_relookup(um, dvp, &wvp, cnp, &cn, path, strlen(path));
-	if (error)
+	if (error) {
+		vn_finished_write(mp);
 		return (error);
+	}
 
 	if (wvp) {
 		if (cn.cn_flags & HASBUF) {
@@ -987,6 +1003,7 @@ union_mkwhiteout(um, dvp, cnp, path)
 			vrele(wvp);
 		else
 			vput(wvp);
+		vn_finished_write(mp);
 		return (EEXIST);
 	}
 
@@ -998,6 +1015,7 @@ union_mkwhiteout(um, dvp, cnp, path)
 		zfree(namei_zone, cn.cn_pnbuf);
 		cn.cn_flags &= ~HASBUF;
 	}
+	vn_finished_write(mp);
 	return (error);
 }
 
diff --git a/sys/miscfs/union/union_vnops.c b/sys/miscfs/union/union_vnops.c
index 1c5ed5d..d7b95f3 100644
--- a/sys/miscfs/union/union_vnops.c
+++ b/sys/miscfs/union/union_vnops.c
@@ -93,6 +93,7 @@ static int	union_print __P((struct vop_print_args *ap));
 static int	union_read __P((struct vop_read_args *ap));
 static int	union_readdir __P((struct vop_readdir_args *ap));
 static int	union_readlink __P((struct vop_readlink_args *ap));
+static int	union_getwritemount __P((struct vop_getwritemount_args *ap));
 static int	union_reclaim __P((struct vop_reclaim_args *ap));
 static int	union_remove __P((struct vop_remove_args *ap));
 static int	union_rename __P((struct vop_rename_args *ap));
@@ -1681,6 +1682,20 @@ union_readlink(ap)
 	return (error);
 }
 
+static int
+union_getwritemount(ap)
+	struct vop_getwritemount_args /* {
+		struct vnode *a_vp;
+		struct mount **a_mpp;
+	} */ *ap;
+{
+	struct vnode *vp = UPPERVP(ap->a_vp); /* upper-layer vnode */
+
+	if (vp == NULL)
+		panic("union: missing upper layer in getwritemount");
+	return(VOP_GETWRITEMOUNT(vp, ap->a_mpp));
+}
+
 /*
  *	union_inactive:
  *
@@ -1963,6 +1978,7 @@ static struct vnodeopv_entry_desc union_vnodeop_entries[] = {
 	{ &vop_read_desc,		(vop_t *) union_read },
 	{ &vop_readdir_desc,		(vop_t *) union_readdir },
 	{ &vop_readlink_desc,		(vop_t *) union_readlink },
+	{ &vop_getwritemount_desc,	(vop_t *) union_getwritemount },
 	{ &vop_reclaim_desc,		(vop_t *) union_reclaim },
 	{ &vop_remove_desc,		(vop_t *) union_remove },
 	{ &vop_rename_desc,		(vop_t *) union_rename },
diff --git a/sys/nfs/nfs_serv.c b/sys/nfs/nfs_serv.c
index 06ce9ed..0334f74 100644
--- a/sys/nfs/nfs_serv.c
+++ b/sys/nfs/nfs_serv.c
@@ -325,10 +325,18 @@ nfsrv_setattr(nfsd, slp, procp, mrq)
 	struct mbuf *mb, *mb2, *mreq;
 	u_quad_t frev;
 	struct timespec guard;
+	struct mount *mp = NULL;
 
 	nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
 	fhp = &nfh.fh_generic;
 	nfsm_srvmtofh(fhp);
+	if ((mp = vfs_getvfs(&fhp->fh_fsid)) == NULL)
+		return (ESTALE);
+	if ((error = VFS_FHTOVP(mp, &fhp->fh_fid, &vp)) != NULL)
+		return (error);
+	(void) vn_start_write(vp, &mp, V_WAIT);
+	vput(vp);
+	vp = NULL;
 	VATTR_NULL(vap);
 	if (v3) {
 		nfsm_srvsattr(vap);
@@ -440,6 +448,7 @@ out:
 nfsmout:
 	if (vp)
 		vput(vp);
+	vn_finished_write(mp);
 	return(error);
 }
 
@@ -1039,6 +1048,7 @@ nfsrv_write(nfsd, slp, procp, mrq)
 	struct uio io, *uiop = &io;
 	off_t off;
 	u_quad_t frev;
+	struct mount *mntp = NULL;
 
 	nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
 	if (mrep == NULL) {
@@ -1048,6 +1058,13 @@ nfsrv_write(nfsd, slp, procp, mrq)
 	}
 	fhp = &nfh.fh_generic;
 	nfsm_srvmtofh(fhp);
+	if ((mntp = vfs_getvfs(&fhp->fh_fsid)) == NULL)
+		return (ESTALE);
+	if ((error = VFS_FHTOVP(mntp, &fhp->fh_fid, &vp)) != NULL)
+		return (error);
+	(void) vn_start_write(vp, &mntp, V_WAIT);
+	vput(vp);
+	vp = NULL;
 	if (v3) {
 		nfsm_dissect(tl, u_int32_t *, 5 * NFSX_UNSIGNED);
 		off = fxdr_hyper(tl);
@@ -1205,6 +1222,7 @@ nfsrv_write(nfsd, slp, procp, mrq)
 nfsmout:
 	if (vp)
 		vput(vp);
+	vn_finished_write(mntp);
 	return(error);
 }
 
@@ -1241,6 +1259,7 @@ nfsrv_writegather(ndp, slp, procp, mrq)
 	struct vnode *vp = NULL;
 	struct uio io, *uiop = &io;
 	u_quad_t frev, cur_usec;
+	struct mount *mntp = NULL;
 
 	nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
 #ifndef nolint
@@ -1444,8 +1463,16 @@ loop1:
 			mp = mp->m_next;
 		    }
 		    if (!error) {
+			if (vn_start_write(vp, &mntp, V_NOWAIT) != 0) {
+			    VOP_UNLOCK(vp, 0, procp);
+			    error = vn_start_write(NULL, &mntp, V_WAIT);
+			    vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, procp);
+			}
+		    }
+		    if (!error) {
 			error = VOP_WRITE(vp, uiop, ioflags, cred);
 			nfsstats.srvvop_writes++;
+			vn_finished_write(mntp);
 		    }
 		    FREE((caddr_t)iov, M_TEMP);
 		}
@@ -1620,6 +1647,8 @@ nfsrv_create(nfsd, slp, procp, mrq)
 	fhandle_t *fhp;
 	u_quad_t frev, tempsize;
 	u_char cverf[NFSX_V3CREATEVERF];
+	struct mount *mp = NULL;
+	struct vnode *vp;
 
 	nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
 #ifndef nolint
@@ -1629,6 +1658,12 @@ nfsrv_create(nfsd, slp, procp, mrq)
 
 	fhp = &nfh.fh_generic;
 	nfsm_srvmtofh(fhp);
+	if ((mp = vfs_getvfs(&fhp->fh_fsid)) == NULL)
+		return (ESTALE);
+	if ((error = VFS_FHTOVP(mp, &fhp->fh_fid, &vp)) != NULL)
+		return (error);
+	(void) vn_start_write(vp, &mp, V_WAIT);
+	vput(vp);
 	nfsm_srvnamesiz(len);
 
 	nd.ni_cnd.cn_cred = cred;
@@ -1869,6 +1904,7 @@ nfsmout:
 	}
 	if (nd.ni_vp)
 		vput(nd.ni_vp);
+	vn_finished_write(mp);
 	return (error);
 }
 
@@ -1901,12 +1937,20 @@ nfsrv_mknod(nfsd, slp, procp, mrq)
 	nfsfh_t nfh;
 	fhandle_t *fhp;
 	u_quad_t frev;
+	struct mount *mp = NULL;
 
 	nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
 	ndclear(&nd);
 
 	fhp = &nfh.fh_generic;
 	nfsm_srvmtofh(fhp);
+	if ((mp = vfs_getvfs(&fhp->fh_fsid)) == NULL)
+		return (ESTALE);
+	if ((error = VFS_FHTOVP(mp, &fhp->fh_fid, &vp)) != NULL)
+		return (error);
+	(void) vn_start_write(vp, &mp, V_WAIT);
+	vput(vp);
+	vp = NULL;
 	nfsm_srvnamesiz(len);
 
 	nd.ni_cnd.cn_cred = cred;
@@ -2030,6 +2074,7 @@ out:
 		nfsm_srvpostop_attr(0, vap);
 	}
 	nfsm_srvwcc_data(dirfor_ret, &dirfor, diraft_ret, &diraft);
+	vn_finished_write(mp);
 	return (0);
 nfsmout:
 	if (dirp)
@@ -2045,6 +2090,7 @@ nfsmout:
 	}
 	if (nd.ni_vp)
 		vput(nd.ni_vp);
+	vn_finished_write(mp);
 	return (error);
 }
 
@@ -2075,12 +2121,21 @@ nfsrv_remove(nfsd, slp, procp, mrq)
 	nfsfh_t nfh;
 	fhandle_t *fhp;
 	u_quad_t frev;
+	struct mount *mp = NULL;
+	struct vnode *vp;
 
 	nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
 	ndclear(&nd);
 
 	fhp = &nfh.fh_generic;
 	nfsm_srvmtofh(fhp);
+	if ((mp = vfs_getvfs(&fhp->fh_fsid)) == NULL)
+		return (ESTALE);
+	if ((error = VFS_FHTOVP(mp, &fhp->fh_fid, &vp)) != NULL)
+		return (error);
+	(void) vn_start_write(vp, &mp, V_WAIT);
+	vput(vp);
+	vp = NULL;
 	nfsm_srvnamesiz(len);
 
 	nd.ni_cnd.cn_cred = cred;
@@ -2137,6 +2192,7 @@ nfsmout:
 	}
 	if (nd.ni_vp)
 		vput(nd.ni_vp);
+	vn_finished_write(mp);
 	return(error);
 }
 
@@ -2170,6 +2226,8 @@ nfsrv_rename(nfsd, slp, procp, mrq)
 	fhandle_t *ffhp, *tfhp;
 	u_quad_t frev;
 	uid_t saved_uid;
+	struct mount *mp = NULL;
+	struct vnode *vp;
 
 	nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
 #ifndef nolint
@@ -2186,6 +2244,13 @@ nfsrv_rename(nfsd, slp, procp, mrq)
 	ndclear(&tond);
 
 	nfsm_srvmtofh(ffhp);
+	if ((mp = vfs_getvfs(&ffhp->fh_fsid)) == NULL)
+		return (ESTALE);
+	if ((error = VFS_FHTOVP(mp, &ffhp->fh_fid, &vp)) != NULL)
+		return (error);
+	(void) vn_start_write(vp, &mp, V_WAIT);
+	vput(vp);
+	vp = NULL;
 	nfsm_srvnamesiz(len);
 	/*
 	 * Remember our original uid so that we can reset cr_uid before
@@ -2360,6 +2425,7 @@ nfsmout:
 	if (fromnd.ni_vp)
 		vrele(fromnd.ni_vp);
 
+	vn_finished_write(mp);
 	return (error);
 }
 
@@ -2390,6 +2456,7 @@ nfsrv_link(nfsd, slp, procp, mrq)
 	nfsfh_t nfh, dnfh;
 	fhandle_t *fhp, *dfhp;
 	u_quad_t frev;
+	struct mount *mp = NULL;
 
 	nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
 	ndclear(&nd);
@@ -2397,6 +2464,13 @@ nfsrv_link(nfsd, slp, procp, mrq)
 	fhp = &nfh.fh_generic;
 	dfhp = &dnfh.fh_generic;
 	nfsm_srvmtofh(fhp);
+	if ((mp = vfs_getvfs(&fhp->fh_fsid)) == NULL)
+		return (ESTALE);
+	if ((error = VFS_FHTOVP(mp, &fhp->fh_fid, &vp)) != NULL)
+		return (error);
+	(void) vn_start_write(vp, &mp, V_WAIT);
+	vput(vp);
+	vp = NULL;
 	nfsm_srvmtofh(dfhp);
 	nfsm_srvnamesiz(len);
 
@@ -2475,6 +2549,7 @@ nfsmout:
 	}
 	if (nd.ni_vp)
 		vrele(nd.ni_vp);
+	vn_finished_write(mp);
 	return(error);
 }
 
@@ -2508,12 +2583,21 @@ nfsrv_symlink(nfsd, slp, procp, mrq)
 	nfsfh_t nfh;
 	fhandle_t *fhp;
 	u_quad_t frev;
+	struct mount *mp = NULL;
+	struct vnode *vp;
 
 	nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
 	ndclear(&nd);
 
 	fhp = &nfh.fh_generic;
 	nfsm_srvmtofh(fhp);
+	if ((mp = vfs_getvfs(&fhp->fh_fsid)) == NULL)
+		return (ESTALE);
+	if ((error = VFS_FHTOVP(mp, &fhp->fh_fid, &vp)) != NULL)
+		return (error);
+	(void) vn_start_write(vp, &mp, V_WAIT);
+	vput(vp);
+	vp = NULL;
 	nfsm_srvnamesiz(len);
 	nd.ni_cnd.cn_cred = cred;
 	nd.ni_cnd.cn_nameiop = CREATE;
@@ -2651,6 +2735,7 @@ nfsmout:
 	if (pathcp)
 		FREE(pathcp, M_TEMP);
 
+	vn_finished_write(mp);
 	return (error);
 }
 
@@ -2685,12 +2770,21 @@ nfsrv_mkdir(nfsd, slp, procp, mrq)
 	nfsfh_t nfh;
 	fhandle_t *fhp;
 	u_quad_t frev;
+	struct mount *mp = NULL;
+	struct vnode *vp;
 
 	nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
 	ndclear(&nd);
 
 	fhp = &nfh.fh_generic;
 	nfsm_srvmtofh(fhp);
+	if ((mp = vfs_getvfs(&fhp->fh_fsid)) == NULL)
+		return (ESTALE);
+	if ((error = VFS_FHTOVP(mp, &fhp->fh_fid, &vp)) != NULL)
+		return (error);
+	(void) vn_start_write(vp, &mp, V_WAIT);
+	vput(vp);
+	vp = NULL;
 	nfsm_srvnamesiz(len);
 	nd.ni_cnd.cn_cred = cred;
 	nd.ni_cnd.cn_nameiop = CREATE;
@@ -2787,6 +2881,7 @@ nfsmout:
 		else
 			vrele(nd.ni_vp);
 	}
+	vn_finished_write(mp);
 	return (error);
 }
 
@@ -2817,12 +2912,20 @@ nfsrv_rmdir(nfsd, slp, procp, mrq)
 	fhandle_t *fhp;
 	struct nameidata nd;
 	u_quad_t frev;
+	struct mount *mp = NULL;
 
 	nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
 	ndclear(&nd);
 
 	fhp = &nfh.fh_generic;
 	nfsm_srvmtofh(fhp);
+	if ((mp = vfs_getvfs(&fhp->fh_fsid)) == NULL)
+		return (ESTALE);
+	if ((error = VFS_FHTOVP(mp, &fhp->fh_fid, &vp)) != NULL)
+		return (error);
+	(void) vn_start_write(vp, &mp, V_WAIT);
+	vput(vp);
+	vp = NULL;
 	nfsm_srvnamesiz(len);
 	nd.ni_cnd.cn_cred = cred;
 	nd.ni_cnd.cn_nameiop = DELETE;
@@ -2895,6 +2998,7 @@ nfsmout:
 	if (nd.ni_vp)
 		vput(nd.ni_vp);
 
+	vn_finished_write(mp);
 	return(error);
 }
 
@@ -3588,6 +3692,7 @@ nfsrv_commit(nfsd, slp, procp, mrq)
 	char *cp2;
 	struct mbuf *mb, *mb2, *mreq;
 	u_quad_t frev, off;
+	struct mount *mp = NULL;
 
 	nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
 #ifndef nolint
@@ -3595,6 +3700,13 @@ nfsrv_commit(nfsd, slp, procp, mrq)
 #endif
 	fhp = &nfh.fh_generic;
 	nfsm_srvmtofh(fhp);
+	if ((mp = vfs_getvfs(&fhp->fh_fsid)) == NULL)
+		return (ESTALE);
+	if ((error = VFS_FHTOVP(mp, &fhp->fh_fid, &vp)) != NULL)
+		return (error);
+	(void) vn_start_write(vp, &mp, V_WAIT);
+	vput(vp);
+	vp = NULL;
 	nfsm_dissect(tl, u_int32_t *, 3 * NFSX_UNSIGNED);
 
 	/*
@@ -3697,6 +3809,7 @@ nfsrv_commit(nfsd, slp, procp, mrq)
 nfsmout:
 	if (vp)
 		vput(vp);
+	vn_finished_write(mp);
 	return(error);
 }
 
@@ -4065,4 +4178,3 @@ nfsrv_access(vp, flags, cred, rdonly, p, override)
 	return error;
 }
 #endif /* NFS_NOSERVER */
-
diff --git a/sys/nfsserver/nfs_serv.c b/sys/nfsserver/nfs_serv.c
index 06ce9ed..0334f74 100644
--- a/sys/nfsserver/nfs_serv.c
+++ b/sys/nfsserver/nfs_serv.c
@@ -325,10 +325,18 @@ nfsrv_setattr(nfsd, slp, procp, mrq)
 	struct mbuf *mb, *mb2, *mreq;
 	u_quad_t frev;
 	struct timespec guard;
+	struct mount *mp = NULL;
 
 	nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
 	fhp = &nfh.fh_generic;
 	nfsm_srvmtofh(fhp);
+	if ((mp = vfs_getvfs(&fhp->fh_fsid)) == NULL)
+		return (ESTALE);
+	if ((error = VFS_FHTOVP(mp, &fhp->fh_fid, &vp)) != NULL)
+		return (error);
+	(void) vn_start_write(vp, &mp, V_WAIT);
+	vput(vp);
+	vp = NULL;
 	VATTR_NULL(vap);
 	if (v3) {
 		nfsm_srvsattr(vap);
@@ -440,6 +448,7 @@ out:
 nfsmout:
 	if (vp)
 		vput(vp);
+	vn_finished_write(mp);
 	return(error);
 }
 
@@ -1039,6 +1048,7 @@ nfsrv_write(nfsd, slp, procp, mrq)
 	struct uio io, *uiop = &io;
 	off_t off;
 	u_quad_t frev;
+	struct mount *mntp = NULL;
 
 	nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
 	if (mrep == NULL) {
@@ -1048,6 +1058,13 @@ nfsrv_write(nfsd, slp, procp, mrq)
 	}
 	fhp = &nfh.fh_generic;
 	nfsm_srvmtofh(fhp);
+	if ((mntp = vfs_getvfs(&fhp->fh_fsid)) == NULL)
+		return (ESTALE);
+	if ((error = VFS_FHTOVP(mntp, &fhp->fh_fid, &vp)) != NULL)
+		return (error);
+	(void) vn_start_write(vp, &mntp, V_WAIT);
+	vput(vp);
+	vp = NULL;
 	if (v3) {
 		nfsm_dissect(tl, u_int32_t *, 5 * NFSX_UNSIGNED);
 		off = fxdr_hyper(tl);
@@ -1205,6 +1222,7 @@ nfsrv_write(nfsd, slp, procp, mrq)
 nfsmout:
 	if (vp)
 		vput(vp);
+	vn_finished_write(mntp);
 	return(error);
 }
 
@@ -1241,6 +1259,7 @@ nfsrv_writegather(ndp, slp, procp, mrq)
 	struct vnode *vp = NULL;
 	struct uio io, *uiop = &io;
 	u_quad_t frev, cur_usec;
+	struct mount *mntp = NULL;
 
 	nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
 #ifndef nolint
@@ -1444,8 +1463,16 @@ loop1:
 			mp = mp->m_next;
 		    }
 		    if (!error) {
+			if (vn_start_write(vp, &mntp, V_NOWAIT) != 0) {
+			    VOP_UNLOCK(vp, 0, procp);
+			    error = vn_start_write(NULL, &mntp, V_WAIT);
+			    vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, procp);
+			}
+		    }
+		    if (!error) {
 			error = VOP_WRITE(vp, uiop, ioflags, cred);
 			nfsstats.srvvop_writes++;
+			vn_finished_write(mntp);
 		    }
 		    FREE((caddr_t)iov, M_TEMP);
 		}
@@ -1620,6 +1647,8 @@ nfsrv_create(nfsd, slp, procp, mrq)
 	fhandle_t *fhp;
 	u_quad_t frev, tempsize;
 	u_char cverf[NFSX_V3CREATEVERF];
+	struct mount *mp = NULL;
+	struct vnode *vp;
 
 	nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
 #ifndef nolint
@@ -1629,6 +1658,12 @@ nfsrv_create(nfsd, slp, procp, mrq)
 
 	fhp = &nfh.fh_generic;
 	nfsm_srvmtofh(fhp);
+	if ((mp = vfs_getvfs(&fhp->fh_fsid)) == NULL)
+		return (ESTALE);
+	if ((error = VFS_FHTOVP(mp, &fhp->fh_fid, &vp)) != NULL)
+		return (error);
+	(void) vn_start_write(vp, &mp, V_WAIT);
+	vput(vp);
 	nfsm_srvnamesiz(len);
 
 	nd.ni_cnd.cn_cred = cred;
@@ -1869,6 +1904,7 @@ nfsmout:
 	}
 	if (nd.ni_vp)
 		vput(nd.ni_vp);
+	vn_finished_write(mp);
 	return (error);
 }
 
@@ -1901,12 +1937,20 @@ nfsrv_mknod(nfsd, slp, procp, mrq)
 	nfsfh_t nfh;
 	fhandle_t *fhp;
 	u_quad_t frev;
+	struct mount *mp = NULL;
 
 	nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
 	ndclear(&nd);
 
 	fhp = &nfh.fh_generic;
 	nfsm_srvmtofh(fhp);
+	if ((mp = vfs_getvfs(&fhp->fh_fsid)) == NULL)
+		return (ESTALE);
+	if ((error = VFS_FHTOVP(mp, &fhp->fh_fid, &vp)) != NULL)
+		return (error);
+	(void) vn_start_write(vp, &mp, V_WAIT);
+	vput(vp);
+	vp = NULL;
 	nfsm_srvnamesiz(len);
 
 	nd.ni_cnd.cn_cred = cred;
@@ -2030,6 +2074,7 @@ out:
 		nfsm_srvpostop_attr(0, vap);
 	}
 	nfsm_srvwcc_data(dirfor_ret, &dirfor, diraft_ret, &diraft);
+	vn_finished_write(mp);
 	return (0);
 nfsmout:
 	if (dirp)
@@ -2045,6 +2090,7 @@ nfsmout:
 	}
 	if (nd.ni_vp)
 		vput(nd.ni_vp);
+	vn_finished_write(mp);
 	return (error);
 }
 
@@ -2075,12 +2121,21 @@ nfsrv_remove(nfsd, slp, procp, mrq)
 	nfsfh_t nfh;
 	fhandle_t *fhp;
 	u_quad_t frev;
+	struct mount *mp = NULL;
+	struct vnode *vp;
 
 	nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
 	ndclear(&nd);
 
 	fhp = &nfh.fh_generic;
 	nfsm_srvmtofh(fhp);
+	if ((mp = vfs_getvfs(&fhp->fh_fsid)) == NULL)
+		return (ESTALE);
+	if ((error = VFS_FHTOVP(mp, &fhp->fh_fid, &vp)) != NULL)
+		return (error);
+	(void) vn_start_write(vp, &mp, V_WAIT);
+	vput(vp);
+	vp = NULL;
 	nfsm_srvnamesiz(len);
 
 	nd.ni_cnd.cn_cred = cred;
@@ -2137,6 +2192,7 @@ nfsmout:
 	}
 	if (nd.ni_vp)
 		vput(nd.ni_vp);
+	vn_finished_write(mp);
 	return(error);
 }
 
@@ -2170,6 +2226,8 @@ nfsrv_rename(nfsd, slp, procp, mrq)
 	fhandle_t *ffhp, *tfhp;
 	u_quad_t frev;
 	uid_t saved_uid;
+	struct mount *mp = NULL;
+	struct vnode *vp;
 
 	nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
 #ifndef nolint
@@ -2186,6 +2244,13 @@ nfsrv_rename(nfsd, slp, procp, mrq)
 	ndclear(&tond);
 
 	nfsm_srvmtofh(ffhp);
+	if ((mp = vfs_getvfs(&ffhp->fh_fsid)) == NULL)
+		return (ESTALE);
+	if ((error = VFS_FHTOVP(mp, &ffhp->fh_fid, &vp)) != NULL)
+		return (error);
+	(void) vn_start_write(vp, &mp, V_WAIT);
+	vput(vp);
+	vp = NULL;
 	nfsm_srvnamesiz(len);
 	/*
 	 * Remember our original uid so that we can reset cr_uid before
@@ -2360,6 +2425,7 @@ nfsmout:
 	if (fromnd.ni_vp)
 		vrele(fromnd.ni_vp);
 
+	vn_finished_write(mp);
 	return (error);
 }
 
@@ -2390,6 +2456,7 @@ nfsrv_link(nfsd, slp, procp, mrq)
 	nfsfh_t nfh, dnfh;
 	fhandle_t *fhp, *dfhp;
 	u_quad_t frev;
+	struct mount *mp = NULL;
 
 	nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
 	ndclear(&nd);
@@ -2397,6 +2464,13 @@ nfsrv_link(nfsd, slp, procp, mrq)
 	fhp = &nfh.fh_generic;
 	dfhp = &dnfh.fh_generic;
 	nfsm_srvmtofh(fhp);
+	if ((mp = vfs_getvfs(&fhp->fh_fsid)) == NULL)
+		return (ESTALE);
+	if ((error = VFS_FHTOVP(mp, &fhp->fh_fid, &vp)) != NULL)
+		return (error);
+	(void) vn_start_write(vp, &mp, V_WAIT);
+	vput(vp);
+	vp = NULL;
 	nfsm_srvmtofh(dfhp);
 	nfsm_srvnamesiz(len);
 
@@ -2475,6 +2549,7 @@ nfsmout:
 	}
 	if (nd.ni_vp)
 		vrele(nd.ni_vp);
+	vn_finished_write(mp);
 	return(error);
 }
 
@@ -2508,12 +2583,21 @@ nfsrv_symlink(nfsd, slp, procp, mrq)
 	nfsfh_t nfh;
 	fhandle_t *fhp;
 	u_quad_t frev;
+	struct mount *mp = NULL;
+	struct vnode *vp;
 
 	nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
 	ndclear(&nd);
 
 	fhp = &nfh.fh_generic;
 	nfsm_srvmtofh(fhp);
+	if ((mp = vfs_getvfs(&fhp->fh_fsid)) == NULL)
+		return (ESTALE);
+	if ((error = VFS_FHTOVP(mp, &fhp->fh_fid, &vp)) != NULL)
+		return (error);
+	(void) vn_start_write(vp, &mp, V_WAIT);
+	vput(vp);
+	vp = NULL;
 	nfsm_srvnamesiz(len);
 	nd.ni_cnd.cn_cred = cred;
 	nd.ni_cnd.cn_nameiop = CREATE;
@@ -2651,6 +2735,7 @@ nfsmout:
 	if (pathcp)
 		FREE(pathcp, M_TEMP);
 
+	vn_finished_write(mp);
 	return (error);
 }
 
@@ -2685,12 +2770,21 @@ nfsrv_mkdir(nfsd, slp, procp, mrq)
 	nfsfh_t nfh;
 	fhandle_t *fhp;
 	u_quad_t frev;
+	struct mount *mp = NULL;
+	struct vnode *vp;
 
 	nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
 	ndclear(&nd);
 
 	fhp = &nfh.fh_generic;
 	nfsm_srvmtofh(fhp);
+	if ((mp = vfs_getvfs(&fhp->fh_fsid)) == NULL)
+		return (ESTALE);
+	if ((error = VFS_FHTOVP(mp, &fhp->fh_fid, &vp)) != NULL)
+		return (error);
+	(void) vn_start_write(vp, &mp, V_WAIT);
+	vput(vp);
+	vp = NULL;
 	nfsm_srvnamesiz(len);
 	nd.ni_cnd.cn_cred = cred;
 	nd.ni_cnd.cn_nameiop = CREATE;
@@ -2787,6 +2881,7 @@ nfsmout:
 		else
 			vrele(nd.ni_vp);
 	}
+	vn_finished_write(mp);
 	return (error);
 }
 
@@ -2817,12 +2912,20 @@ nfsrv_rmdir(nfsd, slp, procp, mrq)
 	fhandle_t *fhp;
 	struct nameidata nd;
 	u_quad_t frev;
+	struct mount *mp = NULL;
 
 	nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
 	ndclear(&nd);
 
 	fhp = &nfh.fh_generic;
 	nfsm_srvmtofh(fhp);
+	if ((mp = vfs_getvfs(&fhp->fh_fsid)) == NULL)
+		return (ESTALE);
+	if ((error = VFS_FHTOVP(mp, &fhp->fh_fid, &vp)) != NULL)
+		return (error);
+	(void) vn_start_write(vp, &mp, V_WAIT);
+	vput(vp);
+	vp = NULL;
 	nfsm_srvnamesiz(len);
 	nd.ni_cnd.cn_cred = cred;
 	nd.ni_cnd.cn_nameiop = DELETE;
@@ -2895,6 +2998,7 @@ nfsmout:
 	if (nd.ni_vp)
 		vput(nd.ni_vp);
 
+	vn_finished_write(mp);
 	return(error);
 }
 
@@ -3588,6 +3692,7 @@ nfsrv_commit(nfsd, slp, procp, mrq)
 	char *cp2;
 	struct mbuf *mb, *mb2, *mreq;
 	u_quad_t frev, off;
+	struct mount *mp = NULL;
 
 	nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
 #ifndef nolint
@@ -3595,6 +3700,13 @@ nfsrv_commit(nfsd, slp, procp, mrq)
 #endif
 	fhp = &nfh.fh_generic;
 	nfsm_srvmtofh(fhp);
+	if ((mp = vfs_getvfs(&fhp->fh_fsid)) == NULL)
+		return (ESTALE);
+	if ((error = VFS_FHTOVP(mp, &fhp->fh_fid, &vp)) != NULL)
+		return (error);
+	(void) vn_start_write(vp, &mp, V_WAIT);
+	vput(vp);
+	vp = NULL;
 	nfsm_dissect(tl, u_int32_t *, 3 * NFSX_UNSIGNED);
 
 	/*
@@ -3697,6 +3809,7 @@ nfsrv_commit(nfsd, slp, procp, mrq)
 nfsmout:
 	if (vp)
 		vput(vp);
+	vn_finished_write(mp);
 	return(error);
 }
 
@@ -4065,4 +4178,3 @@ nfsrv_access(vp, flags, cred, rdonly, p, override)
 	return error;
 }
 #endif /* NFS_NOSERVER */
-
diff --git a/sys/svr4/svr4_fcntl.c b/sys/svr4/svr4_fcntl.c
index 4040030..c65f345 100644
--- a/sys/svr4/svr4_fcntl.c
+++ b/sys/svr4/svr4_fcntl.c
@@ -247,6 +247,7 @@ fd_revoke(p, fd)
 	struct filedesc *fdp = p->p_fd;
 	struct file *fp;
 	struct vnode *vp;
+	struct mount *mp;
 	struct vattr vattr;
 	int error, *retval;
 
@@ -271,8 +272,11 @@ fd_revoke(p, fd)
 	    (error = suser(p)) != 0)
 		goto out;
 
+	if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
+		goto out;
 	if (vcount(vp) > 1)
 		VOP_REVOKE(vp, REVOKEALL);
+	vn_finished_write(mp);
 out:
 	vrele(vp);
 	return error;
diff --git a/sys/sys/buf.h b/sys/sys/buf.h
index bc8203f..116e011 100644
--- a/sys/sys/buf.h
+++ b/sys/sys/buf.h
@@ -466,6 +466,7 @@ buf_countdeps(struct buf *bp, int i)
 /* Flags to low-level allocation routines. */
 #define B_CLRBUF	0x01	/* Request allocated buffer be cleared. */
 #define B_SYNC		0x02	/* Do all allocations synchronously. */
+#define	B_METAONLY	0x04	/* Return indirect block buffer. */
 
 #ifdef _KERNEL
 extern int	nbuf;			/* The number of buffer headers */
diff --git a/sys/sys/proc.h b/sys/sys/proc.h
index d215351..fb80e5b 100644
--- a/sys/sys/proc.h
+++ b/sys/sys/proc.h
@@ -285,6 +285,7 @@ struct	proc {
 /* Marked a kernel thread */
 #define	P_BUFEXHAUST	0x100000 /* dirty buffers flush is in progress */
 #define	P_KTHREADP	0x200000 /* Process is really a kernel thread */
+#define	P_COWINPROGRESS	0x400000 /* Snapshot copy-on-write in progress */
 
 #define	P_DEADLKTREAT   0x800000 /* lock aquisition - deadlock treatment */
 
diff --git a/sys/sys/vnode.h b/sys/sys/vnode.h
index 5817855..3da7897 100644
--- a/sys/sys/vnode.h
+++ b/sys/sys/vnode.h
@@ -158,7 +158,7 @@ struct vnode {
 /* open for business    0x00800 */
 /* open for business    0x01000 */
 #define	VOBJBUF		0x02000	/* Allocate buffers in VM object */
-/* open for business    0x04000 */
+#define	VCOPYONWRITE    0x04000 /* vnode is doing copy-on-write */
 #define	VAGE		0x08000	/* Insert vnode at head of free list */
 #define	VOLOCK		0x10000	/* vnode is locked waiting for an object */
 #define	VOWANT		0x20000	/* a process is waiting for VOLOCK */
@@ -246,12 +246,15 @@ extern int		vttoif_tab[];
 /*
  * Flags to various vnode functions.
  */
-#define	SKIPSYSTEM	0x0001		/* vflush: skip vnodes marked VSYSTEM */
-#define	FORCECLOSE	0x0002		/* vflush: force file closure */
-#define	WRITECLOSE	0x0004		/* vflush: only close writable files */
-#define	DOCLOSE		0x0008		/* vclean: close active files */
-#define	V_SAVE		0x0001		/* vinvalbuf: sync file first */
-#define	REVOKEALL	0x0001		/* vop_revoke: revoke all aliases */
+#define	SKIPSYSTEM	0x0001	/* vflush: skip vnodes marked VSYSTEM */
+#define	FORCECLOSE	0x0002	/* vflush: force file closure */
+#define	WRITECLOSE	0x0004	/* vflush: only close writable files */
+#define	DOCLOSE		0x0008	/* vclean: close active files */
+#define	V_SAVE		0x0001	/* vinvalbuf: sync file first */
+#define	REVOKEALL	0x0001	/* vop_revoke: revoke all aliases */
+#define	V_WAIT		0x0001	/* vn_start_write: sleep for suspend */
+#define	V_NOWAIT	0x0002	/* vn_start_write: don't sleep for suspend */
+#define	V_XSLEEP	0x0004	/* vn_start_write: just return after sleep */
 
 #define	VREF(vp)	vref(vp)
 
@@ -572,6 +575,7 @@ int	vrecycle __P((struct vnode *vp, struct simplelock *inter_lkp,
 	    struct proc *p));
 int 	vn_close __P((struct vnode *vp,
 	    int flags, struct ucred *cred, struct proc *p));
+void	vn_finished_write __P((struct mount *mp));
 int	vn_isdisk __P((struct vnode *vp, int *errp));
 int	vn_lock __P((struct vnode *vp, int flags, struct proc *p));
 #ifdef	DEBUG_LOCKS
@@ -587,13 +591,18 @@ int 	vn_rdwr __P((enum uio_rw rw, struct vnode *vp, caddr_t base,
 	    int len, off_t offset, enum uio_seg segflg, int ioflg,
 	    struct ucred *cred, int *aresid, struct proc *p));
 int	vn_stat __P((struct vnode *vp, struct stat *sb, struct proc *p));
+int	vn_start_write __P((struct vnode *vp, struct mount **mpp, int flags));
 dev_t	vn_todev __P((struct vnode *vp));
+int	vn_write_suspend_wait __P((struct vnode *vp, int flags));
+int 	vn_writechk __P((struct vnode *vp));
 int	vfs_cache_lookup __P((struct vop_lookup_args *ap));
 int	vfs_object_create __P((struct vnode *vp, struct proc *p,
                 struct ucred *cred));
 void	vfs_timestamp __P((struct timespec *));
-int 	vn_writechk __P((struct vnode *vp));
+void	vfs_write_resume __P((struct mount *mp));
+void	vfs_write_suspend __P((struct mount *mp));
 int	vop_stdbwrite __P((struct vop_bwrite_args *ap));
+int	vop_stdgetwritemount __P((struct vop_getwritemount_args *));
 int	vop_stdislocked __P((struct vop_islocked_args *));
 int	vop_stdlock __P((struct vop_lock_args *));
 int	vop_stdunlock __P((struct vop_unlock_args *));
diff --git a/sys/ufs/ffs/README.snapshot b/sys/ufs/ffs/README.snapshot
new file mode 100644
index 0000000..f3177c3
--- /dev/null
+++ b/sys/ufs/ffs/README.snapshot
@@ -0,0 +1,112 @@
+$FreeBSD$
+
+Snapshot Status
+
+As is detailed in the operational information below, snapshots
+are definitely alpha-test code and are NOT yet ready for production
+use. Much remains to be done to make them really useful, but I
+wanted to let folks get a chance to try it out and start reporting
+bugs and other shortcomings. Such reports should be sent to
+Kirk McKusick <mckusick@mckusick.com>.
+
+
+Snapshot Copyright Restrictions
+
+Snapshots have been introduced to FreeBSD with a `Berkeley-style'
+copyright. The file implementing snapshots resides in the sys/ufs/ffs
+directory and is compiled into the generic kernel by default.
+
+
+Using Snapshots
+
+To create a snapshot of your /var filesystem, run the command:
+
+	mount -u -o snapshot /var/snapshot/snap1 /var
+
+This command will take a snapshot of your /var filesystem and
+leave it in the file /var/snapshot/snap1. Note that snapshot
+files must be created in the filesystem that is being snapshotted.
+I use the convention of putting a `snapshot' directory at the
+root of each filesystem into which I can place snapshots.
+You may create up to 20 snapshots per filesystem. Active snapshots
+are recorded in the superblock, so they persist across unmount
+and remount operations and across system reboots. When you
+are done with a snapshot, it can be removed with the `rm'
+command. Snapshots may be removed in any order; however, you
+may not get back all the space contained in the snapshot as
+another snapshot may claim some of the blocks that it is releasing. 
+Note that the `schg' flag is set on snapshots to ensure that
+not even the root user can write to them. The unlink command
+makes an exception for snapshot files in that it allows them
+to be removed even though they have the `schg' flag set, so it
+is not necessary to clear the `schg' flag before removing a
+snapshot file.
+
+Once you have taken a snapshot, there are three interesting
+things that you can do with it:
+
+1) Run fsck on the snapshot file. Assuming that the filesystem
+   was clean when it was mounted, you should always get a clean
+   (and unchanging) result from running fsck on the snapshot.
+   If you are running with soft updates and rebooted after a
+   crash without cleaning up the filesystem, then fsck of the
+   snapshot may find missing blocks and inodes or inodes with
+   link counts that are too high. I have not yet added the
+   system calls to allow fsck to add these missing resources
+   back to the filesystem - that will be added once the basic
+   snapshot code is working properly. So, view those reports
+   as informational for now.
+
+2) Run dump on the snapshot. You will get a dump that is
+   consistent with the filesystem as of the timestamp of the
+   snapshot. Note that I have not yet changed dump to set the
+   dumpdates file correctly, so do not use this feature in
+   production until that fix is made.
+
+3) Mount the snapshot as a frozen image of the filesystem.
+   To mount the snapshot /var/snapshot/snap1:
+
+	vnconfig -c vn0c /var/snapshot/snap1
+	mount -r /dev/vn0c /mnt
+
+   You can now cruise around your frozen /var filesystem
+   at /mnt. Everything will be in the same state that it
+   was at the time the snapshot was taken. The one exception
+   is that any earlier snapshots will appear as zero length
+   files. When you are done with the mounted snapshot:
+
+	umount /mnt
+	vnconfig -u vn0c
+
+   Note that under some circumstances, the process accessing
+   the frozen filesystem may deadlock. I am aware of this
+   problem, but the solution is not simple. It requires
+   using buffer read locks rather than exclusive locks when
+   traversing the inode indirect blocks. Until this problem
+   is fixed, you should avoid putting mounted snapshots into
+   production.
+
+
+Performance
+
+It takes about 30 seconds to create a snapshot of an 8Gb filesystem.
+Of that time 25 seconds is spent in preparation; filesystem activity
+is only suspended for the final 5 seconds of that period. Snapshot
+removal of an 8Gb filesystem takes about two minutes. Filesystem
+activity is never suspended during snapshot removal.
+
+The suspend time may be expanded by several minutes if a process
+is in the midst of removing many files as all the soft updates
+backlog must be cleared. Generally snapshots do not slow the system
+down appreciably except when removing many small files (i.e., any
+file less than 96Kb whose last block is a fragment) that are claimed
+by a snapshot. Here, the snapshot code must make a copy of every
+released fragment which slows the rate of file removal to about
+twenty files per second once the soft updates backlog limit is
+reached.
+
+
+How Snapshots Work
+
+For more general information on snapshots, please see:
+	http://www.mckusick.com/softdep/
diff --git a/sys/ufs/ffs/ffs_alloc.c b/sys/ufs/ffs/ffs_alloc.c
index 1f24b2b..5efe0e7 100644
--- a/sys/ufs/ffs/ffs_alloc.c
+++ b/sys/ufs/ffs/ffs_alloc.c
@@ -186,6 +186,8 @@ ffs_realloccg(ip, lbprev, bpref, osize, nsize, cred, bpp)
 	*bpp = 0;
 	fs = ip->i_fs;
 #ifdef DIAGNOSTIC
+	if (ITOV(ip)->v_mount->mnt_kern_flag & MNTK_SUSPENDED)
+		panic("ffs_realloccg: allocation on suspended filesystem");
 	if ((u_int)osize > fs->fs_bsize || fragoff(fs, osize) != 0 ||
 	    (u_int)nsize > fs->fs_bsize || fragoff(fs, nsize) != 0) {
 		printf(
@@ -763,6 +765,10 @@ ffs_hashalloc(ip, cg, pref, size, allocator)
 	long result;	/* XXX why not same type as we return? */
 	int i, icg = cg;
 
+#ifdef DIAGNOSTIC
+	if (ITOV(ip)->v_mount->mnt_kern_flag & MNTK_SUSPENDED)
+		panic("ffs_hashalloc: allocation on suspended filesystem");
+#endif
 	fs = ip->i_fs;
 	/*
 	 * 1: preferred cylinder group
@@ -1311,9 +1317,13 @@ ffs_blkfree(ip, bno, size)
 	ufs_daddr_t blkno;
 	int i, error, cg, blk, frags, bbase;
 	u_int8_t *blksfree;
+	struct vnode *vp;
 
 	fs = ip->i_fs;
-	VOP_FREEBLKS(ip->i_devvp, fsbtodb(fs, bno), size);
+#ifdef DIAGNOSTIC
+	if ((vp = ITOV(ip)) != NULL && vp->v_mount != NULL &&
+	    (vp->v_mount->mnt_kern_flag & MNTK_SUSPENDED))
+		panic("ffs_blkfree: deallocation on suspended filesystem");
 	if ((u_int)size > fs->fs_bsize || fragoff(fs, size) != 0 ||
 	    fragnum(fs, bno) + numfrags(fs, size) > fs->fs_frag) {
 		printf("dev=%s, bno = %ld, bsize = %ld, size = %ld, fs = %s\n",
@@ -1321,6 +1331,11 @@ ffs_blkfree(ip, bno, size)
 		    fs->fs_fsmnt);
 		panic("ffs_blkfree: bad size");
 	}
+#endif
+	if ((ip->i_devvp->v_flag & VCOPYONWRITE) &&
+	    ffs_snapblkfree(ip, bno, size))
+		return;
+	VOP_FREEBLKS(ip->i_devvp, fsbtodb(fs, bno), size);
 	cg = dtog(fs, bno);
 	if ((u_int)bno >= fs->fs_size) {
 		printf("bad block %ld, ino %lu\n",
diff --git a/sys/ufs/ffs/ffs_balloc.c b/sys/ufs/ffs/ffs_balloc.c
index 28cc1ed..92fe379 100644
--- a/sys/ufs/ffs/ffs_balloc.c
+++ b/sys/ufs/ffs/ffs_balloc.c
@@ -125,6 +125,8 @@ ffs_balloc(ap)
 	 * The first NDADDR blocks are direct blocks
 	 */
 	if (lbn < NDADDR) {
+		if (flags & B_METAONLY)
+			panic("ffs_balloc: B_METAONLY for direct block");
 		nb = ip->i_db[lbn];
 		if (nb != 0 && ip->i_size >= smalllblktosize(fs, lbn + 1)) {
 			error = bread(vp, lbn, fs->fs_bsize, NOCRED, &bp);
@@ -289,6 +291,13 @@ ffs_balloc(ap)
 		}
 	}
 	/*
+	 * If asked only for the indirect block, then return it.
+	 */
+	if (flags & B_METAONLY) {
+		*ap->a_bpp = bp;
+		return (0);
+	}
+	/*
 	 * Get the data block, allocating if necessary.
 	 */
 	if (nb == 0) {
diff --git a/sys/ufs/ffs/ffs_extern.h b/sys/ufs/ffs/ffs_extern.h
index fe7391b..8e011bb 100644
--- a/sys/ufs/ffs/ffs_extern.h
+++ b/sys/ufs/ffs/ffs_extern.h
@@ -67,6 +67,7 @@ struct vop_balloc_args;
 struct vop_bmap_args;
 struct vop_fsync_args;
 struct vop_reallocblks_args;
+struct vop_copyonwrite_args;
 
 int	ffs_alloc __P((struct inode *,
 	    ufs_daddr_t, ufs_daddr_t, int, struct ucred *, ufs_daddr_t *));
@@ -76,6 +77,7 @@ void	ffs_blkfree __P((struct inode *, ufs_daddr_t, long));
 ufs_daddr_t ffs_blkpref __P((struct inode *, ufs_daddr_t, int, ufs_daddr_t *));
 int	ffs_bmap __P((struct vop_bmap_args *));
 void	ffs_clrblock __P((struct fs *, u_char *, ufs_daddr_t));
+int	ffs_copyonwrite __P((struct vop_copyonwrite_args *ap));
 int	ffs_fhtovp __P((struct mount *, struct fid *, struct vnode **));
 int	ffs_flushfiles __P((struct mount *, int, struct proc *));
 void	ffs_fragacct __P((struct fs *, int, int32_t [], int));
@@ -89,6 +91,10 @@ int	ffs_reallocblks __P((struct vop_reallocblks_args *));
 int	ffs_realloccg __P((struct inode *,
 	    ufs_daddr_t, ufs_daddr_t, int, int, struct ucred *, struct buf **));
 void	ffs_setblock __P((struct fs *, u_char *, ufs_daddr_t));
+int	ffs_snapblkfree __P((struct inode *freeip, ufs_daddr_t bno, long size));
+int	ffs_snapshot __P((struct mount *mp, char *snapfile));
+void	ffs_snapshot_mount __P((struct mount *mp));
+void	ffs_snapshot_unmount __P((struct mount *mp));
 int	ffs_statfs __P((struct mount *, struct statfs *, struct proc *));
 int	ffs_sync __P((struct mount *, int, struct ucred *, struct proc *));
 int	ffs_truncate __P((struct vnode *, off_t, int, struct ucred *, struct proc *));
diff --git a/sys/ufs/ffs/ffs_snapshot.c b/sys/ufs/ffs/ffs_snapshot.c
new file mode 100644
index 0000000..73da537
--- /dev/null
+++ b/sys/ufs/ffs/ffs_snapshot.c
@@ -0,0 +1,1028 @@
+/*
+ * Copyright 2000 Marshall Kirk McKusick. All Rights Reserved.
+ *
+ * Further information about snapshots can be obtained from:
+ *
+ *	Marshall Kirk McKusick		http://www.mckusick.com/softdep/
+ *	1614 Oxford Street		mckusick@mckusick.com
+ *	Berkeley, CA 94709-1608		+1-510-843-9542
+ *	USA
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY MARSHALL KIRK MCKUSICK ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED.  IN NO EVENT SHALL MARSHALL KIRK MCKUSICK BE LIABLE FOR
+ * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)ffs_snapshot.c	8.10 (McKusick) 7/11/00
+ * $FreeBSD$
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/bio.h>
+#include <sys/buf.h>
+#include <sys/proc.h>
+#include <sys/namei.h>
+#include <sys/stat.h>
+#include <sys/malloc.h>
+#include <sys/mount.h>
+#include <sys/resource.h>
+#include <sys/resourcevar.h>
+#include <sys/vnode.h>
+
+#include <ufs/ufs/extattr.h>
+#include <ufs/ufs/quota.h>
+#include <ufs/ufs/ufsmount.h>
+#include <ufs/ufs/inode.h>
+#include <ufs/ufs/ufs_extern.h>
+
+#include <ufs/ffs/fs.h>
+#include <ufs/ffs/ffs_extern.h>
+
+#define KERNCRED proc0.p_ucred
+#define CURPROC curproc
+#define DEBUG
+
+static int indiracct __P((struct vnode *, struct vnode *, int, ufs_daddr_t,
+	int, int, int, int));
+static int snapacct __P((struct vnode *, ufs_daddr_t *, ufs_daddr_t *));
+static int readblock __P((struct buf *, daddr_t));
+
+#ifdef DEBUG
+#include <sys/sysctl.h>
+int snapdebug = 0;
+SYSCTL_INT(_debug, OID_AUTO, snapdebug, CTLFLAG_RW, &snapdebug, 0, "");
+#endif /* DEBUG */
+
+/*
+ * Create a snapshot file and initialize it for the filesystem.
+ */
+int
+ffs_snapshot(mp, snapfile)
+	struct mount *mp;
+	char *snapfile;
+{
+	ufs_daddr_t rlbn;
+	ufs_daddr_t lbn, blkno, copyblkno, inoblks[FSMAXSNAP];
+	int error, cg, snaploc, indiroff, numblks;
+	int i, size, base, len, loc, inoblkcnt;
+	int blksperindir, flag = mp->mnt_flag;
+	struct fs *fs = VFSTOUFS(mp)->um_fs;
+	struct proc *p = CURPROC;
+	struct inode *devip, *ip, *xp;
+	struct buf *bp, *nbp, *ibp;
+	struct vnode *vp, *devvp;
+	struct nameidata nd;
+	struct mount *wrtmp;
+	struct dinode *dip;
+	struct vattr vat;
+	struct cg *cgp;
+
+	/*
+	 * Need to serialize access to snapshot code per filesystem.
+	 */
+	/*
+	 * Assign a snapshot slot in the superblock.
+	 */
+	for (snaploc = 0; snaploc < FSMAXSNAP; snaploc++)
+		if (fs->fs_snapinum[snaploc] == 0)
+			break;
+	if (snaploc == FSMAXSNAP)
+		return (ENOSPC);
+	/*
+	 * Create the snapshot file.
+	 */
+restart:
+	NDINIT(&nd, CREATE, LOCKPARENT | LOCKLEAF, UIO_USERSPACE, snapfile, p);
+	if ((error = namei(&nd)) != 0)
+		return (error);
+	if (nd.ni_vp != NULL) {
+		vput(nd.ni_vp);
+		error = EEXIST;
+	}
+	if (nd.ni_dvp->v_mount != mp)
+		error = EXDEV;
+	if (error) {
+		NDFREE(&nd, NDF_ONLY_PNBUF);
+		if (nd.ni_dvp == nd.ni_vp)
+			vrele(nd.ni_dvp);
+		else
+			vput(nd.ni_dvp);
+		return (error);
+	}
+	VATTR_NULL(&vat);
+	vat.va_type = VREG;
+	vat.va_mode = S_IRUSR;
+	vat.va_vaflags |= VA_EXCLUSIVE;
+	if (VOP_GETWRITEMOUNT(nd.ni_dvp, &wrtmp))
+		wrtmp = NULL;
+	if (wrtmp != mp)
+		panic("ffs_snapshot: mount mismatch");
+	if (vn_start_write(wrtmp, V_NOWAIT) != 0) {
+		NDFREE(&nd, NDF_ONLY_PNBUF);
+		vput(nd.ni_dvp);
+		if ((error = vn_start_write(wrtmp, V_XSLEEP | PCATCH)) != 0)
+			return (error);
+		goto restart;
+	}
+	VOP_LEASE(nd.ni_dvp, p, KERNCRED, LEASE_WRITE);
+	error = VOP_CREATE(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vat);
+	vput(nd.ni_dvp);
+	if (error) {
+		NDFREE(&nd, NDF_ONLY_PNBUF);
+		vn_finished_write(wrtmp);
+		return (error);
+	}
+	vp = nd.ni_vp;
+	ip = VTOI(vp);
+	devvp = ip->i_devvp;
+	devip = VTOI(devvp);
+	/*
+	 * Allocate and copy the last block contents so as to be able
+	 * to set size to that of the filesystem.
+	 */
+	numblks = howmany(fs->fs_size, fs->fs_frag);
+	error = VOP_BALLOC(vp, lblktosize(fs, (off_t)(numblks - 1)),
+	    fs->fs_bsize, KERNCRED, B_CLRBUF, &bp);
+	if (error)
+		goto out;
+	ip->i_size = lblktosize(fs, (off_t)numblks);
+	ip->i_flag |= IN_CHANGE | IN_UPDATE;
+	if ((error = readblock(bp, numblks - 1)) != 0)
+		goto out;
+	bawrite(bp);
+	/*
+	 * Preallocate critical data structures so that we can copy
+	 * them in without further allocation after we suspend all
+	 * operations on the filesystem. We would like to just release
+	 * the allocated buffers without writing them since they will
+	 * be filled in below once we are ready to go, but this upsets
+	 * the soft update code, so we go ahead and write the new buffers.
+	 *
+	 * Allocate all indirect blocks. Also allocate shadow copies
+	 * for each of the indirect blocks.
+	 */
+	for (blkno = NDADDR; blkno < numblks; blkno += NINDIR(fs)) {
+		error = VOP_BALLOC(vp, lblktosize(fs, (off_t)blkno),
+		    fs->fs_bsize, p->p_ucred, B_METAONLY, &ibp);
+		if (error)
+			goto out;
+		copyblkno = fragstoblks(fs, dbtofsb(fs, ibp->b_blkno));
+		bdwrite(ibp);
+		error = VOP_BALLOC(vp, lblktosize(fs, (off_t)copyblkno),
+		    fs->fs_bsize, p->p_ucred, 0, &nbp);
+		if (error)
+			goto out;
+		bawrite(nbp);
+	}
+	/*
+	 * Allocate shadow blocks to copy all of the other snapshot inodes
+	 * so that we will be able to expunge them from this snapshot.
+	 */
+	for (loc = 0, inoblkcnt = 0; loc < snaploc; loc++) {
+		blkno = fragstoblks(fs, ino_to_fsba(fs, fs->fs_snapinum[loc]));
+		for (i = 0; i < inoblkcnt; i++)
+			if (inoblks[i] == blkno)
+				break;
+		if (i == inoblkcnt) {
+			inoblks[inoblkcnt++] = blkno;
+			error = VOP_BALLOC(vp, lblktosize(fs, (off_t)blkno),
+			    fs->fs_bsize, KERNCRED, 0, &nbp);
+			if (error)
+				goto out;
+			bawrite(nbp);
+		}
+	}
+	/*
+	 * Allocate all cylinder group blocks.
+	 */
+	for (cg = 0; cg < fs->fs_ncg; cg++) {
+		error = VOP_BALLOC(vp, (off_t)(cgtod(fs, cg)) << fs->fs_fshift,
+		    fs->fs_bsize, KERNCRED, 0, &nbp);
+		if (error)
+			goto out;
+		bawrite(nbp);
+	}
+	/*
+	 * Allocate copies for the superblock and its summary information.
+	 */
+	error = VOP_BALLOC(vp, (off_t)(SBOFF), fs->fs_bsize, KERNCRED,
+	    0, &nbp);
+	if (error)
+		goto out;
+	bawrite(nbp);
+	blkno = fragstoblks(fs, fs->fs_csaddr);
+	len = howmany(fs->fs_cssize, fs->fs_bsize);
+	for (loc = 0; loc < len; loc++) {
+		error = VOP_BALLOC(vp, lblktosize(fs, (off_t)(blkno + loc)),
+		    fs->fs_bsize, KERNCRED, 0, &nbp);
+		if (error)
+			goto out;
+		bawrite(nbp);
+	}
+	/*
+	 * Change inode to snapshot type file.
+	 */
+	ip->i_flags |= SF_IMMUTABLE | SF_SNAPSHOT;
+	ip->i_flag |= IN_CHANGE | IN_UPDATE;
+	/*
+	 * Ensure that the snapshot is completely on disk.
+	 */
+	if ((error = VOP_FSYNC(vp, KERNCRED, MNT_WAIT, p)) != 0)
+		goto out;
+	/*
+	 * All allocations are done, so we can now snapshot the system.
+	 *
+	 * Suspend operation on filesystem.
+	 */
+	for (;;) {
+		vn_finished_write(wrtmp);
+		vfs_write_suspend(vp->v_mount);
+		if (mp->mnt_kern_flag & MNTK_SUSPENDED)
+			break;
+		vn_start_write(wrtmp, V_WAIT);
+	}
+	/*
+	 * First, copy all the cylinder group maps. All the unallocated
+	 * blocks are marked BLK_NOCOPY so that the snapshot knows that
+	 * it need not copy them if they are later written.
+	 */
+	len = howmany(fs->fs_fpg, fs->fs_frag);
+	for (cg = 0; cg < fs->fs_ncg; cg++) {
+		error = bread(ip->i_devvp, fsbtodb(fs, cgtod(fs, cg)),
+			(int)fs->fs_cgsize, KERNCRED, &bp);
+		if (error) {
+			brelse(bp);
+			goto out1;
+		}
+		cgp = (struct cg *)bp->b_data;
+		if (!cg_chkmagic(cgp)) {
+			brelse(bp);
+			error = EIO;
+			goto out1;
+		}
+		error = bread(vp, fragstoblks(fs, cgtod(fs, cg)), fs->fs_bsize,
+			KERNCRED, &nbp);
+		if (error) {
+			brelse(bp);
+			brelse(nbp);
+			goto out1;
+		}
+		bcopy(bp->b_data, nbp->b_data, fs->fs_cgsize);
+		if (fs->fs_cgsize < fs->fs_bsize)
+			bzero(&nbp->b_data[fs->fs_cgsize],
+			    fs->fs_bsize - fs->fs_cgsize);
+		bawrite(nbp);
+		base = cg * fs->fs_fpg / fs->fs_frag;
+		if (base + len > numblks)
+			len = numblks - base;
+		loc = 0;
+		if (base < NDADDR) {
+			for ( ; loc < NDADDR; loc++) {
+				if (!ffs_isblock(fs, cg_blksfree(cgp), loc))
+					continue;
+				ip->i_db[loc] = BLK_NOCOPY;
+			}
+		}
+		error = VOP_BALLOC(vp, lblktosize(fs, (off_t)(base + loc)),
+		    fs->fs_bsize, KERNCRED, B_METAONLY, &ibp);
+		if (error) {
+			brelse(bp);
+			goto out1;
+		}
+		indiroff = (base + loc - NDADDR) % NINDIR(fs);
+		for ( ; loc < len; loc++, indiroff++) {
+			if (indiroff >= NINDIR(fs)) {
+				bawrite(ibp);
+				error = VOP_BALLOC(vp,
+				    lblktosize(fs, (off_t)(base + loc)),
+				    fs->fs_bsize, KERNCRED, B_METAONLY, &ibp);
+				if (error) {
+					brelse(bp);
+					goto out1;
+				}
+				indiroff = 0;
+			}
+			if (!ffs_isblock(fs, cg_blksfree(cgp), loc))
+				continue;
+			((ufs_daddr_t *)(ibp->b_data))[indiroff] = BLK_NOCOPY;
+		}
+		brelse(bp);
+		bdwrite(ibp);
+	}
+	/*
+	 * Snapshot the superblock and its summary information.
+	 */
+	error = VOP_BALLOC(vp, (off_t)(SBOFF), fs->fs_bsize, KERNCRED,
+	    0, &nbp);
+	if (error)
+		goto out1;
+	bcopy(fs, nbp->b_data, fs->fs_sbsize);
+	((struct fs *)(nbp->b_data))->fs_clean = 1;
+	if (fs->fs_sbsize < fs->fs_bsize)
+		bzero(&nbp->b_data[fs->fs_sbsize],
+		    fs->fs_bsize - fs->fs_sbsize);
+	bawrite(nbp);
+	blkno = fragstoblks(fs, fs->fs_csaddr);
+	len = howmany(fs->fs_cssize, fs->fs_bsize) - 1;
+	size = fs->fs_bsize;
+	for (loc = 0; loc <= len; loc++) {
+		error = VOP_BALLOC(vp, lblktosize(fs, (off_t)(blkno + loc)),
+		    fs->fs_bsize, KERNCRED, 0, &nbp);
+		if (error)
+			goto out1;
+		if (loc == len) {
+			readblock(nbp, blkno + loc);
+			size = fs->fs_cssize % fs->fs_bsize;
+		}
+		bcopy(fs->fs_csp[loc], nbp->b_data, size);
+		bawrite(nbp);
+	}
+	/*
+	 * Copy the shadow blocks for the snapshot inodes so that
+	 * the copies can be expunged.
+	 */
+	for (loc = 0; loc < inoblkcnt; loc++) {
+		error = VOP_BALLOC(vp, lblktosize(fs, (off_t)inoblks[loc]),
+		    fs->fs_bsize, KERNCRED, 0, &nbp);
+		if (error)
+			goto out1;
+		readblock(nbp, inoblks[loc]);
+		bdwrite(nbp);
+	}
+	/*
+	 * Copy allocation information from other snapshots and then
+	 * expunge them from the view of the current snapshot.
+	 */
+	for (xp = devip->i_copyonwrite; xp; xp = xp->i_copyonwrite) {
+		/*
+		 * Before expunging a snapshot inode, note all the
+		 * blocks that it claims with BLK_SNAP so that fsck will
+		 * be able to account for those blocks properly and so
+		 * that this snapshot knows that it need not copy them
+		 * if the other snapshot holding them is freed.
+		 */
+		if ((error = snapacct(vp, &xp->i_db[0], &xp->i_ib[NIADDR])) !=0)
+			goto out1;
+		blksperindir = 1;
+		lbn = -NDADDR;
+		len = numblks - NDADDR;
+		rlbn = NDADDR;
+		for (i = 0; len > 0 && i < NIADDR; i++) {
+			error = indiracct(vp, ITOV(xp), i, xp->i_ib[i], lbn,
+			    rlbn, len, blksperindir);
+			if (error)
+				goto out1;
+			blksperindir *= NINDIR(fs);
+			lbn -= blksperindir + 1;
+			len -= blksperindir;
+			rlbn += blksperindir;
+		}
+		/*
+		 * Set copied snapshot inode to be a zero length file.
+		 */
+		blkno = fragstoblks(fs, ino_to_fsba(fs, xp->i_number));
+		error = VOP_BALLOC(vp, lblktosize(fs, (off_t)blkno),
+		    fs->fs_bsize, KERNCRED, 0, &nbp);
+		if (error)
+			goto out1;
+		dip = (struct dinode *)nbp->b_data +
+		    ino_to_fsbo(fs, xp->i_number);
+		dip->di_size = 0;
+		dip->di_blocks = 0;
+		dip->di_flags &= ~(SF_IMMUTABLE | SF_SNAPSHOT);
+		bzero(&dip->di_db[0], (NDADDR + NIADDR) * sizeof(ufs_daddr_t));
+		bdwrite(nbp);
+	}
+	/*
+	 * Copy all indirect blocks to their shadows (allocated above)
+	 * to avoid deadlock in ffs_copyonwrite.
+	 */
+	for (blkno = NDADDR; blkno < numblks; blkno += NINDIR(fs)) {
+		error = VOP_BALLOC(vp, lblktosize(fs, (off_t)blkno),
+		    fs->fs_bsize, p->p_ucred, B_METAONLY, &ibp);
+		if (error)
+			goto out1;
+		copyblkno = fragstoblks(fs, dbtofsb(fs, ibp->b_blkno));
+		brelse(ibp);
+		error = VOP_BALLOC(vp, lblktosize(fs, (off_t)copyblkno),
+		    fs->fs_bsize, p->p_ucred, 0, &nbp);
+		if (error)
+			goto out1;
+		error = VOP_BALLOC(vp, lblktosize(fs, (off_t)blkno),
+		    fs->fs_bsize, p->p_ucred, B_METAONLY, &ibp);
+		if (error) {
+			brelse(nbp);
+			goto out1;
+		}
+		bcopy(ibp->b_data, nbp->b_data, fs->fs_bsize);
+		brelse(ibp);
+		bawrite(nbp);
+	}
+	/*
+	 * Record snapshot inode. Since this is the newest snapshot,
+	 * it must be placed at the end of the list.
+	 */
+	fs->fs_snapinum[snaploc] = ip->i_number;
+	if (ip->i_copyonwrite != 0)
+		panic("ffs_snapshot: %d already on list", ip->i_number);
+	if (devip->i_copyonwrite == 0) {
+		devvp->v_flag |= VCOPYONWRITE;
+		devip->i_copyonwrite = ip;
+	} else {
+		for (xp = devip->i_copyonwrite; xp->i_copyonwrite != 0; )
+			xp = xp->i_copyonwrite;
+		xp->i_copyonwrite = ip;
+	}
+	vp->v_flag |= VSYSTEM;
+	/*
+	 * Resume operation on filesystem.
+	 */
+out1:
+	vfs_write_resume(vp->v_mount);
+	vn_start_write(wrtmp, V_WAIT);
+out:
+	mp->mnt_flag = flag;
+	(void) VOP_FSYNC(vp, KERNCRED, MNT_WAIT, p);
+	if (error)
+		vput(vp);
+	else
+		VOP_UNLOCK(vp, 0, p);
+	vn_finished_write(wrtmp);
+	return (error);
+}
+
+/*
+ * Descend the indirect block chain of vnode cancelvp from block blkno,
+ * accounting in snapvp (via snapacct) for the block pointers found.
+ */
+static int
+indiracct(snapvp, cancelvp, level, blkno, lbn, rlbn, remblks, blksperindir)
+	struct vnode *snapvp;
+	struct vnode *cancelvp;
+	int level;
+	ufs_daddr_t blkno;
+	int lbn;
+	int rlbn;
+	int remblks;
+	int blksperindir;
+{
+	int subblksperindir, error, last, num, i;
+	struct indir indirs[NIADDR + 2];
+	ufs_daddr_t *bap;
+	struct buf *bp;
+	struct fs *fs;
+
+	if ((error = ufs_getlbns(cancelvp, rlbn, indirs, &num)) != 0)
+		return (error);
+	if (lbn != indirs[num - 1 - level].in_lbn || blkno == 0 || num < 2)
+		panic("indiracct: botched params");
+	/*
+	 * We have to expand bread here since it will deadlock looking
+	 * up the block number for any blocks that are not in the cache.
+	 */
+	fs = VTOI(cancelvp)->i_fs;
+	bp = getblk(cancelvp, lbn, fs->fs_bsize, 0, 0);
+	bp->b_blkno = fsbtodb(fs, blkno);
+	if ((bp->b_flags & (B_DONE | B_DELWRI)) == 0 &&
+	    (error = readblock(bp, fragstoblks(fs, blkno)))) {
+		brelse(bp);
+		return (error);
+	}
+	/*
+	 * Account for the first `last' block pointers of this indirect block.
+	 */
+	last = howmany(remblks, blksperindir);
+	if (last > NINDIR(fs))
+		last = NINDIR(fs);
+	if (snapvp != cancelvp) {
+		bap = (ufs_daddr_t *)bp->b_data;
+	} else {
+		MALLOC(bap, ufs_daddr_t *, fs->fs_bsize, M_DEVBUF, M_WAITOK);
+		bcopy(bp->b_data, (caddr_t)bap, fs->fs_bsize);
+		brelse(bp);
+	}
+	error = snapacct(snapvp, &bap[0], &bap[last]);
+	if (error || level == 0)
+		goto out;
+	/*
+	 * Account for the block pointers in each of the indirect blocks
+	 * in the levels below us.
+	 */
+	subblksperindir = blksperindir / NINDIR(fs);
+	for (lbn++, level--, i = 0; i < last; i++) {
+		error = indiracct(snapvp, cancelvp, level, bap[i], lbn,
+		    rlbn, remblks, subblksperindir);
+		if (error)
+			goto out;
+		rlbn += blksperindir;
+		lbn -= blksperindir;
+		remblks -= blksperindir;
+	}
+out:
+	if (snapvp != cancelvp)
+		brelse(bp);
+	else
+		FREE(bap, M_DEVBUF);
+	return (error);
+}
+
+/*
+ * Mark each block listed in [oldblkp, lastblkp) as BLK_SNAP in vp.
+ */
+static int
+snapacct(vp, oldblkp, lastblkp)
+	struct vnode *vp;
+	ufs_daddr_t *oldblkp, *lastblkp;
+{
+	struct inode *ip = VTOI(vp);
+	struct fs *fs = ip->i_fs;
+	ufs_daddr_t lbn, blkno, *blkp;
+	struct buf *ibp;
+	int error;
+
+	for ( ; oldblkp < lastblkp; oldblkp++) {
+		blkno = *oldblkp;
+		if (blkno == 0 || blkno == BLK_NOCOPY || blkno == BLK_SNAP)
+			continue;
+		lbn = fragstoblks(fs, blkno);
+		if (lbn < NDADDR) {
+			blkp = &ip->i_db[lbn];
+			ip->i_flag |= IN_CHANGE | IN_UPDATE;
+		} else {
+			error = VOP_BALLOC(vp, lblktosize(fs, (off_t)lbn),
+			    fs->fs_bsize, KERNCRED, B_METAONLY, &ibp);
+			if (error)
+				return (error);
+			blkp = &((ufs_daddr_t *)(ibp->b_data))
+			    [(lbn - NDADDR) % NINDIR(fs)];
+		}
+		if (*blkp != 0)
+			panic("snapacct: bad block");
+		*blkp = BLK_SNAP;
+		if (lbn >= NDADDR)
+			bdwrite(ibp);
+	}
+	return (0);
+}
+
+/*
+ * Prepare snapshot file vp for removal: unlist it and release its claims.
+ */
+void
+ffs_snapremove(vp)
+	struct vnode *vp;
+{
+	struct inode *ip, *xp;
+	struct vnode *devvp;
+	struct buf *ibp;
+	struct fs *fs;
+	ufs_daddr_t blkno, dblk;
+	int error, snaploc, loc, last;
+
+	ip = VTOI(vp);
+	fs = ip->i_fs;
+	/*
+	 * Delete snapshot inode from superblock. Keep list dense.
+	 */
+	for (snaploc = 0; snaploc < FSMAXSNAP; snaploc++)
+		if (fs->fs_snapinum[snaploc] == ip->i_number)
+			break;
+	if (snaploc < FSMAXSNAP) {
+		for (snaploc++; snaploc < FSMAXSNAP; snaploc++) {
+			if (fs->fs_snapinum[snaploc] == 0)
+				break;
+			fs->fs_snapinum[snaploc - 1] = fs->fs_snapinum[snaploc];
+		}
+		fs->fs_snapinum[snaploc - 1] = 0;
+	}
+	/*
+	 * Delete from incore list.
+	 * Clear copy-on-write flag if last snapshot.
+	 */
+	devvp = ip->i_devvp;
+	for (xp = VTOI(devvp); xp; xp = xp->i_copyonwrite) {
+		if (xp->i_copyonwrite != ip)
+			continue;
+		xp->i_copyonwrite = ip->i_copyonwrite;
+		ip->i_copyonwrite = 0;
+		break;
+	}
+	if (xp == 0) {
+		printf("ffs_snapremove: lost snapshot vnode %d\n",
+		    ip->i_number);
+		vref(vp);
+	}
+	if (VTOI(devvp)->i_copyonwrite == 0)
+		devvp->v_flag &= ~VCOPYONWRITE;
+	/*
+	 * Clear all BLK_NOCOPY and BLK_SNAP fields. Pass any claimed block
+	 * (address == its own lbn) to another snapshot via ffs_snapblkfree.
+	 */
+	for (blkno = 1; blkno < NDADDR; blkno++) {
+		dblk = ip->i_db[blkno];
+		if (dblk == BLK_NOCOPY || dblk == BLK_SNAP ||
+		    (dblk == blkstofrags(fs, blkno) &&
+		     ffs_snapblkfree(ip, dblk, fs->fs_bsize)))
+			ip->i_db[blkno] = 0;
+	}
+	for (blkno = NDADDR; blkno < fs->fs_size; blkno += NINDIR(fs)) {
+		error = VOP_BALLOC(vp, lblktosize(fs, (off_t)blkno),
+		    fs->fs_bsize, KERNCRED, B_METAONLY, &ibp);
+		if (error)
+			continue;
+		if ((last = fs->fs_size - blkno) > NINDIR(fs))
+			last = NINDIR(fs);
+		for (loc = 0; loc < last; loc++) {
+			dblk = ((ufs_daddr_t *)(ibp->b_data))[loc];
+			if (dblk == BLK_NOCOPY || dblk == BLK_SNAP ||
+			    (dblk == blkstofrags(fs, blkno + loc) &&
+			     ffs_snapblkfree(ip, dblk, fs->fs_bsize)))
+				((ufs_daddr_t *)(ibp->b_data))[loc] = 0;
+		}
+		bawrite(ibp);
+	}
+	/*
+	 * Clear snapshot flag and drop reference.
+	 */
+	ip->i_flags &= ~(SF_IMMUTABLE | SF_SNAPSHOT);
+	ip->i_flag |= IN_CHANGE | IN_UPDATE;
+	vrele(vp);
+}
+
+/*
+ * Notification that a block is being freed. Return zero if the free
+ * should be allowed to proceed. Return non-zero if the snapshot file
+ * wants to claim the block. The block will be claimed if it is an
+ * uncopied part of one of the snapshots. It will be freed if it is
+ * either a BLK_NOCOPY or has already been copied in all of the snapshots.
+ * If a fragment is being freed, then all snapshots that care about
+ * it must make a copy since a snapshot file can only claim full sized
+ * blocks. Note that if more than one snapshot file maps the block,
+ * we can pick one at random to claim it. Since none of the snapshots
+ * can change, we are assured that they will all see the same unmodified
+ * image. When deleting a snapshot file (see ffs_snapremove above), we
+ * must push any of these claimed blocks to one of the other snapshots
+ * that maps it. These claimed blocks are easily identified as they will
+ * have a block number equal to their logical block number within the
+ * snapshot. A copied block can never have this property because they
+ * must always have been allocated from a BLK_NOCOPY location.
+ */
+int
+ffs_snapblkfree(freeip, bno, size)
+	struct inode *freeip;
+	ufs_daddr_t bno;
+	long size;
+{
+	struct buf *ibp, *cbp, *savedcbp = 0;
+	struct fs *fs = freeip->i_fs;
+	struct proc *p = CURPROC;
+	struct inode *ip;
+	struct vnode *vp;
+	ufs_daddr_t lbn, blkno;
+	int indiroff = 0, error = 0, claimedblk = 0;
+
+	lbn = fragstoblks(fs, bno);
+	for (ip = VTOI(freeip->i_devvp)->i_copyonwrite; ip;
+	     ip = ip->i_copyonwrite) {
+		vp = ITOV(ip);
+		/*
+		 * Lookup block being freed.
+		 */
+		if (lbn < NDADDR) {
+			blkno = ip->i_db[lbn];
+		} else {
+			vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p);
+			p->p_flag |= P_COWINPROGRESS;
+			error = VOP_BALLOC(vp, lblktosize(fs, (off_t)lbn),
+			    fs->fs_bsize, KERNCRED, B_METAONLY, &ibp);
+			p->p_flag &= ~P_COWINPROGRESS;
+			VOP_UNLOCK(vp, 0, p);
+			if (error)
+				break;
+			indiroff = (lbn - NDADDR) % NINDIR(fs);
+			blkno = ((ufs_daddr_t *)(ibp->b_data))[indiroff];
+		}
+		/*
+		 * Check to see if block needs to be copied.
+		 */
+		switch (blkno) {
+		/*
+		 * If the snapshot has already copied the block (default),
+		 * or does not care about the block, it is not needed.
+		 */
+		default:
+		case BLK_NOCOPY:
+			if (lbn >= NDADDR)
+				brelse(ibp);
+			continue;
+		/*
+		 * No previous snapshot claimed the block, so it will be
+		 * freed and become a BLK_NOCOPY (don't care) for us.
+		 */
+		case BLK_SNAP:
+			if (claimedblk)
+				panic("snapblkfree: inconsistent block type");
+			vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p);
+			if (lbn < NDADDR) {
+				ip->i_db[lbn] = BLK_NOCOPY;
+				ip->i_flag |= IN_CHANGE | IN_UPDATE;
+			} else {
+				((ufs_daddr_t *)(ibp->b_data))[indiroff] =
+				    BLK_NOCOPY;
+				bdwrite(ibp);
+			}
+			VOP_UNLOCK(vp, 0, p);
+			continue;
+		/*
+		 * A block that we map is being freed. If it has not been
+		 * claimed yet, we will claim or copy it (below).
+		 */
+		case 0:
+			claimedblk = 1;
+			break;
+		}
+		/*
+		 * If this is a full size block, we will just grab it
+		 * and assign it to the snapshot inode. Otherwise we
+		 * will proceed to copy it. See explanation for this
+		 * routine as to why only a single snapshot needs to
+		 * claim this block.
+		 */
+		if (size == fs->fs_bsize) {
+#ifdef DEBUG
+			if (snapdebug)
+				printf("%s %d lbn %d from inum %d\n",
+				    "Grabonremove: snapino", ip->i_number, lbn,
+				    freeip->i_number);
+#endif
+			vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p);
+			if (lbn < NDADDR) {
+				ip->i_db[lbn] = bno;
+			} else {
+				((ufs_daddr_t *)(ibp->b_data))[indiroff] = bno;
+				bdwrite(ibp);
+			}
+			ip->i_blocks += btodb(size);
+			ip->i_flag |= IN_CHANGE | IN_UPDATE;
+			VOP_UNLOCK(vp, 0, p);
+			return (1);
+		}
+		if (lbn >= NDADDR)
+			brelse(ibp);
+		/*
+		 * Allocate the block into which to do the copy. Note that this
+		 * allocation will never require any additional allocations for
+		 * the snapshot inode.
+		 */
+		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p);
+		p->p_flag |= P_COWINPROGRESS;
+		error = VOP_BALLOC(vp, lblktosize(fs, (off_t)lbn),
+		    fs->fs_bsize, KERNCRED, 0, &cbp);
+		p->p_flag &= ~P_COWINPROGRESS;
+		VOP_UNLOCK(vp, 0, p);
+		if (error)
+			break;
+#ifdef DEBUG
+		if (snapdebug)
+			printf("%s%d lbn %d for inum %d size %ld to blkno %d\n",
+			    "Copyonremove: snapino ", ip->i_number, lbn,
+			    freeip->i_number, size, cbp->b_blkno);
+#endif
+		/*
+		 * If we have already read the old block contents, then
+		 * simply copy them to the new block.
+		 */
+		if (savedcbp != 0) {
+			bcopy(savedcbp->b_data, cbp->b_data, fs->fs_bsize);
+			bawrite(cbp);
+			continue;
+		}
+		/*
+		 * Otherwise, read the old block contents into the buffer.
+		 */
+		if ((error = readblock(cbp, lbn)) != 0)
+			break;
+		savedcbp = cbp;
+	}
+	if (savedcbp)
+		bawrite(savedcbp);
+	/*
+	 * If we have been unable to allocate a block in which to do
+	 * the copy, then return non-zero so that the fragment will
+	 * not be freed. Although space will be lost, the snapshot
+	 * will stay consistent.
+	 */
+	return (error);
+}
+
+/*
+ * Associate the snapshot files listed in fs_snapinum when mounting.
+ */
+void
+ffs_snapshot_mount(mp)
+	struct mount *mp;
+{
+	struct ufsmount *ump = VFSTOUFS(mp);
+	struct fs *fs = ump->um_fs;
+	struct proc *p = CURPROC;
+	struct inode *ip, **listtailp;
+	struct vnode *vp;
+	int error, snaploc, loc;
+
+	listtailp = &VTOI(ump->um_devvp)->i_copyonwrite;
+	for (snaploc = 0; snaploc < FSMAXSNAP; snaploc++) {
+		if (fs->fs_snapinum[snaploc] == 0)
+			return;
+		if ((error = VFS_VGET(mp, fs->fs_snapinum[snaploc], &vp)) != 0){
+			printf("ffs_snapshot_mount: vget failed %d\n", error);
+			continue;
+		}
+		ip = VTOI(vp);
+		if ((ip->i_flags & SF_SNAPSHOT) == 0) {
+			printf("ffs_snapshot_mount: non-snapshot inode %d\n",
+			    fs->fs_snapinum[snaploc]);
+			vput(vp);
+			for (loc = snaploc + 1; loc < FSMAXSNAP; loc++) {
+				if (fs->fs_snapinum[loc] == 0)
+					break;
+				fs->fs_snapinum[loc - 1] = fs->fs_snapinum[loc];
+			}
+			fs->fs_snapinum[loc - 1] = 0;
+			snaploc--;
+			continue;
+		}
+		if (ip->i_copyonwrite != 0)
+			panic("ffs_snapshot_mount: %d already on list",
+			    ip->i_number);
+		*listtailp = ip;
+		listtailp = &ip->i_copyonwrite;
+		vp->v_flag |= VSYSTEM;
+		VOP_UNLOCK(vp, 0, p);
+		ump->um_devvp->v_flag |= VCOPYONWRITE;
+	}
+}
+
+/*
+ * Disassociate snapshot files when unmounting, releasing each vnode.
+ */
+void
+ffs_snapshot_unmount(mp)
+	struct mount *mp;
+{
+	struct ufsmount *ump = VFSTOUFS(mp);
+	struct inode *devip = VTOI(ump->um_devvp);
+	struct inode *xp;
+
+	while ((xp = devip->i_copyonwrite) != 0) {
+		devip->i_copyonwrite = xp->i_copyonwrite;
+		xp->i_copyonwrite = 0;
+		vrele(ITOV(xp));
+	}
+	ump->um_devvp->v_flag &= ~VCOPYONWRITE;
+}
+
+/*
+ * Check for need to copy block that is about to be written,
+ * copying the block if necessary.
+ */
+int
+ffs_copyonwrite(ap)
+	struct vop_copyonwrite_args /* {
+		struct vnode *a_vp;
+		struct buf *a_bp;
+	} */ *ap;
+{
+	struct buf *ibp, *cbp, *savedcbp = 0, *bp = ap->a_bp;
+	struct fs *fs = VTOI(bp->b_vp)->i_fs;
+	struct proc *p = CURPROC;
+	struct inode *ip;
+	struct vnode *vp;
+	ufs_daddr_t lbn, blkno;
+	int indiroff, error = 0;
+
+	lbn = fragstoblks(fs, dbtofsb(fs, bp->b_blkno));
+	if (p->p_flag & P_COWINPROGRESS)
+		panic("ffs_copyonwrite: recursive call");
+	for (ip = VTOI(ap->a_vp)->i_copyonwrite; ip; ip = ip->i_copyonwrite) {
+		vp = ITOV(ip);
+		/*
+		 * We ensure that everything of our own that needs to be
+		 * copied will be done at the time that ffs_snapshot is
+		 * called. Thus we can skip the check here which can
+		 * deadlock in doing the lookup in VOP_BALLOC.
+		 */
+		if (bp->b_vp == vp)
+			continue;
+		/*
+		 * Check to see if block needs to be copied.
+		 */
+		if (lbn < NDADDR) {
+			blkno = ip->i_db[lbn];
+		} else {
+			vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p);
+			p->p_flag |= P_COWINPROGRESS;
+			error = VOP_BALLOC(vp, lblktosize(fs, (off_t)lbn),
+			    fs->fs_bsize, KERNCRED, B_METAONLY, &ibp);
+			p->p_flag &= ~P_COWINPROGRESS;
+			VOP_UNLOCK(vp, 0, p);
+			if (error)
+				break;
+			indiroff = (lbn - NDADDR) % NINDIR(fs);
+			blkno = ((ufs_daddr_t *)(ibp->b_data))[indiroff];
+			brelse(ibp);
+		}
+#ifdef DIAGNOSTIC
+		if (blkno == BLK_SNAP && bp->b_lblkno >= 0)
+			panic("ffs_copyonwrite: bad copy block");
+#endif
+		if (blkno != 0)
+			continue;
+		/*
+		 * Allocate the block into which to do the copy. This never
+		 * requires additional allocations for the snapshot inode. On
+		 * error, stop before cbp (not set up then) is dereferenced.
+		 */
+		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p);
+		p->p_flag |= P_COWINPROGRESS;
+		error = VOP_BALLOC(vp, lblktosize(fs, (off_t)lbn),
+		    fs->fs_bsize, KERNCRED, 0, &cbp);
+		p->p_flag &= ~P_COWINPROGRESS;
+		VOP_UNLOCK(vp, 0, p);
+		if (error)
+			break;
+#ifdef DEBUG
+		if (snapdebug) {
+			printf("Copyonwrite: snapino %d lbn %d for ",
+			    ip->i_number, lbn);
+			if (bp->b_vp == ap->a_vp)
+				printf("fs metadata");
+			else
+				printf("inum %d", VTOI(bp->b_vp)->i_number);
+			printf(" lblkno %d to blkno %d\n", bp->b_lblkno,
+			    cbp->b_blkno);
+		}
+#endif
+		/*
+		 * If we have already read the old block contents, then
+		 * simply copy them to the new block.
+		 */
+		if (savedcbp != 0) {
+			bcopy(savedcbp->b_data, cbp->b_data, fs->fs_bsize);
+			bawrite(cbp);
+			continue;
+		}
+		/*
+		 * Otherwise, read the old block contents into the buffer.
+		 */
+		if ((error = readblock(cbp, lbn)) != 0)
+			break;
+		savedcbp = cbp;
+	}
+	if (savedcbp)
+		bawrite(savedcbp);
+	return (error);
+}
+
+/*
+ * Read block lbn from the device into bp->b_data using physio().
+ * Much of this boiler-plate comes from bwrite().
+ */
+static int
+readblock(bp, lbn)
+	struct buf *bp;
+	daddr_t lbn;
+{
+	struct uio auio;
+	struct iovec aiov;
+	struct proc *p = CURPROC;
+	struct inode *ip = VTOI(bp->b_vp);
+
+	aiov.iov_base = bp->b_data;
+	aiov.iov_len = bp->b_bcount;
+	auio.uio_iov = &aiov;
+	auio.uio_iovcnt = 1;
+	auio.uio_offset = dbtob(fsbtodb(ip->i_fs, blkstofrags(ip->i_fs, lbn)));
+	auio.uio_resid = bp->b_bcount;
+	auio.uio_rw = UIO_READ;
+	auio.uio_segflg = UIO_SYSSPACE;
+	auio.uio_procp = p;
+	return (physio(ip->i_devvp->v_rdev, &auio, 0));
+}
diff --git a/sys/ufs/ffs/ffs_softdep.c b/sys/ufs/ffs/ffs_softdep.c
index 40e9669..d9e6414 100644
--- a/sys/ufs/ffs/ffs_softdep.c
+++ b/sys/ufs/ffs/ffs_softdep.c
@@ -508,7 +508,7 @@ softdep_process_worklist(matchmnt)
 {
 	struct proc *p = CURPROC;
 	struct worklist *wk;
-	struct fs *matchfs;
+	struct mount *mp;
 	int matchcnt, loopcount;
 
 	/*
@@ -517,9 +517,6 @@ softdep_process_worklist(matchmnt)
 	 */
 	filesys_syncer = p;
 	matchcnt = 0;
-	matchfs = NULL;
-	if (matchmnt != NULL)
-		matchfs = VFSTOUFS(matchmnt)->um_fs;
 	/*
 	 * There is no danger of having multiple processes run this
 	 * code. It is single threaded solely so that softdep_flushfiles
@@ -550,30 +547,42 @@ softdep_process_worklist(matchmnt)
 
 		case D_DIRREM:
 			/* removal of a directory entry */
-			if (WK_DIRREM(wk)->dm_mnt == matchmnt)
+			mp = WK_DIRREM(wk)->dm_mnt;
+			if (mp == matchmnt)
 				matchcnt += 1;
+			vn_start_write(NULL, &mp, V_WAIT);
 			handle_workitem_remove(WK_DIRREM(wk));
+			vn_finished_write(mp);
 			break;
 
 		case D_FREEBLKS:
 			/* releasing blocks and/or fragments from a file */
-			if (WK_FREEBLKS(wk)->fb_fs == matchfs)
+			mp = WK_FREEBLKS(wk)->fb_mnt;
+			if (mp == matchmnt)
 				matchcnt += 1;
+			vn_start_write(NULL, &mp, V_WAIT);
 			handle_workitem_freeblocks(WK_FREEBLKS(wk));
+			vn_finished_write(mp);
 			break;
 
 		case D_FREEFRAG:
 			/* releasing a fragment when replaced as a file grows */
-			if (WK_FREEFRAG(wk)->ff_fs == matchfs)
+			mp = WK_FREEFRAG(wk)->ff_mnt;
+			if (mp == matchmnt)
 				matchcnt += 1;
+			vn_start_write(NULL, &mp, V_WAIT);
 			handle_workitem_freefrag(WK_FREEFRAG(wk));
+			vn_finished_write(mp);
 			break;
 
 		case D_FREEFILE:
 			/* releasing an inode when its link count drops to 0 */
-			if (WK_FREEFILE(wk)->fx_fs == matchfs)
+			mp = WK_FREEFILE(wk)->fx_mnt;
+			if (mp == matchmnt)
 				matchcnt += 1;
+			vn_start_write(NULL, &mp, V_WAIT);
 			handle_workitem_freefile(WK_FREEFILE(wk));
+			vn_finished_write(mp);
 			break;
 
 		default:
@@ -1316,7 +1325,7 @@ newfreefrag(ip, blkno, size)
 	freefrag->ff_list.wk_type = D_FREEFRAG;
 	freefrag->ff_state = ip->i_uid & ~ONWORKLIST;	/* XXX - used below */
 	freefrag->ff_inum = ip->i_number;
-	freefrag->ff_fs = fs;
+	freefrag->ff_mnt = ITOV(ip)->v_mount;
 	freefrag->ff_devvp = ip->i_devvp;
 	freefrag->ff_blkno = blkno;
 	freefrag->ff_fragsize = size;
@@ -1333,7 +1342,8 @@ handle_workitem_freefrag(freefrag)
 {
 	struct inode tip;
 
-	tip.i_fs = freefrag->ff_fs;
+	tip.i_vnode = NULL;
+	tip.i_fs = VFSTOUFS(freefrag->ff_mnt)->um_fs;
 	tip.i_devvp = freefrag->ff_devvp;
 	tip.i_dev = freefrag->ff_devvp->v_rdev;
 	tip.i_number = freefrag->ff_inum;
@@ -1601,7 +1611,7 @@ softdep_setup_freeblocks(ip, length)
 	freeblks->fb_uid = ip->i_uid;
 	freeblks->fb_previousinum = ip->i_number;
 	freeblks->fb_devvp = ip->i_devvp;
-	freeblks->fb_fs = fs;
+	freeblks->fb_mnt = ITOV(ip)->v_mount;
 	freeblks->fb_oldsize = ip->i_size;
 	freeblks->fb_newsize = length;
 	freeblks->fb_chkcnt = ip->i_blocks;
@@ -1845,7 +1855,7 @@ softdep_freefile(pvp, ino, mode)
 	freefile->fx_mode = mode;
 	freefile->fx_oldinum = ino;
 	freefile->fx_devvp = ip->i_devvp;
-	freefile->fx_fs = ip->i_fs;
+	freefile->fx_mnt = ITOV(ip)->v_mount;
 
 	/*
 	 * If the inodedep does not exist, then the zero'ed inode has
@@ -1949,13 +1959,13 @@ handle_workitem_freeblocks(freeblks)
 	int error, allerror = 0;
 	ufs_lbn_t baselbns[NIADDR], tmpval;
 
+	tip.i_fs = fs = VFSTOUFS(freeblks->fb_mnt)->um_fs;
 	tip.i_number = freeblks->fb_previousinum;
 	tip.i_devvp = freeblks->fb_devvp;
 	tip.i_dev = freeblks->fb_devvp->v_rdev;
-	tip.i_fs = freeblks->fb_fs;
 	tip.i_size = freeblks->fb_oldsize;
 	tip.i_uid = freeblks->fb_uid;
-	fs = freeblks->fb_fs;
+	tip.i_vnode = NULL;
 	tmpval = 1;
 	baselbns[0] = NDADDR;
 	for (i = 1; i < NIADDR; i++) {
@@ -2715,20 +2725,23 @@ static void
 handle_workitem_freefile(freefile)
 	struct freefile *freefile;
 {
+	struct fs *fs;
 	struct vnode vp;
 	struct inode tip;
 	struct inodedep *idp;
 	int error;
 
+	fs = VFSTOUFS(freefile->fx_mnt)->um_fs;
 #ifdef DEBUG
 	ACQUIRE_LOCK(&lk);
-	if (inodedep_lookup(freefile->fx_fs, freefile->fx_oldinum, 0, &idp))
+	if (inodedep_lookup(fs, freefile->fx_oldinum, 0, &idp))
 		panic("handle_workitem_freefile: inodedep survived");
 	FREE_LOCK(&lk);
 #endif
 	tip.i_devvp = freefile->fx_devvp;
 	tip.i_dev = freefile->fx_devvp->v_rdev;
-	tip.i_fs = freefile->fx_fs;
+	tip.i_fs = fs;
+	tip.i_vnode = &vp;
 	vp.v_data = &tip;
 	if ((error = ffs_freefile(&vp, freefile->fx_oldinum, freefile->fx_mode)) != 0)
 		softdep_error("handle_workitem_freefile", error);
@@ -4419,14 +4432,18 @@ clear_remove(p)
 			mp = pagedep->pd_mnt;
 			ino = pagedep->pd_ino;
 			FREE_LOCK(&lk);
+			if (vn_start_write(NULL, &mp, V_WAIT | PCATCH) != 0)
+				return;
 			if ((error = VFS_VGET(mp, ino, &vp)) != 0) {
 				softdep_error("clear_remove: vget", error);
+				vn_finished_write(mp);
 				return;
 			}
 			if ((error = VOP_FSYNC(vp, p->p_ucred, MNT_NOWAIT, p)))
 				softdep_error("clear_remove: fsync", error);
 			drain_output(vp, 0);
 			vput(vp);
+			vn_finished_write(mp);
 			return;
 		}
 	}
@@ -4486,8 +4503,11 @@ clear_inodedeps(p)
 		if (inodedep_lookup(fs, ino, 0, &inodedep) == 0)
 			continue;
 		FREE_LOCK(&lk);
+		if (vn_start_write(NULL, &mp, V_WAIT | PCATCH) != 0)
+			return;
 		if ((error = VFS_VGET(mp, ino, &vp)) != 0) {
 			softdep_error("clear_inodedeps: vget", error);
+			vn_finished_write(mp);
 			return;
 		}
 		if (ino == lastino) {
@@ -4499,6 +4519,7 @@ clear_inodedeps(p)
 			drain_output(vp, 0);
 		}
 		vput(vp);
+		vn_finished_write(mp);
 		ACQUIRE_LOCK(&lk);
 	}
 	FREE_LOCK(&lk);
diff --git a/sys/ufs/ffs/ffs_vfsops.c b/sys/ufs/ffs/ffs_vfsops.c
index 89ff6d3..5280181 100644
--- a/sys/ufs/ffs/ffs_vfsops.c
+++ b/sys/ufs/ffs/ffs_vfsops.c
@@ -133,7 +133,7 @@ VFS_SET(ufs_vfsops, ufs, 0);
  *		namei() if it is a genuine NULL from the user.
  */
 static int
-ffs_mount( mp, path, data, ndp, p)
+ffs_mount(mp, path, data, ndp, p)
         struct mount		*mp;	/* mount struct pointer*/
         char			*path;	/* path to mount point*/
         caddr_t			data;	/* arguments to FS specific mount*/
@@ -141,49 +141,34 @@ ffs_mount( mp, path, data, ndp, p)
         struct proc		*p;	/* process requesting mount*/
 {
 	size_t		size;
-	int		err = 0;
 	struct vnode	*devvp;
-
 	struct ufs_args args;
 	struct ufsmount *ump = 0;
 	register struct fs *fs;
-	int error, flags, ronly = 0;
+	int error, flags;
 	mode_t accessmode;
 
 	/*
-	 * Use NULL path to flag a root mount
+	 * Use NULL path to indicate we are mounting the root file system.
 	 */
-	if( path == NULL) {
-		/*
-		 ***
-		 * Mounting root file system
-		 ***
-		 */
-	
-		if ((err = bdevvp(rootdev, &rootvp))) {
+	if (path == NULL) {
+		if ((error = bdevvp(rootdev, &rootvp))) {
 			printf("ffs_mountroot: can't find rootvp\n");
-			return (err);
-		}
-
-		if( ( err = ffs_mountfs(rootvp, mp, p, M_FFSNODE)) != 0) {
-			/* fs specific cleanup (if any)*/
-			goto error_1;
+			return (error);
 		}
 
-		goto dostatfs;		/* success*/
+		if ((error = ffs_mountfs(rootvp, mp, p, M_FFSNODE)) != 0)
+			return (error);
 
+		(void)VFS_STATFS(mp, &mp->mnt_stat, p);
+		return (0);
 	}
 
 	/*
-	 ***
 	 * Mounting non-root file system or updating a file system
-	 ***
 	 */
-
-	/* copy in user arguments*/
-	err = copyin(data, (caddr_t)&args, sizeof (struct ufs_args));
-	if (err)
-		goto error_1;		/* can't get arguments*/
+	if ((error = copyin(data, (caddr_t)&args, sizeof(struct ufs_args)))!= 0)
+		return (error);
 
 	/*
 	 * If updating, check whether changing from read-only to
@@ -193,25 +178,36 @@ ffs_mount( mp, path, data, ndp, p)
 		ump = VFSTOUFS(mp);
 		fs = ump->um_fs;
 		devvp = ump->um_devvp;
-		err = 0;
-		ronly = fs->fs_ronly;	/* MNT_RELOAD might change this */
-		if (ronly == 0 && (mp->mnt_flag & MNT_RDONLY)) {
+		if (fs->fs_ronly == 0 && (mp->mnt_flag & MNT_RDONLY)) {
+			if ((error = vn_start_write(NULL, &mp, V_WAIT)) != 0)
+				return (error);
 			flags = WRITECLOSE;
 			if (mp->mnt_flag & MNT_FORCE)
 				flags |= FORCECLOSE;
 			if (mp->mnt_flag & MNT_SOFTDEP) {
-				err = softdep_flushfiles(mp, flags, p);
+				error = softdep_flushfiles(mp, flags, p);
 			} else {
-				err = ffs_flushfiles(mp, flags, p);
+				error = ffs_flushfiles(mp, flags, p);
 			}
-			ronly = 1;
-		}
-		if (!err && (mp->mnt_flag & MNT_RELOAD))
-			err = ffs_reload(mp, ndp->ni_cnd.cn_cred, p);
-		if (err) {
-			goto error_1;
+			if (error) {
+				vn_finished_write(mp);
+				return (error);
+			}
+			fs->fs_ronly = 1;
+			if ((fs->fs_flags & FS_UNCLEAN) == 0)
+				fs->fs_clean = 1;
+			if ((error = ffs_sbupdate(ump, MNT_WAIT)) != 0) {
+				fs->fs_ronly = 0;
+				fs->fs_clean = 0;
+				vn_finished_write(mp);
+				return (error);
+			}
+			vn_finished_write(mp);
 		}
-		if (ronly && (mp->mnt_kern_flag & MNTK_WANTRDWR)) {
+		if ((mp->mnt_flag & MNT_RELOAD) &&
+		    (error = ffs_reload(mp, ndp->ni_cnd.cn_cred, p)) != 0)
+			return (error);
+		if (fs->fs_ronly && (mp->mnt_kern_flag & MNTK_WANTRDWR)) {
 			/*
 			 * If upgrade to read-write by non-root, then verify
 			 * that user has necessary permissions on the device.
@@ -225,31 +221,36 @@ ffs_mount( mp, path, data, ndp, p)
 				}
 				VOP_UNLOCK(devvp, 0, p);
 			}
-
 			fs->fs_flags &= ~FS_UNCLEAN;
 			if (fs->fs_clean == 0) {
 				fs->fs_flags |= FS_UNCLEAN;
 				if (mp->mnt_flag & MNT_FORCE) {
-					printf(
-"WARNING: %s was not properly dismounted\n",
-					    fs->fs_fsmnt);
+					printf("WARNING: %s was not %s\n",
+					   fs->fs_fsmnt, "properly dismounted");
 				} else {
 					printf(
 "WARNING: R/W mount of %s denied.  Filesystem is not clean - run fsck\n",
 					    fs->fs_fsmnt);
-					err = EPERM;
-					goto error_1;
+					return (EPERM);
 				}
 			}
-
+			if ((error = vn_start_write(NULL, &mp, V_WAIT)) != 0)
+				return (error);
+			fs->fs_ronly = 0;
+			fs->fs_clean = 0;
+			if ((error = ffs_sbupdate(ump, MNT_WAIT)) != 0) {
+				vn_finished_write(mp);
+				return (error);
+			}
 			/* check to see if we need to start softdep */
-			if (fs->fs_flags & FS_DOSOFTDEP) {
-				err = softdep_mount(devvp, mp, fs, p->p_ucred);
-				if (err)
-					goto error_1;
+			if ((fs->fs_flags & FS_DOSOFTDEP) &&
+			    (error = softdep_mount(devvp, mp, fs, p->p_ucred))){
+				vn_finished_write(mp);
+				return (error);
 			}
-
-			ronly = 0;
+			if (fs->fs_snapinum[0] != 0)
+				ffs_snapshot_mount(mp);
+			vn_finished_write(mp);
 		}
 		/*
 		 * Soft updates is incompatible with "async",
@@ -258,18 +259,18 @@ ffs_mount( mp, path, data, ndp, p)
 		 * Softdep_mount() clears it in an initial mount 
 		 * or ro->rw remount.
 		 */
-		if (mp->mnt_flag & MNT_SOFTDEP) {
+		if (mp->mnt_flag & MNT_SOFTDEP)
 			mp->mnt_flag &= ~MNT_ASYNC;
-		}
-		/* if not updating name...*/
-		if (args.fspec == 0) {
-			/*
-			 * Process export requests.  Jumping to "success"
-			 * will return the vfs_export() error code.
-			 */
-			err = vfs_export(mp, &ump->um_export, &args.export);
-			goto success;
-		}
+		/*
+		 * If not updating name, process export requests.
+		 */
+		if (args.fspec == 0)
+			return (vfs_export(mp, &ump->um_export, &args.export));
+		/*
+		 * If this is a snapshot request, take the snapshot.
+		 */
+		if (mp->mnt_flag & MNT_SNAPSHOT)
+			return (ffs_snapshot(mp, args.fspec));
 	}
 
 	/*
@@ -277,17 +278,14 @@ ffs_mount( mp, path, data, ndp, p)
 	 * and verify that it refers to a sensible block device.
 	 */
 	NDINIT(ndp, LOOKUP, FOLLOW, UIO_USERSPACE, args.fspec, p);
-	err = namei(ndp);
-	if (err) {
-		/* can't get devvp!*/
-		goto error_1;
-	}
-
+	if ((error = namei(ndp)) != 0)
+		return (error);
 	NDFREE(ndp, NDF_ONLY_PNBUF);
 	devvp = ndp->ni_vp;
-
-	if (!vn_isdisk(devvp, &err))
-		goto error_2;
+	if (!vn_isdisk(devvp, &error)) {
+		vrele(devvp);
+		return (error);
+	}
 
 	/*
 	 * If mount by non-root, then verify that user has necessary
@@ -298,7 +296,7 @@ ffs_mount( mp, path, data, ndp, p)
 		if ((mp->mnt_flag & MNT_RDONLY) == 0)
 			accessmode |= VWRITE;
 		vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY, p);
-		if ((error = VOP_ACCESS(devvp, accessmode, p->p_ucred, p)) != 0) {
+		if ((error = VOP_ACCESS(devvp, accessmode, p->p_ucred, p))!= 0){
 			vput(devvp);
 			return (error);
 		}
@@ -307,96 +305,43 @@ ffs_mount( mp, path, data, ndp, p)
 
 	if (mp->mnt_flag & MNT_UPDATE) {
 		/*
-		 ********************
-		 * UPDATE
+		 * Update only
+		 *
 		 * If it's not the same vnode, or at least the same device
 		 * then it's not correct.
-		 ********************
 		 */
 
-		if (devvp != ump->um_devvp) {
-			if ( devvp->v_rdev == ump->um_devvp->v_rdev) {
-				vrele(devvp);
-			} else {
-				err = EINVAL;	/* needs translation */
-			}
-		} else
-			vrele(devvp);
-		/*
-		 * Update device name only on success
-		 */
-		if( !err) {
-			/* Save "mounted from" info for mount point (NULL pad)*/
-			copyinstr(	args.fspec,
-					mp->mnt_stat.f_mntfromname,
-					MNAMELEN - 1,
-					&size);
-			bzero( mp->mnt_stat.f_mntfromname + size, MNAMELEN - size);
-		}
+		if (devvp != ump->um_devvp &&
+		    devvp->v_rdev != ump->um_devvp->v_rdev)
+			error = EINVAL;	/* needs translation */
+		vrele(devvp);
+		if (error)
+			return (error);
 	} else {
 		/*
-		 ********************
-		 * NEW MOUNT
-		 ********************
+		 * New mount
+		 *
+		 * We need the name for the mount point (also used for
+		 * "last mounted on") copied in. If an error occurs,
+		 * the mount point is discarded by the upper level code.
 		 */
-
-		/*
-		 * Since this is a new mount, we want the names for
-		 * the device and the mount point copied in.  If an
-		 * error occurs,  the mountpoint is discarded by the
-		 * upper level code.
-		 */
-		/* Save "last mounted on" info for mount point (NULL pad)*/
-		copyinstr(	path,				/* mount point*/
-				mp->mnt_stat.f_mntonname,	/* save area*/
-				MNAMELEN - 1,			/* max size*/
-				&size);				/* real size*/
+		copyinstr(path, mp->mnt_stat.f_mntonname, MNAMELEN - 1, &size);
 		bzero( mp->mnt_stat.f_mntonname + size, MNAMELEN - size);
-
-		/* Save "mounted from" info for mount point (NULL pad)*/
-		copyinstr(	args.fspec,			/* device name*/
-				mp->mnt_stat.f_mntfromname,	/* save area*/
-				MNAMELEN - 1,			/* max size*/
-				&size);				/* real size*/
-		bzero( mp->mnt_stat.f_mntfromname + size, MNAMELEN - size);
-
-		err = ffs_mountfs(devvp, mp, p, M_FFSNODE);
-	}
-	if (err) {
-		goto error_2;
+		if ((error = ffs_mountfs(devvp, mp, p, M_FFSNODE)) != 0) {
+			vrele(devvp);
+			return (error);
+		}
 	}
-
-dostatfs:
 	/*
-	 * Initialize FS stat information in mount struct; uses both
-	 * mp->mnt_stat.f_mntonname and mp->mnt_stat.f_mntfromname
-	 *
-	 * This code is common to root and non-root mounts
+	 * Save "mounted from" device name info for mount point (NUL pad).
+	 */
+	copyinstr(args.fspec, mp->mnt_stat.f_mntfromname, MNAMELEN - 1, &size);
+	bzero( mp->mnt_stat.f_mntfromname + size, MNAMELEN - size);
+	/*
+	 * Initialize filesystem stat information in mount struct.
 	 */
 	(void)VFS_STATFS(mp, &mp->mnt_stat, p);
-
-	goto success;
-
-
-error_2:	/* error with devvp held*/
-
-	/* release devvp before failing*/
-	vrele(devvp);
-
-error_1:	/* no state to back out*/
-
-success:
-	if (!err && path && (mp->mnt_flag & MNT_UPDATE)) {
-		/* Update clean flag after changing read-onlyness. */
-		fs = ump->um_fs;
-		if (ronly != fs->fs_ronly) {
-			fs->fs_ronly = ronly;
-			fs->fs_clean = ronly &&
-			    (fs->fs_flags & FS_UNCLEAN) == 0 ? 1 : 0;
-			ffs_sbupdate(ump, MNT_WAIT);
-		}
-	}
-	return (err);
+	return (0);
 }
 
 /*
@@ -478,7 +423,7 @@ ffs_reload(mp, cred, p)
 	newfs->fs_maxcluster = fs->fs_maxcluster;
 	bcopy(newfs, fs, (u_int)fs->fs_sbsize);
 	if (fs->fs_sbsize < SBSIZE)
-		bp->b_flags |= B_INVAL;
+		bp->b_flags |= B_INVAL | B_NOCACHE;
 	brelse(bp);
 	mp->mnt_maxsymlinklen = fs->fs_maxsymlinklen;
 	ffs_oldfscompat(fs);
@@ -670,7 +615,7 @@ ffs_mountfs(devvp, mp, p, malloctype)
 	ump->um_vfree = ffs_vfree;
 	bcopy(bp->b_data, ump->um_fs, (u_int)fs->fs_sbsize);
 	if (fs->fs_sbsize < SBSIZE)
-		bp->b_flags |= B_INVAL;
+		bp->b_flags |= B_INVAL | B_NOCACHE;
 	brelse(bp);
 	bp = NULL;
 	fs = ump->um_fs;
@@ -750,6 +695,8 @@ ffs_mountfs(devvp, mp, p, malloctype)
 			free(base, M_UFSMNT);
 			goto out;
 		}
+		if (fs->fs_snapinum[0] != 0)
+			ffs_snapshot_mount(mp);
 		fs->fs_fmod = 1;
 		fs->fs_clean = 0;
 		(void) ffs_sbupdate(ump, MNT_WAIT);
@@ -886,6 +833,15 @@ ffs_flushfiles(mp, flags, p)
 		 */
 	}
 #endif
+	if (ump->um_devvp->v_flag & VCOPYONWRITE) {
+		if ((error = vflush(mp, NULL, SKIPSYSTEM | flags)) != 0)
+			return (error);
+		ffs_snapshot_unmount(mp);
+		/*
+		 * Here we fall through to vflush again to ensure
+		 * that we have gotten rid of all the system vnodes.
+		 */
+	}
         /*
 	 * Flush all the files.
 	 */
diff --git a/sys/ufs/ffs/ffs_vnops.c b/sys/ufs/ffs/ffs_vnops.c
index 539f302..eb6d621 100644
--- a/sys/ufs/ffs/ffs_vnops.c
+++ b/sys/ufs/ffs/ffs_vnops.c
@@ -95,6 +95,7 @@ vop_t **ffs_specop_p;
 static struct vnodeopv_entry_desc ffs_specop_entries[] = {
 	{ &vop_default_desc,		(vop_t *) ufs_vnoperatespec },
 	{ &vop_fsync_desc,		(vop_t *) ffs_fsync },
+	{ &vop_copyonwrite_desc,	(vop_t *) ffs_copyonwrite },
 	{ NULL, NULL }
 };
 static struct vnodeopv_desc ffs_specop_opv_desc =
@@ -129,11 +130,20 @@ ffs_fsync(ap)
 	} */ *ap;
 {
 	struct vnode *vp = ap->a_vp;
+	struct inode *ip = VTOI(vp);
 	struct buf *bp;
 	struct buf *nbp;
 	int s, error, wait, passes, skipmeta;
 	daddr_t lbn;
 
+	/*
+	 * Snapshot vnodes have to be unlocked here so that they do not
+	 * deadlock while checking whether their own written buffers need
+	 * to be copied.  We always hold a reference, so the snapshot
+	 * cannot be removed out from underneath us.
+	 */
+	if (ip->i_flags & SF_SNAPSHOT)
+		VOP_UNLOCK(vp, 0, ap->a_p);
 	wait = (ap->a_waitfor == MNT_WAIT);
 	if (vn_isdisk(vp, NULL)) {
 		lbn = INT_MAX;
@@ -141,8 +151,6 @@ ffs_fsync(ap)
 		    (vp->v_specmountpoint->mnt_flag & MNT_SOFTDEP))
 			softdep_fsync_mountdev(vp);
 	} else {
-		struct inode *ip;
-		ip = VTOI(vp);
 		lbn = lblkno(ip->i_fs, (ip->i_size + ip->i_fs->fs_bsize - 1));
 	}
 
@@ -279,5 +287,7 @@ loop:
 	}
 	splx(s);
 	error = UFS_UPDATE(vp, wait);
+	if (ip->i_flags & SF_SNAPSHOT)
+		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, ap->a_p);
 	return (error);
 }
diff --git a/sys/ufs/ffs/softdep.h b/sys/ufs/ffs/softdep.h
index 1908a3e..cf9cac8 100644
--- a/sys/ufs/ffs/softdep.h
+++ b/sys/ufs/ffs/softdep.h
@@ -382,7 +382,7 @@ struct freefrag {
 	struct	worklist ff_list;	/* id_inowait or delayed worklist */
 #	define	ff_state ff_list.wk_state /* owning user; should be uid_t */
 	struct	vnode *ff_devvp;	/* filesystem device vnode */
-	struct	fs *ff_fs;		/* addr of superblock */
+	struct	mount *ff_mnt;		/* associated mount point */
 	ufs_daddr_t ff_blkno;		/* fragment physical block number */
 	long	ff_fragsize;		/* size of fragment being deleted */
 	ino_t	ff_inum;		/* owning inode number */
@@ -398,7 +398,7 @@ struct freeblks {
 	struct	worklist fb_list;	/* id_inowait or delayed worklist */
 	ino_t	fb_previousinum;	/* inode of previous owner of blocks */
 	struct	vnode *fb_devvp;	/* filesystem device vnode */
-	struct	fs *fb_fs;		/* addr of superblock */
+	struct	mount *fb_mnt;		/* associated mount point */
 	off_t	fb_oldsize;		/* previous file size */
 	off_t	fb_newsize;		/* new file size */
 	int	fb_chkcnt;		/* used to check cnt of blks released */
@@ -418,7 +418,7 @@ struct freefile {
 	mode_t	fx_mode;		/* mode of inode */
 	ino_t	fx_oldinum;		/* inum of the unlinked file */
 	struct	vnode *fx_devvp;	/* filesystem device vnode */
-	struct	fs *fx_fs;		/* addr of superblock */
+	struct	mount *fx_mnt;		/* associated mount point */
 };
 
 /*
diff --git a/sys/ufs/ufs/inode.h b/sys/ufs/ufs/inode.h
index 83960b0..6417a10 100644
--- a/sys/ufs/ufs/inode.h
+++ b/sys/ufs/ufs/inode.h
@@ -84,6 +84,7 @@ struct inode {
 	struct	 dquot *i_dquot[MAXQUOTAS]; /* Dquot structures. */
 	u_quad_t i_modrev;	/* Revision level for NFS lease. */
 	struct	 lockf *i_lockf;/* Head of byte-level lock list. */
+	struct	 inode *i_copyonwrite; /* copy-on-write list */
 	/*
 	 * Side effects; used during directory lookup.
 	 */
diff --git a/sys/ufs/ufs/ufs_bmap.c b/sys/ufs/ufs/ufs_bmap.c
index 9056340..ab4ac52 100644
--- a/sys/ufs/ufs/ufs_bmap.c
+++ b/sys/ufs/ufs/ufs_bmap.c
@@ -47,6 +47,7 @@
 #include <sys/vnode.h>
 #include <sys/mount.h>
 #include <sys/resourcevar.h>
+#include <sys/stat.h>
 
 #include <ufs/ufs/extattr.h>
 #include <ufs/ufs/quota.h>
@@ -115,7 +116,7 @@ ufs_bmaparray(vp, bn, bnp, ap, nump, runp, runb)
 	struct indir a[NIADDR+1], *xap;
 	ufs_daddr_t daddr;
 	long metalbn;
-	int error, maxrun, num;
+	int error, num, maxrun = 0;
 
 	ip = VTOI(vp);
 	mp = vp->v_mount;
@@ -127,6 +128,7 @@ ufs_bmaparray(vp, bn, bnp, ap, nump, runp, runb)
 #endif
 
 	if (runp) {
+		maxrun = mp->mnt_iosize_max / mp->mnt_stat.f_iosize - 1;
 		*runp = 0;
 	}
 
@@ -134,7 +136,6 @@ ufs_bmaparray(vp, bn, bnp, ap, nump, runp, runb)
 		*runb = 0;
 	}
 
-	maxrun = mp->mnt_iosize_max / mp->mnt_stat.f_iosize - 1;
 
 	xap = ap == NULL ? a : ap;
 	if (!nump)
@@ -146,9 +147,12 @@ ufs_bmaparray(vp, bn, bnp, ap, nump, runp, runb)
 	num = *nump;
 	if (num == 0) {
 		*bnp = blkptrtodb(ump, ip->i_db[bn]);
-		if (*bnp == 0)
-			*bnp = -1;
-		else if (runp) {
+		if (*bnp == 0) {
+			if (ip->i_flags & SF_SNAPSHOT)
+				*bnp = blkptrtodb(ump, bn * ump->um_seqinc);
+			else
+				*bnp = -1;
+		} else if (runp) {
 			daddr_t bnb = bn;
 			for (++bn; bn < NDADDR && *runp < maxrun &&
 			    is_sequential(ump, ip->i_db[bn - 1], ip->i_db[bn]);
@@ -226,8 +230,13 @@ ufs_bmaparray(vp, bn, bnp, ap, nump, runp, runb)
 	if (bp)
 		bqrelse(bp);
 
-	daddr = blkptrtodb(ump, daddr);
-	*bnp = daddr == 0 ? -1 : daddr;
+	*bnp = blkptrtodb(ump, daddr);
+	if (*bnp == 0) {
+		if (ip->i_flags & SF_SNAPSHOT)
+			*bnp = blkptrtodb(ump, bn * ump->um_seqinc);
+		else
+			*bnp = -1;
+	}
 	return (0);
 }
 
diff --git a/sys/ufs/ufs/ufs_extern.h b/sys/ufs/ufs/ufs_extern.h
index d576be9..b740792 100644
--- a/sys/ufs/ufs/ufs_extern.h
+++ b/sys/ufs/ufs/ufs_extern.h
@@ -87,6 +87,7 @@ int	 ufs_init __P((struct vfsconf *));
 void	 ufs_itimes __P((struct vnode *vp));
 int	 ufs_lookup __P((struct vop_cachedlookup_args *));
 int	 ufs_reclaim __P((struct vop_reclaim_args *));
+void	 ffs_snapremove __P((struct vnode *vp));
 int	 ufs_root __P((struct mount *, struct vnode **));
 int	 ufs_start __P((struct mount *, int, struct proc *));
 int	 ufs_vinit __P((struct mount *, vop_t **, vop_t **, struct vnode **));
diff --git a/sys/ufs/ufs/ufs_inode.c b/sys/ufs/ufs/ufs_inode.c
index 507e716..485a6d2 100644
--- a/sys/ufs/ufs/ufs_inode.c
+++ b/sys/ufs/ufs/ufs_inode.c
@@ -77,6 +77,7 @@ ufs_inactive(ap)
 	if (ip->i_mode == 0)
 		goto out;
 	if (ip->i_nlink <= 0 && (vp->v_mount->mnt_flag & MNT_RDONLY) == 0) {
+		(void) vn_write_suspend_wait(vp, V_WAIT);
 #ifdef QUOTA
 		if (!getinoquota(ip))
 			(void)chkiq(ip, -1, NOCRED, 0);
@@ -91,8 +92,15 @@ ufs_inactive(ap)
 		ip->i_flag |= IN_CHANGE | IN_UPDATE;
 		UFS_VFREE(vp, ip->i_number, mode);
 	}
-	if (ip->i_flag & (IN_ACCESS | IN_CHANGE | IN_MODIFIED | IN_UPDATE))
-		UFS_UPDATE(vp, 0);
+	if (ip->i_flag & (IN_ACCESS | IN_CHANGE | IN_MODIFIED | IN_UPDATE)) {
+		if ((ip->i_flag & (IN_CHANGE | IN_UPDATE | IN_MODIFIED)) == 0 &&
+		    vn_write_suspend_wait(vp, V_NOWAIT)) {
+			ip->i_flag &= ~IN_ACCESS;
+		} else {
+			(void) vn_write_suspend_wait(vp, V_WAIT);
+			UFS_UPDATE(vp, 0);
+		}
+	}
 out:
 	VOP_UNLOCK(vp, 0, p);
 	/*
diff --git a/sys/ufs/ufs/ufs_quota.c b/sys/ufs/ufs/ufs_quota.c
index 574a330..6396f67 100644
--- a/sys/ufs/ufs/ufs_quota.c
+++ b/sys/ufs/ufs/ufs_quota.c
@@ -889,6 +889,7 @@ dqsync(vp, dq)
 	struct vnode *dqvp;
 	struct iovec aiov;
 	struct uio auio;
+	struct mount *mp;
 	int error;
 
 	if (dq == NODQUOT)
@@ -897,6 +898,7 @@ dqsync(vp, dq)
 		return (0);
 	if ((dqvp = dq->dq_ump->um_quotas[dq->dq_type]) == NULLVP)
 		panic("dqsync: file");
+	(void) vn_write_suspend_wait(dqvp, V_WAIT);
 	if (vp != dqvp)
 		vn_lock(dqvp, LK_EXCLUSIVE | LK_RETRY, p);
 	while (dq->dq_flags & DQ_LOCK) {
diff --git a/sys/ufs/ufs/ufs_vnops.c b/sys/ufs/ufs/ufs_vnops.c
index e3b6e29..d97568c 100644
--- a/sys/ufs/ufs/ufs_vnops.c
+++ b/sys/ufs/ufs/ufs_vnops.c
@@ -702,6 +702,8 @@ ufs_remove(ap)
 	int error;
 
 	ip = VTOI(vp);
+	if ((ip->i_flags & SF_SNAPSHOT) != 0)
+		ffs_snapremove(vp);
 	if ((ip->i_flags & (NOUNLINK | IMMUTABLE | APPEND)) ||
 	    (VTOI(dvp)->i_flags & APPEND)) {
 		error = EPERM;
@@ -2215,6 +2217,7 @@ static struct vnodeopv_entry_desc ufs_vnodeop_entries[] = {
 	{ &vop_open_desc,		(vop_t *) ufs_open },
 	{ &vop_pathconf_desc,		(vop_t *) ufs_pathconf },
 	{ &vop_poll_desc,		(vop_t *) vop_stdpoll },
+	{ &vop_getwritemount_desc, 	(vop_t *) vop_stdgetwritemount },
 	{ &vop_print_desc,		(vop_t *) ufs_print },
 	{ &vop_readdir_desc,		(vop_t *) ufs_readdir },
 	{ &vop_readlink_desc,		(vop_t *) ufs_readlink },
diff --git a/sys/vm/vm_pageout.c b/sys/vm/vm_pageout.c
index 307dd0b..97b221e 100644
--- a/sys/vm/vm_pageout.c
+++ b/sys/vm/vm_pageout.c
@@ -777,6 +777,7 @@ rescan0:
 			int written;
 			int swap_pageouts_ok;
 			struct vnode *vp = NULL;
+			struct mount *mp;
 
 			object = m->object;
 
@@ -853,9 +854,13 @@ rescan0:
 			if (object->type == OBJT_VNODE) {
 				vp = object->handle;
 
+				mp = NULL;
+				if (vp->v_type == VREG)
+					vn_start_write(vp, &mp, V_NOWAIT);
 				if (VOP_ISLOCKED(vp, NULL) ||
 				    vp->v_data == NULL ||
 				    vget(vp, LK_EXCLUSIVE|LK_NOOBJ, curproc)) {
+					vn_finished_write(mp);
 					if ((m->queue == PQ_INACTIVE) &&
 						(m->hold_count == 0) &&
 						(m->busy == 0) &&
@@ -878,6 +883,7 @@ rescan0:
 					if (object->flags & OBJ_MIGHTBEDIRTY)
 						vnodes_skipped++;
 					vput(vp);
+					vn_finished_write(mp);
 					continue;
 				}
 	
@@ -888,6 +894,7 @@ rescan0:
 				 */
 				if (m->busy || (m->flags & PG_BUSY)) {
 					vput(vp);
+					vn_finished_write(mp);
 					continue;
 				}
 
@@ -902,6 +909,7 @@ rescan0:
 					if (object->flags & OBJ_MIGHTBEDIRTY)
 						vnodes_skipped++;
 					vput(vp);
+					vn_finished_write(mp);
 					continue;
 				}
 			}
@@ -913,8 +921,10 @@ rescan0:
 			 * start the cleaning operation.
 			 */
 			written = vm_pageout_clean(m);
-			if (vp)
+			if (vp) {
 				vput(vp);
+				vn_finished_write(mp);
+			}
 
 			maxlaunder -= written;
 		}
diff --git a/sys/vm/vnode_pager.c b/sys/vm/vnode_pager.c
index 2633426..3dd12ec 100644
--- a/sys/vm/vnode_pager.c
+++ b/sys/vm/vnode_pager.c
@@ -850,6 +850,7 @@ vnode_pager_putpages(object, m, count, sync, rtvals)
 {
 	int rtval;
 	struct vnode *vp;
+	struct mount *mp;
 	int bytes = count * PAGE_SIZE;
 
 	/*
@@ -872,11 +873,15 @@ vnode_pager_putpages(object, m, count, sync, rtvals)
 	 */
 
 	vp = object->handle;
+	if (vp->v_type != VREG)
+		mp = NULL;
+	(void)vn_start_write(vp, &mp, V_WAIT);
 	rtval = VOP_PUTPAGES(vp, m, bytes, sync, rtvals, 0);
 	if (rtval == EOPNOTSUPP) {
 	    printf("vnode_pager: *** WARNING *** stale FS putpages\n");
 	    rtval = vnode_pager_generic_putpages( vp, m, bytes, sync, rtvals);
 	}
+	vn_finished_write(mp);
 }