summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--sys/compat/svr4/svr4_fcntl.c4
-rw-r--r--sys/conf/files2
-rw-r--r--sys/dev/vn/vn.c18
-rw-r--r--sys/fs/fdescfs/fdesc_vnops.c9
-rw-r--r--sys/fs/fifofs/fifo_vnops.c1
-rw-r--r--sys/fs/specfs/spec_vnops.c22
-rw-r--r--sys/fs/unionfs/union_subr.c30
-rw-r--r--sys/fs/unionfs/union_vnops.c16
-rw-r--r--sys/gnu/ext2fs/ext2_bmap.c23
-rw-r--r--sys/gnu/ext2fs/inode.h1
-rw-r--r--sys/gnu/fs/ext2fs/ext2_bmap.c23
-rw-r--r--sys/gnu/fs/ext2fs/inode.h1
-rw-r--r--sys/kern/kern_ktrace.c5
-rw-r--r--sys/kern/kern_sig.c11
-rw-r--r--sys/kern/tty_tty.c6
-rw-r--r--sys/kern/uipc_usrreq.c16
-rw-r--r--sys/kern/vfs_bio.c6
-rw-r--r--sys/kern/vfs_default.c15
-rw-r--r--sys/kern/vfs_export.c29
-rw-r--r--sys/kern/vfs_extattr.c316
-rw-r--r--sys/kern/vfs_subr.c29
-rw-r--r--sys/kern/vfs_syscalls.c316
-rw-r--r--sys/kern/vfs_vnops.c164
-rw-r--r--sys/kern/vnode_if.src16
-rw-r--r--sys/miscfs/fdesc/fdesc_vnops.c9
-rw-r--r--sys/miscfs/fifofs/fifo_vnops.c1
-rw-r--r--sys/miscfs/specfs/spec_vnops.c22
-rw-r--r--sys/miscfs/union/union_subr.c30
-rw-r--r--sys/miscfs/union/union_vnops.c16
-rw-r--r--sys/nfs/nfs_serv.c114
-rw-r--r--sys/nfsserver/nfs_serv.c114
-rw-r--r--sys/svr4/svr4_fcntl.c4
-rw-r--r--sys/sys/buf.h1
-rw-r--r--sys/sys/proc.h1
-rw-r--r--sys/sys/vnode.h25
-rw-r--r--sys/ufs/ffs/README.snapshot112
-rw-r--r--sys/ufs/ffs/ffs_alloc.c17
-rw-r--r--sys/ufs/ffs/ffs_balloc.c9
-rw-r--r--sys/ufs/ffs/ffs_extern.h6
-rw-r--r--sys/ufs/ffs/ffs_snapshot.c1028
-rw-r--r--sys/ufs/ffs/ffs_softdep.c53
-rw-r--r--sys/ufs/ffs/ffs_vfsops.c262
-rw-r--r--sys/ufs/ffs/ffs_vnops.c14
-rw-r--r--sys/ufs/ffs/softdep.h6
-rw-r--r--sys/ufs/ufs/inode.h1
-rw-r--r--sys/ufs/ufs/ufs_bmap.c23
-rw-r--r--sys/ufs/ufs/ufs_extern.h1
-rw-r--r--sys/ufs/ufs/ufs_inode.c12
-rw-r--r--sys/ufs/ufs/ufs_quota.c2
-rw-r--r--sys/ufs/ufs/ufs_vnops.c3
-rw-r--r--sys/vm/vm_pageout.c12
-rw-r--r--sys/vm/vnode_pager.c5
52 files changed, 2536 insertions, 446 deletions
diff --git a/sys/compat/svr4/svr4_fcntl.c b/sys/compat/svr4/svr4_fcntl.c
index 4040030..c65f345 100644
--- a/sys/compat/svr4/svr4_fcntl.c
+++ b/sys/compat/svr4/svr4_fcntl.c
@@ -247,6 +247,7 @@ fd_revoke(p, fd)
struct filedesc *fdp = p->p_fd;
struct file *fp;
struct vnode *vp;
+ struct mount *mp;
struct vattr vattr;
int error, *retval;
@@ -271,8 +272,11 @@ fd_revoke(p, fd)
(error = suser(p)) != 0)
goto out;
+ if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
+ goto out;
if (vcount(vp) > 1)
VOP_REVOKE(vp, REVOKEALL);
+ vn_finished_write(mp);
out:
vrele(vp);
return error;
diff --git a/sys/conf/files b/sys/conf/files
index 84130e1..01f088f 100644
--- a/sys/conf/files
+++ b/sys/conf/files
@@ -906,6 +906,8 @@ ufs/ffs/ffs_balloc.c optional ffs
ufs/ffs/ffs_balloc.c optional mfs
ufs/ffs/ffs_inode.c optional ffs
ufs/ffs/ffs_inode.c optional mfs
+ufs/ffs/ffs_snapshot.c optional ffs
+ufs/ffs/ffs_snapshot.c optional mfs
ufs/ffs/ffs_softdep.c optional softupdates
ufs/ffs/ffs_softdep_stub.c standard
ufs/ffs/ffs_subr.c optional ffs
diff --git a/sys/dev/vn/vn.c b/sys/dev/vn/vn.c
index 88e3801..efbc437 100644
--- a/sys/dev/vn/vn.c
+++ b/sys/dev/vn/vn.c
@@ -276,7 +276,6 @@ vnstrategy(struct bio *bp)
int unit;
struct vn_softc *vn;
int error;
- int isvplocked = 0;
unit = dkunit(bp->bio_dev);
vn = bp->bio_dev->si_drv1;
@@ -360,6 +359,7 @@ vnstrategy(struct bio *bp)
*/
struct uio auio;
struct iovec aiov;
+ struct mount *mp;
bzero(&auio, sizeof(auio));
@@ -375,18 +375,18 @@ vnstrategy(struct bio *bp)
auio.uio_rw = UIO_WRITE;
auio.uio_resid = bp->bio_bcount;
auio.uio_procp = curproc;
- if (!VOP_ISLOCKED(vn->sc_vp, NULL)) {
- isvplocked = 1;
+ if (VOP_ISLOCKED(vn->sc_vp, NULL))
+ vprint("unexpected vn driver lock", vn->sc_vp);
+ if (bp->bio_cmd == BIO_READ) {
vn_lock(vn->sc_vp, LK_EXCLUSIVE | LK_RETRY, curproc);
- }
- if(bp->bio_cmd == BIO_READ)
error = VOP_READ(vn->sc_vp, &auio, 0, vn->sc_cred);
- else
+ } else {
+ (void) vn_start_write(vn->sc_vp, &mp, V_WAIT);
+ vn_lock(vn->sc_vp, LK_EXCLUSIVE | LK_RETRY, curproc);
error = VOP_WRITE(vn->sc_vp, &auio, 0, vn->sc_cred);
- if (isvplocked) {
- VOP_UNLOCK(vn->sc_vp, 0, curproc);
- isvplocked = 0;
+ vn_finished_write(mp);
}
+ VOP_UNLOCK(vn->sc_vp, 0, curproc);
bp->bio_resid = auio.uio_resid;
if (error) {
diff --git a/sys/fs/fdescfs/fdesc_vnops.c b/sys/fs/fdescfs/fdesc_vnops.c
index 772a94c..72c7cae 100644
--- a/sys/fs/fdescfs/fdesc_vnops.c
+++ b/sys/fs/fdescfs/fdesc_vnops.c
@@ -383,6 +383,8 @@ fdesc_setattr(ap)
{
struct filedesc *fdp = ap->a_p->p_fd;
struct vattr *vap = ap->a_vap;
+ struct vnode *vp;
+ struct mount *mp;
struct file *fp;
unsigned fd;
int error;
@@ -403,8 +405,11 @@ fdesc_setattr(ap)
switch (fp->f_type) {
case DTYPE_FIFO:
case DTYPE_VNODE:
- error = VOP_SETATTR((struct vnode *) fp->f_data, ap->a_vap,
- ap->a_cred, ap->a_p);
+ vp = (struct vnode *)fp->f_data;
+ if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
+ return (error);
+ error = VOP_SETATTR(vp, ap->a_vap, ap->a_cred, ap->a_p);
+ vn_finished_write(mp);
break;
default:
diff --git a/sys/fs/fifofs/fifo_vnops.c b/sys/fs/fifofs/fifo_vnops.c
index 5bd13a7..03e3e37 100644
--- a/sys/fs/fifofs/fifo_vnops.c
+++ b/sys/fs/fifofs/fifo_vnops.c
@@ -107,6 +107,7 @@ static struct vnodeopv_entry_desc fifo_vnodeop_entries[] = {
{ &vop_open_desc, (vop_t *) fifo_open },
{ &vop_pathconf_desc, (vop_t *) fifo_pathconf },
{ &vop_poll_desc, (vop_t *) fifo_poll },
+ { &vop_getwritemount_desc, (vop_t *) vop_stdgetwritemount },
{ &vop_print_desc, (vop_t *) fifo_print },
{ &vop_read_desc, (vop_t *) fifo_read },
{ &vop_readdir_desc, (vop_t *) fifo_badop },
diff --git a/sys/fs/specfs/spec_vnops.c b/sys/fs/specfs/spec_vnops.c
index cbe52f4..baf40c3 100644
--- a/sys/fs/specfs/spec_vnops.c
+++ b/sys/fs/specfs/spec_vnops.c
@@ -88,6 +88,7 @@ static struct vnodeopv_entry_desc spec_vnodeop_entries[] = {
{ &vop_open_desc, (vop_t *) spec_open },
{ &vop_pathconf_desc, (vop_t *) vop_stdpathconf },
{ &vop_poll_desc, (vop_t *) spec_poll },
+ { &vop_getwritemount_desc, (vop_t *) vop_stdgetwritemount },
{ &vop_print_desc, (vop_t *) spec_print },
{ &vop_read_desc, (vop_t *) spec_read },
{ &vop_readdir_desc, (vop_t *) vop_panic },
@@ -415,16 +416,29 @@ spec_strategy(ap)
struct buf *bp;
struct vnode *vp;
struct mount *mp;
+ int error;
bp = ap->a_bp;
- if ((bp->b_iocmd == BIO_WRITE) && (LIST_FIRST(&bp->b_dep)) != NULL)
- buf_start(bp);
-
+ vp = ap->a_vp;
+ if ((bp->b_iocmd == BIO_WRITE)) {
+ if (vp->v_mount != NULL &&
+ (vp->v_mount->mnt_kern_flag & MNTK_SUSPENDED) != 0)
+ panic("spec_strategy: bad I/O");
+ if (LIST_FIRST(&bp->b_dep) != NULL)
+ buf_start(bp);
+ if ((vp->v_flag & VCOPYONWRITE) &&
+ (error = VOP_COPYONWRITE(vp, bp)) != 0 &&
+ error != EOPNOTSUPP) {
+ bp->b_io.bio_error = error;
+ bp->b_io.bio_flags |= BIO_ERROR;
+ biodone(&bp->b_io);
+ return (0);
+ }
+ }
/*
* Collect statistics on synchronous and asynchronous read
* and write counts for disks that have associated filesystems.
*/
- vp = ap->a_vp;
if (vn_isdisk(vp, NULL) && (mp = vp->v_specmountpoint) != NULL) {
if (bp->b_iocmd == BIO_WRITE) {
if (bp->b_lock.lk_lockholder == LK_KERNPROC)
diff --git a/sys/fs/unionfs/union_subr.c b/sys/fs/unionfs/union_subr.c
index 6b88bef..d1d6e31 100644
--- a/sys/fs/unionfs/union_subr.c
+++ b/sys/fs/unionfs/union_subr.c
@@ -747,6 +747,7 @@ union_copyup(un, docopy, cred, p)
struct proc *p;
{
int error;
+ struct mount *mp;
struct vnode *lvp, *uvp;
/*
@@ -759,9 +760,12 @@ union_copyup(un, docopy, cred, p)
if (error)
return (error);
- error = union_vn_create(&uvp, un, p);
- if (error)
+ if ((error = vn_start_write(un->un_dirvp, &mp, V_WAIT | PCATCH)) != 0)
return (error);
+ if ((error = union_vn_create(&uvp, un, p)) != 0) {
+ vn_finished_write(mp);
+ return (error);
+ }
lvp = un->un_lowervp;
@@ -785,6 +789,7 @@ union_copyup(un, docopy, cred, p)
}
VOP_UNLOCK(uvp, 0, p);
+ vn_finished_write(mp);
union_newupper(un, uvp);
KASSERT(uvp->v_usecount > 0, ("copy: uvp refcount 0: %d", uvp->v_usecount));
union_vn_close(uvp, FWRITE, cred, p);
@@ -910,11 +915,15 @@ union_mkshadow(um, dvp, cnp, vpp)
struct vattr va;
struct proc *p = cnp->cn_proc;
struct componentname cn;
+ struct mount *mp;
- error = union_relookup(um, dvp, vpp, cnp, &cn,
- cnp->cn_nameptr, cnp->cn_namelen);
- if (error)
+ if ((error = vn_start_write(dvp, &mp, V_WAIT | PCATCH)) != 0)
+ return (error);
+ if ((error = union_relookup(um, dvp, vpp, cnp, &cn,
+ cnp->cn_nameptr, cnp->cn_namelen)) != 0) {
+ vn_finished_write(mp);
return (error);
+ }
if (*vpp) {
if (cn.cn_flags & HASBUF) {
@@ -925,6 +934,7 @@ union_mkshadow(um, dvp, cnp, vpp)
vrele(*vpp);
else
vput(*vpp);
+ vn_finished_write(mp);
*vpp = NULLVP;
return (EEXIST);
}
@@ -950,6 +960,7 @@ union_mkshadow(um, dvp, cnp, vpp)
cn.cn_flags &= ~HASBUF;
}
/*vput(dvp);*/
+ vn_finished_write(mp);
return (error);
}
@@ -973,10 +984,15 @@ union_mkwhiteout(um, dvp, cnp, path)
struct proc *p = cnp->cn_proc;
struct vnode *wvp;
struct componentname cn;
+ struct mount *mp;
+ if ((error = vn_start_write(dvp, &mp, V_WAIT | PCATCH)) != 0)
+ return (error);
error = union_relookup(um, dvp, &wvp, cnp, &cn, path, strlen(path));
- if (error)
+ if (error) {
+ vn_finished_write(mp);
return (error);
+ }
if (wvp) {
if (cn.cn_flags & HASBUF) {
@@ -987,6 +1003,7 @@ union_mkwhiteout(um, dvp, cnp, path)
vrele(wvp);
else
vput(wvp);
+ vn_finished_write(mp);
return (EEXIST);
}
@@ -998,6 +1015,7 @@ union_mkwhiteout(um, dvp, cnp, path)
zfree(namei_zone, cn.cn_pnbuf);
cn.cn_flags &= ~HASBUF;
}
+ vn_finished_write(mp);
return (error);
}
diff --git a/sys/fs/unionfs/union_vnops.c b/sys/fs/unionfs/union_vnops.c
index 1c5ed5d..d7b95f3 100644
--- a/sys/fs/unionfs/union_vnops.c
+++ b/sys/fs/unionfs/union_vnops.c
@@ -93,6 +93,7 @@ static int union_print __P((struct vop_print_args *ap));
static int union_read __P((struct vop_read_args *ap));
static int union_readdir __P((struct vop_readdir_args *ap));
static int union_readlink __P((struct vop_readlink_args *ap));
+static int union_getwritemount __P((struct vop_getwritemount_args *ap));
static int union_reclaim __P((struct vop_reclaim_args *ap));
static int union_remove __P((struct vop_remove_args *ap));
static int union_rename __P((struct vop_rename_args *ap));
@@ -1681,6 +1682,20 @@ union_readlink(ap)
return (error);
}
+static int
+union_getwritemount(ap)
+ struct vop_getwritemount_args /* {
+ struct vnode *a_vp;
+ struct mount **a_mpp;
+ } */ *ap;
+{
+ struct vnode *vp = UPPERVP(ap->a_vp);
+
+ if (vp == NULL)
+ panic("union: missing upper layer in getwritemount");
+ return(VOP_GETWRITEMOUNT(vp, ap->a_mpp));
+}
+
/*
* union_inactive:
*
@@ -1963,6 +1978,7 @@ static struct vnodeopv_entry_desc union_vnodeop_entries[] = {
{ &vop_read_desc, (vop_t *) union_read },
{ &vop_readdir_desc, (vop_t *) union_readdir },
{ &vop_readlink_desc, (vop_t *) union_readlink },
+ { &vop_getwritemount_desc, (vop_t *) union_getwritemount },
{ &vop_reclaim_desc, (vop_t *) union_reclaim },
{ &vop_remove_desc, (vop_t *) union_remove },
{ &vop_rename_desc, (vop_t *) union_rename },
diff --git a/sys/gnu/ext2fs/ext2_bmap.c b/sys/gnu/ext2fs/ext2_bmap.c
index 9056340..ab4ac52 100644
--- a/sys/gnu/ext2fs/ext2_bmap.c
+++ b/sys/gnu/ext2fs/ext2_bmap.c
@@ -47,6 +47,7 @@
#include <sys/vnode.h>
#include <sys/mount.h>
#include <sys/resourcevar.h>
+#include <sys/stat.h>
#include <ufs/ufs/extattr.h>
#include <ufs/ufs/quota.h>
@@ -115,7 +116,7 @@ ufs_bmaparray(vp, bn, bnp, ap, nump, runp, runb)
struct indir a[NIADDR+1], *xap;
ufs_daddr_t daddr;
long metalbn;
- int error, maxrun, num;
+ int error, num, maxrun = 0;
ip = VTOI(vp);
mp = vp->v_mount;
@@ -127,6 +128,7 @@ ufs_bmaparray(vp, bn, bnp, ap, nump, runp, runb)
#endif
if (runp) {
+ maxrun = mp->mnt_iosize_max / mp->mnt_stat.f_iosize - 1;
*runp = 0;
}
@@ -134,7 +136,6 @@ ufs_bmaparray(vp, bn, bnp, ap, nump, runp, runb)
*runb = 0;
}
- maxrun = mp->mnt_iosize_max / mp->mnt_stat.f_iosize - 1;
xap = ap == NULL ? a : ap;
if (!nump)
@@ -146,9 +147,12 @@ ufs_bmaparray(vp, bn, bnp, ap, nump, runp, runb)
num = *nump;
if (num == 0) {
*bnp = blkptrtodb(ump, ip->i_db[bn]);
- if (*bnp == 0)
- *bnp = -1;
- else if (runp) {
+ if (*bnp == 0) {
+ if (ip->i_flags & SF_SNAPSHOT)
+ *bnp = blkptrtodb(ump, bn * ump->um_seqinc);
+ else
+ *bnp = -1;
+ } else if (runp) {
daddr_t bnb = bn;
for (++bn; bn < NDADDR && *runp < maxrun &&
is_sequential(ump, ip->i_db[bn - 1], ip->i_db[bn]);
@@ -226,8 +230,13 @@ ufs_bmaparray(vp, bn, bnp, ap, nump, runp, runb)
if (bp)
bqrelse(bp);
- daddr = blkptrtodb(ump, daddr);
- *bnp = daddr == 0 ? -1 : daddr;
+ *bnp = blkptrtodb(ump, daddr);
+ if (*bnp == 0) {
+ if (ip->i_flags & SF_SNAPSHOT)
+ *bnp = blkptrtodb(ump, bn * ump->um_seqinc);
+ else
+ *bnp = -1;
+ }
return (0);
}
diff --git a/sys/gnu/ext2fs/inode.h b/sys/gnu/ext2fs/inode.h
index 83960b0..6417a10 100644
--- a/sys/gnu/ext2fs/inode.h
+++ b/sys/gnu/ext2fs/inode.h
@@ -84,6 +84,7 @@ struct inode {
struct dquot *i_dquot[MAXQUOTAS]; /* Dquot structures. */
u_quad_t i_modrev; /* Revision level for NFS lease. */
struct lockf *i_lockf;/* Head of byte-level lock list. */
+ struct inode *i_copyonwrite; /* copy-on-write list */
/*
* Side effects; used during directory lookup.
*/
diff --git a/sys/gnu/fs/ext2fs/ext2_bmap.c b/sys/gnu/fs/ext2fs/ext2_bmap.c
index 9056340..ab4ac52 100644
--- a/sys/gnu/fs/ext2fs/ext2_bmap.c
+++ b/sys/gnu/fs/ext2fs/ext2_bmap.c
@@ -47,6 +47,7 @@
#include <sys/vnode.h>
#include <sys/mount.h>
#include <sys/resourcevar.h>
+#include <sys/stat.h>
#include <ufs/ufs/extattr.h>
#include <ufs/ufs/quota.h>
@@ -115,7 +116,7 @@ ufs_bmaparray(vp, bn, bnp, ap, nump, runp, runb)
struct indir a[NIADDR+1], *xap;
ufs_daddr_t daddr;
long metalbn;
- int error, maxrun, num;
+ int error, num, maxrun = 0;
ip = VTOI(vp);
mp = vp->v_mount;
@@ -127,6 +128,7 @@ ufs_bmaparray(vp, bn, bnp, ap, nump, runp, runb)
#endif
if (runp) {
+ maxrun = mp->mnt_iosize_max / mp->mnt_stat.f_iosize - 1;
*runp = 0;
}
@@ -134,7 +136,6 @@ ufs_bmaparray(vp, bn, bnp, ap, nump, runp, runb)
*runb = 0;
}
- maxrun = mp->mnt_iosize_max / mp->mnt_stat.f_iosize - 1;
xap = ap == NULL ? a : ap;
if (!nump)
@@ -146,9 +147,12 @@ ufs_bmaparray(vp, bn, bnp, ap, nump, runp, runb)
num = *nump;
if (num == 0) {
*bnp = blkptrtodb(ump, ip->i_db[bn]);
- if (*bnp == 0)
- *bnp = -1;
- else if (runp) {
+ if (*bnp == 0) {
+ if (ip->i_flags & SF_SNAPSHOT)
+ *bnp = blkptrtodb(ump, bn * ump->um_seqinc);
+ else
+ *bnp = -1;
+ } else if (runp) {
daddr_t bnb = bn;
for (++bn; bn < NDADDR && *runp < maxrun &&
is_sequential(ump, ip->i_db[bn - 1], ip->i_db[bn]);
@@ -226,8 +230,13 @@ ufs_bmaparray(vp, bn, bnp, ap, nump, runp, runb)
if (bp)
bqrelse(bp);
- daddr = blkptrtodb(ump, daddr);
- *bnp = daddr == 0 ? -1 : daddr;
+ *bnp = blkptrtodb(ump, daddr);
+ if (*bnp == 0) {
+ if (ip->i_flags & SF_SNAPSHOT)
+ *bnp = blkptrtodb(ump, bn * ump->um_seqinc);
+ else
+ *bnp = -1;
+ }
return (0);
}
diff --git a/sys/gnu/fs/ext2fs/inode.h b/sys/gnu/fs/ext2fs/inode.h
index 83960b0..6417a10 100644
--- a/sys/gnu/fs/ext2fs/inode.h
+++ b/sys/gnu/fs/ext2fs/inode.h
@@ -84,6 +84,7 @@ struct inode {
struct dquot *i_dquot[MAXQUOTAS]; /* Dquot structures. */
u_quad_t i_modrev; /* Revision level for NFS lease. */
struct lockf *i_lockf;/* Head of byte-level lock list. */
+ struct inode *i_copyonwrite; /* copy-on-write list */
/*
* Side effects; used during directory lookup.
*/
diff --git a/sys/kern/kern_ktrace.c b/sys/kern/kern_ktrace.c
index d914fc2..b0530f9 100644
--- a/sys/kern/kern_ktrace.c
+++ b/sys/kern/kern_ktrace.c
@@ -457,7 +457,8 @@ ktrwrite(vp, kth, uio)
{
struct uio auio;
struct iovec aiov[2];
- register struct proc *p = curproc; /* XXX */
+ struct proc *p = curproc; /* XXX */
+ struct mount *mp;
int error;
if (vp == NULL)
@@ -479,6 +480,7 @@ ktrwrite(vp, kth, uio)
if (uio != NULL)
kth->ktr_len += uio->uio_resid;
}
+ vn_start_write(vp, &mp, V_WAIT);
vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p);
(void)VOP_LEASE(vp, p, p->p_ucred, LEASE_WRITE);
error = VOP_WRITE(vp, &auio, IO_UNIT | IO_APPEND, p->p_ucred);
@@ -487,6 +489,7 @@ ktrwrite(vp, kth, uio)
error = VOP_WRITE(vp, uio, IO_UNIT | IO_APPEND, p->p_ucred);
}
VOP_UNLOCK(vp, 0, p);
+ vn_finished_write(mp);
if (!error)
return;
/*
diff --git a/sys/kern/kern_sig.c b/sys/kern/kern_sig.c
index e96f471..2d87b63 100644
--- a/sys/kern/kern_sig.c
+++ b/sys/kern/kern_sig.c
@@ -1599,6 +1599,7 @@ coredump(p)
struct nameidata nd;
struct vattr vattr;
int error, error1, flags;
+ struct mount *mp;
char *name; /* name of corefile */
off_t limit;
@@ -1619,6 +1620,7 @@ coredump(p)
if (limit == 0)
return 0;
+restart:
name = expand_name(p->p_comm, p->p_ucred->cr_uid, p->p_pid);
NDINIT(&nd, LOOKUP, NOFOLLOW, UIO_SYSSPACE, name, p);
flags = O_CREAT | FWRITE | O_NOFOLLOW;
@@ -1628,6 +1630,14 @@ coredump(p)
return (error);
NDFREE(&nd, NDF_ONLY_PNBUF);
vp = nd.ni_vp;
+ if (vn_start_write(vp, &mp, V_NOWAIT) != 0) {
+ VOP_UNLOCK(vp, 0, p);
+ if ((error = vn_close(vp, FWRITE, cred, p)) != 0)
+ return (error);
+ if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0)
+ return (error);
+ goto restart;
+ }
/* Don't dump to non-regular files or files with links. */
if (vp->v_type != VREG ||
@@ -1647,6 +1657,7 @@ coredump(p)
out:
VOP_UNLOCK(vp, 0, p);
+ vn_finished_write(mp);
error1 = vn_close(vp, FWRITE, cred, p);
if (error == 0)
error = error1;
diff --git a/sys/kern/tty_tty.c b/sys/kern/tty_tty.c
index 2d15c70..66f7a7b 100644
--- a/sys/kern/tty_tty.c
+++ b/sys/kern/tty_tty.c
@@ -133,13 +133,19 @@ cttywrite(dev, uio, flag)
{
struct proc *p = uio->uio_procp;
struct vnode *ttyvp = cttyvp(uio->uio_procp);
+ struct mount *mp;
int error;
if (ttyvp == NULL)
return (EIO);
+ mp = NULL;
+ if (ttyvp->v_type != VCHR &&
+ (error = vn_start_write(ttyvp, &mp, V_WAIT | PCATCH)) != 0)
+ return (error);
vn_lock(ttyvp, LK_EXCLUSIVE | LK_RETRY, p);
error = VOP_WRITE(ttyvp, uio, flag, NOCRED);
VOP_UNLOCK(ttyvp, 0, p);
+ vn_finished_write(mp);
return (error);
}
diff --git a/sys/kern/uipc_usrreq.c b/sys/kern/uipc_usrreq.c
index 0103877..a0b4072 100644
--- a/sys/kern/uipc_usrreq.c
+++ b/sys/kern/uipc_usrreq.c
@@ -538,7 +538,8 @@ unp_bind(unp, nam, p)
struct proc *p;
{
struct sockaddr_un *soun = (struct sockaddr_un *)nam;
- register struct vnode *vp;
+ struct vnode *vp;
+ struct mount *mp;
struct vattr vattr;
int error, namelen;
struct nameidata nd;
@@ -552,6 +553,7 @@ unp_bind(unp, nam, p)
return EINVAL;
strncpy(buf, soun->sun_path, namelen);
buf[namelen] = 0; /* null-terminate the string */
+restart:
NDINIT(&nd, CREATE, NOFOLLOW | LOCKPARENT, UIO_SYSSPACE,
buf, p);
/* SHOULD BE ABLE TO ADOPT EXISTING AND wakeup() ALA FIFO's */
@@ -559,14 +561,19 @@ unp_bind(unp, nam, p)
if (error)
return (error);
vp = nd.ni_vp;
- if (vp != NULL) {
+ if (vp != NULL || vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
NDFREE(&nd, NDF_ONLY_PNBUF);
if (nd.ni_dvp == vp)
vrele(nd.ni_dvp);
else
vput(nd.ni_dvp);
- vrele(vp);
- return (EADDRINUSE);
+ if (vp != NULL) {
+ vrele(vp);
+ return (EADDRINUSE);
+ }
+ if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0)
+ return (error);
+ goto restart;
}
VATTR_NULL(&vattr);
vattr.va_type = VSOCK;
@@ -582,6 +589,7 @@ unp_bind(unp, nam, p)
unp->unp_vnode = vp;
unp->unp_addr = (struct sockaddr_un *)dup_sockaddr(nam, 1);
VOP_UNLOCK(vp, 0, p);
+ vn_finished_write(mp);
return (0);
}
diff --git a/sys/kern/vfs_bio.c b/sys/kern/vfs_bio.c
index dba2151..96fbd63 100644
--- a/sys/kern/vfs_bio.c
+++ b/sys/kern/vfs_bio.c
@@ -1165,6 +1165,8 @@ brelse(struct buf * bp)
BUF_UNLOCK(bp);
bp->b_flags &= ~(B_ASYNC | B_NOCACHE | B_AGE | B_RELBUF);
bp->b_ioflags &= ~BIO_ORDERED;
+ if ((bp->b_flags & B_DELWRI) == 0 && (bp->b_xflags & BX_VNDIRTY))
+ panic("brelse: not dirty");
splx(s);
}
@@ -1225,6 +1227,8 @@ bqrelse(struct buf * bp)
BUF_UNLOCK(bp);
bp->b_flags &= ~(B_ASYNC | B_NOCACHE | B_AGE | B_RELBUF);
bp->b_ioflags &= ~BIO_ORDERED;
+ if ((bp->b_flags & B_DELWRI) == 0 && (bp->b_xflags & BX_VNDIRTY))
+ panic("bqrelse: not dirty");
splx(s);
}
@@ -1420,7 +1424,7 @@ getnewbuf(int slpflag, int slptimeo, int size, int maxsize)
int isspecial;
static int flushingbufs;
- if (curproc && (curproc->p_flag & P_BUFEXHAUST) == 0)
+ if (curproc && (curproc->p_flag & (P_COWINPROGRESS|P_BUFEXHAUST)) == 0)
isspecial = 0;
else
isspecial = 1;
diff --git a/sys/kern/vfs_default.c b/sys/kern/vfs_default.c
index f478aa2..00f9beb 100644
--- a/sys/kern/vfs_default.c
+++ b/sys/kern/vfs_default.c
@@ -500,6 +500,21 @@ vop_noislocked(ap)
return (lockstatus(vp->v_vnlock, ap->a_p));
}
+/*
+ * Return our mount point, as we will take charge of the writes.
+ */
+int
+vop_stdgetwritemount(ap)
+ struct vop_getwritemount_args /* {
+ struct vnode *a_vp;
+ struct mount **a_mpp;
+ } */ *ap;
+{
+
+ *(ap->a_mpp) = ap->a_vp->v_mount;
+ return (0);
+}
+
/*
* vfs default ops
* used to fill the vfs fucntion table to get reasonable default return values.
diff --git a/sys/kern/vfs_export.c b/sys/kern/vfs_export.c
index 6483660..0e5ec3f 100644
--- a/sys/kern/vfs_export.c
+++ b/sys/kern/vfs_export.c
@@ -453,6 +453,7 @@ getnewvnode(tag, mp, vops, vpp)
int s, count;
struct proc *p = curproc; /* XXX */
struct vnode *vp = NULL;
+ struct mount *vnmp;
vm_object_t object;
/*
@@ -491,7 +492,14 @@ getnewvnode(tag, mp, vops, vpp)
vp = NULL;
continue;
}
- break;
+ /*
+ * Skip over it if its filesystem is being suspended.
+ */
+ if (vn_start_write(vp, &vnmp, V_NOWAIT) == 0)
+ break;
+ simple_unlock(&vp->v_interlock);
+ TAILQ_INSERT_TAIL(&vnode_free_list, vp, v_freelist);
+ vp = NULL;
}
if (vp) {
vp->v_flag |= VDOOMED;
@@ -504,6 +512,7 @@ getnewvnode(tag, mp, vops, vpp)
} else {
simple_unlock(&vp->v_interlock);
}
+ vn_finished_write(vnmp);
#ifdef INVARIANTS
{
@@ -515,6 +524,8 @@ getnewvnode(tag, mp, vops, vpp)
if (vp->v_numoutput)
panic("Clean vnode has pending I/O's");
splx(s);
+ if (vp->v_writecount != 0)
+ panic("Non-zero write count");
}
#endif
vp->v_flag = 0;
@@ -523,7 +534,6 @@ getnewvnode(tag, mp, vops, vpp)
vp->v_cstart = 0;
vp->v_clen = 0;
vp->v_socket = 0;
- vp->v_writecount = 0; /* XXX */
} else {
simple_unlock(&vnode_free_list_slock);
vp = (struct vnode *) zalloc(vnode_zone);
@@ -946,6 +956,7 @@ sched_sync(void)
{
struct synclist *slp;
struct vnode *vp;
+ struct mount *mp;
long starttime;
int s;
struct proc *p = updateproc;
@@ -970,10 +981,12 @@ sched_sync(void)
splx(s);
while ((vp = LIST_FIRST(slp)) != NULL) {
- if (VOP_ISLOCKED(vp, NULL) == 0) {
+ if (VOP_ISLOCKED(vp, NULL) == 0 &&
+ vn_start_write(vp, &mp, V_NOWAIT) == 0) {
vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p);
(void) VOP_FSYNC(vp, p->p_ucred, MNT_LAZY, p);
VOP_UNLOCK(vp, 0, p);
+ vn_finished_write(mp);
}
s = splbio();
if (LIST_FIRST(slp) == vp) {
@@ -1386,6 +1399,7 @@ vrele(vp)
struct proc *p = curproc; /* XXX */
KASSERT(vp != NULL, ("vrele: null vp"));
+ KASSERT(vp->v_writecount < vp->v_usecount, ("vrele: missed vn_close"));
simple_lock(&vp->v_interlock);
@@ -1427,6 +1441,7 @@ vput(vp)
struct proc *p = curproc; /* XXX */
KASSERT(vp != NULL, ("vput: null vp"));
+ KASSERT(vp->v_writecount < vp->v_usecount, ("vput: missed vn_close"));
simple_lock(&vp->v_interlock);
@@ -1632,6 +1647,8 @@ vclean(vp, flags, p)
* If the flush fails, just toss the buffers.
*/
if (flags & DOCLOSE) {
+ if (TAILQ_FIRST(&vp->v_dirtyblkhd) != NULL)
+ (void) vn_write_suspend_wait(vp, V_WAIT);
if (vinvalbuf(vp, V_SAVE, NOCRED, p, 0, 0) != 0)
vinvalbuf(vp, 0, NOCRED, p, 0, 0);
}
@@ -2785,12 +2802,18 @@ sync_fsync(ap)
simple_unlock(&mountlist_slock);
return (0);
}
+ if (vn_start_write(NULL, &mp, V_NOWAIT) != 0) {
+ vfs_unbusy(mp, p);
+ simple_unlock(&mountlist_slock);
+ return (0);
+ }
asyncflag = mp->mnt_flag & MNT_ASYNC;
mp->mnt_flag &= ~MNT_ASYNC;
vfs_msync(mp, MNT_NOWAIT);
VFS_SYNC(mp, MNT_LAZY, ap->a_cred, p);
if (asyncflag)
mp->mnt_flag |= MNT_ASYNC;
+ vn_finished_write(mp);
vfs_unbusy(mp, p);
return (0);
}
diff --git a/sys/kern/vfs_extattr.c b/sys/kern/vfs_extattr.c
index 65a297ca..404114a 100644
--- a/sys/kern/vfs_extattr.c
+++ b/sys/kern/vfs_extattr.c
@@ -164,8 +164,8 @@ mount(p, uap)
vput(vp);
return (EOPNOTSUPP); /* Needs translation */
}
- mp->mnt_flag |=
- SCARG(uap, flags) & (MNT_RELOAD | MNT_FORCE | MNT_UPDATE);
+ mp->mnt_flag |= SCARG(uap, flags) &
+ (MNT_RELOAD | MNT_FORCE | MNT_UPDATE | MNT_SNAPSHOT);
/*
* Only root, or the user that did the original mount is
* permitted to update it.
@@ -303,7 +303,8 @@ update:
vrele(vp);
if (mp->mnt_kern_flag & MNTK_WANTRDWR)
mp->mnt_flag &= ~MNT_RDONLY;
- mp->mnt_flag &=~ (MNT_UPDATE | MNT_RELOAD | MNT_FORCE);
+ mp->mnt_flag &=~
+ (MNT_UPDATE | MNT_RELOAD | MNT_FORCE | MNT_SNAPSHOT);
mp->mnt_kern_flag &=~ MNTK_WANTRDWR;
if (error) {
mp->mnt_flag = flag;
@@ -458,7 +459,7 @@ unmount(p, uap)
*/
int
dounmount(mp, flags, p)
- register struct mount *mp;
+ struct mount *mp;
int flags;
struct proc *p;
{
@@ -469,6 +470,7 @@ dounmount(mp, flags, p)
simple_lock(&mountlist_slock);
mp->mnt_kern_flag |= MNTK_UNMOUNT;
lockmgr(&mp->mnt_lock, LK_DRAIN | LK_INTERLOCK, &mountlist_slock, p);
+ vn_start_write(NULL, &mp, V_WAIT);
if (mp->mnt_flag & MNT_EXPUBLIC)
vfs_setpublicfs(NULL, NULL, NULL);
@@ -481,8 +483,10 @@ dounmount(mp, flags, p)
vrele(mp->mnt_syncer);
if (((mp->mnt_flag & MNT_RDONLY) ||
(error = VFS_SYNC(mp, MNT_WAIT, p->p_ucred, p)) == 0) ||
- (flags & MNT_FORCE))
+ (flags & MNT_FORCE)) {
error = VFS_UNMOUNT(mp, flags, p);
+ }
+ vn_finished_write(mp);
simple_lock(&mountlist_slock);
if (error) {
if ((mp->mnt_flag & MNT_RDONLY) == 0 && mp->mnt_syncer == NULL)
@@ -530,7 +534,7 @@ sync(p, uap)
struct proc *p;
struct sync_args *uap;
{
- register struct mount *mp, *nmp;
+ struct mount *mp, *nmp;
int asyncflag;
simple_lock(&mountlist_slock);
@@ -539,13 +543,15 @@ sync(p, uap)
nmp = TAILQ_NEXT(mp, mnt_list);
continue;
}
- if ((mp->mnt_flag & MNT_RDONLY) == 0) {
+ if ((mp->mnt_flag & MNT_RDONLY) == 0 &&
+ vn_start_write(NULL, &mp, V_NOWAIT) == 0) {
asyncflag = mp->mnt_flag & MNT_ASYNC;
mp->mnt_flag &= ~MNT_ASYNC;
vfs_msync(mp, MNT_NOWAIT);
VFS_SYNC(mp, MNT_NOWAIT,
- ((p != NULL) ? p->p_ucred : NOCRED), p);
+ ((p != NULL) ? p->p_ucred : NOCRED), p);
mp->mnt_flag |= asyncflag;
+ vn_finished_write(mp);
}
simple_lock(&mountlist_slock);
nmp = TAILQ_NEXT(mp, mnt_list);
@@ -593,7 +599,7 @@ quotactl(p, uap)
syscallarg(caddr_t) arg;
} */ *uap;
{
- register struct mount *mp;
+ struct mount *mp;
int error;
struct nameidata nd;
@@ -602,11 +608,15 @@ quotactl(p, uap)
NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, SCARG(uap, path), p);
if ((error = namei(&nd)) != 0)
return (error);
- mp = nd.ni_vp->v_mount;
NDFREE(&nd, NDF_ONLY_PNBUF);
+ error = vn_start_write(nd.ni_vp, &mp, V_WAIT | PCATCH);
vrele(nd.ni_vp);
- return (VFS_QUOTACTL(mp, SCARG(uap, cmd), SCARG(uap, uid),
- SCARG(uap, arg), p));
+ if (error)
+ return (error);
+ error = VFS_QUOTACTL(mp, SCARG(uap, cmd), SCARG(uap, uid),
+ SCARG(uap, arg), p);
+ vn_finished_write(mp);
+ return (error);
}
/*
@@ -972,6 +982,7 @@ open(p, uap)
struct file *fp;
struct vnode *vp;
struct vattr vat;
+ struct mount *mp;
int cmode, flags, oflags;
struct file *nfp;
int type, indx, error;
@@ -1029,12 +1040,15 @@ open(p, uap)
fp->f_flag |= FHASLOCK;
}
if (flags & O_TRUNC) {
+ if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
+ goto bad;
VOP_LEASE(vp, p, p->p_ucred, LEASE_WRITE);
VATTR_NULL(&vat);
vat.va_size = 0;
vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p);
error = VOP_SETATTR(vp, &vat, p->p_ucred, p);
VOP_UNLOCK(vp, 0, p);
+ vn_finished_write(mp);
if (error)
goto bad;
}
@@ -1101,7 +1115,8 @@ mknod(p, uap)
syscallarg(int) dev;
} */ *uap;
{
- register struct vnode *vp;
+ struct vnode *vp;
+ struct mount *mp;
struct vattr vattr;
int error;
int whiteout = 0;
@@ -1118,14 +1133,16 @@ mknod(p, uap)
}
if (error)
return (error);
+restart:
bwillwrite();
NDINIT(&nd, CREATE, LOCKPARENT, UIO_USERSPACE, SCARG(uap, path), p);
if ((error = namei(&nd)) != 0)
return (error);
vp = nd.ni_vp;
- if (vp != NULL)
+ if (vp != NULL) {
+ vrele(vp);
error = EEXIST;
- else {
+ } else {
VATTR_NULL(&vattr);
vattr.va_mode = (SCARG(uap, mode) & ALLPERMS) &~ p->p_fd->fd_cmask;
vattr.va_rdev = SCARG(uap, dev);
@@ -1149,6 +1166,13 @@ mknod(p, uap)
break;
}
}
+ if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
+ NDFREE(&nd, NDF_ONLY_PNBUF);
+ vput(nd.ni_dvp);
+ if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0)
+ return (error);
+ goto restart;
+ }
if (!error) {
VOP_LEASE(nd.ni_dvp, p, p->p_ucred, LEASE_WRITE);
if (whiteout)
@@ -1159,17 +1183,10 @@ mknod(p, uap)
if (error == 0)
vput(nd.ni_vp);
}
- NDFREE(&nd, NDF_ONLY_PNBUF);
- vput(nd.ni_dvp);
- } else {
- NDFREE(&nd, NDF_ONLY_PNBUF);
- if (nd.ni_dvp == vp)
- vrele(nd.ni_dvp);
- else
- vput(nd.ni_dvp);
- if (vp)
- vrele(vp);
}
+ NDFREE(&nd, NDF_ONLY_PNBUF);
+ vput(nd.ni_dvp);
+ vn_finished_write(mp);
ASSERT_VOP_UNLOCKED(nd.ni_dvp, "mknod");
ASSERT_VOP_UNLOCKED(nd.ni_vp, "mknod");
return (error);
@@ -1193,23 +1210,29 @@ mkfifo(p, uap)
syscallarg(int) mode;
} */ *uap;
{
+ struct mount *mp;
struct vattr vattr;
int error;
struct nameidata nd;
+restart:
bwillwrite();
NDINIT(&nd, CREATE, LOCKPARENT, UIO_USERSPACE, SCARG(uap, path), p);
if ((error = namei(&nd)) != 0)
return (error);
if (nd.ni_vp != NULL) {
NDFREE(&nd, NDF_ONLY_PNBUF);
- if (nd.ni_dvp == nd.ni_vp)
- vrele(nd.ni_dvp);
- else
- vput(nd.ni_dvp);
vrele(nd.ni_vp);
+ vput(nd.ni_dvp);
return (EEXIST);
}
+ if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
+ NDFREE(&nd, NDF_ONLY_PNBUF);
+ vput(nd.ni_dvp);
+ if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0)
+ return (error);
+ goto restart;
+ }
VATTR_NULL(&vattr);
vattr.va_type = VFIFO;
vattr.va_mode = (SCARG(uap, mode) & ALLPERMS) &~ p->p_fd->fd_cmask;
@@ -1219,6 +1242,7 @@ mkfifo(p, uap)
vput(nd.ni_vp);
NDFREE(&nd, NDF_ONLY_PNBUF);
vput(nd.ni_dvp);
+ vn_finished_write(mp);
return (error);
}
@@ -1240,7 +1264,8 @@ link(p, uap)
syscallarg(char *) link;
} */ *uap;
{
- register struct vnode *vp;
+ struct vnode *vp;
+ struct mount *mp;
struct nameidata nd;
int error;
@@ -1250,30 +1275,29 @@ link(p, uap)
return (error);
NDFREE(&nd, NDF_ONLY_PNBUF);
vp = nd.ni_vp;
- if (vp->v_type == VDIR)
- error = EPERM; /* POSIX */
- else {
- NDINIT(&nd, CREATE, LOCKPARENT|NOOBJ, UIO_USERSPACE, SCARG(uap, link), p);
- error = namei(&nd);
- if (!error) {
- if (nd.ni_vp != NULL) {
- if (nd.ni_vp)
- vrele(nd.ni_vp);
- error = EEXIST;
- } else {
- VOP_LEASE(nd.ni_dvp, p, p->p_ucred,
- LEASE_WRITE);
- VOP_LEASE(vp, p, p->p_ucred, LEASE_WRITE);
- error = VOP_LINK(nd.ni_dvp, vp, &nd.ni_cnd);
- }
- NDFREE(&nd, NDF_ONLY_PNBUF);
- if (nd.ni_dvp == nd.ni_vp)
- vrele(nd.ni_dvp);
- else
- vput(nd.ni_dvp);
+ if (vp->v_type == VDIR) {
+ vrele(vp);
+ return (EPERM); /* POSIX */
+ }
+ if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0) {
+ vrele(vp);
+ return (error);
+ }
+ NDINIT(&nd, CREATE, LOCKPARENT|NOOBJ, UIO_USERSPACE, SCARG(uap, link), p);
+ if ((error = namei(&nd)) == 0) {
+ if (nd.ni_vp != NULL) {
+ vrele(nd.ni_vp);
+ error = EEXIST;
+ } else {
+ VOP_LEASE(nd.ni_dvp, p, p->p_ucred, LEASE_WRITE);
+ VOP_LEASE(vp, p, p->p_ucred, LEASE_WRITE);
+ error = VOP_LINK(nd.ni_dvp, vp, &nd.ni_cnd);
}
+ NDFREE(&nd, NDF_ONLY_PNBUF);
+ vput(nd.ni_dvp);
}
vrele(vp);
+ vn_finished_write(mp);
ASSERT_VOP_UNLOCKED(nd.ni_dvp, "link");
ASSERT_VOP_UNLOCKED(nd.ni_vp, "link");
return (error);
@@ -1297,6 +1321,7 @@ symlink(p, uap)
syscallarg(char *) link;
} */ *uap;
{
+ struct mount *mp;
struct vattr vattr;
char *path;
int error;
@@ -1305,20 +1330,25 @@ symlink(p, uap)
path = zalloc(namei_zone);
if ((error = copyinstr(SCARG(uap, path), path, MAXPATHLEN, NULL)) != 0)
goto out;
+restart:
bwillwrite();
NDINIT(&nd, CREATE, LOCKPARENT|NOOBJ, UIO_USERSPACE, SCARG(uap, link), p);
if ((error = namei(&nd)) != 0)
goto out;
if (nd.ni_vp) {
NDFREE(&nd, NDF_ONLY_PNBUF);
- if (nd.ni_dvp == nd.ni_vp)
- vrele(nd.ni_dvp);
- else
- vput(nd.ni_dvp);
vrele(nd.ni_vp);
+ vput(nd.ni_dvp);
error = EEXIST;
goto out;
}
+ if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
+ NDFREE(&nd, NDF_ONLY_PNBUF);
+ vput(nd.ni_dvp);
+ if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0)
+ goto out; /* was return: skipped the out: exit every other error path after zalloc(path) takes */
+ goto restart;
+ }
VATTR_NULL(&vattr);
vattr.va_mode = ACCESSPERMS &~ p->p_fd->fd_cmask;
VOP_LEASE(nd.ni_dvp, p, p->p_ucred, LEASE_WRITE);
@@ -1327,6 +1357,7 @@ symlink(p, uap)
if (error == 0)
vput(nd.ni_vp);
vput(nd.ni_dvp);
+ vn_finished_write(mp);
ASSERT_VOP_UNLOCKED(nd.ni_dvp, "symlink");
ASSERT_VOP_UNLOCKED(nd.ni_vp, "symlink");
out:
@@ -1346,8 +1377,10 @@ undelete(p, uap)
} */ *uap;
{
int error;
+ struct mount *mp;
struct nameidata nd;
+restart:
bwillwrite();
NDINIT(&nd, DELETE, LOCKPARENT|DOWHITEOUT, UIO_USERSPACE,
SCARG(uap, path), p);
@@ -1357,19 +1390,23 @@ undelete(p, uap)
if (nd.ni_vp != NULLVP || !(nd.ni_cnd.cn_flags & ISWHITEOUT)) {
NDFREE(&nd, NDF_ONLY_PNBUF);
- if (nd.ni_dvp == nd.ni_vp)
- vrele(nd.ni_dvp);
- else
- vput(nd.ni_dvp);
if (nd.ni_vp)
vrele(nd.ni_vp);
+ vput(nd.ni_dvp);
return (EEXIST);
}
-
+ if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
+ NDFREE(&nd, NDF_ONLY_PNBUF);
+ vput(nd.ni_dvp);
+ if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0)
+ return (error);
+ goto restart;
+ }
VOP_LEASE(nd.ni_dvp, p, p->p_ucred, LEASE_WRITE);
error = VOP_WHITEOUT(nd.ni_dvp, &nd.ni_cnd, DELETE);
NDFREE(&nd, NDF_ONLY_PNBUF);
vput(nd.ni_dvp);
+ vn_finished_write(mp);
ASSERT_VOP_UNLOCKED(nd.ni_dvp, "undelete");
ASSERT_VOP_UNLOCKED(nd.ni_vp, "undelete");
return (error);
@@ -1391,18 +1428,17 @@ unlink(p, uap)
syscallarg(char *) path;
} */ *uap;
{
- register struct vnode *vp;
+ struct mount *mp;
+ struct vnode *vp;
int error;
struct nameidata nd;
+restart:
bwillwrite();
NDINIT(&nd, DELETE, LOCKPARENT, UIO_USERSPACE, SCARG(uap, path), p);
if ((error = namei(&nd)) != 0)
return (error);
vp = nd.ni_vp;
- VOP_LEASE(vp, p, p->p_ucred, LEASE_WRITE);
- vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p);
-
if (vp->v_type == VDIR)
error = EPERM; /* POSIX */
else {
@@ -1414,18 +1450,24 @@ unlink(p, uap)
if (vp->v_flag & VROOT)
error = EBUSY;
}
-
+ if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
+ NDFREE(&nd, NDF_ONLY_PNBUF);
+ vrele(vp);
+ vput(nd.ni_dvp);
+ if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0)
+ return (error);
+ goto restart;
+ }
+ VOP_LEASE(vp, p, p->p_ucred, LEASE_WRITE);
+ vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p);
if (!error) {
VOP_LEASE(nd.ni_dvp, p, p->p_ucred, LEASE_WRITE);
error = VOP_REMOVE(nd.ni_dvp, vp, &nd.ni_cnd);
}
NDFREE(&nd, NDF_ONLY_PNBUF);
- if (nd.ni_dvp == vp)
- vrele(nd.ni_dvp);
- else
- vput(nd.ni_dvp);
- if (vp != NULLVP)
- vput(vp);
+ vput(nd.ni_dvp);
+ vput(vp);
+ vn_finished_write(mp);
ASSERT_VOP_UNLOCKED(nd.ni_dvp, "unlink");
ASSERT_VOP_UNLOCKED(nd.ni_vp, "unlink");
return (error);
@@ -1936,6 +1978,7 @@ setfflags(p, vp, flags)
int flags;
{
int error;
+ struct mount *mp;
struct vattr vattr;
/*
@@ -1948,12 +1991,15 @@ setfflags(p, vp, flags)
((error = suser_xxx(p->p_ucred, p, PRISON_ROOT)) != 0))
return (error);
+ if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
+ return (error);
VOP_LEASE(vp, p, p->p_ucred, LEASE_WRITE);
vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p);
VATTR_NULL(&vattr);
vattr.va_flags = flags;
error = VOP_SETATTR(vp, &vattr, p->p_ucred, p);
VOP_UNLOCK(vp, 0, p);
+ vn_finished_write(mp);
return (error);
}
@@ -2020,14 +2066,18 @@ setfmode(p, vp, mode)
int mode;
{
int error;
+ struct mount *mp;
struct vattr vattr;
+ if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
+ return (error);
VOP_LEASE(vp, p, p->p_ucred, LEASE_WRITE);
vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p);
VATTR_NULL(&vattr);
vattr.va_mode = mode & ALLPERMS;
error = VOP_SETATTR(vp, &vattr, p->p_ucred, p);
VOP_UNLOCK(vp, 0, p);
+ vn_finished_write(mp);
return error;
}
@@ -2125,8 +2175,11 @@ setfown(p, vp, uid, gid)
gid_t gid;
{
int error;
+ struct mount *mp;
struct vattr vattr;
+ if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
+ return (error);
VOP_LEASE(vp, p, p->p_ucred, LEASE_WRITE);
vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p);
VATTR_NULL(&vattr);
@@ -2134,6 +2187,7 @@ setfown(p, vp, uid, gid)
vattr.va_gid = gid;
error = VOP_SETATTR(vp, &vattr, p->p_ucred, p);
VOP_UNLOCK(vp, 0, p);
+ vn_finished_write(mp);
return error;
}
@@ -2259,8 +2313,11 @@ setutimes(p, vp, ts, nullflag)
int nullflag;
{
int error;
+ struct mount *mp;
struct vattr vattr;
+ if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
+ return (error);
VOP_LEASE(vp, p, p->p_ucred, LEASE_WRITE);
vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p);
VATTR_NULL(&vattr);
@@ -2270,6 +2327,7 @@ setutimes(p, vp, ts, nullflag)
vattr.va_vaflags |= VA_UTIMES_NULL;
error = VOP_SETATTR(vp, &vattr, p->p_ucred, p);
VOP_UNLOCK(vp, 0, p);
+ vn_finished_write(mp);
return error;
}
@@ -2394,7 +2452,8 @@ truncate(p, uap)
syscallarg(off_t) length;
} */ *uap;
{
- register struct vnode *vp;
+ struct mount *mp;
+ struct vnode *vp;
struct vattr vattr;
int error;
struct nameidata nd;
@@ -2405,6 +2464,10 @@ truncate(p, uap)
if ((error = namei(&nd)) != 0)
return (error);
vp = nd.ni_vp;
+ if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0) {
+ vrele(vp);
+ return (error);
+ }
NDFREE(&nd, NDF_ONLY_PNBUF);
VOP_LEASE(vp, p, p->p_ucred, LEASE_WRITE);
vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p);
@@ -2417,6 +2480,7 @@ truncate(p, uap)
error = VOP_SETATTR(vp, &vattr, p->p_ucred, p);
}
vput(vp);
+ vn_finished_write(mp);
return (error);
}
@@ -2440,6 +2504,7 @@ ftruncate(p, uap)
syscallarg(off_t) length;
} */ *uap;
{
+ struct mount *mp;
struct vattr vattr;
struct vnode *vp;
struct file *fp;
@@ -2452,6 +2517,8 @@ ftruncate(p, uap)
if ((fp->f_flag & FWRITE) == 0)
return (EINVAL);
vp = (struct vnode *)fp->f_data;
+ if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
+ return (error);
VOP_LEASE(vp, p, p->p_ucred, LEASE_WRITE);
vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p);
if (vp->v_type == VDIR)
@@ -2462,6 +2529,7 @@ ftruncate(p, uap)
error = VOP_SETATTR(vp, &vattr, fp->f_cred, p);
}
VOP_UNLOCK(vp, 0, p);
+ vn_finished_write(mp);
return (error);
}
@@ -2541,13 +2609,16 @@ fsync(p, uap)
syscallarg(int) fd;
} */ *uap;
{
- register struct vnode *vp;
+ struct vnode *vp;
+ struct mount *mp;
struct file *fp;
int error;
if ((error = getvnode(p->p_fd, SCARG(uap, fd), &fp)) != 0)
return (error);
vp = (struct vnode *)fp->f_data;
+ if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
+ return (error);
vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p);
if (vp->v_object)
vm_object_page_clean(vp->v_object, 0, 0, 0);
@@ -2558,6 +2629,7 @@ fsync(p, uap)
#endif
VOP_UNLOCK(vp, 0, p);
+ vn_finished_write(mp);
return (error);
}
@@ -2580,7 +2652,8 @@ rename(p, uap)
syscallarg(char *) to;
} */ *uap;
{
- register struct vnode *tvp, *fvp, *tdvp;
+ struct mount *mp;
+ struct vnode *tvp, *fvp, *tdvp;
struct nameidata fromnd, tond;
int error;
@@ -2590,6 +2663,12 @@ rename(p, uap)
if ((error = namei(&fromnd)) != 0)
return (error);
fvp = fromnd.ni_vp;
+ if ((error = vn_start_write(fvp, &mp, V_WAIT | PCATCH)) != 0) {
+ NDFREE(&fromnd, NDF_ONLY_PNBUF);
+ vrele(fromnd.ni_dvp);
+ vrele(fvp);
+ goto out1;
+ }
NDINIT(&tond, RENAME, LOCKPARENT | LOCKLEAF | NOCACHE | SAVESTART | NOOBJ,
UIO_USERSPACE, SCARG(uap, to), p);
if (fromnd.ni_vp->v_type == VDIR)
@@ -2652,6 +2731,7 @@ out:
vrele(fvp);
}
vrele(tond.ni_startdir);
+ vn_finished_write(mp);
ASSERT_VOP_UNLOCKED(fromnd.ni_dvp, "rename");
ASSERT_VOP_UNLOCKED(fromnd.ni_vp, "rename");
ASSERT_VOP_UNLOCKED(tond.ni_dvp, "rename");
@@ -2682,11 +2762,13 @@ mkdir(p, uap)
syscallarg(int) mode;
} */ *uap;
{
- register struct vnode *vp;
+ struct mount *mp;
+ struct vnode *vp;
struct vattr vattr;
int error;
struct nameidata nd;
+restart:
bwillwrite();
NDINIT(&nd, CREATE, LOCKPARENT, UIO_USERSPACE, SCARG(uap, path), p);
nd.ni_cnd.cn_flags |= WILLBEDIR;
@@ -2695,13 +2777,17 @@ mkdir(p, uap)
vp = nd.ni_vp;
if (vp != NULL) {
NDFREE(&nd, NDF_ONLY_PNBUF);
- if (nd.ni_dvp == vp)
- vrele(nd.ni_dvp);
- else
- vput(nd.ni_dvp);
vrele(vp);
+ vput(nd.ni_dvp);
return (EEXIST);
}
+ if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
+ NDFREE(&nd, NDF_ONLY_PNBUF);
+ vput(nd.ni_dvp);
+ if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0)
+ return (error);
+ goto restart;
+ }
VATTR_NULL(&vattr);
vattr.va_type = VDIR;
vattr.va_mode = (SCARG(uap, mode) & ACCESSPERMS) &~ p->p_fd->fd_cmask;
@@ -2711,6 +2797,7 @@ mkdir(p, uap)
vput(nd.ni_dvp);
if (!error)
vput(nd.ni_vp);
+ vn_finished_write(mp);
ASSERT_VOP_UNLOCKED(nd.ni_dvp, "mkdir");
ASSERT_VOP_UNLOCKED(nd.ni_vp, "mkdir");
return (error);
@@ -2732,10 +2819,12 @@ rmdir(p, uap)
syscallarg(char *) path;
} */ *uap;
{
- register struct vnode *vp;
+ struct mount *mp;
+ struct vnode *vp;
int error;
struct nameidata nd;
+restart:
bwillwrite();
NDINIT(&nd, DELETE, LOCKPARENT | LOCKLEAF, UIO_USERSPACE,
SCARG(uap, path), p);
@@ -2756,21 +2845,32 @@ rmdir(p, uap)
/*
* The root of a mounted filesystem cannot be deleted.
*/
- if (vp->v_flag & VROOT)
+ if (vp->v_flag & VROOT) {
error = EBUSY;
- else {
- VOP_LEASE(nd.ni_dvp, p, p->p_ucred, LEASE_WRITE);
- VOP_LEASE(vp, p, p->p_ucred, LEASE_WRITE);
- error = VOP_RMDIR(nd.ni_dvp, nd.ni_vp, &nd.ni_cnd);
+ goto out;
}
+ if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
+ NDFREE(&nd, NDF_ONLY_PNBUF);
+ if (nd.ni_dvp == vp)
+ vrele(nd.ni_dvp);
+ else
+ vput(nd.ni_dvp);
+ vput(vp);
+ if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0)
+ return (error);
+ goto restart;
+ }
+ VOP_LEASE(nd.ni_dvp, p, p->p_ucred, LEASE_WRITE);
+ VOP_LEASE(vp, p, p->p_ucred, LEASE_WRITE);
+ error = VOP_RMDIR(nd.ni_dvp, nd.ni_vp, &nd.ni_cnd);
+ vn_finished_write(mp);
out:
NDFREE(&nd, NDF_ONLY_PNBUF);
if (nd.ni_dvp == vp)
vrele(nd.ni_dvp);
else
vput(nd.ni_dvp);
- if (vp != NULLVP)
- vput(vp);
+ vput(vp);
ASSERT_VOP_UNLOCKED(nd.ni_dvp, "rmdir");
ASSERT_VOP_UNLOCKED(nd.ni_vp, "rmdir");
return (error);
@@ -3049,7 +3149,8 @@ revoke(p, uap)
syscallarg(char *) path;
} */ *uap;
{
- register struct vnode *vp;
+ struct mount *mp;
+ struct vnode *vp;
struct vattr vattr;
int error;
struct nameidata nd;
@@ -3068,8 +3169,11 @@ revoke(p, uap)
if (p->p_ucred->cr_uid != vattr.va_uid &&
(error = suser_xxx(0, p, PRISON_ROOT)))
goto out;
+ if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
+ goto out;
if (vcount(vp) > 1)
VOP_REVOKE(vp, REVOKEALL);
+ vn_finished_write(mp);
out:
vrele(vp);
return (error);
@@ -3228,11 +3332,16 @@ fhopen(p, uap)
}
if (fmode & O_TRUNC) {
VOP_UNLOCK(vp, 0, p); /* XXX */
+ if ((error = vn_start_write(NULL, &mp, V_WAIT | PCATCH)) != 0) {
+ vrele(vp);
+ return (error);
+ }
VOP_LEASE(vp, p, p->p_ucred, LEASE_WRITE);
vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p); /* XXX */
VATTR_NULL(vap);
vap->va_size = 0;
error = VOP_SETATTR(vp, vap, p->p_ucred, p);
+ vn_finished_write(mp);
if (error)
goto bad;
}
@@ -3407,10 +3516,15 @@ extattrctl(p, uap)
NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, SCARG(uap, path), p);
if ((error = namei(&nd)) != 0)
return (error);
- mp = nd.ni_vp->v_mount;
+ error = vn_start_write(nd.ni_vp, &mp, V_WAIT | PCATCH);
NDFREE(&nd, 0);
- return (VFS_EXTATTRCTL(mp, SCARG(uap, cmd), SCARG(uap, attrname),
- SCARG(uap, arg), p));
+ vrele(nd.ni_vp);
+ if (error)
+ return (error);
+ error = VFS_EXTATTRCTL(mp, SCARG(uap, cmd), SCARG(uap, attrname),
+ SCARG(uap, arg), p);
+ vn_finished_write(mp);
+ return (error);
}
/*
@@ -3425,6 +3539,7 @@ extattr_set_file(p, uap)
struct extattr_set_file_args *uap;
{
struct nameidata nd;
+ struct mount *mp;
struct uio auio;
struct iovec *iov, *needfree = NULL, aiov[UIO_SMALLIOV];
char attrname[EXTATTR_MAXNAMELEN];
@@ -3434,10 +3549,11 @@ extattr_set_file(p, uap)
error = copyin(SCARG(uap, attrname), attrname, EXTATTR_MAXNAMELEN);
if (error)
return (error);
- NDINIT(&nd, LOOKUP, LOCKLEAF | FOLLOW, UIO_USERSPACE, SCARG(uap, path),
- p);
+ NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, SCARG(uap, path), p);
if ((error = namei(&nd)) != 0)
return(error);
+ if ((error = vn_start_write(nd.ni_vp, &mp, V_WAIT | PCATCH)) != 0)
+ goto done;
iovlen = uap->iovcnt * sizeof(struct iovec);
if (uap->iovcnt > UIO_SMALLIOV) {
if (uap->iovcnt > UIO_MAXIOV) {
@@ -3477,6 +3593,8 @@ done:
if (needfree)
FREE(needfree, M_IOV);
NDFREE(&nd, 0);
+ vrele(nd.ni_vp);
+ vn_finished_write(mp);
return (error);
}
@@ -3508,6 +3626,7 @@ extattr_get_file(p, uap)
if (uap->iovcnt > UIO_SMALLIOV) {
if (uap->iovcnt > UIO_MAXIOV) {
NDFREE(&nd, 0);
+ vrele(nd.ni_vp);
return (EINVAL);
}
MALLOC(iov, struct iovec *, iovlen, M_IOV, M_WAITOK);
@@ -3545,6 +3664,7 @@ done:
if (needfree)
FREE(needfree, M_IOV);
NDFREE(&nd, 0);
+ vrele(nd.ni_vp);
return(error);
}
@@ -3557,6 +3677,7 @@ extattr_delete_file(p, uap)
struct proc *p;
struct extattr_delete_file_args *uap;
{
+ struct mount *mp;
struct nameidata nd;
char attrname[EXTATTR_MAXNAMELEN];
int error;
@@ -3564,12 +3685,17 @@ extattr_delete_file(p, uap)
error = copyin(SCARG(uap, attrname), attrname, EXTATTR_MAXNAMELEN);
if (error)
return(error);
- NDINIT(&nd, LOOKUP, LOCKLEAF | FOLLOW, UIO_USERSPACE, SCARG(uap, path),
- p);
+ NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, SCARG(uap, path), p);
if ((error = namei(&nd)) != 0)
return(error);
+ if ((error = vn_start_write(nd.ni_vp, &mp, V_WAIT | PCATCH)) != 0) {
+ vrele(nd.ni_vp);
+ return (error);
+ }
error = VOP_SETEXTATTR(nd.ni_vp, attrname, NULL, p->p_cred->pc_ucred,
p);
NDFREE(&nd, 0);
+ vrele(nd.ni_vp);
+ vn_finished_write(mp);
return(error);
}
diff --git a/sys/kern/vfs_subr.c b/sys/kern/vfs_subr.c
index 6483660..0e5ec3f 100644
--- a/sys/kern/vfs_subr.c
+++ b/sys/kern/vfs_subr.c
@@ -453,6 +453,7 @@ getnewvnode(tag, mp, vops, vpp)
int s, count;
struct proc *p = curproc; /* XXX */
struct vnode *vp = NULL;
+ struct mount *vnmp;
vm_object_t object;
/*
@@ -491,7 +492,14 @@ getnewvnode(tag, mp, vops, vpp)
vp = NULL;
continue;
}
- break;
+ /*
+ * Skip over it if its filesystem is being suspended.
+ */
+ if (vn_start_write(vp, &vnmp, V_NOWAIT) == 0)
+ break;
+ simple_unlock(&vp->v_interlock);
+ TAILQ_INSERT_TAIL(&vnode_free_list, vp, v_freelist);
+ vp = NULL;
}
if (vp) {
vp->v_flag |= VDOOMED;
@@ -504,6 +512,7 @@ getnewvnode(tag, mp, vops, vpp)
} else {
simple_unlock(&vp->v_interlock);
}
+ vn_finished_write(vnmp);
#ifdef INVARIANTS
{
@@ -515,6 +524,8 @@ getnewvnode(tag, mp, vops, vpp)
if (vp->v_numoutput)
panic("Clean vnode has pending I/O's");
splx(s);
+ if (vp->v_writecount != 0)
+ panic("Non-zero write count");
}
#endif
vp->v_flag = 0;
@@ -523,7 +534,6 @@ getnewvnode(tag, mp, vops, vpp)
vp->v_cstart = 0;
vp->v_clen = 0;
vp->v_socket = 0;
- vp->v_writecount = 0; /* XXX */
} else {
simple_unlock(&vnode_free_list_slock);
vp = (struct vnode *) zalloc(vnode_zone);
@@ -946,6 +956,7 @@ sched_sync(void)
{
struct synclist *slp;
struct vnode *vp;
+ struct mount *mp;
long starttime;
int s;
struct proc *p = updateproc;
@@ -970,10 +981,12 @@ sched_sync(void)
splx(s);
while ((vp = LIST_FIRST(slp)) != NULL) {
- if (VOP_ISLOCKED(vp, NULL) == 0) {
+ if (VOP_ISLOCKED(vp, NULL) == 0 &&
+ vn_start_write(vp, &mp, V_NOWAIT) == 0) {
vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p);
(void) VOP_FSYNC(vp, p->p_ucred, MNT_LAZY, p);
VOP_UNLOCK(vp, 0, p);
+ vn_finished_write(mp);
}
s = splbio();
if (LIST_FIRST(slp) == vp) {
@@ -1386,6 +1399,7 @@ vrele(vp)
struct proc *p = curproc; /* XXX */
KASSERT(vp != NULL, ("vrele: null vp"));
+ KASSERT(vp->v_writecount < vp->v_usecount, ("vrele: missed vn_close"));
simple_lock(&vp->v_interlock);
@@ -1427,6 +1441,7 @@ vput(vp)
struct proc *p = curproc; /* XXX */
KASSERT(vp != NULL, ("vput: null vp"));
+ KASSERT(vp->v_writecount < vp->v_usecount, ("vput: missed vn_close"));
simple_lock(&vp->v_interlock);
@@ -1632,6 +1647,8 @@ vclean(vp, flags, p)
* If the flush fails, just toss the buffers.
*/
if (flags & DOCLOSE) {
+ if (TAILQ_FIRST(&vp->v_dirtyblkhd) != NULL)
+ (void) vn_write_suspend_wait(vp, V_WAIT);
if (vinvalbuf(vp, V_SAVE, NOCRED, p, 0, 0) != 0)
vinvalbuf(vp, 0, NOCRED, p, 0, 0);
}
@@ -2785,12 +2802,18 @@ sync_fsync(ap)
simple_unlock(&mountlist_slock);
return (0);
}
+ if (vn_start_write(NULL, &mp, V_NOWAIT) != 0) {
+ vfs_unbusy(mp, p);
+ simple_unlock(&mountlist_slock);
+ return (0);
+ }
asyncflag = mp->mnt_flag & MNT_ASYNC;
mp->mnt_flag &= ~MNT_ASYNC;
vfs_msync(mp, MNT_NOWAIT);
VFS_SYNC(mp, MNT_LAZY, ap->a_cred, p);
if (asyncflag)
mp->mnt_flag |= MNT_ASYNC;
+ vn_finished_write(mp);
vfs_unbusy(mp, p);
return (0);
}
diff --git a/sys/kern/vfs_syscalls.c b/sys/kern/vfs_syscalls.c
index 65a297ca..404114a 100644
--- a/sys/kern/vfs_syscalls.c
+++ b/sys/kern/vfs_syscalls.c
@@ -164,8 +164,8 @@ mount(p, uap)
vput(vp);
return (EOPNOTSUPP); /* Needs translation */
}
- mp->mnt_flag |=
- SCARG(uap, flags) & (MNT_RELOAD | MNT_FORCE | MNT_UPDATE);
+ mp->mnt_flag |= SCARG(uap, flags) &
+ (MNT_RELOAD | MNT_FORCE | MNT_UPDATE | MNT_SNAPSHOT);
/*
* Only root, or the user that did the original mount is
* permitted to update it.
@@ -303,7 +303,8 @@ update:
vrele(vp);
if (mp->mnt_kern_flag & MNTK_WANTRDWR)
mp->mnt_flag &= ~MNT_RDONLY;
- mp->mnt_flag &=~ (MNT_UPDATE | MNT_RELOAD | MNT_FORCE);
+ mp->mnt_flag &=~
+ (MNT_UPDATE | MNT_RELOAD | MNT_FORCE | MNT_SNAPSHOT);
mp->mnt_kern_flag &=~ MNTK_WANTRDWR;
if (error) {
mp->mnt_flag = flag;
@@ -458,7 +459,7 @@ unmount(p, uap)
*/
int
dounmount(mp, flags, p)
- register struct mount *mp;
+ struct mount *mp;
int flags;
struct proc *p;
{
@@ -469,6 +470,7 @@ dounmount(mp, flags, p)
simple_lock(&mountlist_slock);
mp->mnt_kern_flag |= MNTK_UNMOUNT;
lockmgr(&mp->mnt_lock, LK_DRAIN | LK_INTERLOCK, &mountlist_slock, p);
+ vn_start_write(NULL, &mp, V_WAIT);
if (mp->mnt_flag & MNT_EXPUBLIC)
vfs_setpublicfs(NULL, NULL, NULL);
@@ -481,8 +483,10 @@ dounmount(mp, flags, p)
vrele(mp->mnt_syncer);
if (((mp->mnt_flag & MNT_RDONLY) ||
(error = VFS_SYNC(mp, MNT_WAIT, p->p_ucred, p)) == 0) ||
- (flags & MNT_FORCE))
+ (flags & MNT_FORCE)) {
error = VFS_UNMOUNT(mp, flags, p);
+ }
+ vn_finished_write(mp);
simple_lock(&mountlist_slock);
if (error) {
if ((mp->mnt_flag & MNT_RDONLY) == 0 && mp->mnt_syncer == NULL)
@@ -530,7 +534,7 @@ sync(p, uap)
struct proc *p;
struct sync_args *uap;
{
- register struct mount *mp, *nmp;
+ struct mount *mp, *nmp;
int asyncflag;
simple_lock(&mountlist_slock);
@@ -539,13 +543,15 @@ sync(p, uap)
nmp = TAILQ_NEXT(mp, mnt_list);
continue;
}
- if ((mp->mnt_flag & MNT_RDONLY) == 0) {
+ if ((mp->mnt_flag & MNT_RDONLY) == 0 &&
+ vn_start_write(NULL, &mp, V_NOWAIT) == 0) {
asyncflag = mp->mnt_flag & MNT_ASYNC;
mp->mnt_flag &= ~MNT_ASYNC;
vfs_msync(mp, MNT_NOWAIT);
VFS_SYNC(mp, MNT_NOWAIT,
- ((p != NULL) ? p->p_ucred : NOCRED), p);
+ ((p != NULL) ? p->p_ucred : NOCRED), p);
mp->mnt_flag |= asyncflag;
+ vn_finished_write(mp);
}
simple_lock(&mountlist_slock);
nmp = TAILQ_NEXT(mp, mnt_list);
@@ -593,7 +599,7 @@ quotactl(p, uap)
syscallarg(caddr_t) arg;
} */ *uap;
{
- register struct mount *mp;
+ struct mount *mp;
int error;
struct nameidata nd;
@@ -602,11 +608,15 @@ quotactl(p, uap)
NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, SCARG(uap, path), p);
if ((error = namei(&nd)) != 0)
return (error);
- mp = nd.ni_vp->v_mount;
NDFREE(&nd, NDF_ONLY_PNBUF);
+ error = vn_start_write(nd.ni_vp, &mp, V_WAIT | PCATCH);
vrele(nd.ni_vp);
- return (VFS_QUOTACTL(mp, SCARG(uap, cmd), SCARG(uap, uid),
- SCARG(uap, arg), p));
+ if (error)
+ return (error);
+ error = VFS_QUOTACTL(mp, SCARG(uap, cmd), SCARG(uap, uid),
+ SCARG(uap, arg), p);
+ vn_finished_write(mp);
+ return (error);
}
/*
@@ -972,6 +982,7 @@ open(p, uap)
struct file *fp;
struct vnode *vp;
struct vattr vat;
+ struct mount *mp;
int cmode, flags, oflags;
struct file *nfp;
int type, indx, error;
@@ -1029,12 +1040,15 @@ open(p, uap)
fp->f_flag |= FHASLOCK;
}
if (flags & O_TRUNC) {
+ if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
+ goto bad;
VOP_LEASE(vp, p, p->p_ucred, LEASE_WRITE);
VATTR_NULL(&vat);
vat.va_size = 0;
vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p);
error = VOP_SETATTR(vp, &vat, p->p_ucred, p);
VOP_UNLOCK(vp, 0, p);
+ vn_finished_write(mp);
if (error)
goto bad;
}
@@ -1101,7 +1115,8 @@ mknod(p, uap)
syscallarg(int) dev;
} */ *uap;
{
- register struct vnode *vp;
+ struct vnode *vp;
+ struct mount *mp;
struct vattr vattr;
int error;
int whiteout = 0;
@@ -1118,14 +1133,16 @@ mknod(p, uap)
}
if (error)
return (error);
+restart:
bwillwrite();
NDINIT(&nd, CREATE, LOCKPARENT, UIO_USERSPACE, SCARG(uap, path), p);
if ((error = namei(&nd)) != 0)
return (error);
vp = nd.ni_vp;
- if (vp != NULL)
+ if (vp != NULL) {
+ vrele(vp);
error = EEXIST;
- else {
+ } else {
VATTR_NULL(&vattr);
vattr.va_mode = (SCARG(uap, mode) & ALLPERMS) &~ p->p_fd->fd_cmask;
vattr.va_rdev = SCARG(uap, dev);
@@ -1149,6 +1166,13 @@ mknod(p, uap)
break;
}
}
+ if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
+ NDFREE(&nd, NDF_ONLY_PNBUF);
+ vput(nd.ni_dvp);
+ if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0)
+ return (error);
+ goto restart;
+ }
if (!error) {
VOP_LEASE(nd.ni_dvp, p, p->p_ucred, LEASE_WRITE);
if (whiteout)
@@ -1159,17 +1183,10 @@ mknod(p, uap)
if (error == 0)
vput(nd.ni_vp);
}
- NDFREE(&nd, NDF_ONLY_PNBUF);
- vput(nd.ni_dvp);
- } else {
- NDFREE(&nd, NDF_ONLY_PNBUF);
- if (nd.ni_dvp == vp)
- vrele(nd.ni_dvp);
- else
- vput(nd.ni_dvp);
- if (vp)
- vrele(vp);
}
+ NDFREE(&nd, NDF_ONLY_PNBUF);
+ vput(nd.ni_dvp);
+ vn_finished_write(mp);
ASSERT_VOP_UNLOCKED(nd.ni_dvp, "mknod");
ASSERT_VOP_UNLOCKED(nd.ni_vp, "mknod");
return (error);
@@ -1193,23 +1210,29 @@ mkfifo(p, uap)
syscallarg(int) mode;
} */ *uap;
{
+ struct mount *mp;
struct vattr vattr;
int error;
struct nameidata nd;
+restart:
bwillwrite();
NDINIT(&nd, CREATE, LOCKPARENT, UIO_USERSPACE, SCARG(uap, path), p);
if ((error = namei(&nd)) != 0)
return (error);
if (nd.ni_vp != NULL) {
NDFREE(&nd, NDF_ONLY_PNBUF);
- if (nd.ni_dvp == nd.ni_vp)
- vrele(nd.ni_dvp);
- else
- vput(nd.ni_dvp);
vrele(nd.ni_vp);
+ vput(nd.ni_dvp);
return (EEXIST);
}
+ if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
+ NDFREE(&nd, NDF_ONLY_PNBUF);
+ vput(nd.ni_dvp);
+ if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0)
+ return (error);
+ goto restart;
+ }
VATTR_NULL(&vattr);
vattr.va_type = VFIFO;
vattr.va_mode = (SCARG(uap, mode) & ALLPERMS) &~ p->p_fd->fd_cmask;
@@ -1219,6 +1242,7 @@ mkfifo(p, uap)
vput(nd.ni_vp);
NDFREE(&nd, NDF_ONLY_PNBUF);
vput(nd.ni_dvp);
+ vn_finished_write(mp);
return (error);
}
@@ -1240,7 +1264,8 @@ link(p, uap)
syscallarg(char *) link;
} */ *uap;
{
- register struct vnode *vp;
+ struct vnode *vp;
+ struct mount *mp;
struct nameidata nd;
int error;
@@ -1250,30 +1275,29 @@ link(p, uap)
return (error);
NDFREE(&nd, NDF_ONLY_PNBUF);
vp = nd.ni_vp;
- if (vp->v_type == VDIR)
- error = EPERM; /* POSIX */
- else {
- NDINIT(&nd, CREATE, LOCKPARENT|NOOBJ, UIO_USERSPACE, SCARG(uap, link), p);
- error = namei(&nd);
- if (!error) {
- if (nd.ni_vp != NULL) {
- if (nd.ni_vp)
- vrele(nd.ni_vp);
- error = EEXIST;
- } else {
- VOP_LEASE(nd.ni_dvp, p, p->p_ucred,
- LEASE_WRITE);
- VOP_LEASE(vp, p, p->p_ucred, LEASE_WRITE);
- error = VOP_LINK(nd.ni_dvp, vp, &nd.ni_cnd);
- }
- NDFREE(&nd, NDF_ONLY_PNBUF);
- if (nd.ni_dvp == nd.ni_vp)
- vrele(nd.ni_dvp);
- else
- vput(nd.ni_dvp);
+ if (vp->v_type == VDIR) {
+ vrele(vp);
+ return (EPERM); /* POSIX */
+ }
+ if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0) {
+ vrele(vp);
+ return (error);
+ }
+ NDINIT(&nd, CREATE, LOCKPARENT|NOOBJ, UIO_USERSPACE, SCARG(uap, link), p);
+ if ((error = namei(&nd)) == 0) {
+ if (nd.ni_vp != NULL) {
+ vrele(nd.ni_vp);
+ error = EEXIST;
+ } else {
+ VOP_LEASE(nd.ni_dvp, p, p->p_ucred, LEASE_WRITE);
+ VOP_LEASE(vp, p, p->p_ucred, LEASE_WRITE);
+ error = VOP_LINK(nd.ni_dvp, vp, &nd.ni_cnd);
}
+ NDFREE(&nd, NDF_ONLY_PNBUF);
+ vput(nd.ni_dvp);
}
vrele(vp);
+ vn_finished_write(mp);
ASSERT_VOP_UNLOCKED(nd.ni_dvp, "link");
ASSERT_VOP_UNLOCKED(nd.ni_vp, "link");
return (error);
@@ -1297,6 +1321,7 @@ symlink(p, uap)
syscallarg(char *) link;
} */ *uap;
{
+ struct mount *mp;
struct vattr vattr;
char *path;
int error;
@@ -1305,20 +1330,25 @@ symlink(p, uap)
path = zalloc(namei_zone);
if ((error = copyinstr(SCARG(uap, path), path, MAXPATHLEN, NULL)) != 0)
goto out;
+restart:
bwillwrite();
NDINIT(&nd, CREATE, LOCKPARENT|NOOBJ, UIO_USERSPACE, SCARG(uap, link), p);
if ((error = namei(&nd)) != 0)
goto out;
if (nd.ni_vp) {
NDFREE(&nd, NDF_ONLY_PNBUF);
- if (nd.ni_dvp == nd.ni_vp)
- vrele(nd.ni_dvp);
- else
- vput(nd.ni_dvp);
vrele(nd.ni_vp);
+ vput(nd.ni_dvp);
error = EEXIST;
goto out;
}
+ if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
+ NDFREE(&nd, NDF_ONLY_PNBUF);
+ vput(nd.ni_dvp);
+ if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0)
+ return (error);
+ goto restart;
+ }
VATTR_NULL(&vattr);
vattr.va_mode = ACCESSPERMS &~ p->p_fd->fd_cmask;
VOP_LEASE(nd.ni_dvp, p, p->p_ucred, LEASE_WRITE);
@@ -1327,6 +1357,7 @@ symlink(p, uap)
if (error == 0)
vput(nd.ni_vp);
vput(nd.ni_dvp);
+ vn_finished_write(mp);
ASSERT_VOP_UNLOCKED(nd.ni_dvp, "symlink");
ASSERT_VOP_UNLOCKED(nd.ni_vp, "symlink");
out:
@@ -1346,8 +1377,10 @@ undelete(p, uap)
} */ *uap;
{
int error;
+ struct mount *mp;
struct nameidata nd;
+restart:
bwillwrite();
NDINIT(&nd, DELETE, LOCKPARENT|DOWHITEOUT, UIO_USERSPACE,
SCARG(uap, path), p);
@@ -1357,19 +1390,23 @@ undelete(p, uap)
if (nd.ni_vp != NULLVP || !(nd.ni_cnd.cn_flags & ISWHITEOUT)) {
NDFREE(&nd, NDF_ONLY_PNBUF);
- if (nd.ni_dvp == nd.ni_vp)
- vrele(nd.ni_dvp);
- else
- vput(nd.ni_dvp);
if (nd.ni_vp)
vrele(nd.ni_vp);
+ vput(nd.ni_dvp);
return (EEXIST);
}
-
+ if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
+ NDFREE(&nd, NDF_ONLY_PNBUF);
+ vput(nd.ni_dvp);
+ if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0)
+ return (error);
+ goto restart;
+ }
VOP_LEASE(nd.ni_dvp, p, p->p_ucred, LEASE_WRITE);
error = VOP_WHITEOUT(nd.ni_dvp, &nd.ni_cnd, DELETE);
NDFREE(&nd, NDF_ONLY_PNBUF);
vput(nd.ni_dvp);
+ vn_finished_write(mp);
ASSERT_VOP_UNLOCKED(nd.ni_dvp, "undelete");
ASSERT_VOP_UNLOCKED(nd.ni_vp, "undelete");
return (error);
@@ -1391,18 +1428,17 @@ unlink(p, uap)
syscallarg(char *) path;
} */ *uap;
{
- register struct vnode *vp;
+ struct mount *mp;
+ struct vnode *vp;
int error;
struct nameidata nd;
+restart:
bwillwrite();
NDINIT(&nd, DELETE, LOCKPARENT, UIO_USERSPACE, SCARG(uap, path), p);
if ((error = namei(&nd)) != 0)
return (error);
vp = nd.ni_vp;
- VOP_LEASE(vp, p, p->p_ucred, LEASE_WRITE);
- vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p);
-
if (vp->v_type == VDIR)
error = EPERM; /* POSIX */
else {
@@ -1414,18 +1450,24 @@ unlink(p, uap)
if (vp->v_flag & VROOT)
error = EBUSY;
}
-
+ if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
+ NDFREE(&nd, NDF_ONLY_PNBUF);
+ vrele(vp);
+ vput(nd.ni_dvp);
+ if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0)
+ return (error);
+ goto restart;
+ }
+ VOP_LEASE(vp, p, p->p_ucred, LEASE_WRITE);
+ vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p);
if (!error) {
VOP_LEASE(nd.ni_dvp, p, p->p_ucred, LEASE_WRITE);
error = VOP_REMOVE(nd.ni_dvp, vp, &nd.ni_cnd);
}
NDFREE(&nd, NDF_ONLY_PNBUF);
- if (nd.ni_dvp == vp)
- vrele(nd.ni_dvp);
- else
- vput(nd.ni_dvp);
- if (vp != NULLVP)
- vput(vp);
+ vput(nd.ni_dvp);
+ vput(vp);
+ vn_finished_write(mp);
ASSERT_VOP_UNLOCKED(nd.ni_dvp, "unlink");
ASSERT_VOP_UNLOCKED(nd.ni_vp, "unlink");
return (error);
@@ -1936,6 +1978,7 @@ setfflags(p, vp, flags)
int flags;
{
int error;
+ struct mount *mp;
struct vattr vattr;
/*
@@ -1948,12 +1991,15 @@ setfflags(p, vp, flags)
((error = suser_xxx(p->p_ucred, p, PRISON_ROOT)) != 0))
return (error);
+ if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
+ return (error);
VOP_LEASE(vp, p, p->p_ucred, LEASE_WRITE);
vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p);
VATTR_NULL(&vattr);
vattr.va_flags = flags;
error = VOP_SETATTR(vp, &vattr, p->p_ucred, p);
VOP_UNLOCK(vp, 0, p);
+ vn_finished_write(mp);
return (error);
}
@@ -2020,14 +2066,18 @@ setfmode(p, vp, mode)
int mode;
{
int error;
+ struct mount *mp;
struct vattr vattr;
+ if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
+ return (error);
VOP_LEASE(vp, p, p->p_ucred, LEASE_WRITE);
vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p);
VATTR_NULL(&vattr);
vattr.va_mode = mode & ALLPERMS;
error = VOP_SETATTR(vp, &vattr, p->p_ucred, p);
VOP_UNLOCK(vp, 0, p);
+ vn_finished_write(mp);
return error;
}
@@ -2125,8 +2175,11 @@ setfown(p, vp, uid, gid)
gid_t gid;
{
int error;
+ struct mount *mp;
struct vattr vattr;
+ if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
+ return (error);
VOP_LEASE(vp, p, p->p_ucred, LEASE_WRITE);
vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p);
VATTR_NULL(&vattr);
@@ -2134,6 +2187,7 @@ setfown(p, vp, uid, gid)
vattr.va_gid = gid;
error = VOP_SETATTR(vp, &vattr, p->p_ucred, p);
VOP_UNLOCK(vp, 0, p);
+ vn_finished_write(mp);
return error;
}
@@ -2259,8 +2313,11 @@ setutimes(p, vp, ts, nullflag)
int nullflag;
{
int error;
+ struct mount *mp;
struct vattr vattr;
+ if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
+ return (error);
VOP_LEASE(vp, p, p->p_ucred, LEASE_WRITE);
vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p);
VATTR_NULL(&vattr);
@@ -2270,6 +2327,7 @@ setutimes(p, vp, ts, nullflag)
vattr.va_vaflags |= VA_UTIMES_NULL;
error = VOP_SETATTR(vp, &vattr, p->p_ucred, p);
VOP_UNLOCK(vp, 0, p);
+ vn_finished_write(mp);
return error;
}
@@ -2394,7 +2452,8 @@ truncate(p, uap)
syscallarg(off_t) length;
} */ *uap;
{
- register struct vnode *vp;
+ struct mount *mp;
+ struct vnode *vp;
struct vattr vattr;
int error;
struct nameidata nd;
@@ -2405,6 +2464,10 @@ truncate(p, uap)
if ((error = namei(&nd)) != 0)
return (error);
vp = nd.ni_vp;
+ if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0) {
+ vrele(vp);
+ return (error);
+ }
NDFREE(&nd, NDF_ONLY_PNBUF);
VOP_LEASE(vp, p, p->p_ucred, LEASE_WRITE);
vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p);
@@ -2417,6 +2480,7 @@ truncate(p, uap)
error = VOP_SETATTR(vp, &vattr, p->p_ucred, p);
}
vput(vp);
+ vn_finished_write(mp);
return (error);
}
@@ -2440,6 +2504,7 @@ ftruncate(p, uap)
syscallarg(off_t) length;
} */ *uap;
{
+ struct mount *mp;
struct vattr vattr;
struct vnode *vp;
struct file *fp;
@@ -2452,6 +2517,8 @@ ftruncate(p, uap)
if ((fp->f_flag & FWRITE) == 0)
return (EINVAL);
vp = (struct vnode *)fp->f_data;
+ if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
+ return (error);
VOP_LEASE(vp, p, p->p_ucred, LEASE_WRITE);
vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p);
if (vp->v_type == VDIR)
@@ -2462,6 +2529,7 @@ ftruncate(p, uap)
error = VOP_SETATTR(vp, &vattr, fp->f_cred, p);
}
VOP_UNLOCK(vp, 0, p);
+ vn_finished_write(mp);
return (error);
}
@@ -2541,13 +2609,16 @@ fsync(p, uap)
syscallarg(int) fd;
} */ *uap;
{
- register struct vnode *vp;
+ struct vnode *vp;
+ struct mount *mp;
struct file *fp;
int error;
if ((error = getvnode(p->p_fd, SCARG(uap, fd), &fp)) != 0)
return (error);
vp = (struct vnode *)fp->f_data;
+ if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
+ return (error);
vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p);
if (vp->v_object)
vm_object_page_clean(vp->v_object, 0, 0, 0);
@@ -2558,6 +2629,7 @@ fsync(p, uap)
#endif
VOP_UNLOCK(vp, 0, p);
+ vn_finished_write(mp);
return (error);
}
@@ -2580,7 +2652,8 @@ rename(p, uap)
syscallarg(char *) to;
} */ *uap;
{
- register struct vnode *tvp, *fvp, *tdvp;
+ struct mount *mp;
+ struct vnode *tvp, *fvp, *tdvp;
struct nameidata fromnd, tond;
int error;
@@ -2590,6 +2663,12 @@ rename(p, uap)
if ((error = namei(&fromnd)) != 0)
return (error);
fvp = fromnd.ni_vp;
+ if ((error = vn_start_write(fvp, &mp, V_WAIT | PCATCH)) != 0) {
+ NDFREE(&fromnd, NDF_ONLY_PNBUF);
+ vrele(fromnd.ni_dvp);
+ vrele(fvp);
+ goto out1;
+ }
NDINIT(&tond, RENAME, LOCKPARENT | LOCKLEAF | NOCACHE | SAVESTART | NOOBJ,
UIO_USERSPACE, SCARG(uap, to), p);
if (fromnd.ni_vp->v_type == VDIR)
@@ -2652,6 +2731,7 @@ out:
vrele(fvp);
}
vrele(tond.ni_startdir);
+ vn_finished_write(mp);
ASSERT_VOP_UNLOCKED(fromnd.ni_dvp, "rename");
ASSERT_VOP_UNLOCKED(fromnd.ni_vp, "rename");
ASSERT_VOP_UNLOCKED(tond.ni_dvp, "rename");
@@ -2682,11 +2762,13 @@ mkdir(p, uap)
syscallarg(int) mode;
} */ *uap;
{
- register struct vnode *vp;
+ struct mount *mp;
+ struct vnode *vp;
struct vattr vattr;
int error;
struct nameidata nd;
+restart:
bwillwrite();
NDINIT(&nd, CREATE, LOCKPARENT, UIO_USERSPACE, SCARG(uap, path), p);
nd.ni_cnd.cn_flags |= WILLBEDIR;
@@ -2695,13 +2777,17 @@ mkdir(p, uap)
vp = nd.ni_vp;
if (vp != NULL) {
NDFREE(&nd, NDF_ONLY_PNBUF);
- if (nd.ni_dvp == vp)
- vrele(nd.ni_dvp);
- else
- vput(nd.ni_dvp);
vrele(vp);
+ vput(nd.ni_dvp);
return (EEXIST);
}
+ if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
+ NDFREE(&nd, NDF_ONLY_PNBUF);
+ vput(nd.ni_dvp);
+ if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0)
+ return (error);
+ goto restart;
+ }
VATTR_NULL(&vattr);
vattr.va_type = VDIR;
vattr.va_mode = (SCARG(uap, mode) & ACCESSPERMS) &~ p->p_fd->fd_cmask;
@@ -2711,6 +2797,7 @@ mkdir(p, uap)
vput(nd.ni_dvp);
if (!error)
vput(nd.ni_vp);
+ vn_finished_write(mp);
ASSERT_VOP_UNLOCKED(nd.ni_dvp, "mkdir");
ASSERT_VOP_UNLOCKED(nd.ni_vp, "mkdir");
return (error);
@@ -2732,10 +2819,12 @@ rmdir(p, uap)
syscallarg(char *) path;
} */ *uap;
{
- register struct vnode *vp;
+ struct mount *mp;
+ struct vnode *vp;
int error;
struct nameidata nd;
+restart:
bwillwrite();
NDINIT(&nd, DELETE, LOCKPARENT | LOCKLEAF, UIO_USERSPACE,
SCARG(uap, path), p);
@@ -2756,21 +2845,32 @@ rmdir(p, uap)
/*
* The root of a mounted filesystem cannot be deleted.
*/
- if (vp->v_flag & VROOT)
+ if (vp->v_flag & VROOT) {
error = EBUSY;
- else {
- VOP_LEASE(nd.ni_dvp, p, p->p_ucred, LEASE_WRITE);
- VOP_LEASE(vp, p, p->p_ucred, LEASE_WRITE);
- error = VOP_RMDIR(nd.ni_dvp, nd.ni_vp, &nd.ni_cnd);
+ goto out;
}
+ if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
+ NDFREE(&nd, NDF_ONLY_PNBUF);
+ if (nd.ni_dvp == vp)
+ vrele(nd.ni_dvp);
+ else
+ vput(nd.ni_dvp);
+ vput(vp);
+ if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0)
+ return (error);
+ goto restart;
+ }
+ VOP_LEASE(nd.ni_dvp, p, p->p_ucred, LEASE_WRITE);
+ VOP_LEASE(vp, p, p->p_ucred, LEASE_WRITE);
+ error = VOP_RMDIR(nd.ni_dvp, nd.ni_vp, &nd.ni_cnd);
+ vn_finished_write(mp);
out:
NDFREE(&nd, NDF_ONLY_PNBUF);
if (nd.ni_dvp == vp)
vrele(nd.ni_dvp);
else
vput(nd.ni_dvp);
- if (vp != NULLVP)
- vput(vp);
+ vput(vp);
ASSERT_VOP_UNLOCKED(nd.ni_dvp, "rmdir");
ASSERT_VOP_UNLOCKED(nd.ni_vp, "rmdir");
return (error);
@@ -3049,7 +3149,8 @@ revoke(p, uap)
syscallarg(char *) path;
} */ *uap;
{
- register struct vnode *vp;
+ struct mount *mp;
+ struct vnode *vp;
struct vattr vattr;
int error;
struct nameidata nd;
@@ -3068,8 +3169,11 @@ revoke(p, uap)
if (p->p_ucred->cr_uid != vattr.va_uid &&
(error = suser_xxx(0, p, PRISON_ROOT)))
goto out;
+ if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
+ goto out;
if (vcount(vp) > 1)
VOP_REVOKE(vp, REVOKEALL);
+ vn_finished_write(mp);
out:
vrele(vp);
return (error);
@@ -3228,11 +3332,16 @@ fhopen(p, uap)
}
if (fmode & O_TRUNC) {
VOP_UNLOCK(vp, 0, p); /* XXX */
+ if ((error = vn_start_write(NULL, &mp, V_WAIT | PCATCH)) != 0) {
+ vrele(vp);
+ return (error);
+ }
VOP_LEASE(vp, p, p->p_ucred, LEASE_WRITE);
vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p); /* XXX */
VATTR_NULL(vap);
vap->va_size = 0;
error = VOP_SETATTR(vp, vap, p->p_ucred, p);
+ vn_finished_write(mp);
if (error)
goto bad;
}
@@ -3407,10 +3516,15 @@ extattrctl(p, uap)
NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, SCARG(uap, path), p);
if ((error = namei(&nd)) != 0)
return (error);
- mp = nd.ni_vp->v_mount;
+ error = vn_start_write(nd.ni_vp, &mp, V_WAIT | PCATCH);
NDFREE(&nd, 0);
- return (VFS_EXTATTRCTL(mp, SCARG(uap, cmd), SCARG(uap, attrname),
- SCARG(uap, arg), p));
+ vrele(nd.ni_vp);
+ if (error)
+ return (error);
+ error = VFS_EXTATTRCTL(mp, SCARG(uap, cmd), SCARG(uap, attrname),
+ SCARG(uap, arg), p);
+ vn_finished_write(mp);
+ return (error);
}
/*
@@ -3425,6 +3539,7 @@ extattr_set_file(p, uap)
struct extattr_set_file_args *uap;
{
struct nameidata nd;
+ struct mount *mp;
struct uio auio;
struct iovec *iov, *needfree = NULL, aiov[UIO_SMALLIOV];
char attrname[EXTATTR_MAXNAMELEN];
@@ -3434,10 +3549,11 @@ extattr_set_file(p, uap)
error = copyin(SCARG(uap, attrname), attrname, EXTATTR_MAXNAMELEN);
if (error)
return (error);
- NDINIT(&nd, LOOKUP, LOCKLEAF | FOLLOW, UIO_USERSPACE, SCARG(uap, path),
- p);
+ NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, SCARG(uap, path), p);
if ((error = namei(&nd)) != 0)
return(error);
+ if ((error = vn_start_write(nd.ni_vp, &mp, V_WAIT | PCATCH)) != 0)
+ goto done;
iovlen = uap->iovcnt * sizeof(struct iovec);
if (uap->iovcnt > UIO_SMALLIOV) {
if (uap->iovcnt > UIO_MAXIOV) {
@@ -3477,6 +3593,8 @@ done:
if (needfree)
FREE(needfree, M_IOV);
NDFREE(&nd, 0);
+ vrele(nd.ni_vp);
+ vn_finished_write(mp);
return (error);
}
@@ -3508,6 +3626,7 @@ extattr_get_file(p, uap)
if (uap->iovcnt > UIO_SMALLIOV) {
if (uap->iovcnt > UIO_MAXIOV) {
NDFREE(&nd, 0);
+ vrele(nd.ni_vp);
return (EINVAL);
}
MALLOC(iov, struct iovec *, iovlen, M_IOV, M_WAITOK);
@@ -3545,6 +3664,7 @@ done:
if (needfree)
FREE(needfree, M_IOV);
NDFREE(&nd, 0);
+ vrele(nd.ni_vp);
return(error);
}
@@ -3557,6 +3677,7 @@ extattr_delete_file(p, uap)
struct proc *p;
struct extattr_delete_file_args *uap;
{
+ struct mount *mp;
struct nameidata nd;
char attrname[EXTATTR_MAXNAMELEN];
int error;
@@ -3564,12 +3685,17 @@ extattr_delete_file(p, uap)
error = copyin(SCARG(uap, attrname), attrname, EXTATTR_MAXNAMELEN);
if (error)
return(error);
- NDINIT(&nd, LOOKUP, LOCKLEAF | FOLLOW, UIO_USERSPACE, SCARG(uap, path),
- p);
+ NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, SCARG(uap, path), p);
if ((error = namei(&nd)) != 0)
return(error);
+ if ((error = vn_start_write(nd.ni_vp, &mp, V_WAIT | PCATCH)) != 0) {
+ vrele(nd.ni_vp);
+ return (error);
+ }
error = VOP_SETEXTATTR(nd.ni_vp, attrname, NULL, p->p_cred->pc_ucred,
p);
NDFREE(&nd, 0);
+ vrele(nd.ni_vp);
+ vn_finished_write(mp);
return(error);
}
diff --git a/sys/kern/vfs_vnops.c b/sys/kern/vfs_vnops.c
index 0d0dc24..0708f7c 100644
--- a/sys/kern/vfs_vnops.c
+++ b/sys/kern/vfs_vnops.c
@@ -103,12 +103,14 @@ vn_open(ndp, flagp, cmode)
int *flagp, cmode;
{
struct vnode *vp;
+ struct mount *mp;
struct proc *p = ndp->ni_cnd.cn_proc;
struct ucred *cred = p->p_ucred;
struct vattr vat;
struct vattr *vap = &vat;
int mode, fmode, error;
+restart:
fmode = *flagp;
if (fmode & O_CREAT) {
ndp->ni_cnd.cn_nameiop = CREATE;
@@ -124,10 +126,19 @@ vn_open(ndp, flagp, cmode)
vap->va_mode = cmode;
if (fmode & O_EXCL)
vap->va_vaflags |= VA_EXCLUSIVE;
+ if (vn_start_write(ndp->ni_dvp, &mp, V_NOWAIT) != 0) {
+ NDFREE(ndp, NDF_ONLY_PNBUF);
+ vput(ndp->ni_dvp);
+ if ((error = vn_start_write(NULL, &mp,
+ V_XSLEEP | PCATCH)) != 0)
+ return (error);
+ goto restart;
+ }
VOP_LEASE(ndp->ni_dvp, p, cred, LEASE_WRITE);
error = VOP_CREATE(ndp->ni_dvp, &ndp->ni_vp,
&ndp->ni_cnd, vap);
vput(ndp->ni_dvp);
+ vn_finished_write(mp);
if (error) {
NDFREE(ndp, NDF_ONLY_PNBUF);
return (error);
@@ -293,10 +304,17 @@ vn_rdwr(rw, vp, base, len, offset, segflg, ioflg, cred, aresid, p)
{
struct uio auio;
struct iovec aiov;
+ struct mount *mp;
int error;
- if ((ioflg & IO_NODELOCKED) == 0)
+ if ((ioflg & IO_NODELOCKED) == 0) {
+ mp = NULL;
+ if (rw == UIO_WRITE &&
+ vp->v_type != VCHR && vp->v_type != VBLK &&
+ (error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
+ return (error);
vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p);
+ }
auio.uio_iov = &aiov;
auio.uio_iovcnt = 1;
aiov.iov_base = base;
@@ -316,8 +334,10 @@ vn_rdwr(rw, vp, base, len, offset, segflg, ioflg, cred, aresid, p)
else
if (auio.uio_resid && error == 0)
error = EIO;
- if ((ioflg & IO_NODELOCKED) == 0)
+ if ((ioflg & IO_NODELOCKED) == 0) {
+ vn_finished_write(mp);
VOP_UNLOCK(vp, 0, p);
+ }
return (error);
}
@@ -368,6 +388,7 @@ vn_write(fp, uio, cred, flags, p)
int flags;
{
struct vnode *vp;
+ struct mount *mp;
int error, ioflag;
KASSERT(uio->uio_procp == p, ("uio_procp %p is not p %p",
@@ -384,6 +405,10 @@ vn_write(fp, uio, cred, flags, p)
if ((fp->f_flag & O_FSYNC) ||
(vp->v_mount && (vp->v_mount->mnt_flag & MNT_SYNCHRONOUS)))
ioflag |= IO_SYNC;
+ mp = NULL;
+ if (vp->v_type != VCHR && vp->v_type != VBLK &&
+ (error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
+ return (error);
VOP_LEASE(vp, p, cred, LEASE_WRITE);
vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p);
if ((flags & FOF_OFFSET) == 0)
@@ -394,6 +419,7 @@ vn_write(fp, uio, cred, flags, p)
fp->f_offset = uio->uio_offset;
fp->f_nextoff = uio->uio_offset;
VOP_UNLOCK(vp, 0, p);
+ vn_finished_write(mp);
return (error);
}
@@ -649,6 +675,140 @@ vn_closefile(fp, p)
fp->f_cred, p));
}
+/*
+ * Preparing to start a filesystem write operation. If the operation is
+ * permitted, then we bump the count of operations in progress and
+ * proceed. If a suspend request is in progress, we wait until the
+ * suspension is over, and then proceed.
+ *
+ * Arguments:
+ *   vp    - vnode about to be written. When non-NULL its write mount
+ *           point is looked up with VOP_GETWRITEMOUNT() and stored in
+ *           *mpp. When NULL, the caller must already have placed the
+ *           mount point in *mpp.
+ *   mpp   - in/out mount point pointer (see vp above).
+ *   flags - V_NOWAIT: return EWOULDBLOCK instead of sleeping while
+ *           MNTK_SUSPEND is set. V_XSLEEP: only wait for the
+ *           suspension to end; do not count a write in progress.
+ *           PCATCH is passed through to tsleep().
+ *
+ * Returns 0 on success, EWOULDBLOCK as described above, the error from
+ * VOP_GETWRITEMOUNT() (except EOPNOTSUPP, which is treated as "nothing
+ * to account for": *mpp is set to NULL and 0 is returned), or the error
+ * from tsleep().
+ *
+ * mnt_writeopcount is incremented only on the final successful return
+ * without V_XSLEEP. On the EWOULDBLOCK and tsleep() error returns *mpp
+ * is left pointing at the mount point although no write was counted.
+ * NOTE(review): callers presumably must not call vn_finished_write()
+ * after a failed call - confirm against each caller.
+ */
+int
+vn_start_write(vp, mpp, flags)
+ struct vnode *vp;
+ struct mount **mpp;
+ int flags;
+{
+ struct mount *mp;
+ int error;
+
+ /*
+ * If a vnode is provided, get and return the mount point
+ * to which it will write.
+ */
+ if (vp != NULL) {
+ if ((error = VOP_GETWRITEMOUNT(vp, mpp)) != 0) {
+ *mpp = NULL;
+ if (error != EOPNOTSUPP)
+ return (error);
+ return (0);
+ }
+ }
+ if ((mp = *mpp) == NULL)
+ return (0);
+ /*
+ * Check on status of suspension. The sleep channel is
+ * &mp->mnt_flag and the flag is re-tested after every wakeup.
+ */
+ while ((mp->mnt_kern_flag & MNTK_SUSPEND) != 0) {
+ if (flags & V_NOWAIT)
+ return (EWOULDBLOCK);
+ error = tsleep(&mp->mnt_flag, (PUSER - 1) | (flags & PCATCH),
+ "suspfs", 0);
+ if (error)
+ return (error);
+ }
+ if (flags & V_XSLEEP)
+ return (0);
+ mp->mnt_writeopcount++;
+ return (0);
+}
+
+/*
+ * Secondary suspension. Used by operations such as vop_inactive
+ * routines that are needed by the higher level functions. These
+ * are allowed to proceed until all the higher level functions have
+ * completed (indicated by mnt_writeopcount dropping to zero). At that
+ * time, these operations are halted until the suspension is over.
+ *
+ * Arguments:
+ *   vp    - vnode whose write mount point is looked up with
+ *           VOP_GETWRITEMOUNT().
+ *   flags - V_NOWAIT: return EWOULDBLOCK instead of sleeping.
+ *           PCATCH is passed through to tsleep().
+ *
+ * Returns 0 when the operation may proceed: VOP_GETWRITEMOUNT() failed
+ * with EOPNOTSUPP, there is no mount point, or MNTK_SUSPENDED is not
+ * set. Otherwise returns the VOP_GETWRITEMOUNT() error, EWOULDBLOCK,
+ * or the result of tsleep().
+ *
+ * Unlike vn_start_write(), this routine tests MNTK_SUSPENDED rather
+ * than MNTK_SUSPEND, never touches mnt_writeopcount, and sleeps at
+ * most once: the flag is not re-checked after the wakeup.
+ */
+int
+vn_write_suspend_wait(vp, flags)
+ struct vnode *vp;
+ int flags;
+{
+ struct mount *mp;
+ int error;
+
+ if ((error = VOP_GETWRITEMOUNT(vp, &mp)) != 0) {
+ if (error != EOPNOTSUPP)
+ return (error);
+ return (0);
+ }
+ /*
+ * If we are not suspended or have not yet reached suspended
+ * mode, then let the operation proceed.
+ */
+ if (mp == NULL || (mp->mnt_kern_flag & MNTK_SUSPENDED) == 0)
+ return (0);
+ if (flags & V_NOWAIT)
+ return (EWOULDBLOCK);
+ /*
+ * Wait for the suspension to finish. This sleeps on the same
+ * channel (&mp->mnt_flag) and with the same wait message as
+ * vn_start_write().
+ */
+ return (tsleep(&mp->mnt_flag, (PUSER - 1) | (flags & PCATCH),
+ "suspfs", 0));
+}
+
+/*
+ * Filesystem write operation has completed. If we are suspending and this
+ * operation is the last one, notify the suspender that the suspension is
+ * now in effect.
+ *
+ * mp is the mount point whose mnt_writeopcount is to be decremented;
+ * a NULL mp is accepted and ignored. Panics if the count becomes
+ * negative. The notification is a wakeup() on &mp->mnt_writeopcount,
+ * issued only while MNTK_SUSPEND is set and the count has reached zero.
+ */
+void
+vn_finished_write(mp)
+ struct mount *mp;
+{
+
+ if (mp == NULL)
+ return;
+ mp->mnt_writeopcount--;
+ if (mp->mnt_writeopcount < 0)
+ panic("vn_finished_write: neg cnt");
+ if ((mp->mnt_kern_flag & MNTK_SUSPEND) != 0 &&
+ mp->mnt_writeopcount <= 0)
+ wakeup(&mp->mnt_writeopcount);
+}
+
+/*
+ * Request a filesystem to suspend write operations.
+ *
+ * Does nothing if MNTK_SUSPEND is already set on mp. Otherwise sets
+ * MNTK_SUSPEND, and if writes are still counted in mnt_writeopcount
+ * sleeps once on &mp->mnt_writeopcount (wait message "suspwt"); the
+ * tsleep() result is ignored and the count is not re-checked after
+ * the wakeup. It then calls VFS_SYNC() with MNT_WAIT using the current
+ * process and its credentials, whose return value is also ignored,
+ * and finally sets MNTK_SUSPENDED.
+ */
+void
+vfs_write_suspend(mp)
+ struct mount *mp;
+{
+ struct proc *p = curproc;
+
+ if (mp->mnt_kern_flag & MNTK_SUSPEND)
+ return;
+ mp->mnt_kern_flag |= MNTK_SUSPEND;
+ if (mp->mnt_writeopcount > 0)
+ (void) tsleep(&mp->mnt_writeopcount, PUSER - 1, "suspwt", 0);
+ VFS_SYNC(mp, MNT_WAIT, p->p_ucred, p);
+ mp->mnt_kern_flag |= MNTK_SUSPENDED;
+}
+
+/*
+ * Request a filesystem to resume write operations.
+ *
+ * Does nothing unless MNTK_SUSPEND is set on mp. Otherwise clears both
+ * MNTK_SUSPEND and MNTK_SUSPENDED and issues wakeup() on the two sleep
+ * channels used by the suspension code: &mp->mnt_writeopcount (slept
+ * on by vfs_write_suspend()) and &mp->mnt_flag (slept on by
+ * vn_start_write() and vn_write_suspend_wait()).
+ */
+void
+vfs_write_resume(mp)
+ struct mount *mp;
+{
+
+ if ((mp->mnt_kern_flag & MNTK_SUSPEND) == 0)
+ return;
+ mp->mnt_kern_flag &= ~(MNTK_SUSPEND | MNTK_SUSPENDED);
+ wakeup(&mp->mnt_writeopcount);
+ wakeup(&mp->mnt_flag);
+}
+
static int
filt_vnattach(struct knote *kn)
{
diff --git a/sys/kern/vnode_if.src b/sys/kern/vnode_if.src
index 479cc92..bda7e98 100644
--- a/sys/kern/vnode_if.src
+++ b/sys/kern/vnode_if.src
@@ -394,6 +394,22 @@ vop_strategy {
};
#
+#% getwritemount vp = = =
+#
+vop_getwritemount {
+ IN struct vnode *vp;
+ OUT struct mount **mpp;
+};
+
+#
+#% copyonwrite vp L L L
+#
+vop_copyonwrite {
+ IN struct vnode *vp;
+ IN struct buf *bp;
+};
+
+#
#% print vp = = =
#
vop_print {
diff --git a/sys/miscfs/fdesc/fdesc_vnops.c b/sys/miscfs/fdesc/fdesc_vnops.c
index 772a94c..72c7cae 100644
--- a/sys/miscfs/fdesc/fdesc_vnops.c
+++ b/sys/miscfs/fdesc/fdesc_vnops.c
@@ -383,6 +383,8 @@ fdesc_setattr(ap)
{
struct filedesc *fdp = ap->a_p->p_fd;
struct vattr *vap = ap->a_vap;
+ struct vnode *vp;
+ struct mount *mp;
struct file *fp;
unsigned fd;
int error;
@@ -403,8 +405,11 @@ fdesc_setattr(ap)
switch (fp->f_type) {
case DTYPE_FIFO:
case DTYPE_VNODE:
- error = VOP_SETATTR((struct vnode *) fp->f_data, ap->a_vap,
- ap->a_cred, ap->a_p);
+ vp = (struct vnode *)fp->f_data;
+ if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
+ return (error);
+ error = VOP_SETATTR(vp, ap->a_vap, ap->a_cred, ap->a_p);
+ vn_finished_write(mp);
break;
default:
diff --git a/sys/miscfs/fifofs/fifo_vnops.c b/sys/miscfs/fifofs/fifo_vnops.c
index 5bd13a7..03e3e37 100644
--- a/sys/miscfs/fifofs/fifo_vnops.c
+++ b/sys/miscfs/fifofs/fifo_vnops.c
@@ -107,6 +107,7 @@ static struct vnodeopv_entry_desc fifo_vnodeop_entries[] = {
{ &vop_open_desc, (vop_t *) fifo_open },
{ &vop_pathconf_desc, (vop_t *) fifo_pathconf },
{ &vop_poll_desc, (vop_t *) fifo_poll },
+ { &vop_getwritemount_desc, (vop_t *) vop_stdgetwritemount },
{ &vop_print_desc, (vop_t *) fifo_print },
{ &vop_read_desc, (vop_t *) fifo_read },
{ &vop_readdir_desc, (vop_t *) fifo_badop },
diff --git a/sys/miscfs/specfs/spec_vnops.c b/sys/miscfs/specfs/spec_vnops.c
index cbe52f4..baf40c3 100644
--- a/sys/miscfs/specfs/spec_vnops.c
+++ b/sys/miscfs/specfs/spec_vnops.c
@@ -88,6 +88,7 @@ static struct vnodeopv_entry_desc spec_vnodeop_entries[] = {
{ &vop_open_desc, (vop_t *) spec_open },
{ &vop_pathconf_desc, (vop_t *) vop_stdpathconf },
{ &vop_poll_desc, (vop_t *) spec_poll },
+ { &vop_getwritemount_desc, (vop_t *) vop_stdgetwritemount },
{ &vop_print_desc, (vop_t *) spec_print },
{ &vop_read_desc, (vop_t *) spec_read },
{ &vop_readdir_desc, (vop_t *) vop_panic },
@@ -415,16 +416,29 @@ spec_strategy(ap)
struct buf *bp;
struct vnode *vp;
struct mount *mp;
+ int error;
bp = ap->a_bp;
- if ((bp->b_iocmd == BIO_WRITE) && (LIST_FIRST(&bp->b_dep)) != NULL)
- buf_start(bp);
-
+ vp = ap->a_vp;
+ if ((bp->b_iocmd == BIO_WRITE)) {
+ if (vp->v_mount != NULL &&
+ (vp->v_mount->mnt_kern_flag & MNTK_SUSPENDED) != 0)
+ panic("spec_strategy: bad I/O");
+ if (LIST_FIRST(&bp->b_dep) != NULL)
+ buf_start(bp);
+ if ((vp->v_flag & VCOPYONWRITE) &&
+ (error = VOP_COPYONWRITE(vp, bp)) != 0 &&
+ error != EOPNOTSUPP) {
+ bp->b_io.bio_error = error;
+ bp->b_io.bio_flags |= BIO_ERROR;
+ biodone(&bp->b_io);
+ return (0);
+ }
+ }
/*
* Collect statistics on synchronous and asynchronous read
* and write counts for disks that have associated filesystems.
*/
- vp = ap->a_vp;
if (vn_isdisk(vp, NULL) && (mp = vp->v_specmountpoint) != NULL) {
if (bp->b_iocmd == BIO_WRITE) {
if (bp->b_lock.lk_lockholder == LK_KERNPROC)
diff --git a/sys/miscfs/union/union_subr.c b/sys/miscfs/union/union_subr.c
index 6b88bef..d1d6e31 100644
--- a/sys/miscfs/union/union_subr.c
+++ b/sys/miscfs/union/union_subr.c
@@ -747,6 +747,7 @@ union_copyup(un, docopy, cred, p)
struct proc *p;
{
int error;
+ struct mount *mp;
struct vnode *lvp, *uvp;
/*
@@ -759,9 +760,12 @@ union_copyup(un, docopy, cred, p)
if (error)
return (error);
- error = union_vn_create(&uvp, un, p);
- if (error)
+ if ((error = vn_start_write(un->un_dirvp, &mp, V_WAIT | PCATCH)) != 0)
return (error);
+ if ((error = union_vn_create(&uvp, un, p)) != 0) {
+ vn_finished_write(mp);
+ return (error);
+ }
lvp = un->un_lowervp;
@@ -785,6 +789,7 @@ union_copyup(un, docopy, cred, p)
}
VOP_UNLOCK(uvp, 0, p);
+ vn_finished_write(mp);
union_newupper(un, uvp);
KASSERT(uvp->v_usecount > 0, ("copy: uvp refcount 0: %d", uvp->v_usecount));
union_vn_close(uvp, FWRITE, cred, p);
@@ -910,11 +915,15 @@ union_mkshadow(um, dvp, cnp, vpp)
struct vattr va;
struct proc *p = cnp->cn_proc;
struct componentname cn;
+ struct mount *mp;
- error = union_relookup(um, dvp, vpp, cnp, &cn,
- cnp->cn_nameptr, cnp->cn_namelen);
- if (error)
+ if ((error = vn_start_write(dvp, &mp, V_WAIT | PCATCH)) != 0)
+ return (error);
+ if ((error = union_relookup(um, dvp, vpp, cnp, &cn,
+ cnp->cn_nameptr, cnp->cn_namelen)) != 0) {
+ vn_finished_write(mp);
return (error);
+ }
if (*vpp) {
if (cn.cn_flags & HASBUF) {
@@ -925,6 +934,7 @@ union_mkshadow(um, dvp, cnp, vpp)
vrele(*vpp);
else
vput(*vpp);
+ vn_finished_write(mp);
*vpp = NULLVP;
return (EEXIST);
}
@@ -950,6 +960,7 @@ union_mkshadow(um, dvp, cnp, vpp)
cn.cn_flags &= ~HASBUF;
}
/*vput(dvp);*/
+ vn_finished_write(mp);
return (error);
}
@@ -973,10 +984,15 @@ union_mkwhiteout(um, dvp, cnp, path)
struct proc *p = cnp->cn_proc;
struct vnode *wvp;
struct componentname cn;
+ struct mount *mp;
+ if ((error = vn_start_write(dvp, &mp, V_WAIT | PCATCH)) != 0)
+ return (error);
error = union_relookup(um, dvp, &wvp, cnp, &cn, path, strlen(path));
- if (error)
+ if (error) {
+ vn_finished_write(mp);
return (error);
+ }
if (wvp) {
if (cn.cn_flags & HASBUF) {
@@ -987,6 +1003,7 @@ union_mkwhiteout(um, dvp, cnp, path)
vrele(wvp);
else
vput(wvp);
+ vn_finished_write(mp);
return (EEXIST);
}
@@ -998,6 +1015,7 @@ union_mkwhiteout(um, dvp, cnp, path)
zfree(namei_zone, cn.cn_pnbuf);
cn.cn_flags &= ~HASBUF;
}
+ vn_finished_write(mp);
return (error);
}
diff --git a/sys/miscfs/union/union_vnops.c b/sys/miscfs/union/union_vnops.c
index 1c5ed5d..d7b95f3 100644
--- a/sys/miscfs/union/union_vnops.c
+++ b/sys/miscfs/union/union_vnops.c
@@ -93,6 +93,7 @@ static int union_print __P((struct vop_print_args *ap));
static int union_read __P((struct vop_read_args *ap));
static int union_readdir __P((struct vop_readdir_args *ap));
static int union_readlink __P((struct vop_readlink_args *ap));
+static int union_getwritemount __P((struct vop_getwritemount_args *ap));
static int union_reclaim __P((struct vop_reclaim_args *ap));
static int union_remove __P((struct vop_remove_args *ap));
static int union_rename __P((struct vop_rename_args *ap));
@@ -1681,6 +1682,20 @@ union_readlink(ap)
return (error);
}
+/*
+ * union_getwritemount:
+ *
+ * Forward VOP_GETWRITEMOUNT() to the vnode obtained from
+ * UPPERVP(ap->a_vp), storing the result through ap->a_mpp and
+ * returning whatever that call returns. Panics if UPPERVP() yields
+ * NULL.
+ */
+static int
+union_getwritemount(ap)
+ struct vop_getwritemount_args /* {
+ struct vnode *a_vp;
+ struct mount **a_mpp;
+ } */ *ap;
+{
+ struct vnode *vp = UPPERVP(ap->a_vp);
+
+ if (vp == NULL)
+ panic("union: missing upper layer in getwritemount");
+ return(VOP_GETWRITEMOUNT(vp, ap->a_mpp));
+}
+
/*
* union_inactive:
*
@@ -1963,6 +1978,7 @@ static struct vnodeopv_entry_desc union_vnodeop_entries[] = {
{ &vop_read_desc, (vop_t *) union_read },
{ &vop_readdir_desc, (vop_t *) union_readdir },
{ &vop_readlink_desc, (vop_t *) union_readlink },
+ { &vop_getwritemount_desc, (vop_t *) union_getwritemount },
{ &vop_reclaim_desc, (vop_t *) union_reclaim },
{ &vop_remove_desc, (vop_t *) union_remove },
{ &vop_rename_desc, (vop_t *) union_rename },
diff --git a/sys/nfs/nfs_serv.c b/sys/nfs/nfs_serv.c
index 06ce9ed..0334f74 100644
--- a/sys/nfs/nfs_serv.c
+++ b/sys/nfs/nfs_serv.c
@@ -325,10 +325,18 @@ nfsrv_setattr(nfsd, slp, procp, mrq)
struct mbuf *mb, *mb2, *mreq;
u_quad_t frev;
struct timespec guard;
+ struct mount *mp = NULL;
nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
fhp = &nfh.fh_generic;
nfsm_srvmtofh(fhp);
+ if ((mp = vfs_getvfs(&fhp->fh_fsid)) == NULL)
+ return (ESTALE);
+ if ((error = VFS_FHTOVP(mp, &fhp->fh_fid, &vp)) != NULL)
+ return (error);
+ (void) vn_start_write(vp, &mp, V_WAIT);
+ vput(vp);
+ vp = NULL;
VATTR_NULL(vap);
if (v3) {
nfsm_srvsattr(vap);
@@ -440,6 +448,7 @@ out:
nfsmout:
if (vp)
vput(vp);
+ vn_finished_write(mp);
return(error);
}
@@ -1039,6 +1048,7 @@ nfsrv_write(nfsd, slp, procp, mrq)
struct uio io, *uiop = &io;
off_t off;
u_quad_t frev;
+ struct mount *mntp = NULL;
nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
if (mrep == NULL) {
@@ -1048,6 +1058,13 @@ nfsrv_write(nfsd, slp, procp, mrq)
}
fhp = &nfh.fh_generic;
nfsm_srvmtofh(fhp);
+ if ((mntp = vfs_getvfs(&fhp->fh_fsid)) == NULL)
+ return (ESTALE);
+ if ((error = VFS_FHTOVP(mntp, &fhp->fh_fid, &vp)) != NULL)
+ return (error);
+ (void) vn_start_write(vp, &mntp, V_WAIT);
+ vput(vp);
+ vp = NULL;
if (v3) {
nfsm_dissect(tl, u_int32_t *, 5 * NFSX_UNSIGNED);
off = fxdr_hyper(tl);
@@ -1205,6 +1222,7 @@ nfsrv_write(nfsd, slp, procp, mrq)
nfsmout:
if (vp)
vput(vp);
+ vn_finished_write(mntp);
return(error);
}
@@ -1241,6 +1259,7 @@ nfsrv_writegather(ndp, slp, procp, mrq)
struct vnode *vp = NULL;
struct uio io, *uiop = &io;
u_quad_t frev, cur_usec;
+ struct mount *mntp = NULL;
nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
#ifndef nolint
@@ -1444,8 +1463,16 @@ loop1:
mp = mp->m_next;
}
if (!error) {
+ if (vn_start_write(vp, &mntp, V_NOWAIT) != 0) {
+ VOP_UNLOCK(vp, 0, procp);
+ error = vn_start_write(NULL, &mntp, V_WAIT);
+ vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, procp);
+ }
+ }
+ if (!error) {
error = VOP_WRITE(vp, uiop, ioflags, cred);
nfsstats.srvvop_writes++;
+ vn_finished_write(mntp);
}
FREE((caddr_t)iov, M_TEMP);
}
@@ -1620,6 +1647,8 @@ nfsrv_create(nfsd, slp, procp, mrq)
fhandle_t *fhp;
u_quad_t frev, tempsize;
u_char cverf[NFSX_V3CREATEVERF];
+ struct mount *mp = NULL;
+ struct vnode *vp;
nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
#ifndef nolint
@@ -1629,6 +1658,12 @@ nfsrv_create(nfsd, slp, procp, mrq)
fhp = &nfh.fh_generic;
nfsm_srvmtofh(fhp);
+ if ((mp = vfs_getvfs(&fhp->fh_fsid)) == NULL)
+ return (ESTALE);
+ if ((error = VFS_FHTOVP(mp, &fhp->fh_fid, &vp)) != NULL)
+ return (error);
+ (void) vn_start_write(vp, &mp, V_WAIT);
+ vput(vp);
nfsm_srvnamesiz(len);
nd.ni_cnd.cn_cred = cred;
@@ -1869,6 +1904,7 @@ nfsmout:
}
if (nd.ni_vp)
vput(nd.ni_vp);
+ vn_finished_write(mp);
return (error);
}
@@ -1901,12 +1937,20 @@ nfsrv_mknod(nfsd, slp, procp, mrq)
nfsfh_t nfh;
fhandle_t *fhp;
u_quad_t frev;
+ struct mount *mp = NULL;
nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
ndclear(&nd);
fhp = &nfh.fh_generic;
nfsm_srvmtofh(fhp);
+ if ((mp = vfs_getvfs(&fhp->fh_fsid)) == NULL)
+ return (ESTALE);
+ if ((error = VFS_FHTOVP(mp, &fhp->fh_fid, &vp)) != NULL)
+ return (error);
+ (void) vn_start_write(vp, &mp, V_WAIT);
+ vput(vp);
+ vp = NULL;
nfsm_srvnamesiz(len);
nd.ni_cnd.cn_cred = cred;
@@ -2030,6 +2074,7 @@ out:
nfsm_srvpostop_attr(0, vap);
}
nfsm_srvwcc_data(dirfor_ret, &dirfor, diraft_ret, &diraft);
+ vn_finished_write(mp);
return (0);
nfsmout:
if (dirp)
@@ -2045,6 +2090,7 @@ nfsmout:
}
if (nd.ni_vp)
vput(nd.ni_vp);
+ vn_finished_write(mp);
return (error);
}
@@ -2075,12 +2121,21 @@ nfsrv_remove(nfsd, slp, procp, mrq)
nfsfh_t nfh;
fhandle_t *fhp;
u_quad_t frev;
+ struct mount *mp = NULL;
+ struct vnode *vp;
nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
ndclear(&nd);
fhp = &nfh.fh_generic;
nfsm_srvmtofh(fhp);
+ if ((mp = vfs_getvfs(&fhp->fh_fsid)) == NULL)
+ return (ESTALE);
+ if ((error = VFS_FHTOVP(mp, &fhp->fh_fid, &vp)) != NULL)
+ return (error);
+ (void) vn_start_write(vp, &mp, V_WAIT);
+ vput(vp);
+ vp = NULL;
nfsm_srvnamesiz(len);
nd.ni_cnd.cn_cred = cred;
@@ -2137,6 +2192,7 @@ nfsmout:
}
if (nd.ni_vp)
vput(nd.ni_vp);
+ vn_finished_write(mp);
return(error);
}
@@ -2170,6 +2226,8 @@ nfsrv_rename(nfsd, slp, procp, mrq)
fhandle_t *ffhp, *tfhp;
u_quad_t frev;
uid_t saved_uid;
+ struct mount *mp = NULL;
+ struct vnode *vp;
nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
#ifndef nolint
@@ -2186,6 +2244,13 @@ nfsrv_rename(nfsd, slp, procp, mrq)
ndclear(&tond);
nfsm_srvmtofh(ffhp);
+ if ((mp = vfs_getvfs(&ffhp->fh_fsid)) == NULL)
+ return (ESTALE);
+ if ((error = VFS_FHTOVP(mp, &ffhp->fh_fid, &vp)) != NULL)
+ return (error);
+ (void) vn_start_write(vp, &mp, V_WAIT);
+ vput(vp);
+ vp = NULL;
nfsm_srvnamesiz(len);
/*
* Remember our original uid so that we can reset cr_uid before
@@ -2360,6 +2425,7 @@ nfsmout:
if (fromnd.ni_vp)
vrele(fromnd.ni_vp);
+ vn_finished_write(mp);
return (error);
}
@@ -2390,6 +2456,7 @@ nfsrv_link(nfsd, slp, procp, mrq)
nfsfh_t nfh, dnfh;
fhandle_t *fhp, *dfhp;
u_quad_t frev;
+ struct mount *mp = NULL;
nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
ndclear(&nd);
@@ -2397,6 +2464,13 @@ nfsrv_link(nfsd, slp, procp, mrq)
fhp = &nfh.fh_generic;
dfhp = &dnfh.fh_generic;
nfsm_srvmtofh(fhp);
+ if ((mp = vfs_getvfs(&fhp->fh_fsid)) == NULL)
+ return (ESTALE);
+ if ((error = VFS_FHTOVP(mp, &fhp->fh_fid, &vp)) != NULL)
+ return (error);
+ (void) vn_start_write(vp, &mp, V_WAIT);
+ vput(vp);
+ vp = NULL;
nfsm_srvmtofh(dfhp);
nfsm_srvnamesiz(len);
@@ -2475,6 +2549,7 @@ nfsmout:
}
if (nd.ni_vp)
vrele(nd.ni_vp);
+ vn_finished_write(mp);
return(error);
}
@@ -2508,12 +2583,21 @@ nfsrv_symlink(nfsd, slp, procp, mrq)
nfsfh_t nfh;
fhandle_t *fhp;
u_quad_t frev;
+ struct mount *mp = NULL;
+ struct vnode *vp;
nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
ndclear(&nd);
fhp = &nfh.fh_generic;
nfsm_srvmtofh(fhp);
+ if ((mp = vfs_getvfs(&fhp->fh_fsid)) == NULL)
+ return (ESTALE);
+ if ((error = VFS_FHTOVP(mp, &fhp->fh_fid, &vp)) != NULL)
+ return (error);
+ (void) vn_start_write(vp, &mp, V_WAIT);
+ vput(vp);
+ vp = NULL;
nfsm_srvnamesiz(len);
nd.ni_cnd.cn_cred = cred;
nd.ni_cnd.cn_nameiop = CREATE;
@@ -2651,6 +2735,7 @@ nfsmout:
if (pathcp)
FREE(pathcp, M_TEMP);
+ vn_finished_write(mp);
return (error);
}
@@ -2685,12 +2770,21 @@ nfsrv_mkdir(nfsd, slp, procp, mrq)
nfsfh_t nfh;
fhandle_t *fhp;
u_quad_t frev;
+ struct mount *mp = NULL;
+ struct vnode *vp;
nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
ndclear(&nd);
fhp = &nfh.fh_generic;
nfsm_srvmtofh(fhp);
+ if ((mp = vfs_getvfs(&fhp->fh_fsid)) == NULL)
+ return (ESTALE);
+ if ((error = VFS_FHTOVP(mp, &fhp->fh_fid, &vp)) != NULL)
+ return (error);
+ (void) vn_start_write(vp, &mp, V_WAIT);
+ vput(vp);
+ vp = NULL;
nfsm_srvnamesiz(len);
nd.ni_cnd.cn_cred = cred;
nd.ni_cnd.cn_nameiop = CREATE;
@@ -2787,6 +2881,7 @@ nfsmout:
else
vrele(nd.ni_vp);
}
+ vn_finished_write(mp);
return (error);
}
@@ -2817,12 +2912,20 @@ nfsrv_rmdir(nfsd, slp, procp, mrq)
fhandle_t *fhp;
struct nameidata nd;
u_quad_t frev;
+ struct mount *mp = NULL;
nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
ndclear(&nd);
fhp = &nfh.fh_generic;
nfsm_srvmtofh(fhp);
+ if ((mp = vfs_getvfs(&fhp->fh_fsid)) == NULL)
+ return (ESTALE);
+ if ((error = VFS_FHTOVP(mp, &fhp->fh_fid, &vp)) != NULL)
+ return (error);
+ (void) vn_start_write(vp, &mp, V_WAIT);
+ vput(vp);
+ vp = NULL;
nfsm_srvnamesiz(len);
nd.ni_cnd.cn_cred = cred;
nd.ni_cnd.cn_nameiop = DELETE;
@@ -2895,6 +2998,7 @@ nfsmout:
if (nd.ni_vp)
vput(nd.ni_vp);
+ vn_finished_write(mp);
return(error);
}
@@ -3588,6 +3692,7 @@ nfsrv_commit(nfsd, slp, procp, mrq)
char *cp2;
struct mbuf *mb, *mb2, *mreq;
u_quad_t frev, off;
+ struct mount *mp = NULL;
nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
#ifndef nolint
@@ -3595,6 +3700,13 @@ nfsrv_commit(nfsd, slp, procp, mrq)
#endif
fhp = &nfh.fh_generic;
nfsm_srvmtofh(fhp);
+ if ((mp = vfs_getvfs(&fhp->fh_fsid)) == NULL)
+ return (ESTALE);
+ if ((error = VFS_FHTOVP(mp, &fhp->fh_fid, &vp)) != NULL)
+ return (error);
+ (void) vn_start_write(vp, &mp, V_WAIT);
+ vput(vp);
+ vp = NULL;
nfsm_dissect(tl, u_int32_t *, 3 * NFSX_UNSIGNED);
/*
@@ -3697,6 +3809,7 @@ nfsrv_commit(nfsd, slp, procp, mrq)
nfsmout:
if (vp)
vput(vp);
+ vn_finished_write(mp);
return(error);
}
@@ -4065,4 +4178,3 @@ nfsrv_access(vp, flags, cred, rdonly, p, override)
return error;
}
#endif /* NFS_NOSERVER */
-
diff --git a/sys/nfsserver/nfs_serv.c b/sys/nfsserver/nfs_serv.c
index 06ce9ed..0334f74 100644
--- a/sys/nfsserver/nfs_serv.c
+++ b/sys/nfsserver/nfs_serv.c
@@ -325,10 +325,18 @@ nfsrv_setattr(nfsd, slp, procp, mrq)
struct mbuf *mb, *mb2, *mreq;
u_quad_t frev;
struct timespec guard;
+ struct mount *mp = NULL;
nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
fhp = &nfh.fh_generic;
nfsm_srvmtofh(fhp);
+ if ((mp = vfs_getvfs(&fhp->fh_fsid)) == NULL)
+ return (ESTALE);
+ if ((error = VFS_FHTOVP(mp, &fhp->fh_fid, &vp)) != NULL)
+ return (error);
+ (void) vn_start_write(vp, &mp, V_WAIT);
+ vput(vp);
+ vp = NULL;
VATTR_NULL(vap);
if (v3) {
nfsm_srvsattr(vap);
@@ -440,6 +448,7 @@ out:
nfsmout:
if (vp)
vput(vp);
+ vn_finished_write(mp);
return(error);
}
@@ -1039,6 +1048,7 @@ nfsrv_write(nfsd, slp, procp, mrq)
struct uio io, *uiop = &io;
off_t off;
u_quad_t frev;
+ struct mount *mntp = NULL;
nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
if (mrep == NULL) {
@@ -1048,6 +1058,13 @@ nfsrv_write(nfsd, slp, procp, mrq)
}
fhp = &nfh.fh_generic;
nfsm_srvmtofh(fhp);
+ if ((mntp = vfs_getvfs(&fhp->fh_fsid)) == NULL)
+ return (ESTALE);
+ if ((error = VFS_FHTOVP(mntp, &fhp->fh_fid, &vp)) != NULL)
+ return (error);
+ (void) vn_start_write(vp, &mntp, V_WAIT);
+ vput(vp);
+ vp = NULL;
if (v3) {
nfsm_dissect(tl, u_int32_t *, 5 * NFSX_UNSIGNED);
off = fxdr_hyper(tl);
@@ -1205,6 +1222,7 @@ nfsrv_write(nfsd, slp, procp, mrq)
nfsmout:
if (vp)
vput(vp);
+ vn_finished_write(mntp);
return(error);
}
@@ -1241,6 +1259,7 @@ nfsrv_writegather(ndp, slp, procp, mrq)
struct vnode *vp = NULL;
struct uio io, *uiop = &io;
u_quad_t frev, cur_usec;
+ struct mount *mntp = NULL;
nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
#ifndef nolint
@@ -1444,8 +1463,16 @@ loop1:
mp = mp->m_next;
}
if (!error) {
+ if (vn_start_write(vp, &mntp, V_NOWAIT) != 0) {
+ VOP_UNLOCK(vp, 0, procp);
+ error = vn_start_write(NULL, &mntp, V_WAIT);
+ vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, procp);
+ }
+ }
+ if (!error) {
error = VOP_WRITE(vp, uiop, ioflags, cred);
nfsstats.srvvop_writes++;
+ vn_finished_write(mntp);
}
FREE((caddr_t)iov, M_TEMP);
}
@@ -1620,6 +1647,8 @@ nfsrv_create(nfsd, slp, procp, mrq)
fhandle_t *fhp;
u_quad_t frev, tempsize;
u_char cverf[NFSX_V3CREATEVERF];
+ struct mount *mp = NULL;
+ struct vnode *vp;
nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
#ifndef nolint
@@ -1629,6 +1658,12 @@ nfsrv_create(nfsd, slp, procp, mrq)
fhp = &nfh.fh_generic;
nfsm_srvmtofh(fhp);
+ if ((mp = vfs_getvfs(&fhp->fh_fsid)) == NULL)
+ return (ESTALE);
+ if ((error = VFS_FHTOVP(mp, &fhp->fh_fid, &vp)) != NULL)
+ return (error);
+ (void) vn_start_write(vp, &mp, V_WAIT);
+ vput(vp);
nfsm_srvnamesiz(len);
nd.ni_cnd.cn_cred = cred;
@@ -1869,6 +1904,7 @@ nfsmout:
}
if (nd.ni_vp)
vput(nd.ni_vp);
+ vn_finished_write(mp);
return (error);
}
@@ -1901,12 +1937,20 @@ nfsrv_mknod(nfsd, slp, procp, mrq)
nfsfh_t nfh;
fhandle_t *fhp;
u_quad_t frev;
+ struct mount *mp = NULL;
nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
ndclear(&nd);
fhp = &nfh.fh_generic;
nfsm_srvmtofh(fhp);
+ if ((mp = vfs_getvfs(&fhp->fh_fsid)) == NULL)
+ return (ESTALE);
+ if ((error = VFS_FHTOVP(mp, &fhp->fh_fid, &vp)) != NULL)
+ return (error);
+ (void) vn_start_write(vp, &mp, V_WAIT);
+ vput(vp);
+ vp = NULL;
nfsm_srvnamesiz(len);
nd.ni_cnd.cn_cred = cred;
@@ -2030,6 +2074,7 @@ out:
nfsm_srvpostop_attr(0, vap);
}
nfsm_srvwcc_data(dirfor_ret, &dirfor, diraft_ret, &diraft);
+ vn_finished_write(mp);
return (0);
nfsmout:
if (dirp)
@@ -2045,6 +2090,7 @@ nfsmout:
}
if (nd.ni_vp)
vput(nd.ni_vp);
+ vn_finished_write(mp);
return (error);
}
@@ -2075,12 +2121,21 @@ nfsrv_remove(nfsd, slp, procp, mrq)
nfsfh_t nfh;
fhandle_t *fhp;
u_quad_t frev;
+ struct mount *mp = NULL;
+ struct vnode *vp;
nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
ndclear(&nd);
fhp = &nfh.fh_generic;
nfsm_srvmtofh(fhp);
+ if ((mp = vfs_getvfs(&fhp->fh_fsid)) == NULL)
+ return (ESTALE);
+ if ((error = VFS_FHTOVP(mp, &fhp->fh_fid, &vp)) != NULL)
+ return (error);
+ (void) vn_start_write(vp, &mp, V_WAIT);
+ vput(vp);
+ vp = NULL;
nfsm_srvnamesiz(len);
nd.ni_cnd.cn_cred = cred;
@@ -2137,6 +2192,7 @@ nfsmout:
}
if (nd.ni_vp)
vput(nd.ni_vp);
+ vn_finished_write(mp);
return(error);
}
@@ -2170,6 +2226,8 @@ nfsrv_rename(nfsd, slp, procp, mrq)
fhandle_t *ffhp, *tfhp;
u_quad_t frev;
uid_t saved_uid;
+ struct mount *mp = NULL;
+ struct vnode *vp;
nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
#ifndef nolint
@@ -2186,6 +2244,13 @@ nfsrv_rename(nfsd, slp, procp, mrq)
ndclear(&tond);
nfsm_srvmtofh(ffhp);
+ if ((mp = vfs_getvfs(&ffhp->fh_fsid)) == NULL)
+ return (ESTALE);
+ if ((error = VFS_FHTOVP(mp, &ffhp->fh_fid, &vp)) != NULL)
+ return (error);
+ (void) vn_start_write(vp, &mp, V_WAIT);
+ vput(vp);
+ vp = NULL;
nfsm_srvnamesiz(len);
/*
* Remember our original uid so that we can reset cr_uid before
@@ -2360,6 +2425,7 @@ nfsmout:
if (fromnd.ni_vp)
vrele(fromnd.ni_vp);
+ vn_finished_write(mp);
return (error);
}
@@ -2390,6 +2456,7 @@ nfsrv_link(nfsd, slp, procp, mrq)
nfsfh_t nfh, dnfh;
fhandle_t *fhp, *dfhp;
u_quad_t frev;
+ struct mount *mp = NULL;
nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
ndclear(&nd);
@@ -2397,6 +2464,13 @@ nfsrv_link(nfsd, slp, procp, mrq)
fhp = &nfh.fh_generic;
dfhp = &dnfh.fh_generic;
nfsm_srvmtofh(fhp);
+ if ((mp = vfs_getvfs(&fhp->fh_fsid)) == NULL)
+ return (ESTALE);
+ if ((error = VFS_FHTOVP(mp, &fhp->fh_fid, &vp)) != NULL)
+ return (error);
+ (void) vn_start_write(vp, &mp, V_WAIT);
+ vput(vp);
+ vp = NULL;
nfsm_srvmtofh(dfhp);
nfsm_srvnamesiz(len);
@@ -2475,6 +2549,7 @@ nfsmout:
}
if (nd.ni_vp)
vrele(nd.ni_vp);
+ vn_finished_write(mp);
return(error);
}
@@ -2508,12 +2583,21 @@ nfsrv_symlink(nfsd, slp, procp, mrq)
nfsfh_t nfh;
fhandle_t *fhp;
u_quad_t frev;
+ struct mount *mp = NULL;
+ struct vnode *vp;
nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
ndclear(&nd);
fhp = &nfh.fh_generic;
nfsm_srvmtofh(fhp);
+ if ((mp = vfs_getvfs(&fhp->fh_fsid)) == NULL)
+ return (ESTALE);
+ if ((error = VFS_FHTOVP(mp, &fhp->fh_fid, &vp)) != NULL)
+ return (error);
+ (void) vn_start_write(vp, &mp, V_WAIT);
+ vput(vp);
+ vp = NULL;
nfsm_srvnamesiz(len);
nd.ni_cnd.cn_cred = cred;
nd.ni_cnd.cn_nameiop = CREATE;
@@ -2651,6 +2735,7 @@ nfsmout:
if (pathcp)
FREE(pathcp, M_TEMP);
+ vn_finished_write(mp);
return (error);
}
@@ -2685,12 +2770,21 @@ nfsrv_mkdir(nfsd, slp, procp, mrq)
nfsfh_t nfh;
fhandle_t *fhp;
u_quad_t frev;
+ struct mount *mp = NULL;
+ struct vnode *vp;
nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
ndclear(&nd);
fhp = &nfh.fh_generic;
nfsm_srvmtofh(fhp);
+ if ((mp = vfs_getvfs(&fhp->fh_fsid)) == NULL)
+ return (ESTALE);
+ if ((error = VFS_FHTOVP(mp, &fhp->fh_fid, &vp)) != NULL)
+ return (error);
+ (void) vn_start_write(vp, &mp, V_WAIT);
+ vput(vp);
+ vp = NULL;
nfsm_srvnamesiz(len);
nd.ni_cnd.cn_cred = cred;
nd.ni_cnd.cn_nameiop = CREATE;
@@ -2787,6 +2881,7 @@ nfsmout:
else
vrele(nd.ni_vp);
}
+ vn_finished_write(mp);
return (error);
}
@@ -2817,12 +2912,20 @@ nfsrv_rmdir(nfsd, slp, procp, mrq)
fhandle_t *fhp;
struct nameidata nd;
u_quad_t frev;
+ struct mount *mp = NULL;
nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
ndclear(&nd);
fhp = &nfh.fh_generic;
nfsm_srvmtofh(fhp);
+ if ((mp = vfs_getvfs(&fhp->fh_fsid)) == NULL)
+ return (ESTALE);
+ if ((error = VFS_FHTOVP(mp, &fhp->fh_fid, &vp)) != NULL)
+ return (error);
+ (void) vn_start_write(vp, &mp, V_WAIT);
+ vput(vp);
+ vp = NULL;
nfsm_srvnamesiz(len);
nd.ni_cnd.cn_cred = cred;
nd.ni_cnd.cn_nameiop = DELETE;
@@ -2895,6 +2998,7 @@ nfsmout:
if (nd.ni_vp)
vput(nd.ni_vp);
+ vn_finished_write(mp);
return(error);
}
@@ -3588,6 +3692,7 @@ nfsrv_commit(nfsd, slp, procp, mrq)
char *cp2;
struct mbuf *mb, *mb2, *mreq;
u_quad_t frev, off;
+ struct mount *mp = NULL;
nfsdbprintf(("%s %d\n", __FILE__, __LINE__));
#ifndef nolint
@@ -3595,6 +3700,13 @@ nfsrv_commit(nfsd, slp, procp, mrq)
#endif
fhp = &nfh.fh_generic;
nfsm_srvmtofh(fhp);
+ if ((mp = vfs_getvfs(&fhp->fh_fsid)) == NULL)
+ return (ESTALE);
+ if ((error = VFS_FHTOVP(mp, &fhp->fh_fid, &vp)) != NULL)
+ return (error);
+ (void) vn_start_write(vp, &mp, V_WAIT);
+ vput(vp);
+ vp = NULL;
nfsm_dissect(tl, u_int32_t *, 3 * NFSX_UNSIGNED);
/*
@@ -3697,6 +3809,7 @@ nfsrv_commit(nfsd, slp, procp, mrq)
nfsmout:
if (vp)
vput(vp);
+ vn_finished_write(mp);
return(error);
}
@@ -4065,4 +4178,3 @@ nfsrv_access(vp, flags, cred, rdonly, p, override)
return error;
}
#endif /* NFS_NOSERVER */
-
diff --git a/sys/svr4/svr4_fcntl.c b/sys/svr4/svr4_fcntl.c
index 4040030..c65f345 100644
--- a/sys/svr4/svr4_fcntl.c
+++ b/sys/svr4/svr4_fcntl.c
@@ -247,6 +247,7 @@ fd_revoke(p, fd)
struct filedesc *fdp = p->p_fd;
struct file *fp;
struct vnode *vp;
+ struct mount *mp;
struct vattr vattr;
int error, *retval;
@@ -271,8 +272,11 @@ fd_revoke(p, fd)
(error = suser(p)) != 0)
goto out;
+ if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
+ goto out;
if (vcount(vp) > 1)
VOP_REVOKE(vp, REVOKEALL);
+ vn_finished_write(mp);
out:
vrele(vp);
return error;
diff --git a/sys/sys/buf.h b/sys/sys/buf.h
index bc8203f..116e011 100644
--- a/sys/sys/buf.h
+++ b/sys/sys/buf.h
@@ -466,6 +466,7 @@ buf_countdeps(struct buf *bp, int i)
/* Flags to low-level allocation routines. */
#define B_CLRBUF 0x01 /* Request allocated buffer be cleared. */
#define B_SYNC 0x02 /* Do all allocations synchronously. */
+#define B_METAONLY 0x04 /* Return indirect block buffer. */
#ifdef _KERNEL
extern int nbuf; /* The number of buffer headers */
diff --git a/sys/sys/proc.h b/sys/sys/proc.h
index d215351..fb80e5b 100644
--- a/sys/sys/proc.h
+++ b/sys/sys/proc.h
@@ -285,6 +285,7 @@ struct proc {
/* Marked a kernel thread */
#define P_BUFEXHAUST 0x100000 /* dirty buffers flush is in progress */
#define P_KTHREADP 0x200000 /* Process is really a kernel thread */
+#define P_COWINPROGRESS 0x400000 /* Snapshot copy-on-write in progress */
#define P_DEADLKTREAT 0x800000 /* lock aquisition - deadlock treatment */
diff --git a/sys/sys/vnode.h b/sys/sys/vnode.h
index 5817855..3da7897 100644
--- a/sys/sys/vnode.h
+++ b/sys/sys/vnode.h
@@ -158,7 +158,7 @@ struct vnode {
/* open for business 0x00800 */
/* open for business 0x01000 */
#define VOBJBUF 0x02000 /* Allocate buffers in VM object */
-/* open for business 0x04000 */
+#define VCOPYONWRITE 0x04000 /* vnode is doing copy-on-write */
#define VAGE 0x08000 /* Insert vnode at head of free list */
#define VOLOCK 0x10000 /* vnode is locked waiting for an object */
#define VOWANT 0x20000 /* a process is waiting for VOLOCK */
@@ -246,12 +246,15 @@ extern int vttoif_tab[];
/*
* Flags to various vnode functions.
*/
-#define SKIPSYSTEM 0x0001 /* vflush: skip vnodes marked VSYSTEM */
-#define FORCECLOSE 0x0002 /* vflush: force file closure */
-#define WRITECLOSE 0x0004 /* vflush: only close writable files */
-#define DOCLOSE 0x0008 /* vclean: close active files */
-#define V_SAVE 0x0001 /* vinvalbuf: sync file first */
-#define REVOKEALL 0x0001 /* vop_revoke: revoke all aliases */
+#define SKIPSYSTEM 0x0001 /* vflush: skip vnodes marked VSYSTEM */
+#define FORCECLOSE 0x0002 /* vflush: force file closure */
+#define WRITECLOSE 0x0004 /* vflush: only close writable files */
+#define DOCLOSE 0x0008 /* vclean: close active files */
+#define V_SAVE 0x0001 /* vinvalbuf: sync file first */
+#define REVOKEALL 0x0001 /* vop_revoke: revoke all aliases */
+#define V_WAIT 0x0001 /* vn_start_write: sleep for suspend */
+#define V_NOWAIT 0x0002 /* vn_start_write: don't sleep for suspend */
+#define V_XSLEEP 0x0004 /* vn_start_write: just return after sleep */
#define VREF(vp) vref(vp)
@@ -572,6 +575,7 @@ int vrecycle __P((struct vnode *vp, struct simplelock *inter_lkp,
struct proc *p));
int vn_close __P((struct vnode *vp,
int flags, struct ucred *cred, struct proc *p));
+void vn_finished_write __P((struct mount *mp));
int vn_isdisk __P((struct vnode *vp, int *errp));
int vn_lock __P((struct vnode *vp, int flags, struct proc *p));
#ifdef DEBUG_LOCKS
@@ -587,13 +591,18 @@ int vn_rdwr __P((enum uio_rw rw, struct vnode *vp, caddr_t base,
int len, off_t offset, enum uio_seg segflg, int ioflg,
struct ucred *cred, int *aresid, struct proc *p));
int vn_stat __P((struct vnode *vp, struct stat *sb, struct proc *p));
+int vn_start_write __P((struct vnode *vp, struct mount **mpp, int flags));
dev_t vn_todev __P((struct vnode *vp));
+int vn_write_suspend_wait __P((struct vnode *vp, int flags));
+int vn_writechk __P((struct vnode *vp));
int vfs_cache_lookup __P((struct vop_lookup_args *ap));
int vfs_object_create __P((struct vnode *vp, struct proc *p,
struct ucred *cred));
void vfs_timestamp __P((struct timespec *));
-int vn_writechk __P((struct vnode *vp));
+void vfs_write_resume __P((struct mount *mp));
+void vfs_write_suspend __P((struct mount *mp));
int vop_stdbwrite __P((struct vop_bwrite_args *ap));
+int vop_stdgetwritemount __P((struct vop_getwritemount_args *));
int vop_stdislocked __P((struct vop_islocked_args *));
int vop_stdlock __P((struct vop_lock_args *));
int vop_stdunlock __P((struct vop_unlock_args *));
diff --git a/sys/ufs/ffs/README.snapshot b/sys/ufs/ffs/README.snapshot
new file mode 100644
index 0000000..f3177c3
--- /dev/null
+++ b/sys/ufs/ffs/README.snapshot
@@ -0,0 +1,112 @@
+$FreeBSD$
+
+Snapshot Status
+
+As is detailed in the operational information below, snapshots
+are definitely alpha-test code and are NOT yet ready for production
+use. Much remains to be done to make them really useful, but I
+wanted to let folks get a chance to try it out and start reporting
+bugs and other shortcomings. Such reports should be sent to
+Kirk McKusick <mckusick@mckusick.com>.
+
+
+Snapshot Copyright Restrictions
+
+Snapshots have been introduced to FreeBSD with a `Berkeley-style'
+copyright. The file implementing snapshots resides in the sys/ufs/ffs
+directory and is compiled into the generic kernel by default.
+
+
+Using Snapshots
+
+To create a snapshot of your /var filesystem, run the command:
+
+ mount -u -o snapshot /var/snapshot/snap1 /var
+
+This command will take a snapshot of your /var filesystem and
+leave it in the file /var/snapshot/snap1. Note that snapshot
+files must be created in the filesystem that is being snapshotted.
+I use the convention of putting a `snapshot' directory at the
+root of each filesystem into which I can place snapshots.
+You may create up to 20 snapshots per filesystem. Active snapshots
+are recorded in the superblock, so they persist across unmount
+and remount operations and across system reboots. When you
+are done with a snapshot, it can be removed with the `rm'
+command. Snapshots may be removed in any order, however you
+may not get back all the space contained in the snapshot as
+another snapshot may claim some of the blocks that it is releasing.
+Note that the `schg' flag is set on snapshots to ensure that
+not even the root user can write to them. The unlink command
+makes an exception for snapshot files in that it allows them
+to be removed even though they have the `schg' flag set, so it
+is not necessary to clear the `schg' flag before removing a
+snapshot file.
+
+Once you have taken a snapshot, there are three interesting
+things that you can do with it:
+
+1) Run fsck on the snapshot file. Assuming that the filesystem
+ was clean when it was mounted, you should always get a clean
+ (and unchanging) result from running fsck on the snapshot.
+ If you are running with soft updates and rebooted after a
+ crash without cleaning up the filesystem, then fsck of the
+ snapshot may find missing blocks and inodes or inodes with
+ link counts that are too high. I have not yet added the
+ system calls to allow fsck to add these missing resources
+ back to the filesystem - that will be added once the basic
+ snapshot code is working properly. So, view those reports
+ as informational for now.
+
+2) Run dump on the snapshot. You will get a dump that is
+ consistent with the filesystem as of the timestamp of the
+ snapshot. Note that I have not yet changed dump to set the
+ dumpdates file correctly, so do not use this feature in
+ production until that fix is made.
+
+3) Mount the snapshot as a frozen image of the filesystem.
+ To mount the snapshot /var/snapshot/snap1:
+
+ vnconfig -c vn0c /var/snapshot/snap1
+ mount -r /dev/vn0c /mnt
+
+ You can now cruise around your frozen /var filesystem
+ at /mnt. Everything will be in the same state that it
+ was at the time the snapshot was taken. The one exception
+ is that any earlier snapshots will appear as zero length
+ files. When you are done with the mounted snapshot:
+
+ umount /mnt
+ vnconfig -u vn0c
+
+ Note that under some circumstances, the process accessing
+ the frozen filesystem may deadlock. I am aware of this
+ problem, but the solution is not simple. It requires
+ using buffer read locks rather than exclusive locks when
+ traversing the inode indirect blocks. Until this problem
+ is fixed, you should avoid putting mounted snapshots into
+ production.
+
+
+Performance
+
+It takes about 30 seconds to create a snapshot of an 8Gb filesystem.
+Of that time 25 seconds is spent in preparation; filesystem activity
+is only suspended for the final 5 seconds of that period. Snapshot
+removal of an 8Gb filesystem takes about two minutes. Filesystem
+activity is never suspended during snapshot removal.
+
+The suspend time may be expanded by several minutes if a process
+is in the midst of removing many files as all the soft updates
+backlog must be cleared. Generally snapshots do not slow the system
+down appreciably except when removing many small files (i.e., any
+file less than 96Kb whose last block is a fragment) that are claimed
+by a snapshot. Here, the snapshot code must make a copy of every
+released fragment which slows the rate of file removal to about
+twenty files per second once the soft updates backlog limit is
+reached.
+
+
+How Snapshots Work
+
+For more general information on snapshots, please see:
+ http://www.mckusick.com/softdep/
diff --git a/sys/ufs/ffs/ffs_alloc.c b/sys/ufs/ffs/ffs_alloc.c
index 1f24b2b..5efe0e7 100644
--- a/sys/ufs/ffs/ffs_alloc.c
+++ b/sys/ufs/ffs/ffs_alloc.c
@@ -186,6 +186,8 @@ ffs_realloccg(ip, lbprev, bpref, osize, nsize, cred, bpp)
*bpp = 0;
fs = ip->i_fs;
#ifdef DIAGNOSTIC
+ if (ITOV(ip)->v_mount->mnt_kern_flag & MNTK_SUSPENDED)
+ panic("ffs_realloccg: allocation on suspended filesystem");
if ((u_int)osize > fs->fs_bsize || fragoff(fs, osize) != 0 ||
(u_int)nsize > fs->fs_bsize || fragoff(fs, nsize) != 0) {
printf(
@@ -763,6 +765,10 @@ ffs_hashalloc(ip, cg, pref, size, allocator)
long result; /* XXX why not same type as we return? */
int i, icg = cg;
+#ifdef DIAGNOSTIC
+ if (ITOV(ip)->v_mount->mnt_kern_flag & MNTK_SUSPENDED)
+ panic("ffs_hashalloc: allocation on suspended filesystem");
+#endif
fs = ip->i_fs;
/*
* 1: preferred cylinder group
@@ -1311,9 +1317,13 @@ ffs_blkfree(ip, bno, size)
ufs_daddr_t blkno;
int i, error, cg, blk, frags, bbase;
u_int8_t *blksfree;
+ struct vnode *vp;
fs = ip->i_fs;
- VOP_FREEBLKS(ip->i_devvp, fsbtodb(fs, bno), size);
+#ifdef DIAGNOSTIC
+ if ((vp = ITOV(ip)) != NULL && vp->v_mount != NULL &&
+ (vp->v_mount->mnt_kern_flag & MNTK_SUSPENDED))
+ panic("ffs_blkfree: deallocation on suspended filesystem");
if ((u_int)size > fs->fs_bsize || fragoff(fs, size) != 0 ||
fragnum(fs, bno) + numfrags(fs, size) > fs->fs_frag) {
printf("dev=%s, bno = %ld, bsize = %ld, size = %ld, fs = %s\n",
@@ -1321,6 +1331,11 @@ ffs_blkfree(ip, bno, size)
fs->fs_fsmnt);
panic("ffs_blkfree: bad size");
}
+#endif
+ if ((ip->i_devvp->v_flag & VCOPYONWRITE) &&
+ ffs_snapblkfree(ip, bno, size))
+ return;
+ VOP_FREEBLKS(ip->i_devvp, fsbtodb(fs, bno), size);
cg = dtog(fs, bno);
if ((u_int)bno >= fs->fs_size) {
printf("bad block %ld, ino %lu\n",
diff --git a/sys/ufs/ffs/ffs_balloc.c b/sys/ufs/ffs/ffs_balloc.c
index 28cc1ed..92fe379 100644
--- a/sys/ufs/ffs/ffs_balloc.c
+++ b/sys/ufs/ffs/ffs_balloc.c
@@ -125,6 +125,8 @@ ffs_balloc(ap)
* The first NDADDR blocks are direct blocks
*/
if (lbn < NDADDR) {
+ if (flags & B_METAONLY)
+ panic("ffs_balloc: B_METAONLY for direct block");
nb = ip->i_db[lbn];
if (nb != 0 && ip->i_size >= smalllblktosize(fs, lbn + 1)) {
error = bread(vp, lbn, fs->fs_bsize, NOCRED, &bp);
@@ -289,6 +291,13 @@ ffs_balloc(ap)
}
}
/*
+ * If asked only for the indirect block, then return it.
+ */
+ if (flags & B_METAONLY) {
+ *ap->a_bpp = bp;
+ return (0);
+ }
+ /*
* Get the data block, allocating if necessary.
*/
if (nb == 0) {
diff --git a/sys/ufs/ffs/ffs_extern.h b/sys/ufs/ffs/ffs_extern.h
index fe7391b..8e011bb 100644
--- a/sys/ufs/ffs/ffs_extern.h
+++ b/sys/ufs/ffs/ffs_extern.h
@@ -67,6 +67,7 @@ struct vop_balloc_args;
struct vop_bmap_args;
struct vop_fsync_args;
struct vop_reallocblks_args;
+struct vop_copyonwrite_args;
int ffs_alloc __P((struct inode *,
ufs_daddr_t, ufs_daddr_t, int, struct ucred *, ufs_daddr_t *));
@@ -76,6 +77,7 @@ void ffs_blkfree __P((struct inode *, ufs_daddr_t, long));
ufs_daddr_t ffs_blkpref __P((struct inode *, ufs_daddr_t, int, ufs_daddr_t *));
int ffs_bmap __P((struct vop_bmap_args *));
void ffs_clrblock __P((struct fs *, u_char *, ufs_daddr_t));
+int ffs_copyonwrite __P((struct vop_copyonwrite_args *ap));
int ffs_fhtovp __P((struct mount *, struct fid *, struct vnode **));
int ffs_flushfiles __P((struct mount *, int, struct proc *));
void ffs_fragacct __P((struct fs *, int, int32_t [], int));
@@ -89,6 +91,10 @@ int ffs_reallocblks __P((struct vop_reallocblks_args *));
int ffs_realloccg __P((struct inode *,
ufs_daddr_t, ufs_daddr_t, int, int, struct ucred *, struct buf **));
void ffs_setblock __P((struct fs *, u_char *, ufs_daddr_t));
+int ffs_snapblkfree __P((struct inode *freeip, ufs_daddr_t bno, long size));
+int ffs_snapshot __P((struct mount *mp, char *snapfile));
+void ffs_snapshot_mount __P((struct mount *mp));
+void ffs_snapshot_unmount __P((struct mount *mp));
int ffs_statfs __P((struct mount *, struct statfs *, struct proc *));
int ffs_sync __P((struct mount *, int, struct ucred *, struct proc *));
int ffs_truncate __P((struct vnode *, off_t, int, struct ucred *, struct proc *));
diff --git a/sys/ufs/ffs/ffs_snapshot.c b/sys/ufs/ffs/ffs_snapshot.c
new file mode 100644
index 0000000..73da537
--- /dev/null
+++ b/sys/ufs/ffs/ffs_snapshot.c
@@ -0,0 +1,1028 @@
+/*
+ * Copyright 2000 Marshall Kirk McKusick. All Rights Reserved.
+ *
+ * Further information about snapshots can be obtained from:
+ *
+ * Marshall Kirk McKusick http://www.mckusick.com/softdep/
+ * 1614 Oxford Street mckusick@mckusick.com
+ * Berkeley, CA 94709-1608 +1-510-843-9542
+ * USA
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY MARSHALL KIRK MCKUSICK ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL MARSHALL KIRK MCKUSICK BE LIABLE FOR
+ * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)ffs_snapshot.c 8.10 (McKusick) 7/11/00
+ * $FreeBSD$
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/bio.h>
+#include <sys/buf.h>
+#include <sys/proc.h>
+#include <sys/namei.h>
+#include <sys/stat.h>
+#include <sys/malloc.h>
+#include <sys/mount.h>
+#include <sys/resource.h>
+#include <sys/resourcevar.h>
+#include <sys/vnode.h>
+
+#include <ufs/ufs/extattr.h>
+#include <ufs/ufs/quota.h>
+#include <ufs/ufs/ufsmount.h>
+#include <ufs/ufs/inode.h>
+#include <ufs/ufs/ufs_extern.h>
+
+#include <ufs/ffs/fs.h>
+#include <ufs/ffs/ffs_extern.h>
+
+#define KERNCRED proc0.p_ucred
+#define CURPROC curproc
+#define DEBUG
+
+static int indiracct __P((struct vnode *, struct vnode *, int, ufs_daddr_t,
+ int, int, int, int));
+static int snapacct __P((struct vnode *, ufs_daddr_t *, ufs_daddr_t *));
+static int readblock __P((struct buf *, daddr_t));
+
+#ifdef DEBUG
+#include <sys/sysctl.h>
+int snapdebug = 0;
+SYSCTL_INT(_debug, OID_AUTO, snapdebug, CTLFLAG_RW, &snapdebug, 0, "");
+#endif /* DEBUG */
+
+/*
+ * Create a snapshot file and initialize it for the filesystem.
+ *
+ * mp is the mounted filesystem to snapshot and snapfile is the
+ * user-space pathname of the snapshot file to create; the file must
+ * not already exist and its directory must reside on mp. Returns 0 on
+ * success or an errno value: ENOSPC when all FSMAXSNAP superblock slots
+ * are in use, EEXIST or EXDEV for an unusable pathname, or whatever
+ * error the lookup, allocation, or I/O routines called below report.
+ *
+ * Outline: create the file, preallocate every block that will be
+ * needed, suspend writes to the filesystem, copy the cylinder groups,
+ * superblock, and summary information, expunge the other snapshots
+ * from this one's view, append the new inode to the device's
+ * copy-on-write list, and resume writes.
+ */
+int
+ffs_snapshot(mp, snapfile)
+ struct mount *mp;
+ char *snapfile;
+{
+ ufs_daddr_t rlbn;
+ ufs_daddr_t lbn, blkno, copyblkno, inoblks[FSMAXSNAP];
+ int error, cg, snaploc, indiroff, numblks;
+ int i, size, base, len, loc, inoblkcnt;
+ int blksperindir, flag = mp->mnt_flag;
+ struct fs *fs = VFSTOUFS(mp)->um_fs;
+ struct proc *p = CURPROC;
+ struct inode *devip, *ip, *xp;
+ struct buf *bp, *nbp, *ibp;
+ struct vnode *vp, *devvp;
+ struct nameidata nd;
+ struct mount *wrtmp;
+ struct dinode *dip;
+ struct vattr vat;
+ struct cg *cgp;
+
+ /*
+ * Need to serialize access to snapshot code per filesystem.
+ */
+ /*
+ * Assign a snapshot slot in the superblock.
+ */
+ for (snaploc = 0; snaploc < FSMAXSNAP; snaploc++)
+ if (fs->fs_snapinum[snaploc] == 0)
+ break;
+ if (snaploc == FSMAXSNAP)
+ return (ENOSPC);
+ /*
+ * Create the snapshot file.
+ */
+restart:
+ NDINIT(&nd, CREATE, LOCKPARENT | LOCKLEAF, UIO_USERSPACE, snapfile, p);
+ if ((error = namei(&nd)) != 0)
+ return (error);
+ if (nd.ni_vp != NULL) {
+ vput(nd.ni_vp);
+ error = EEXIST;
+ }
+ if (nd.ni_dvp->v_mount != mp)
+ error = EXDEV;
+ if (error) {
+ NDFREE(&nd, NDF_ONLY_PNBUF);
+ if (nd.ni_dvp == nd.ni_vp)
+ vrele(nd.ni_dvp);
+ else
+ vput(nd.ni_dvp);
+ return (error);
+ }
+ VATTR_NULL(&vat);
+ vat.va_type = VREG;
+ vat.va_mode = S_IRUSR;
+ vat.va_vaflags |= VA_EXCLUSIVE;
+ if (VOP_GETWRITEMOUNT(nd.ni_dvp, &wrtmp))
+ wrtmp = NULL;
+ if (wrtmp != mp)
+ panic("ffs_snapshot: mount mismatch");
+ if (vn_start_write(NULL, &wrtmp, V_NOWAIT) != 0) { /* wrtmp already looked up above; NOTE(review): presumes a NULL vp makes vn_start_write() use *mpp as given - confirm */
+ NDFREE(&nd, NDF_ONLY_PNBUF);
+ vput(nd.ni_dvp);
+ if ((error = vn_start_write(NULL, &wrtmp,
+ V_XSLEEP | PCATCH)) != 0)
+ return (error);
+ goto restart;
+ }
+ VOP_LEASE(nd.ni_dvp, p, KERNCRED, LEASE_WRITE);
+ error = VOP_CREATE(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vat);
+ vput(nd.ni_dvp);
+ if (error) {
+ NDFREE(&nd, NDF_ONLY_PNBUF);
+ vn_finished_write(wrtmp);
+ return (error);
+ }
+ vp = nd.ni_vp;
+ ip = VTOI(vp);
+ devvp = ip->i_devvp;
+ devip = VTOI(devvp);
+ /*
+ * Allocate and copy the last block contents so as to be able
+ * to set size to that of the filesystem.
+ */
+ numblks = howmany(fs->fs_size, fs->fs_frag);
+ error = VOP_BALLOC(vp, lblktosize(fs, (off_t)(numblks - 1)),
+ fs->fs_bsize, KERNCRED, B_CLRBUF, &bp);
+ if (error)
+ goto out;
+ ip->i_size = lblktosize(fs, (off_t)numblks);
+ ip->i_flag |= IN_CHANGE | IN_UPDATE;
+ if ((error = readblock(bp, numblks - 1)) != 0)
+ goto out;
+ bawrite(bp);
+ /*
+ * Preallocate critical data structures so that we can copy
+ * them in without further allocation after we suspend all
+ * operations on the filesystem. We would like to just release
+ * the allocated buffers without writing them since they will
+ * be filled in below once we are ready to go, but this upsets
+ * the soft update code, so we go ahead and write the new buffers.
+ *
+ * Allocate all indirect blocks. Also allocate shadow copies
+ * for each of the indirect blocks.
+ */
+ for (blkno = NDADDR; blkno < numblks; blkno += NINDIR(fs)) {
+ error = VOP_BALLOC(vp, lblktosize(fs, (off_t)blkno),
+ fs->fs_bsize, p->p_ucred, B_METAONLY, &ibp);
+ if (error)
+ goto out;
+ copyblkno = fragstoblks(fs, dbtofsb(fs, ibp->b_blkno));
+ bdwrite(ibp);
+ error = VOP_BALLOC(vp, lblktosize(fs, (off_t)copyblkno),
+ fs->fs_bsize, p->p_ucred, 0, &nbp);
+ if (error)
+ goto out;
+ bawrite(nbp);
+ }
+ /*
+ * Allocate shadow blocks to copy all of the other snapshot inodes
+ * so that we will be able to expunge them from this snapshot.
+ */
+ for (loc = 0, inoblkcnt = 0; loc < snaploc; loc++) {
+ blkno = fragstoblks(fs, ino_to_fsba(fs, fs->fs_snapinum[loc]));
+ for (i = 0; i < inoblkcnt; i++)
+ if (inoblks[i] == blkno)
+ break;
+ if (i == inoblkcnt) {
+ inoblks[inoblkcnt++] = blkno;
+ error = VOP_BALLOC(vp, lblktosize(fs, (off_t)blkno),
+ fs->fs_bsize, KERNCRED, 0, &nbp);
+ if (error)
+ goto out;
+ bawrite(nbp);
+ }
+ }
+ /*
+ * Allocate all cylinder group blocks.
+ */
+ for (cg = 0; cg < fs->fs_ncg; cg++) {
+ error = VOP_BALLOC(vp, (off_t)(cgtod(fs, cg)) << fs->fs_fshift,
+ fs->fs_bsize, KERNCRED, 0, &nbp);
+ if (error)
+ goto out;
+ bawrite(nbp);
+ }
+ /*
+ * Allocate copies for the superblock and its summary information.
+ */
+ error = VOP_BALLOC(vp, (off_t)(SBOFF), fs->fs_bsize, KERNCRED,
+ 0, &nbp);
+ if (error)
+ goto out;
+ bawrite(nbp);
+ blkno = fragstoblks(fs, fs->fs_csaddr);
+ len = howmany(fs->fs_cssize, fs->fs_bsize);
+ for (loc = 0; loc < len; loc++) {
+ error = VOP_BALLOC(vp, lblktosize(fs, (off_t)(blkno + loc)),
+ fs->fs_bsize, KERNCRED, 0, &nbp);
+ if (error)
+ goto out;
+ bawrite(nbp);
+ }
+ /*
+ * Change inode to snapshot type file.
+ */
+ ip->i_flags |= SF_IMMUTABLE | SF_SNAPSHOT;
+ ip->i_flag |= IN_CHANGE | IN_UPDATE;
+ /*
+ * Ensure that the snapshot is completely on disk.
+ */
+ if ((error = VOP_FSYNC(vp, KERNCRED, MNT_WAIT, p)) != 0)
+ goto out;
+ /*
+ * All allocations are done, so we can now snapshot the system.
+ *
+ * Suspend operation on filesystem.
+ */
+ for (;;) {
+ vn_finished_write(wrtmp);
+ vfs_write_suspend(vp->v_mount);
+ if (mp->mnt_kern_flag & MNTK_SUSPENDED)
+ break;
+ vn_start_write(NULL, &wrtmp, V_WAIT);
+ }
+ /*
+ * First, copy all the cylinder group maps. All the unallocated
+ * blocks are marked BLK_NOCOPY so that the snapshot knows that
+ * it need not copy them if they are later written.
+ */
+ len = howmany(fs->fs_fpg, fs->fs_frag);
+ for (cg = 0; cg < fs->fs_ncg; cg++) {
+ error = bread(ip->i_devvp, fsbtodb(fs, cgtod(fs, cg)),
+ (int)fs->fs_cgsize, KERNCRED, &bp);
+ if (error) {
+ brelse(bp);
+ goto out1;
+ }
+ cgp = (struct cg *)bp->b_data;
+ if (!cg_chkmagic(cgp)) {
+ brelse(bp);
+ error = EIO;
+ goto out1;
+ }
+ error = bread(vp, fragstoblks(fs, cgtod(fs, cg)), fs->fs_bsize,
+ KERNCRED, &nbp);
+ if (error) {
+ brelse(bp);
+ brelse(nbp);
+ goto out1;
+ }
+ bcopy(bp->b_data, nbp->b_data, fs->fs_cgsize);
+ if (fs->fs_cgsize < fs->fs_bsize)
+ bzero(&nbp->b_data[fs->fs_cgsize],
+ fs->fs_bsize - fs->fs_cgsize);
+ bawrite(nbp);
+ base = cg * fs->fs_fpg / fs->fs_frag;
+ if (base + len > numblks)
+ len = numblks - base;
+ loc = 0;
+ if (base < NDADDR) {
+ for ( ; loc < NDADDR; loc++) {
+ if (!ffs_isblock(fs, cg_blksfree(cgp), loc))
+ continue;
+ ip->i_db[loc] = BLK_NOCOPY;
+ }
+ }
+ error = VOP_BALLOC(vp, lblktosize(fs, (off_t)(base + loc)),
+ fs->fs_bsize, KERNCRED, B_METAONLY, &ibp);
+ if (error) {
+ brelse(bp);
+ goto out1;
+ }
+ indiroff = (base + loc - NDADDR) % NINDIR(fs);
+ for ( ; loc < len; loc++, indiroff++) {
+ if (indiroff >= NINDIR(fs)) {
+ bawrite(ibp);
+ error = VOP_BALLOC(vp,
+ lblktosize(fs, (off_t)(base + loc)),
+ fs->fs_bsize, KERNCRED, B_METAONLY, &ibp);
+ if (error) {
+ brelse(bp);
+ goto out1;
+ }
+ indiroff = 0;
+ }
+ if (!ffs_isblock(fs, cg_blksfree(cgp), loc))
+ continue;
+ ((ufs_daddr_t *)(ibp->b_data))[indiroff] = BLK_NOCOPY;
+ }
+ brelse(bp);
+ bdwrite(ibp);
+ }
+ /*
+ * Snapshot the superblock and its summary information.
+ */
+ error = VOP_BALLOC(vp, (off_t)(SBOFF), fs->fs_bsize, KERNCRED,
+ 0, &nbp);
+ if (error)
+ goto out1;
+ bcopy(fs, nbp->b_data, fs->fs_sbsize);
+ ((struct fs *)(nbp->b_data))->fs_clean = 1;
+ if (fs->fs_sbsize < fs->fs_bsize)
+ bzero(&nbp->b_data[fs->fs_sbsize],
+ fs->fs_bsize - fs->fs_sbsize);
+ bawrite(nbp);
+ blkno = fragstoblks(fs, fs->fs_csaddr);
+ len = howmany(fs->fs_cssize, fs->fs_bsize) - 1;
+ size = fs->fs_bsize;
+ for (loc = 0; loc <= len; loc++) {
+ error = VOP_BALLOC(vp, lblktosize(fs, (off_t)(blkno + loc)),
+ fs->fs_bsize, KERNCRED, 0, &nbp);
+ if (error)
+ goto out1;
+ if (loc == len) {
+ readblock(nbp, blkno + loc);
+ size = fs->fs_cssize % fs->fs_bsize;
+ }
+ bcopy(fs->fs_csp[loc], nbp->b_data, size);
+ bawrite(nbp);
+ }
+ /*
+ * Copy the shadow blocks for the snapshot inodes so that
+ * the copies can be expunged.
+ */
+ for (loc = 0; loc < inoblkcnt; loc++) {
+ error = VOP_BALLOC(vp, lblktosize(fs, (off_t)inoblks[loc]),
+ fs->fs_bsize, KERNCRED, 0, &nbp);
+ if (error)
+ goto out1;
+ readblock(nbp, inoblks[loc]);
+ bdwrite(nbp);
+ }
+ /*
+ * Copy allocation information from other snapshots and then
+ * expunge them from the view of the current snapshot.
+ */
+ for (xp = devip->i_copyonwrite; xp; xp = xp->i_copyonwrite) {
+ /*
+ * Before expunging a snapshot inode, note all the
+ * blocks that it claims with BLK_SNAP so that fsck will
+ * be able to account for those blocks properly and so
+ * that this snapshot knows that it need not copy them
+ * if the other snapshot holding them is freed.
+ */
+ if ((error = snapacct(vp, &xp->i_db[0], &xp->i_ib[NIADDR])) !=0)
+ goto out1;
+ blksperindir = 1;
+ lbn = -NDADDR;
+ len = numblks - NDADDR;
+ rlbn = NDADDR;
+ for (i = 0; len > 0 && i < NIADDR; i++) {
+ error = indiracct(vp, ITOV(xp), i, xp->i_ib[i], lbn,
+ rlbn, len, blksperindir);
+ if (error)
+ goto out1;
+ blksperindir *= NINDIR(fs);
+ lbn -= blksperindir + 1;
+ len -= blksperindir;
+ rlbn += blksperindir;
+ }
+ /*
+ * Set copied snapshot inode to be a zero length file.
+ */
+ blkno = fragstoblks(fs, ino_to_fsba(fs, xp->i_number));
+ error = VOP_BALLOC(vp, lblktosize(fs, (off_t)blkno),
+ fs->fs_bsize, KERNCRED, 0, &nbp);
+ if (error)
+ goto out1;
+ dip = (struct dinode *)nbp->b_data +
+ ino_to_fsbo(fs, xp->i_number);
+ dip->di_size = 0;
+ dip->di_blocks = 0;
+ dip->di_flags &= ~(SF_IMMUTABLE | SF_SNAPSHOT);
+ bzero(&dip->di_db[0], (NDADDR + NIADDR) * sizeof(ufs_daddr_t));
+ bdwrite(nbp);
+ }
+ /*
+ * Copy all indirect blocks to their shadows (allocated above)
+ * to avoid deadlock in ffs_copyonwrite.
+ */
+ for (blkno = NDADDR; blkno < numblks; blkno += NINDIR(fs)) {
+ error = VOP_BALLOC(vp, lblktosize(fs, (off_t)blkno),
+ fs->fs_bsize, p->p_ucred, B_METAONLY, &ibp);
+ if (error)
+ goto out1;
+ copyblkno = fragstoblks(fs, dbtofsb(fs, ibp->b_blkno));
+ brelse(ibp);
+ error = VOP_BALLOC(vp, lblktosize(fs, (off_t)copyblkno),
+ fs->fs_bsize, p->p_ucred, 0, &nbp);
+ if (error)
+ goto out1;
+ error = VOP_BALLOC(vp, lblktosize(fs, (off_t)blkno),
+ fs->fs_bsize, p->p_ucred, B_METAONLY, &ibp);
+ if (error) {
+ brelse(nbp);
+ goto out1;
+ }
+ bcopy(ibp->b_data, nbp->b_data, fs->fs_bsize);
+ brelse(ibp);
+ bawrite(nbp);
+ }
+ /*
+ * Record snapshot inode. Since this is the newest snapshot,
+ * it must be placed at the end of the list.
+ */
+ fs->fs_snapinum[snaploc] = ip->i_number;
+ if (ip->i_copyonwrite != 0)
+ panic("ffs_snapshot: %d already on list", ip->i_number);
+ if (devip->i_copyonwrite == 0) {
+ devvp->v_flag |= VCOPYONWRITE;
+ devip->i_copyonwrite = ip;
+ } else {
+ for (xp = devip->i_copyonwrite; xp->i_copyonwrite != 0; )
+ xp = xp->i_copyonwrite;
+ xp->i_copyonwrite = ip;
+ }
+ vp->v_flag |= VSYSTEM;
+ /*
+ * Resume operation on filesystem.
+ */
+out1:
+ vfs_write_resume(vp->v_mount);
+ vn_start_write(NULL, &wrtmp, V_WAIT);
+out:
+ mp->mnt_flag = flag;
+ (void) VOP_FSYNC(vp, KERNCRED, MNT_WAIT, p);
+ if (error)
+ vput(vp);
+ else
+ VOP_UNLOCK(vp, 0, p);
+ vn_finished_write(wrtmp);
+ return (error);
+}
+
+/*
+ * Descend an indirect block chain for vnode cancelvp accounting for all
+ * its indirect blocks in snapvp.
+ *
+ * level is how many further levels of indirect blocks lie below this
+ * one; at level 0 the block's entries are handed to snapacct() and the
+ * descent stops. blkno is the filesystem block number of the indirect
+ * block (zero is a panic). lbn is the logical block number used to
+ * look the block up with getblk(); it must match what ufs_getlbns()
+ * computes for rlbn at this level, or the routine panics. remblks and
+ * blksperindir together bound how many entries of the block are
+ * examined: howmany(remblks, blksperindir), capped at NINDIR(fs).
+ *
+ * Returns 0, or the first error from ufs_getlbns(), readblock(),
+ * snapacct(), or a recursive call.
+ */
+static int
+indiracct(snapvp, cancelvp, level, blkno, lbn, rlbn, remblks, blksperindir)
+ struct vnode *snapvp;
+ struct vnode *cancelvp;
+ int level;
+ ufs_daddr_t blkno;
+ int lbn;
+ int rlbn;
+ int remblks;
+ int blksperindir;
+{
+ int subblksperindir, error, last, num, i;
+ struct indir indirs[NIADDR + 2];
+ ufs_daddr_t *bap;
+ struct buf *bp;
+ struct fs *fs;
+
+ if ((error = ufs_getlbns(cancelvp, rlbn, indirs, &num)) != 0)
+ return (error);
+ if (lbn != indirs[num - 1 - level].in_lbn || blkno == 0 || num < 2)
+ panic("indiracct: botched params");
+ /*
+ * We have to expand bread here since it will deadlock looking
+ * up the block number for any blocks that are not in the cache.
+ * The disk address is filled in by hand from blkno, and the block
+ * is read with readblock() only when the buffer is neither
+ * B_DONE nor B_DELWRI.
+ */
+ fs = VTOI(cancelvp)->i_fs;
+ bp = getblk(cancelvp, lbn, fs->fs_bsize, 0, 0);
+ bp->b_blkno = fsbtodb(fs, blkno);
+ if ((bp->b_flags & (B_DONE | B_DELWRI)) == 0 &&
+ (error = readblock(bp, fragstoblks(fs, blkno)))) {
+ brelse(bp);
+ return (error);
+ }
+ /*
+ * Account for the block pointers in this indirect block.
+ * When snapvp and cancelvp are the same vnode, work from a private
+ * copy of the block and release the buffer first (presumably so
+ * it is not held while snapacct() updates that same vnode - confirm).
+ * Otherwise bap points into bp, which stays held until "out".
+ */
+ last = howmany(remblks, blksperindir);
+ if (last > NINDIR(fs))
+ last = NINDIR(fs);
+ if (snapvp != cancelvp) {
+ bap = (ufs_daddr_t *)bp->b_data;
+ } else {
+ MALLOC(bap, ufs_daddr_t *, fs->fs_bsize, M_DEVBUF, M_WAITOK);
+ bcopy(bp->b_data, (caddr_t)bap, fs->fs_bsize);
+ brelse(bp);
+ }
+ error = snapacct(snapvp, &bap[0], &bap[last]);
+ if (error || level == 0)
+ goto out;
+ /*
+ * Account for the block pointers in each of the indirect blocks
+ * in the levels below us.
+ */
+ subblksperindir = blksperindir / NINDIR(fs);
+ for (lbn++, level--, i = 0; i < last; i++) {
+ error = indiracct(snapvp, cancelvp, level, bap[i], lbn,
+ rlbn, remblks, subblksperindir);
+ if (error)
+ goto out;
+ rlbn += blksperindir;
+ lbn -= blksperindir;
+ remblks -= blksperindir;
+ }
+out:
+ if (snapvp != cancelvp)
+ brelse(bp);
+ else
+ FREE(bap, M_DEVBUF);
+ return (error);
+}
+
+/*
+ * Account for a set of blocks allocated in a snapshot inode.
+ *
+ * For every block number in [oldblkp, lastblkp) that is not zero,
+ * BLK_NOCOPY or BLK_SNAP, the pointer slot of snapshot vp whose logical
+ * block number equals that block's own block number is set to BLK_SNAP.
+ * Returns zero, or the error from VOP_BALLOC() when the indirect block
+ * holding the slot cannot be obtained. Panics if the slot is already
+ * non-zero.
+ */
+static int
+snapacct(vp, oldblkp, lastblkp)
+ struct vnode *vp;
+ ufs_daddr_t *oldblkp, *lastblkp;
+{
+ struct inode *ip = VTOI(vp);
+ struct fs *fs = ip->i_fs;
+ ufs_daddr_t lbn, blkno, *blkp;
+ struct buf *ibp;
+ int error;
+
+ for ( ; oldblkp < lastblkp; oldblkp++) {
+ blkno = *oldblkp;
+ if (blkno == 0 || blkno == BLK_NOCOPY || blkno == BLK_SNAP)
+ continue;
+ /* The slot to mark is indexed by the block's own block number. */
+ lbn = fragstoblks(fs, blkno);
+ if (lbn < NDADDR) {
+ blkp = &ip->i_db[lbn];
+ ip->i_flag |= IN_CHANGE | IN_UPDATE;
+ } else {
+ /* B_METAONLY: ask for the indirect block that holds the pointer. */
+ error = VOP_BALLOC(vp, lblktosize(fs, (off_t)lbn),
+ fs->fs_bsize, KERNCRED, B_METAONLY, &ibp);
+ if (error)
+ return (error);
+ blkp = &((ufs_daddr_t *)(ibp->b_data))
+ [(lbn - NDADDR) % NINDIR(fs)];
+ }
+ if (*blkp != 0)
+ panic("snapacct: bad block");
+ *blkp = BLK_SNAP;
+ /* ibp is only valid (and only needs writing) on the indirect path. */
+ if (lbn >= NDADDR)
+ bdwrite(ibp);
+ }
+ return (0);
+}
+
+/*
+ * Prepare a snapshot file for being removed.
+ *
+ * Removes the snapshot's inode number from fs_snapinum[], unlinks the
+ * inode from the device vnode's copy-on-write list (clearing
+ * VCOPYONWRITE when the list becomes empty), zeroes the BLK_NOCOPY and
+ * BLK_SNAP markers in its block map, offers claimed blocks to
+ * ffs_snapblkfree(), clears SF_IMMUTABLE and SF_SNAPSHOT, and finally
+ * drops one reference on vp.
+ */
+void
+ffs_snapremove(vp)
+ struct vnode *vp;
+{
+ struct inode *ip, *xp;
+ struct vnode *devvp;
+ struct buf *ibp;
+ struct fs *fs;
+ ufs_daddr_t blkno, dblk;
+ int error, snaploc, loc, last;
+
+ ip = VTOI(vp);
+ fs = ip->i_fs;
+ /*
+ * Delete snapshot inode from superblock. Keep list dense.
+ */
+ for (snaploc = 0; snaploc < FSMAXSNAP; snaploc++)
+ if (fs->fs_snapinum[snaploc] == ip->i_number)
+ break;
+ if (snaploc < FSMAXSNAP) {
+ /* Shift the following entries down one slot, then zero the vacated tail. */
+ for (snaploc++; snaploc < FSMAXSNAP; snaploc++) {
+ if (fs->fs_snapinum[snaploc] == 0)
+ break;
+ fs->fs_snapinum[snaploc - 1] = fs->fs_snapinum[snaploc];
+ }
+ fs->fs_snapinum[snaploc - 1] = 0;
+ }
+ /*
+ * Delete from incore list.
+ * Clear copy-on-write flag if last snapshot.
+ */
+ devvp = ip->i_devvp;
+ for (xp = VTOI(devvp); xp; xp = xp->i_copyonwrite) {
+ if (xp->i_copyonwrite != ip)
+ continue;
+ xp->i_copyonwrite = ip->i_copyonwrite;
+ ip->i_copyonwrite = 0;
+ break;
+ }
+ if (xp == 0) {
+ printf("ffs_snapremove: lost snapshot vnode %d\n",
+ ip->i_number);
+ /* Not on the list: take a reference to balance the vrele() at the end. */
+ vref(vp);
+ }
+ if (VTOI(devvp)->i_copyonwrite == 0)
+ devvp->v_flag &= ~VCOPYONWRITE;
+ /*
+ * Clear all BLK_NOCOPY fields. Pass any block claims to other
+ * snapshots that want them (see ffs_snapblkfree below).
+ */
+ /* The direct-block scan starts at index 1; i_db[0] is not examined. */
+ for (blkno = 1; blkno < NDADDR; blkno++) {
+ dblk = ip->i_db[blkno];
+ if (dblk == BLK_NOCOPY || dblk == BLK_SNAP ||
+ (dblk == blkstofrags(fs, blkno) &&
+ ffs_snapblkfree(ip, dblk, fs->fs_bsize)))
+ ip->i_db[blkno] = 0;
+ }
+ /* Visit one indirect block's worth of pointers per iteration. */
+ for (blkno = NDADDR; blkno < fs->fs_size; blkno += NINDIR(fs)) {
+ error = VOP_BALLOC(vp, lblktosize(fs, (off_t)blkno),
+ fs->fs_bsize, KERNCRED, B_METAONLY, &ibp);
+ /* An indirect block that cannot be obtained is silently skipped. */
+ if (error)
+ continue;
+ if ((last = fs->fs_size - blkno) > NINDIR(fs))
+ last = NINDIR(fs);
+ for (loc = 0; loc < last; loc++) {
+ dblk = ((ufs_daddr_t *)(ibp->b_data))[loc];
+ /*
+ * NOTE(review): the claimed-block test compares against
+ * blkstofrags(fs, blkno), which does not vary with loc;
+ * confirm whether blkno + loc was intended.
+ */
+ if (dblk == BLK_NOCOPY || dblk == BLK_SNAP ||
+ (dblk == blkstofrags(fs, blkno) &&
+ ffs_snapblkfree(ip, dblk, fs->fs_bsize)))
+ ((ufs_daddr_t *)(ibp->b_data))[loc] = 0;
+ }
+ bawrite(ibp);
+ }
+ /*
+ * Clear snapshot flag and drop reference.
+ */
+ ip->i_flags &= ~(SF_IMMUTABLE | SF_SNAPSHOT);
+ ip->i_flag |= IN_CHANGE | IN_UPDATE;
+ vrele(vp);
+}
+
+/*
+ * Notification that a block is being freed. Return zero if the free
+ * should be allowed to proceed. Return non-zero if the snapshot file
+ * wants to claim the block. The block will be claimed if it is an
+ * uncopied part of one of the snapshots. It will be freed if it is
+ * either a BLK_NOCOPY or has already been copied in all of the snapshots.
+ * If a fragment is being freed, then all snapshots that care about
+ * it must make a copy since a snapshot file can only claim full sized
+ * blocks. Note that if more than one snapshot file maps the block,
+ * we can pick one at random to claim it. Since none of the snapshots
+ * can change, we are assured that they will all see the same unmodified
+ * image. When deleting a snapshot file (see ffs_snapremove above), we
+ * must push any of these claimed blocks to one of the other snapshots
+ * that maps it. These claimed blocks are easily identified as they will
+ * have a block number equal to their logical block number within the
+ * snapshot. A copied block can never have this property because they
+ * must always have been allocated from a BLK_NOCOPY location.
+ *
+ * freeip is the inode giving up the block, bno the filesystem block
+ * number being freed and size the number of bytes being freed.
+ *
+ * Returns 1 when a snapshot claimed a full-sized block, an errno value
+ * when looking up or copying the block failed, and 0 otherwise.
+ */
+int
+ffs_snapblkfree(freeip, bno, size)
+	struct inode *freeip;
+	ufs_daddr_t bno;
+	long size;
+{
+	struct buf *ibp, *cbp, *savedcbp = 0;
+	struct fs *fs = freeip->i_fs;
+	struct proc *p = CURPROC;
+	struct inode *ip;
+	struct vnode *vp;
+	ufs_daddr_t lbn, blkno;
+	int indiroff = 0, error = 0, claimedblk = 0;
+
+	lbn = fragstoblks(fs, bno);
+	for (ip = VTOI(freeip->i_devvp)->i_copyonwrite; ip;
+	    ip = ip->i_copyonwrite) {
+		vp = ITOV(ip);
+		/*
+		 * Lookup block being written.
+		 */
+		if (lbn < NDADDR) {
+			blkno = ip->i_db[lbn];
+		} else {
+			vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p);
+			p->p_flag |= P_COWINPROGRESS;
+			error = VOP_BALLOC(vp, lblktosize(fs, (off_t)lbn),
+			    fs->fs_bsize, KERNCRED, B_METAONLY, &ibp);
+			p->p_flag &= ~P_COWINPROGRESS;
+			VOP_UNLOCK(vp, 0, p);
+			if (error)
+				break;
+			indiroff = (lbn - NDADDR) % NINDIR(fs);
+			blkno = ((ufs_daddr_t *)(ibp->b_data))[indiroff];
+		}
+		/*
+		 * Check to see if block needs to be copied.
+		 */
+		switch (blkno) {
+		/*
+		 * If the snapshot has already copied the block (default),
+		 * or does not care about the block, it is not needed.
+		 */
+		default:
+		case BLK_NOCOPY:
+			if (lbn >= NDADDR)
+				brelse(ibp);
+			continue;
+		/*
+		 * No previous snapshot claimed the block, so it will be
+		 * freed and become a BLK_NOCOPY (don't care) for us.
+		 */
+		case BLK_SNAP:
+			if (claimedblk)
+				panic("snapblkfree: inconsistent block type");
+			vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p);
+			if (lbn < NDADDR) {
+				ip->i_db[lbn] = BLK_NOCOPY;
+				ip->i_flag |= IN_CHANGE | IN_UPDATE;
+			} else {
+				((ufs_daddr_t *)(ibp->b_data))[indiroff] =
+				    BLK_NOCOPY;
+				bdwrite(ibp);
+			}
+			VOP_UNLOCK(vp, 0, p);
+			continue;
+		/*
+		 * A block that we map is being freed. If it has not been
+		 * claimed yet, we will claim or copy it (below).
+		 */
+		case 0:
+			claimedblk = 1;
+			break;
+		}
+		/*
+		 * If this is a full size block, we will just grab it
+		 * and assign it to the snapshot inode. Otherwise we
+		 * will proceed to copy it. See explanation for this
+		 * routine as to why only a single snapshot needs to
+		 * claim this block.
+		 */
+		if (size == fs->fs_bsize) {
+#ifdef DEBUG
+			if (snapdebug)
+				printf("%s %d lbn %d from inum %d\n",
+				    "Grabonremove: snapino", ip->i_number, lbn,
+				    freeip->i_number);
+#endif
+			vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p);
+			if (lbn < NDADDR) {
+				ip->i_db[lbn] = bno;
+			} else {
+				((ufs_daddr_t *)(ibp->b_data))[indiroff] = bno;
+				bdwrite(ibp);
+			}
+			ip->i_blocks += btodb(size);
+			ip->i_flag |= IN_CHANGE | IN_UPDATE;
+			VOP_UNLOCK(vp, 0, p);
+			return (1);
+		}
+		if (lbn >= NDADDR)
+			brelse(ibp);
+		/*
+		 * Allocate the block into which to do the copy. Note that this
+		 * allocation will never require any additional allocations for
+		 * the snapshot inode.
+		 */
+		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p);
+		p->p_flag |= P_COWINPROGRESS;
+		error = VOP_BALLOC(vp, lblktosize(fs, (off_t)lbn),
+		    fs->fs_bsize, KERNCRED, 0, &cbp);
+		p->p_flag &= ~P_COWINPROGRESS;
+		VOP_UNLOCK(vp, 0, p);
+		if (error)
+			break;
+#ifdef DEBUG
+		if (snapdebug)
+			printf("%s%d lbn %d for inum %d size %ld to blkno %d\n",
+			    "Copyonremove: snapino ", ip->i_number, lbn,
+			    freeip->i_number, size, cbp->b_blkno);
+#endif
+		/*
+		 * If we have already read the old block contents, then
+		 * simply copy them to the new block.
+		 */
+		if (savedcbp != 0) {
+			bcopy(savedcbp->b_data, cbp->b_data, fs->fs_bsize);
+			bawrite(cbp);
+			continue;
+		}
+		/*
+		 * Otherwise, read the old block contents into the buffer.
+		 * If the read fails we still own cbp and must not leave
+		 * the loop holding it: its contents are indeterminate, so
+		 * zero it and push it out before reporting the error.
+		 */
+		if ((error = readblock(cbp, lbn)) != 0) {
+			bzero(cbp->b_data, fs->fs_bsize);
+			bawrite(cbp);
+			break;
+		}
+		savedcbp = cbp;
+	}
+	/* The first copy was kept as the source for later ones; write it now. */
+	if (savedcbp)
+		bawrite(savedcbp);
+	/*
+	 * If we have been unable to allocate a block in which to do
+	 * the copy, then return non-zero so that the fragment will
+	 * not be freed. Although space will be lost, the snapshot
+	 * will stay consistent.
+	 */
+	return (error);
+}
+
+/*
+ * Associate snapshot files when mounting.
+ *
+ * Walks fs_snapinum[] up to its first zero entry. Each inode that can
+ * be fetched and carries SF_SNAPSHOT is appended to the device vnode's
+ * copy-on-write list, its vnode is marked VSYSTEM and unlocked (the
+ * reference obtained from VFS_VGET() is not dropped here), and
+ * VCOPYONWRITE is set on the device vnode. Entries whose inode lacks
+ * SF_SNAPSHOT are squeezed out of fs_snapinum[]; entries for which
+ * VFS_VGET() fails are reported and skipped but left in the table.
+ */
+void
+ffs_snapshot_mount(mp)
+ struct mount *mp;
+{
+ struct ufsmount *ump = VFSTOUFS(mp);
+ struct fs *fs = ump->um_fs;
+ struct proc *p = CURPROC;
+ struct inode *ip, **listtailp;
+ struct vnode *vp;
+ int error, snaploc, loc;
+
+ /* listtailp always addresses the link field where the next snapshot goes. */
+ listtailp = &VTOI(ump->um_devvp)->i_copyonwrite;
+ for (snaploc = 0; snaploc < FSMAXSNAP; snaploc++) {
+ if (fs->fs_snapinum[snaploc] == 0)
+ return;
+ if ((error = VFS_VGET(mp, fs->fs_snapinum[snaploc], &vp)) != 0){
+ printf("ffs_snapshot_mount: vget failed %d\n", error);
+ continue;
+ }
+ ip = VTOI(vp);
+ if ((ip->i_flags & SF_SNAPSHOT) == 0) {
+ printf("ffs_snapshot_mount: non-snapshot inode %d\n",
+ fs->fs_snapinum[snaploc]);
+ vput(vp);
+ /* Close the gap so the table stays dense. */
+ for (loc = snaploc + 1; loc < FSMAXSNAP; loc++) {
+ if (fs->fs_snapinum[loc] == 0)
+ break;
+ fs->fs_snapinum[loc - 1] = fs->fs_snapinum[loc];
+ }
+ fs->fs_snapinum[loc - 1] = 0;
+ /* Revisit this slot: it now holds what used to be the next entry. */
+ snaploc--;
+ continue;
+ }
+ if (ip->i_copyonwrite != 0)
+ panic("ffs_snapshot_mount: %d already on list",
+ ip->i_number);
+ *listtailp = ip;
+ listtailp = &ip->i_copyonwrite;
+ vp->v_flag |= VSYSTEM;
+ VOP_UNLOCK(vp, 0, p);
+ ump->um_devvp->v_flag |= VCOPYONWRITE;
+ }
+}
+
+/*
+ * Disassociate snapshot files when unmounting.
+ *
+ * Pops every inode off the device vnode's copy-on-write list, dropping
+ * one vnode reference for each, and then clears VCOPYONWRITE on the
+ * device vnode.
+ */
+void
+ffs_snapshot_unmount(mp)
+	struct mount *mp;
+{
+	struct vnode *devvp = VFSTOUFS(mp)->um_devvp;
+	struct inode *head = VTOI(devvp);
+	struct inode *snapip;
+
+	for (snapip = head->i_copyonwrite; snapip != 0;
+	    snapip = head->i_copyonwrite) {
+		/* Unlink the first entry before releasing its vnode. */
+		head->i_copyonwrite = snapip->i_copyonwrite;
+		snapip->i_copyonwrite = 0;
+		vrele(ITOV(snapip));
+	}
+	devvp->v_flag &= ~VCOPYONWRITE;
+}
+
+/*
+ * Check for need to copy block that is about to be written,
+ * copying the block if necessary.
+ *
+ * ap->a_vp is the vnode whose inode heads the copy-on-write list and
+ * ap->a_bp the buffer about to be written. For each snapshot on the
+ * list whose pointer for the block is still zero, a block is allocated
+ * in the snapshot and filled with the old contents: read from the
+ * device the first time, copied from that first buffer afterwards.
+ *
+ * Returns zero on success or the error from VOP_BALLOC() or
+ * readblock(). Panics if entered while P_COWINPROGRESS is set.
+ */
+int
+ffs_copyonwrite(ap)
+	struct vop_copyonwrite_args /* {
+		struct vnode *a_vp;
+		struct buf *a_bp;
+	} */ *ap;
+{
+	struct buf *ibp, *cbp, *savedcbp = 0, *bp = ap->a_bp;
+	struct fs *fs = VTOI(bp->b_vp)->i_fs;
+	struct proc *p = CURPROC;
+	struct inode *ip;
+	struct vnode *vp;
+	ufs_daddr_t lbn, blkno;
+	int indiroff, error = 0;
+
+	lbn = fragstoblks(fs, dbtofsb(fs, bp->b_blkno));
+	if (p->p_flag & P_COWINPROGRESS)
+		panic("ffs_copyonwrite: recursive call");
+	for (ip = VTOI(ap->a_vp)->i_copyonwrite; ip; ip = ip->i_copyonwrite) {
+		vp = ITOV(ip);
+		/*
+		 * We ensure that everything of our own that needs to be
+		 * copied will be done at the time that ffs_snapshot is
+		 * called. Thus we can skip the check here which can
+		 * deadlock in doing the lookup in VOP_BALLOC.
+		 */
+		if (bp->b_vp == vp)
+			continue;
+		/*
+		 * Check to see if block needs to be copied.
+		 */
+		if (lbn < NDADDR) {
+			blkno = ip->i_db[lbn];
+		} else {
+			vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p);
+			p->p_flag |= P_COWINPROGRESS;
+			error = VOP_BALLOC(vp, lblktosize(fs, (off_t)lbn),
+			    fs->fs_bsize, KERNCRED, B_METAONLY, &ibp);
+			p->p_flag &= ~P_COWINPROGRESS;
+			VOP_UNLOCK(vp, 0, p);
+			if (error)
+				break;
+			indiroff = (lbn - NDADDR) % NINDIR(fs);
+			blkno = ((ufs_daddr_t *)(ibp->b_data))[indiroff];
+			brelse(ibp);
+		}
+#ifdef DIAGNOSTIC
+		if (blkno == BLK_SNAP && bp->b_lblkno >= 0)
+			panic("ffs_copyonwrite: bad copy block");
+#endif
+		/* Any non-zero pointer means there is nothing to copy here. */
+		if (blkno != 0)
+			continue;
+		/*
+		 * Allocate the block into which to do the copy. Note that this
+		 * allocation will never require any additional allocations for
+		 * the snapshot inode.
+		 */
+		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p);
+		p->p_flag |= P_COWINPROGRESS;
+		error = VOP_BALLOC(vp, lblktosize(fs, (off_t)lbn),
+		    fs->fs_bsize, KERNCRED, 0, &cbp);
+		p->p_flag &= ~P_COWINPROGRESS;
+		VOP_UNLOCK(vp, 0, p);
+		/*
+		 * Check the allocation before anything looks at cbp; the
+		 * debug output below dereferences it.
+		 */
+		if (error)
+			break;
+#ifdef DEBUG
+		if (snapdebug) {
+			printf("Copyonwrite: snapino %d lbn %d for ",
+			    ip->i_number, lbn);
+			if (bp->b_vp == ap->a_vp)
+				printf("fs metadata");
+			else
+				printf("inum %d", VTOI(bp->b_vp)->i_number);
+			printf(" lblkno %d to blkno %d\n", bp->b_lblkno,
+			    cbp->b_blkno);
+		}
+#endif
+		/*
+		 * If we have already read the old block contents, then
+		 * simply copy them to the new block.
+		 */
+		if (savedcbp != 0) {
+			bcopy(savedcbp->b_data, cbp->b_data, fs->fs_bsize);
+			bawrite(cbp);
+			continue;
+		}
+		/*
+		 * Otherwise, read the old block contents into the buffer.
+		 * If the read fails we still own cbp and must not leave
+		 * the loop holding it: its contents are indeterminate, so
+		 * zero it and push it out before reporting the error.
+		 */
+		if ((error = readblock(cbp, lbn)) != 0) {
+			bzero(cbp->b_data, fs->fs_bsize);
+			bawrite(cbp);
+			break;
+		}
+		savedcbp = cbp;
+	}
+	/* The first copy was kept as the source for later ones; write it now. */
+	if (savedcbp)
+		bawrite(savedcbp);
+	return (error);
+}
+
+/*
+ * Read the specified block into the given buffer.
+ * Much of this boiler-plate comes from bwrite().
+ *
+ * A single-segment kernel-space uio covering bp->b_bcount bytes of
+ * bp->b_data is built, positioned at the byte offset of block lbn, and
+ * passed to physio() on the device of the inode that owns bp. The
+ * value returned by physio() is returned unchanged.
+ */
+static int
+readblock(bp, lbn)
+	struct buf *bp;
+	daddr_t lbn;
+{
+	struct inode *ip = VTOI(bp->b_vp);
+	struct fs *fs = ip->i_fs;
+	struct iovec aiov;
+	struct uio auio;
+
+	/* Destination: the whole of the buffer's data area. */
+	aiov.iov_base = bp->b_data;
+	aiov.iov_len = bp->b_bcount;
+
+	/* Transfer description. */
+	auio.uio_iov = &aiov;
+	auio.uio_iovcnt = 1;
+	auio.uio_resid = bp->b_bcount;
+	auio.uio_offset = dbtob(fsbtodb(fs, blkstofrags(fs, lbn)));
+	auio.uio_segflg = UIO_SYSSPACE;
+	auio.uio_rw = UIO_READ;
+	auio.uio_procp = CURPROC;
+
+	return (physio(ip->i_devvp->v_rdev, &auio, 0));
+}
diff --git a/sys/ufs/ffs/ffs_softdep.c b/sys/ufs/ffs/ffs_softdep.c
index 40e9669..d9e6414 100644
--- a/sys/ufs/ffs/ffs_softdep.c
+++ b/sys/ufs/ffs/ffs_softdep.c
@@ -508,7 +508,7 @@ softdep_process_worklist(matchmnt)
{
struct proc *p = CURPROC;
struct worklist *wk;
- struct fs *matchfs;
+ struct mount *mp;
int matchcnt, loopcount;
/*
@@ -517,9 +517,6 @@ softdep_process_worklist(matchmnt)
*/
filesys_syncer = p;
matchcnt = 0;
- matchfs = NULL;
- if (matchmnt != NULL)
- matchfs = VFSTOUFS(matchmnt)->um_fs;
/*
* There is no danger of having multiple processes run this
* code. It is single threaded solely so that softdep_flushfiles
@@ -550,30 +547,42 @@ softdep_process_worklist(matchmnt)
case D_DIRREM:
/* removal of a directory entry */
- if (WK_DIRREM(wk)->dm_mnt == matchmnt)
+ mp = WK_DIRREM(wk)->dm_mnt;
+ if (mp == matchmnt)
matchcnt += 1;
+ vn_start_write(NULL, &mp, V_WAIT);
handle_workitem_remove(WK_DIRREM(wk));
+ vn_finished_write(mp);
break;
case D_FREEBLKS:
/* releasing blocks and/or fragments from a file */
- if (WK_FREEBLKS(wk)->fb_fs == matchfs)
+ mp = WK_FREEBLKS(wk)->fb_mnt;
+ if (mp == matchmnt)
matchcnt += 1;
+ vn_start_write(NULL, &mp, V_WAIT);
handle_workitem_freeblocks(WK_FREEBLKS(wk));
+ vn_finished_write(mp);
break;
case D_FREEFRAG:
/* releasing a fragment when replaced as a file grows */
- if (WK_FREEFRAG(wk)->ff_fs == matchfs)
+ mp = WK_FREEFRAG(wk)->ff_mnt;
+ if (mp == matchmnt)
matchcnt += 1;
+ vn_start_write(NULL, &mp, V_WAIT);
handle_workitem_freefrag(WK_FREEFRAG(wk));
+ vn_finished_write(mp);
break;
case D_FREEFILE:
/* releasing an inode when its link count drops to 0 */
- if (WK_FREEFILE(wk)->fx_fs == matchfs)
+ mp = WK_FREEFILE(wk)->fx_mnt;
+ if (mp == matchmnt)
matchcnt += 1;
+ vn_start_write(NULL, &mp, V_WAIT);
handle_workitem_freefile(WK_FREEFILE(wk));
+ vn_finished_write(mp);
break;
default:
@@ -1316,7 +1325,7 @@ newfreefrag(ip, blkno, size)
freefrag->ff_list.wk_type = D_FREEFRAG;
freefrag->ff_state = ip->i_uid & ~ONWORKLIST; /* XXX - used below */
freefrag->ff_inum = ip->i_number;
- freefrag->ff_fs = fs;
+ freefrag->ff_mnt = ITOV(ip)->v_mount;
freefrag->ff_devvp = ip->i_devvp;
freefrag->ff_blkno = blkno;
freefrag->ff_fragsize = size;
@@ -1333,7 +1342,8 @@ handle_workitem_freefrag(freefrag)
{
struct inode tip;
- tip.i_fs = freefrag->ff_fs;
+ tip.i_vnode = NULL;
+ tip.i_fs = VFSTOUFS(freefrag->ff_mnt)->um_fs;
tip.i_devvp = freefrag->ff_devvp;
tip.i_dev = freefrag->ff_devvp->v_rdev;
tip.i_number = freefrag->ff_inum;
@@ -1601,7 +1611,7 @@ softdep_setup_freeblocks(ip, length)
freeblks->fb_uid = ip->i_uid;
freeblks->fb_previousinum = ip->i_number;
freeblks->fb_devvp = ip->i_devvp;
- freeblks->fb_fs = fs;
+ freeblks->fb_mnt = ITOV(ip)->v_mount;
freeblks->fb_oldsize = ip->i_size;
freeblks->fb_newsize = length;
freeblks->fb_chkcnt = ip->i_blocks;
@@ -1845,7 +1855,7 @@ softdep_freefile(pvp, ino, mode)
freefile->fx_mode = mode;
freefile->fx_oldinum = ino;
freefile->fx_devvp = ip->i_devvp;
- freefile->fx_fs = ip->i_fs;
+ freefile->fx_mnt = ITOV(ip)->v_mount;
/*
* If the inodedep does not exist, then the zero'ed inode has
@@ -1949,13 +1959,13 @@ handle_workitem_freeblocks(freeblks)
int error, allerror = 0;
ufs_lbn_t baselbns[NIADDR], tmpval;
+ tip.i_fs = fs = VFSTOUFS(freeblks->fb_mnt)->um_fs;
tip.i_number = freeblks->fb_previousinum;
tip.i_devvp = freeblks->fb_devvp;
tip.i_dev = freeblks->fb_devvp->v_rdev;
- tip.i_fs = freeblks->fb_fs;
tip.i_size = freeblks->fb_oldsize;
tip.i_uid = freeblks->fb_uid;
- fs = freeblks->fb_fs;
+ tip.i_vnode = NULL;
tmpval = 1;
baselbns[0] = NDADDR;
for (i = 1; i < NIADDR; i++) {
@@ -2715,20 +2725,23 @@ static void
handle_workitem_freefile(freefile)
struct freefile *freefile;
{
+ struct fs *fs;
struct vnode vp;
struct inode tip;
struct inodedep *idp;
int error;
+ fs = VFSTOUFS(freefile->fx_mnt)->um_fs;
#ifdef DEBUG
ACQUIRE_LOCK(&lk);
- if (inodedep_lookup(freefile->fx_fs, freefile->fx_oldinum, 0, &idp))
+ if (inodedep_lookup(fs, freefile->fx_oldinum, 0, &idp))
panic("handle_workitem_freefile: inodedep survived");
FREE_LOCK(&lk);
#endif
tip.i_devvp = freefile->fx_devvp;
tip.i_dev = freefile->fx_devvp->v_rdev;
- tip.i_fs = freefile->fx_fs;
+ tip.i_fs = fs;
+ tip.i_vnode = &vp;
vp.v_data = &tip;
if ((error = ffs_freefile(&vp, freefile->fx_oldinum, freefile->fx_mode)) != 0)
softdep_error("handle_workitem_freefile", error);
@@ -4419,14 +4432,18 @@ clear_remove(p)
mp = pagedep->pd_mnt;
ino = pagedep->pd_ino;
FREE_LOCK(&lk);
+ if (vn_start_write(NULL, &mp, V_WAIT | PCATCH) != 0)
+ return;
if ((error = VFS_VGET(mp, ino, &vp)) != 0) {
softdep_error("clear_remove: vget", error);
+ vn_finished_write(mp);
return;
}
if ((error = VOP_FSYNC(vp, p->p_ucred, MNT_NOWAIT, p)))
softdep_error("clear_remove: fsync", error);
drain_output(vp, 0);
vput(vp);
+ vn_finished_write(mp);
return;
}
}
@@ -4486,8 +4503,11 @@ clear_inodedeps(p)
if (inodedep_lookup(fs, ino, 0, &inodedep) == 0)
continue;
FREE_LOCK(&lk);
+ if (vn_start_write(NULL, &mp, V_WAIT | PCATCH) != 0)
+ return;
if ((error = VFS_VGET(mp, ino, &vp)) != 0) {
softdep_error("clear_inodedeps: vget", error);
+ vn_finished_write(mp);
return;
}
if (ino == lastino) {
@@ -4499,6 +4519,7 @@ clear_inodedeps(p)
drain_output(vp, 0);
}
vput(vp);
+ vn_finished_write(mp);
ACQUIRE_LOCK(&lk);
}
FREE_LOCK(&lk);
diff --git a/sys/ufs/ffs/ffs_vfsops.c b/sys/ufs/ffs/ffs_vfsops.c
index 89ff6d3..5280181 100644
--- a/sys/ufs/ffs/ffs_vfsops.c
+++ b/sys/ufs/ffs/ffs_vfsops.c
@@ -133,7 +133,7 @@ VFS_SET(ufs_vfsops, ufs, 0);
* namei() if it is a genuine NULL from the user.
*/
static int
-ffs_mount( mp, path, data, ndp, p)
+ffs_mount(mp, path, data, ndp, p)
struct mount *mp; /* mount struct pointer*/
char *path; /* path to mount point*/
caddr_t data; /* arguments to FS specific mount*/
@@ -141,49 +141,34 @@ ffs_mount( mp, path, data, ndp, p)
struct proc *p; /* process requesting mount*/
{
size_t size;
- int err = 0;
struct vnode *devvp;
-
struct ufs_args args;
struct ufsmount *ump = 0;
register struct fs *fs;
- int error, flags, ronly = 0;
+ int error, flags;
mode_t accessmode;
/*
- * Use NULL path to flag a root mount
+ * Use NULL path to indicate we are mounting the root file system.
*/
- if( path == NULL) {
- /*
- ***
- * Mounting root file system
- ***
- */
-
- if ((err = bdevvp(rootdev, &rootvp))) {
+ if (path == NULL) {
+ if ((error = bdevvp(rootdev, &rootvp))) {
printf("ffs_mountroot: can't find rootvp\n");
- return (err);
- }
-
- if( ( err = ffs_mountfs(rootvp, mp, p, M_FFSNODE)) != 0) {
- /* fs specific cleanup (if any)*/
- goto error_1;
+ return (error);
}
- goto dostatfs; /* success*/
+ if ((error = ffs_mountfs(rootvp, mp, p, M_FFSNODE)) != 0)
+ return (error);
+ (void)VFS_STATFS(mp, &mp->mnt_stat, p);
+ return (0);
}
/*
- ***
* Mounting non-root file system or updating a file system
- ***
*/
-
- /* copy in user arguments*/
- err = copyin(data, (caddr_t)&args, sizeof (struct ufs_args));
- if (err)
- goto error_1; /* can't get arguments*/
+ if ((error = copyin(data, (caddr_t)&args, sizeof(struct ufs_args)))!= 0)
+ return (error);
/*
* If updating, check whether changing from read-only to
@@ -193,25 +178,36 @@ ffs_mount( mp, path, data, ndp, p)
ump = VFSTOUFS(mp);
fs = ump->um_fs;
devvp = ump->um_devvp;
- err = 0;
- ronly = fs->fs_ronly; /* MNT_RELOAD might change this */
- if (ronly == 0 && (mp->mnt_flag & MNT_RDONLY)) {
+ if (fs->fs_ronly == 0 && (mp->mnt_flag & MNT_RDONLY)) {
+ if ((error = vn_start_write(NULL, &mp, V_WAIT)) != 0)
+ return (error);
flags = WRITECLOSE;
if (mp->mnt_flag & MNT_FORCE)
flags |= FORCECLOSE;
if (mp->mnt_flag & MNT_SOFTDEP) {
- err = softdep_flushfiles(mp, flags, p);
+ error = softdep_flushfiles(mp, flags, p);
} else {
- err = ffs_flushfiles(mp, flags, p);
+ error = ffs_flushfiles(mp, flags, p);
}
- ronly = 1;
- }
- if (!err && (mp->mnt_flag & MNT_RELOAD))
- err = ffs_reload(mp, ndp->ni_cnd.cn_cred, p);
- if (err) {
- goto error_1;
+ if (error) {
+ vn_finished_write(mp);
+ return (error);
+ }
+ fs->fs_ronly = 1;
+ if ((fs->fs_flags & FS_UNCLEAN) == 0)
+ fs->fs_clean = 1;
+ if ((error = ffs_sbupdate(ump, MNT_WAIT)) != 0) {
+ fs->fs_ronly = 0;
+ fs->fs_clean = 0;
+ vn_finished_write(mp);
+ return (error);
+ }
+ vn_finished_write(mp);
}
- if (ronly && (mp->mnt_kern_flag & MNTK_WANTRDWR)) {
+ if ((mp->mnt_flag & MNT_RELOAD) &&
+ (error = ffs_reload(mp, ndp->ni_cnd.cn_cred, p)) != 0)
+ return (error);
+ if (fs->fs_ronly && (mp->mnt_kern_flag & MNTK_WANTRDWR)) {
/*
* If upgrade to read-write by non-root, then verify
* that user has necessary permissions on the device.
@@ -225,31 +221,36 @@ ffs_mount( mp, path, data, ndp, p)
}
VOP_UNLOCK(devvp, 0, p);
}
-
fs->fs_flags &= ~FS_UNCLEAN;
if (fs->fs_clean == 0) {
fs->fs_flags |= FS_UNCLEAN;
if (mp->mnt_flag & MNT_FORCE) {
- printf(
-"WARNING: %s was not properly dismounted\n",
- fs->fs_fsmnt);
+ printf("WARNING: %s was not %s\n",
+ fs->fs_fsmnt, "properly dismounted");
} else {
printf(
"WARNING: R/W mount of %s denied. Filesystem is not clean - run fsck\n",
fs->fs_fsmnt);
- err = EPERM;
- goto error_1;
+ return (EPERM);
}
}
-
+ if ((error = vn_start_write(NULL, &mp, V_WAIT)) != 0)
+ return (error);
+ fs->fs_ronly = 0;
+ fs->fs_clean = 0;
+ if ((error = ffs_sbupdate(ump, MNT_WAIT)) != 0) {
+ vn_finished_write(mp);
+ return (error);
+ }
/* check to see if we need to start softdep */
- if (fs->fs_flags & FS_DOSOFTDEP) {
- err = softdep_mount(devvp, mp, fs, p->p_ucred);
- if (err)
- goto error_1;
+ if ((fs->fs_flags & FS_DOSOFTDEP) &&
+ (error = softdep_mount(devvp, mp, fs, p->p_ucred))){
+ vn_finished_write(mp);
+ return (error);
}
-
- ronly = 0;
+ if (fs->fs_snapinum[0] != 0)
+ ffs_snapshot_mount(mp);
+ vn_finished_write(mp);
}
/*
* Soft updates is incompatible with "async",
@@ -258,18 +259,18 @@ ffs_mount( mp, path, data, ndp, p)
* Softdep_mount() clears it in an initial mount
* or ro->rw remount.
*/
- if (mp->mnt_flag & MNT_SOFTDEP) {
+ if (mp->mnt_flag & MNT_SOFTDEP)
mp->mnt_flag &= ~MNT_ASYNC;
- }
- /* if not updating name...*/
- if (args.fspec == 0) {
- /*
- * Process export requests. Jumping to "success"
- * will return the vfs_export() error code.
- */
- err = vfs_export(mp, &ump->um_export, &args.export);
- goto success;
- }
+ /*
+ * If not updating name, process export requests.
+ */
+ if (args.fspec == 0)
+ return (vfs_export(mp, &ump->um_export, &args.export));
+ /*
+ * If this is a snapshot request, take the snapshot.
+ */
+ if (mp->mnt_flag & MNT_SNAPSHOT)
+ return (ffs_snapshot(mp, args.fspec));
}
/*
@@ -277,17 +278,14 @@ ffs_mount( mp, path, data, ndp, p)
* and verify that it refers to a sensible block device.
*/
NDINIT(ndp, LOOKUP, FOLLOW, UIO_USERSPACE, args.fspec, p);
- err = namei(ndp);
- if (err) {
- /* can't get devvp!*/
- goto error_1;
- }
-
+ if ((error = namei(ndp)) != 0)
+ return (error);
NDFREE(ndp, NDF_ONLY_PNBUF);
devvp = ndp->ni_vp;
-
- if (!vn_isdisk(devvp, &err))
- goto error_2;
+ if (!vn_isdisk(devvp, &error)) {
+ vrele(devvp);
+ return (error);
+ }
/*
* If mount by non-root, then verify that user has necessary
@@ -298,7 +296,7 @@ ffs_mount( mp, path, data, ndp, p)
if ((mp->mnt_flag & MNT_RDONLY) == 0)
accessmode |= VWRITE;
vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY, p);
- if ((error = VOP_ACCESS(devvp, accessmode, p->p_ucred, p)) != 0) {
+ if ((error = VOP_ACCESS(devvp, accessmode, p->p_ucred, p))!= 0){
vput(devvp);
return (error);
}
@@ -307,96 +305,43 @@ ffs_mount( mp, path, data, ndp, p)
if (mp->mnt_flag & MNT_UPDATE) {
/*
- ********************
- * UPDATE
+ * Update only
+ *
* If it's not the same vnode, or at least the same device
* then it's not correct.
- ********************
*/
- if (devvp != ump->um_devvp) {
- if ( devvp->v_rdev == ump->um_devvp->v_rdev) {
- vrele(devvp);
- } else {
- err = EINVAL; /* needs translation */
- }
- } else
- vrele(devvp);
- /*
- * Update device name only on success
- */
- if( !err) {
- /* Save "mounted from" info for mount point (NULL pad)*/
- copyinstr( args.fspec,
- mp->mnt_stat.f_mntfromname,
- MNAMELEN - 1,
- &size);
- bzero( mp->mnt_stat.f_mntfromname + size, MNAMELEN - size);
- }
+ if (devvp != ump->um_devvp &&
+ devvp->v_rdev != ump->um_devvp->v_rdev)
+ error = EINVAL; /* needs translation */
+ vrele(devvp);
+ if (error)
+ return (error);
} else {
/*
- ********************
- * NEW MOUNT
- ********************
+ * New mount
+ *
+ * We need the name for the mount point (also used for
+ * "last mounted on") copied in. If an error occurs,
+ * the mount point is discarded by the upper level code.
*/
-
- /*
- * Since this is a new mount, we want the names for
- * the device and the mount point copied in. If an
- * error occurs, the mountpoint is discarded by the
- * upper level code.
- */
- /* Save "last mounted on" info for mount point (NULL pad)*/
- copyinstr( path, /* mount point*/
- mp->mnt_stat.f_mntonname, /* save area*/
- MNAMELEN - 1, /* max size*/
- &size); /* real size*/
+ copyinstr(path, mp->mnt_stat.f_mntonname, MNAMELEN - 1, &size);
bzero( mp->mnt_stat.f_mntonname + size, MNAMELEN - size);
-
- /* Save "mounted from" info for mount point (NULL pad)*/
- copyinstr( args.fspec, /* device name*/
- mp->mnt_stat.f_mntfromname, /* save area*/
- MNAMELEN - 1, /* max size*/
- &size); /* real size*/
- bzero( mp->mnt_stat.f_mntfromname + size, MNAMELEN - size);
-
- err = ffs_mountfs(devvp, mp, p, M_FFSNODE);
- }
- if (err) {
- goto error_2;
+ if ((error = ffs_mountfs(devvp, mp, p, M_FFSNODE)) != 0) {
+ vrele(devvp);
+ return (error);
+ }
}
-
-dostatfs:
/*
- * Initialize FS stat information in mount struct; uses both
- * mp->mnt_stat.f_mntonname and mp->mnt_stat.f_mntfromname
- *
- * This code is common to root and non-root mounts
+ * Save "mounted from" device name info for mount point (NULL pad).
+ */
+ copyinstr(args.fspec, mp->mnt_stat.f_mntfromname, MNAMELEN - 1, &size);
+ bzero( mp->mnt_stat.f_mntfromname + size, MNAMELEN - size);
+ /*
+ * Initialize filesystem stat information in mount struct.
*/
(void)VFS_STATFS(mp, &mp->mnt_stat, p);
-
- goto success;
-
-
-error_2: /* error with devvp held*/
-
- /* release devvp before failing*/
- vrele(devvp);
-
-error_1: /* no state to back out*/
-
-success:
- if (!err && path && (mp->mnt_flag & MNT_UPDATE)) {
- /* Update clean flag after changing read-onlyness. */
- fs = ump->um_fs;
- if (ronly != fs->fs_ronly) {
- fs->fs_ronly = ronly;
- fs->fs_clean = ronly &&
- (fs->fs_flags & FS_UNCLEAN) == 0 ? 1 : 0;
- ffs_sbupdate(ump, MNT_WAIT);
- }
- }
- return (err);
+ return (0);
}
/*
@@ -478,7 +423,7 @@ ffs_reload(mp, cred, p)
newfs->fs_maxcluster = fs->fs_maxcluster;
bcopy(newfs, fs, (u_int)fs->fs_sbsize);
if (fs->fs_sbsize < SBSIZE)
- bp->b_flags |= B_INVAL;
+ bp->b_flags |= B_INVAL | B_NOCACHE;
brelse(bp);
mp->mnt_maxsymlinklen = fs->fs_maxsymlinklen;
ffs_oldfscompat(fs);
@@ -670,7 +615,7 @@ ffs_mountfs(devvp, mp, p, malloctype)
ump->um_vfree = ffs_vfree;
bcopy(bp->b_data, ump->um_fs, (u_int)fs->fs_sbsize);
if (fs->fs_sbsize < SBSIZE)
- bp->b_flags |= B_INVAL;
+ bp->b_flags |= B_INVAL | B_NOCACHE;
brelse(bp);
bp = NULL;
fs = ump->um_fs;
@@ -750,6 +695,8 @@ ffs_mountfs(devvp, mp, p, malloctype)
free(base, M_UFSMNT);
goto out;
}
+ if (fs->fs_snapinum[0] != 0)
+ ffs_snapshot_mount(mp);
fs->fs_fmod = 1;
fs->fs_clean = 0;
(void) ffs_sbupdate(ump, MNT_WAIT);
@@ -886,6 +833,15 @@ ffs_flushfiles(mp, flags, p)
*/
}
#endif
+ if (ump->um_devvp->v_flag & VCOPYONWRITE) {
+ if ((error = vflush(mp, NULL, SKIPSYSTEM | flags)) != 0)
+ return (error);
+ ffs_snapshot_unmount(mp);
+ /*
+ * Here we fall through to vflush again to ensure
+ * that we have gotten rid of all the system vnodes.
+ */
+ }
/*
* Flush all the files.
*/
diff --git a/sys/ufs/ffs/ffs_vnops.c b/sys/ufs/ffs/ffs_vnops.c
index 539f302..eb6d621 100644
--- a/sys/ufs/ffs/ffs_vnops.c
+++ b/sys/ufs/ffs/ffs_vnops.c
@@ -95,6 +95,7 @@ vop_t **ffs_specop_p;
static struct vnodeopv_entry_desc ffs_specop_entries[] = {
{ &vop_default_desc, (vop_t *) ufs_vnoperatespec },
{ &vop_fsync_desc, (vop_t *) ffs_fsync },
+ { &vop_copyonwrite_desc, (vop_t *) ffs_copyonwrite },
{ NULL, NULL }
};
static struct vnodeopv_desc ffs_specop_opv_desc =
@@ -129,11 +130,20 @@ ffs_fsync(ap)
} */ *ap;
{
struct vnode *vp = ap->a_vp;
+ struct inode *ip = VTOI(vp);
struct buf *bp;
struct buf *nbp;
int s, error, wait, passes, skipmeta;
daddr_t lbn;
+ /*
+ * Snapshots have to be unlocked so they do not deadlock
+ * checking whether they need to copy their written buffers.
+ * We always hold a reference, so they cannot be removed
+ * out from underneath us.
+ */
+ if (ip->i_flags & SF_SNAPSHOT)
+ VOP_UNLOCK(vp, 0, ap->a_p);
wait = (ap->a_waitfor == MNT_WAIT);
if (vn_isdisk(vp, NULL)) {
lbn = INT_MAX;
@@ -141,8 +151,6 @@ ffs_fsync(ap)
(vp->v_specmountpoint->mnt_flag & MNT_SOFTDEP))
softdep_fsync_mountdev(vp);
} else {
- struct inode *ip;
- ip = VTOI(vp);
lbn = lblkno(ip->i_fs, (ip->i_size + ip->i_fs->fs_bsize - 1));
}
@@ -279,5 +287,7 @@ loop:
}
splx(s);
error = UFS_UPDATE(vp, wait);
+ if (ip->i_flags & SF_SNAPSHOT)
+ vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, ap->a_p);
return (error);
}
diff --git a/sys/ufs/ffs/softdep.h b/sys/ufs/ffs/softdep.h
index 1908a3e..cf9cac8 100644
--- a/sys/ufs/ffs/softdep.h
+++ b/sys/ufs/ffs/softdep.h
@@ -382,7 +382,7 @@ struct freefrag {
struct worklist ff_list; /* id_inowait or delayed worklist */
# define ff_state ff_list.wk_state /* owning user; should be uid_t */
struct vnode *ff_devvp; /* filesystem device vnode */
- struct fs *ff_fs; /* addr of superblock */
+ struct mount *ff_mnt; /* associated mount point */
ufs_daddr_t ff_blkno; /* fragment physical block number */
long ff_fragsize; /* size of fragment being deleted */
ino_t ff_inum; /* owning inode number */
@@ -398,7 +398,7 @@ struct freeblks {
struct worklist fb_list; /* id_inowait or delayed worklist */
ino_t fb_previousinum; /* inode of previous owner of blocks */
struct vnode *fb_devvp; /* filesystem device vnode */
- struct fs *fb_fs; /* addr of superblock */
+ struct mount *fb_mnt; /* associated mount point */
off_t fb_oldsize; /* previous file size */
off_t fb_newsize; /* new file size */
int fb_chkcnt; /* used to check cnt of blks released */
@@ -418,7 +418,7 @@ struct freefile {
mode_t fx_mode; /* mode of inode */
ino_t fx_oldinum; /* inum of the unlinked file */
struct vnode *fx_devvp; /* filesystem device vnode */
- struct fs *fx_fs; /* addr of superblock */
+ struct mount *fx_mnt; /* associated mount point */
};
/*
diff --git a/sys/ufs/ufs/inode.h b/sys/ufs/ufs/inode.h
index 83960b0..6417a10 100644
--- a/sys/ufs/ufs/inode.h
+++ b/sys/ufs/ufs/inode.h
@@ -84,6 +84,7 @@ struct inode {
struct dquot *i_dquot[MAXQUOTAS]; /* Dquot structures. */
u_quad_t i_modrev; /* Revision level for NFS lease. */
struct lockf *i_lockf;/* Head of byte-level lock list. */
+ struct inode *i_copyonwrite; /* copy-on-write list */
/*
* Side effects; used during directory lookup.
*/
diff --git a/sys/ufs/ufs/ufs_bmap.c b/sys/ufs/ufs/ufs_bmap.c
index 9056340..ab4ac52 100644
--- a/sys/ufs/ufs/ufs_bmap.c
+++ b/sys/ufs/ufs/ufs_bmap.c
@@ -47,6 +47,7 @@
#include <sys/vnode.h>
#include <sys/mount.h>
#include <sys/resourcevar.h>
+#include <sys/stat.h>
#include <ufs/ufs/extattr.h>
#include <ufs/ufs/quota.h>
@@ -115,7 +116,7 @@ ufs_bmaparray(vp, bn, bnp, ap, nump, runp, runb)
struct indir a[NIADDR+1], *xap;
ufs_daddr_t daddr;
long metalbn;
- int error, maxrun, num;
+ int error, num, maxrun = 0;
ip = VTOI(vp);
mp = vp->v_mount;
@@ -127,6 +128,7 @@ ufs_bmaparray(vp, bn, bnp, ap, nump, runp, runb)
#endif
if (runp) {
+ maxrun = mp->mnt_iosize_max / mp->mnt_stat.f_iosize - 1;
*runp = 0;
}
@@ -134,7 +136,6 @@ ufs_bmaparray(vp, bn, bnp, ap, nump, runp, runb)
*runb = 0;
}
- maxrun = mp->mnt_iosize_max / mp->mnt_stat.f_iosize - 1;
xap = ap == NULL ? a : ap;
if (!nump)
@@ -146,9 +147,12 @@ ufs_bmaparray(vp, bn, bnp, ap, nump, runp, runb)
num = *nump;
if (num == 0) {
*bnp = blkptrtodb(ump, ip->i_db[bn]);
- if (*bnp == 0)
- *bnp = -1;
- else if (runp) {
+ if (*bnp == 0) {
+ if (ip->i_flags & SF_SNAPSHOT)
+ *bnp = blkptrtodb(ump, bn * ump->um_seqinc);
+ else
+ *bnp = -1;
+ } else if (runp) {
daddr_t bnb = bn;
for (++bn; bn < NDADDR && *runp < maxrun &&
is_sequential(ump, ip->i_db[bn - 1], ip->i_db[bn]);
@@ -226,8 +230,13 @@ ufs_bmaparray(vp, bn, bnp, ap, nump, runp, runb)
if (bp)
bqrelse(bp);
- daddr = blkptrtodb(ump, daddr);
- *bnp = daddr == 0 ? -1 : daddr;
+ *bnp = blkptrtodb(ump, daddr);
+ if (*bnp == 0) {
+ if (ip->i_flags & SF_SNAPSHOT)
+ *bnp = blkptrtodb(ump, bn * ump->um_seqinc);
+ else
+ *bnp = -1;
+ }
return (0);
}
diff --git a/sys/ufs/ufs/ufs_extern.h b/sys/ufs/ufs/ufs_extern.h
index d576be9..b740792 100644
--- a/sys/ufs/ufs/ufs_extern.h
+++ b/sys/ufs/ufs/ufs_extern.h
@@ -87,6 +87,7 @@ int ufs_init __P((struct vfsconf *));
void ufs_itimes __P((struct vnode *vp));
int ufs_lookup __P((struct vop_cachedlookup_args *));
int ufs_reclaim __P((struct vop_reclaim_args *));
+void ffs_snapremove __P((struct vnode *vp));
int ufs_root __P((struct mount *, struct vnode **));
int ufs_start __P((struct mount *, int, struct proc *));
int ufs_vinit __P((struct mount *, vop_t **, vop_t **, struct vnode **));
diff --git a/sys/ufs/ufs/ufs_inode.c b/sys/ufs/ufs/ufs_inode.c
index 507e716..485a6d2 100644
--- a/sys/ufs/ufs/ufs_inode.c
+++ b/sys/ufs/ufs/ufs_inode.c
@@ -77,6 +77,7 @@ ufs_inactive(ap)
if (ip->i_mode == 0)
goto out;
if (ip->i_nlink <= 0 && (vp->v_mount->mnt_flag & MNT_RDONLY) == 0) {
+ (void) vn_write_suspend_wait(vp, V_WAIT);
#ifdef QUOTA
if (!getinoquota(ip))
(void)chkiq(ip, -1, NOCRED, 0);
@@ -91,8 +92,15 @@ ufs_inactive(ap)
ip->i_flag |= IN_CHANGE | IN_UPDATE;
UFS_VFREE(vp, ip->i_number, mode);
}
- if (ip->i_flag & (IN_ACCESS | IN_CHANGE | IN_MODIFIED | IN_UPDATE))
- UFS_UPDATE(vp, 0);
+ if (ip->i_flag & (IN_ACCESS | IN_CHANGE | IN_MODIFIED | IN_UPDATE)) {
+ if ((ip->i_flag & (IN_CHANGE | IN_UPDATE | IN_MODIFIED)) == 0 &&
+ vn_write_suspend_wait(vp, V_NOWAIT)) {
+ ip->i_flag &= ~IN_ACCESS;
+ } else {
+ (void) vn_write_suspend_wait(vp, V_WAIT);
+ UFS_UPDATE(vp, 0);
+ }
+ }
out:
VOP_UNLOCK(vp, 0, p);
/*
diff --git a/sys/ufs/ufs/ufs_quota.c b/sys/ufs/ufs/ufs_quota.c
index 574a330..6396f67 100644
--- a/sys/ufs/ufs/ufs_quota.c
+++ b/sys/ufs/ufs/ufs_quota.c
@@ -889,6 +889,7 @@ dqsync(vp, dq)
struct vnode *dqvp;
struct iovec aiov;
struct uio auio;
+ struct mount *mp;
int error;
if (dq == NODQUOT)
@@ -897,6 +898,7 @@ dqsync(vp, dq)
return (0);
if ((dqvp = dq->dq_ump->um_quotas[dq->dq_type]) == NULLVP)
panic("dqsync: file");
+ (void) vn_write_suspend_wait(dqvp, V_WAIT);
if (vp != dqvp)
vn_lock(dqvp, LK_EXCLUSIVE | LK_RETRY, p);
while (dq->dq_flags & DQ_LOCK) {
diff --git a/sys/ufs/ufs/ufs_vnops.c b/sys/ufs/ufs/ufs_vnops.c
index e3b6e29..d97568c 100644
--- a/sys/ufs/ufs/ufs_vnops.c
+++ b/sys/ufs/ufs/ufs_vnops.c
@@ -702,6 +702,8 @@ ufs_remove(ap)
int error;
ip = VTOI(vp);
+ if ((ip->i_flags & SF_SNAPSHOT) != 0)
+ ffs_snapremove(vp);
if ((ip->i_flags & (NOUNLINK | IMMUTABLE | APPEND)) ||
(VTOI(dvp)->i_flags & APPEND)) {
error = EPERM;
@@ -2215,6 +2217,7 @@ static struct vnodeopv_entry_desc ufs_vnodeop_entries[] = {
{ &vop_open_desc, (vop_t *) ufs_open },
{ &vop_pathconf_desc, (vop_t *) ufs_pathconf },
{ &vop_poll_desc, (vop_t *) vop_stdpoll },
+ { &vop_getwritemount_desc, (vop_t *) vop_stdgetwritemount },
{ &vop_print_desc, (vop_t *) ufs_print },
{ &vop_readdir_desc, (vop_t *) ufs_readdir },
{ &vop_readlink_desc, (vop_t *) ufs_readlink },
diff --git a/sys/vm/vm_pageout.c b/sys/vm/vm_pageout.c
index 307dd0b..97b221e 100644
--- a/sys/vm/vm_pageout.c
+++ b/sys/vm/vm_pageout.c
@@ -777,6 +777,7 @@ rescan0:
int written;
int swap_pageouts_ok;
struct vnode *vp = NULL;
+ struct mount *mp;
object = m->object;
@@ -853,9 +854,13 @@ rescan0:
if (object->type == OBJT_VNODE) {
vp = object->handle;
+ mp = NULL;
+ if (vp->v_type == VREG)
+ vn_start_write(vp, &mp, V_NOWAIT);
if (VOP_ISLOCKED(vp, NULL) ||
vp->v_data == NULL ||
vget(vp, LK_EXCLUSIVE|LK_NOOBJ, curproc)) {
+ vn_finished_write(mp);
if ((m->queue == PQ_INACTIVE) &&
(m->hold_count == 0) &&
(m->busy == 0) &&
@@ -878,6 +883,7 @@ rescan0:
if (object->flags & OBJ_MIGHTBEDIRTY)
vnodes_skipped++;
vput(vp);
+ vn_finished_write(mp);
continue;
}
@@ -888,6 +894,7 @@ rescan0:
*/
if (m->busy || (m->flags & PG_BUSY)) {
vput(vp);
+ vn_finished_write(mp);
continue;
}
@@ -902,6 +909,7 @@ rescan0:
if (object->flags & OBJ_MIGHTBEDIRTY)
vnodes_skipped++;
vput(vp);
+ vn_finished_write(mp);
continue;
}
}
@@ -913,8 +921,10 @@ rescan0:
* start the cleaning operation.
*/
written = vm_pageout_clean(m);
- if (vp)
+ if (vp) {
vput(vp);
+ vn_finished_write(mp);
+ }
maxlaunder -= written;
}
diff --git a/sys/vm/vnode_pager.c b/sys/vm/vnode_pager.c
index 2633426..3dd12ec 100644
--- a/sys/vm/vnode_pager.c
+++ b/sys/vm/vnode_pager.c
@@ -850,6 +850,7 @@ vnode_pager_putpages(object, m, count, sync, rtvals)
{
int rtval;
struct vnode *vp;
+ struct mount *mp;
int bytes = count * PAGE_SIZE;
/*
@@ -872,11 +873,15 @@ vnode_pager_putpages(object, m, count, sync, rtvals)
*/
vp = object->handle;
+ if (vp->v_type != VREG)
+ mp = NULL;
+ (void)vn_start_write(vp, &mp, V_WAIT);
rtval = VOP_PUTPAGES(vp, m, bytes, sync, rtvals, 0);
if (rtval == EOPNOTSUPP) {
printf("vnode_pager: *** WARNING *** stale FS putpages\n");
rtval = vnode_pager_generic_putpages( vp, m, bytes, sync, rtvals);
}
+ vn_finished_write(mp);
}
OpenPOWER on IntegriCloud