diff options
Diffstat (limited to 'sys/kern')
-rw-r--r-- | sys/kern/kern_ktrace.c | 5 | ||||
-rw-r--r-- | sys/kern/kern_sig.c | 11 | ||||
-rw-r--r-- | sys/kern/tty_tty.c | 6 | ||||
-rw-r--r-- | sys/kern/uipc_usrreq.c | 16 | ||||
-rw-r--r-- | sys/kern/vfs_bio.c | 6 | ||||
-rw-r--r-- | sys/kern/vfs_default.c | 15 | ||||
-rw-r--r-- | sys/kern/vfs_export.c | 29 | ||||
-rw-r--r-- | sys/kern/vfs_extattr.c | 316 | ||||
-rw-r--r-- | sys/kern/vfs_subr.c | 29 | ||||
-rw-r--r-- | sys/kern/vfs_syscalls.c | 316 | ||||
-rw-r--r-- | sys/kern/vfs_vnops.c | 164 | ||||
-rw-r--r-- | sys/kern/vnode_if.src | 16 |
12 files changed, 725 insertions, 204 deletions
diff --git a/sys/kern/kern_ktrace.c b/sys/kern/kern_ktrace.c index d914fc2..b0530f9 100644 --- a/sys/kern/kern_ktrace.c +++ b/sys/kern/kern_ktrace.c @@ -457,7 +457,8 @@ ktrwrite(vp, kth, uio) { struct uio auio; struct iovec aiov[2]; - register struct proc *p = curproc; /* XXX */ + struct proc *p = curproc; /* XXX */ + struct mount *mp; int error; if (vp == NULL) @@ -479,6 +480,7 @@ ktrwrite(vp, kth, uio) if (uio != NULL) kth->ktr_len += uio->uio_resid; } + vn_start_write(vp, &mp, V_WAIT); vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p); (void)VOP_LEASE(vp, p, p->p_ucred, LEASE_WRITE); error = VOP_WRITE(vp, &auio, IO_UNIT | IO_APPEND, p->p_ucred); @@ -487,6 +489,7 @@ ktrwrite(vp, kth, uio) error = VOP_WRITE(vp, uio, IO_UNIT | IO_APPEND, p->p_ucred); } VOP_UNLOCK(vp, 0, p); + vn_finished_write(mp); if (!error) return; /* diff --git a/sys/kern/kern_sig.c b/sys/kern/kern_sig.c index e96f471..2d87b63 100644 --- a/sys/kern/kern_sig.c +++ b/sys/kern/kern_sig.c @@ -1599,6 +1599,7 @@ coredump(p) struct nameidata nd; struct vattr vattr; int error, error1, flags; + struct mount *mp; char *name; /* name of corefile */ off_t limit; @@ -1619,6 +1620,7 @@ coredump(p) if (limit == 0) return 0; +restart: name = expand_name(p->p_comm, p->p_ucred->cr_uid, p->p_pid); NDINIT(&nd, LOOKUP, NOFOLLOW, UIO_SYSSPACE, name, p); flags = O_CREAT | FWRITE | O_NOFOLLOW; @@ -1628,6 +1630,14 @@ coredump(p) return (error); NDFREE(&nd, NDF_ONLY_PNBUF); vp = nd.ni_vp; + if (vn_start_write(vp, &mp, V_NOWAIT) != 0) { + VOP_UNLOCK(vp, 0, p); + if ((error = vn_close(vp, FWRITE, cred, p)) != 0) + return (error); + if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0) + return (error); + goto restart; + } /* Don't dump to non-regular files or files with links. */ if (vp->v_type != VREG || @@ -1647,6 +1657,7 @@ coredump(p) out: VOP_UNLOCK(vp, 0, p); + vn_finished_write(mp); error1 = vn_close(vp, FWRITE, cred, p); if (error == 0) error = error1; diff --git a/sys/kern/tty_tty.c b/sys/kern/tty_tty.c index 2d15c70..66f7a7b 100644 --- a/sys/kern/tty_tty.c +++ b/sys/kern/tty_tty.c @@ -133,13 +133,19 @@ cttywrite(dev, uio, flag) { struct proc *p = uio->uio_procp; struct vnode *ttyvp = cttyvp(uio->uio_procp); + struct mount *mp; int error; if (ttyvp == NULL) return (EIO); + mp = NULL; + if (ttyvp->v_type != VCHR && + (error = vn_start_write(ttyvp, &mp, V_WAIT | PCATCH)) != 0) + return (error); vn_lock(ttyvp, LK_EXCLUSIVE | LK_RETRY, p); error = VOP_WRITE(ttyvp, uio, flag, NOCRED); VOP_UNLOCK(ttyvp, 0, p); + vn_finished_write(mp); return (error); } diff --git a/sys/kern/uipc_usrreq.c b/sys/kern/uipc_usrreq.c index 0103877..a0b4072 100644 --- a/sys/kern/uipc_usrreq.c +++ b/sys/kern/uipc_usrreq.c @@ -538,7 +538,8 @@ unp_bind(unp, nam, p) struct proc *p; { struct sockaddr_un *soun = (struct sockaddr_un *)nam; - register struct vnode *vp; + struct vnode *vp; + struct mount *mp; struct vattr vattr; int error, namelen; struct nameidata nd; @@ -552,6 +553,7 @@ unp_bind(unp, nam, p) return EINVAL; strncpy(buf, soun->sun_path, namelen); buf[namelen] = 0; /* null-terminate the string */ +restart: NDINIT(&nd, CREATE, NOFOLLOW | LOCKPARENT, UIO_SYSSPACE, buf, p); /* SHOULD BE ABLE TO ADOPT EXISTING AND wakeup() ALA FIFO's */ @@ -559,14 +561,19 @@ unp_bind(unp, nam, p) if (error) return (error); vp = nd.ni_vp; - if (vp != NULL) { + if (vp != NULL || vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) { NDFREE(&nd, NDF_ONLY_PNBUF); if (nd.ni_dvp == vp) vrele(nd.ni_dvp); else vput(nd.ni_dvp); - vrele(vp); - return (EADDRINUSE); + if (vp != NULL) { + vrele(vp); + return (EADDRINUSE); + } + if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0) + return (error); + goto restart; } VATTR_NULL(&vattr); vattr.va_type = VSOCK; @@ -582,6 +589,7 @@ unp_bind(unp, nam, p) unp->unp_vnode = vp; unp->unp_addr = (struct sockaddr_un *)dup_sockaddr(nam, 1); VOP_UNLOCK(vp, 0, p); + vn_finished_write(mp); return (0); } diff --git a/sys/kern/vfs_bio.c b/sys/kern/vfs_bio.c index dba2151..96fbd63 100644 --- a/sys/kern/vfs_bio.c +++ b/sys/kern/vfs_bio.c @@ -1165,6 +1165,8 @@ brelse(struct buf * bp) BUF_UNLOCK(bp); bp->b_flags &= ~(B_ASYNC | B_NOCACHE | B_AGE | B_RELBUF); bp->b_ioflags &= ~BIO_ORDERED; + if ((bp->b_flags & B_DELWRI) == 0 && (bp->b_xflags & BX_VNDIRTY)) + panic("brelse: not dirty"); splx(s); } @@ -1225,6 +1227,8 @@ bqrelse(struct buf * bp) BUF_UNLOCK(bp); bp->b_flags &= ~(B_ASYNC | B_NOCACHE | B_AGE | B_RELBUF); bp->b_ioflags &= ~BIO_ORDERED; + if ((bp->b_flags & B_DELWRI) == 0 && (bp->b_xflags & BX_VNDIRTY)) + panic("bqrelse: not dirty"); splx(s); } @@ -1420,7 +1424,7 @@ getnewbuf(int slpflag, int slptimeo, int size, int maxsize) int isspecial; static int flushingbufs; - if (curproc && (curproc->p_flag & P_BUFEXHAUST) == 0) + if (curproc && (curproc->p_flag & (P_COWINPROGRESS|P_BUFEXHAUST)) == 0) isspecial = 0; else isspecial = 1; diff --git a/sys/kern/vfs_default.c b/sys/kern/vfs_default.c index f478aa2..00f9beb 100644 --- a/sys/kern/vfs_default.c +++ b/sys/kern/vfs_default.c @@ -500,6 +500,21 @@ vop_noislocked(ap) return (lockstatus(vp->v_vnlock, ap->a_p)); } +/* + * Return our mount point, as we will take charge of the writes. + */ +int +vop_stdgetwritemount(ap) + struct vop_getwritemount_args /* { + struct vnode *a_vp; + struct mount **a_mpp; + } */ *ap; +{ + + *(ap->a_mpp) = ap->a_vp->v_mount; + return (0); +} + /* * vfs default ops * used to fill the vfs fucntion table to get reasonable default return values. diff --git a/sys/kern/vfs_export.c b/sys/kern/vfs_export.c index 6483660..0e5ec3f 100644 --- a/sys/kern/vfs_export.c +++ b/sys/kern/vfs_export.c @@ -453,6 +453,7 @@ getnewvnode(tag, mp, vops, vpp) int s, count; struct proc *p = curproc; /* XXX */ struct vnode *vp = NULL; + struct mount *vnmp; vm_object_t object; /* @@ -491,7 +492,14 @@ getnewvnode(tag, mp, vops, vpp) vp = NULL; continue; } - break; + /* + * Skip over it if its filesystem is being suspended. + */ + if (vn_start_write(vp, &vnmp, V_NOWAIT) == 0) + break; + simple_unlock(&vp->v_interlock); + TAILQ_INSERT_TAIL(&vnode_free_list, vp, v_freelist); + vp = NULL; } if (vp) { vp->v_flag |= VDOOMED; @@ -504,6 +512,7 @@ getnewvnode(tag, mp, vops, vpp) } else { simple_unlock(&vp->v_interlock); } + vn_finished_write(vnmp); #ifdef INVARIANTS { @@ -515,6 +524,8 @@ getnewvnode(tag, mp, vops, vpp) if (vp->v_numoutput) panic("Clean vnode has pending I/O's"); splx(s); + if (vp->v_writecount != 0) + panic("Non-zero write count"); } #endif vp->v_flag = 0; @@ -523,7 +534,6 @@ getnewvnode(tag, mp, vops, vpp) vp->v_cstart = 0; vp->v_clen = 0; vp->v_socket = 0; - vp->v_writecount = 0; /* XXX */ } else { simple_unlock(&vnode_free_list_slock); vp = (struct vnode *) zalloc(vnode_zone); @@ -946,6 +956,7 @@ sched_sync(void) { struct synclist *slp; struct vnode *vp; + struct mount *mp; long starttime; int s; struct proc *p = updateproc; @@ -970,10 +981,12 @@ sched_sync(void) splx(s); while ((vp = LIST_FIRST(slp)) != NULL) { - if (VOP_ISLOCKED(vp, NULL) == 0) { + if (VOP_ISLOCKED(vp, NULL) == 0 && + vn_start_write(vp, &mp, V_NOWAIT) == 0) { vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p); (void) VOP_FSYNC(vp, p->p_ucred, MNT_LAZY, p); VOP_UNLOCK(vp, 0, p); + vn_finished_write(mp); } s = splbio(); if (LIST_FIRST(slp) == vp) { @@ -1386,6 +1399,7 @@ vrele(vp) struct proc *p = curproc; /* XXX */ KASSERT(vp != NULL, ("vrele: null vp")); + KASSERT(vp->v_writecount < vp->v_usecount, ("vrele: missed vn_close")); simple_lock(&vp->v_interlock); @@ -1427,6 +1441,7 @@ vput(vp) struct proc *p = curproc; /* XXX */ KASSERT(vp != NULL, ("vput: null vp")); + KASSERT(vp->v_writecount < vp->v_usecount, ("vput: missed vn_close")); simple_lock(&vp->v_interlock); @@ -1632,6 +1647,8 @@ vclean(vp, flags, p) * If the flush fails, just toss the buffers. */ if (flags & DOCLOSE) { + if (TAILQ_FIRST(&vp->v_dirtyblkhd) != NULL) + (void) vn_write_suspend_wait(vp, V_WAIT); if (vinvalbuf(vp, V_SAVE, NOCRED, p, 0, 0) != 0) vinvalbuf(vp, 0, NOCRED, p, 0, 0); } @@ -2785,12 +2802,18 @@ sync_fsync(ap) simple_unlock(&mountlist_slock); return (0); } + if (vn_start_write(NULL, &mp, V_NOWAIT) != 0) { + vfs_unbusy(mp, p); + simple_unlock(&mountlist_slock); + return (0); + } asyncflag = mp->mnt_flag & MNT_ASYNC; mp->mnt_flag &= ~MNT_ASYNC; vfs_msync(mp, MNT_NOWAIT); VFS_SYNC(mp, MNT_LAZY, ap->a_cred, p); if (asyncflag) mp->mnt_flag |= MNT_ASYNC; + vn_finished_write(mp); vfs_unbusy(mp, p); return (0); } diff --git a/sys/kern/vfs_extattr.c b/sys/kern/vfs_extattr.c index 65a297ca..404114a 100644 --- a/sys/kern/vfs_extattr.c +++ b/sys/kern/vfs_extattr.c @@ -164,8 +164,8 @@ mount(p, uap) vput(vp); return (EOPNOTSUPP); /* Needs translation */ } - mp->mnt_flag |= - SCARG(uap, flags) & (MNT_RELOAD | MNT_FORCE | MNT_UPDATE); + mp->mnt_flag |= SCARG(uap, flags) & + (MNT_RELOAD | MNT_FORCE | MNT_UPDATE | MNT_SNAPSHOT); /* * Only root, or the user that did the original mount is * permitted to update it. @@ -303,7 +303,8 @@ update: vrele(vp); if (mp->mnt_kern_flag & MNTK_WANTRDWR) mp->mnt_flag &= ~MNT_RDONLY; - mp->mnt_flag &=~ (MNT_UPDATE | MNT_RELOAD | MNT_FORCE); + mp->mnt_flag &=~ + (MNT_UPDATE | MNT_RELOAD | MNT_FORCE | MNT_SNAPSHOT); mp->mnt_kern_flag &=~ MNTK_WANTRDWR; if (error) { mp->mnt_flag = flag; @@ -458,7 +459,7 @@ unmount(p, uap) */ int dounmount(mp, flags, p) - register struct mount *mp; + struct mount *mp; int flags; struct proc *p; { @@ -469,6 +470,7 @@ dounmount(mp, flags, p) simple_lock(&mountlist_slock); mp->mnt_kern_flag |= MNTK_UNMOUNT; lockmgr(&mp->mnt_lock, LK_DRAIN | LK_INTERLOCK, &mountlist_slock, p); + vn_start_write(NULL, &mp, V_WAIT); if (mp->mnt_flag & MNT_EXPUBLIC) vfs_setpublicfs(NULL, NULL, NULL); @@ -481,8 +483,10 @@ dounmount(mp, flags, p) vrele(mp->mnt_syncer); if (((mp->mnt_flag & MNT_RDONLY) || (error = VFS_SYNC(mp, MNT_WAIT, p->p_ucred, p)) == 0) || - (flags & MNT_FORCE)) + (flags & MNT_FORCE)) { error = VFS_UNMOUNT(mp, flags, p); + } + vn_finished_write(mp); simple_lock(&mountlist_slock); if (error) { if ((mp->mnt_flag & MNT_RDONLY) == 0 && mp->mnt_syncer == NULL) @@ -530,7 +534,7 @@ sync(p, uap) struct proc *p; struct sync_args *uap; { - register struct mount *mp, *nmp; + struct mount *mp, *nmp; int asyncflag; simple_lock(&mountlist_slock); @@ -539,13 +543,15 @@ sync(p, uap) nmp = TAILQ_NEXT(mp, mnt_list); continue; } - if ((mp->mnt_flag & MNT_RDONLY) == 0) { + if ((mp->mnt_flag & MNT_RDONLY) == 0 && + vn_start_write(NULL, &mp, V_NOWAIT) == 0) { asyncflag = mp->mnt_flag & MNT_ASYNC; mp->mnt_flag &= ~MNT_ASYNC; vfs_msync(mp, MNT_NOWAIT); VFS_SYNC(mp, MNT_NOWAIT, - ((p != NULL) ? p->p_ucred : NOCRED), p); + ((p != NULL) ? p->p_ucred : NOCRED), p); mp->mnt_flag |= asyncflag; + vn_finished_write(mp); } simple_lock(&mountlist_slock); nmp = TAILQ_NEXT(mp, mnt_list); @@ -593,7 +599,7 @@ quotactl(p, uap) syscallarg(caddr_t) arg; } */ *uap; { - register struct mount *mp; + struct mount *mp; int error; struct nameidata nd; @@ -602,11 +608,15 @@ quotactl(p, uap) NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, SCARG(uap, path), p); if ((error = namei(&nd)) != 0) return (error); - mp = nd.ni_vp->v_mount; NDFREE(&nd, NDF_ONLY_PNBUF); + error = vn_start_write(nd.ni_vp, &mp, V_WAIT | PCATCH); vrele(nd.ni_vp); - return (VFS_QUOTACTL(mp, SCARG(uap, cmd), SCARG(uap, uid), - SCARG(uap, arg), p)); + if (error) + return (error); + error = VFS_QUOTACTL(mp, SCARG(uap, cmd), SCARG(uap, uid), + SCARG(uap, arg), p); + vn_finished_write(mp); + return (error); } /* @@ -972,6 +982,7 @@ open(p, uap) struct file *fp; struct vnode *vp; struct vattr vat; + struct mount *mp; int cmode, flags, oflags; struct file *nfp; int type, indx, error; @@ -1029,12 +1040,15 @@ open(p, uap) fp->f_flag |= FHASLOCK; } if (flags & O_TRUNC) { + if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0) + goto bad; VOP_LEASE(vp, p, p->p_ucred, LEASE_WRITE); VATTR_NULL(&vat); vat.va_size = 0; vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p); error = VOP_SETATTR(vp, &vat, p->p_ucred, p); VOP_UNLOCK(vp, 0, p); + vn_finished_write(mp); if (error) goto bad; } @@ -1101,7 +1115,8 @@ mknod(p, uap) syscallarg(int) dev; } */ *uap; { - register struct vnode *vp; + struct vnode *vp; + struct mount *mp; struct vattr vattr; int error; int whiteout = 0; @@ -1118,14 +1133,16 @@ mknod(p, uap) } if (error) return (error); +restart: bwillwrite(); NDINIT(&nd, CREATE, LOCKPARENT, UIO_USERSPACE, SCARG(uap, path), p); if ((error = namei(&nd)) != 0) return (error); vp = nd.ni_vp; - if (vp != NULL) + if (vp != NULL) { + vrele(vp); error = EEXIST; - else { + } else { VATTR_NULL(&vattr); vattr.va_mode = (SCARG(uap, mode) & ALLPERMS) &~ p->p_fd->fd_cmask; vattr.va_rdev = SCARG(uap, dev); @@ -1149,6 +1166,13 @@ mknod(p, uap) break; } } + if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) { + NDFREE(&nd, NDF_ONLY_PNBUF); + vput(nd.ni_dvp); + if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0) + return (error); + goto restart; + } if (!error) { VOP_LEASE(nd.ni_dvp, p, p->p_ucred, LEASE_WRITE); if (whiteout) @@ -1159,17 +1183,10 @@ mknod(p, uap) if (error == 0) vput(nd.ni_vp); } - NDFREE(&nd, NDF_ONLY_PNBUF); - vput(nd.ni_dvp); - } else { - NDFREE(&nd, NDF_ONLY_PNBUF); - if (nd.ni_dvp == vp) - vrele(nd.ni_dvp); - else - vput(nd.ni_dvp); - if (vp) - vrele(vp); } + NDFREE(&nd, NDF_ONLY_PNBUF); + vput(nd.ni_dvp); + vn_finished_write(mp); ASSERT_VOP_UNLOCKED(nd.ni_dvp, "mknod"); ASSERT_VOP_UNLOCKED(nd.ni_vp, "mknod"); return (error); @@ -1193,23 +1210,29 @@ mkfifo(p, uap) syscallarg(int) mode; } */ *uap; { + struct mount *mp; struct vattr vattr; int error; struct nameidata nd; +restart: bwillwrite(); NDINIT(&nd, CREATE, LOCKPARENT, UIO_USERSPACE, SCARG(uap, path), p); if ((error = namei(&nd)) != 0) return (error); if (nd.ni_vp != NULL) { NDFREE(&nd, NDF_ONLY_PNBUF); - if (nd.ni_dvp == nd.ni_vp) - vrele(nd.ni_dvp); - else - vput(nd.ni_dvp); vrele(nd.ni_vp); + vput(nd.ni_dvp); return (EEXIST); } + if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) { + NDFREE(&nd, NDF_ONLY_PNBUF); + vput(nd.ni_dvp); + if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0) + return (error); + goto restart; + } VATTR_NULL(&vattr); vattr.va_type = VFIFO; vattr.va_mode = (SCARG(uap, mode) & ALLPERMS) &~ p->p_fd->fd_cmask; @@ -1219,6 +1242,7 @@ mkfifo(p, uap) vput(nd.ni_vp); NDFREE(&nd, NDF_ONLY_PNBUF); vput(nd.ni_dvp); + vn_finished_write(mp); return (error); } @@ -1240,7 +1264,8 @@ link(p, uap) syscallarg(char *) link; } */ *uap; { - register struct vnode *vp; + struct vnode *vp; + struct mount *mp; struct nameidata nd; int error; @@ -1250,30 +1275,29 @@ link(p, uap) return (error); NDFREE(&nd, NDF_ONLY_PNBUF); vp = nd.ni_vp; - if (vp->v_type == VDIR) - error = EPERM; /* POSIX */ - else { - NDINIT(&nd, CREATE, LOCKPARENT|NOOBJ, UIO_USERSPACE, SCARG(uap, link), p); - error = namei(&nd); - if (!error) { - if (nd.ni_vp != NULL) { - if (nd.ni_vp) - vrele(nd.ni_vp); - error = EEXIST; - } else { - VOP_LEASE(nd.ni_dvp, p, p->p_ucred, - LEASE_WRITE); - VOP_LEASE(vp, p, p->p_ucred, LEASE_WRITE); - error = VOP_LINK(nd.ni_dvp, vp, &nd.ni_cnd); - } - NDFREE(&nd, NDF_ONLY_PNBUF); - if (nd.ni_dvp == nd.ni_vp) - vrele(nd.ni_dvp); - else - vput(nd.ni_dvp); + if (vp->v_type == VDIR) { + vrele(vp); + return (EPERM); /* POSIX */ + } + if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0) { + vrele(vp); + return (error); + } + NDINIT(&nd, CREATE, LOCKPARENT|NOOBJ, UIO_USERSPACE, SCARG(uap, link), p); + if ((error = namei(&nd)) == 0) { + if (nd.ni_vp != NULL) { + vrele(nd.ni_vp); + error = EEXIST; + } else { + VOP_LEASE(nd.ni_dvp, p, p->p_ucred, LEASE_WRITE); + VOP_LEASE(vp, p, p->p_ucred, LEASE_WRITE); + error = VOP_LINK(nd.ni_dvp, vp, &nd.ni_cnd); } + NDFREE(&nd, NDF_ONLY_PNBUF); + vput(nd.ni_dvp); } vrele(vp); + vn_finished_write(mp); ASSERT_VOP_UNLOCKED(nd.ni_dvp, "link"); ASSERT_VOP_UNLOCKED(nd.ni_vp, "link"); return (error); @@ -1297,6 +1321,7 @@ symlink(p, uap) syscallarg(char *) link; } */ *uap; { + struct mount *mp; struct vattr vattr; char *path; int error; @@ -1305,20 +1330,25 @@ symlink(p, uap) path = zalloc(namei_zone); if ((error = copyinstr(SCARG(uap, path), path, MAXPATHLEN, NULL)) != 0) goto out; +restart: bwillwrite(); NDINIT(&nd, CREATE, LOCKPARENT|NOOBJ, UIO_USERSPACE, SCARG(uap, link), p); if ((error = namei(&nd)) != 0) goto out; if (nd.ni_vp) { NDFREE(&nd, NDF_ONLY_PNBUF); - if (nd.ni_dvp == nd.ni_vp) - vrele(nd.ni_dvp); - else - vput(nd.ni_dvp); vrele(nd.ni_vp); + vput(nd.ni_dvp); error = EEXIST; goto out; } + if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) { + NDFREE(&nd, NDF_ONLY_PNBUF); + vput(nd.ni_dvp); + if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0) + return (error); + goto restart; + } VATTR_NULL(&vattr); vattr.va_mode = ACCESSPERMS &~ p->p_fd->fd_cmask; VOP_LEASE(nd.ni_dvp, p, p->p_ucred, LEASE_WRITE); @@ -1327,6 +1357,7 @@ symlink(p, uap) if (error == 0) vput(nd.ni_vp); vput(nd.ni_dvp); + vn_finished_write(mp); ASSERT_VOP_UNLOCKED(nd.ni_dvp, "symlink"); ASSERT_VOP_UNLOCKED(nd.ni_vp, "symlink"); out: @@ -1346,8 +1377,10 @@ undelete(p, uap) } */ *uap; { int error; + struct mount *mp; struct nameidata nd; +restart: bwillwrite(); NDINIT(&nd, DELETE, LOCKPARENT|DOWHITEOUT, UIO_USERSPACE, SCARG(uap, path), p); @@ -1357,19 +1390,23 @@ undelete(p, uap) if (nd.ni_vp != NULLVP || !(nd.ni_cnd.cn_flags & ISWHITEOUT)) { NDFREE(&nd, NDF_ONLY_PNBUF); - if (nd.ni_dvp == nd.ni_vp) - vrele(nd.ni_dvp); - else - vput(nd.ni_dvp); if (nd.ni_vp) vrele(nd.ni_vp); + vput(nd.ni_dvp); return (EEXIST); } - + if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) { + NDFREE(&nd, NDF_ONLY_PNBUF); + vput(nd.ni_dvp); + if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0) + return (error); + goto restart; + } VOP_LEASE(nd.ni_dvp, p, p->p_ucred, LEASE_WRITE); error = VOP_WHITEOUT(nd.ni_dvp, &nd.ni_cnd, DELETE); NDFREE(&nd, NDF_ONLY_PNBUF); vput(nd.ni_dvp); + vn_finished_write(mp); ASSERT_VOP_UNLOCKED(nd.ni_dvp, "undelete"); ASSERT_VOP_UNLOCKED(nd.ni_vp, "undelete"); return (error); @@ -1391,18 +1428,17 @@ unlink(p, uap) syscallarg(char *) path; } */ *uap; { - register struct vnode *vp; + struct mount *mp; + struct vnode *vp; int error; struct nameidata nd; +restart: bwillwrite(); NDINIT(&nd, DELETE, LOCKPARENT, UIO_USERSPACE, SCARG(uap, path), p); if ((error = namei(&nd)) != 0) return (error); vp = nd.ni_vp; - VOP_LEASE(vp, p, p->p_ucred, LEASE_WRITE); - vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p); - if (vp->v_type == VDIR) error = EPERM; /* POSIX */ else { @@ -1414,18 +1450,24 @@ unlink(p, uap) if (vp->v_flag & VROOT) error = EBUSY; } - + if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) { + NDFREE(&nd, NDF_ONLY_PNBUF); + vrele(vp); + vput(nd.ni_dvp); + if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0) + return (error); + goto restart; + } + VOP_LEASE(vp, p, p->p_ucred, LEASE_WRITE); + vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p); if (!error) { VOP_LEASE(nd.ni_dvp, p, p->p_ucred, LEASE_WRITE); error = VOP_REMOVE(nd.ni_dvp, vp, &nd.ni_cnd); } NDFREE(&nd, NDF_ONLY_PNBUF); - if (nd.ni_dvp == vp) - vrele(nd.ni_dvp); - else - vput(nd.ni_dvp); - if (vp != NULLVP) - vput(vp); + vput(nd.ni_dvp); + vput(vp); + vn_finished_write(mp); ASSERT_VOP_UNLOCKED(nd.ni_dvp, "unlink"); ASSERT_VOP_UNLOCKED(nd.ni_vp, "unlink"); return (error); @@ -1936,6 +1978,7 @@ setfflags(p, vp, flags) int flags; { int error; + struct mount *mp; struct vattr vattr; /* @@ -1948,12 +1991,15 @@ setfflags(p, vp, flags) ((error = suser_xxx(p->p_ucred, p, PRISON_ROOT)) != 0)) return (error); + if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0) + return (error); VOP_LEASE(vp, p, p->p_ucred, LEASE_WRITE); vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p); VATTR_NULL(&vattr); vattr.va_flags = flags; error = VOP_SETATTR(vp, &vattr, p->p_ucred, p); VOP_UNLOCK(vp, 0, p); + vn_finished_write(mp); return (error); } @@ -2020,14 +2066,18 @@ setfmode(p, vp, mode) int mode; { int error; + struct mount *mp; struct vattr vattr; + if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0) + return (error); VOP_LEASE(vp, p, p->p_ucred, LEASE_WRITE); vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p); VATTR_NULL(&vattr); vattr.va_mode = mode & ALLPERMS; error = VOP_SETATTR(vp, &vattr, p->p_ucred, p); VOP_UNLOCK(vp, 0, p); + vn_finished_write(mp); return error; } @@ -2125,8 +2175,11 @@ setfown(p, vp, uid, gid) gid_t gid; { int error; + struct mount *mp; struct vattr vattr; + if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0) + return (error); VOP_LEASE(vp, p, p->p_ucred, LEASE_WRITE); vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p); VATTR_NULL(&vattr); @@ -2134,6 +2187,7 @@ setfown(p, vp, uid, gid) vattr.va_gid = gid; error = VOP_SETATTR(vp, &vattr, p->p_ucred, p); VOP_UNLOCK(vp, 0, p); + vn_finished_write(mp); return error; } @@ -2259,8 +2313,11 @@ setutimes(p, vp, ts, nullflag) int nullflag; { int error; + struct mount *mp; struct vattr vattr; + if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0) + return (error); VOP_LEASE(vp, p, p->p_ucred, LEASE_WRITE); vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p); VATTR_NULL(&vattr); @@ -2270,6 +2327,7 @@ setutimes(p, vp, ts, nullflag) vattr.va_vaflags |= VA_UTIMES_NULL; error = VOP_SETATTR(vp, &vattr, p->p_ucred, p); VOP_UNLOCK(vp, 0, p); + vn_finished_write(mp); return error; } @@ -2394,7 +2452,8 @@ truncate(p, uap) syscallarg(off_t) length; } */ *uap; { - register struct vnode *vp; + struct mount *mp; + struct vnode *vp; struct vattr vattr; int error; struct nameidata nd; @@ -2405,6 +2464,10 @@ truncate(p, uap) if ((error = namei(&nd)) != 0) return (error); vp = nd.ni_vp; + if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0) { + vrele(vp); + return (error); + } NDFREE(&nd, NDF_ONLY_PNBUF); VOP_LEASE(vp, p, p->p_ucred, LEASE_WRITE); vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p); @@ -2417,6 +2480,7 @@ truncate(p, uap) error = VOP_SETATTR(vp, &vattr, p->p_ucred, p); } vput(vp); + vn_finished_write(mp); return (error); } @@ -2440,6 +2504,7 @@ ftruncate(p, uap) syscallarg(off_t) length; } */ *uap; { + struct mount *mp; struct vattr vattr; struct vnode *vp; struct file *fp; @@ -2452,6 +2517,8 @@ ftruncate(p, uap) if ((fp->f_flag & FWRITE) == 0) return (EINVAL); vp = (struct vnode *)fp->f_data; + if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0) + return (error); VOP_LEASE(vp, p, p->p_ucred, LEASE_WRITE); vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p); if (vp->v_type == VDIR) @@ -2462,6 +2529,7 @@ ftruncate(p, uap) error = VOP_SETATTR(vp, &vattr, fp->f_cred, p); } VOP_UNLOCK(vp, 0, p); + vn_finished_write(mp); return (error); } @@ -2541,13 +2609,16 @@ fsync(p, uap) syscallarg(int) fd; } */ *uap; { - register struct vnode *vp; + struct vnode *vp; + struct mount *mp; struct file *fp; int error; if ((error = getvnode(p->p_fd, SCARG(uap, fd), &fp)) != 0) return (error); vp = (struct vnode *)fp->f_data; + if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0) + return (error); vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p); if (vp->v_object) vm_object_page_clean(vp->v_object, 0, 0, 0); @@ -2558,6 +2629,7 @@ fsync(p, uap) #endif VOP_UNLOCK(vp, 0, p); + vn_finished_write(mp); return (error); } @@ -2580,7 +2652,8 @@ rename(p, uap) syscallarg(char *) to; } */ *uap; { - register struct vnode *tvp, *fvp, *tdvp; + struct mount *mp; + struct vnode *tvp, *fvp, *tdvp; struct nameidata fromnd, tond; int error; @@ -2590,6 +2663,12 @@ rename(p, uap) if ((error = namei(&fromnd)) != 0) return (error); fvp = fromnd.ni_vp; + if ((error = vn_start_write(fvp, &mp, V_WAIT | PCATCH)) != 0) { + NDFREE(&fromnd, NDF_ONLY_PNBUF); + vrele(fromnd.ni_dvp); + vrele(fvp); + goto out1; + } NDINIT(&tond, RENAME, LOCKPARENT | LOCKLEAF | NOCACHE | SAVESTART | NOOBJ, UIO_USERSPACE, SCARG(uap, to), p); if (fromnd.ni_vp->v_type == VDIR) @@ -2652,6 +2731,7 @@ out: vrele(fvp); } vrele(tond.ni_startdir); + vn_finished_write(mp); ASSERT_VOP_UNLOCKED(fromnd.ni_dvp, "rename"); ASSERT_VOP_UNLOCKED(fromnd.ni_vp, "rename"); ASSERT_VOP_UNLOCKED(tond.ni_dvp, "rename"); @@ -2682,11 +2762,13 @@ mkdir(p, uap) syscallarg(int) mode; } */ *uap; { - register struct vnode *vp; + struct mount *mp; + struct vnode *vp; struct vattr vattr; int error; struct nameidata nd; +restart: bwillwrite(); NDINIT(&nd, CREATE, LOCKPARENT, UIO_USERSPACE, SCARG(uap, path), p); nd.ni_cnd.cn_flags |= WILLBEDIR; @@ -2695,13 +2777,17 @@ mkdir(p, uap) vp = nd.ni_vp; if (vp != NULL) { NDFREE(&nd, NDF_ONLY_PNBUF); - if (nd.ni_dvp == vp) - vrele(nd.ni_dvp); - else - vput(nd.ni_dvp); vrele(vp); + vput(nd.ni_dvp); return (EEXIST); } + if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) { + NDFREE(&nd, NDF_ONLY_PNBUF); + vput(nd.ni_dvp); + if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0) + return (error); + goto restart; + } VATTR_NULL(&vattr); vattr.va_type = VDIR; vattr.va_mode = (SCARG(uap, mode) & ACCESSPERMS) &~ p->p_fd->fd_cmask; @@ -2711,6 +2797,7 @@ mkdir(p, uap) vput(nd.ni_dvp); if (!error) vput(nd.ni_vp); + vn_finished_write(mp); ASSERT_VOP_UNLOCKED(nd.ni_dvp, "mkdir"); ASSERT_VOP_UNLOCKED(nd.ni_vp, "mkdir"); return (error); @@ -2732,10 +2819,12 @@ rmdir(p, uap) syscallarg(char *) path; } */ *uap; { - register struct vnode *vp; + struct mount *mp; + struct vnode *vp; int error; struct nameidata nd; +restart: bwillwrite(); NDINIT(&nd, DELETE, LOCKPARENT | LOCKLEAF, UIO_USERSPACE, SCARG(uap, path), p); @@ -2756,21 +2845,32 @@ rmdir(p, uap) /* * The root of a mounted filesystem cannot be deleted. */ - if (vp->v_flag & VROOT) + if (vp->v_flag & VROOT) { error = EBUSY; - else { - VOP_LEASE(nd.ni_dvp, p, p->p_ucred, LEASE_WRITE); - VOP_LEASE(vp, p, p->p_ucred, LEASE_WRITE); - error = VOP_RMDIR(nd.ni_dvp, nd.ni_vp, &nd.ni_cnd); + goto out; } + if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) { + NDFREE(&nd, NDF_ONLY_PNBUF); + if (nd.ni_dvp == vp) + vrele(nd.ni_dvp); + else + vput(nd.ni_dvp); + vput(vp); + if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0) + return (error); + goto restart; + } + VOP_LEASE(nd.ni_dvp, p, p->p_ucred, LEASE_WRITE); + VOP_LEASE(vp, p, p->p_ucred, LEASE_WRITE); + error = VOP_RMDIR(nd.ni_dvp, nd.ni_vp, &nd.ni_cnd); + vn_finished_write(mp); out: NDFREE(&nd, NDF_ONLY_PNBUF); if (nd.ni_dvp == vp) vrele(nd.ni_dvp); else vput(nd.ni_dvp); - if (vp != NULLVP) - vput(vp); + vput(vp); ASSERT_VOP_UNLOCKED(nd.ni_dvp, "rmdir"); ASSERT_VOP_UNLOCKED(nd.ni_vp, "rmdir"); return (error); @@ -3049,7 +3149,8 @@ revoke(p, uap) syscallarg(char *) path; } */ *uap; { - register struct vnode *vp; + struct mount *mp; + struct vnode *vp; struct vattr vattr; int error; struct nameidata nd; @@ -3068,8 +3169,11 @@ revoke(p, uap) if (p->p_ucred->cr_uid != vattr.va_uid && (error = suser_xxx(0, p, PRISON_ROOT))) goto out; + if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0) + goto out; if (vcount(vp) > 1) VOP_REVOKE(vp, REVOKEALL); + vn_finished_write(mp); out: vrele(vp); return (error); @@ -3228,11 +3332,16 @@ fhopen(p, uap) } if (fmode & O_TRUNC) { VOP_UNLOCK(vp, 0, p); /* XXX */ + if ((error = vn_start_write(NULL, &mp, V_WAIT | PCATCH)) != 0) { + vrele(vp); + return (error); + } VOP_LEASE(vp, p, p->p_ucred, LEASE_WRITE); vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p); /* XXX */ VATTR_NULL(vap); vap->va_size = 0; error = VOP_SETATTR(vp, vap, p->p_ucred, p); + vn_finished_write(mp); if (error) goto bad; } @@ -3407,10 +3516,15 @@ extattrctl(p, uap) NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, SCARG(uap, path), p); if ((error = namei(&nd)) != 0) return (error); - mp = nd.ni_vp->v_mount; + error = vn_start_write(nd.ni_vp, &mp, V_WAIT | PCATCH); NDFREE(&nd, 0); - return (VFS_EXTATTRCTL(mp, SCARG(uap, cmd), SCARG(uap, attrname), - SCARG(uap, arg), p)); + vrele(nd.ni_vp); + if (error) + return (error); + error = VFS_EXTATTRCTL(mp, SCARG(uap, cmd), SCARG(uap, attrname), + SCARG(uap, arg), p); + vn_finished_write(mp); + return (error); } /* @@ -3425,6 +3539,7 @@ extattr_set_file(p, uap) struct extattr_set_file_args *uap; { struct nameidata nd; + struct mount *mp; struct uio auio; struct iovec *iov, *needfree = NULL, aiov[UIO_SMALLIOV]; char attrname[EXTATTR_MAXNAMELEN]; @@ -3434,10 +3549,11 @@ extattr_set_file(p, uap) error = copyin(SCARG(uap, attrname), attrname, EXTATTR_MAXNAMELEN); if (error) return (error); - NDINIT(&nd, LOOKUP, LOCKLEAF | FOLLOW, UIO_USERSPACE, SCARG(uap, path), - p); + NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, SCARG(uap, path), p); if ((error = namei(&nd)) != 0) return(error); + if ((error = vn_start_write(nd.ni_vp, &mp, V_WAIT | PCATCH)) != 0) + goto done; iovlen = uap->iovcnt * sizeof(struct iovec); if (uap->iovcnt > UIO_SMALLIOV) { if (uap->iovcnt > UIO_MAXIOV) { @@ -3477,6 +3593,8 @@ done: if (needfree) FREE(needfree, M_IOV); NDFREE(&nd, 0); + vrele(nd.ni_vp); + vn_finished_write(mp); return (error); } @@ -3508,6 +3626,7 @@ extattr_get_file(p, uap) if (uap->iovcnt > UIO_SMALLIOV) { if (uap->iovcnt > UIO_MAXIOV) { NDFREE(&nd, 0); + vrele(nd.ni_vp); return (EINVAL); } MALLOC(iov, struct iovec *, iovlen, M_IOV, M_WAITOK); @@ -3545,6 +3664,7 @@ done: if (needfree) FREE(needfree, M_IOV); NDFREE(&nd, 0); + vrele(nd.ni_vp); return(error); } @@ -3557,6 +3677,7 @@ extattr_delete_file(p, uap) struct proc *p; struct extattr_delete_file_args *uap; { + struct mount *mp; struct nameidata nd; char attrname[EXTATTR_MAXNAMELEN]; int error; @@ -3564,12 +3685,17 @@ extattr_delete_file(p, uap) error = copyin(SCARG(uap, attrname), attrname, EXTATTR_MAXNAMELEN); if (error) return(error); - NDINIT(&nd, LOOKUP, LOCKLEAF | FOLLOW, UIO_USERSPACE, SCARG(uap, path), - p); + NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, SCARG(uap, path), p); if ((error = namei(&nd)) != 0) return(error); + if ((error = vn_start_write(nd.ni_vp, &mp, V_WAIT | PCATCH)) != 0) { + vrele(nd.ni_vp); + return (error); + } error = VOP_SETEXTATTR(nd.ni_vp, attrname, NULL, p->p_cred->pc_ucred, p); NDFREE(&nd, 0); + vrele(nd.ni_vp); + vn_finished_write(mp); return(error); } diff --git a/sys/kern/vfs_subr.c b/sys/kern/vfs_subr.c index 6483660..0e5ec3f 100644 --- a/sys/kern/vfs_subr.c +++ b/sys/kern/vfs_subr.c @@ -453,6 +453,7 @@ getnewvnode(tag, mp, vops, vpp) int s, count; struct proc *p = curproc; /* XXX */ struct vnode *vp = NULL; + struct mount *vnmp; vm_object_t object; /* @@ -491,7 +492,14 @@ getnewvnode(tag, mp, vops, vpp) vp = NULL; continue; } - break; + /* + * Skip over it if its filesystem is being suspended. + */ + if (vn_start_write(vp, &vnmp, V_NOWAIT) == 0) + break; + simple_unlock(&vp->v_interlock); + TAILQ_INSERT_TAIL(&vnode_free_list, vp, v_freelist); + vp = NULL; } if (vp) { vp->v_flag |= VDOOMED; @@ -504,6 +512,7 @@ getnewvnode(tag, mp, vops, vpp) } else { simple_unlock(&vp->v_interlock); } + vn_finished_write(vnmp); #ifdef INVARIANTS { @@ -515,6 +524,8 @@ getnewvnode(tag, mp, vops, vpp) if (vp->v_numoutput) panic("Clean vnode has pending I/O's"); splx(s); + if (vp->v_writecount != 0) + panic("Non-zero write count"); } #endif vp->v_flag = 0; @@ -523,7 +534,6 @@ getnewvnode(tag, mp, vops, vpp) vp->v_cstart = 0; vp->v_clen = 0; vp->v_socket = 0; - vp->v_writecount = 0; /* XXX */ } else { simple_unlock(&vnode_free_list_slock); vp = (struct vnode *) zalloc(vnode_zone); @@ -946,6 +956,7 @@ sched_sync(void) { struct synclist *slp; struct vnode *vp; + struct mount *mp; long starttime; int s; struct proc *p = updateproc; @@ -970,10 +981,12 @@ sched_sync(void) splx(s); while ((vp = LIST_FIRST(slp)) != NULL) { - if (VOP_ISLOCKED(vp, NULL) == 0) { + if (VOP_ISLOCKED(vp, NULL) == 0 && + vn_start_write(vp, &mp, V_NOWAIT) == 0) { vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p); (void) VOP_FSYNC(vp, p->p_ucred, MNT_LAZY, p); VOP_UNLOCK(vp, 0, p); + vn_finished_write(mp); } s = splbio(); if (LIST_FIRST(slp) == vp) { @@ -1386,6 +1399,7 @@ vrele(vp) struct proc *p = curproc; /* XXX */ KASSERT(vp != NULL, ("vrele: null vp")); + KASSERT(vp->v_writecount < vp->v_usecount, ("vrele: missed vn_close")); simple_lock(&vp->v_interlock); @@ -1427,6 +1441,7 @@ vput(vp) struct proc *p = curproc; /* XXX */ KASSERT(vp != NULL, ("vput: null vp")); + KASSERT(vp->v_writecount < vp->v_usecount, ("vput: missed vn_close")); simple_lock(&vp->v_interlock); @@ -1632,6 +1647,8 @@ vclean(vp, flags, p) * If the flush fails, just toss the buffers. */ if (flags & DOCLOSE) { + if (TAILQ_FIRST(&vp->v_dirtyblkhd) != NULL) + (void) vn_write_suspend_wait(vp, V_WAIT); if (vinvalbuf(vp, V_SAVE, NOCRED, p, 0, 0) != 0) vinvalbuf(vp, 0, NOCRED, p, 0, 0); } @@ -2785,12 +2802,18 @@ sync_fsync(ap) simple_unlock(&mountlist_slock); return (0); } + if (vn_start_write(NULL, &mp, V_NOWAIT) != 0) { + vfs_unbusy(mp, p); + simple_unlock(&mountlist_slock); + return (0); + } asyncflag = mp->mnt_flag & MNT_ASYNC; mp->mnt_flag &= ~MNT_ASYNC; vfs_msync(mp, MNT_NOWAIT); VFS_SYNC(mp, MNT_LAZY, ap->a_cred, p); if (asyncflag) mp->mnt_flag |= MNT_ASYNC; + vn_finished_write(mp); vfs_unbusy(mp, p); return (0); } diff --git a/sys/kern/vfs_syscalls.c b/sys/kern/vfs_syscalls.c index 65a297ca..404114a 100644 --- a/sys/kern/vfs_syscalls.c +++ b/sys/kern/vfs_syscalls.c @@ -164,8 +164,8 @@ mount(p, uap) vput(vp); return (EOPNOTSUPP); /* Needs translation */ } - mp->mnt_flag |= - SCARG(uap, flags) & (MNT_RELOAD | MNT_FORCE | MNT_UPDATE); + mp->mnt_flag |= SCARG(uap, flags) & + (MNT_RELOAD | MNT_FORCE | MNT_UPDATE | MNT_SNAPSHOT); /* * Only root, or the user that did the original mount is * permitted to update it. @@ -303,7 +303,8 @@ update: vrele(vp); if (mp->mnt_kern_flag & MNTK_WANTRDWR) mp->mnt_flag &= ~MNT_RDONLY; - mp->mnt_flag &=~ (MNT_UPDATE | MNT_RELOAD | MNT_FORCE); + mp->mnt_flag &=~ + (MNT_UPDATE | MNT_RELOAD | MNT_FORCE | MNT_SNAPSHOT); mp->mnt_kern_flag &=~ MNTK_WANTRDWR; if (error) { mp->mnt_flag = flag; @@ -458,7 +459,7 @@ unmount(p, uap) */ int dounmount(mp, flags, p) - register struct mount *mp; + struct mount *mp; int flags; struct proc *p; { @@ -469,6 +470,7 @@ dounmount(mp, flags, p) simple_lock(&mountlist_slock); mp->mnt_kern_flag |= MNTK_UNMOUNT; lockmgr(&mp->mnt_lock, LK_DRAIN | LK_INTERLOCK, &mountlist_slock, p); + vn_start_write(NULL, &mp, V_WAIT); if (mp->mnt_flag & MNT_EXPUBLIC) vfs_setpublicfs(NULL, NULL, NULL); @@ -481,8 +483,10 @@ dounmount(mp, flags, p) vrele(mp->mnt_syncer); if (((mp->mnt_flag & MNT_RDONLY) || (error = VFS_SYNC(mp, MNT_WAIT, p->p_ucred, p)) == 0) || - (flags & MNT_FORCE)) + (flags & MNT_FORCE)) { error = VFS_UNMOUNT(mp, flags, p); + } + vn_finished_write(mp); simple_lock(&mountlist_slock); if (error) { if ((mp->mnt_flag & MNT_RDONLY) == 0 && mp->mnt_syncer == NULL) @@ -530,7 +534,7 @@ sync(p, uap) struct proc *p; struct sync_args *uap; { - register struct mount *mp, *nmp; + struct mount *mp, *nmp; int asyncflag; simple_lock(&mountlist_slock); @@ -539,13 +543,15 @@ sync(p, uap) nmp = TAILQ_NEXT(mp, mnt_list); continue; } - if ((mp->mnt_flag & MNT_RDONLY) == 0) { + if ((mp->mnt_flag & MNT_RDONLY) == 0 && + vn_start_write(NULL, &mp, V_NOWAIT) == 0) { asyncflag = mp->mnt_flag & MNT_ASYNC; mp->mnt_flag &= ~MNT_ASYNC; vfs_msync(mp, MNT_NOWAIT); VFS_SYNC(mp, MNT_NOWAIT, - ((p != NULL) ? p->p_ucred : NOCRED), p); + ((p != NULL) ? p->p_ucred : NOCRED), p); mp->mnt_flag |= asyncflag; + vn_finished_write(mp); } simple_lock(&mountlist_slock); nmp = TAILQ_NEXT(mp, mnt_list); @@ -593,7 +599,7 @@ quotactl(p, uap) syscallarg(caddr_t) arg; } */ *uap; { - register struct mount *mp; + struct mount *mp; int error; struct nameidata nd; @@ -602,11 +608,15 @@ quotactl(p, uap) NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, SCARG(uap, path), p); if ((error = namei(&nd)) != 0) return (error); - mp = nd.ni_vp->v_mount; NDFREE(&nd, NDF_ONLY_PNBUF); + error = vn_start_write(nd.ni_vp, &mp, V_WAIT | PCATCH); vrele(nd.ni_vp); - return (VFS_QUOTACTL(mp, SCARG(uap, cmd), SCARG(uap, uid), - SCARG(uap, arg), p)); + if (error) + return (error); + error = VFS_QUOTACTL(mp, SCARG(uap, cmd), SCARG(uap, uid), + SCARG(uap, arg), p); + vn_finished_write(mp); + return (error); } /* @@ -972,6 +982,7 @@ open(p, uap) struct file *fp; struct vnode *vp; struct vattr vat; + struct mount *mp; int cmode, flags, oflags; struct file *nfp; int type, indx, error; @@ -1029,12 +1040,15 @@ open(p, uap) fp->f_flag |= FHASLOCK; } if (flags & O_TRUNC) { + if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0) + goto bad; VOP_LEASE(vp, p, p->p_ucred, LEASE_WRITE); VATTR_NULL(&vat); vat.va_size = 0; vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p); error = VOP_SETATTR(vp, &vat, p->p_ucred, p); VOP_UNLOCK(vp, 0, p); + vn_finished_write(mp); if (error) goto bad; } @@ -1101,7 +1115,8 @@ mknod(p, uap) syscallarg(int) dev; } */ *uap; { - register struct vnode *vp; + struct vnode *vp; + struct mount *mp; struct vattr vattr; int error; int whiteout = 0; @@ -1118,14 +1133,16 @@ mknod(p, uap) } if (error) return (error); +restart: bwillwrite(); NDINIT(&nd, CREATE, LOCKPARENT, UIO_USERSPACE, SCARG(uap, path), p); if ((error = namei(&nd)) != 0) return (error); vp = nd.ni_vp; - if (vp != NULL) + if (vp != NULL) { + vrele(vp); error = EEXIST; - else { + } else { VATTR_NULL(&vattr); vattr.va_mode = (SCARG(uap, mode) & ALLPERMS) &~ p->p_fd->fd_cmask; vattr.va_rdev = SCARG(uap, dev); @@ -1149,6 +1166,13 @@ mknod(p, uap) break; } } + if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) { + NDFREE(&nd, NDF_ONLY_PNBUF); + vput(nd.ni_dvp); + if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0) + return (error); + goto restart; + } if (!error) { VOP_LEASE(nd.ni_dvp, p, p->p_ucred, LEASE_WRITE); if (whiteout) @@ -1159,17 +1183,10 @@ mknod(p, uap) if (error == 0) vput(nd.ni_vp); } - NDFREE(&nd, NDF_ONLY_PNBUF); - vput(nd.ni_dvp); - } else { - NDFREE(&nd, NDF_ONLY_PNBUF); - if (nd.ni_dvp == vp) - vrele(nd.ni_dvp); - else - vput(nd.ni_dvp); - if (vp) - vrele(vp); } + NDFREE(&nd, NDF_ONLY_PNBUF); + vput(nd.ni_dvp); + vn_finished_write(mp); ASSERT_VOP_UNLOCKED(nd.ni_dvp, "mknod"); ASSERT_VOP_UNLOCKED(nd.ni_vp, "mknod"); return (error); @@ -1193,23 +1210,29 @@ mkfifo(p, uap) syscallarg(int) mode; } */ *uap; { + struct mount *mp; struct vattr vattr; int error; struct nameidata nd; +restart: bwillwrite(); NDINIT(&nd, CREATE, LOCKPARENT, UIO_USERSPACE, SCARG(uap, path), p); if ((error = namei(&nd)) != 0) return (error); if (nd.ni_vp != NULL) { NDFREE(&nd, NDF_ONLY_PNBUF); - if (nd.ni_dvp == nd.ni_vp) - vrele(nd.ni_dvp); - else - vput(nd.ni_dvp); vrele(nd.ni_vp); + vput(nd.ni_dvp); return (EEXIST); } + if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) { + NDFREE(&nd, NDF_ONLY_PNBUF); + vput(nd.ni_dvp); + if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0) + return (error); + goto restart; + } VATTR_NULL(&vattr); vattr.va_type = VFIFO; vattr.va_mode = (SCARG(uap, mode) & ALLPERMS) &~ p->p_fd->fd_cmask; @@ -1219,6 +1242,7 @@ mkfifo(p, uap) vput(nd.ni_vp); NDFREE(&nd, NDF_ONLY_PNBUF); vput(nd.ni_dvp); + vn_finished_write(mp); return (error); } @@ -1240,7 +1264,8 @@ link(p, uap) syscallarg(char *) link; } */ *uap; { - register struct vnode *vp; + struct vnode *vp; + struct mount *mp; struct nameidata nd; int error; @@ -1250,30 +1275,29 @@ link(p, uap) return (error); NDFREE(&nd, NDF_ONLY_PNBUF); vp = nd.ni_vp; - if (vp->v_type == VDIR) - error = EPERM; /* POSIX */ - else { - NDINIT(&nd, CREATE, LOCKPARENT|NOOBJ, UIO_USERSPACE, SCARG(uap, link), p); - error = namei(&nd); - if (!error) { - if (nd.ni_vp != NULL) { - if (nd.ni_vp) - vrele(nd.ni_vp); - error = EEXIST; - } else { - VOP_LEASE(nd.ni_dvp, p, p->p_ucred, - LEASE_WRITE); - VOP_LEASE(vp, p, p->p_ucred, LEASE_WRITE); - error = VOP_LINK(nd.ni_dvp, vp, &nd.ni_cnd); - } - NDFREE(&nd, NDF_ONLY_PNBUF); - if (nd.ni_dvp == nd.ni_vp) - vrele(nd.ni_dvp); - else - vput(nd.ni_dvp); + if (vp->v_type == VDIR) { + vrele(vp); + return (EPERM); /* POSIX */ + } + if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0) { + vrele(vp); + return (error); + } + NDINIT(&nd, CREATE, LOCKPARENT|NOOBJ, UIO_USERSPACE, SCARG(uap, link), p); + if ((error = namei(&nd)) == 0) { + if (nd.ni_vp != NULL) { + vrele(nd.ni_vp); + error = EEXIST; + } else { + VOP_LEASE(nd.ni_dvp, p, p->p_ucred, LEASE_WRITE); + VOP_LEASE(vp, p, p->p_ucred, LEASE_WRITE); + error = VOP_LINK(nd.ni_dvp, vp, &nd.ni_cnd); } + NDFREE(&nd, NDF_ONLY_PNBUF); + vput(nd.ni_dvp); } vrele(vp); + vn_finished_write(mp); ASSERT_VOP_UNLOCKED(nd.ni_dvp, "link"); ASSERT_VOP_UNLOCKED(nd.ni_vp, "link"); return (error); @@ -1297,6 +1321,7 @@ symlink(p, uap) syscallarg(char *) link; } */ *uap; { + struct mount *mp; struct vattr vattr; char *path; int error; @@ -1305,20 +1330,25 @@ symlink(p, uap) path = zalloc(namei_zone); if ((error = copyinstr(SCARG(uap, path), path, MAXPATHLEN, NULL)) != 0) goto out; +restart: bwillwrite(); NDINIT(&nd, CREATE, LOCKPARENT|NOOBJ, UIO_USERSPACE, SCARG(uap, link), p); if ((error = namei(&nd)) != 0) goto out; if (nd.ni_vp) { NDFREE(&nd, NDF_ONLY_PNBUF); - if (nd.ni_dvp == nd.ni_vp) - vrele(nd.ni_dvp); - else - vput(nd.ni_dvp); vrele(nd.ni_vp); + vput(nd.ni_dvp); error = EEXIST; goto out; } + if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) { + NDFREE(&nd, NDF_ONLY_PNBUF); + vput(nd.ni_dvp); + if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0) + return (error); + goto restart; + } VATTR_NULL(&vattr); vattr.va_mode = ACCESSPERMS &~ p->p_fd->fd_cmask; VOP_LEASE(nd.ni_dvp, p, p->p_ucred, LEASE_WRITE); @@ -1327,6 +1357,7 @@ symlink(p, uap) if (error == 0) vput(nd.ni_vp); vput(nd.ni_dvp); + vn_finished_write(mp); ASSERT_VOP_UNLOCKED(nd.ni_dvp, "symlink"); ASSERT_VOP_UNLOCKED(nd.ni_vp, "symlink"); out: @@ -1346,8 +1377,10 @@ undelete(p, uap) } */ *uap; { int error; + struct mount *mp; struct nameidata nd; +restart: bwillwrite(); NDINIT(&nd, DELETE, LOCKPARENT|DOWHITEOUT, UIO_USERSPACE, SCARG(uap, path), p); @@ -1357,19 +1390,23 @@ undelete(p, uap) if (nd.ni_vp != NULLVP || !(nd.ni_cnd.cn_flags & ISWHITEOUT)) { NDFREE(&nd, NDF_ONLY_PNBUF); - if (nd.ni_dvp == nd.ni_vp) - vrele(nd.ni_dvp); - else - vput(nd.ni_dvp); if (nd.ni_vp) vrele(nd.ni_vp); + vput(nd.ni_dvp); return (EEXIST); } - + if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) { + NDFREE(&nd, NDF_ONLY_PNBUF); + vput(nd.ni_dvp); + if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0) + return (error); + goto restart; + } VOP_LEASE(nd.ni_dvp, p, p->p_ucred, LEASE_WRITE); error = VOP_WHITEOUT(nd.ni_dvp, &nd.ni_cnd, DELETE); NDFREE(&nd, NDF_ONLY_PNBUF); vput(nd.ni_dvp); + vn_finished_write(mp); ASSERT_VOP_UNLOCKED(nd.ni_dvp, "undelete"); ASSERT_VOP_UNLOCKED(nd.ni_vp, "undelete"); return (error); @@ -1391,18 +1428,17 @@ unlink(p, uap) syscallarg(char *) path; } */ *uap; { - register struct vnode *vp; + struct mount *mp; + struct vnode *vp; int error; struct nameidata nd; +restart: bwillwrite(); NDINIT(&nd, DELETE, LOCKPARENT, UIO_USERSPACE, SCARG(uap, path), p); if ((error = namei(&nd)) != 0) return (error); vp = nd.ni_vp; - VOP_LEASE(vp, p, p->p_ucred, LEASE_WRITE); - vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p); - if (vp->v_type == VDIR) error = EPERM; /* POSIX */ else { @@ -1414,18 +1450,24 @@ unlink(p, uap) if (vp->v_flag & VROOT) error = EBUSY; } - + if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) { + NDFREE(&nd, NDF_ONLY_PNBUF); + vrele(vp); + vput(nd.ni_dvp); + if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0) + return (error); + goto restart; + } + VOP_LEASE(vp, p, p->p_ucred, LEASE_WRITE); + vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p); if (!error) { VOP_LEASE(nd.ni_dvp, p, p->p_ucred, LEASE_WRITE); error = VOP_REMOVE(nd.ni_dvp, vp, &nd.ni_cnd); } NDFREE(&nd, NDF_ONLY_PNBUF); - if (nd.ni_dvp == vp) - vrele(nd.ni_dvp); - else - vput(nd.ni_dvp); - if (vp != NULLVP) - vput(vp); + vput(nd.ni_dvp); + vput(vp); + vn_finished_write(mp); ASSERT_VOP_UNLOCKED(nd.ni_dvp, "unlink"); ASSERT_VOP_UNLOCKED(nd.ni_vp, "unlink"); return (error); @@ -1936,6 +1978,7 @@ setfflags(p, vp, flags) int flags; { int error; + struct mount *mp; struct vattr vattr; /* @@ -1948,12 +1991,15 @@ setfflags(p, vp, flags) ((error = suser_xxx(p->p_ucred, p, PRISON_ROOT)) != 0)) return (error); + if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0) + return (error); VOP_LEASE(vp, p, p->p_ucred, LEASE_WRITE); vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p); VATTR_NULL(&vattr); vattr.va_flags = flags; error = VOP_SETATTR(vp, &vattr, p->p_ucred, p); VOP_UNLOCK(vp, 0, p); + vn_finished_write(mp); return (error); } @@ -2020,14 +2066,18 @@ setfmode(p, vp, mode) int mode; { int error; + struct mount *mp; struct vattr vattr; + if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0) + return (error); VOP_LEASE(vp, p, p->p_ucred, LEASE_WRITE); vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p); VATTR_NULL(&vattr); vattr.va_mode = mode & ALLPERMS; error = VOP_SETATTR(vp, &vattr, p->p_ucred, p); VOP_UNLOCK(vp, 0, p); + vn_finished_write(mp); return error; } @@ -2125,8 +2175,11 @@ setfown(p, vp, uid, gid) gid_t gid; { int error; + struct mount *mp; struct vattr vattr; + if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0) + return (error); VOP_LEASE(vp, p, p->p_ucred, LEASE_WRITE); vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p); VATTR_NULL(&vattr); @@ -2134,6 +2187,7 @@ setfown(p, vp, uid, gid) vattr.va_gid = gid; error = VOP_SETATTR(vp, &vattr, p->p_ucred, p); VOP_UNLOCK(vp, 0, p); + vn_finished_write(mp); return error; } @@ -2259,8 +2313,11 @@ setutimes(p, vp, ts, nullflag) int nullflag; { int error; + struct mount *mp; struct vattr vattr; + if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0) + return (error); VOP_LEASE(vp, p, p->p_ucred, LEASE_WRITE); vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p); VATTR_NULL(&vattr); @@ -2270,6 +2327,7 @@ setutimes(p, vp, ts, nullflag) vattr.va_vaflags |= VA_UTIMES_NULL; error = VOP_SETATTR(vp, &vattr, p->p_ucred, p); VOP_UNLOCK(vp, 0, p); + vn_finished_write(mp); return error; } @@ -2394,7 +2452,8 @@ truncate(p, uap) syscallarg(off_t) length; } */ *uap; { - register struct vnode *vp; + struct mount *mp; + struct vnode *vp; struct vattr vattr; int error; struct nameidata nd; @@ -2405,6 +2464,10 @@ truncate(p, uap) if ((error = namei(&nd)) != 0) return (error); vp = nd.ni_vp; + if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0) { + vrele(vp); + return (error); + } NDFREE(&nd, NDF_ONLY_PNBUF); VOP_LEASE(vp, p, p->p_ucred, LEASE_WRITE); vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p); @@ -2417,6 +2480,7 @@ truncate(p, uap) error = VOP_SETATTR(vp, &vattr, p->p_ucred, p); } vput(vp); + vn_finished_write(mp); return (error); } @@ -2440,6 +2504,7 @@ ftruncate(p, uap) syscallarg(off_t) length; } */ *uap; { + struct mount *mp; struct vattr vattr; struct vnode *vp; struct file *fp; @@ -2452,6 +2517,8 @@ ftruncate(p, uap) if ((fp->f_flag & FWRITE) == 0) return (EINVAL); vp = (struct vnode *)fp->f_data; + if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0) + return (error); VOP_LEASE(vp, p, p->p_ucred, LEASE_WRITE); vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p); if (vp->v_type == VDIR) @@ -2462,6 +2529,7 @@ ftruncate(p, uap) error = VOP_SETATTR(vp, &vattr, fp->f_cred, p); } VOP_UNLOCK(vp, 0, p); + vn_finished_write(mp); return (error); } @@ -2541,13 +2609,16 @@ fsync(p, uap) syscallarg(int) fd; } */ *uap; { - register struct vnode *vp; + struct vnode *vp; + struct mount *mp; struct file *fp; int error; if ((error = getvnode(p->p_fd, SCARG(uap, fd), &fp)) != 0) return (error); vp = (struct vnode *)fp->f_data; + if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0) + return (error); vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p); if (vp->v_object) vm_object_page_clean(vp->v_object, 0, 0, 0); @@ -2558,6 +2629,7 @@ fsync(p, uap) #endif VOP_UNLOCK(vp, 0, p); + vn_finished_write(mp); return (error); } @@ -2580,7 +2652,8 @@ rename(p, uap) syscallarg(char *) to; } */ *uap; { - register struct vnode *tvp, *fvp, *tdvp; + struct mount *mp; + struct vnode *tvp, *fvp, *tdvp; struct nameidata fromnd, tond; int error; @@ -2590,6 +2663,12 @@ rename(p, uap) if ((error = namei(&fromnd)) != 0) return (error); fvp = fromnd.ni_vp; + if ((error = vn_start_write(fvp, &mp, V_WAIT | PCATCH)) != 0) { + NDFREE(&fromnd, NDF_ONLY_PNBUF); + vrele(fromnd.ni_dvp); + vrele(fvp); + goto out1; + } NDINIT(&tond, RENAME, LOCKPARENT | LOCKLEAF | NOCACHE | SAVESTART | NOOBJ, UIO_USERSPACE, SCARG(uap, to), p); if (fromnd.ni_vp->v_type == VDIR) @@ -2652,6 +2731,7 @@ out: vrele(fvp); } vrele(tond.ni_startdir); + vn_finished_write(mp); ASSERT_VOP_UNLOCKED(fromnd.ni_dvp, "rename"); ASSERT_VOP_UNLOCKED(fromnd.ni_vp, "rename"); ASSERT_VOP_UNLOCKED(tond.ni_dvp, "rename"); @@ -2682,11 +2762,13 @@ mkdir(p, uap) syscallarg(int) mode; } */ *uap; { - register struct vnode *vp; + struct mount *mp; + struct vnode *vp; struct vattr vattr; int error; struct nameidata nd; +restart: bwillwrite(); NDINIT(&nd, CREATE, LOCKPARENT, UIO_USERSPACE, SCARG(uap, path), p); nd.ni_cnd.cn_flags |= WILLBEDIR; @@ -2695,13 +2777,17 @@ mkdir(p, uap) vp = nd.ni_vp; if (vp != NULL) { NDFREE(&nd, NDF_ONLY_PNBUF); - if (nd.ni_dvp == vp) - vrele(nd.ni_dvp); - else - vput(nd.ni_dvp); vrele(vp); + vput(nd.ni_dvp); return (EEXIST); } + if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) { + NDFREE(&nd, NDF_ONLY_PNBUF); + vput(nd.ni_dvp); + if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0) + return (error); + goto restart; + } VATTR_NULL(&vattr); vattr.va_type = VDIR; vattr.va_mode = (SCARG(uap, mode) & ACCESSPERMS) &~ p->p_fd->fd_cmask; @@ -2711,6 +2797,7 @@ mkdir(p, uap) vput(nd.ni_dvp); if (!error) vput(nd.ni_vp); + vn_finished_write(mp); ASSERT_VOP_UNLOCKED(nd.ni_dvp, "mkdir"); ASSERT_VOP_UNLOCKED(nd.ni_vp, "mkdir"); return (error); @@ -2732,10 +2819,12 @@ rmdir(p, uap) syscallarg(char *) path; } */ *uap; { - register struct vnode *vp; + struct mount *mp; + struct vnode *vp; int error; struct nameidata nd; +restart: bwillwrite(); NDINIT(&nd, DELETE, LOCKPARENT | LOCKLEAF, UIO_USERSPACE, SCARG(uap, path), p); @@ -2756,21 +2845,32 @@ rmdir(p, uap) /* * The root of a mounted filesystem cannot be deleted. */ - if (vp->v_flag & VROOT) + if (vp->v_flag & VROOT) { error = EBUSY; - else { - VOP_LEASE(nd.ni_dvp, p, p->p_ucred, LEASE_WRITE); - VOP_LEASE(vp, p, p->p_ucred, LEASE_WRITE); - error = VOP_RMDIR(nd.ni_dvp, nd.ni_vp, &nd.ni_cnd); + goto out; } + if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) { + NDFREE(&nd, NDF_ONLY_PNBUF); + if (nd.ni_dvp == vp) + vrele(nd.ni_dvp); + else + vput(nd.ni_dvp); + vput(vp); + if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0) + return (error); + goto restart; + } + VOP_LEASE(nd.ni_dvp, p, p->p_ucred, LEASE_WRITE); + VOP_LEASE(vp, p, p->p_ucred, LEASE_WRITE); + error = VOP_RMDIR(nd.ni_dvp, nd.ni_vp, &nd.ni_cnd); + vn_finished_write(mp); out: NDFREE(&nd, NDF_ONLY_PNBUF); if (nd.ni_dvp == vp) vrele(nd.ni_dvp); else vput(nd.ni_dvp); - if (vp != NULLVP) - vput(vp); + vput(vp); ASSERT_VOP_UNLOCKED(nd.ni_dvp, "rmdir"); ASSERT_VOP_UNLOCKED(nd.ni_vp, "rmdir"); return (error); @@ -3049,7 +3149,8 @@ revoke(p, uap) syscallarg(char *) path; } */ *uap; { - register struct vnode *vp; + struct mount *mp; + struct vnode *vp; struct vattr vattr; int error; struct nameidata nd; @@ -3068,8 +3169,11 @@ revoke(p, uap) if (p->p_ucred->cr_uid != vattr.va_uid && (error = suser_xxx(0, p, PRISON_ROOT))) goto out; + if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0) + goto out; if (vcount(vp) > 1) VOP_REVOKE(vp, REVOKEALL); + vn_finished_write(mp); out: vrele(vp); return (error); @@ -3228,11 +3332,16 @@ fhopen(p, uap) } if (fmode & O_TRUNC) { VOP_UNLOCK(vp, 0, p); /* XXX */ + if ((error = vn_start_write(NULL, &mp, V_WAIT | PCATCH)) != 0) { + vrele(vp); + return (error); + } VOP_LEASE(vp, p, p->p_ucred, LEASE_WRITE); vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p); /* XXX */ VATTR_NULL(vap); vap->va_size = 0; error = VOP_SETATTR(vp, vap, p->p_ucred, p); + vn_finished_write(mp); if (error) goto bad; } @@ -3407,10 +3516,15 @@ extattrctl(p, uap) NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, SCARG(uap, path), p); if ((error = namei(&nd)) != 0) return (error); - mp = nd.ni_vp->v_mount; + error = vn_start_write(nd.ni_vp, &mp, V_WAIT | PCATCH); NDFREE(&nd, 0); - return (VFS_EXTATTRCTL(mp, SCARG(uap, cmd), SCARG(uap, attrname), - SCARG(uap, arg), p)); + vrele(nd.ni_vp); + if (error) + return (error); + error = VFS_EXTATTRCTL(mp, SCARG(uap, cmd), SCARG(uap, attrname), + SCARG(uap, arg), p); + vn_finished_write(mp); + return (error); } /* @@ -3425,6 +3539,7 @@ extattr_set_file(p, uap) struct extattr_set_file_args *uap; { struct nameidata nd; + struct mount *mp; struct uio auio; struct iovec *iov, *needfree = NULL, aiov[UIO_SMALLIOV]; char attrname[EXTATTR_MAXNAMELEN]; @@ -3434,10 +3549,11 @@ extattr_set_file(p, uap) error = copyin(SCARG(uap, attrname), attrname, EXTATTR_MAXNAMELEN); if (error) return (error); - NDINIT(&nd, LOOKUP, LOCKLEAF | FOLLOW, UIO_USERSPACE, SCARG(uap, path), - p); + NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, SCARG(uap, path), p); if ((error = namei(&nd)) != 0) return(error); + if ((error = vn_start_write(nd.ni_vp, &mp, V_WAIT | PCATCH)) != 0) + goto done; iovlen = uap->iovcnt * sizeof(struct iovec); if (uap->iovcnt > UIO_SMALLIOV) { if (uap->iovcnt > UIO_MAXIOV) { @@ -3477,6 +3593,8 @@ done: if (needfree) FREE(needfree, M_IOV); NDFREE(&nd, 0); + vrele(nd.ni_vp); + vn_finished_write(mp); return (error); } @@ -3508,6 +3626,7 @@ extattr_get_file(p, uap) if (uap->iovcnt > UIO_SMALLIOV) { if (uap->iovcnt > UIO_MAXIOV) { NDFREE(&nd, 0); + vrele(nd.ni_vp); return (EINVAL); } MALLOC(iov, struct iovec *, iovlen, M_IOV, M_WAITOK); @@ -3545,6 +3664,7 @@ done: if (needfree) FREE(needfree, M_IOV); NDFREE(&nd, 0); + vrele(nd.ni_vp); return(error); } @@ -3557,6 +3677,7 @@ extattr_delete_file(p, uap) struct proc *p; struct extattr_delete_file_args *uap; { + struct mount *mp; struct nameidata nd; char attrname[EXTATTR_MAXNAMELEN]; int error; @@ -3564,12 +3685,17 @@ extattr_delete_file(p, uap) error = copyin(SCARG(uap, attrname), attrname, EXTATTR_MAXNAMELEN); if (error) return(error); - NDINIT(&nd, LOOKUP, LOCKLEAF | FOLLOW, UIO_USERSPACE, SCARG(uap, path), - p); + NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, SCARG(uap, path), p); if ((error = namei(&nd)) != 0) return(error); + if ((error = vn_start_write(nd.ni_vp, &mp, V_WAIT | PCATCH)) != 0) { + vrele(nd.ni_vp); + return (error); + } error = VOP_SETEXTATTR(nd.ni_vp, attrname, NULL, p->p_cred->pc_ucred, p); NDFREE(&nd, 0); + vrele(nd.ni_vp); + vn_finished_write(mp); return(error); } diff --git a/sys/kern/vfs_vnops.c b/sys/kern/vfs_vnops.c index 0d0dc24..0708f7c 100644 --- a/sys/kern/vfs_vnops.c +++ b/sys/kern/vfs_vnops.c @@ -103,12 +103,14 @@ vn_open(ndp, flagp, cmode) int *flagp, cmode; { struct vnode *vp; + struct mount *mp; struct proc *p = ndp->ni_cnd.cn_proc; struct ucred *cred = p->p_ucred; struct vattr vat; struct vattr *vap = &vat; int mode, fmode, error; +restart: fmode = *flagp; if (fmode & O_CREAT) { ndp->ni_cnd.cn_nameiop = CREATE; @@ -124,10 +126,19 @@ vn_open(ndp, flagp, cmode) vap->va_mode = cmode; if (fmode & O_EXCL) vap->va_vaflags |= VA_EXCLUSIVE; + if (vn_start_write(ndp->ni_dvp, &mp, V_NOWAIT) != 0) { + NDFREE(ndp, NDF_ONLY_PNBUF); + vput(ndp->ni_dvp); + if ((error = vn_start_write(NULL, &mp, + V_XSLEEP | PCATCH)) != 0) + return (error); + goto restart; + } VOP_LEASE(ndp->ni_dvp, p, cred, LEASE_WRITE); error = VOP_CREATE(ndp->ni_dvp, &ndp->ni_vp, &ndp->ni_cnd, vap); vput(ndp->ni_dvp); + vn_finished_write(mp); if (error) { NDFREE(ndp, NDF_ONLY_PNBUF); return (error); @@ -293,10 +304,17 @@ vn_rdwr(rw, vp, base, len, offset, segflg, ioflg, cred, aresid, p) { struct uio auio; struct iovec aiov; + struct mount *mp; int error; - if ((ioflg & IO_NODELOCKED) == 0) + if ((ioflg & IO_NODELOCKED) == 0) { + mp = NULL; + if (rw == UIO_WRITE && + vp->v_type != VCHR && vp->v_type != VBLK && + (error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0) + return (error); vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p); + } auio.uio_iov = &aiov; auio.uio_iovcnt = 1; aiov.iov_base = base; @@ -316,8 +334,10 @@ vn_rdwr(rw, vp, base, len, offset, segflg, ioflg, cred, aresid, p) else if (auio.uio_resid && error == 0) error = EIO; - if ((ioflg & IO_NODELOCKED) == 0) + if ((ioflg & IO_NODELOCKED) == 0) { + vn_finished_write(mp); VOP_UNLOCK(vp, 0, p); + } return (error); } @@ -368,6 +388,7 @@ vn_write(fp, uio, cred, flags, p) int flags; { struct vnode *vp; + struct mount *mp; int error, ioflag; KASSERT(uio->uio_procp == p, ("uio_procp %p is not p %p", @@ -384,6 +405,10 @@ vn_write(fp, uio, cred, flags, p) if ((fp->f_flag & O_FSYNC) || (vp->v_mount && (vp->v_mount->mnt_flag & MNT_SYNCHRONOUS))) ioflag |= IO_SYNC; + mp = NULL; + if (vp->v_type != VCHR && vp->v_type != VBLK && + (error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0) + return (error); VOP_LEASE(vp, p, cred, LEASE_WRITE); vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p); if ((flags & FOF_OFFSET) == 0) @@ -394,6 +419,7 @@ vn_write(fp, uio, cred, flags, p) fp->f_offset = uio->uio_offset; fp->f_nextoff = uio->uio_offset; VOP_UNLOCK(vp, 0, p); + vn_finished_write(mp); return (error); } @@ -649,6 +675,140 @@ vn_closefile(fp, p) fp->f_cred, p)); } +/* + * Preparing to start a filesystem write operation. If the operation is + * permitted, then we bump the count of operations in progress and + * proceed. If a suspend request is in progress, we wait until the + * suspension is over, and then proceed. + */ +int +vn_start_write(vp, mpp, flags) + struct vnode *vp; + struct mount **mpp; + int flags; +{ + struct mount *mp; + int error; + + /* + * If a vnode is provided, get and return the mount point that + * to which it will write. + */ + if (vp != NULL) { + if ((error = VOP_GETWRITEMOUNT(vp, mpp)) != 0) { + *mpp = NULL; + if (error != EOPNOTSUPP) + return (error); + return (0); + } + } + if ((mp = *mpp) == NULL) + return (0); + /* + * Check on status of suspension. + */ + while ((mp->mnt_kern_flag & MNTK_SUSPEND) != 0) { + if (flags & V_NOWAIT) + return (EWOULDBLOCK); + error = tsleep(&mp->mnt_flag, (PUSER - 1) | (flags & PCATCH), + "suspfs", 0); + if (error) + return (error); + } + if (flags & V_XSLEEP) + return (0); + mp->mnt_writeopcount++; + return (0); +} + +/* + * Secondary suspension. Used by operations such as vop_inactive + * routines that are needed by the higher level functions. These + * are allowed to proceed until all the higher level functions have + * completed (indicated by mnt_writeopcount dropping to zero). At that + * time, these operations are halted until the suspension is over. + */ +int +vn_write_suspend_wait(vp, flags) + struct vnode *vp; + int flags; +{ + struct mount *mp; + int error; + + if ((error = VOP_GETWRITEMOUNT(vp, &mp)) != 0) { + if (error != EOPNOTSUPP) + return (error); + return (0); + } + /* + * If we are not suspended or have not yet reached suspended + * mode, then let the operation proceed. + */ + if (mp == NULL || (mp->mnt_kern_flag & MNTK_SUSPENDED) == 0) + return (0); + if (flags & V_NOWAIT) + return (EWOULDBLOCK); + /* + * Wait for the suspension to finish. + */ + return (tsleep(&mp->mnt_flag, (PUSER - 1) | (flags & PCATCH), + "suspfs", 0)); +} + +/* + * Filesystem write operation has completed. If we are suspending and this + * operation is the last one, notify the suspender that the suspension is + * now in effect. + */ +void +vn_finished_write(mp) + struct mount *mp; +{ + + if (mp == NULL) + return; + mp->mnt_writeopcount--; + if (mp->mnt_writeopcount < 0) + panic("vn_finished_write: neg cnt"); + if ((mp->mnt_kern_flag & MNTK_SUSPEND) != 0 && + mp->mnt_writeopcount <= 0) + wakeup(&mp->mnt_writeopcount); +} + +/* + * Request a filesystem to suspend write operations. + */ +void +vfs_write_suspend(mp) + struct mount *mp; +{ + struct proc *p = curproc; + + if (mp->mnt_kern_flag & MNTK_SUSPEND) + return; + mp->mnt_kern_flag |= MNTK_SUSPEND; + if (mp->mnt_writeopcount > 0) + (void) tsleep(&mp->mnt_writeopcount, PUSER - 1, "suspwt", 0); + VFS_SYNC(mp, MNT_WAIT, p->p_ucred, p); + mp->mnt_kern_flag |= MNTK_SUSPENDED; +} + +/* + * Request a filesystem to resume write operations. + */ +void +vfs_write_resume(mp) + struct mount *mp; +{ + + if ((mp->mnt_kern_flag & MNTK_SUSPEND) == 0) + return; + mp->mnt_kern_flag &= ~(MNTK_SUSPEND | MNTK_SUSPENDED); + wakeup(&mp->mnt_writeopcount); + wakeup(&mp->mnt_flag); +} + static int filt_vnattach(struct knote *kn) { diff --git a/sys/kern/vnode_if.src b/sys/kern/vnode_if.src index 479cc92..bda7e98 100644 --- a/sys/kern/vnode_if.src +++ b/sys/kern/vnode_if.src @@ -394,6 +394,22 @@ vop_strategy { }; # +#% getwritemount vp = = = +# +vop_getwritemount { + IN struct vnode *vp; + OUT struct mount **mpp; +}; + +# +#% copyonwrite vp L L L +# +vop_copyonwrite { + IN struct vnode *vp; + IN struct buf *bp; +}; + +# #% print vp = = = # vop_print { |