diff options
Diffstat (limited to 'sys/kern/vfs_subr.c')
-rw-r--r-- | sys/kern/vfs_subr.c | 796 |
1 files changed, 561 insertions, 235 deletions
diff --git a/sys/kern/vfs_subr.c b/sys/kern/vfs_subr.c index 43f8669..0dea7bd 100644 --- a/sys/kern/vfs_subr.c +++ b/sys/kern/vfs_subr.c @@ -35,7 +35,7 @@ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * - * @(#)vfs_subr.c 8.13 (Berkeley) 4/18/94 + * @(#)vfs_subr.c 8.31 (Berkeley) 5/26/95 * $FreeBSD$ */ @@ -75,7 +75,9 @@ #ifdef DDB extern void printlockedvnodes __P((void)); #endif -extern void vclean __P((struct vnode *vp, int flags)); +static void vclean __P((struct vnode *vp, int flags, struct proc *p)); +extern void vgonel __P((struct vnode *vp, struct proc *p)); +unsigned long numvnodes; extern void vfs_unmountroot __P((struct mount *rootfs)); enum vtype iftovt_tab[16] = { @@ -91,15 +93,19 @@ int vttoif_tab[9] = { * Insq/Remq for the vnode usage lists. */ #define bufinsvn(bp, dp) LIST_INSERT_HEAD(dp, bp, b_vnbufs) -#define bufremvn(bp) { \ - LIST_REMOVE(bp, b_vnbufs); \ - (bp)->b_vnbufs.le_next = NOLIST; \ +#define bufremvn(bp) { \ + LIST_REMOVE(bp, b_vnbufs); \ + (bp)->b_vnbufs.le_next = NOLIST; \ } - TAILQ_HEAD(freelst, vnode) vnode_free_list; /* vnode free list */ static u_long freevnodes = 0; struct mntlist mountlist; /* mounted filesystem list */ +struct simplelock mountlist_slock; +static struct simplelock mntid_slock; +struct simplelock mntvnode_slock; +struct simplelock vnode_free_list_slock; +static struct simplelock spechash_slock; int desiredvnodes; SYSCTL_INT(_kern, KERN_MAXVNODES, maxvnodes, CTLFLAG_RW, &desiredvnodes, 0, ""); @@ -117,164 +123,153 @@ vntblinit() { desiredvnodes = maxproc + vm_object_cache_max; + simple_lock_init(&mntvnode_slock); + simple_lock_init(&mntid_slock); + simple_lock_init(&spechash_slock); TAILQ_INIT(&vnode_free_list); + simple_lock_init(&vnode_free_list_slock); CIRCLEQ_INIT(&mountlist); } /* - * Lock a filesystem. - * Used to prevent access to it while mounting and unmounting. + * Mark a mount point as busy. Used to synchronize access and to delay + * unmounting. Interlock is not released on failure. */ int -vfs_lock(mp) - register struct mount *mp; +vfs_busy(mp, flags, interlkp, p) + struct mount *mp; + int flags; + struct simplelock *interlkp; + struct proc *p; { + int lkflags; - while (mp->mnt_flag & MNT_MLOCK) { + if (mp->mnt_flag & MNT_UNMOUNT) { + if (flags & LK_NOWAIT) + return (ENOENT); mp->mnt_flag |= MNT_MWAIT; - (void) tsleep((caddr_t) mp, PVFS, "vfslck", 0); + if (interlkp) { + simple_unlock(interlkp); + } + /* + * Since all busy locks are shared except the exclusive + * lock granted when unmounting, the only place that a + * wakeup needs to be done is at the release of the + * exclusive lock at the end of dounmount. + */ + tsleep((caddr_t)mp, PVFS, "vfs_busy", 0); + if (interlkp) { + simple_lock(interlkp); + } + return (ENOENT); } - mp->mnt_flag |= MNT_MLOCK; + lkflags = LK_SHARED; + if (interlkp) + lkflags |= LK_INTERLOCK; + if (lockmgr(&mp->mnt_lock, lkflags, interlkp, p)) + panic("vfs_busy: unexpected lock failure"); return (0); } /* - * Unlock a locked filesystem. - * Panic if filesystem is not locked. + * Free a busy filesystem. */ void -vfs_unlock(mp) - register struct mount *mp; +vfs_unbusy(mp, p) + struct mount *mp; + struct proc *p; { - if ((mp->mnt_flag & MNT_MLOCK) == 0) - panic("vfs_unlock: not locked"); - mp->mnt_flag &= ~MNT_MLOCK; - if (mp->mnt_flag & MNT_MWAIT) { - mp->mnt_flag &= ~MNT_MWAIT; - wakeup((caddr_t) mp); - } + lockmgr(&mp->mnt_lock, LK_RELEASE, NULL, p); } /* - * Mark a mount point as busy. - * Used to synchronize access and to delay unmounting. + * Lookup a filesystem type, and if found allocate and initialize + * a mount structure for it. + * + * Devname is usually updated by mount(8) after booting. */ int -vfs_busy(mp) - register struct mount *mp; +vfs_rootmountalloc(fstypename, devname, mpp) + char *fstypename; + char *devname; + struct mount **mpp; { + struct proc *p = curproc; /* XXX */ + struct vfsconf *vfsp; + struct mount *mp; - while (mp->mnt_flag & MNT_MPBUSY) { - mp->mnt_flag |= MNT_MPWANT; - (void) tsleep((caddr_t) &mp->mnt_flag, PVFS, "vfsbsy", 0); - } - if (mp->mnt_flag & MNT_UNMOUNT) - return (1); - mp->mnt_flag |= MNT_MPBUSY; + for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next) + if (!strcmp(vfsp->vfc_name, fstypename)) + break; + if (vfsp == NULL) + return (ENODEV); + mp = malloc((u_long)sizeof(struct mount), M_MOUNT, M_WAITOK); + bzero((char *)mp, (u_long)sizeof(struct mount)); + lockinit(&mp->mnt_lock, PVFS, "vfslock", 0, 0); + (void)vfs_busy(mp, LK_NOWAIT, 0, p); + LIST_INIT(&mp->mnt_vnodelist); + mp->mnt_vfc = vfsp; + mp->mnt_op = vfsp->vfc_vfsops; + mp->mnt_flag = MNT_RDONLY; + mp->mnt_vnodecovered = NULLVP; + vfsp->vfc_refcount++; + mp->mnt_stat.f_type = vfsp->vfc_typenum; + mp->mnt_flag |= vfsp->vfc_flags & MNT_VISFLAGMASK; + strncpy(mp->mnt_stat.f_fstypename, vfsp->vfc_name, MFSNAMELEN); + mp->mnt_stat.f_mntonname[0] = '/'; + mp->mnt_stat.f_mntonname[1] = 0; + (void) copystr(devname, mp->mnt_stat.f_mntfromname, MNAMELEN - 1, 0); + *mpp = mp; return (0); } /* - * Free a busy filesystem. - * Panic if filesystem is not busy. - */ -void -vfs_unbusy(mp) - register struct mount *mp; -{ - - if ((mp->mnt_flag & MNT_MPBUSY) == 0) - panic("vfs_unbusy: not busy"); - mp->mnt_flag &= ~MNT_MPBUSY; - if (mp->mnt_flag & MNT_MPWANT) { - mp->mnt_flag &= ~MNT_MPWANT; - wakeup((caddr_t) &mp->mnt_flag); - } -} - -void -vfs_unmountroot(struct mount *rootfs) -{ - struct mount *mp = rootfs; - int error; - - if (vfs_busy(mp)) { - printf("failed to unmount root\n"); - return; - } - mp->mnt_flag |= MNT_UNMOUNT; - if ((error = vfs_lock(mp))) { - printf("lock of root filesystem failed (%d)\n", error); - return; - } - vnode_pager_umount(mp); /* release cached vnodes */ - cache_purgevfs(mp); /* remove cache entries for this file sys */ - - if ((error = VFS_SYNC(mp, MNT_WAIT, initproc->p_ucred, initproc))) - printf("sync of root filesystem failed (%d)\n", error); - - if ((error = VFS_UNMOUNT(mp, MNT_FORCE, initproc))) { - printf("unmount of root filesystem failed ("); - if (error == EBUSY) - printf("BUSY)\n"); - else - printf("%d)\n", error); - } - mp->mnt_flag &= ~MNT_UNMOUNT; - vfs_unbusy(mp); -} - -/* - * Unmount all filesystems. Should only be called by halt(). + * Find an appropriate filesystem to use for the root. If a filesystem + * has not been preselected, walk through the list of known filesystems + * trying those that have mountroot routines, and try them until one + * works or we have tried them all. */ -void -vfs_unmountall() +#ifdef notdef /* XXX JH */ +int +lite2_vfs_mountroot(void) { - struct mount *mp, *nmp, *rootfs = NULL; + struct vfsconf *vfsp; + extern int (*lite2_mountroot)(void); int error; - /* unmount all but rootfs */ - for (mp = mountlist.cqh_last; mp != (void *)&mountlist; mp = nmp) { - nmp = mp->mnt_list.cqe_prev; - - if (mp->mnt_flag & MNT_ROOTFS) { - rootfs = mp; + if (lite2_mountroot != NULL) + return ((*lite2_mountroot)()); + for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next) { + if (vfsp->vfc_mountroot == NULL) continue; - } - error = dounmount(mp, MNT_FORCE, initproc); - if (error) { - printf("unmount of %s failed (", mp->mnt_stat.f_mntonname); - if (error == EBUSY) - printf("BUSY)\n"); - else - printf("%d)\n", error); - } - } - - /* and finally... */ - if (rootfs) { - vfs_unmountroot(rootfs); - } else { - printf("no root filesystem\n"); + if ((error = (*vfsp->vfc_mountroot)()) == 0) + return (0); + printf("%s_mountroot failed: %d\n", vfsp->vfc_name, error); } + return (ENODEV); } +#endif /* * Lookup a mount point by filesystem identifier. */ struct mount * -getvfs(fsid) +vfs_getvfs(fsid) fsid_t *fsid; { register struct mount *mp; + simple_lock(&mountlist_slock); for (mp = mountlist.cqh_first; mp != (void *)&mountlist; mp = mp->mnt_list.cqe_next) { if (mp->mnt_stat.f_fsid.val[0] == fsid->val[0] && - mp->mnt_stat.f_fsid.val[1] == fsid->val[1]) + mp->mnt_stat.f_fsid.val[1] == fsid->val[1]) { + simple_unlock(&mountlist_slock); return (mp); + } } + simple_unlock(&mountlist_slock); return ((struct mount *) 0); } @@ -282,14 +277,16 @@ getvfs(fsid) * Get a new unique fsid */ void -getnewfsid(mp, mtype) +vfs_getnewfsid(mp) struct mount *mp; - int mtype; { static u_short xxxfs_mntid; fsid_t tfsid; + int mtype; + simple_lock(&mntid_slock); + mtype = mp->mnt_vfc->vfc_typenum; mp->mnt_stat.f_fsid.val[0] = makedev(nblkdev + mtype, 0); mp->mnt_stat.f_fsid.val[1] = mtype; if (xxxfs_mntid == 0) @@ -297,12 +294,13 @@ getnewfsid(mp, mtype) tfsid.val[0] = makedev(nblkdev + mtype, xxxfs_mntid); tfsid.val[1] = mtype; if (mountlist.cqh_first != (void *)&mountlist) { - while (getvfs(&tfsid)) { + while (vfs_getvfs(&tfsid)) { tfsid.val[0]++; xxxfs_mntid++; } } mp->mnt_stat.f_fsid.val[0] = tfsid.val[0]; + simple_unlock(&mntid_slock); } /* @@ -326,6 +324,35 @@ vattr_null(vap) vap->va_vaflags = 0; } +void +vfs_unmountroot(struct mount *rootfs) +{ + struct proc *p = curproc; /* XXX */ + struct mount *mp = rootfs; + int error; + + if (vfs_busy(mp, LK_NOWAIT, &mountlist_slock, p)) { + printf("failed to unmount root\n"); + return; + } + mp->mnt_flag |= MNT_UNMOUNT; + vnode_pager_umount(mp); /* release cached vnodes */ + cache_purgevfs(mp); /* remove cache entries for this file sys */ + + if ((error = VFS_SYNC(mp, MNT_WAIT, initproc->p_ucred, initproc))) + printf("sync of root filesystem failed (%d)\n", error); + + if ((error = VFS_UNMOUNT(mp, MNT_FORCE, initproc))) { + printf("unmount of root filesystem failed ("); + if (error == EBUSY) + printf("BUSY)\n"); + else + printf("%d)\n", error); + } + mp->mnt_flag &= ~MNT_UNMOUNT; + vfs_unbusy(mp, p); +} + /* * Routines having to do with the management of the vnode table. */ @@ -341,10 +368,11 @@ getnewvnode(tag, mp, vops, vpp) vop_t **vops; struct vnode **vpp; { - register struct vnode *vp; + struct proc *p = curproc; /* XXX */ + struct vnode *vp; + simple_lock(&vnode_free_list_slock); retry: - vp = vnode_free_list.tqh_first; /* * we allocate a new vnode if * 1. we don't have any free @@ -357,12 +385,31 @@ retry: */ if (freevnodes < (numvnodes >> 2) || numvnodes < desiredvnodes || - vp == NULL) { + vnode_free_list.tqh_first == NULL) { + simple_unlock(&vnode_free_list_slock); vp = (struct vnode *) malloc((u_long) sizeof *vp, M_VNODE, M_WAITOK); bzero((char *) vp, sizeof *vp); numvnodes++; } else { + for (vp = vnode_free_list.tqh_first; + vp != NULLVP; vp = vp->v_freelist.tqe_next) { + if (simple_lock_try(&vp->v_interlock)) + break; + } + /* + * Unless this is a bad time of the month, at most + * the first NCPUS items on the free list are + * locked, so this is close enough to being empty. + */ + if (vp == NULLVP) { + simple_unlock(&vnode_free_list_slock); + tablefull("vnode"); + *vpp = 0; + return (ENFILE); + } + if (vp->v_usecount) + panic("free vnode isn't"); TAILQ_REMOVE(&vnode_free_list, vp, v_freelist); if (vp->v_usage > 0) { --vp->v_usage; @@ -370,14 +417,16 @@ retry: goto retry; } freevnodes--; - if (vp->v_usecount) - panic("free vnode isn't"); /* see comment on why 0xdeadb is set at end of vgone (below) */ vp->v_freelist.tqe_prev = (struct vnode **) 0xdeadb; + simple_unlock(&vnode_free_list_slock); vp->v_lease = NULL; if (vp->v_type != VBAD) - vgone(vp); + vgonel(vp, p); + else { + simple_unlock(&vp->v_interlock); + } #ifdef DIAGNOSTIC { @@ -421,6 +470,7 @@ insmntque(vp, mp) register struct mount *mp; { + simple_lock(&mntvnode_slock); /* * Delete from old mount point vnode list, if on one. */ @@ -429,9 +479,12 @@ insmntque(vp, mp) /* * Insert into list of vnodes for the new mount point, if available. */ - if ((vp->v_mount = mp) == NULL) + if ((vp->v_mount = mp) == NULL) { + simple_unlock(&mntvnode_slock); return; + } LIST_INSERT_HEAD(&mp->mnt_vnodelist, vp, v_mntvnodes); + simple_unlock(&mntvnode_slock); } /* @@ -723,7 +776,8 @@ checkalias(nvp, nvp_rdev, mp) dev_t nvp_rdev; struct mount *mp; { - register struct vnode *vp; + struct proc *p = curproc; /* XXX */ + struct vnode *vp; struct vnode **vpp; if (nvp->v_type != VBLK && nvp->v_type != VCHR) @@ -731,18 +785,24 @@ checkalias(nvp, nvp_rdev, mp) vpp = &speclisth[SPECHASH(nvp_rdev)]; loop: + simple_lock(&spechash_slock); for (vp = *vpp; vp; vp = vp->v_specnext) { if (nvp_rdev != vp->v_rdev || nvp->v_type != vp->v_type) continue; /* * Alias, but not in use, so flush it out. */ + simple_lock(&vp->v_interlock); if (vp->v_usecount == 0) { - vgone(vp); + simple_unlock(&spechash_slock); + vgonel(vp, p); goto loop; } - if (vget(vp, 1)) + simple_unlock(&spechash_slock); + if (vget(vp, LK_EXCLUSIVE | LK_INTERLOCK, p)) { goto loop; + } + simple_lock(&spechash_slock); break; } @@ -753,16 +813,19 @@ loop: nvp->v_hashchain = vpp; nvp->v_specnext = *vpp; nvp->v_specflags = 0; + simple_unlock(&spechash_slock); *vpp = nvp; - if (vp != NULL) { + if (vp != NULLVP) { nvp->v_flag |= VALIASED; vp->v_flag |= VALIASED; vput(vp); } return (NULLVP); } - VOP_UNLOCK(vp); - vclean(vp, 0); + simple_unlock(&spechash_slock); + VOP_UNLOCK(vp, 0, p); + simple_lock(&vp->v_interlock); + vclean(vp, 0, p); vp->v_op = nvp->v_op; vp->v_tag = nvp->v_tag; nvp->v_type = VNON; @@ -779,47 +842,162 @@ loop: * been changed to a new file system type). */ int -vget(vp, lockflag) +vget(vp, flags, p) register struct vnode *vp; - int lockflag; + int flags; + struct proc *p; { + int error; /* - * If the vnode is in the process of being cleaned out for another - * use, we wait for the cleaning to finish and then return failure. - * Cleaning is determined either by checking that the VXLOCK flag is - * set, or that the use count is zero with the back pointer set to - * show that it has been removed from the free list by getnewvnode. - * The VXLOCK flag may not have been set yet because vclean is blocked - * in the VOP_LOCK call waiting for the VOP_INACTIVE to complete. + * If the vnode is in the process of being cleaned out for + * another use, we wait for the cleaning to finish and then + * return failure. Cleaning is determined by checking that + * the VXLOCK flag is set. */ - if ((vp->v_flag & VXLOCK) || - (vp->v_usecount == 0 && - vp->v_freelist.tqe_prev == (struct vnode **) 0xdeadb)) { + if ((flags & LK_INTERLOCK) == 0) { + simple_lock(&vp->v_interlock); + } + if (vp->v_flag & VXLOCK) { vp->v_flag |= VXWANT; - (void) tsleep((caddr_t) vp, PINOD, "vget", 0); - return (1); + simple_unlock(&vp->v_interlock); + tsleep((caddr_t)vp, PINOD, "vget", 0); + return (ENOENT); } if (vp->v_usecount == 0) { + simple_lock(&vnode_free_list_slock); TAILQ_REMOVE(&vnode_free_list, vp, v_freelist); + simple_unlock(&vnode_free_list_slock); freevnodes--; } vp->v_usecount++; - /* * Create the VM object, if needed */ if ((vp->v_type == VREG) && ((vp->v_object == NULL) || (vp->v_object->flags & OBJ_VFS_REF) == 0)) { + /* + * XXX vfs_object_create probably needs the interlock. + */ + simple_unlock(&vp->v_interlock); vfs_object_create(vp, curproc, curproc->p_ucred, 0); + simple_lock(&vp->v_interlock); + } + if (flags & LK_TYPE_MASK) { + if (error = vn_lock(vp, flags | LK_INTERLOCK, p)) + vrele(vp); + return (error); } - if (lockflag) - VOP_LOCK(vp); + simple_unlock(&vp->v_interlock); + return (0); +} + +/* + * Stubs to use when there is no locking to be done on the underlying object. + * A minimal shared lock is necessary to ensure that the underlying object + * is not revoked while an operation is in progress. So, an active shared + * count is maintained in an auxillary vnode lock structure. + */ +int +vop_nolock(ap) + struct vop_lock_args /* { + struct vnode *a_vp; + int a_flags; + struct proc *a_p; + } */ *ap; +{ +#ifdef notyet + /* + * This code cannot be used until all the non-locking filesystems + * (notably NFS) are converted to properly lock and release nodes. + * Also, certain vnode operations change the locking state within + * the operation (create, mknod, remove, link, rename, mkdir, rmdir, + * and symlink). Ideally these operations should not change the + * lock state, but should be changed to let the caller of the + * function unlock them. Otherwise all intermediate vnode layers + * (such as union, umapfs, etc) must catch these functions to do + * the necessary locking at their layer. Note that the inactive + * and lookup operations also change their lock state, but this + * cannot be avoided, so these two operations will always need + * to be handled in intermediate layers. + */ + struct vnode *vp = ap->a_vp; + int vnflags, flags = ap->a_flags; + if (vp->v_vnlock == NULL) { + if ((flags & LK_TYPE_MASK) == LK_DRAIN) + return (0); + MALLOC(vp->v_vnlock, struct lock *, sizeof(struct lock), + M_VNODE, M_WAITOK); + lockinit(vp->v_vnlock, PVFS, "vnlock", 0, 0); + } + switch (flags & LK_TYPE_MASK) { + case LK_DRAIN: + vnflags = LK_DRAIN; + break; + case LK_EXCLUSIVE: + case LK_SHARED: + vnflags = LK_SHARED; + break; + case LK_UPGRADE: + case LK_EXCLUPGRADE: + case LK_DOWNGRADE: + return (0); + case LK_RELEASE: + default: + panic("vop_nolock: bad operation %d", flags & LK_TYPE_MASK); + } + if (flags & LK_INTERLOCK) + vnflags |= LK_INTERLOCK; + return(lockmgr(vp->v_vnlock, vnflags, &vp->v_interlock, ap->a_p)); +#else /* for now */ + /* + * Since we are not using the lock manager, we must clear + * the interlock here. + */ + if (ap->a_flags & LK_INTERLOCK) { + simple_unlock(&ap->a_vp->v_interlock); + } return (0); +#endif +} + +/* + * Decrement the active use count. + */ +int +vop_nounlock(ap) + struct vop_unlock_args /* { + struct vnode *a_vp; + int a_flags; + struct proc *a_p; + } */ *ap; +{ + struct vnode *vp = ap->a_vp; + + if (vp->v_vnlock == NULL) + return (0); + return (lockmgr(vp->v_vnlock, LK_RELEASE, NULL, ap->a_p)); +} + +/* + * Return whether or not the node is in use. + */ +int +vop_noislocked(ap) + struct vop_islocked_args /* { + struct vnode *a_vp; + } */ *ap; +{ + struct vnode *vp = ap->a_vp; + + if (vp->v_vnlock == NULL) + return (0); + return (lockstatus(vp->v_vnlock)); } +/* #ifdef DIAGNOSTIC */ /* * Vnode reference, just increment the count */ @@ -827,6 +1005,7 @@ void vref(vp) struct vnode *vp; { + simple_lock(&vp->v_interlock); if (vp->v_usecount <= 0) panic("vref used where vget required"); @@ -840,8 +1019,11 @@ vref(vp) * the object is created. This is necessary to * keep the system from re-entrantly doing it * multiple times. + * XXX vfs_object_create probably needs the interlock? */ + simple_unlock(&vp->v_interlock); vfs_object_create(vp, curproc, curproc->p_ucred, 0); + simple_lock(&vp->v_interlock); } } @@ -850,9 +1032,9 @@ vref(vp) */ void vput(vp) - register struct vnode *vp; + struct vnode *vp; { - VOP_UNLOCK(vp); + VOP_UNLOCK(vp, 0, curproc); vrele(vp); } @@ -862,33 +1044,38 @@ vput(vp) */ void vrele(vp) - register struct vnode *vp; + struct vnode *vp; { + struct proc *p = curproc; /* XXX */ #ifdef DIAGNOSTIC if (vp == NULL) panic("vrele: null vp"); #endif - + simple_lock(&vp->v_interlock); vp->v_usecount--; if ((vp->v_usecount == 1) && vp->v_object && (vp->v_object->flags & OBJ_VFS_REF)) { vp->v_object->flags &= ~OBJ_VFS_REF; + simple_unlock(&vp->v_interlock); vm_object_deallocate(vp->v_object); return; } - if (vp->v_usecount > 0) + if (vp->v_usecount > 0) { + simple_unlock(&vp->v_interlock); return; + } if (vp->v_usecount < 0) { #ifdef DIAGNOSTIC vprint("vrele: negative ref count", vp); #endif - panic("vrele: negative reference cnt"); + panic("vrele: negative ref cnt"); } + simple_lock(&vnode_free_list_slock); if (vp->v_flag & VAGE) { if(vp->v_tag != VT_TFS) TAILQ_INSERT_HEAD(&vnode_free_list, vp, v_freelist); @@ -898,9 +1085,12 @@ vrele(vp) if(vp->v_tag != VT_TFS) TAILQ_INSERT_TAIL(&vnode_free_list, vp, v_freelist); } + simple_unlock(&vnode_free_list_slock); + freevnodes++; - VOP_INACTIVE(vp); + if (vn_lock(vp, LK_EXCLUSIVE | LK_INTERLOCK, p) == 0) + VOP_INACTIVE(vp, p); } #ifdef DIAGNOSTIC @@ -912,7 +1102,9 @@ vhold(vp) register struct vnode *vp; { + simple_lock(&vp->v_interlock); vp->v_holdcnt++; + simple_unlock(&vp->v_interlock); } /* @@ -923,9 +1115,11 @@ holdrele(vp) register struct vnode *vp; { + simple_lock(&vp->v_interlock); if (vp->v_holdcnt <= 0) panic("holdrele: holdcnt"); vp->v_holdcnt--; + simple_unlock(&vp->v_interlock); } #endif /* DIAGNOSTIC */ @@ -948,11 +1142,11 @@ vflush(mp, skipvp, flags) struct vnode *skipvp; int flags; { - register struct vnode *vp, *nvp; + struct proc *p = curproc; /* XXX */ + struct vnode *vp, *nvp; int busy = 0; - if ((mp->mnt_flag & MNT_MPBUSY) == 0) - panic("vflush: not busy"); + simple_lock(&mntvnode_slock); loop: for (vp = mp->mnt_vnodelist.lh_first; vp; vp = nvp) { /* @@ -967,24 +1161,34 @@ loop: */ if (vp == skipvp) continue; + + simple_lock(&vp->v_interlock); /* * Skip over a vnodes marked VSYSTEM. */ - if ((flags & SKIPSYSTEM) && (vp->v_flag & VSYSTEM)) + if ((flags & SKIPSYSTEM) && (vp->v_flag & VSYSTEM)) { + simple_unlock(&vp->v_interlock); continue; + } /* * If WRITECLOSE is set, only flush out regular file vnodes * open for writing. */ if ((flags & WRITECLOSE) && - (vp->v_writecount == 0 || vp->v_type != VREG)) + (vp->v_writecount == 0 || vp->v_type != VREG)) { + simple_unlock(&vp->v_interlock); continue; + } if (vp->v_object && (vp->v_object->flags & OBJ_VFS_REF)) { + simple_unlock(&vp->v_interlock); + simple_unlock(&mntvnode_slock); vm_object_reference(vp->v_object); pager_cache(vp->v_object, FALSE); vp->v_object->flags &= ~OBJ_VFS_REF; vm_object_deallocate(vp->v_object); + simple_lock(&mntvnode_slock); + simple_lock(&vp->v_interlock); } /* @@ -992,7 +1196,9 @@ loop: * vnode data structures and we are done. */ if (vp->v_usecount == 0) { - vgone(vp); + simple_unlock(&mntvnode_slock); + vgonel(vp, p); + simple_lock(&mntvnode_slock); continue; } @@ -1002,21 +1208,25 @@ loop: * all other files, just kill them. */ if (flags & FORCECLOSE) { + simple_unlock(&mntvnode_slock); if (vp->v_type != VBLK && vp->v_type != VCHR) { - vgone(vp); + vgonel(vp, p); } else { - vclean(vp, 0); + vclean(vp, 0, p); vp->v_op = spec_vnodeop_p; insmntque(vp, (struct mount *) 0); } + simple_lock(&mntvnode_slock); continue; } #ifdef DIAGNOSTIC if (busyprt) vprint("vflush: busy vnode", vp); #endif + simple_unlock(&vp->v_interlock); busy++; } + simple_unlock(&mntvnode_slock); if (busy) return (EBUSY); return (0); @@ -1025,8 +1235,8 @@ loop: /* * Disassociate the underlying file system from a vnode. */ -void -vclean(struct vnode *vp, int flags) +static void +vclean(struct vnode *vp, int flags, struct proc *p) { int active; @@ -1036,15 +1246,7 @@ vclean(struct vnode *vp, int flags) * generate a race against ourselves to recycle it. */ if ((active = vp->v_usecount)) - VREF(vp); - /* - * Even if the count is zero, the VOP_INACTIVE routine may still have - * the object locked while it cleans it out. The VOP_LOCK ensures that - * the VOP_INACTIVE routine is done with its work. For active vnodes, - * it ensures that no other activity can occur while the underlying - * object is being cleaned out. - */ - VOP_LOCK(vp); + vp->v_usecount++; /* * Prevent the vnode from being recycled or brought into use while we * clean it out. @@ -1053,31 +1255,48 @@ vclean(struct vnode *vp, int flags) panic("vclean: deadlock"); vp->v_flag |= VXLOCK; /* - * Clean out any buffers associated with the vnode. + * Even if the count is zero, the VOP_INACTIVE routine may still + * have the object locked while it cleans it out. The VOP_LOCK + * ensures that the VOP_INACTIVE routine is done with its work. + * For active vnodes, it ensures that no other activity can + * occur while the underlying object is being cleaned out. */ - if (flags & DOCLOSE) - vinvalbuf(vp, V_SAVE, NOCRED, NULL, 0, 0); + VOP_LOCK(vp, LK_DRAIN | LK_INTERLOCK, p); /* - * Any other processes trying to obtain this lock must first wait for - * VXLOCK to clear, then call the new lock operation. + * Clean out any buffers associated with the vnode. */ - VOP_UNLOCK(vp); + if (flags & DOCLOSE) + vinvalbuf(vp, V_SAVE, NOCRED, p, 0, 0); /* - * If purging an active vnode, it must be closed and deactivated - * before being reclaimed. + * If purging an active vnode, it must be closed and + * deactivated before being reclaimed. Note that the + * VOP_INACTIVE will unlock the vnode. */ if (active) { if (flags & DOCLOSE) - VOP_CLOSE(vp, FNONBLOCK, NOCRED, NULL); - VOP_INACTIVE(vp); + VOP_CLOSE(vp, IO_NDELAY, NOCRED, p); + VOP_INACTIVE(vp, p); + } else { + /* + * Any other processes trying to obtain this lock must first + * wait for VXLOCK to clear, then call the new lock operation. + */ + VOP_UNLOCK(vp, 0, p); } /* * Reclaim the vnode. */ - if (VOP_RECLAIM(vp)) + if (VOP_RECLAIM(vp, p)) panic("vclean: cannot reclaim"); if (active) vrele(vp); + cache_purge(vp); + if (vp->v_vnlock) { + if ((vp->v_vnlock->lk_flags & LK_DRAINED) == 0) + vprint("vclean: lock not drained", vp); + FREE(vp->v_vnlock, M_VNODE); + vp->v_vnlock = NULL; + } /* * Done with purge, notify sleepers of the grim news. @@ -1092,46 +1311,91 @@ vclean(struct vnode *vp, int flags) } /* - * Eliminate all activity associated with the requested vnode + * Eliminate all activity associated with the requested vnode * and with all vnodes aliased to the requested vnode. */ -void -vgoneall(vp) - register struct vnode *vp; +int +vop_revoke(ap) + struct vop_revoke_args /* { + struct vnode *a_vp; + int a_flags; + } */ *ap; { - register struct vnode *vq; + struct vnode *vp, *vq; + struct proc *p = curproc; /* XXX */ + +#ifdef DIAGNOSTIC + if ((ap->a_flags & REVOKEALL) == 0) + panic("vop_revoke"); +#endif + + vp = ap->a_vp; + simple_lock(&vp->v_interlock); if (vp->v_flag & VALIASED) { /* - * If a vgone (or vclean) is already in progress, wait until - * it is done and return. + * If a vgone (or vclean) is already in progress, + * wait until it is done and return. */ if (vp->v_flag & VXLOCK) { vp->v_flag |= VXWANT; - (void) tsleep((caddr_t) vp, PINOD, "vgall", 0); - return; + simple_unlock(&vp->v_interlock); + tsleep((caddr_t)vp, PINOD, "vop_revokeall", 0); + return (0); } /* - * Ensure that vp will not be vgone'd while we are eliminating - * its aliases. + * Ensure that vp will not be vgone'd while we + * are eliminating its aliases. */ vp->v_flag |= VXLOCK; + simple_unlock(&vp->v_interlock); while (vp->v_flag & VALIASED) { + simple_lock(&spechash_slock); for (vq = *vp->v_hashchain; vq; vq = vq->v_specnext) { if (vq->v_rdev != vp->v_rdev || vq->v_type != vp->v_type || vp == vq) continue; + simple_unlock(&spechash_slock); vgone(vq); break; } + if (vq == NULLVP) { + simple_unlock(&spechash_slock); + } } /* - * Remove the lock so that vgone below will really eliminate - * the vnode after which time vgone will awaken any sleepers. + * Remove the lock so that vgone below will + * really eliminate the vnode after which time + * vgone will awaken any sleepers. */ + simple_lock(&vp->v_interlock); vp->v_flag &= ~VXLOCK; } - vgone(vp); + vgonel(vp, p); + return (0); +} + +/* + * Recycle an unused vnode to the front of the free list. + * Release the passed interlock if the vnode will be recycled. + */ +int +vrecycle(vp, inter_lkp, p) + struct vnode *vp; + struct simplelock *inter_lkp; + struct proc *p; +{ + + simple_lock(&vp->v_interlock); + if (vp->v_usecount == 0) { + if (inter_lkp) { + simple_unlock(inter_lkp); + } + vgonel(vp, p); + return (1); + } + simple_unlock(&vp->v_interlock); + return (0); } /* @@ -1142,16 +1406,31 @@ void vgone(vp) register struct vnode *vp; { - register struct vnode *vq; + struct proc *p = curproc; /* XXX */ + + simple_lock(&vp->v_interlock); + vgonel(vp, p); +} + +/* + * vgone, with the vp interlock held. + */ +void +vgonel(vp, p) + struct vnode *vp; + struct proc *p; +{ + struct vnode *vq; struct vnode *vx; /* - * If a vgone (or vclean) is already in progress, wait until it is - * done and return. + * If a vgone (or vclean) is already in progress, + * wait until it is done and return. */ if (vp->v_flag & VXLOCK) { vp->v_flag |= VXWANT; - (void) tsleep((caddr_t) vp, PINOD, "vgone", 0); + simple_unlock(&vp->v_interlock); + tsleep((caddr_t)vp, PINOD, "vgone", 0); return; } @@ -1162,18 +1441,18 @@ vgone(vp) /* * Clean out the filesystem specific data. */ - vclean(vp, DOCLOSE); + vclean(vp, DOCLOSE, p); /* * Delete from old mount point vnode list, if on one. */ - if (vp->v_mount != NULL) { - LIST_REMOVE(vp, v_mntvnodes); - vp->v_mount = NULL; - } + if (vp->v_mount != NULL) + insmntque(vp, (struct mount *)0); /* - * If special device, remove it from special device alias list. + * If special device, remove it from special device alias list + * if it is on one. */ - if (vp->v_type == VBLK || vp->v_type == VCHR) { + if ((vp->v_type == VBLK || vp->v_type == VCHR) && vp->v_specinfo != 0) { + simple_lock(&spechash_slock); if (*vp->v_hashchain == vp) { *vp->v_hashchain = vp->v_specnext; } else { @@ -1202,28 +1481,34 @@ vgone(vp) vx->v_flag &= ~VALIASED; vp->v_flag &= ~VALIASED; } + simple_unlock(&spechash_slock); FREE(vp->v_specinfo, M_VNODE); vp->v_specinfo = NULL; } + /* - * If it is on the freelist and not already at the head, move it to - * the head of the list. The test of the back pointer and the - * reference count of zero is because it will be removed from the free - * list by getnewvnode, but will not have its reference count - * incremented until after calling vgone. If the reference count were - * incremented first, vgone would (incorrectly) try to close the - * previous instance of the underlying object. So, the back pointer is - * explicitly set to `0xdeadb' in getnewvnode after removing it from - * the freelist to ensure that we do not try to move it here. + * If it is on the freelist and not already at the head, + * move it to the head of the list. The test of the back + * pointer and the reference count of zero is because + * it will be removed from the free list by getnewvnode, + * but will not have its reference count incremented until + * after calling vgone. If the reference count were + * incremented first, vgone would (incorrectly) try to + * close the previous instance of the underlying object. + * So, the back pointer is explicitly set to `0xdeadb' in + * getnewvnode after removing it from the freelist to ensure + * that we do not try to move it here. */ - if (vp->v_usecount == 0 && - vp->v_freelist.tqe_prev != (struct vnode **) 0xdeadb && - vnode_free_list.tqh_first != vp) { - if(vp->v_tag != VT_TFS) { + if (vp->v_usecount == 0) { + simple_lock(&vnode_free_list_slock); + if ((vp->v_freelist.tqe_prev != (struct vnode **)0xdeadb) && + vnode_free_list.tqh_first != vp) { TAILQ_REMOVE(&vnode_free_list, vp, v_freelist); TAILQ_INSERT_HEAD(&vnode_free_list, vp, v_freelist); } + simple_unlock(&vnode_free_list_slock); } + vp->v_type = VBAD; } @@ -1254,7 +1539,7 @@ int vcount(vp) register struct vnode *vp; { - register struct vnode *vq, *vnext; + struct vnode *vq, *vnext; int count; loop: @@ -1354,6 +1639,7 @@ int kinfo_vgetfailed; static int sysctl_vnode SYSCTL_HANDLER_ARGS { + struct proc *p = curproc; /* XXX */ register struct mount *mp, *nmp; struct vnode *vp; int error; @@ -1368,7 +1654,7 @@ sysctl_vnode SYSCTL_HANDLER_ARGS for (mp = mountlist.cqh_first; mp != (void *)&mountlist; mp = nmp) { nmp = mp->mnt_list.cqe_next; - if (vfs_busy(mp)) + if (vfs_busy(mp, LK_NOWAIT, &mountlist_slock, p)) continue; again: for (vp = mp->mnt_vnodelist.lh_first; @@ -1386,11 +1672,11 @@ again: } if ((error = SYSCTL_OUT(req, &vp, VPTRSZ)) || (error = SYSCTL_OUT(req, vp, VNODESZ))) { - vfs_unbusy(mp); + vfs_unbusy(mp, p); return (error); } } - vfs_unbusy(mp); + vfs_unbusy(mp, p); } return (0); @@ -1404,22 +1690,63 @@ SYSCTL_PROC(_kern, KERN_VNODE, vnode, CTLTYPE_OPAQUE|CTLFLAG_RD, */ int vfs_mountedon(vp) - register struct vnode *vp; + struct vnode *vp; { - register struct vnode *vq; + struct vnode *vq; + int error = 0; if (vp->v_specflags & SI_MOUNTEDON) return (EBUSY); if (vp->v_flag & VALIASED) { + simple_lock(&spechash_slock); for (vq = *vp->v_hashchain; vq; vq = vq->v_specnext) { if (vq->v_rdev != vp->v_rdev || vq->v_type != vp->v_type) continue; - if (vq->v_specflags & SI_MOUNTEDON) - return (EBUSY); + if (vq->v_specflags & SI_MOUNTEDON) { + error = EBUSY; + break; + } } + simple_unlock(&spechash_slock); + } + return (error); +} + +/* + * Unmount all filesystems. The list is traversed in reverse order + * of mounting to avoid dependencies. Should only be called by halt(). + */ +void +vfs_unmountall() +{ + struct mount *mp, *nmp, *rootfs = NULL; + int error; + + /* unmount all but rootfs */ + for (mp = mountlist.cqh_last; mp != (void *)&mountlist; mp = nmp) { + nmp = mp->mnt_list.cqe_prev; + + if (mp->mnt_flag & MNT_ROOTFS) { + rootfs = mp; + continue; + } + error = dounmount(mp, MNT_FORCE, initproc); + if (error) { + printf("unmount of %s failed (", mp->mnt_stat.f_mntonname); + if (error == EBUSY) + printf("BUSY)\n"); + else + printf("%d)\n", error); + } + } + + /* and finally... */ + if (rootfs) { + vfs_unmountroot(rootfs); + } else { + printf("no root filesystem\n"); } - return (0); } /* @@ -1565,8 +1892,8 @@ vfs_export_lookup(mp, nep, nam) rnh = nep->ne_rtable[saddr->sa_family]; if (rnh != NULL) { np = (struct netcred *) - (*rnh->rnh_matchaddr) ((caddr_t) saddr, - rnh); + (*rnh->rnh_matchaddr)((caddr_t)saddr, + rnh); if (np && np->netc_rnodes->rn_flags & RNF_ROOT) np = NULL; } @@ -1580,7 +1907,6 @@ vfs_export_lookup(mp, nep, nam) return (np); } - /* * perform msync on all vnodes under a mount point * the mount point must be locked. @@ -1639,10 +1965,10 @@ retry: } else { if (object->flags & OBJ_DEAD) { if (waslocked) - VOP_UNLOCK(vp); + VOP_UNLOCK(vp, 0, p); tsleep(object, PVM, "vodead", 0); if (waslocked) - VOP_LOCK(vp); + vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p); goto retry; } if ((object->flags & OBJ_VFS_REF) == 0) { |