diff options
author | dyson <dyson@FreeBSD.org> | 1997-02-10 02:22:35 +0000 |
---|---|---|
committer | dyson <dyson@FreeBSD.org> | 1997-02-10 02:22:35 +0000 |
commit | 10f666af84d48e89e4e2960415c9b616fce4077f (patch) | |
tree | 88a944de263165091f0a18abeedbaaccec532407 /sys/kern | |
parent | 0960d7e91af3428ffba89b42228d82d8afaa0389 (diff) | |
download | FreeBSD-src-10f666af84d48e89e4e2960415c9b616fce4077f.zip FreeBSD-src-10f666af84d48e89e4e2960415c9b616fce4077f.tar.gz |
This is the kernel Lite/2 commit. There are some requisite userland
changes, so don't expect to be able to run the kernel as-is (very well)
without the appropriate Lite/2 userland changes.
The system boots and can mount UFS filesystems.
Untested: ext2fs, msdosfs, NFS
Known problems: Incorrect Berkeley ID strings in some files.
                Mount_std mounts will not work until the getfsent
                library routine is changed.
Reviewed by: various people
Submitted by: Jeffery Hsu <hsu@freebsd.org>
Diffstat (limited to 'sys/kern')
43 files changed, 3555 insertions, 1808 deletions
diff --git a/sys/kern/imgact_aout.c b/sys/kern/imgact_aout.c index 71311eb..0730253 100644 --- a/sys/kern/imgact_aout.c +++ b/sys/kern/imgact_aout.c @@ -43,7 +43,7 @@ #include <vm/vm.h> #include <vm/vm_param.h> #include <vm/vm_prot.h> -#include <vm/lock.h> +#include <sys/lock.h> #include <vm/pmap.h> #include <vm/vm_map.h> #include <vm/vm_extern.h> diff --git a/sys/kern/imgact_elf.c b/sys/kern/imgact_elf.c index edc4c45..47caf98 100644 --- a/sys/kern/imgact_elf.c +++ b/sys/kern/imgact_elf.c @@ -55,7 +55,7 @@ #include <vm/vm_kern.h> #include <vm/vm_param.h> #include <vm/pmap.h> -#include <vm/lock.h> +#include <sys/lock.h> #include <vm/vm_map.h> #include <vm/vm_prot.h> #include <vm/vm_extern.h> @@ -358,7 +358,7 @@ elf_load_file(struct proc *p, char *file, u_long *addr, u_long *entry) /* * No longer need this, and it prevents demand paging. */ - VOP_UNLOCK(nd.ni_vp); + VOP_UNLOCK(nd.ni_vp, 0, p); if (error) goto fail; diff --git a/sys/kern/imgact_gzip.c b/sys/kern/imgact_gzip.c index 3156a86..9186749 100644 --- a/sys/kern/imgact_gzip.c +++ b/sys/kern/imgact_gzip.c @@ -39,7 +39,7 @@ #include <vm/vm.h> #include <vm/vm_param.h> #include <vm/vm_prot.h> -#include <vm/lock.h> +#include <sys/lock.h> #include <vm/pmap.h> #include <vm/vm_map.h> #include <vm/vm_kern.h> diff --git a/sys/kern/init_main.c b/sys/kern/init_main.c index 24c8ffb..589a4c5 100644 --- a/sys/kern/init_main.c +++ b/sys/kern/init_main.c @@ -66,7 +66,7 @@ #include <vm/vm.h> #include <vm/vm_param.h> #include <vm/vm_prot.h> -#include <vm/lock.h> +#include <sys/lock.h> #include <vm/pmap.h> #include <vm/vm_map.h> #include <sys/user.h> @@ -468,16 +468,16 @@ sched_setup(dummy) SYSINIT(sched_setup, SI_SUB_KICK_SCHEDULER, SI_ORDER_FIRST, sched_setup, NULL) /* ARGSUSED*/ -static void xxx_vfs_mountroot __P((void *dummy)); +static void xxx_vfs_mountroot __P((void *fsnamep)); static void -xxx_vfs_mountroot(dummy) - void *dummy; +xxx_vfs_mountroot(fsnamep) + void *fsnamep; { /* Mount the root file system. 
*/ - if ((*mountroot)(mountrootvfsops)) + if (vfs_mountrootfs(*((char **) fsnamep))) panic("cannot mount root"); } -SYSINIT(mountroot, SI_SUB_ROOT, SI_ORDER_FIRST, xxx_vfs_mountroot, NULL) +SYSINIT(mountroot, SI_SUB_ROOT, SI_ORDER_FIRST, xxx_vfs_mountroot, &mountrootfsname) /* ARGSUSED*/ static void xxx_vfs_root_fdtab __P((void *dummy)); @@ -492,7 +492,7 @@ xxx_vfs_root_fdtab(dummy) panic("cannot find root vnode"); fdp->fd_fd.fd_cdir = rootvnode; VREF(fdp->fd_fd.fd_cdir); - VOP_UNLOCK(rootvnode); + VOP_UNLOCK(rootvnode, 0, &proc0); fdp->fd_fd.fd_rdir = NULL; } SYSINIT(retrofit, SI_SUB_ROOT_FDTAB, SI_ORDER_FIRST, xxx_vfs_root_fdtab, NULL) diff --git a/sys/kern/init_sysent.c b/sys/kern/init_sysent.c index 9f41c00..dc5978d 100644 --- a/sys/kern/init_sysent.c +++ b/sys/kern/init_sysent.c @@ -238,7 +238,7 @@ struct sysent sysent[] = { { 2, (sy_call_t *)mlock }, /* 203 = mlock */ { 2, (sy_call_t *)munlock }, /* 204 = munlock */ { 2, (sy_call_t *)utrace }, /* 205 = utrace */ - { 0, (sy_call_t *)nosys }, /* 206 = nosys */ + { 1, (sy_call_t *)undelete }, /* 206 = undelete */ { 0, (sy_call_t *)nosys }, /* 207 = nosys */ { 0, (sy_call_t *)nosys }, /* 208 = nosys */ { 0, (sy_call_t *)nosys }, /* 209 = nosys */ diff --git a/sys/kern/kern_acct.c b/sys/kern/kern_acct.c index 2704347..59d0d35 100644 --- a/sys/kern/kern_acct.c +++ b/sys/kern/kern_acct.c @@ -49,6 +49,7 @@ #include <sys/file.h> #include <sys/syslog.h> #include <sys/kernel.h> +#include <sys/sysent.h> #include <sys/sysctl.h> #include <sys/namei.h> #include <sys/errno.h> @@ -101,18 +102,15 @@ SYSCTL_INT(_kern, OID_AUTO, acct_chkfreq, CTLFLAG_RW, * Accounting system call. Written based on the specification and * previous implementation done by Mark Tinguely. 
*/ -#ifndef _SYS_SYSPROTO_H_ -struct acct_args { - char *path; -}; - -#endif int -acct(p, uap, retval) - struct proc *p; - struct acct_args *uap; - int *retval; +acct(a1, uap, a3) + struct proc *a1; + struct acct_args /* { + syscallarg(char *) path; + } */ *uap; + int *a3; { + struct proc *p = curproc; /* XXX */ struct nameidata nd; int error; @@ -125,12 +123,13 @@ acct(p, uap, retval) * If accounting is to be started to a file, open that file for * writing and make sure it's a 'normal'. */ - if (uap->path != NULL) { - NDINIT(&nd, LOOKUP, NOFOLLOW, UIO_USERSPACE, uap->path, p); + if (SCARG(uap, path) != NULL) { + NDINIT(&nd, LOOKUP, NOFOLLOW, UIO_USERSPACE, SCARG(uap, path), + p); error = vn_open(&nd, FWRITE, 0); if (error) return (error); - VOP_UNLOCK(nd.ni_vp); + VOP_UNLOCK(nd.ni_vp, 0, p); if (nd.ni_vp->v_type != VREG) { vn_close(nd.ni_vp, FWRITE, p->p_ucred, p); return (EACCES); @@ -147,7 +146,7 @@ acct(p, uap, retval) p->p_ucred, p); acctp = savacctp = NULLVP; } - if (uap->path == NULL) + if (SCARG(uap, path) == NULL) return (error); /* @@ -228,7 +227,7 @@ acct_process(p) /* * Now, just write the accounting information to the file. 
*/ - LEASE_CHECK(vp, p, p->p_ucred, LEASE_WRITE); + VOP_LEASE(vp, p, p->p_ucred, LEASE_WRITE); return (vn_rdwr(UIO_WRITE, vp, (caddr_t)&acct, sizeof (acct), (off_t)0, UIO_SYSSPACE, IO_APPEND|IO_UNIT, p->p_ucred, (int *)0, p)); @@ -298,7 +297,9 @@ acctwatch(a) savacctp = NULLVP; log(LOG_NOTICE, "Accounting resumed\n"); } - } else if (acctp != NULLVP) { + } else { + if (acctp == NULLVP) + return; if (acctp->v_type == VBAD) { (void) vn_close(acctp, FWRITE, NOCRED, NULL); acctp = NULLVP; @@ -310,7 +311,6 @@ acctwatch(a) acctp = NULLVP; log(LOG_NOTICE, "Accounting suspended\n"); } - } else - return; + } timeout(acctwatch, NULL, acctchkfreq * hz); } diff --git a/sys/kern/kern_clock.c b/sys/kern/kern_clock.c index 6b958df..938e3df 100644 --- a/sys/kern/kern_clock.c +++ b/sys/kern/kern_clock.c @@ -70,7 +70,7 @@ #include <vm/vm.h> #include <vm/vm_param.h> #include <vm/vm_prot.h> -#include <vm/lock.h> +#include <sys/lock.h> #include <vm/pmap.h> #include <vm/vm_map.h> #include <sys/sysctl.h> diff --git a/sys/kern/kern_conf.c b/sys/kern/kern_conf.c index 62deb52..c6a9e14 100644 --- a/sys/kern/kern_conf.c +++ b/sys/kern/kern_conf.c @@ -192,6 +192,14 @@ void bdevsw_add_generic(int bdev, int cdev, struct bdevsw *bdevsw) { dev_t dev; + /* + * XXX hack alert. + */ + if (isdisk(makedev(bdev, 0), VBLK) && bdevsw->d_flags != D_DISK) { + printf("bdevsw_add_generic: adding D_DISK flag for device %d\n", + bdev); + bdevsw->d_flags = D_DISK; + } cdevsw_make(bdevsw); dev = makedev(cdev, 0); cdevsw_add(&dev, bdevsw->d_cdev, NULL); diff --git a/sys/kern/kern_exec.c b/sys/kern/kern_exec.c index 1a27a34..2d4247c 100644 --- a/sys/kern/kern_exec.c +++ b/sys/kern/kern_exec.c @@ -51,7 +51,7 @@ #include <vm/vm.h> #include <vm/vm_param.h> #include <vm/vm_prot.h> -#include <vm/lock.h> +#include <sys/lock.h> #include <vm/pmap.h> #include <vm/vm_map.h> #include <vm/vm_kern.h> @@ -162,7 +162,7 @@ interpret: * Lose the lock on the vnode. 
It's no longer needed, and must not * exist for the pagefault paging to work below. */ - VOP_UNLOCK(imgp->vp); + VOP_UNLOCK(imgp->vp, 0, p); if (error) goto exec_fail_dealloc; diff --git a/sys/kern/kern_exit.c b/sys/kern/kern_exit.c index 82c1f8b..f65419f 100644 --- a/sys/kern/kern_exit.c +++ b/sys/kern/kern_exit.c @@ -1,4 +1,4 @@ -/*- +/* * Copyright (c) 1982, 1986, 1989, 1991, 1993 * The Regents of the University of California. All rights reserved. * (c) UNIX System Laboratories, Inc. @@ -73,7 +73,7 @@ #include <vm/vm.h> #include <vm/vm_param.h> #include <vm/vm_prot.h> -#include <vm/lock.h> +#include <sys/lock.h> #include <vm/pmap.h> #include <vm/vm_map.h> #include <vm/vm_kern.h> @@ -208,7 +208,7 @@ exit1(p, rv) * if we blocked. */ if (sp->s_ttyvp) - vgoneall(sp->s_ttyvp); + VOP_REVOKE(sp->s_ttyvp, REVOKEALL); } if (sp->s_ttyvp) vrele(sp->s_ttyvp); diff --git a/sys/kern/kern_fork.c b/sys/kern/kern_fork.c index 572cb60..0de5272 100644 --- a/sys/kern/kern_fork.c +++ b/sys/kern/kern_fork.c @@ -56,7 +56,7 @@ #include <vm/vm.h> #include <vm/vm_param.h> -#include <vm/lock.h> +#include <sys/lock.h> #include <vm/pmap.h> #include <vm/vm_map.h> #include <vm/vm_extern.h> diff --git a/sys/kern/kern_ktrace.c b/sys/kern/kern_ktrace.c index e9f4792..3ed8013 100644 --- a/sys/kern/kern_ktrace.c +++ b/sys/kern/kern_ktrace.c @@ -271,7 +271,7 @@ ktrace(curp, uap, retval) return (error); } vp = nd.ni_vp; - VOP_UNLOCK(vp); + VOP_UNLOCK(vp, 0, curp); if (vp->v_type != VREG) { (void) vn_close(vp, FREAD|FWRITE, curp->p_ucred, curp); curp->p_traceflag &= ~KTRFAC_ACTIVE; @@ -478,9 +478,9 @@ ktrwrite(vp, kth) aiov[1].iov_len = kth->ktr_len; auio.uio_resid += kth->ktr_len; } - VOP_LOCK(vp); + vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p); error = VOP_WRITE(vp, &auio, IO_UNIT|IO_APPEND, p->p_ucred); - VOP_UNLOCK(vp); + VOP_UNLOCK(vp, 0, p); if (!error) return; /* diff --git a/sys/kern/kern_lkm.c b/sys/kern/kern_lkm.c index c2dad16..33c5ef3 100644 --- a/sys/kern/kern_lkm.c +++ b/sys/kern/kern_lkm.c 
@@ -604,8 +604,11 @@ _lkm_vfs(lkmtp, cmd) { struct lkm_vfs *args = lkmtp->private.lkm_vfs; struct vfsconf *vfc = args->lkm_vfsconf; + struct vfsconf *vfsp, *lastvfsp, *prev_vfsp, *new_vfc; int i; int err = 0; + char fstypename[MFSNAMELEN]; + int neednamesearch = 1; switch(cmd) { case LKM_E_LOAD: @@ -613,26 +616,51 @@ _lkm_vfs(lkmtp, cmd) if (lkmexists(lkmtp)) return(EEXIST); - for(i = 0; i < MOUNT_MAXTYPE; i++) { - if(!strcmp(vfc->vfc_name, vfsconf[i]->vfc_name)) { - return EEXIST; - } - } - - i = args->lkm_offset = vfc->vfc_index; - if (i < 0) { - for (i = MOUNT_MAXTYPE - 1; i >= 0; i--) { - if(vfsconf[i] == &void_vfsconf) + /* check to see if filesystem already exists */ + vfsp = NULL; +#ifdef COMPAT_43 /* see vfs_syscalls.c:mount() */ + if (vfc->vfc_typenum < maxvfsconf) { + neednamesearch = 0; + for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next) + if (vfsp->vfc_typenum == vfc->vfc_typenum) break; + if (vfsp != NULL) { + neednamesearch = 1; + strncpy(fstypename, vfsp->vfc_name, MFSNAMELEN); + } + } else +#endif /* COMPAT_43 */ + if (neednamesearch) { + strncpy(fstypename, vfc->vfc_name, MFSNAMELEN); + for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next) { + if (!strcmp(vfsp->vfc_name, fstypename)) { + return EEXIST; + } } } + + i = args->lkm_offset = vfc->vfc_typenum; if (i < 0) { - return EINVAL; + i = maxvfsconf; } - args->lkm_offset = vfc->vfc_index = i; + args->lkm_offset = vfc->vfc_typenum = i; + + if (maxvfsconf <= vfsp->vfc_typenum) + maxvfsconf = vfsp->vfc_typenum + 1; + + /* find vfsconf tail */ + for (lastvfsp = vfsconf; lastvfsp->vfc_next; + lastvfsp = lastvfsp->vfc_next) ; - vfsconf[i] = vfc; - vfssw[i] = vfc->vfc_vfsops; + /* make copy */ +/* possible race condition if vfsconf changes while we wait XXX JH */ + MALLOC(new_vfc, struct vfsconf *, sizeof(struct vfsconf), + M_VFSCONF, M_WAITOK); + *new_vfc = *vfc; + vfc = new_vfc; + + lastvfsp->vfc_next = vfc; + vfc->vfc_next = NULL; /* like in vfs_op_init */ for(i = 0; args->lkm_vnodeops->ls_items[i]; i++) 
{ @@ -645,7 +673,7 @@ _lkm_vfs(lkmtp, cmd) /* * Call init function for this VFS... */ - (*(vfssw[vfc->vfc_index]->vfs_init))(); + (*(vfsp->vfc_vfsops->vfs_init))(vfsp); /* done! */ break; @@ -654,13 +682,23 @@ _lkm_vfs(lkmtp, cmd) /* current slot... */ i = args->lkm_offset; - if (vfsconf[i]->vfc_refcount) { + prev_vfsp = NULL; + for (vfsp = vfsconf; vfsp; + prev_vfsp = vfsp, vfsp = vfsp->vfc_next) { + if (vfsp->vfc_typenum == vfc->vfc_typenum) + break; + } + if (vfsp == NULL) { + return EINVAL; + } + + if (vfsp->vfc_refcount) { return EBUSY; } - /* replace current slot contents with old contents */ - vfssw[i] = (struct vfsops *)0; - vfsconf[i] = &void_vfsconf; + FREE(vfsp, M_VFSCONF); + + prev_vfsp->vfc_next = vfsp->vfc_next; break; diff --git a/sys/kern/kern_lockf.c b/sys/kern/kern_lockf.c index aea3976..3491e63 100644 --- a/sys/kern/kern_lockf.c +++ b/sys/kern/kern_lockf.c @@ -52,16 +52,18 @@ * This variable controls the maximum number of processes that will * be checked in doing deadlock detection. */ -int maxlockdepth = MAXDEPTH; +static int maxlockdepth = MAXDEPTH; #ifdef LOCKF_DEBUG +#include <vm/vm.h> +#include <sys/sysctl.h> int lockf_debug = 0; +SYSCTL_INT(_debug, 4, lockf_debug, CTLFLAG_RW, &lockf_debug, 0, ""); #endif #define NOLOCKF (struct lockf *)0 #define SELF 0x1 #define OTHERS 0x2 -static void lf_addblock __P((struct lockf *, struct lockf *)); static int lf_clearlock __P((struct lockf *)); static int lf_findoverlap __P((struct lockf *, struct lockf *, int, struct lockf ***, struct lockf **)); @@ -138,10 +140,11 @@ lf_advlock(ap, head, size) lock->lf_start = start; lock->lf_end = end; lock->lf_id = ap->a_id; - lock->lf_head = head; +/* lock->lf_inode = ip; */ /* XXX JH */ lock->lf_type = fl->l_type; + lock->lf_head = head; lock->lf_next = (struct lockf *)0; - lock->lf_block = (struct lockf *)0; + TAILQ_INIT(&lock->lf_blkhd); lock->lf_flags = ap->a_flags; /* * Do the requested operation. 
@@ -252,7 +255,7 @@ lf_setlock(lock) * Remember who blocked us (for deadlock detection). */ lock->lf_next = block; - lf_addblock(block, lock); + TAILQ_INSERT_TAIL(&block->lf_blkhd, lock, lf_block); #ifdef LOCKF_DEBUG if (lockf_debug & 1) { lf_print("lf_setlock: blocking on", block); @@ -260,19 +263,19 @@ lf_setlock(lock) } #endif /* LOCKF_DEBUG */ if ((error = tsleep((caddr_t)lock, priority, lockstr, 0))) { - /* - * Delete ourselves from the waiting to lock list. - */ - for (block = lock->lf_next; - block != NOLOCKF; - block = block->lf_block) { - if (block->lf_block != lock) - continue; - block->lf_block = block->lf_block->lf_block; - break; - } - free(lock, M_LOCKF); - return (error); + /* + * We may have been awakened by a signal (in + * which case we must remove ourselves from the + * blocked list) and/or by another process + * releasing a lock (in which case we have already + * been removed from the blocked list and our + * lf_next field set to NOLOCKF). + */ + if (lock->lf_next) + TAILQ_REMOVE(&lock->lf_next->lf_blkhd, lock, + lf_block); + free(lock, M_LOCKF); + return (error); } } /* @@ -347,9 +350,12 @@ lf_setlock(lock) overlap->lf_type == F_WRLCK) { lf_wakelock(overlap); } else { - ltmp = lock->lf_block; - lock->lf_block = overlap->lf_block; - lf_addblock(lock, ltmp); + while (ltmp = overlap->lf_blkhd.tqh_first) { + TAILQ_REMOVE(&overlap->lf_blkhd, ltmp, + lf_block); + TAILQ_INSERT_TAIL(&lock->lf_blkhd, + ltmp, lf_block); + } } /* * Add the new lock if necessary and delete the overlap. @@ -645,34 +651,6 @@ lf_findoverlap(lf, lock, type, prev, overlap) } /* - * Add a lock to the end of the blocked list. 
- */ -static void -lf_addblock(blocklist, lock) - struct lockf *blocklist; - struct lockf *lock; -{ - register struct lockf *lf; - - if (lock == NOLOCKF) - return; -#ifdef LOCKF_DEBUG - if (lockf_debug & 2) { - lf_print("addblock: adding", lock); - lf_print("to blocked list of", blocklist); - } -#endif /* LOCKF_DEBUG */ - if ((lf = blocklist->lf_block) == NOLOCKF) { - blocklist->lf_block = lock; - return; - } - while (lf->lf_block != NOLOCKF) - lf = lf->lf_block; - lf->lf_block = lock; - return; -} - -/* * Split a lock and a contained region into * two or three locks as necessary. */ @@ -710,7 +688,7 @@ lf_split(lock1, lock2) MALLOC(splitlock, struct lockf *, sizeof *splitlock, M_LOCKF, M_WAITOK); bcopy((caddr_t)lock1, (caddr_t)splitlock, sizeof *splitlock); splitlock->lf_start = lock2->lf_end + 1; - splitlock->lf_block = NOLOCKF; + TAILQ_INIT(&splitlock->lf_blkhd); lock1->lf_end = lock2->lf_start - 1; /* * OK, now link it in @@ -727,28 +705,23 @@ static void lf_wakelock(listhead) struct lockf *listhead; { - register struct lockf *blocklist, *wakelock; - - blocklist = listhead->lf_block; - listhead->lf_block = NOLOCKF; - while (blocklist != NOLOCKF) { - wakelock = blocklist; - blocklist = blocklist->lf_block; - wakelock->lf_block = NOLOCKF; + register struct lockf *wakelock; + + while (wakelock = listhead->lf_blkhd.tqh_first) { + TAILQ_REMOVE(&listhead->lf_blkhd, wakelock, lf_block); wakelock->lf_next = NOLOCKF; #ifdef LOCKF_DEBUG if (lockf_debug & 2) lf_print("lf_wakelock: awakening", wakelock); #endif /* LOCKF_DEBUG */ - wakeup((caddr_t)wakelock); - } + wakeup((caddr_t)wakelock); + } } #ifdef LOCKF_DEBUG /* * Print out a lock. */ -void lf_print(tag, lock) char *tag; register struct lockf *lock; @@ -767,18 +740,17 @@ lf_print(tag, lock) lock->lf_type == F_WRLCK ? "exclusive" : lock->lf_type == F_UNLCK ? 
"unlock" : "unknown", lock->lf_start, lock->lf_end); - if (lock->lf_block) - printf(" block 0x%x\n", lock->lf_block); + if (lock->lf_blkhd.tqh_first) + printf(" block 0x%x\n", lock->lf_blkhd.tqh_first); else printf("\n"); } -void lf_printlist(tag, lock) char *tag; struct lockf *lock; { - register struct lockf *lf; + register struct lockf *lf, *blk; printf("%s: Lock list for ino %d on dev <%d, %d>:\n", tag, lock->lf_inode->i_number, @@ -795,10 +767,23 @@ lf_printlist(tag, lock) lf->lf_type == F_WRLCK ? "exclusive" : lf->lf_type == F_UNLCK ? "unlock" : "unknown", lf->lf_start, lf->lf_end); - if (lf->lf_block) - printf(" block 0x%x\n", lf->lf_block); - else - printf("\n"); + for (blk = lf->lf_blkhd.tqh_first; blk; + blk = blk->lf_block.tqe_next) { + printf("\n\t\tlock request 0x%lx for ", blk); + if (blk->lf_flags & F_POSIX) + printf("proc %d", + ((struct proc *)(blk->lf_id))->p_pid); + else + printf("id 0x%x", blk->lf_id); + printf(", %s, start %d, end %d", + blk->lf_type == F_RDLCK ? "shared" : + blk->lf_type == F_WRLCK ? "exclusive" : + blk->lf_type == F_UNLCK ? 
"unlock" : + "unknown", blk->lf_start, blk->lf_end); + if (blk->lf_blkhd.tqh_first) + panic("lf_printlist: bad list"); + } + printf("\n"); } } #endif /* LOCKF_DEBUG */ diff --git a/sys/kern/kern_proc.c b/sys/kern/kern_proc.c index 2b162be..37d99e4 100644 --- a/sys/kern/kern_proc.c +++ b/sys/kern/kern_proc.c @@ -53,7 +53,7 @@ #include <vm/vm.h> #include <vm/vm_param.h> #include <vm/vm_prot.h> -#include <vm/lock.h> +#include <sys/lock.h> #include <vm/pmap.h> #include <vm/vm_map.h> #include <sys/user.h> diff --git a/sys/kern/kern_resource.c b/sys/kern/kern_resource.c index 70d1551..d9381a6 100644 --- a/sys/kern/kern_resource.c +++ b/sys/kern/kern_resource.c @@ -53,7 +53,7 @@ #include <vm/vm.h> #include <vm/vm_param.h> #include <vm/vm_prot.h> -#include <vm/lock.h> +#include <sys/lock.h> #include <vm/pmap.h> #include <vm/vm_map.h> diff --git a/sys/kern/kern_sig.c b/sys/kern/kern_sig.c index 0b82597..0f6c9e5 100644 --- a/sys/kern/kern_sig.c +++ b/sys/kern/kern_sig.c @@ -67,7 +67,7 @@ #include <vm/vm.h> #include <vm/vm_param.h> #include <vm/vm_prot.h> -#include <vm/lock.h> +#include <sys/lock.h> #include <vm/pmap.h> #include <vm/vm_map.h> #include <sys/user.h> /* for coredump */ @@ -660,7 +660,7 @@ gsignal(pgid, signum) } /* - * Send a signal to a process group. If checktty is 1, + * Send a signal to a process group. If checktty is 1, * limit to members which have a controlling terminal. 
*/ void @@ -1255,7 +1255,7 @@ coredump(p) } VATTR_NULL(&vattr); vattr.va_size = 0; - LEASE_CHECK(vp, p, cred, LEASE_WRITE); + VOP_LEASE(vp, p, cred, LEASE_WRITE); VOP_SETATTR(vp, &vattr, cred, p); p->p_acflag |= ACORE; bcopy(p, &p->p_addr->u_kproc.kp_proc, sizeof(struct proc)); @@ -1272,7 +1272,7 @@ coredump(p) (off_t)ctob(UPAGES) + ctob(vm->vm_dsize), UIO_USERSPACE, IO_NODELOCKED|IO_UNIT, cred, (int *) NULL, p); out: - VOP_UNLOCK(vp); + VOP_UNLOCK(vp, 0, p); error1 = vn_close(vp, FWRITE, cred, p); if (error == 0) error = error1; diff --git a/sys/kern/kern_synch.c b/sys/kern/kern_synch.c index d591c96..43f9e5d 100644 --- a/sys/kern/kern_synch.c +++ b/sys/kern/kern_synch.c @@ -529,9 +529,10 @@ mi_switch() register long s, u; struct timeval tv; -#ifdef DEBUG - if (p->p_simple_locks) - panic("sleep: holding simple lock"); +#ifdef SIMPLELOCK_DEBUG + if (p->p_simple_locks) { + printf("sleep: holding simple lock"); + } #endif /* * Compute the amount of time during which the current diff --git a/sys/kern/kern_tc.c b/sys/kern/kern_tc.c index 6b958df..938e3df 100644 --- a/sys/kern/kern_tc.c +++ b/sys/kern/kern_tc.c @@ -70,7 +70,7 @@ #include <vm/vm.h> #include <vm/vm_param.h> #include <vm/vm_prot.h> -#include <vm/lock.h> +#include <sys/lock.h> #include <vm/pmap.h> #include <vm/vm_map.h> #include <sys/sysctl.h> diff --git a/sys/kern/kern_time.c b/sys/kern/kern_time.c index 82b03d5..baa8789 100644 --- a/sys/kern/kern_time.c +++ b/sys/kern/kern_time.c @@ -139,7 +139,9 @@ settimeofday(p, uap, retval) for (p = allproc.lh_first; p != 0; p = p->p_list.le_next) if (timerisset(&p->p_realtimer.it_value)) timevaladd(&p->p_realtimer.it_value, &delta); - LEASE_UPDATETIME(delta.tv_sec); +# ifdef NFS + lease_updatetime(delta.tv_sec); +# endif splx(s); resettodr(); } diff --git a/sys/kern/kern_timeout.c b/sys/kern/kern_timeout.c index 6b958df..938e3df 100644 --- a/sys/kern/kern_timeout.c +++ b/sys/kern/kern_timeout.c @@ -70,7 +70,7 @@ #include <vm/vm.h> #include <vm/vm_param.h> #include 
<vm/vm_prot.h> -#include <vm/lock.h> +#include <sys/lock.h> #include <vm/pmap.h> #include <vm/vm_map.h> #include <sys/sysctl.h> diff --git a/sys/kern/subr_diskslice.c b/sys/kern/subr_diskslice.c index c88ffe7..303e33b 100644 --- a/sys/kern/subr_diskslice.c +++ b/sys/kern/subr_diskslice.c @@ -63,6 +63,7 @@ #include <sys/systm.h> #include <sys/vnode.h> +#include <ufs/ufs/dinode.h> #include <ufs/ffs/fs.h> #define TRACE(str) do { if (ds_debug) printf str; } while (0) diff --git a/sys/kern/subr_trap.c b/sys/kern/subr_trap.c index b38e35d..c866f32 100644 --- a/sys/kern/subr_trap.c +++ b/sys/kern/subr_trap.c @@ -61,7 +61,7 @@ #include <vm/vm.h> #include <vm/vm_param.h> #include <vm/vm_prot.h> -#include <vm/lock.h> +#include <sys/lock.h> #include <vm/pmap.h> #include <vm/vm_kern.h> #include <vm/vm_map.h> diff --git a/sys/kern/subr_xxx.c b/sys/kern/subr_xxx.c index 78e4013..fcb951f 100644 --- a/sys/kern/subr_xxx.c +++ b/sys/kern/subr_xxx.c @@ -52,6 +52,17 @@ eopnotsupp() } /* + * Return error for an inval operation + * on a specific object or file type. + */ +int +einval() +{ + + return (EINVAL); +} + +/* * Generic null operation, always returns success. 
*/ int diff --git a/sys/kern/sys_pipe.c b/sys/kern/sys_pipe.c index 9ad0a61..6d9a566 100644 --- a/sys/kern/sys_pipe.c +++ b/sys/kern/sys_pipe.c @@ -73,7 +73,7 @@ #include <vm/vm.h> #include <vm/vm_prot.h> #include <vm/vm_param.h> -#include <vm/lock.h> +#include <sys/lock.h> #include <vm/vm_object.h> #include <vm/vm_kern.h> #include <vm/vm_extern.h> diff --git a/sys/kern/sys_process.c b/sys/kern/sys_process.c index b64f1e6..72e95e7 100644 --- a/sys/kern/sys_process.c +++ b/sys/kern/sys_process.c @@ -45,7 +45,7 @@ #include <vm/vm.h> #include <vm/vm_param.h> #include <vm/vm_prot.h> -#include <vm/lock.h> +#include <sys/lock.h> #include <vm/pmap.h> #include <vm/vm_map.h> #include <vm/vm_object.h> diff --git a/sys/kern/syscalls.c b/sys/kern/syscalls.c index e0c707d..2afd2b4 100644 --- a/sys/kern/syscalls.c +++ b/sys/kern/syscalls.c @@ -227,7 +227,7 @@ char *syscallnames[] = { "mlock", /* 203 = mlock */ "munlock", /* 204 = munlock */ "utrace", /* 205 = utrace */ - "#206", /* 206 = nosys */ + "undelete", /* 206 = undelete */ "#207", /* 207 = nosys */ "#208", /* 208 = nosys */ "#209", /* 209 = nosys */ diff --git a/sys/kern/syscalls.master b/sys/kern/syscalls.master index 896f846..cacaf42 100644 --- a/sys/kern/syscalls.master +++ b/sys/kern/syscalls.master @@ -60,7 +60,7 @@ int flags); } 19 COMPAT POSIX { long lseek(int fd, long offset, int whence); } 20 STD POSIX { pid_t getpid(void); } -21 STD BSD { int mount(int type, char *path, int flags, \ +21 STD BSD { int mount(char *type, char *path, int flags, \ caddr_t data); } ; XXX 4.4lite2 uses `char *type' but we're not ready for that. ; XXX `path' should have type `const char *' but we're not ready for that. 
@@ -111,7 +111,7 @@ 57 STD POSIX { int symlink(char *path, char *link); } 58 STD POSIX { int readlink(char *path, char *buf, int count); } 59 STD POSIX { int execve(char *fname, char **argv, char **envv); } -60 STD POSIX { int umask(int newmask); } umask umask_args mode_t +60 STD POSIX { int umask(int newmask); } umask umask_args int 61 STD BSD { int chroot(char *path); } 62 COMPAT POSIX { int fstat(int fd, struct ostat *sb); } 63 COMPAT BSD { int getkerninfo(int op, char *where, int *size, \ @@ -328,7 +328,7 @@ 203 STD BSD { int mlock(caddr_t addr, size_t len); } 204 STD BSD { int munlock(caddr_t addr, size_t len); } 205 STD BSD { int utrace(caddr_t addr, size_t len); } -206 UNIMPL NOHIDE nosys +206 STD BSD { int undelete(char *path); } 207 UNIMPL NOHIDE nosys 208 UNIMPL NOHIDE nosys 209 UNIMPL NOHIDE nosys diff --git a/sys/kern/sysv_shm.c b/sys/kern/sysv_shm.c index f02a785..b777250 100644 --- a/sys/kern/sysv_shm.c +++ b/sys/kern/sysv_shm.c @@ -48,7 +48,7 @@ #include <vm/vm.h> #include <vm/vm_param.h> #include <vm/vm_prot.h> -#include <vm/lock.h> +#include <sys/lock.h> #include <vm/pmap.h> #include <vm/vm_object.h> #include <vm/vm_map.h> diff --git a/sys/kern/tty.c b/sys/kern/tty.c index 47707db..06ce568 100644 --- a/sys/kern/tty.c +++ b/sys/kern/tty.c @@ -94,7 +94,7 @@ #include <vm/vm.h> #include <vm/vm_param.h> #include <vm/vm_prot.h> -#include <vm/lock.h> +#include <sys/lock.h> #include <vm/pmap.h> #include <vm/vm_map.h> diff --git a/sys/kern/tty_tty.c b/sys/kern/tty_tty.c index 770b66e..b6e3fda 100644 --- a/sys/kern/tty_tty.c +++ b/sys/kern/tty_tty.c @@ -78,7 +78,7 @@ cttyopen(dev, flag, mode, p) if (ttyvp == NULL) return (ENXIO); - VOP_LOCK(ttyvp); + vn_lock(ttyvp, LK_EXCLUSIVE | LK_RETRY, p); #ifdef PARANOID /* * Since group is tty and mode is 620 on most terminal lines @@ -93,7 +93,7 @@ cttyopen(dev, flag, mode, p) if (!error) #endif /* PARANOID */ error = VOP_OPEN(ttyvp, flag, NOCRED, p); - VOP_UNLOCK(ttyvp); + VOP_UNLOCK(ttyvp, 0, p); return (error); } 
@@ -104,14 +104,15 @@ cttyread(dev, uio, flag) struct uio *uio; int flag; { - register struct vnode *ttyvp = cttyvp(uio->uio_procp); + struct proc *p = uio->uio_procp; + register struct vnode *ttyvp = cttyvp(p); int error; if (ttyvp == NULL) return (EIO); - VOP_LOCK(ttyvp); + vn_lock(ttyvp, LK_EXCLUSIVE | LK_RETRY, p); error = VOP_READ(ttyvp, uio, flag, NOCRED); - VOP_UNLOCK(ttyvp); + VOP_UNLOCK(ttyvp, 0, p); return (error); } @@ -122,14 +123,15 @@ cttywrite(dev, uio, flag) struct uio *uio; int flag; { - register struct vnode *ttyvp = cttyvp(uio->uio_procp); + struct proc *p = uio->uio_procp; + struct vnode *ttyvp = cttyvp(uio->uio_procp); int error; if (ttyvp == NULL) return (EIO); - VOP_LOCK(ttyvp); + vn_lock(ttyvp, LK_EXCLUSIVE | LK_RETRY, p); error = VOP_WRITE(ttyvp, uio, flag, NOCRED); - VOP_UNLOCK(ttyvp); + VOP_UNLOCK(ttyvp, 0, p); return (error); } @@ -195,5 +197,3 @@ ctty_drvinit(void *unused) } SYSINIT(cttydev,SI_SUB_DRIVERS,SI_ORDER_MIDDLE+CDEV_MAJOR,ctty_drvinit,NULL) - - diff --git a/sys/kern/uipc_usrreq.c b/sys/kern/uipc_usrreq.c index 2017de9..bfbb3c6 100644 --- a/sys/kern/uipc_usrreq.c +++ b/sys/kern/uipc_usrreq.c @@ -436,7 +436,7 @@ unp_bind(unp, nam, p) struct nameidata nd; NDINIT(&nd, CREATE, FOLLOW | LOCKPARENT, UIO_SYSSPACE, - soun->sun_path, p); + soun->sun_path, p); if (unp->unp_vnode != NULL) return (EINVAL); if (nam->m_len == MLEN) { @@ -461,15 +461,14 @@ unp_bind(unp, nam, p) VATTR_NULL(&vattr); vattr.va_type = VSOCK; vattr.va_mode = ACCESSPERMS; - LEASE_CHECK(nd.ni_dvp, p, p->p_ucred, LEASE_WRITE); - error = VOP_CREATE(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr); - if (error) + VOP_LEASE(nd.ni_dvp, p, p->p_ucred, LEASE_WRITE); + if (error = VOP_CREATE(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr)) return (error); vp = nd.ni_vp; vp->v_socket = unp->unp_socket; unp->unp_vnode = vp; unp->unp_addr = m_copy(nam, 0, (int)M_COPYALL); - VOP_UNLOCK(vp); + VOP_UNLOCK(vp, 0, p); return (0); } @@ -888,7 +887,7 @@ unp_gc() for (i = nunref, fpp = extra_ref; 
--i >= 0; ++fpp) sorflush((struct socket *)(*fpp)->f_data); for (i = nunref, fpp = extra_ref; --i >= 0; ++fpp) - closef(*fpp,(struct proc*) NULL); + closef(*fpp, (struct proc *) NULL); free((caddr_t)extra_ref, M_FILE); unp_gcing = 0; } @@ -897,6 +896,7 @@ void unp_dispose(m) struct mbuf *m; { + if (m) unp_scan(m, unp_discard); } @@ -904,7 +904,7 @@ unp_dispose(m) static void unp_scan(m0, op) register struct mbuf *m0; - void (*op)(struct file *); + void (*op) __P((struct file *)); { register struct mbuf *m; register struct file **rp; diff --git a/sys/kern/vfs_cache.c b/sys/kern/vfs_cache.c index bee702d..718c4a3 100644 --- a/sys/kern/vfs_cache.c +++ b/sys/kern/vfs_cache.c @@ -1,8 +1,9 @@ /* - * Copyright (c) 1989, 1993 + * Copyright (c) 1989, 1993, 1995 * The Regents of the University of California. All rights reserved. - * Copyright (c) 1995 - * Poul-Henning Kamp. All rights reserved. + * + * This code is derived from software contributed to Berkeley by + * Poul-Henning Kamp of the FreeBSD Project. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions @@ -58,9 +59,8 @@ * obtained from (vp, name) where vp refers to the directory * containing name. * - * If it is a "negative" entry, (that we know a name to >not< exist) - * we point out entry at our own "nchENOENT", to avoid too much special - * casing in the inner loops of lookup. + * If it is a "negative" entry, (i.e. for a name that is known NOT to + * exist) the vnode pointer will be NULL. * * For simplicity (and economy of storage), names longer than * a maximum length of NCHNAMLEN are not cached; they occur @@ -74,15 +74,15 @@ /* * Structures associated with name cacheing. 
*/ +#define NCHHASH(dvp, cnp) \ + (&nchashtbl[((dvp)->v_id + (cnp)->cn_hash) & nchash]) static LIST_HEAD(nchashhead, namecache) *nchashtbl; /* Hash Table */ -static TAILQ_HEAD(, namecache) nclruhead; /* LRU chain */ -static u_long nchash; /* size of hash table */ -struct nchstats nchstats; /* cache effectiveness statistics */ -static struct vnode nchENOENT; /* our own "novnode" */ -static int doingcache = 1; /* 1 => enable the cache */ +static u_long nchash; /* size of hash table - 1 */ +static int doingcache = 1; /* 1 => enable the cache */ SYSCTL_INT(_debug, OID_AUTO, vfscache, CTLFLAG_RW, &doingcache, 0, ""); -static u_long numcache; -u_long numvnodes; +static u_long numcache; /* number of cache entries allocated */ +static TAILQ_HEAD(, namecache) nclruhead; /* LRU chain */ +struct nchstats nchstats; /* cache effectiveness statistics */ #ifdef NCH_STATISTICS u_long nchnbr; @@ -93,32 +93,42 @@ u_long nchnbr; #define NCHHIT(ncp) #endif +/* + * Delete an entry from its hash list and move it to the front + * of the LRU list for immediate reuse. + */ #define PURGE(ncp) { \ LIST_REMOVE(ncp, nc_hash); \ ncp->nc_hash.le_prev = 0; \ TAILQ_REMOVE(&nclruhead, ncp, nc_lru); \ - TAILQ_INSERT_HEAD(&nclruhead, ncp, nc_lru); } + TAILQ_INSERT_HEAD(&nclruhead, ncp, nc_lru); \ +} +/* + * Move an entry that has been used to the tail of the LRU list + * so that it will be preserved for future use. + */ #define TOUCH(ncp) { \ - if (ncp->nc_lru.tqe_next == 0) { } else { \ + if (ncp->nc_lru.tqe_next != 0) { \ TAILQ_REMOVE(&nclruhead, ncp, nc_lru); \ TAILQ_INSERT_TAIL(&nclruhead, ncp, nc_lru); \ - NCHNBR(ncp); } } + NCHNBR(ncp); \ + } \ +} /* - * Lookup an entry in the cache + * Lookup an entry in the cache * - * We don't do this if the segment name is long, simply so the cache + * We don't do this if the segment name is long, simply so the cache * can avoid holding long names (which would either waste space, or * add greatly to the complexity). 
* * Lookup is called with dvp pointing to the directory to search, - * cnp pointing to the name of the entry being sought. - * If the lookup succeeds, the vnode is returned in *vpp, and a status - * of -1 is returned. - * If the lookup determines that the name does not exist (negative cacheing), - * a status of ENOENT is returned. - * If the lookup fails, a status of zero is returned. + * cnp pointing to the name of the entry being sought. If the lookup + * succeeds, the vnode is returned in *vpp, and a status of -1 is + * returned. If the lookup determines that the name does not exist + * (negative cacheing), a status of ENOENT is returned. If the lookup + * fails, a status of zero is returned. */ int @@ -127,7 +137,7 @@ cache_lookup(dvp, vpp, cnp) struct vnode **vpp; struct componentname *cnp; { - register struct namecache *ncp,*nnp; + register struct namecache *ncp, *nnp; register struct nchashhead *ncpp; if (!doingcache) { @@ -141,12 +151,12 @@ cache_lookup(dvp, vpp, cnp) return (0); } - ncpp = &nchashtbl[(dvp->v_id + cnp->cn_hash) % nchash]; + ncpp = NCHHASH(dvp, cnp); for (ncp = ncpp->lh_first; ncp != 0; ncp = nnp) { nnp = ncp->nc_hash.le_next; /* If one of the vp's went stale, don't bother anymore. */ if ((ncp->nc_dvpid != ncp->nc_dvp->v_id) || - (ncp->nc_vpid != ncp->nc_vp->v_id)) { + (ncp->nc_vp && ncp->nc_vpid != ncp->nc_vp->v_id)) { nchstats.ncs_falsehits++; PURGE(ncp); continue; @@ -155,12 +165,15 @@ cache_lookup(dvp, vpp, cnp) if (ncp->nc_dvp == dvp && ncp->nc_nlen == cnp->cn_namelen && !bcmp(ncp->nc_name, cnp->cn_nameptr, (u_int)ncp->nc_nlen)) - goto found; /* Fanatism considered bad. 
*/ + break; + } + + /* We failed to find an entry */ + if (ncp == 0) { + nchstats.ncs_miss++; + return (0); } - nchstats.ncs_miss++; - return (0); - found: NCHHIT(ncp); /* We don't want to have an entry, so dump it */ @@ -168,10 +181,10 @@ cache_lookup(dvp, vpp, cnp) nchstats.ncs_badhits++; PURGE(ncp); return (0); - } + } /* We found a "positive" match, return the vnode */ - if (ncp->nc_vp != &nchENOENT) { + if (ncp->nc_vp) { nchstats.ncs_goodhits++; TOUCH(ncp); *vpp = ncp->nc_vp; @@ -187,16 +200,19 @@ cache_lookup(dvp, vpp, cnp) return (0); } - /* The name does not exists */ + /* + * We found a "negative" match, ENOENT notifies client of this match. + * The nc_vpid field records whether this is a whiteout. + */ nchstats.ncs_neghits++; TOUCH(ncp); + cnp->cn_flags |= ncp->nc_vpid; return (ENOENT); } /* * Add an entry to the cache. */ - void cache_enter(dvp, vp, cnp) struct vnode *dvp; @@ -209,12 +225,21 @@ cache_enter(dvp, vp, cnp) if (!doingcache) return; +#ifdef DIAGNOSTIC if (cnp->cn_namelen > NCHNAMLEN) { printf("cache_enter: name too long"); return; } +#endif - if (numcache < numvnodes) { + /* + * We allocate a new entry if we are less than the maximum + * allowed and the one at the front of the LRU list is in use. + * Otherwise we use the one at the front of the LRU list. + */ + if (numcache < desiredvnodes && + ((ncp = nclruhead.tqh_first) == NULL || + ncp->nc_hash.le_prev != 0)) { /* Add one more entry */ ncp = (struct namecache *) malloc((u_long)sizeof *ncp, M_CACHE, M_WAITOK); @@ -231,52 +256,51 @@ cache_enter(dvp, vp, cnp) /* give up */ return; } - - /* If vp is NULL this is a "negative" cache entry */ - if (!vp) - vp = &nchENOENT; - - /* fill in cache info */ + /* + * Fill in cache info, if vp is NULL this is a "negative" cache entry. + * For negative entries, we have to record whether it is a whiteout. + * the whiteout flag is stored in the nc_vpid field which is + * otherwise unused. 
+ */ ncp->nc_vp = vp; - if (vp->v_usage < MAXVNODEUSE) - ++vp->v_usage; - ncp->nc_vpid = vp->v_id; + if (vp) { + ncp->nc_vpid = vp->v_id; + if (vp->v_usage < MAXVNODEUSE) + ++vp->v_usage; + } else + ncp->nc_vpid = cnp->cn_flags & ISWHITEOUT; ncp->nc_dvp = dvp; ncp->nc_dvpid = dvp->v_id; ncp->nc_nlen = cnp->cn_namelen; bcopy(cnp->cn_nameptr, ncp->nc_name, (unsigned)ncp->nc_nlen); TAILQ_INSERT_TAIL(&nclruhead, ncp, nc_lru); - ncpp = &nchashtbl[(dvp->v_id + cnp->cn_hash) % nchash]; + ncpp = NCHHASH(dvp, cnp); LIST_INSERT_HEAD(ncpp, ncp, nc_hash); } /* * Name cache initialization, from vfs_init() when we are booting */ - void nchinit() { - TAILQ_INIT(&nclruhead); nchashtbl = phashinit(desiredvnodes, M_CACHE, &nchash); - cache_purge(&nchENOENT); /* Initialize v_id */ } /* - * Invalidate all entries to a particular vnode. - * - * We actually just increment the v_id, that will do it. The stale entries - * will be purged by lookup as they get found. - * If the v_id wraps around, we need to ditch the entire cache, to avoid - * confusion. - * No valid vnode will ever have (v_id == 0). + * Invalidate all entries to particular vnode. + * + * We actually just increment the v_id, that will do it. The stale entries + * will be purged by lookup as they get found. If the v_id wraps around, we + * need to ditch the entire cache, to avoid confusion. No valid vnode will + * ever have (v_id == 0). */ - void cache_purge(vp) struct vnode *vp; { + struct namecache *ncp; struct nchashhead *ncpp; static u_long nextvnodeid; @@ -284,10 +308,9 @@ cache_purge(vp) if (nextvnodeid != 0) return; for (ncpp = &nchashtbl[nchash - 1]; ncpp >= nchashtbl; ncpp--) { - while(ncpp->lh_first) - PURGE(ncpp->lh_first); + while (ncp = ncpp->lh_first) + PURGE(ncp); } - nchENOENT.v_id = ++nextvnodeid; vp->v_id = ++nextvnodeid; } @@ -296,29 +319,22 @@ cache_purge(vp) * * Since we need to check it anyway, we will flush all the invalid * entries at the same time. 
- * - * If we purge anything, we scan the hash-bucket again. There is only - * a handful of entries, so it cheap and simple. */ - void cache_purgevfs(mp) struct mount *mp; { struct nchashhead *ncpp; - struct namecache *ncp; + struct namecache *ncp, *nnp; /* Scan hash tables for applicable entries */ for (ncpp = &nchashtbl[nchash - 1]; ncpp >= nchashtbl; ncpp--) { - ncp = ncpp->lh_first; - while(ncp) { + for (ncp = ncpp->lh_first; ncp != 0; ncp = nnp) { + nnp = ncp->nc_hash.le_next; if (ncp->nc_dvpid != ncp->nc_dvp->v_id || - ncp->nc_vpid != ncp->nc_vp->v_id || + (ncp->nc_vp && ncp->nc_vpid != ncp->nc_vp->v_id) || ncp->nc_dvp->v_mount == mp) { PURGE(ncp); - ncp = ncpp->lh_first; - } else { - ncp = ncp->nc_hash.le_next; } } } diff --git a/sys/kern/vfs_conf.c b/sys/kern/vfs_conf.c index fb8061b..5a84570 100644 --- a/sys/kern/vfs_conf.c +++ b/sys/kern/vfs_conf.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 1989, 1993 + * Copyright (c) 1989, 1993, 1995 * The Regents of the University of California. All rights reserved. * Copyright (c) 1995 Artisoft, Inc. All Rights Reserved. * @@ -61,26 +61,33 @@ /* * GLOBALS */ -int (*mountroot) __P((void *)); + +/* + * These define the root filesystem, device, and root filesystem type. + */ +struct mount *rootfs; struct vnode *rootvnode; -struct vfsops *mountrootvfsops; +char *mountrootfsname; +/* + * vfs_init() will set maxvfsconf + * to the highest defined type number. + */ +int maxvfsconf; +struct vfsconf *vfsconf; /* * Common root mount code shared by all filesystems */ -#define ROOTDIR "/" #define ROOTNAME "root_device" - - /* - * vfs_mountroot + * vfs_mountrootfs * * Common entry point for root mounts * * PARAMETERS: - * data pointer to the vfs_ops for the FS type mounting + * fsname name of the filesystem * * RETURNS: 0 Success * !0 error number (errno.h) @@ -97,67 +104,44 @@ struct vfsops *mountrootvfsops; * fixing the other file systems, not this code! 
*/ int -vfs_mountroot(data) - void *data; +vfs_mountrootfs(fsname) + char *fsname; { struct mount *mp; - u_int size; int err = 0; struct proc *p = curproc; /* XXX */ - struct vfsops *mnt_op = (struct vfsops *)data; /* * New root mount structure */ - mp = malloc((u_long)sizeof(struct mount), M_MOUNT, M_WAITOK); - bzero((char *)mp, (u_long)sizeof(struct mount)); - mp->mnt_op = mnt_op; - mp->mnt_flag = MNT_ROOTFS; - mp->mnt_vnodecovered = NULLVP; - - /* - * Lock mount point - */ - if( ( err = vfs_lock(mp)) != 0) - goto error_1; - - /* Save "last mounted on" info for mount point (NULL pad)*/ - copystr( ROOTDIR, /* mount point*/ - mp->mnt_stat.f_mntonname, /* save area*/ - MNAMELEN - 1, /* max size*/ - &size); /* real size*/ - bzero( mp->mnt_stat.f_mntonname + size, MNAMELEN - size); - - /* Save "mounted from" info for mount point (NULL pad)*/ - copystr( ROOTNAME, /* device name*/ - mp->mnt_stat.f_mntfromname, /* save area*/ - MNAMELEN - 1, /* max size*/ - &size); /* real size*/ - bzero( mp->mnt_stat.f_mntfromname + size, MNAMELEN - size); + err = vfs_rootmountalloc(fsname, ROOTNAME, &mp); + if (err) + return (err); + mp->mnt_flag |= MNT_ROOTFS; /* * Attempt the mount */ - err = VFS_MOUNT( mp, NULL, NULL, NULL, p); - if( err) + err = VFS_MOUNT(mp, NULL, NULL, NULL, p); + if (err) goto error_2; + simple_lock(&mountlist_slock); /* Add fs to list of mounted file systems*/ - CIRCLEQ_INSERT_TAIL( &mountlist, mp, mnt_list); + CIRCLEQ_INSERT_TAIL(&mountlist, mp, mnt_list); + simple_unlock(&mountlist_slock); - /* Unlock mount point*/ - vfs_unlock(mp); + vfs_unbusy(mp, p); /* root mount, update system time from FS specific data*/ - inittodr( mp->mnt_time); + inittodr(mp->mnt_time); goto success; error_2: /* mount error*/ - /* unlock before failing*/ - vfs_unlock( mp); + vfs_unbusy(mp, p); error_1: /* lock error*/ diff --git a/sys/kern/vfs_export.c b/sys/kern/vfs_export.c index 43f8669..0dea7bd 100644 --- a/sys/kern/vfs_export.c +++ b/sys/kern/vfs_export.c @@ -35,7 +35,7 @@ * OUT 
OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * - * @(#)vfs_subr.c 8.13 (Berkeley) 4/18/94 + * @(#)vfs_subr.c 8.31 (Berkeley) 5/26/95 * $FreeBSD$ */ @@ -75,7 +75,9 @@ #ifdef DDB extern void printlockedvnodes __P((void)); #endif -extern void vclean __P((struct vnode *vp, int flags)); +static void vclean __P((struct vnode *vp, int flags, struct proc *p)); +extern void vgonel __P((struct vnode *vp, struct proc *p)); +unsigned long numvnodes; extern void vfs_unmountroot __P((struct mount *rootfs)); enum vtype iftovt_tab[16] = { @@ -91,15 +93,19 @@ int vttoif_tab[9] = { * Insq/Remq for the vnode usage lists. */ #define bufinsvn(bp, dp) LIST_INSERT_HEAD(dp, bp, b_vnbufs) -#define bufremvn(bp) { \ - LIST_REMOVE(bp, b_vnbufs); \ - (bp)->b_vnbufs.le_next = NOLIST; \ +#define bufremvn(bp) { \ + LIST_REMOVE(bp, b_vnbufs); \ + (bp)->b_vnbufs.le_next = NOLIST; \ } - TAILQ_HEAD(freelst, vnode) vnode_free_list; /* vnode free list */ static u_long freevnodes = 0; struct mntlist mountlist; /* mounted filesystem list */ +struct simplelock mountlist_slock; +static struct simplelock mntid_slock; +struct simplelock mntvnode_slock; +struct simplelock vnode_free_list_slock; +static struct simplelock spechash_slock; int desiredvnodes; SYSCTL_INT(_kern, KERN_MAXVNODES, maxvnodes, CTLFLAG_RW, &desiredvnodes, 0, ""); @@ -117,164 +123,153 @@ vntblinit() { desiredvnodes = maxproc + vm_object_cache_max; + simple_lock_init(&mntvnode_slock); + simple_lock_init(&mntid_slock); + simple_lock_init(&spechash_slock); TAILQ_INIT(&vnode_free_list); + simple_lock_init(&vnode_free_list_slock); CIRCLEQ_INIT(&mountlist); } /* - * Lock a filesystem. - * Used to prevent access to it while mounting and unmounting. + * Mark a mount point as busy. Used to synchronize access and to delay + * unmounting. Interlock is not released on failure. 
*/ int -vfs_lock(mp) - register struct mount *mp; +vfs_busy(mp, flags, interlkp, p) + struct mount *mp; + int flags; + struct simplelock *interlkp; + struct proc *p; { + int lkflags; - while (mp->mnt_flag & MNT_MLOCK) { + if (mp->mnt_flag & MNT_UNMOUNT) { + if (flags & LK_NOWAIT) + return (ENOENT); mp->mnt_flag |= MNT_MWAIT; - (void) tsleep((caddr_t) mp, PVFS, "vfslck", 0); + if (interlkp) { + simple_unlock(interlkp); + } + /* + * Since all busy locks are shared except the exclusive + * lock granted when unmounting, the only place that a + * wakeup needs to be done is at the release of the + * exclusive lock at the end of dounmount. + */ + tsleep((caddr_t)mp, PVFS, "vfs_busy", 0); + if (interlkp) { + simple_lock(interlkp); + } + return (ENOENT); } - mp->mnt_flag |= MNT_MLOCK; + lkflags = LK_SHARED; + if (interlkp) + lkflags |= LK_INTERLOCK; + if (lockmgr(&mp->mnt_lock, lkflags, interlkp, p)) + panic("vfs_busy: unexpected lock failure"); return (0); } /* - * Unlock a locked filesystem. - * Panic if filesystem is not locked. + * Free a busy filesystem. */ void -vfs_unlock(mp) - register struct mount *mp; +vfs_unbusy(mp, p) + struct mount *mp; + struct proc *p; { - if ((mp->mnt_flag & MNT_MLOCK) == 0) - panic("vfs_unlock: not locked"); - mp->mnt_flag &= ~MNT_MLOCK; - if (mp->mnt_flag & MNT_MWAIT) { - mp->mnt_flag &= ~MNT_MWAIT; - wakeup((caddr_t) mp); - } + lockmgr(&mp->mnt_lock, LK_RELEASE, NULL, p); } /* - * Mark a mount point as busy. - * Used to synchronize access and to delay unmounting. + * Lookup a filesystem type, and if found allocate and initialize + * a mount structure for it. + * + * Devname is usually updated by mount(8) after booting. 
*/ int -vfs_busy(mp) - register struct mount *mp; +vfs_rootmountalloc(fstypename, devname, mpp) + char *fstypename; + char *devname; + struct mount **mpp; { + struct proc *p = curproc; /* XXX */ + struct vfsconf *vfsp; + struct mount *mp; - while (mp->mnt_flag & MNT_MPBUSY) { - mp->mnt_flag |= MNT_MPWANT; - (void) tsleep((caddr_t) &mp->mnt_flag, PVFS, "vfsbsy", 0); - } - if (mp->mnt_flag & MNT_UNMOUNT) - return (1); - mp->mnt_flag |= MNT_MPBUSY; + for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next) + if (!strcmp(vfsp->vfc_name, fstypename)) + break; + if (vfsp == NULL) + return (ENODEV); + mp = malloc((u_long)sizeof(struct mount), M_MOUNT, M_WAITOK); + bzero((char *)mp, (u_long)sizeof(struct mount)); + lockinit(&mp->mnt_lock, PVFS, "vfslock", 0, 0); + (void)vfs_busy(mp, LK_NOWAIT, 0, p); + LIST_INIT(&mp->mnt_vnodelist); + mp->mnt_vfc = vfsp; + mp->mnt_op = vfsp->vfc_vfsops; + mp->mnt_flag = MNT_RDONLY; + mp->mnt_vnodecovered = NULLVP; + vfsp->vfc_refcount++; + mp->mnt_stat.f_type = vfsp->vfc_typenum; + mp->mnt_flag |= vfsp->vfc_flags & MNT_VISFLAGMASK; + strncpy(mp->mnt_stat.f_fstypename, vfsp->vfc_name, MFSNAMELEN); + mp->mnt_stat.f_mntonname[0] = '/'; + mp->mnt_stat.f_mntonname[1] = 0; + (void) copystr(devname, mp->mnt_stat.f_mntfromname, MNAMELEN - 1, 0); + *mpp = mp; return (0); } /* - * Free a busy filesystem. - * Panic if filesystem is not busy. 
- */ -void -vfs_unbusy(mp) - register struct mount *mp; -{ - - if ((mp->mnt_flag & MNT_MPBUSY) == 0) - panic("vfs_unbusy: not busy"); - mp->mnt_flag &= ~MNT_MPBUSY; - if (mp->mnt_flag & MNT_MPWANT) { - mp->mnt_flag &= ~MNT_MPWANT; - wakeup((caddr_t) &mp->mnt_flag); - } -} - -void -vfs_unmountroot(struct mount *rootfs) -{ - struct mount *mp = rootfs; - int error; - - if (vfs_busy(mp)) { - printf("failed to unmount root\n"); - return; - } - mp->mnt_flag |= MNT_UNMOUNT; - if ((error = vfs_lock(mp))) { - printf("lock of root filesystem failed (%d)\n", error); - return; - } - vnode_pager_umount(mp); /* release cached vnodes */ - cache_purgevfs(mp); /* remove cache entries for this file sys */ - - if ((error = VFS_SYNC(mp, MNT_WAIT, initproc->p_ucred, initproc))) - printf("sync of root filesystem failed (%d)\n", error); - - if ((error = VFS_UNMOUNT(mp, MNT_FORCE, initproc))) { - printf("unmount of root filesystem failed ("); - if (error == EBUSY) - printf("BUSY)\n"); - else - printf("%d)\n", error); - } - mp->mnt_flag &= ~MNT_UNMOUNT; - vfs_unbusy(mp); -} - -/* - * Unmount all filesystems. Should only be called by halt(). + * Find an appropriate filesystem to use for the root. If a filesystem + * has not been preselected, walk through the list of known filesystems + * trying those that have mountroot routines, and try them until one + * works or we have tried them all. 
*/ -void -vfs_unmountall() +#ifdef notdef /* XXX JH */ +int +lite2_vfs_mountroot(void) { - struct mount *mp, *nmp, *rootfs = NULL; + struct vfsconf *vfsp; + extern int (*lite2_mountroot)(void); int error; - /* unmount all but rootfs */ - for (mp = mountlist.cqh_last; mp != (void *)&mountlist; mp = nmp) { - nmp = mp->mnt_list.cqe_prev; - - if (mp->mnt_flag & MNT_ROOTFS) { - rootfs = mp; + if (lite2_mountroot != NULL) + return ((*lite2_mountroot)()); + for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next) { + if (vfsp->vfc_mountroot == NULL) continue; - } - error = dounmount(mp, MNT_FORCE, initproc); - if (error) { - printf("unmount of %s failed (", mp->mnt_stat.f_mntonname); - if (error == EBUSY) - printf("BUSY)\n"); - else - printf("%d)\n", error); - } - } - - /* and finally... */ - if (rootfs) { - vfs_unmountroot(rootfs); - } else { - printf("no root filesystem\n"); + if ((error = (*vfsp->vfc_mountroot)()) == 0) + return (0); + printf("%s_mountroot failed: %d\n", vfsp->vfc_name, error); } + return (ENODEV); } +#endif /* * Lookup a mount point by filesystem identifier. 
*/ struct mount * -getvfs(fsid) +vfs_getvfs(fsid) fsid_t *fsid; { register struct mount *mp; + simple_lock(&mountlist_slock); for (mp = mountlist.cqh_first; mp != (void *)&mountlist; mp = mp->mnt_list.cqe_next) { if (mp->mnt_stat.f_fsid.val[0] == fsid->val[0] && - mp->mnt_stat.f_fsid.val[1] == fsid->val[1]) + mp->mnt_stat.f_fsid.val[1] == fsid->val[1]) { + simple_unlock(&mountlist_slock); return (mp); + } } + simple_unlock(&mountlist_slock); return ((struct mount *) 0); } @@ -282,14 +277,16 @@ getvfs(fsid) * Get a new unique fsid */ void -getnewfsid(mp, mtype) +vfs_getnewfsid(mp) struct mount *mp; - int mtype; { static u_short xxxfs_mntid; fsid_t tfsid; + int mtype; + simple_lock(&mntid_slock); + mtype = mp->mnt_vfc->vfc_typenum; mp->mnt_stat.f_fsid.val[0] = makedev(nblkdev + mtype, 0); mp->mnt_stat.f_fsid.val[1] = mtype; if (xxxfs_mntid == 0) @@ -297,12 +294,13 @@ getnewfsid(mp, mtype) tfsid.val[0] = makedev(nblkdev + mtype, xxxfs_mntid); tfsid.val[1] = mtype; if (mountlist.cqh_first != (void *)&mountlist) { - while (getvfs(&tfsid)) { + while (vfs_getvfs(&tfsid)) { tfsid.val[0]++; xxxfs_mntid++; } } mp->mnt_stat.f_fsid.val[0] = tfsid.val[0]; + simple_unlock(&mntid_slock); } /* @@ -326,6 +324,35 @@ vattr_null(vap) vap->va_vaflags = 0; } +void +vfs_unmountroot(struct mount *rootfs) +{ + struct proc *p = curproc; /* XXX */ + struct mount *mp = rootfs; + int error; + + if (vfs_busy(mp, LK_NOWAIT, &mountlist_slock, p)) { + printf("failed to unmount root\n"); + return; + } + mp->mnt_flag |= MNT_UNMOUNT; + vnode_pager_umount(mp); /* release cached vnodes */ + cache_purgevfs(mp); /* remove cache entries for this file sys */ + + if ((error = VFS_SYNC(mp, MNT_WAIT, initproc->p_ucred, initproc))) + printf("sync of root filesystem failed (%d)\n", error); + + if ((error = VFS_UNMOUNT(mp, MNT_FORCE, initproc))) { + printf("unmount of root filesystem failed ("); + if (error == EBUSY) + printf("BUSY)\n"); + else + printf("%d)\n", error); + } + mp->mnt_flag &= ~MNT_UNMOUNT; + 
vfs_unbusy(mp, p); +} + /* * Routines having to do with the management of the vnode table. */ @@ -341,10 +368,11 @@ getnewvnode(tag, mp, vops, vpp) vop_t **vops; struct vnode **vpp; { - register struct vnode *vp; + struct proc *p = curproc; /* XXX */ + struct vnode *vp; + simple_lock(&vnode_free_list_slock); retry: - vp = vnode_free_list.tqh_first; /* * we allocate a new vnode if * 1. we don't have any free @@ -357,12 +385,31 @@ retry: */ if (freevnodes < (numvnodes >> 2) || numvnodes < desiredvnodes || - vp == NULL) { + vnode_free_list.tqh_first == NULL) { + simple_unlock(&vnode_free_list_slock); vp = (struct vnode *) malloc((u_long) sizeof *vp, M_VNODE, M_WAITOK); bzero((char *) vp, sizeof *vp); numvnodes++; } else { + for (vp = vnode_free_list.tqh_first; + vp != NULLVP; vp = vp->v_freelist.tqe_next) { + if (simple_lock_try(&vp->v_interlock)) + break; + } + /* + * Unless this is a bad time of the month, at most + * the first NCPUS items on the free list are + * locked, so this is close enough to being empty. + */ + if (vp == NULLVP) { + simple_unlock(&vnode_free_list_slock); + tablefull("vnode"); + *vpp = 0; + return (ENFILE); + } + if (vp->v_usecount) + panic("free vnode isn't"); TAILQ_REMOVE(&vnode_free_list, vp, v_freelist); if (vp->v_usage > 0) { --vp->v_usage; @@ -370,14 +417,16 @@ retry: goto retry; } freevnodes--; - if (vp->v_usecount) - panic("free vnode isn't"); /* see comment on why 0xdeadb is set at end of vgone (below) */ vp->v_freelist.tqe_prev = (struct vnode **) 0xdeadb; + simple_unlock(&vnode_free_list_slock); vp->v_lease = NULL; if (vp->v_type != VBAD) - vgone(vp); + vgonel(vp, p); + else { + simple_unlock(&vp->v_interlock); + } #ifdef DIAGNOSTIC { @@ -421,6 +470,7 @@ insmntque(vp, mp) register struct mount *mp; { + simple_lock(&mntvnode_slock); /* * Delete from old mount point vnode list, if on one. */ @@ -429,9 +479,12 @@ insmntque(vp, mp) /* * Insert into list of vnodes for the new mount point, if available. 
*/ - if ((vp->v_mount = mp) == NULL) + if ((vp->v_mount = mp) == NULL) { + simple_unlock(&mntvnode_slock); return; + } LIST_INSERT_HEAD(&mp->mnt_vnodelist, vp, v_mntvnodes); + simple_unlock(&mntvnode_slock); } /* @@ -723,7 +776,8 @@ checkalias(nvp, nvp_rdev, mp) dev_t nvp_rdev; struct mount *mp; { - register struct vnode *vp; + struct proc *p = curproc; /* XXX */ + struct vnode *vp; struct vnode **vpp; if (nvp->v_type != VBLK && nvp->v_type != VCHR) @@ -731,18 +785,24 @@ checkalias(nvp, nvp_rdev, mp) vpp = &speclisth[SPECHASH(nvp_rdev)]; loop: + simple_lock(&spechash_slock); for (vp = *vpp; vp; vp = vp->v_specnext) { if (nvp_rdev != vp->v_rdev || nvp->v_type != vp->v_type) continue; /* * Alias, but not in use, so flush it out. */ + simple_lock(&vp->v_interlock); if (vp->v_usecount == 0) { - vgone(vp); + simple_unlock(&spechash_slock); + vgonel(vp, p); goto loop; } - if (vget(vp, 1)) + simple_unlock(&spechash_slock); + if (vget(vp, LK_EXCLUSIVE | LK_INTERLOCK, p)) { goto loop; + } + simple_lock(&spechash_slock); break; } @@ -753,16 +813,19 @@ loop: nvp->v_hashchain = vpp; nvp->v_specnext = *vpp; nvp->v_specflags = 0; + simple_unlock(&spechash_slock); *vpp = nvp; - if (vp != NULL) { + if (vp != NULLVP) { nvp->v_flag |= VALIASED; vp->v_flag |= VALIASED; vput(vp); } return (NULLVP); } - VOP_UNLOCK(vp); - vclean(vp, 0); + simple_unlock(&spechash_slock); + VOP_UNLOCK(vp, 0, p); + simple_lock(&vp->v_interlock); + vclean(vp, 0, p); vp->v_op = nvp->v_op; vp->v_tag = nvp->v_tag; nvp->v_type = VNON; @@ -779,47 +842,162 @@ loop: * been changed to a new file system type). */ int -vget(vp, lockflag) +vget(vp, flags, p) register struct vnode *vp; - int lockflag; + int flags; + struct proc *p; { + int error; /* - * If the vnode is in the process of being cleaned out for another - * use, we wait for the cleaning to finish and then return failure. 
- * Cleaning is determined either by checking that the VXLOCK flag is - * set, or that the use count is zero with the back pointer set to - * show that it has been removed from the free list by getnewvnode. - * The VXLOCK flag may not have been set yet because vclean is blocked - * in the VOP_LOCK call waiting for the VOP_INACTIVE to complete. + * If the vnode is in the process of being cleaned out for + * another use, we wait for the cleaning to finish and then + * return failure. Cleaning is determined by checking that + * the VXLOCK flag is set. */ - if ((vp->v_flag & VXLOCK) || - (vp->v_usecount == 0 && - vp->v_freelist.tqe_prev == (struct vnode **) 0xdeadb)) { + if ((flags & LK_INTERLOCK) == 0) { + simple_lock(&vp->v_interlock); + } + if (vp->v_flag & VXLOCK) { vp->v_flag |= VXWANT; - (void) tsleep((caddr_t) vp, PINOD, "vget", 0); - return (1); + simple_unlock(&vp->v_interlock); + tsleep((caddr_t)vp, PINOD, "vget", 0); + return (ENOENT); } if (vp->v_usecount == 0) { + simple_lock(&vnode_free_list_slock); TAILQ_REMOVE(&vnode_free_list, vp, v_freelist); + simple_unlock(&vnode_free_list_slock); freevnodes--; } vp->v_usecount++; - /* * Create the VM object, if needed */ if ((vp->v_type == VREG) && ((vp->v_object == NULL) || (vp->v_object->flags & OBJ_VFS_REF) == 0)) { + /* + * XXX vfs_object_create probably needs the interlock. + */ + simple_unlock(&vp->v_interlock); vfs_object_create(vp, curproc, curproc->p_ucred, 0); + simple_lock(&vp->v_interlock); + } + if (flags & LK_TYPE_MASK) { + if (error = vn_lock(vp, flags | LK_INTERLOCK, p)) + vrele(vp); + return (error); } - if (lockflag) - VOP_LOCK(vp); + simple_unlock(&vp->v_interlock); + return (0); +} + +/* + * Stubs to use when there is no locking to be done on the underlying object. + * A minimal shared lock is necessary to ensure that the underlying object + * is not revoked while an operation is in progress. So, an active shared + * count is maintained in an auxillary vnode lock structure. 
+ */ +int +vop_nolock(ap) + struct vop_lock_args /* { + struct vnode *a_vp; + int a_flags; + struct proc *a_p; + } */ *ap; +{ +#ifdef notyet + /* + * This code cannot be used until all the non-locking filesystems + * (notably NFS) are converted to properly lock and release nodes. + * Also, certain vnode operations change the locking state within + * the operation (create, mknod, remove, link, rename, mkdir, rmdir, + * and symlink). Ideally these operations should not change the + * lock state, but should be changed to let the caller of the + * function unlock them. Otherwise all intermediate vnode layers + * (such as union, umapfs, etc) must catch these functions to do + * the necessary locking at their layer. Note that the inactive + * and lookup operations also change their lock state, but this + * cannot be avoided, so these two operations will always need + * to be handled in intermediate layers. + */ + struct vnode *vp = ap->a_vp; + int vnflags, flags = ap->a_flags; + if (vp->v_vnlock == NULL) { + if ((flags & LK_TYPE_MASK) == LK_DRAIN) + return (0); + MALLOC(vp->v_vnlock, struct lock *, sizeof(struct lock), + M_VNODE, M_WAITOK); + lockinit(vp->v_vnlock, PVFS, "vnlock", 0, 0); + } + switch (flags & LK_TYPE_MASK) { + case LK_DRAIN: + vnflags = LK_DRAIN; + break; + case LK_EXCLUSIVE: + case LK_SHARED: + vnflags = LK_SHARED; + break; + case LK_UPGRADE: + case LK_EXCLUPGRADE: + case LK_DOWNGRADE: + return (0); + case LK_RELEASE: + default: + panic("vop_nolock: bad operation %d", flags & LK_TYPE_MASK); + } + if (flags & LK_INTERLOCK) + vnflags |= LK_INTERLOCK; + return(lockmgr(vp->v_vnlock, vnflags, &vp->v_interlock, ap->a_p)); +#else /* for now */ + /* + * Since we are not using the lock manager, we must clear + * the interlock here. + */ + if (ap->a_flags & LK_INTERLOCK) { + simple_unlock(&ap->a_vp->v_interlock); + } return (0); +#endif +} + +/* + * Decrement the active use count. 
+ */ +int +vop_nounlock(ap) + struct vop_unlock_args /* { + struct vnode *a_vp; + int a_flags; + struct proc *a_p; + } */ *ap; +{ + struct vnode *vp = ap->a_vp; + + if (vp->v_vnlock == NULL) + return (0); + return (lockmgr(vp->v_vnlock, LK_RELEASE, NULL, ap->a_p)); +} + +/* + * Return whether or not the node is in use. + */ +int +vop_noislocked(ap) + struct vop_islocked_args /* { + struct vnode *a_vp; + } */ *ap; +{ + struct vnode *vp = ap->a_vp; + + if (vp->v_vnlock == NULL) + return (0); + return (lockstatus(vp->v_vnlock)); } +/* #ifdef DIAGNOSTIC */ /* * Vnode reference, just increment the count */ @@ -827,6 +1005,7 @@ void vref(vp) struct vnode *vp; { + simple_lock(&vp->v_interlock); if (vp->v_usecount <= 0) panic("vref used where vget required"); @@ -840,8 +1019,11 @@ vref(vp) * the object is created. This is necessary to * keep the system from re-entrantly doing it * multiple times. + * XXX vfs_object_create probably needs the interlock? */ + simple_unlock(&vp->v_interlock); vfs_object_create(vp, curproc, curproc->p_ucred, 0); + simple_lock(&vp->v_interlock); } } @@ -850,9 +1032,9 @@ vref(vp) */ void vput(vp) - register struct vnode *vp; + struct vnode *vp; { - VOP_UNLOCK(vp); + VOP_UNLOCK(vp, 0, curproc); vrele(vp); } @@ -862,33 +1044,38 @@ vput(vp) */ void vrele(vp) - register struct vnode *vp; + struct vnode *vp; { + struct proc *p = curproc; /* XXX */ #ifdef DIAGNOSTIC if (vp == NULL) panic("vrele: null vp"); #endif - + simple_lock(&vp->v_interlock); vp->v_usecount--; if ((vp->v_usecount == 1) && vp->v_object && (vp->v_object->flags & OBJ_VFS_REF)) { vp->v_object->flags &= ~OBJ_VFS_REF; + simple_unlock(&vp->v_interlock); vm_object_deallocate(vp->v_object); return; } - if (vp->v_usecount > 0) + if (vp->v_usecount > 0) { + simple_unlock(&vp->v_interlock); return; + } if (vp->v_usecount < 0) { #ifdef DIAGNOSTIC vprint("vrele: negative ref count", vp); #endif - panic("vrele: negative reference cnt"); + panic("vrele: negative ref cnt"); } + 
simple_lock(&vnode_free_list_slock); if (vp->v_flag & VAGE) { if(vp->v_tag != VT_TFS) TAILQ_INSERT_HEAD(&vnode_free_list, vp, v_freelist); @@ -898,9 +1085,12 @@ vrele(vp) if(vp->v_tag != VT_TFS) TAILQ_INSERT_TAIL(&vnode_free_list, vp, v_freelist); } + simple_unlock(&vnode_free_list_slock); + freevnodes++; - VOP_INACTIVE(vp); + if (vn_lock(vp, LK_EXCLUSIVE | LK_INTERLOCK, p) == 0) + VOP_INACTIVE(vp, p); } #ifdef DIAGNOSTIC @@ -912,7 +1102,9 @@ vhold(vp) register struct vnode *vp; { + simple_lock(&vp->v_interlock); vp->v_holdcnt++; + simple_unlock(&vp->v_interlock); } /* @@ -923,9 +1115,11 @@ holdrele(vp) register struct vnode *vp; { + simple_lock(&vp->v_interlock); if (vp->v_holdcnt <= 0) panic("holdrele: holdcnt"); vp->v_holdcnt--; + simple_unlock(&vp->v_interlock); } #endif /* DIAGNOSTIC */ @@ -948,11 +1142,11 @@ vflush(mp, skipvp, flags) struct vnode *skipvp; int flags; { - register struct vnode *vp, *nvp; + struct proc *p = curproc; /* XXX */ + struct vnode *vp, *nvp; int busy = 0; - if ((mp->mnt_flag & MNT_MPBUSY) == 0) - panic("vflush: not busy"); + simple_lock(&mntvnode_slock); loop: for (vp = mp->mnt_vnodelist.lh_first; vp; vp = nvp) { /* @@ -967,24 +1161,34 @@ loop: */ if (vp == skipvp) continue; + + simple_lock(&vp->v_interlock); /* * Skip over a vnodes marked VSYSTEM. */ - if ((flags & SKIPSYSTEM) && (vp->v_flag & VSYSTEM)) + if ((flags & SKIPSYSTEM) && (vp->v_flag & VSYSTEM)) { + simple_unlock(&vp->v_interlock); continue; + } /* * If WRITECLOSE is set, only flush out regular file vnodes * open for writing. 
*/ if ((flags & WRITECLOSE) && - (vp->v_writecount == 0 || vp->v_type != VREG)) + (vp->v_writecount == 0 || vp->v_type != VREG)) { + simple_unlock(&vp->v_interlock); continue; + } if (vp->v_object && (vp->v_object->flags & OBJ_VFS_REF)) { + simple_unlock(&vp->v_interlock); + simple_unlock(&mntvnode_slock); vm_object_reference(vp->v_object); pager_cache(vp->v_object, FALSE); vp->v_object->flags &= ~OBJ_VFS_REF; vm_object_deallocate(vp->v_object); + simple_lock(&mntvnode_slock); + simple_lock(&vp->v_interlock); } /* @@ -992,7 +1196,9 @@ loop: * vnode data structures and we are done. */ if (vp->v_usecount == 0) { - vgone(vp); + simple_unlock(&mntvnode_slock); + vgonel(vp, p); + simple_lock(&mntvnode_slock); continue; } @@ -1002,21 +1208,25 @@ loop: * all other files, just kill them. */ if (flags & FORCECLOSE) { + simple_unlock(&mntvnode_slock); if (vp->v_type != VBLK && vp->v_type != VCHR) { - vgone(vp); + vgonel(vp, p); } else { - vclean(vp, 0); + vclean(vp, 0, p); vp->v_op = spec_vnodeop_p; insmntque(vp, (struct mount *) 0); } + simple_lock(&mntvnode_slock); continue; } #ifdef DIAGNOSTIC if (busyprt) vprint("vflush: busy vnode", vp); #endif + simple_unlock(&vp->v_interlock); busy++; } + simple_unlock(&mntvnode_slock); if (busy) return (EBUSY); return (0); @@ -1025,8 +1235,8 @@ loop: /* * Disassociate the underlying file system from a vnode. */ -void -vclean(struct vnode *vp, int flags) +static void +vclean(struct vnode *vp, int flags, struct proc *p) { int active; @@ -1036,15 +1246,7 @@ vclean(struct vnode *vp, int flags) * generate a race against ourselves to recycle it. */ if ((active = vp->v_usecount)) - VREF(vp); - /* - * Even if the count is zero, the VOP_INACTIVE routine may still have - * the object locked while it cleans it out. The VOP_LOCK ensures that - * the VOP_INACTIVE routine is done with its work. For active vnodes, - * it ensures that no other activity can occur while the underlying - * object is being cleaned out. 
- */ - VOP_LOCK(vp); + vp->v_usecount++; /* * Prevent the vnode from being recycled or brought into use while we * clean it out. @@ -1053,31 +1255,48 @@ vclean(struct vnode *vp, int flags) panic("vclean: deadlock"); vp->v_flag |= VXLOCK; /* - * Clean out any buffers associated with the vnode. + * Even if the count is zero, the VOP_INACTIVE routine may still + * have the object locked while it cleans it out. The VOP_LOCK + * ensures that the VOP_INACTIVE routine is done with its work. + * For active vnodes, it ensures that no other activity can + * occur while the underlying object is being cleaned out. */ - if (flags & DOCLOSE) - vinvalbuf(vp, V_SAVE, NOCRED, NULL, 0, 0); + VOP_LOCK(vp, LK_DRAIN | LK_INTERLOCK, p); /* - * Any other processes trying to obtain this lock must first wait for - * VXLOCK to clear, then call the new lock operation. + * Clean out any buffers associated with the vnode. */ - VOP_UNLOCK(vp); + if (flags & DOCLOSE) + vinvalbuf(vp, V_SAVE, NOCRED, p, 0, 0); /* - * If purging an active vnode, it must be closed and deactivated - * before being reclaimed. + * If purging an active vnode, it must be closed and + * deactivated before being reclaimed. Note that the + * VOP_INACTIVE will unlock the vnode. */ if (active) { if (flags & DOCLOSE) - VOP_CLOSE(vp, FNONBLOCK, NOCRED, NULL); - VOP_INACTIVE(vp); + VOP_CLOSE(vp, IO_NDELAY, NOCRED, p); + VOP_INACTIVE(vp, p); + } else { + /* + * Any other processes trying to obtain this lock must first + * wait for VXLOCK to clear, then call the new lock operation. + */ + VOP_UNLOCK(vp, 0, p); } /* * Reclaim the vnode. */ - if (VOP_RECLAIM(vp)) + if (VOP_RECLAIM(vp, p)) panic("vclean: cannot reclaim"); if (active) vrele(vp); + cache_purge(vp); + if (vp->v_vnlock) { + if ((vp->v_vnlock->lk_flags & LK_DRAINED) == 0) + vprint("vclean: lock not drained", vp); + FREE(vp->v_vnlock, M_VNODE); + vp->v_vnlock = NULL; + } /* * Done with purge, notify sleepers of the grim news. 
@@ -1092,46 +1311,91 @@ vclean(struct vnode *vp, int flags) } /* - * Eliminate all activity associated with the requested vnode + * Eliminate all activity associated with the requested vnode * and with all vnodes aliased to the requested vnode. */ -void -vgoneall(vp) - register struct vnode *vp; +int +vop_revoke(ap) + struct vop_revoke_args /* { + struct vnode *a_vp; + int a_flags; + } */ *ap; { - register struct vnode *vq; + struct vnode *vp, *vq; + struct proc *p = curproc; /* XXX */ + +#ifdef DIAGNOSTIC + if ((ap->a_flags & REVOKEALL) == 0) + panic("vop_revoke"); +#endif + + vp = ap->a_vp; + simple_lock(&vp->v_interlock); if (vp->v_flag & VALIASED) { /* - * If a vgone (or vclean) is already in progress, wait until - * it is done and return. + * If a vgone (or vclean) is already in progress, + * wait until it is done and return. */ if (vp->v_flag & VXLOCK) { vp->v_flag |= VXWANT; - (void) tsleep((caddr_t) vp, PINOD, "vgall", 0); - return; + simple_unlock(&vp->v_interlock); + tsleep((caddr_t)vp, PINOD, "vop_revokeall", 0); + return (0); } /* - * Ensure that vp will not be vgone'd while we are eliminating - * its aliases. + * Ensure that vp will not be vgone'd while we + * are eliminating its aliases. */ vp->v_flag |= VXLOCK; + simple_unlock(&vp->v_interlock); while (vp->v_flag & VALIASED) { + simple_lock(&spechash_slock); for (vq = *vp->v_hashchain; vq; vq = vq->v_specnext) { if (vq->v_rdev != vp->v_rdev || vq->v_type != vp->v_type || vp == vq) continue; + simple_unlock(&spechash_slock); vgone(vq); break; } + if (vq == NULLVP) { + simple_unlock(&spechash_slock); + } } /* - * Remove the lock so that vgone below will really eliminate - * the vnode after which time vgone will awaken any sleepers. + * Remove the lock so that vgone below will + * really eliminate the vnode after which time + * vgone will awaken any sleepers. 
*/ + simple_lock(&vp->v_interlock); vp->v_flag &= ~VXLOCK; } - vgone(vp); + vgonel(vp, p); + return (0); +} + +/* + * Recycle an unused vnode to the front of the free list. + * Release the passed interlock if the vnode will be recycled. + */ +int +vrecycle(vp, inter_lkp, p) + struct vnode *vp; + struct simplelock *inter_lkp; + struct proc *p; +{ + + simple_lock(&vp->v_interlock); + if (vp->v_usecount == 0) { + if (inter_lkp) { + simple_unlock(inter_lkp); + } + vgonel(vp, p); + return (1); + } + simple_unlock(&vp->v_interlock); + return (0); } /* @@ -1142,16 +1406,31 @@ void vgone(vp) register struct vnode *vp; { - register struct vnode *vq; + struct proc *p = curproc; /* XXX */ + + simple_lock(&vp->v_interlock); + vgonel(vp, p); +} + +/* + * vgone, with the vp interlock held. + */ +void +vgonel(vp, p) + struct vnode *vp; + struct proc *p; +{ + struct vnode *vq; struct vnode *vx; /* - * If a vgone (or vclean) is already in progress, wait until it is - * done and return. + * If a vgone (or vclean) is already in progress, + * wait until it is done and return. */ if (vp->v_flag & VXLOCK) { vp->v_flag |= VXWANT; - (void) tsleep((caddr_t) vp, PINOD, "vgone", 0); + simple_unlock(&vp->v_interlock); + tsleep((caddr_t)vp, PINOD, "vgone", 0); return; } @@ -1162,18 +1441,18 @@ vgone(vp) /* * Clean out the filesystem specific data. */ - vclean(vp, DOCLOSE); + vclean(vp, DOCLOSE, p); /* * Delete from old mount point vnode list, if on one. */ - if (vp->v_mount != NULL) { - LIST_REMOVE(vp, v_mntvnodes); - vp->v_mount = NULL; - } + if (vp->v_mount != NULL) + insmntque(vp, (struct mount *)0); /* - * If special device, remove it from special device alias list. + * If special device, remove it from special device alias list + * if it is on one. 
*/ - if (vp->v_type == VBLK || vp->v_type == VCHR) { + if ((vp->v_type == VBLK || vp->v_type == VCHR) && vp->v_specinfo != 0) { + simple_lock(&spechash_slock); if (*vp->v_hashchain == vp) { *vp->v_hashchain = vp->v_specnext; } else { @@ -1202,28 +1481,34 @@ vgone(vp) vx->v_flag &= ~VALIASED; vp->v_flag &= ~VALIASED; } + simple_unlock(&spechash_slock); FREE(vp->v_specinfo, M_VNODE); vp->v_specinfo = NULL; } + /* - * If it is on the freelist and not already at the head, move it to - * the head of the list. The test of the back pointer and the - * reference count of zero is because it will be removed from the free - * list by getnewvnode, but will not have its reference count - * incremented until after calling vgone. If the reference count were - * incremented first, vgone would (incorrectly) try to close the - * previous instance of the underlying object. So, the back pointer is - * explicitly set to `0xdeadb' in getnewvnode after removing it from - * the freelist to ensure that we do not try to move it here. + * If it is on the freelist and not already at the head, + * move it to the head of the list. The test of the back + * pointer and the reference count of zero is because + * it will be removed from the free list by getnewvnode, + * but will not have its reference count incremented until + * after calling vgone. If the reference count were + * incremented first, vgone would (incorrectly) try to + * close the previous instance of the underlying object. + * So, the back pointer is explicitly set to `0xdeadb' in + * getnewvnode after removing it from the freelist to ensure + * that we do not try to move it here. 
*/ - if (vp->v_usecount == 0 && - vp->v_freelist.tqe_prev != (struct vnode **) 0xdeadb && - vnode_free_list.tqh_first != vp) { - if(vp->v_tag != VT_TFS) { + if (vp->v_usecount == 0) { + simple_lock(&vnode_free_list_slock); + if ((vp->v_freelist.tqe_prev != (struct vnode **)0xdeadb) && + vnode_free_list.tqh_first != vp) { TAILQ_REMOVE(&vnode_free_list, vp, v_freelist); TAILQ_INSERT_HEAD(&vnode_free_list, vp, v_freelist); } + simple_unlock(&vnode_free_list_slock); } + vp->v_type = VBAD; } @@ -1254,7 +1539,7 @@ int vcount(vp) register struct vnode *vp; { - register struct vnode *vq, *vnext; + struct vnode *vq, *vnext; int count; loop: @@ -1354,6 +1639,7 @@ int kinfo_vgetfailed; static int sysctl_vnode SYSCTL_HANDLER_ARGS { + struct proc *p = curproc; /* XXX */ register struct mount *mp, *nmp; struct vnode *vp; int error; @@ -1368,7 +1654,7 @@ sysctl_vnode SYSCTL_HANDLER_ARGS for (mp = mountlist.cqh_first; mp != (void *)&mountlist; mp = nmp) { nmp = mp->mnt_list.cqe_next; - if (vfs_busy(mp)) + if (vfs_busy(mp, LK_NOWAIT, &mountlist_slock, p)) continue; again: for (vp = mp->mnt_vnodelist.lh_first; @@ -1386,11 +1672,11 @@ again: } if ((error = SYSCTL_OUT(req, &vp, VPTRSZ)) || (error = SYSCTL_OUT(req, vp, VNODESZ))) { - vfs_unbusy(mp); + vfs_unbusy(mp, p); return (error); } } - vfs_unbusy(mp); + vfs_unbusy(mp, p); } return (0); @@ -1404,22 +1690,63 @@ SYSCTL_PROC(_kern, KERN_VNODE, vnode, CTLTYPE_OPAQUE|CTLFLAG_RD, */ int vfs_mountedon(vp) - register struct vnode *vp; + struct vnode *vp; { - register struct vnode *vq; + struct vnode *vq; + int error = 0; if (vp->v_specflags & SI_MOUNTEDON) return (EBUSY); if (vp->v_flag & VALIASED) { + simple_lock(&spechash_slock); for (vq = *vp->v_hashchain; vq; vq = vq->v_specnext) { if (vq->v_rdev != vp->v_rdev || vq->v_type != vp->v_type) continue; - if (vq->v_specflags & SI_MOUNTEDON) - return (EBUSY); + if (vq->v_specflags & SI_MOUNTEDON) { + error = EBUSY; + break; + } } + simple_unlock(&spechash_slock); + } + return (error); +} + 
+/* + * Unmount all filesystems. The list is traversed in reverse order + * of mounting to avoid dependencies. Should only be called by halt(). + */ +void +vfs_unmountall() +{ + struct mount *mp, *nmp, *rootfs = NULL; + int error; + + /* unmount all but rootfs */ + for (mp = mountlist.cqh_last; mp != (void *)&mountlist; mp = nmp) { + nmp = mp->mnt_list.cqe_prev; + + if (mp->mnt_flag & MNT_ROOTFS) { + rootfs = mp; + continue; + } + error = dounmount(mp, MNT_FORCE, initproc); + if (error) { + printf("unmount of %s failed (", mp->mnt_stat.f_mntonname); + if (error == EBUSY) + printf("BUSY)\n"); + else + printf("%d)\n", error); + } + } + + /* and finally... */ + if (rootfs) { + vfs_unmountroot(rootfs); + } else { + printf("no root filesystem\n"); } - return (0); } /* @@ -1565,8 +1892,8 @@ vfs_export_lookup(mp, nep, nam) rnh = nep->ne_rtable[saddr->sa_family]; if (rnh != NULL) { np = (struct netcred *) - (*rnh->rnh_matchaddr) ((caddr_t) saddr, - rnh); + (*rnh->rnh_matchaddr)((caddr_t)saddr, + rnh); if (np && np->netc_rnodes->rn_flags & RNF_ROOT) np = NULL; } @@ -1580,7 +1907,6 @@ vfs_export_lookup(mp, nep, nam) return (np); } - /* * perform msync on all vnodes under a mount point * the mount point must be locked. 
@@ -1639,10 +1965,10 @@ retry: } else { if (object->flags & OBJ_DEAD) { if (waslocked) - VOP_UNLOCK(vp); + VOP_UNLOCK(vp, 0, p); tsleep(object, PVM, "vodead", 0); if (waslocked) - VOP_LOCK(vp); + vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p); goto retry; } if ((object->flags & OBJ_VFS_REF) == 0) { diff --git a/sys/kern/vfs_extattr.c b/sys/kern/vfs_extattr.c index 22e16d84..83b6dec 100644 --- a/sys/kern/vfs_extattr.c +++ b/sys/kern/vfs_extattr.c @@ -50,6 +50,7 @@ #include <sys/param.h> #include <sys/systm.h> +#include <sys/sysent.h> #include <sys/sysproto.h> #include <sys/namei.h> #include <sys/filedesc.h> @@ -64,9 +65,11 @@ #include <sys/malloc.h> #include <sys/dirent.h> +/* see if this is needed XXX JH #ifdef UNION #include <miscfs/union/union.h> #endif +*/ #include <vm/vm.h> #include <vm/vm_param.h> @@ -74,7 +77,8 @@ #include <vm/vm_extern.h> #include <sys/sysctl.h> -static int change_dir __P((struct nameidata *ndp, struct proc *p)); +static int change_dir __P((struct nameidata *ndp, struct proc *p)); +static void checkdirs __P((struct vnode *olddp)); /* * Virtual File System System Calls @@ -85,7 +89,7 @@ static int change_dir __P((struct nameidata *ndp, struct proc *p)); */ #ifndef _SYS_SYSPROTO_H_ struct mount_args { - int type; + char *type; char *path; int flags; caddr_t data; @@ -95,29 +99,32 @@ struct mount_args { int mount(p, uap, retval) struct proc *p; - register struct mount_args *uap; - int *retval; + register struct mount_args /* { + syscallarg(char *) type; + syscallarg(char *) path; + syscallarg(int) flags; + syscallarg(caddr_t) data; + } */ *uap; + register_t *retval; { - register struct vnode *vp; - register struct mount *mp; + struct vnode *vp; + struct mount *mp; + struct vfsconf *vfsp; int error, flag = 0; + struct vattr va; + u_long fstypenum; struct nameidata nd; + char fstypename[MFSNAMELEN]; /* - * Must be super user - */ - error = suser(p->p_ucred, &p->p_acflag); - if (error) - return (error); - /* * Get vnode to be covered */ - NDINIT(&nd, 
LOOKUP, FOLLOW | LOCKLEAF, UIO_USERSPACE, uap->path, p); - error = namei(&nd); - if (error) + NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_USERSPACE, + SCARG(uap, path), p); + if (error = namei(&nd)) return (error); vp = nd.ni_vp; - if (uap->flags & MNT_UPDATE) { + if (SCARG(uap, flags) & MNT_UPDATE) { if ((vp->v_flag & VROOT) == 0) { vput(vp); return (EINVAL); @@ -128,68 +135,135 @@ mount(p, uap, retval) * We only allow the filesystem to be reloaded if it * is currently mounted read-only. */ - if ((uap->flags & MNT_RELOAD) && + if ((SCARG(uap, flags) & MNT_RELOAD) && ((mp->mnt_flag & MNT_RDONLY) == 0)) { vput(vp); return (EOPNOTSUPP); /* Needs translation */ } mp->mnt_flag |= - uap->flags & (MNT_RELOAD | MNT_FORCE | MNT_UPDATE); - VOP_UNLOCK(vp); + SCARG(uap, flags) & (MNT_RELOAD | MNT_FORCE | MNT_UPDATE); + /* + * Only root, or the user that did the original mount is + * permitted to update it. + */ + if (mp->mnt_stat.f_owner != p->p_ucred->cr_uid && + (error = suser(p->p_ucred, &p->p_acflag))) { + vput(vp); + return (error); + } + /* + * Do not allow NFS export by non-root users. Silently + * enforce MNT_NOSUID and MNT_NODEV for non-root users. + */ + if (p->p_ucred->cr_uid != 0) { + if (SCARG(uap, flags) & MNT_EXPORTED) { + vput(vp); + return (EPERM); + } + SCARG(uap, flags) |= MNT_NOSUID | MNT_NODEV; + } + if (vfs_busy(mp, LK_NOWAIT, 0, p)) { + vput(vp); + return (EBUSY); + } + VOP_UNLOCK(vp, 0, p); goto update; } - error = vinvalbuf(vp, V_SAVE, p->p_ucred, p, 0, 0); - if (error) + /* + * If the user is not root, ensure that they own the directory + * onto which we are attempting to mount. + */ + if ((error = VOP_GETATTR(vp, &va, p->p_ucred, p)) || + (va.va_uid != p->p_ucred->cr_uid && + (error = suser(p->p_ucred, &p->p_acflag)))) { + vput(vp); + return (error); + } + /* + * Do not allow NFS export by non-root users. Silently + * enforce MNT_NOSUID and MNT_NODEV for non-root users. 
+ */ + if (p->p_ucred->cr_uid != 0) { + if (SCARG(uap, flags) & MNT_EXPORTED) { + vput(vp); + return (EPERM); + } + SCARG(uap, flags) |= MNT_NOSUID | MNT_NODEV; + } + if (error = vinvalbuf(vp, V_SAVE, p->p_ucred, p, 0, 0)) return (error); if (vp->v_type != VDIR) { vput(vp); return (ENOTDIR); } - if ((u_long)uap->type > MOUNT_MAXTYPE || vfssw[uap->type] == NULL) { - vput(vp); - return (ENODEV); - } - +#ifdef COMPAT_43 /* - * Allocate and initialize the file system. + * Historically filesystem types were identified by number. If we + * get an integer for the filesystem type instead of a string, we + * check to see if it matches one of the historic filesystem types. */ - mp = (struct mount *)malloc((u_long)sizeof(struct mount), - M_MOUNT, M_WAITOK); - bzero((char *)mp, (u_long)sizeof(struct mount)); - mp->mnt_op = vfssw[uap->type]; - mp->mnt_vfc = vfsconf[uap->type]; - error = vfs_lock(mp); - if (error) { - free((caddr_t)mp, M_MOUNT); + fstypenum = (u_long)SCARG(uap, type); + if (fstypenum < maxvfsconf) { + for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next) + if (vfsp->vfc_typenum == fstypenum) + break; + if (vfsp == NULL) { + vput(vp); + return (ENODEV); + } + strncpy(fstypename, vfsp->vfc_name, MFSNAMELEN); + } else +#endif /* COMPAT_43 */ + if (error = copyinstr(SCARG(uap, type), fstypename, MFSNAMELEN, NULL)) { vput(vp); return (error); } + for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next) + if (!strcmp(vfsp->vfc_name, fstypename)) + break; + if (vfsp == NULL) { + vput(vp); + return (ENODEV); + } if (vp->v_mountedhere != NULL) { - vfs_unlock(mp); - free((caddr_t)mp, M_MOUNT); vput(vp); return (EBUSY); } + + /* + * Allocate and initialize the filesystem. 
+ */ + mp = (struct mount *)malloc((u_long)sizeof(struct mount), + M_MOUNT, M_WAITOK); + bzero((char *)mp, (u_long)sizeof(struct mount)); + lockinit(&mp->mnt_lock, PVFS, "vfslock", 0, 0); + (void)vfs_busy(mp, LK_NOWAIT, 0, p); + mp->mnt_op = vfsp->vfc_vfsops; + mp->mnt_vfc = vfsp; + vfsp->vfc_refcount++; + mp->mnt_stat.f_type = vfsp->vfc_typenum; + mp->mnt_flag |= vfsp->vfc_flags & MNT_VISFLAGMASK; + strncpy(mp->mnt_stat.f_fstypename, vfsp->vfc_name, MFSNAMELEN); vp->v_mountedhere = mp; mp->mnt_vnodecovered = vp; - vfsconf[uap->type]->vfc_refcount++; - + mp->mnt_stat.f_owner = p->p_ucred->cr_uid; update: /* * Set the mount level flags. */ - if (uap->flags & MNT_RDONLY) + if (SCARG(uap, flags) & MNT_RDONLY) mp->mnt_flag |= MNT_RDONLY; else if (mp->mnt_flag & MNT_RDONLY) mp->mnt_flag |= MNT_WANTRDWR; mp->mnt_flag &=~ (MNT_NOSUID | MNT_NOEXEC | MNT_NODEV | MNT_SYNCHRONOUS | MNT_UNION | MNT_ASYNC | MNT_NOATIME); - mp->mnt_flag |= uap->flags & (MNT_NOSUID | MNT_NOEXEC | MNT_NODEV | - MNT_SYNCHRONOUS | MNT_UNION | MNT_ASYNC | MNT_FORCE | MNT_NOATIME); + mp->mnt_flag |= SCARG(uap, flags) & (MNT_NOSUID | MNT_NOEXEC | + MNT_NODEV | MNT_SYNCHRONOUS | MNT_UNION | MNT_ASYNC | MNT_FORCE | + MNT_NOATIME); /* * Mount the filesystem. 
*/ - error = VFS_MOUNT(mp, uap->path, uap->data, &nd, p); + error = VFS_MOUNT(mp, SCARG(uap, path), SCARG(uap, data), &nd, p); if (mp->mnt_flag & MNT_UPDATE) { vrele(vp); if (mp->mnt_flag & MNT_WANTRDWR) @@ -198,6 +272,7 @@ update: (MNT_UPDATE | MNT_RELOAD | MNT_FORCE | MNT_WANTRDWR); if (error) mp->mnt_flag = flag; + vfs_unbusy(mp, p); return (error); } /* @@ -205,23 +280,63 @@ update: */ cache_purge(vp); if (!error) { + simple_lock(&mountlist_slock); CIRCLEQ_INSERT_TAIL(&mountlist, mp, mnt_list); - VOP_UNLOCK(vp); - vfs_unlock(mp); - error = VFS_START(mp, 0, p); - if (error) + simple_unlock(&mountlist_slock); + checkdirs(vp); + VOP_UNLOCK(vp, 0, p); + vfs_unbusy(mp, p); + if (error = VFS_START(mp, 0, p)) vrele(vp); } else { mp->mnt_vnodecovered->v_mountedhere = (struct mount *)0; - vfs_unlock(mp); + mp->mnt_vfc->vfc_refcount--; + vfs_unbusy(mp, p); free((caddr_t)mp, M_MOUNT); vput(vp); - vfsconf[uap->type]->vfc_refcount--; } return (error); } /* + * Scan all active processes to see if any of them have a current + * or root directory onto which the new filesystem has just been + * mounted. If so, replace them with the new mount point. + */ +static void +checkdirs(olddp) + struct vnode *olddp; +{ + struct filedesc *fdp; + struct vnode *newdp; + struct proc *p; + + if (olddp->v_usecount == 1) + return; + if (VFS_ROOT(olddp->v_mountedhere, &newdp)) + panic("mount: lost mount"); + for (p = allproc.lh_first; p != 0; p = p->p_list.le_next) { + fdp = p->p_fd; + if (fdp->fd_cdir == olddp) { + vrele(fdp->fd_cdir); + VREF(newdp); + fdp->fd_cdir = newdp; + } + if (fdp->fd_rdir == olddp) { + vrele(fdp->fd_rdir); + VREF(newdp); + fdp->fd_rdir = newdp; + } + } + if (rootvnode == olddp) { + vrele(rootvnode); + VREF(newdp); + rootvnode = newdp; + } + vput(newdp); +} + +/* * Unmount a file system. 
* * Note: unmount takes a path to the vnode mounted on as argument, @@ -237,47 +352,51 @@ struct unmount_args { int unmount(p, uap, retval) struct proc *p; - register struct unmount_args *uap; - int *retval; + register struct unmount_args /* { + syscallarg(char *) path; + syscallarg(int) flags; + } */ *uap; + register_t *retval; { register struct vnode *vp; struct mount *mp; int error; struct nameidata nd; - NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_USERSPACE, uap->path, p); - error = namei(&nd); - if (error) + NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_USERSPACE, + SCARG(uap, path), p); + if (error = namei(&nd)) return (error); vp = nd.ni_vp; + mp = vp->v_mount; /* - * Unless this is a user mount, then must - * have suser privilege. + * Only root, or the user that did the original mount is + * permitted to unmount this filesystem. */ - if (((vp->v_mount->mnt_flag & MNT_USER) == 0) && + if ((mp->mnt_stat.f_owner != p->p_ucred->cr_uid) && (error = suser(p->p_ucred, &p->p_acflag))) { vput(vp); return (error); } /* - * Must be the root of the filesystem + * Don't allow unmounting the root file system. 
*/ - if ((vp->v_flag & VROOT) == 0) { + if (mp->mnt_flag & MNT_ROOTFS) { vput(vp); return (EINVAL); } - mp = vp->v_mount; - vput(vp); /* - * Don't allow unmount of the root filesystem + * Must be the root of the filesystem */ - if (mp->mnt_flag & MNT_ROOTFS) + if ((vp->v_flag & VROOT) == 0) { + vput(vp); return (EINVAL); - - return (dounmount(mp, uap->flags, p)); + } + vput(vp); + return (dounmount(mp, SCARG(uap, flags), p)); } /* @@ -292,74 +411,86 @@ dounmount(mp, flags, p) struct vnode *coveredvp; int error; - coveredvp = mp->mnt_vnodecovered; - if (vfs_busy(mp)) - return (EBUSY); + simple_lock(&mountlist_slock); mp->mnt_flag |= MNT_UNMOUNT; - error = vfs_lock(mp); - if (error) - return (error); - + lockmgr(&mp->mnt_lock, LK_DRAIN | LK_INTERLOCK, &mountlist_slock, p); mp->mnt_flag &=~ MNT_ASYNC; vfs_msync(mp, MNT_NOWAIT); vnode_pager_umount(mp); /* release cached vnodes */ cache_purgevfs(mp); /* remove cache entries for this file sys */ - if ((error = VFS_SYNC(mp, MNT_WAIT, p->p_ucred, p)) == 0 || + if (((mp->mnt_flag & MNT_RDONLY) || + (error = VFS_SYNC(mp, MNT_WAIT, p->p_ucred, p)) == 0) || (flags & MNT_FORCE)) error = VFS_UNMOUNT(mp, flags, p); - mp->mnt_flag &= ~MNT_UNMOUNT; - vfs_unbusy(mp); + simple_lock(&mountlist_slock); if (error) { - vfs_unlock(mp); - } else { + mp->mnt_flag &= ~MNT_UNMOUNT; + lockmgr(&mp->mnt_lock, LK_RELEASE | LK_INTERLOCK | LK_REENABLE, + &mountlist_slock, p); + return (error); + } + CIRCLEQ_REMOVE(&mountlist, mp, mnt_list); + if ((coveredvp = mp->mnt_vnodecovered) != NULLVP) { + coveredvp->v_mountedhere = (struct mount *)0; vrele(coveredvp); - CIRCLEQ_REMOVE(&mountlist, mp, mnt_list); - mp->mnt_vnodecovered->v_mountedhere = (struct mount *)0; - vfs_unlock(mp); - mp->mnt_vfc->vfc_refcount--; - if (mp->mnt_vnodelist.lh_first != NULL) - panic("unmount: dangling vnode"); - free((caddr_t)mp, M_MOUNT); } - return (error); + mp->mnt_vfc->vfc_refcount--; + if (mp->mnt_vnodelist.lh_first != NULL) + panic("unmount: dangling vnode"); + 
lockmgr(&mp->mnt_lock, LK_RELEASE | LK_INTERLOCK, &mountlist_slock, p); + if (mp->mnt_flag & MNT_MWAIT) + wakeup((caddr_t)mp); + free((caddr_t)mp, M_MOUNT); + return (0); } /* * Sync each mounted filesystem. */ - #ifndef _SYS_SYSPROTO_H_ struct sync_args { int dummy; }; #endif +#ifdef DEBUG +int syncprt = 0; +SYSCTL_INT(_debug, 0, syncprt, CTLFLAG_RW, &syncprt, 0, ""); +#endif + /* ARGSUSED */ int sync(p, uap, retval) struct proc *p; struct sync_args *uap; - int *retval; + register_t *retval; { - register struct mount *mp; + register struct mount *mp, *nmp; int asyncflag; - for (mp = mountlist.cqh_first; mp != (void *)&mountlist; mp = mp->mnt_list.cqe_next) { - /* - * The lock check below is to avoid races with mount - * and unmount. - */ - if ((mp->mnt_flag & (MNT_MLOCK|MNT_RDONLY|MNT_MPBUSY)) == 0 && - !vfs_busy(mp)) { + simple_lock(&mountlist_slock); + for (mp = mountlist.cqh_first; mp != (void *)&mountlist; mp = nmp) { + if (vfs_busy(mp, LK_NOWAIT, &mountlist_slock, p)) { + nmp = mp->mnt_list.cqe_next; + continue; + } + if ((mp->mnt_flag & MNT_RDONLY) == 0) { asyncflag = mp->mnt_flag & MNT_ASYNC; mp->mnt_flag &= ~MNT_ASYNC; vfs_msync(mp, MNT_NOWAIT); VFS_SYNC(mp, MNT_NOWAIT, p != NULL ? 
p->p_ucred : NOCRED, p); if (asyncflag) mp->mnt_flag |= MNT_ASYNC; - vfs_unbusy(mp); } + simple_lock(&mountlist_slock); + nmp = mp->mnt_list.cqe_next; + vfs_unbusy(mp, p); } + simple_unlock(&mountlist_slock); +#ifdef DIAGNOSTIC + if (syncprt) + vfs_bufstats(); +#endif /* DIAGNOSTIC */ return (0); } @@ -378,20 +509,25 @@ struct quotactl_args { int quotactl(p, uap, retval) struct proc *p; - register struct quotactl_args *uap; - int *retval; + register struct quotactl_args /* { + syscallarg(char *) path; + syscallarg(int) cmd; + syscallarg(int) uid; + syscallarg(caddr_t) arg; + } */ *uap; + register_t *retval; { register struct mount *mp; int error; struct nameidata nd; - NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, uap->path, p); - error = namei(&nd); - if (error) + NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, SCARG(uap, path), p); + if (error = namei(&nd)) return (error); mp = nd.ni_vp->v_mount; vrele(nd.ni_vp); - return (VFS_QUOTACTL(mp, uap->cmd, uap->uid, uap->arg, p)); + return (VFS_QUOTACTL(mp, SCARG(uap, cmd), SCARG(uap, uid), + SCARG(uap, arg), p)); } /* @@ -407,17 +543,19 @@ struct statfs_args { int statfs(p, uap, retval) struct proc *p; - register struct statfs_args *uap; - int *retval; + register struct statfs_args /* { + syscallarg(char *) path; + syscallarg(struct statfs *) buf; + } */ *uap; + register_t *retval; { register struct mount *mp; register struct statfs *sp; int error; struct nameidata nd; - NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, uap->path, p); - error = namei(&nd); - if (error) + NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, SCARG(uap, path), p); + if (error = namei(&nd)) return (error); mp = nd.ni_vp->v_mount; sp = &mp->mnt_stat; @@ -426,7 +564,7 @@ statfs(p, uap, retval) if (error) return (error); sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK; - return (copyout((caddr_t)sp, (caddr_t)uap->buf, sizeof(*sp))); + return (copyout((caddr_t)sp, (caddr_t)SCARG(uap, buf), sizeof(*sp))); } /* @@ -442,16 +580,18 @@ struct fstatfs_args { int fstatfs(p, 
uap, retval) struct proc *p; - register struct fstatfs_args *uap; - int *retval; + register struct fstatfs_args /* { + syscallarg(int) fd; + syscallarg(struct statfs *) buf; + } */ *uap; + register_t *retval; { struct file *fp; struct mount *mp; register struct statfs *sp; int error; - error = getvnode(p->p_fd, uap->fd, &fp); - if (error) + if (error = getvnode(p->p_fd, SCARG(uap, fd), &fp)) return (error); mp = ((struct vnode *)fp->f_data)->v_mount; sp = &mp->mnt_stat; @@ -459,7 +599,7 @@ fstatfs(p, uap, retval) if (error) return (error); sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK; - return (copyout((caddr_t)sp, (caddr_t)uap->buf, sizeof(*sp))); + return (copyout((caddr_t)sp, (caddr_t)SCARG(uap, buf), sizeof(*sp))); } /* @@ -475,48 +615,55 @@ struct getfsstat_args { int getfsstat(p, uap, retval) struct proc *p; - register struct getfsstat_args *uap; - int *retval; + register struct getfsstat_args /* { + syscallarg(struct statfs *) buf; + syscallarg(long) bufsize; + syscallarg(int) flags; + } */ *uap; + register_t *retval; { register struct mount *mp, *nmp; register struct statfs *sp; caddr_t sfsp; long count, maxcount, error; - maxcount = uap->bufsize / sizeof(struct statfs); - sfsp = (caddr_t)uap->buf; + maxcount = SCARG(uap, bufsize) / sizeof(struct statfs); + sfsp = (caddr_t)SCARG(uap, buf); count = 0; + simple_lock(&mountlist_slock); for (mp = mountlist.cqh_first; mp != (void *)&mountlist; mp = nmp) { - if (vfs_busy(mp)) { + if (vfs_busy(mp, LK_NOWAIT, &mountlist_slock, p)) { nmp = mp->mnt_list.cqe_next; continue; } - if (sfsp && count < maxcount && - ((mp->mnt_flag & MNT_MLOCK) == 0)) { + if (sfsp && count < maxcount) { sp = &mp->mnt_stat; /* * If MNT_NOWAIT is specified, do not refresh the * fsstat cache. MNT_WAIT overrides MNT_NOWAIT. 
*/ - if (((uap->flags & MNT_NOWAIT) == 0 || - (uap->flags & MNT_WAIT)) && + if (((SCARG(uap, flags) & MNT_NOWAIT) == 0 || + (SCARG(uap, flags) & MNT_WAIT)) && (error = VFS_STATFS(mp, sp, p))) { + simple_lock(&mountlist_slock); nmp = mp->mnt_list.cqe_next; - vfs_unbusy(mp); + vfs_unbusy(mp, p); continue; } sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK; error = copyout((caddr_t)sp, sfsp, sizeof(*sp)); if (error) { - vfs_unbusy(mp); + vfs_unbusy(mp, p); return (error); } sfsp += sizeof(*sp); } count++; + simple_lock(&mountlist_slock); nmp = mp->mnt_list.cqe_next; - vfs_unbusy(mp); + vfs_unbusy(mp, p); } + simple_unlock(&mountlist_slock); if (sfsp && count > maxcount) *retval = maxcount; else @@ -536,27 +683,41 @@ struct fchdir_args { int fchdir(p, uap, retval) struct proc *p; - struct fchdir_args *uap; - int *retval; + struct fchdir_args /* { + syscallarg(int) fd; + } */ *uap; + register_t *retval; { register struct filedesc *fdp = p->p_fd; - register struct vnode *vp; + struct vnode *vp, *tdp; + struct mount *mp; struct file *fp; int error; - error = getvnode(fdp, uap->fd, &fp); - if (error) + if (error = getvnode(fdp, SCARG(uap, fd), &fp)) return (error); vp = (struct vnode *)fp->f_data; - VOP_LOCK(vp); + VREF(vp); + vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p); if (vp->v_type != VDIR) error = ENOTDIR; else error = VOP_ACCESS(vp, VEXEC, p->p_ucred, p); - VOP_UNLOCK(vp); - if (error) + while (!error && (mp = vp->v_mountedhere) != NULL) { + if (vfs_busy(mp, 0, 0, p)) + continue; + error = VFS_ROOT(mp, &tdp); + vfs_unbusy(mp, p); + if (error) + break; + vput(vp); + vp = tdp; + } + if (error) { + vput(vp); return (error); - VREF(vp); + } + VOP_UNLOCK(vp, 0, p); vrele(fdp->fd_cdir); fdp->fd_cdir = vp; return (0); @@ -574,16 +735,18 @@ struct chdir_args { int chdir(p, uap, retval) struct proc *p; - struct chdir_args *uap; - int *retval; + struct chdir_args /* { + syscallarg(char *) path; + } */ *uap; + register_t *retval; { register struct filedesc *fdp = p->p_fd; int error; 
struct nameidata nd; - NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_USERSPACE, uap->path, p); - error = change_dir(&nd, p); - if (error) + NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_USERSPACE, + SCARG(uap, path), p); + if (error = change_dir(&nd, p)) return (error); vrele(fdp->fd_cdir); fdp->fd_cdir = nd.ni_vp; @@ -602,8 +765,10 @@ struct chroot_args { int chroot(p, uap, retval) struct proc *p; - struct chroot_args *uap; - int *retval; + struct chroot_args /* { + syscallarg(char *) path; + } */ *uap; + register_t *retval; { register struct filedesc *fdp = p->p_fd; int error; @@ -612,9 +777,9 @@ chroot(p, uap, retval) error = suser(p->p_ucred, &p->p_acflag); if (error) return (error); - NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_USERSPACE, uap->path, p); - error = change_dir(&nd, p); - if (error) + NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_USERSPACE, + SCARG(uap, path), p); + if (error = change_dir(&nd, p)) return (error); if (fdp->fd_rdir != NULL) vrele(fdp->fd_rdir); @@ -641,9 +806,10 @@ change_dir(ndp, p) error = ENOTDIR; else error = VOP_ACCESS(vp, VEXEC, p->p_ucred, p); - VOP_UNLOCK(vp); if (error) - vrele(vp); + vput(vp); + else + VOP_UNLOCK(vp, 0, p); return (error); } @@ -661,8 +827,12 @@ struct open_args { int open(p, uap, retval) struct proc *p; - register struct open_args *uap; - int *retval; + register struct open_args /* { + syscallarg(char *) path; + syscallarg(int) flags; + syscallarg(int) mode; + } */ *uap; + register_t *retval; { register struct filedesc *fdp = p->p_fd; register struct file *fp; @@ -677,17 +847,17 @@ open(p, uap, retval) if (error) return (error); fp = nfp; - flags = FFLAGS(uap->flags); - cmode = ((uap->mode &~ fdp->fd_cmask) & ALLPERMS) &~ S_ISTXT; - NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, uap->path, p); + flags = FFLAGS(SCARG(uap, flags)); + cmode = ((SCARG(uap, mode) &~ fdp->fd_cmask) & ALLPERMS) &~ S_ISTXT; + NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, SCARG(uap, path), p); p->p_dupfd = -indx - 1; /* XXX check for fdopen */ 
error = vn_open(&nd, flags, cmode); if (error) { ffree(fp); if ((error == ENODEV || error == ENXIO) && - p->p_dupfd >= 0 && /* XXX from fdopen */ + p->p_dupfd >= 0 && /* XXX from fdopen */ (error = - dupfdopen(fdp, indx, p->p_dupfd, flags, error)) == 0) { + dupfdopen(fdp, indx, p->p_dupfd, flags, error)) == 0) { *retval = indx; return (0); } @@ -714,18 +884,17 @@ open(p, uap, retval) type = F_FLOCK; if ((flags & FNONBLOCK) == 0) type |= F_WAIT; - VOP_UNLOCK(vp); - error = VOP_ADVLOCK(vp, (caddr_t)fp, F_SETLK, &lf, type); - if (error) { + VOP_UNLOCK(vp, 0, p); + if (error = VOP_ADVLOCK(vp, (caddr_t)fp, F_SETLK, &lf, type)) { (void) vn_close(vp, fp->f_flag, fp->f_cred, p); ffree(fp); fdp->fd_ofiles[indx] = NULL; return (error); } - VOP_LOCK(vp); + vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p); fp->f_flag |= FHASLOCK; } - VOP_UNLOCK(vp); + VOP_UNLOCK(vp, 0, p); *retval = indx; return (0); } @@ -743,15 +912,22 @@ struct ocreat_args { int ocreat(p, uap, retval) struct proc *p; - register struct ocreat_args *uap; - int *retval; + register struct ocreat_args /* { + syscallarg(char *) path; + syscallarg(int) mode; + } */ *uap; + register_t *retval; { - struct open_args openuap; - - openuap.path = uap->path; - openuap.mode = uap->mode; - openuap.flags = O_WRONLY | O_CREAT | O_TRUNC; - return (open(p, &openuap, retval)); + struct open_args /* { + syscallarg(char *) path; + syscallarg(int) flags; + syscallarg(int) mode; + } */ nuap; + + SCARG(&nuap, path) = SCARG(uap, path); + SCARG(&nuap, mode) = SCARG(uap, mode); + SCARG(&nuap, flags) = O_WRONLY | O_CREAT | O_TRUNC; + return (open(p, &nuap, retval)); } #endif /* COMPAT_43 */ @@ -769,30 +945,35 @@ struct mknod_args { int mknod(p, uap, retval) struct proc *p; - register struct mknod_args *uap; - int *retval; + register struct mknod_args /* { + syscallarg(char *) path; + syscallarg(int) mode; + syscallarg(int) dev; + } */ *uap; + register_t *retval; { register struct vnode *vp; struct vattr vattr; int error; + int whiteout; struct 
nameidata nd; error = suser(p->p_ucred, &p->p_acflag); if (error) return (error); - NDINIT(&nd, CREATE, LOCKPARENT, UIO_USERSPACE, uap->path, p); - error = namei(&nd); - if (error) + NDINIT(&nd, CREATE, LOCKPARENT, UIO_USERSPACE, SCARG(uap, path), p); + if (error = namei(&nd)) return (error); vp = nd.ni_vp; if (vp != NULL) error = EEXIST; else { VATTR_NULL(&vattr); - vattr.va_mode = (uap->mode & ALLPERMS) &~ p->p_fd->fd_cmask; - vattr.va_rdev = uap->dev; + vattr.va_mode = (SCARG(uap, mode) & ALLPERMS) &~ p->p_fd->fd_cmask; + vattr.va_rdev = SCARG(uap, dev); + whiteout = 0; - switch (uap->mode & S_IFMT) { + switch (SCARG(uap, mode) & S_IFMT) { case S_IFMT: /* used by badsect to flag bad sectors */ vattr.va_type = VBAD; break; @@ -802,14 +983,25 @@ mknod(p, uap, retval) case S_IFBLK: vattr.va_type = VBLK; break; + case S_IFWHT: + whiteout = 1; + break; default: error = EINVAL; break; } } if (!error) { - LEASE_CHECK(nd.ni_dvp, p, p->p_ucred, LEASE_WRITE); - error = VOP_MKNOD(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr); + VOP_LEASE(nd.ni_dvp, p, p->p_ucred, LEASE_WRITE); + if (whiteout) { + error = VOP_WHITEOUT(nd.ni_dvp, &nd.ni_cnd, CREATE); + if (error) + VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); + vput(nd.ni_dvp); + } else { + error = VOP_MKNOD(nd.ni_dvp, &nd.ni_vp, + &nd.ni_cnd, &vattr); + } } else { VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); if (nd.ni_dvp == vp) @@ -823,7 +1015,7 @@ mknod(p, uap, retval) } /* - * Create named pipe. + * Create a named pipe. 
*/ #ifndef _SYS_SYSPROTO_H_ struct mkfifo_args { @@ -835,16 +1027,21 @@ struct mkfifo_args { int mkfifo(p, uap, retval) struct proc *p; - register struct mkfifo_args *uap; - int *retval; + register struct mkfifo_args /* { + syscallarg(char *) path; + syscallarg(int) mode; + } */ *uap; + register_t *retval; { +#ifndef FIFO + return (EOPNOTSUPP); +#else struct vattr vattr; int error; struct nameidata nd; - NDINIT(&nd, CREATE, LOCKPARENT, UIO_USERSPACE, uap->path, p); - error = namei(&nd); - if (error) + NDINIT(&nd, CREATE, LOCKPARENT, UIO_USERSPACE, SCARG(uap, path), p); + if (error = namei(&nd)) return (error); if (nd.ni_vp != NULL) { VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); @@ -857,9 +1054,10 @@ mkfifo(p, uap, retval) } VATTR_NULL(&vattr); vattr.va_type = VFIFO; - vattr.va_mode = (uap->mode & ALLPERMS) &~ p->p_fd->fd_cmask; - LEASE_CHECK(nd.ni_dvp, p, p->p_ucred, LEASE_WRITE); + vattr.va_mode = (SCARG(uap, mode) & ALLPERMS) &~ p->p_fd->fd_cmask; + VOP_LEASE(nd.ni_dvp, p, p->p_ucred, LEASE_WRITE); return (VOP_MKNOD(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr)); +#endif /* FIFO */ } /* @@ -875,22 +1073,24 @@ struct link_args { int link(p, uap, retval) struct proc *p; - register struct link_args *uap; - int *retval; + register struct link_args /* { + syscallarg(char *) path; + syscallarg(char *) link; + } */ *uap; + register_t *retval; { register struct vnode *vp; struct nameidata nd; int error; - NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, uap->path, p); - error = namei(&nd); - if (error) + NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, SCARG(uap, path), p); + if (error = namei(&nd)) return (error); vp = nd.ni_vp; if (vp->v_type == VDIR) error = EPERM; /* POSIX */ else { - NDINIT(&nd, CREATE, LOCKPARENT, UIO_USERSPACE, uap->link, p); + NDINIT(&nd, CREATE, LOCKPARENT, UIO_USERSPACE, SCARG(uap, link), p); error = namei(&nd); if (!error) { if (nd.ni_vp != NULL) { @@ -903,10 +1103,9 @@ link(p, uap, retval) vrele(nd.ni_vp); error = EEXIST; } else { - LEASE_CHECK(nd.ni_dvp, - p, 
p->p_ucred, LEASE_WRITE); - LEASE_CHECK(vp, - p, p->p_ucred, LEASE_WRITE); + VOP_LEASE(nd.ni_dvp, p, p->p_ucred, + LEASE_WRITE); + VOP_LEASE(vp, p, p->p_ucred, LEASE_WRITE); error = VOP_LINK(nd.ni_dvp, vp, &nd.ni_cnd); } } @@ -928,8 +1127,11 @@ struct symlink_args { int symlink(p, uap, retval) struct proc *p; - register struct symlink_args *uap; - int *retval; + register struct symlink_args /* { + syscallarg(char *) path; + syscallarg(char *) link; + } */ *uap; + register_t *retval; { struct vattr vattr; char *path; @@ -937,12 +1139,10 @@ symlink(p, uap, retval) struct nameidata nd; MALLOC(path, char *, MAXPATHLEN, M_NAMEI, M_WAITOK); - error = copyinstr(uap->path, path, MAXPATHLEN, NULL); - if (error) + if (error = copyinstr(SCARG(uap, path), path, MAXPATHLEN, NULL)) goto out; - NDINIT(&nd, CREATE, LOCKPARENT, UIO_USERSPACE, uap->link, p); - error = namei(&nd); - if (error) + NDINIT(&nd, CREATE, LOCKPARENT, UIO_USERSPACE, SCARG(uap, link), p); + if (error = namei(&nd)) goto out; if (nd.ni_vp) { VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); @@ -956,7 +1156,7 @@ symlink(p, uap, retval) } VATTR_NULL(&vattr); vattr.va_mode = ACCESSPERMS &~ p->p_fd->fd_cmask; - LEASE_CHECK(nd.ni_dvp, p, p->p_ucred, LEASE_WRITE); + VOP_LEASE(nd.ni_dvp, p, p->p_ucred, LEASE_WRITE); error = VOP_SYMLINK(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr, path); out: FREE(path, M_NAMEI); @@ -964,6 +1164,45 @@ out: } /* + * Delete a whiteout from the filesystem. 
+ */ +/* ARGSUSED */ +int +undelete(p, uap, retval) + struct proc *p; + register struct undelete_args /* { + syscallarg(char *) path; + } */ *uap; + register_t *retval; +{ + int error; + struct nameidata nd; + + NDINIT(&nd, DELETE, LOCKPARENT|DOWHITEOUT, UIO_USERSPACE, + SCARG(uap, path), p); + error = namei(&nd); + if (error) + return (error); + + if (nd.ni_vp != NULLVP || !(nd.ni_cnd.cn_flags & ISWHITEOUT)) { + VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); + if (nd.ni_dvp == nd.ni_vp) + vrele(nd.ni_dvp); + else + vput(nd.ni_dvp); + if (nd.ni_vp) + vrele(nd.ni_vp); + return (EEXIST); + } + + VOP_LEASE(nd.ni_dvp, p, p->p_ucred, LEASE_WRITE); + if (error = VOP_WHITEOUT(nd.ni_dvp, &nd.ni_cnd, DELETE)) + VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); + vput(nd.ni_dvp); + return (error); +} + +/* * Delete a name from the filesystem. */ #ifndef _SYS_SYSPROTO_H_ @@ -975,20 +1214,21 @@ struct unlink_args { int unlink(p, uap, retval) struct proc *p; - struct unlink_args *uap; - int *retval; + struct unlink_args /* { + syscallarg(char *) path; + } */ *uap; + register_t *retval; { register struct vnode *vp; int error; struct nameidata nd; - NDINIT(&nd, DELETE, LOCKPARENT, UIO_USERSPACE, uap->path, p); - error = namei(&nd); - if (error) + NDINIT(&nd, DELETE, LOCKPARENT, UIO_USERSPACE, SCARG(uap, path), p); + if (error = namei(&nd)) return (error); vp = nd.ni_vp; - LEASE_CHECK(vp, p, p->p_ucred, LEASE_WRITE); - VOP_LOCK(vp); + VOP_LEASE(vp, p, p->p_ucred, LEASE_WRITE); + vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p); if (vp->v_type == VDIR) error = EPERM; /* POSIX */ @@ -1001,11 +1241,11 @@ unlink(p, uap, retval) if (vp->v_flag & VROOT) error = EBUSY; else - (void) vnode_pager_uncache(vp); + (void) vnode_pager_uncache(vp, p); } if (!error) { - LEASE_CHECK(nd.ni_dvp, p, p->p_ucred, LEASE_WRITE); + VOP_LEASE(nd.ni_dvp, p, p->p_ucred, LEASE_WRITE); error = VOP_REMOVE(nd.ni_dvp, nd.ni_vp, &nd.ni_cnd); } else { VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); @@ -1013,7 +1253,8 @@ unlink(p, uap, retval) vrele(nd.ni_dvp); 
else vput(nd.ni_dvp); - vput(vp); + if (vp != NULLVP) + vput(vp); } return (error); } @@ -1032,8 +1273,13 @@ struct lseek_args { int lseek(p, uap, retval) struct proc *p; - register struct lseek_args *uap; - int *retval; + register struct lseek_args /* { + syscallarg(int) fd; + syscallarg(int) pad; + syscallarg(off_t) offset; + syscallarg(int) whence; + } */ *uap; + register_t *retval; /* XXX */ { struct ucred *cred = p->p_ucred; register struct filedesc *fdp = p->p_fd; @@ -1041,23 +1287,23 @@ lseek(p, uap, retval) struct vattr vattr; int error; - if ((u_int)uap->fd >= fdp->fd_nfiles || - (fp = fdp->fd_ofiles[uap->fd]) == NULL) + if ((u_int)SCARG(uap, fd) >= fdp->fd_nfiles || + (fp = fdp->fd_ofiles[SCARG(uap, fd)]) == NULL) return (EBADF); if (fp->f_type != DTYPE_VNODE) return (ESPIPE); - switch (uap->whence) { + switch (SCARG(uap, whence)) { case L_INCR: - fp->f_offset += uap->offset; + fp->f_offset += SCARG(uap, offset); break; case L_XTND: error=VOP_GETATTR((struct vnode *)fp->f_data, &vattr, cred, p); if (error) return (error); - fp->f_offset = uap->offset + vattr.va_size; + fp->f_offset = SCARG(uap, offset) + vattr.va_size; break; case L_SET: - fp->f_offset = uap->offset; + fp->f_offset = SCARG(uap, offset); break; default: return (EINVAL); @@ -1080,17 +1326,26 @@ struct olseek_args { int olseek(p, uap, retval) struct proc *p; - register struct olseek_args *uap; - int *retval; + register struct olseek_args /* { + syscallarg(int) fd; + syscallarg(long) offset; + syscallarg(int) whence; + } */ *uap; + register_t *retval; { - struct lseek_args nuap; + struct lseek_args /* { + syscallarg(int) fd; + syscallarg(int) pad; + syscallarg(off_t) offset; + syscallarg(int) whence; + } */ nuap; off_t qret; int error; - nuap.fd = uap->fd; - nuap.offset = uap->offset; - nuap.whence = uap->whence; - error = lseek(p, &nuap, (int *)&qret); + SCARG(&nuap, fd) = SCARG(uap, fd); + SCARG(&nuap, offset) = SCARG(uap, offset); + SCARG(&nuap, whence) = SCARG(uap, whence); + error = 
lseek(p, &nuap, (register_t *) &qret); *(long *)retval = qret; return (error); } @@ -1108,8 +1363,11 @@ struct access_args { int access(p, uap, retval) struct proc *p; - register struct access_args *uap; - int *retval; + register struct access_args /* { + syscallarg(char *) path; + syscallarg(int) flags; + } */ *uap; + register_t *retval; { register struct ucred *cred = p->p_ucred; register struct vnode *vp; @@ -1120,20 +1378,20 @@ access(p, uap, retval) t_gid = cred->cr_groups[0]; cred->cr_uid = p->p_cred->p_ruid; cred->cr_groups[0] = p->p_cred->p_rgid; - NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_USERSPACE, uap->path, p); - error = namei(&nd); - if (error) + NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_USERSPACE, + SCARG(uap, path), p); + if (error = namei(&nd)) goto out1; vp = nd.ni_vp; /* Flags == 0 means only check for existence. */ - if (uap->flags) { + if (SCARG(uap, flags)) { flags = 0; - if (uap->flags & R_OK) + if (SCARG(uap, flags) & R_OK) flags |= VREAD; - if (uap->flags & W_OK) + if (SCARG(uap, flags) & W_OK) flags |= VWRITE; - if (uap->flags & X_OK) + if (SCARG(uap, flags) & X_OK) flags |= VEXEC; if ((flags & VWRITE) == 0 || (error = vn_writechk(vp)) == 0) error = VOP_ACCESS(vp, flags, cred, p); @@ -1159,24 +1417,27 @@ struct ostat_args { int ostat(p, uap, retval) struct proc *p; - register struct ostat_args *uap; - int *retval; + register struct ostat_args /* { + syscallarg(char *) path; + syscallarg(struct ostat *) ub; + } */ *uap; + register_t *retval; { struct stat sb; struct ostat osb; int error; struct nameidata nd; - NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_USERSPACE, uap->path, p); - error = namei(&nd); - if (error) + NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_USERSPACE, + SCARG(uap, path), p); + if (error = namei(&nd)) return (error); error = vn_stat(nd.ni_vp, &sb, p); vput(nd.ni_vp); if (error) return (error); cvtstat(&sb, &osb); - error = copyout((caddr_t)&osb, (caddr_t)uap->ub, sizeof (osb)); + error = copyout((caddr_t)&osb, 
(caddr_t)SCARG(uap, ub), sizeof (osb)); return (error); } @@ -1193,8 +1454,11 @@ struct olstat_args { int olstat(p, uap, retval) struct proc *p; - register struct olstat_args *uap; - int *retval; + register struct olstat_args /* { + syscallarg(char *) path; + syscallarg(struct ostat *) ub; + } */ *uap; + register_t *retval; { struct vnode *vp, *dvp; struct stat sb, sb1; @@ -1203,9 +1467,8 @@ olstat(p, uap, retval) struct nameidata nd; NDINIT(&nd, LOOKUP, NOFOLLOW | LOCKLEAF | LOCKPARENT, UIO_USERSPACE, - uap->path, p); - error = namei(&nd); - if (error) + SCARG(uap, path), p); + if (error = namei(&nd)) return (error); /* * For symbolic links, always return the attributes of its @@ -1240,7 +1503,7 @@ olstat(p, uap, retval) sb.st_blocks = sb1.st_blocks; } cvtstat(&sb, &osb); - error = copyout((caddr_t)&osb, (caddr_t)uap->ub, sizeof (osb)); + error = copyout((caddr_t)&osb, (caddr_t)SCARG(uap, ub), sizeof (osb)); return (error); } @@ -1287,22 +1550,25 @@ struct stat_args { int stat(p, uap, retval) struct proc *p; - register struct stat_args *uap; - int *retval; + register struct stat_args /* { + syscallarg(char *) path; + syscallarg(struct stat *) ub; + } */ *uap; + register_t *retval; { struct stat sb; int error; struct nameidata nd; - NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_USERSPACE, uap->path, p); - error = namei(&nd); - if (error) + NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_USERSPACE, + SCARG(uap, path), p); + if (error = namei(&nd)) return (error); error = vn_stat(nd.ni_vp, &sb, p); vput(nd.ni_vp); if (error) return (error); - error = copyout((caddr_t)&sb, (caddr_t)uap->ub, sizeof (sb)); + error = copyout((caddr_t)&sb, (caddr_t)SCARG(uap, ub), sizeof (sb)); return (error); } @@ -1319,8 +1585,11 @@ struct lstat_args { int lstat(p, uap, retval) struct proc *p; - register struct lstat_args *uap; - int *retval; + register struct lstat_args /* { + syscallarg(char *) path; + syscallarg(struct stat *) ub; + } */ *uap; + register_t *retval; { int error; struct vnode 
*vp, *dvp; @@ -1328,13 +1597,12 @@ lstat(p, uap, retval) struct nameidata nd; NDINIT(&nd, LOOKUP, NOFOLLOW | LOCKLEAF | LOCKPARENT, UIO_USERSPACE, - uap->path, p); - error = namei(&nd); - if (error) + SCARG(uap, path), p); + if (error = namei(&nd)) return (error); /* - * For symbolic links, always return the attributes of its - * containing directory, except for mode, size, and links. + * For symbolic links, always return the attributes of its containing + * directory, except for mode, size, inode number, and links. */ vp = nd.ni_vp; dvp = nd.ni_dvp; @@ -1363,8 +1631,9 @@ lstat(p, uap, retval) sb.st_nlink = sb1.st_nlink; sb.st_size = sb1.st_size; sb.st_blocks = sb1.st_blocks; + sb.st_ino = sb1.st_ino; } - error = copyout((caddr_t)&sb, (caddr_t)uap->ub, sizeof (sb)); + error = copyout((caddr_t)&sb, (caddr_t)SCARG(uap, ub), sizeof (sb)); return (error); } @@ -1381,17 +1650,20 @@ struct pathconf_args { int pathconf(p, uap, retval) struct proc *p; - register struct pathconf_args *uap; - int *retval; + register struct pathconf_args /* { + syscallarg(char *) path; + syscallarg(int) name; + } */ *uap; + register_t *retval; { int error; struct nameidata nd; - NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_USERSPACE, uap->path, p); - error = namei(&nd); - if (error) + NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_USERSPACE, + SCARG(uap, path), p); + if (error = namei(&nd)) return (error); - error = VOP_PATHCONF(nd.ni_vp, uap->name, retval); + error = VOP_PATHCONF(nd.ni_vp, SCARG(uap, name), retval); vput(nd.ni_vp); return (error); } @@ -1410,8 +1682,12 @@ struct readlink_args { int readlink(p, uap, retval) struct proc *p; - register struct readlink_args *uap; - int *retval; + register struct readlink_args /* { + syscallarg(char *) path; + syscallarg(char *) buf; + syscallarg(int) count; + } */ *uap; + register_t *retval; { register struct vnode *vp; struct iovec aiov; @@ -1419,27 +1695,27 @@ readlink(p, uap, retval) int error; struct nameidata nd; - NDINIT(&nd, LOOKUP, NOFOLLOW | 
LOCKLEAF, UIO_USERSPACE, uap->path, p); - error = namei(&nd); - if (error) + NDINIT(&nd, LOOKUP, NOFOLLOW | LOCKLEAF, UIO_USERSPACE, + SCARG(uap, path), p); + if (error = namei(&nd)) return (error); vp = nd.ni_vp; if (vp->v_type != VLNK) error = EINVAL; else { - aiov.iov_base = uap->buf; - aiov.iov_len = uap->count; + aiov.iov_base = SCARG(uap, buf); + aiov.iov_len = SCARG(uap, count); auio.uio_iov = &aiov; auio.uio_iovcnt = 1; auio.uio_offset = 0; auio.uio_rw = UIO_READ; auio.uio_segflg = UIO_USERSPACE; auio.uio_procp = p; - auio.uio_resid = uap->count; + auio.uio_resid = SCARG(uap, count); error = VOP_READLINK(vp, &auio, p->p_ucred); } vput(vp); - *retval = uap->count - auio.uio_resid; + *retval = SCARG(uap, count) - auio.uio_resid; return (error); } @@ -1456,23 +1732,25 @@ struct chflags_args { int chflags(p, uap, retval) struct proc *p; - register struct chflags_args *uap; - int *retval; + register struct chflags_args /* { + syscallarg(char *) path; + syscallarg(int) flags; + } */ *uap; + register_t *retval; { register struct vnode *vp; struct vattr vattr; int error; struct nameidata nd; - NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, uap->path, p); - error = namei(&nd); - if (error) + NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, SCARG(uap, path), p); + if (error = namei(&nd)) return (error); vp = nd.ni_vp; - LEASE_CHECK(vp, p, p->p_ucred, LEASE_WRITE); - VOP_LOCK(vp); + VOP_LEASE(vp, p, p->p_ucred, LEASE_WRITE); + vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p); VATTR_NULL(&vattr); - vattr.va_flags = uap->flags; + vattr.va_flags = SCARG(uap, flags); error = VOP_SETATTR(vp, &vattr, p->p_ucred, p); vput(vp); return (error); @@ -1491,24 +1769,26 @@ struct fchflags_args { int fchflags(p, uap, retval) struct proc *p; - register struct fchflags_args *uap; - int *retval; + register struct fchflags_args /* { + syscallarg(int) fd; + syscallarg(int) flags; + } */ *uap; + register_t *retval; { struct vattr vattr; struct vnode *vp; struct file *fp; int error; - error = 
getvnode(p->p_fd, uap->fd, &fp); - if (error) + if (error = getvnode(p->p_fd, SCARG(uap, fd), &fp)) return (error); vp = (struct vnode *)fp->f_data; - LEASE_CHECK(vp, p, p->p_ucred, LEASE_WRITE); - VOP_LOCK(vp); + VOP_LEASE(vp, p, p->p_ucred, LEASE_WRITE); + vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p); VATTR_NULL(&vattr); - vattr.va_flags = uap->flags; + vattr.va_flags = SCARG(uap, flags); error = VOP_SETATTR(vp, &vattr, p->p_ucred, p); - VOP_UNLOCK(vp); + VOP_UNLOCK(vp, 0, p); return (error); } @@ -1525,23 +1805,25 @@ struct chmod_args { int chmod(p, uap, retval) struct proc *p; - register struct chmod_args *uap; - int *retval; + register struct chmod_args /* { + syscallarg(char *) path; + syscallarg(int) mode; + } */ *uap; + register_t *retval; { register struct vnode *vp; struct vattr vattr; int error; struct nameidata nd; - NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, uap->path, p); - error = namei(&nd); - if (error) + NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, SCARG(uap, path), p); + if (error = namei(&nd)) return (error); vp = nd.ni_vp; - LEASE_CHECK(vp, p, p->p_ucred, LEASE_WRITE); - VOP_LOCK(vp); + VOP_LEASE(vp, p, p->p_ucred, LEASE_WRITE); + vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p); VATTR_NULL(&vattr); - vattr.va_mode = uap->mode & ALLPERMS; + vattr.va_mode = SCARG(uap, mode) & ALLPERMS; error = VOP_SETATTR(vp, &vattr, p->p_ucred, p); vput(vp); return (error); @@ -1560,24 +1842,26 @@ struct fchmod_args { int fchmod(p, uap, retval) struct proc *p; - register struct fchmod_args *uap; - int *retval; + register struct fchmod_args /* { + syscallarg(int) fd; + syscallarg(int) mode; + } */ *uap; + register_t *retval; { struct vattr vattr; struct vnode *vp; struct file *fp; int error; - error = getvnode(p->p_fd, uap->fd, &fp); - if (error) + if (error = getvnode(p->p_fd, SCARG(uap, fd), &fp)) return (error); vp = (struct vnode *)fp->f_data; - LEASE_CHECK(vp, p, p->p_ucred, LEASE_WRITE); - VOP_LOCK(vp); + VOP_LEASE(vp, p, p->p_ucred, LEASE_WRITE); + vn_lock(vp, 
LK_EXCLUSIVE | LK_RETRY, p); VATTR_NULL(&vattr); - vattr.va_mode = uap->mode & ALLPERMS; + vattr.va_mode = SCARG(uap, mode) & ALLPERMS; error = VOP_SETATTR(vp, &vattr, p->p_ucred, p); - VOP_UNLOCK(vp); + VOP_UNLOCK(vp, 0, p); return (error); } @@ -1595,24 +1879,27 @@ struct chown_args { int chown(p, uap, retval) struct proc *p; - register struct chown_args *uap; - int *retval; + register struct chown_args /* { + syscallarg(char *) path; + syscallarg(int) uid; + syscallarg(int) gid; + } */ *uap; + register_t *retval; { register struct vnode *vp; struct vattr vattr; int error; struct nameidata nd; - NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, uap->path, p); - error = namei(&nd); - if (error) + NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, SCARG(uap, path), p); + if (error = namei(&nd)) return (error); vp = nd.ni_vp; - LEASE_CHECK(vp, p, p->p_ucred, LEASE_WRITE); - VOP_LOCK(vp); + VOP_LEASE(vp, p, p->p_ucred, LEASE_WRITE); + vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p); VATTR_NULL(&vattr); - vattr.va_uid = uap->uid; - vattr.va_gid = uap->gid; + vattr.va_uid = SCARG(uap, uid); + vattr.va_gid = SCARG(uap, gid); error = VOP_SETATTR(vp, &vattr, p->p_ucred, p); vput(vp); return (error); @@ -1632,25 +1919,28 @@ struct fchown_args { int fchown(p, uap, retval) struct proc *p; - register struct fchown_args *uap; - int *retval; + register struct fchown_args /* { + syscallarg(int) fd; + syscallarg(int) uid; + syscallarg(int) gid; + } */ *uap; + register_t *retval; { struct vattr vattr; struct vnode *vp; struct file *fp; int error; - error = getvnode(p->p_fd, uap->fd, &fp); - if (error) + if (error = getvnode(p->p_fd, SCARG(uap, fd), &fp)) return (error); vp = (struct vnode *)fp->f_data; - LEASE_CHECK(vp, p, p->p_ucred, LEASE_WRITE); - VOP_LOCK(vp); + VOP_LEASE(vp, p, p->p_ucred, LEASE_WRITE); + vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p); VATTR_NULL(&vattr); - vattr.va_uid = uap->uid; - vattr.va_gid = uap->gid; + vattr.va_uid = SCARG(uap, uid); + vattr.va_gid = SCARG(uap, gid); error = 
VOP_SETATTR(vp, &vattr, p->p_ucred, p); - VOP_UNLOCK(vp); + VOP_UNLOCK(vp, 0, p); return (error); } @@ -1667,8 +1957,11 @@ struct utimes_args { int utimes(p, uap, retval) struct proc *p; - register struct utimes_args *uap; - int *retval; + register struct utimes_args /* { + syscallarg(char *) path; + syscallarg(struct timeval *) tptr; + } */ *uap; + register_t *retval; { register struct vnode *vp; struct timeval tv[2]; @@ -1677,22 +1970,19 @@ utimes(p, uap, retval) struct nameidata nd; VATTR_NULL(&vattr); - if (uap->tptr == NULL) { + if (SCARG(uap, tptr) == NULL) { microtime(&tv[0]); tv[1] = tv[0]; vattr.va_vaflags |= VA_UTIMES_NULL; - } else { - error = copyin((caddr_t)uap->tptr, (caddr_t)tv, sizeof (tv)); - if (error) - return (error); - } - NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, uap->path, p); - error = namei(&nd); - if (error) + } else if (error = copyin((caddr_t)SCARG(uap, tptr), (caddr_t)tv, + sizeof (tv))) + return (error); + NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, SCARG(uap, path), p); + if (error = namei(&nd)) return (error); vp = nd.ni_vp; - LEASE_CHECK(vp, p, p->p_ucred, LEASE_WRITE); - VOP_LOCK(vp); + VOP_LEASE(vp, p, p->p_ucred, LEASE_WRITE); + vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p); vattr.va_atime.tv_sec = tv[0].tv_sec; vattr.va_atime.tv_nsec = tv[0].tv_usec * 1000; vattr.va_mtime.tv_sec = tv[1].tv_sec; @@ -1716,8 +2006,12 @@ struct truncate_args { int truncate(p, uap, retval) struct proc *p; - register struct truncate_args *uap; - int *retval; + register struct truncate_args /* { + syscallarg(char *) path; + syscallarg(int) pad; + syscallarg(off_t) length; + } */ *uap; + register_t *retval; { register struct vnode *vp; struct vattr vattr; @@ -1726,19 +2020,18 @@ truncate(p, uap, retval) if (uap->length < 0) return(EINVAL); - NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, uap->path, p); - error = namei(&nd); - if (error) + NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, SCARG(uap, path), p); + if (error = namei(&nd)) return (error); vp = nd.ni_vp; - 
LEASE_CHECK(vp, p, p->p_ucred, LEASE_WRITE); - VOP_LOCK(vp); + VOP_LEASE(vp, p, p->p_ucred, LEASE_WRITE); + vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p); if (vp->v_type == VDIR) error = EISDIR; else if ((error = vn_writechk(vp)) == 0 && (error = VOP_ACCESS(vp, VWRITE, p->p_ucred, p)) == 0) { VATTR_NULL(&vattr); - vattr.va_size = uap->length; + vattr.va_size = SCARG(uap, length); error = VOP_SETATTR(vp, &vattr, p->p_ucred, p); } vput(vp); @@ -1759,8 +2052,12 @@ struct ftruncate_args { int ftruncate(p, uap, retval) struct proc *p; - register struct ftruncate_args *uap; - int *retval; + register struct ftruncate_args /* { + syscallarg(int) fd; + syscallarg(int) pad; + syscallarg(off_t) length; + } */ *uap; + register_t *retval; { struct vattr vattr; struct vnode *vp; @@ -1769,22 +2066,21 @@ ftruncate(p, uap, retval) if (uap->length < 0) return(EINVAL); - error = getvnode(p->p_fd, uap->fd, &fp); - if (error) + if (error = getvnode(p->p_fd, SCARG(uap, fd), &fp)) return (error); if ((fp->f_flag & FWRITE) == 0) return (EINVAL); vp = (struct vnode *)fp->f_data; - LEASE_CHECK(vp, p, p->p_ucred, LEASE_WRITE); - VOP_LOCK(vp); + VOP_LEASE(vp, p, p->p_ucred, LEASE_WRITE); + vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p); if (vp->v_type == VDIR) error = EISDIR; else if ((error = vn_writechk(vp)) == 0) { VATTR_NULL(&vattr); - vattr.va_size = uap->length; + vattr.va_size = SCARG(uap, length); error = VOP_SETATTR(vp, &vattr, fp->f_cred, p); } - VOP_UNLOCK(vp); + VOP_UNLOCK(vp, 0, p); return (error); } @@ -1802,13 +2098,20 @@ struct otruncate_args { int otruncate(p, uap, retval) struct proc *p; - register struct otruncate_args *uap; - int *retval; + register struct otruncate_args /* { + syscallarg(char *) path; + syscallarg(long) length; + } */ *uap; + register_t *retval; { - struct truncate_args nuap; - - nuap.path = uap->path; - nuap.length = uap->length; + struct truncate_args /* { + syscallarg(char *) path; + syscallarg(int) pad; + syscallarg(off_t) length; + } */ nuap; + + SCARG(&nuap, 
path) = SCARG(uap, path); + SCARG(&nuap, length) = SCARG(uap, length); return (truncate(p, &nuap, retval)); } @@ -1825,13 +2128,20 @@ struct oftruncate_args { int oftruncate(p, uap, retval) struct proc *p; - register struct oftruncate_args *uap; - int *retval; + register struct oftruncate_args /* { + syscallarg(int) fd; + syscallarg(long) length; + } */ *uap; + register_t *retval; { - struct ftruncate_args nuap; - - nuap.fd = uap->fd; - nuap.length = uap->length; + struct ftruncate_args /* { + syscallarg(int) fd; + syscallarg(int) pad; + syscallarg(off_t) length; + } */ nuap; + + SCARG(&nuap, fd) = SCARG(uap, fd); + SCARG(&nuap, length) = SCARG(uap, length); return (ftruncate(p, &nuap, retval)); } #endif /* COMPAT_43 || COMPAT_SUNOS */ @@ -1848,24 +2158,25 @@ struct fsync_args { int fsync(p, uap, retval) struct proc *p; - struct fsync_args *uap; - int *retval; + struct fsync_args /* { + syscallarg(int) fd; + } */ *uap; + register_t *retval; { register struct vnode *vp; struct file *fp; int error; - error = getvnode(p->p_fd, uap->fd, &fp); - if (error) + if (error = getvnode(p->p_fd, SCARG(uap, fd), &fp)) return (error); vp = (struct vnode *)fp->f_data; - VOP_LOCK(vp); + vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p); if (vp->v_object) { vm_object_page_clean(vp->v_object, 0, 0 ,0, FALSE); } error = VOP_FSYNC(vp, fp->f_cred, (vp->v_mount->mnt_flag & MNT_ASYNC) ? 
MNT_NOWAIT : MNT_WAIT, p); - VOP_UNLOCK(vp); + VOP_UNLOCK(vp, 0, p); return (error); } @@ -1883,29 +2194,29 @@ struct rename_args { int rename(p, uap, retval) struct proc *p; - register struct rename_args *uap; - int *retval; + register struct rename_args /* { + syscallarg(char *) from; + syscallarg(char *) to; + } */ *uap; + register_t *retval; { register struct vnode *tvp, *fvp, *tdvp; struct nameidata fromnd, tond; int error; NDINIT(&fromnd, DELETE, WANTPARENT | SAVESTART, UIO_USERSPACE, - uap->from, p); - error = namei(&fromnd); - if (error) + SCARG(uap, from), p); + if (error = namei(&fromnd)) return (error); fvp = fromnd.ni_vp; NDINIT(&tond, RENAME, LOCKPARENT | LOCKLEAF | NOCACHE | SAVESTART, - UIO_USERSPACE, uap->to, p); + UIO_USERSPACE, SCARG(uap, to), p); if (fromnd.ni_vp->v_type == VDIR) tond.ni_cnd.cn_flags |= WILLBEDIR; - error = namei(&tond); - if (error) { + if (error = namei(&tond)) { /* Translate error code for rename("dir1", "dir2/."). */ if (error == EISDIR && fvp->v_type == VDIR) error = EINVAL; - VOP_ABORTOP(fromnd.ni_dvp, &fromnd.ni_cnd); vrele(fromnd.ni_dvp); vrele(fvp); @@ -1936,13 +2247,12 @@ rename(p, uap, retval) error = -1; out: if (!error) { - LEASE_CHECK(tdvp, p, p->p_ucred, LEASE_WRITE); - if (fromnd.ni_dvp != tdvp) { - LEASE_CHECK(fromnd.ni_dvp, p, p->p_ucred, LEASE_WRITE); - } + VOP_LEASE(tdvp, p, p->p_ucred, LEASE_WRITE); + if (fromnd.ni_dvp != tdvp) + VOP_LEASE(fromnd.ni_dvp, p, p->p_ucred, LEASE_WRITE); if (tvp) { - LEASE_CHECK(tvp, p, p->p_ucred, LEASE_WRITE); - (void) vnode_pager_uncache(tvp); + VOP_LEASE(tvp, p, p->p_ucred, LEASE_WRITE); + (void) vnode_pager_uncache(tvp, p); } error = VOP_RENAME(fromnd.ni_dvp, fromnd.ni_vp, &fromnd.ni_cnd, tond.ni_dvp, tond.ni_vp, &tond.ni_cnd); @@ -1982,18 +2292,20 @@ struct mkdir_args { int mkdir(p, uap, retval) struct proc *p; - register struct mkdir_args *uap; - int *retval; + register struct mkdir_args /* { + syscallarg(char *) path; + syscallarg(int) mode; + } */ *uap; + register_t 
*retval; { register struct vnode *vp; struct vattr vattr; int error; struct nameidata nd; - NDINIT(&nd, CREATE, LOCKPARENT, UIO_USERSPACE, uap->path, p); + NDINIT(&nd, CREATE, LOCKPARENT, UIO_USERSPACE, SCARG(uap, path), p); nd.ni_cnd.cn_flags |= WILLBEDIR; - error = namei(&nd); - if (error) + if (error = namei(&nd)) return (error); vp = nd.ni_vp; if (vp != NULL) { @@ -2007,8 +2319,8 @@ mkdir(p, uap, retval) } VATTR_NULL(&vattr); vattr.va_type = VDIR; - vattr.va_mode = (uap->mode & ACCESSPERMS) &~ p->p_fd->fd_cmask; - LEASE_CHECK(nd.ni_dvp, p, p->p_ucred, LEASE_WRITE); + vattr.va_mode = (SCARG(uap, mode) & ACCESSPERMS) &~ p->p_fd->fd_cmask; + VOP_LEASE(nd.ni_dvp, p, p->p_ucred, LEASE_WRITE); error = VOP_MKDIR(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr); if (!error) vput(nd.ni_vp); @@ -2027,16 +2339,18 @@ struct rmdir_args { int rmdir(p, uap, retval) struct proc *p; - struct rmdir_args *uap; - int *retval; + struct rmdir_args /* { + syscallarg(char *) path; + } */ *uap; + register_t *retval; { register struct vnode *vp; int error; struct nameidata nd; - NDINIT(&nd, DELETE, LOCKPARENT | LOCKLEAF, UIO_USERSPACE, uap->path, p); - error = namei(&nd); - if (error) + NDINIT(&nd, DELETE, LOCKPARENT | LOCKLEAF, UIO_USERSPACE, + SCARG(uap, path), p); + if (error = namei(&nd)) return (error); vp = nd.ni_vp; if (vp->v_type != VDIR) { @@ -2057,8 +2371,8 @@ rmdir(p, uap, retval) error = EBUSY; out: if (!error) { - LEASE_CHECK(nd.ni_dvp, p, p->p_ucred, LEASE_WRITE); - LEASE_CHECK(vp, p, p->p_ucred, LEASE_WRITE); + VOP_LEASE(nd.ni_dvp, p, p->p_ucred, LEASE_WRITE); + VOP_LEASE(vp, p, p->p_ucred, LEASE_WRITE); error = VOP_RMDIR(nd.ni_dvp, nd.ni_vp, &nd.ni_cnd); } else { VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); @@ -2086,8 +2400,13 @@ struct ogetdirentries_args { int ogetdirentries(p, uap, retval) struct proc *p; - register struct ogetdirentries_args *uap; - int *retval; + register struct ogetdirentries_args /* { + syscallarg(int) fd; + syscallarg(char *) buf; + syscallarg(u_int) count; + 
syscallarg(long *) basep; + } */ *uap; + register_t *retval; { register struct vnode *vp; struct file *fp; @@ -2095,30 +2414,31 @@ ogetdirentries(p, uap, retval) struct iovec aiov, kiov; struct dirent *dp, *edp; caddr_t dirbuf; - int error, readcnt; + int error, eofflag, readcnt; long loff; - error = getvnode(p->p_fd, uap->fd, &fp); - if (error) + if (error = getvnode(p->p_fd, SCARG(uap, fd), &fp)) return (error); if ((fp->f_flag & FREAD) == 0) return (EBADF); vp = (struct vnode *)fp->f_data; +unionread: if (vp->v_type != VDIR) return (EINVAL); - aiov.iov_base = uap->buf; - aiov.iov_len = uap->count; + aiov.iov_base = SCARG(uap, buf); + aiov.iov_len = SCARG(uap, count); auio.uio_iov = &aiov; auio.uio_iovcnt = 1; auio.uio_rw = UIO_READ; auio.uio_segflg = UIO_USERSPACE; auio.uio_procp = p; - auio.uio_resid = uap->count; - VOP_LOCK(vp); + auio.uio_resid = SCARG(uap, count); + vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p); loff = auio.uio_offset = fp->f_offset; # if (BYTE_ORDER != LITTLE_ENDIAN) if (vp->v_mount->mnt_maxsymlinklen <= 0) { - error = VOP_READDIR(vp, &auio, fp->f_cred, NULL, NULL, NULL); + error = VOP_READDIR(vp, &auio, fp->f_cred, &eofflag, + NULL, NULL); fp->f_offset = auio.uio_offset; } else # endif @@ -2126,13 +2446,14 @@ ogetdirentries(p, uap, retval) kuio = auio; kuio.uio_iov = &kiov; kuio.uio_segflg = UIO_SYSSPACE; - kiov.iov_len = uap->count; - MALLOC(dirbuf, caddr_t, uap->count, M_TEMP, M_WAITOK); + kiov.iov_len = SCARG(uap, count); + MALLOC(dirbuf, caddr_t, SCARG(uap, count), M_TEMP, M_WAITOK); kiov.iov_base = dirbuf; - error = VOP_READDIR(vp, &kuio, fp->f_cred, NULL, NULL, NULL); + error = VOP_READDIR(vp, &kuio, fp->f_cred, &eofflag, + NULL, NULL); fp->f_offset = kuio.uio_offset; if (error == 0) { - readcnt = uap->count - kuio.uio_resid; + readcnt = SCARG(uap, count) - kuio.uio_resid; edp = (struct dirent *)&dirbuf[readcnt]; for (dp = (struct dirent *)dirbuf; dp < edp; ) { # if (BYTE_ORDER == LITTLE_ENDIAN) @@ -2165,14 +2486,70 @@ ogetdirentries(p, 
uap, retval) } FREE(dirbuf, M_TEMP); } - VOP_UNLOCK(vp); + VOP_UNLOCK(vp, 0, p); if (error) return (error); - error = copyout((caddr_t)&loff, (caddr_t)uap->basep, sizeof(long)); - *retval = uap->count - auio.uio_resid; + +#ifdef UNION +{ + extern int (**union_vnodeop_p)(); + extern struct vnode *union_dircache __P((struct vnode*, struct proc*)); + + if ((SCARG(uap, count) == auio.uio_resid) && + (vp->v_op == union_vnodeop_p)) { + struct vnode *lvp; + + lvp = union_dircache(vp, p); + if (lvp != NULLVP) { + struct vattr va; + + /* + * If the directory is opaque, + * then don't show lower entries + */ + error = VOP_GETATTR(vp, &va, fp->f_cred, p); + if (va.va_flags & OPAQUE) { + vput(lvp); + lvp = NULL; + } + } + + if (lvp != NULLVP) { + error = VOP_OPEN(lvp, FREAD, fp->f_cred, p); + if (error) { + vput(lvp); + return (error); + } + VOP_UNLOCK(lvp, 0, p); + fp->f_data = (caddr_t) lvp; + fp->f_offset = 0; + error = vn_close(vp, FREAD, fp->f_cred, p); + if (error) + return (error); + vp = lvp; + goto unionread; + } + } +} +#endif /* UNION */ + + if ((SCARG(uap, count) == auio.uio_resid) && + (vp->v_flag & VROOT) && + (vp->v_mount->mnt_flag & MNT_UNION)) { + struct vnode *tvp = vp; + vp = vp->v_mount->mnt_vnodecovered; + VREF(vp); + fp->f_data = (caddr_t) vp; + fp->f_offset = 0; + vrele(tvp); + goto unionread; + } + error = copyout((caddr_t)&loff, (caddr_t)SCARG(uap, basep), + sizeof(long)); + *retval = SCARG(uap, count) - auio.uio_resid; return (error); } -#endif +#endif /* COMPAT_43 */ /* * Read a block of directory entries in a file system independent format. 
@@ -2188,18 +2565,22 @@ struct getdirentries_args { int getdirentries(p, uap, retval) struct proc *p; - register struct getdirentries_args *uap; - int *retval; + register struct getdirentries_args /* { + syscallarg(int) fd; + syscallarg(char *) buf; + syscallarg(u_int) count; + syscallarg(long *) basep; + } */ *uap; + register_t *retval; { register struct vnode *vp; struct file *fp; struct uio auio; struct iovec aiov; long loff; - int error; + int error, eofflag; - error = getvnode(p->p_fd, uap->fd, &fp); - if (error) + if (error = getvnode(p->p_fd, SCARG(uap, fd), &fp)) return (error); if ((fp->f_flag & FREAD) == 0) return (EBADF); @@ -2207,51 +2588,66 @@ getdirentries(p, uap, retval) unionread: if (vp->v_type != VDIR) return (EINVAL); - aiov.iov_base = uap->buf; - aiov.iov_len = uap->count; + aiov.iov_base = SCARG(uap, buf); + aiov.iov_len = SCARG(uap, count); auio.uio_iov = &aiov; auio.uio_iovcnt = 1; auio.uio_rw = UIO_READ; auio.uio_segflg = UIO_USERSPACE; auio.uio_procp = p; - auio.uio_resid = uap->count; - VOP_LOCK(vp); + auio.uio_resid = SCARG(uap, count); + vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p); loff = auio.uio_offset = fp->f_offset; - error = VOP_READDIR(vp, &auio, fp->f_cred, NULL, NULL, NULL); + error = VOP_READDIR(vp, &auio, fp->f_cred, &eofflag, NULL, NULL); fp->f_offset = auio.uio_offset; - VOP_UNLOCK(vp); + VOP_UNLOCK(vp, 0, p); if (error) return (error); #ifdef UNION { - if ((uap->count == auio.uio_resid) && + extern int (**union_vnodeop_p)(); + extern struct vnode *union_dircache __P((struct vnode*, struct proc*)); + + if ((SCARG(uap, count) == auio.uio_resid) && (vp->v_op == union_vnodeop_p)) { - struct vnode *tvp = vp; + struct vnode *lvp; + + lvp = union_dircache(vp, p); + if (lvp != NULLVP) { + struct vattr va; - vp = union_lowervp(vp); - if (vp != NULLVP) { - VOP_LOCK(vp); - error = VOP_OPEN(vp, FREAD, fp->f_cred, p); - VOP_UNLOCK(vp); + /* + * If the directory is opaque, + * then don't show lower entries + */ + error = VOP_GETATTR(vp, &va, 
fp->f_cred, p); + if (va.va_flags & OPAQUE) { + vput(lvp); + lvp = NULL; + } + } + if (lvp != NULLVP) { + error = VOP_OPEN(lvp, FREAD, fp->f_cred, p); if (error) { - vrele(vp); + vput(lvp); return (error); } - fp->f_data = (caddr_t) vp; + VOP_UNLOCK(lvp, 0, p); + fp->f_data = (caddr_t) lvp; fp->f_offset = 0; - error = vn_close(tvp, FREAD, fp->f_cred, p); + error = vn_close(vp, FREAD, fp->f_cred, p); if (error) return (error); + vp = lvp; goto unionread; } } } -#endif +#endif /* UNION */ - if ((uap->count == auio.uio_resid) && - vp && + if ((SCARG(uap, count) == auio.uio_resid) && (vp->v_flag & VROOT) && (vp->v_mount->mnt_flag & MNT_UNION)) { struct vnode *tvp = vp; @@ -2262,8 +2658,9 @@ unionread: vrele(tvp); goto unionread; } - error = copyout((caddr_t)&loff, (caddr_t)uap->basep, sizeof(long)); - *retval = uap->count - auio.uio_resid; + error = copyout((caddr_t)&loff, (caddr_t)SCARG(uap, basep), + sizeof(long)); + *retval = SCARG(uap, count) - auio.uio_resid; return (error); } @@ -2275,17 +2672,19 @@ struct umask_args { int newmask; }; #endif -mode_t /* XXX */ +int umask(p, uap, retval) struct proc *p; - struct umask_args *uap; - int *retval; + struct umask_args /* { + syscallarg(int) newmask; + } */ *uap; + int *retval; /* XXX */ { register struct filedesc *fdp; fdp = p->p_fd; *retval = fdp->fd_cmask; - fdp->fd_cmask = uap->newmask & ALLPERMS; + fdp->fd_cmask = SCARG(uap, newmask) & ALLPERMS; return (0); } @@ -2302,31 +2701,27 @@ struct revoke_args { int revoke(p, uap, retval) struct proc *p; - register struct revoke_args *uap; - int *retval; + register struct revoke_args /* { + syscallarg(char *) path; + } */ *uap; + register_t *retval; { register struct vnode *vp; struct vattr vattr; int error; struct nameidata nd; - NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, uap->path, p); - error = namei(&nd); - if (error) + NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, SCARG(uap, path), p); + if (error = namei(&nd)) return (error); vp = nd.ni_vp; - if (vp->v_type != VCHR && 
vp->v_type != VBLK) { - error = EINVAL; - goto out; - } - error = VOP_GETATTR(vp, &vattr, p->p_ucred, p); - if (error) + if (error = VOP_GETATTR(vp, &vattr, p->p_ucred, p)) goto out; if (p->p_ucred->cr_uid != vattr.va_uid && (error = suser(p->p_ucred, &p->p_acflag))) goto out; if (vp->v_usecount > 1 || (vp->v_flag & VALIASED)) - vgoneall(vp); + VOP_REVOKE(vp, REVOKEALL); out: vrele(vp); return (error); diff --git a/sys/kern/vfs_init.c b/sys/kern/vfs_init.c index f4e96e4..161ff0c 100644 --- a/sys/kern/vfs_init.c +++ b/sys/kern/vfs_init.c @@ -76,8 +76,6 @@ extern struct linker_set vfs_opv_descs_; #define vfs_opv_descs ((struct vnodeopv_desc **)vfs_opv_descs_.ls_items) extern struct linker_set vfs_set; -struct vfsops *vfssw[MOUNT_MAXTYPE + 1]; -struct vfsconf *vfsconf[MOUNT_MAXTYPE + 1]; extern struct vnodeop_desc *vfs_op_descs[]; /* and the operations they perform */ @@ -239,23 +237,8 @@ static void vfsinit(dummy) void *dummy; { - struct vfsops **vfsp; struct vfsconf **vfc; - int i; - - /* - * Initialize the VFS switch table - */ - for(i = 0; i < MOUNT_MAXTYPE + 1; i++) { - vfsconf[i] = &void_vfsconf; - } - - vfc = (struct vfsconf **)vfs_set.ls_items; - while(*vfc) { - vfssw[(**vfc).vfc_index] = (**vfc).vfc_vfsops; - vfsconf[(**vfc).vfc_index] = *vfc; - vfc++; - } + int maxtypenum; /* * Initialize the vnode table @@ -274,11 +257,20 @@ vfsinit(dummy) * Initialize each file system type. 
*/ vattr_null(&va_null); - for (vfsp = &vfssw[0]; vfsp <= &vfssw[MOUNT_MAXTYPE]; vfsp++) { - if (*vfsp == NULL) - continue; - (*(*vfsp)->vfs_init)(); + maxtypenum = 0; + vfc = (struct vfsconf **)vfs_set.ls_items; + vfsconf = *vfc; /* simulate Lite2 vfsconf array */ + while (*vfc) { + struct vfsconf *vfsp = *vfc; + + vfc++; + vfsp->vfc_next = *vfc; + if (maxtypenum <= vfsp->vfc_typenum) + maxtypenum = vfsp->vfc_typenum + 1; + (*vfsp->vfc_vfsops->vfs_init)(vfsp); } + /* next vfc_typenum to be used */ + maxvfsconf = maxtypenum; } /* @@ -286,29 +278,81 @@ vfsinit(dummy) */ static int -sysctl_vfs_vfsconf SYSCTL_HANDLER_ARGS +sysctl_vfs_conf SYSCTL_HANDLER_ARGS { - int i, error; + int error; + struct vfsconf *vfsp; if (req->newptr) return EINVAL; - for(i = 0; i < MOUNT_MAXTYPE + 1; i++) { - error = SYSCTL_OUT(req, vfsconf[i], sizeof *vfsconf[i]); - if(error) + for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next) { + error = SYSCTL_OUT(req, vfsp, sizeof *vfsp); + if (error) return error; } - return (error); - + return 0; } SYSCTL_PROC(_vfs, VFS_VFSCONF, vfsconf, CTLTYPE_OPAQUE|CTLFLAG_RD, - 0, 0, sysctl_vfs_vfsconf, "S,vfsconf", ""); + 0, 0, sysctl_vfs_conf, "S,vfsconf", ""); + +#ifdef COMPAT_PRELITE2 + +#define OVFS_MAXNAMELEN 32 +struct ovfsconf { + void *vfc_vfsops; + char vfc_name[OVFS_MAXNAMELEN]; + int vfc_index; + int vfc_refcount; + int vfc_flags; +}; + +static int +sysctl_ovfs_conf SYSCTL_HANDLER_ARGS +{ + int error; + struct vfsconf *vfsp; + + if (req->newptr) + return EINVAL; + for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next) { + struct ovfsconf ovfs; + ovfs.vfc_vfsops = NULL; + strcpy(ovfs.vfc_name, vfsp->vfc_name); + ovfs.vfc_index = vfsp->vfc_typenum; + ovfs.vfc_refcount = vfsp->vfc_refcount; + ovfs.vfc_flags = vfsp->vfc_flags; + error = SYSCTL_OUT(req, &ovfs, sizeof ovfs); + if (error) + return error; + } + return 0; +} + +SYSCTL_PROC(_vfs, VFS_OVFSCONF, ovfsconf, CTLTYPE_OPAQUE|CTLFLAG_RD, + 0, 0, sysctl_ovfs_conf, "S,ovfsconf", ""); + +#endif /* 
COMPAT_PRELITE2 */ /* * This goop is here to support a loadable NFS module... grumble... */ -void (*lease_check) __P((struct vnode *, struct proc *, struct ucred *, int)) +int (*lease_check_hook) __P((struct vop_lease_args *)) = 0; void (*lease_updatetime) __P((int)) = 0; +int +lease_check(ap) + struct vop_lease_args /* { + struct vnode *a_vp; + struct proc *a_p; + struct ucred *a_cred; + int a_flag; + } */ *ap; +{ + if (lease_check_hook) + return (*lease_check_hook)(ap); + else + return 0; +} diff --git a/sys/kern/vfs_lookup.c b/sys/kern/vfs_lookup.c index 1cadfd0..120821d 100644 --- a/sys/kern/vfs_lookup.c +++ b/sys/kern/vfs_lookup.c @@ -88,6 +88,7 @@ namei(ndp) struct uio auio; int error, linklen; struct componentname *cnp = &ndp->ni_cnd; + struct proc *p = cnp->cn_proc; ndp->ni_cnd.cn_cred = ndp->ni_cnd.cn_proc->p_ucred; #ifdef DIAGNOSTIC @@ -169,7 +170,7 @@ namei(ndp) return (0); } if ((cnp->cn_flags & LOCKPARENT) && ndp->ni_pathlen == 1) - VOP_UNLOCK(ndp->ni_dvp); + VOP_UNLOCK(ndp->ni_dvp, 0, p); if (ndp->ni_loopcnt++ >= MAXSYMLINKS) { error = ELOOP; break; @@ -269,6 +270,7 @@ lookup(ndp) int trailing_slash; int error = 0; struct componentname *cnp = &ndp->ni_cnd; + struct proc *p = cnp->cn_proc; /* * Setup: break out flag bits into variables. @@ -283,7 +285,7 @@ lookup(ndp) cnp->cn_flags &= ~ISSYMLINK; dp = ndp->ni_startdir; ndp->ni_startdir = NULLVP; - VOP_LOCK(dp); + vn_lock(dp, LK_EXCLUSIVE | LK_RETRY, p); dirloop: /* @@ -351,21 +353,21 @@ dirloop: * e.g. like "/." or ".". 
*/ if (cnp->cn_nameptr[0] == '\0') { - if (cnp->cn_nameiop != LOOKUP) { - error = EISDIR; - goto bad; - } if (dp->v_type != VDIR) { error = ENOTDIR; goto bad; } + if (cnp->cn_nameiop != LOOKUP) { + error = EISDIR; + goto bad; + } if (wantparent) { ndp->ni_dvp = dp; VREF(dp); } ndp->ni_vp = dp; if (!(cnp->cn_flags & (LOCKPARENT | LOCKLEAF))) - VOP_UNLOCK(dp); + VOP_UNLOCK(dp, 0, p); if (cnp->cn_flags & SAVESTART) panic("lookup: SAVESTART"); return (0); @@ -396,7 +398,7 @@ dirloop: dp = dp->v_mount->mnt_vnodecovered; vput(tdp); VREF(dp); - VOP_LOCK(dp); + vn_lock(dp, LK_EXCLUSIVE | LK_RETRY, p); } } @@ -405,8 +407,8 @@ dirloop: */ unionlookup: ndp->ni_dvp = dp; - error = VOP_LOOKUP(dp, &ndp->ni_vp, cnp); - if (error) { + ndp->ni_vp = NULL; + if (error = VOP_LOOKUP(dp, &ndp->ni_vp, cnp)) { #ifdef DIAGNOSTIC if (ndp->ni_vp != NULL) panic("leaf should be empty"); @@ -421,7 +423,7 @@ unionlookup: dp = dp->v_mount->mnt_vnodecovered; vput(tdp); VREF(dp); - VOP_LOCK(dp); + vn_lock(dp, LK_EXCLUSIVE | LK_RETRY, p); goto unionlookup; } @@ -474,12 +476,10 @@ unionlookup: */ while (dp->v_type == VDIR && (mp = dp->v_mountedhere) && (cnp->cn_flags & NOCROSSMOUNT) == 0) { - if (mp->mnt_flag & MNT_MLOCK) { - mp->mnt_flag |= MNT_MWAIT; - (void) tsleep((caddr_t)mp, PVFS, "lookup", 0); + if (vfs_busy(mp, 0, 0, p)) continue; - } - error = VFS_ROOT(dp->v_mountedhere, &tdp); + error = VFS_ROOT(mp, &tdp); + vfs_unbusy(mp, p); if (error) goto bad2; vput(dp); @@ -533,12 +533,12 @@ nextname: if (!wantparent) vrele(ndp->ni_dvp); if ((cnp->cn_flags & LOCKLEAF) == 0) - VOP_UNLOCK(dp); + VOP_UNLOCK(dp, 0, p); return (0); bad2: if ((cnp->cn_flags & LOCKPARENT) && *ndp->ni_next == '\0') - VOP_UNLOCK(ndp->ni_dvp); + VOP_UNLOCK(ndp->ni_dvp, 0, p); vrele(ndp->ni_dvp); bad: vput(dp); @@ -555,7 +555,8 @@ relookup(dvp, vpp, cnp) struct vnode *dvp, **vpp; struct componentname *cnp; { - register struct vnode *dp = 0; /* the directory we are searching */ + struct proc *p = cnp->cn_proc; + struct vnode *dp = 
0; /* the directory we are searching */ int docache; /* == 0 do not cache last component */ int wantparent; /* 1 => wantparent or lockparent flag */ int rdonly; /* lookup read-only flag bit */ @@ -576,7 +577,7 @@ relookup(dvp, vpp, cnp) rdonly = cnp->cn_flags & RDONLY; cnp->cn_flags &= ~ISSYMLINK; dp = dvp; - VOP_LOCK(dp); + vn_lock(dp, LK_EXCLUSIVE | LK_RETRY, p); /* dirloop: */ /* @@ -615,7 +616,7 @@ relookup(dvp, vpp, cnp) goto bad; } if (!(cnp->cn_flags & LOCKLEAF)) - VOP_UNLOCK(dp); + VOP_UNLOCK(dp, 0, p); *vpp = dp; if (cnp->cn_flags & SAVESTART) panic("lookup: SAVESTART"); @@ -628,8 +629,7 @@ relookup(dvp, vpp, cnp) /* * We now have a segment name to search for, and a directory to search. */ - error = VOP_LOOKUP(dp, vpp, cnp); - if (error) { + if (error = VOP_LOOKUP(dp, vpp, cnp)) { #ifdef DIAGNOSTIC if (*vpp != NULL) panic("leaf should be empty"); @@ -675,16 +675,16 @@ relookup(dvp, vpp, cnp) /* ASSERT(dvp == ndp->ni_startdir) */ if (cnp->cn_flags & SAVESTART) VREF(dvp); - + if (!wantparent) vrele(dvp); if ((cnp->cn_flags & LOCKLEAF) == 0) - VOP_UNLOCK(dp); + VOP_UNLOCK(dp, 0, p); return (0); bad2: if ((cnp->cn_flags & LOCKPARENT) && (cnp->cn_flags & ISLASTCN)) - VOP_UNLOCK(dvp); + VOP_UNLOCK(dvp, 0, p); vrele(dvp); bad: vput(dp); diff --git a/sys/kern/vfs_mount.c b/sys/kern/vfs_mount.c index fb8061b..5a84570 100644 --- a/sys/kern/vfs_mount.c +++ b/sys/kern/vfs_mount.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 1989, 1993 + * Copyright (c) 1989, 1993, 1995 * The Regents of the University of California. All rights reserved. * Copyright (c) 1995 Artisoft, Inc. All Rights Reserved. * @@ -61,26 +61,33 @@ /* * GLOBALS */ -int (*mountroot) __P((void *)); + +/* + * These define the root filesystem, device, and root filesystem type. + */ +struct mount *rootfs; struct vnode *rootvnode; -struct vfsops *mountrootvfsops; +char *mountrootfsname; +/* + * vfs_init() will set maxvfsconf + * to the highest defined type number. 
+ */ +int maxvfsconf; +struct vfsconf *vfsconf; /* * Common root mount code shared by all filesystems */ -#define ROOTDIR "/" #define ROOTNAME "root_device" - - /* - * vfs_mountroot + * vfs_mountrootfs * * Common entry point for root mounts * * PARAMETERS: - * data pointer to the vfs_ops for the FS type mounting + * fsname name of the filesystem * * RETURNS: 0 Success * !0 error number (errno.h) @@ -97,67 +104,44 @@ struct vfsops *mountrootvfsops; * fixing the other file systems, not this code! */ int -vfs_mountroot(data) - void *data; +vfs_mountrootfs(fsname) + char *fsname; { struct mount *mp; - u_int size; int err = 0; struct proc *p = curproc; /* XXX */ - struct vfsops *mnt_op = (struct vfsops *)data; /* * New root mount structure */ - mp = malloc((u_long)sizeof(struct mount), M_MOUNT, M_WAITOK); - bzero((char *)mp, (u_long)sizeof(struct mount)); - mp->mnt_op = mnt_op; - mp->mnt_flag = MNT_ROOTFS; - mp->mnt_vnodecovered = NULLVP; - - /* - * Lock mount point - */ - if( ( err = vfs_lock(mp)) != 0) - goto error_1; - - /* Save "last mounted on" info for mount point (NULL pad)*/ - copystr( ROOTDIR, /* mount point*/ - mp->mnt_stat.f_mntonname, /* save area*/ - MNAMELEN - 1, /* max size*/ - &size); /* real size*/ - bzero( mp->mnt_stat.f_mntonname + size, MNAMELEN - size); - - /* Save "mounted from" info for mount point (NULL pad)*/ - copystr( ROOTNAME, /* device name*/ - mp->mnt_stat.f_mntfromname, /* save area*/ - MNAMELEN - 1, /* max size*/ - &size); /* real size*/ - bzero( mp->mnt_stat.f_mntfromname + size, MNAMELEN - size); + err = vfs_rootmountalloc(fsname, ROOTNAME, &mp); + if (err) + return (err); + mp->mnt_flag |= MNT_ROOTFS; /* * Attempt the mount */ - err = VFS_MOUNT( mp, NULL, NULL, NULL, p); - if( err) + err = VFS_MOUNT(mp, NULL, NULL, NULL, p); + if (err) goto error_2; + simple_lock(&mountlist_slock); /* Add fs to list of mounted file systems*/ - CIRCLEQ_INSERT_TAIL( &mountlist, mp, mnt_list); + CIRCLEQ_INSERT_TAIL(&mountlist, mp, mnt_list); + 
simple_unlock(&mountlist_slock); - /* Unlock mount point*/ - vfs_unlock(mp); + vfs_unbusy(mp, p); /* root mount, update system time from FS specific data*/ - inittodr( mp->mnt_time); + inittodr(mp->mnt_time); goto success; error_2: /* mount error*/ - /* unlock before failing*/ - vfs_unlock( mp); + vfs_unbusy(mp, p); error_1: /* lock error*/ diff --git a/sys/kern/vfs_subr.c b/sys/kern/vfs_subr.c index 43f8669..0dea7bd 100644 --- a/sys/kern/vfs_subr.c +++ b/sys/kern/vfs_subr.c @@ -35,7 +35,7 @@ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * - * @(#)vfs_subr.c 8.13 (Berkeley) 4/18/94 + * @(#)vfs_subr.c 8.31 (Berkeley) 5/26/95 * $FreeBSD$ */ @@ -75,7 +75,9 @@ #ifdef DDB extern void printlockedvnodes __P((void)); #endif -extern void vclean __P((struct vnode *vp, int flags)); +static void vclean __P((struct vnode *vp, int flags, struct proc *p)); +extern void vgonel __P((struct vnode *vp, struct proc *p)); +unsigned long numvnodes; extern void vfs_unmountroot __P((struct mount *rootfs)); enum vtype iftovt_tab[16] = { @@ -91,15 +93,19 @@ int vttoif_tab[9] = { * Insq/Remq for the vnode usage lists. 
*/ #define bufinsvn(bp, dp) LIST_INSERT_HEAD(dp, bp, b_vnbufs) -#define bufremvn(bp) { \ - LIST_REMOVE(bp, b_vnbufs); \ - (bp)->b_vnbufs.le_next = NOLIST; \ +#define bufremvn(bp) { \ + LIST_REMOVE(bp, b_vnbufs); \ + (bp)->b_vnbufs.le_next = NOLIST; \ } - TAILQ_HEAD(freelst, vnode) vnode_free_list; /* vnode free list */ static u_long freevnodes = 0; struct mntlist mountlist; /* mounted filesystem list */ +struct simplelock mountlist_slock; +static struct simplelock mntid_slock; +struct simplelock mntvnode_slock; +struct simplelock vnode_free_list_slock; +static struct simplelock spechash_slock; int desiredvnodes; SYSCTL_INT(_kern, KERN_MAXVNODES, maxvnodes, CTLFLAG_RW, &desiredvnodes, 0, ""); @@ -117,164 +123,153 @@ vntblinit() { desiredvnodes = maxproc + vm_object_cache_max; + simple_lock_init(&mntvnode_slock); + simple_lock_init(&mntid_slock); + simple_lock_init(&spechash_slock); TAILQ_INIT(&vnode_free_list); + simple_lock_init(&vnode_free_list_slock); CIRCLEQ_INIT(&mountlist); } /* - * Lock a filesystem. - * Used to prevent access to it while mounting and unmounting. + * Mark a mount point as busy. Used to synchronize access and to delay + * unmounting. Interlock is not released on failure. */ int -vfs_lock(mp) - register struct mount *mp; +vfs_busy(mp, flags, interlkp, p) + struct mount *mp; + int flags; + struct simplelock *interlkp; + struct proc *p; { + int lkflags; - while (mp->mnt_flag & MNT_MLOCK) { + if (mp->mnt_flag & MNT_UNMOUNT) { + if (flags & LK_NOWAIT) + return (ENOENT); mp->mnt_flag |= MNT_MWAIT; - (void) tsleep((caddr_t) mp, PVFS, "vfslck", 0); + if (interlkp) { + simple_unlock(interlkp); + } + /* + * Since all busy locks are shared except the exclusive + * lock granted when unmounting, the only place that a + * wakeup needs to be done is at the release of the + * exclusive lock at the end of dounmount. 
+ */ + tsleep((caddr_t)mp, PVFS, "vfs_busy", 0); + if (interlkp) { + simple_lock(interlkp); + } + return (ENOENT); } - mp->mnt_flag |= MNT_MLOCK; + lkflags = LK_SHARED; + if (interlkp) + lkflags |= LK_INTERLOCK; + if (lockmgr(&mp->mnt_lock, lkflags, interlkp, p)) + panic("vfs_busy: unexpected lock failure"); return (0); } /* - * Unlock a locked filesystem. - * Panic if filesystem is not locked. + * Free a busy filesystem. */ void -vfs_unlock(mp) - register struct mount *mp; +vfs_unbusy(mp, p) + struct mount *mp; + struct proc *p; { - if ((mp->mnt_flag & MNT_MLOCK) == 0) - panic("vfs_unlock: not locked"); - mp->mnt_flag &= ~MNT_MLOCK; - if (mp->mnt_flag & MNT_MWAIT) { - mp->mnt_flag &= ~MNT_MWAIT; - wakeup((caddr_t) mp); - } + lockmgr(&mp->mnt_lock, LK_RELEASE, NULL, p); } /* - * Mark a mount point as busy. - * Used to synchronize access and to delay unmounting. + * Lookup a filesystem type, and if found allocate and initialize + * a mount structure for it. + * + * Devname is usually updated by mount(8) after booting. 
*/ int -vfs_busy(mp) - register struct mount *mp; +vfs_rootmountalloc(fstypename, devname, mpp) + char *fstypename; + char *devname; + struct mount **mpp; { + struct proc *p = curproc; /* XXX */ + struct vfsconf *vfsp; + struct mount *mp; - while (mp->mnt_flag & MNT_MPBUSY) { - mp->mnt_flag |= MNT_MPWANT; - (void) tsleep((caddr_t) &mp->mnt_flag, PVFS, "vfsbsy", 0); - } - if (mp->mnt_flag & MNT_UNMOUNT) - return (1); - mp->mnt_flag |= MNT_MPBUSY; + for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next) + if (!strcmp(vfsp->vfc_name, fstypename)) + break; + if (vfsp == NULL) + return (ENODEV); + mp = malloc((u_long)sizeof(struct mount), M_MOUNT, M_WAITOK); + bzero((char *)mp, (u_long)sizeof(struct mount)); + lockinit(&mp->mnt_lock, PVFS, "vfslock", 0, 0); + (void)vfs_busy(mp, LK_NOWAIT, 0, p); + LIST_INIT(&mp->mnt_vnodelist); + mp->mnt_vfc = vfsp; + mp->mnt_op = vfsp->vfc_vfsops; + mp->mnt_flag = MNT_RDONLY; + mp->mnt_vnodecovered = NULLVP; + vfsp->vfc_refcount++; + mp->mnt_stat.f_type = vfsp->vfc_typenum; + mp->mnt_flag |= vfsp->vfc_flags & MNT_VISFLAGMASK; + strncpy(mp->mnt_stat.f_fstypename, vfsp->vfc_name, MFSNAMELEN); + mp->mnt_stat.f_mntonname[0] = '/'; + mp->mnt_stat.f_mntonname[1] = 0; + (void) copystr(devname, mp->mnt_stat.f_mntfromname, MNAMELEN - 1, 0); + *mpp = mp; return (0); } /* - * Free a busy filesystem. - * Panic if filesystem is not busy. 
- */ -void -vfs_unbusy(mp) - register struct mount *mp; -{ - - if ((mp->mnt_flag & MNT_MPBUSY) == 0) - panic("vfs_unbusy: not busy"); - mp->mnt_flag &= ~MNT_MPBUSY; - if (mp->mnt_flag & MNT_MPWANT) { - mp->mnt_flag &= ~MNT_MPWANT; - wakeup((caddr_t) &mp->mnt_flag); - } -} - -void -vfs_unmountroot(struct mount *rootfs) -{ - struct mount *mp = rootfs; - int error; - - if (vfs_busy(mp)) { - printf("failed to unmount root\n"); - return; - } - mp->mnt_flag |= MNT_UNMOUNT; - if ((error = vfs_lock(mp))) { - printf("lock of root filesystem failed (%d)\n", error); - return; - } - vnode_pager_umount(mp); /* release cached vnodes */ - cache_purgevfs(mp); /* remove cache entries for this file sys */ - - if ((error = VFS_SYNC(mp, MNT_WAIT, initproc->p_ucred, initproc))) - printf("sync of root filesystem failed (%d)\n", error); - - if ((error = VFS_UNMOUNT(mp, MNT_FORCE, initproc))) { - printf("unmount of root filesystem failed ("); - if (error == EBUSY) - printf("BUSY)\n"); - else - printf("%d)\n", error); - } - mp->mnt_flag &= ~MNT_UNMOUNT; - vfs_unbusy(mp); -} - -/* - * Unmount all filesystems. Should only be called by halt(). + * Find an appropriate filesystem to use for the root. If a filesystem + * has not been preselected, walk through the list of known filesystems + * trying those that have mountroot routines, and try them until one + * works or we have tried them all. 
*/ -void -vfs_unmountall() +#ifdef notdef /* XXX JH */ +int +lite2_vfs_mountroot(void) { - struct mount *mp, *nmp, *rootfs = NULL; + struct vfsconf *vfsp; + extern int (*lite2_mountroot)(void); int error; - /* unmount all but rootfs */ - for (mp = mountlist.cqh_last; mp != (void *)&mountlist; mp = nmp) { - nmp = mp->mnt_list.cqe_prev; - - if (mp->mnt_flag & MNT_ROOTFS) { - rootfs = mp; + if (lite2_mountroot != NULL) + return ((*lite2_mountroot)()); + for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next) { + if (vfsp->vfc_mountroot == NULL) continue; - } - error = dounmount(mp, MNT_FORCE, initproc); - if (error) { - printf("unmount of %s failed (", mp->mnt_stat.f_mntonname); - if (error == EBUSY) - printf("BUSY)\n"); - else - printf("%d)\n", error); - } - } - - /* and finally... */ - if (rootfs) { - vfs_unmountroot(rootfs); - } else { - printf("no root filesystem\n"); + if ((error = (*vfsp->vfc_mountroot)()) == 0) + return (0); + printf("%s_mountroot failed: %d\n", vfsp->vfc_name, error); } + return (ENODEV); } +#endif /* * Lookup a mount point by filesystem identifier. 
*/ struct mount * -getvfs(fsid) +vfs_getvfs(fsid) fsid_t *fsid; { register struct mount *mp; + simple_lock(&mountlist_slock); for (mp = mountlist.cqh_first; mp != (void *)&mountlist; mp = mp->mnt_list.cqe_next) { if (mp->mnt_stat.f_fsid.val[0] == fsid->val[0] && - mp->mnt_stat.f_fsid.val[1] == fsid->val[1]) + mp->mnt_stat.f_fsid.val[1] == fsid->val[1]) { + simple_unlock(&mountlist_slock); return (mp); + } } + simple_unlock(&mountlist_slock); return ((struct mount *) 0); } @@ -282,14 +277,16 @@ getvfs(fsid) * Get a new unique fsid */ void -getnewfsid(mp, mtype) +vfs_getnewfsid(mp) struct mount *mp; - int mtype; { static u_short xxxfs_mntid; fsid_t tfsid; + int mtype; + simple_lock(&mntid_slock); + mtype = mp->mnt_vfc->vfc_typenum; mp->mnt_stat.f_fsid.val[0] = makedev(nblkdev + mtype, 0); mp->mnt_stat.f_fsid.val[1] = mtype; if (xxxfs_mntid == 0) @@ -297,12 +294,13 @@ getnewfsid(mp, mtype) tfsid.val[0] = makedev(nblkdev + mtype, xxxfs_mntid); tfsid.val[1] = mtype; if (mountlist.cqh_first != (void *)&mountlist) { - while (getvfs(&tfsid)) { + while (vfs_getvfs(&tfsid)) { tfsid.val[0]++; xxxfs_mntid++; } } mp->mnt_stat.f_fsid.val[0] = tfsid.val[0]; + simple_unlock(&mntid_slock); } /* @@ -326,6 +324,35 @@ vattr_null(vap) vap->va_vaflags = 0; } +void +vfs_unmountroot(struct mount *rootfs) +{ + struct proc *p = curproc; /* XXX */ + struct mount *mp = rootfs; + int error; + + if (vfs_busy(mp, LK_NOWAIT, &mountlist_slock, p)) { + printf("failed to unmount root\n"); + return; + } + mp->mnt_flag |= MNT_UNMOUNT; + vnode_pager_umount(mp); /* release cached vnodes */ + cache_purgevfs(mp); /* remove cache entries for this file sys */ + + if ((error = VFS_SYNC(mp, MNT_WAIT, initproc->p_ucred, initproc))) + printf("sync of root filesystem failed (%d)\n", error); + + if ((error = VFS_UNMOUNT(mp, MNT_FORCE, initproc))) { + printf("unmount of root filesystem failed ("); + if (error == EBUSY) + printf("BUSY)\n"); + else + printf("%d)\n", error); + } + mp->mnt_flag &= ~MNT_UNMOUNT; + 
vfs_unbusy(mp, p); +} + /* * Routines having to do with the management of the vnode table. */ @@ -341,10 +368,11 @@ getnewvnode(tag, mp, vops, vpp) vop_t **vops; struct vnode **vpp; { - register struct vnode *vp; + struct proc *p = curproc; /* XXX */ + struct vnode *vp; + simple_lock(&vnode_free_list_slock); retry: - vp = vnode_free_list.tqh_first; /* * we allocate a new vnode if * 1. we don't have any free @@ -357,12 +385,31 @@ retry: */ if (freevnodes < (numvnodes >> 2) || numvnodes < desiredvnodes || - vp == NULL) { + vnode_free_list.tqh_first == NULL) { + simple_unlock(&vnode_free_list_slock); vp = (struct vnode *) malloc((u_long) sizeof *vp, M_VNODE, M_WAITOK); bzero((char *) vp, sizeof *vp); numvnodes++; } else { + for (vp = vnode_free_list.tqh_first; + vp != NULLVP; vp = vp->v_freelist.tqe_next) { + if (simple_lock_try(&vp->v_interlock)) + break; + } + /* + * Unless this is a bad time of the month, at most + * the first NCPUS items on the free list are + * locked, so this is close enough to being empty. + */ + if (vp == NULLVP) { + simple_unlock(&vnode_free_list_slock); + tablefull("vnode"); + *vpp = 0; + return (ENFILE); + } + if (vp->v_usecount) + panic("free vnode isn't"); TAILQ_REMOVE(&vnode_free_list, vp, v_freelist); if (vp->v_usage > 0) { --vp->v_usage; @@ -370,14 +417,16 @@ retry: goto retry; } freevnodes--; - if (vp->v_usecount) - panic("free vnode isn't"); /* see comment on why 0xdeadb is set at end of vgone (below) */ vp->v_freelist.tqe_prev = (struct vnode **) 0xdeadb; + simple_unlock(&vnode_free_list_slock); vp->v_lease = NULL; if (vp->v_type != VBAD) - vgone(vp); + vgonel(vp, p); + else { + simple_unlock(&vp->v_interlock); + } #ifdef DIAGNOSTIC { @@ -421,6 +470,7 @@ insmntque(vp, mp) register struct mount *mp; { + simple_lock(&mntvnode_slock); /* * Delete from old mount point vnode list, if on one. */ @@ -429,9 +479,12 @@ insmntque(vp, mp) /* * Insert into list of vnodes for the new mount point, if available. 
*/ - if ((vp->v_mount = mp) == NULL) + if ((vp->v_mount = mp) == NULL) { + simple_unlock(&mntvnode_slock); return; + } LIST_INSERT_HEAD(&mp->mnt_vnodelist, vp, v_mntvnodes); + simple_unlock(&mntvnode_slock); } /* @@ -723,7 +776,8 @@ checkalias(nvp, nvp_rdev, mp) dev_t nvp_rdev; struct mount *mp; { - register struct vnode *vp; + struct proc *p = curproc; /* XXX */ + struct vnode *vp; struct vnode **vpp; if (nvp->v_type != VBLK && nvp->v_type != VCHR) @@ -731,18 +785,24 @@ checkalias(nvp, nvp_rdev, mp) vpp = &speclisth[SPECHASH(nvp_rdev)]; loop: + simple_lock(&spechash_slock); for (vp = *vpp; vp; vp = vp->v_specnext) { if (nvp_rdev != vp->v_rdev || nvp->v_type != vp->v_type) continue; /* * Alias, but not in use, so flush it out. */ + simple_lock(&vp->v_interlock); if (vp->v_usecount == 0) { - vgone(vp); + simple_unlock(&spechash_slock); + vgonel(vp, p); goto loop; } - if (vget(vp, 1)) + simple_unlock(&spechash_slock); + if (vget(vp, LK_EXCLUSIVE | LK_INTERLOCK, p)) { goto loop; + } + simple_lock(&spechash_slock); break; } @@ -753,16 +813,19 @@ loop: nvp->v_hashchain = vpp; nvp->v_specnext = *vpp; nvp->v_specflags = 0; + simple_unlock(&spechash_slock); *vpp = nvp; - if (vp != NULL) { + if (vp != NULLVP) { nvp->v_flag |= VALIASED; vp->v_flag |= VALIASED; vput(vp); } return (NULLVP); } - VOP_UNLOCK(vp); - vclean(vp, 0); + simple_unlock(&spechash_slock); + VOP_UNLOCK(vp, 0, p); + simple_lock(&vp->v_interlock); + vclean(vp, 0, p); vp->v_op = nvp->v_op; vp->v_tag = nvp->v_tag; nvp->v_type = VNON; @@ -779,47 +842,162 @@ loop: * been changed to a new file system type). */ int -vget(vp, lockflag) +vget(vp, flags, p) register struct vnode *vp; - int lockflag; + int flags; + struct proc *p; { + int error; /* - * If the vnode is in the process of being cleaned out for another - * use, we wait for the cleaning to finish and then return failure. 
- * Cleaning is determined either by checking that the VXLOCK flag is - * set, or that the use count is zero with the back pointer set to - * show that it has been removed from the free list by getnewvnode. - * The VXLOCK flag may not have been set yet because vclean is blocked - * in the VOP_LOCK call waiting for the VOP_INACTIVE to complete. + * If the vnode is in the process of being cleaned out for + * another use, we wait for the cleaning to finish and then + * return failure. Cleaning is determined by checking that + * the VXLOCK flag is set. */ - if ((vp->v_flag & VXLOCK) || - (vp->v_usecount == 0 && - vp->v_freelist.tqe_prev == (struct vnode **) 0xdeadb)) { + if ((flags & LK_INTERLOCK) == 0) { + simple_lock(&vp->v_interlock); + } + if (vp->v_flag & VXLOCK) { vp->v_flag |= VXWANT; - (void) tsleep((caddr_t) vp, PINOD, "vget", 0); - return (1); + simple_unlock(&vp->v_interlock); + tsleep((caddr_t)vp, PINOD, "vget", 0); + return (ENOENT); } if (vp->v_usecount == 0) { + simple_lock(&vnode_free_list_slock); TAILQ_REMOVE(&vnode_free_list, vp, v_freelist); + simple_unlock(&vnode_free_list_slock); freevnodes--; } vp->v_usecount++; - /* * Create the VM object, if needed */ if ((vp->v_type == VREG) && ((vp->v_object == NULL) || (vp->v_object->flags & OBJ_VFS_REF) == 0)) { + /* + * XXX vfs_object_create probably needs the interlock. + */ + simple_unlock(&vp->v_interlock); vfs_object_create(vp, curproc, curproc->p_ucred, 0); + simple_lock(&vp->v_interlock); + } + if (flags & LK_TYPE_MASK) { + if (error = vn_lock(vp, flags | LK_INTERLOCK, p)) + vrele(vp); + return (error); } - if (lockflag) - VOP_LOCK(vp); + simple_unlock(&vp->v_interlock); + return (0); +} + +/* + * Stubs to use when there is no locking to be done on the underlying object. + * A minimal shared lock is necessary to ensure that the underlying object + * is not revoked while an operation is in progress. So, an active shared + * count is maintained in an auxillary vnode lock structure. 
+ */ +int +vop_nolock(ap) + struct vop_lock_args /* { + struct vnode *a_vp; + int a_flags; + struct proc *a_p; + } */ *ap; +{ +#ifdef notyet + /* + * This code cannot be used until all the non-locking filesystems + * (notably NFS) are converted to properly lock and release nodes. + * Also, certain vnode operations change the locking state within + * the operation (create, mknod, remove, link, rename, mkdir, rmdir, + * and symlink). Ideally these operations should not change the + * lock state, but should be changed to let the caller of the + * function unlock them. Otherwise all intermediate vnode layers + * (such as union, umapfs, etc) must catch these functions to do + * the necessary locking at their layer. Note that the inactive + * and lookup operations also change their lock state, but this + * cannot be avoided, so these two operations will always need + * to be handled in intermediate layers. + */ + struct vnode *vp = ap->a_vp; + int vnflags, flags = ap->a_flags; + if (vp->v_vnlock == NULL) { + if ((flags & LK_TYPE_MASK) == LK_DRAIN) + return (0); + MALLOC(vp->v_vnlock, struct lock *, sizeof(struct lock), + M_VNODE, M_WAITOK); + lockinit(vp->v_vnlock, PVFS, "vnlock", 0, 0); + } + switch (flags & LK_TYPE_MASK) { + case LK_DRAIN: + vnflags = LK_DRAIN; + break; + case LK_EXCLUSIVE: + case LK_SHARED: + vnflags = LK_SHARED; + break; + case LK_UPGRADE: + case LK_EXCLUPGRADE: + case LK_DOWNGRADE: + return (0); + case LK_RELEASE: + default: + panic("vop_nolock: bad operation %d", flags & LK_TYPE_MASK); + } + if (flags & LK_INTERLOCK) + vnflags |= LK_INTERLOCK; + return(lockmgr(vp->v_vnlock, vnflags, &vp->v_interlock, ap->a_p)); +#else /* for now */ + /* + * Since we are not using the lock manager, we must clear + * the interlock here. + */ + if (ap->a_flags & LK_INTERLOCK) { + simple_unlock(&ap->a_vp->v_interlock); + } return (0); +#endif +} + +/* + * Decrement the active use count. 
+ */ +int +vop_nounlock(ap) + struct vop_unlock_args /* { + struct vnode *a_vp; + int a_flags; + struct proc *a_p; + } */ *ap; +{ + struct vnode *vp = ap->a_vp; + + if (vp->v_vnlock == NULL) + return (0); + return (lockmgr(vp->v_vnlock, LK_RELEASE, NULL, ap->a_p)); +} + +/* + * Return whether or not the node is in use. + */ +int +vop_noislocked(ap) + struct vop_islocked_args /* { + struct vnode *a_vp; + } */ *ap; +{ + struct vnode *vp = ap->a_vp; + + if (vp->v_vnlock == NULL) + return (0); + return (lockstatus(vp->v_vnlock)); } +/* #ifdef DIAGNOSTIC */ /* * Vnode reference, just increment the count */ @@ -827,6 +1005,7 @@ void vref(vp) struct vnode *vp; { + simple_lock(&vp->v_interlock); if (vp->v_usecount <= 0) panic("vref used where vget required"); @@ -840,8 +1019,11 @@ vref(vp) * the object is created. This is necessary to * keep the system from re-entrantly doing it * multiple times. + * XXX vfs_object_create probably needs the interlock? */ + simple_unlock(&vp->v_interlock); vfs_object_create(vp, curproc, curproc->p_ucred, 0); + simple_lock(&vp->v_interlock); } } @@ -850,9 +1032,9 @@ vref(vp) */ void vput(vp) - register struct vnode *vp; + struct vnode *vp; { - VOP_UNLOCK(vp); + VOP_UNLOCK(vp, 0, curproc); vrele(vp); } @@ -862,33 +1044,38 @@ vput(vp) */ void vrele(vp) - register struct vnode *vp; + struct vnode *vp; { + struct proc *p = curproc; /* XXX */ #ifdef DIAGNOSTIC if (vp == NULL) panic("vrele: null vp"); #endif - + simple_lock(&vp->v_interlock); vp->v_usecount--; if ((vp->v_usecount == 1) && vp->v_object && (vp->v_object->flags & OBJ_VFS_REF)) { vp->v_object->flags &= ~OBJ_VFS_REF; + simple_unlock(&vp->v_interlock); vm_object_deallocate(vp->v_object); return; } - if (vp->v_usecount > 0) + if (vp->v_usecount > 0) { + simple_unlock(&vp->v_interlock); return; + } if (vp->v_usecount < 0) { #ifdef DIAGNOSTIC vprint("vrele: negative ref count", vp); #endif - panic("vrele: negative reference cnt"); + panic("vrele: negative ref cnt"); } + 
simple_lock(&vnode_free_list_slock); if (vp->v_flag & VAGE) { if(vp->v_tag != VT_TFS) TAILQ_INSERT_HEAD(&vnode_free_list, vp, v_freelist); @@ -898,9 +1085,12 @@ vrele(vp) if(vp->v_tag != VT_TFS) TAILQ_INSERT_TAIL(&vnode_free_list, vp, v_freelist); } + simple_unlock(&vnode_free_list_slock); + freevnodes++; - VOP_INACTIVE(vp); + if (vn_lock(vp, LK_EXCLUSIVE | LK_INTERLOCK, p) == 0) + VOP_INACTIVE(vp, p); } #ifdef DIAGNOSTIC @@ -912,7 +1102,9 @@ vhold(vp) register struct vnode *vp; { + simple_lock(&vp->v_interlock); vp->v_holdcnt++; + simple_unlock(&vp->v_interlock); } /* @@ -923,9 +1115,11 @@ holdrele(vp) register struct vnode *vp; { + simple_lock(&vp->v_interlock); if (vp->v_holdcnt <= 0) panic("holdrele: holdcnt"); vp->v_holdcnt--; + simple_unlock(&vp->v_interlock); } #endif /* DIAGNOSTIC */ @@ -948,11 +1142,11 @@ vflush(mp, skipvp, flags) struct vnode *skipvp; int flags; { - register struct vnode *vp, *nvp; + struct proc *p = curproc; /* XXX */ + struct vnode *vp, *nvp; int busy = 0; - if ((mp->mnt_flag & MNT_MPBUSY) == 0) - panic("vflush: not busy"); + simple_lock(&mntvnode_slock); loop: for (vp = mp->mnt_vnodelist.lh_first; vp; vp = nvp) { /* @@ -967,24 +1161,34 @@ loop: */ if (vp == skipvp) continue; + + simple_lock(&vp->v_interlock); /* * Skip over a vnodes marked VSYSTEM. */ - if ((flags & SKIPSYSTEM) && (vp->v_flag & VSYSTEM)) + if ((flags & SKIPSYSTEM) && (vp->v_flag & VSYSTEM)) { + simple_unlock(&vp->v_interlock); continue; + } /* * If WRITECLOSE is set, only flush out regular file vnodes * open for writing. 
*/ if ((flags & WRITECLOSE) && - (vp->v_writecount == 0 || vp->v_type != VREG)) + (vp->v_writecount == 0 || vp->v_type != VREG)) { + simple_unlock(&vp->v_interlock); continue; + } if (vp->v_object && (vp->v_object->flags & OBJ_VFS_REF)) { + simple_unlock(&vp->v_interlock); + simple_unlock(&mntvnode_slock); vm_object_reference(vp->v_object); pager_cache(vp->v_object, FALSE); vp->v_object->flags &= ~OBJ_VFS_REF; vm_object_deallocate(vp->v_object); + simple_lock(&mntvnode_slock); + simple_lock(&vp->v_interlock); } /* @@ -992,7 +1196,9 @@ loop: * vnode data structures and we are done. */ if (vp->v_usecount == 0) { - vgone(vp); + simple_unlock(&mntvnode_slock); + vgonel(vp, p); + simple_lock(&mntvnode_slock); continue; } @@ -1002,21 +1208,25 @@ loop: * all other files, just kill them. */ if (flags & FORCECLOSE) { + simple_unlock(&mntvnode_slock); if (vp->v_type != VBLK && vp->v_type != VCHR) { - vgone(vp); + vgonel(vp, p); } else { - vclean(vp, 0); + vclean(vp, 0, p); vp->v_op = spec_vnodeop_p; insmntque(vp, (struct mount *) 0); } + simple_lock(&mntvnode_slock); continue; } #ifdef DIAGNOSTIC if (busyprt) vprint("vflush: busy vnode", vp); #endif + simple_unlock(&vp->v_interlock); busy++; } + simple_unlock(&mntvnode_slock); if (busy) return (EBUSY); return (0); @@ -1025,8 +1235,8 @@ loop: /* * Disassociate the underlying file system from a vnode. */ -void -vclean(struct vnode *vp, int flags) +static void +vclean(struct vnode *vp, int flags, struct proc *p) { int active; @@ -1036,15 +1246,7 @@ vclean(struct vnode *vp, int flags) * generate a race against ourselves to recycle it. */ if ((active = vp->v_usecount)) - VREF(vp); - /* - * Even if the count is zero, the VOP_INACTIVE routine may still have - * the object locked while it cleans it out. The VOP_LOCK ensures that - * the VOP_INACTIVE routine is done with its work. For active vnodes, - * it ensures that no other activity can occur while the underlying - * object is being cleaned out. 
- */ - VOP_LOCK(vp); + vp->v_usecount++; /* * Prevent the vnode from being recycled or brought into use while we * clean it out. @@ -1053,31 +1255,48 @@ vclean(struct vnode *vp, int flags) panic("vclean: deadlock"); vp->v_flag |= VXLOCK; /* - * Clean out any buffers associated with the vnode. + * Even if the count is zero, the VOP_INACTIVE routine may still + * have the object locked while it cleans it out. The VOP_LOCK + * ensures that the VOP_INACTIVE routine is done with its work. + * For active vnodes, it ensures that no other activity can + * occur while the underlying object is being cleaned out. */ - if (flags & DOCLOSE) - vinvalbuf(vp, V_SAVE, NOCRED, NULL, 0, 0); + VOP_LOCK(vp, LK_DRAIN | LK_INTERLOCK, p); /* - * Any other processes trying to obtain this lock must first wait for - * VXLOCK to clear, then call the new lock operation. + * Clean out any buffers associated with the vnode. */ - VOP_UNLOCK(vp); + if (flags & DOCLOSE) + vinvalbuf(vp, V_SAVE, NOCRED, p, 0, 0); /* - * If purging an active vnode, it must be closed and deactivated - * before being reclaimed. + * If purging an active vnode, it must be closed and + * deactivated before being reclaimed. Note that the + * VOP_INACTIVE will unlock the vnode. */ if (active) { if (flags & DOCLOSE) - VOP_CLOSE(vp, FNONBLOCK, NOCRED, NULL); - VOP_INACTIVE(vp); + VOP_CLOSE(vp, IO_NDELAY, NOCRED, p); + VOP_INACTIVE(vp, p); + } else { + /* + * Any other processes trying to obtain this lock must first + * wait for VXLOCK to clear, then call the new lock operation. + */ + VOP_UNLOCK(vp, 0, p); } /* * Reclaim the vnode. */ - if (VOP_RECLAIM(vp)) + if (VOP_RECLAIM(vp, p)) panic("vclean: cannot reclaim"); if (active) vrele(vp); + cache_purge(vp); + if (vp->v_vnlock) { + if ((vp->v_vnlock->lk_flags & LK_DRAINED) == 0) + vprint("vclean: lock not drained", vp); + FREE(vp->v_vnlock, M_VNODE); + vp->v_vnlock = NULL; + } /* * Done with purge, notify sleepers of the grim news. 
@@ -1092,46 +1311,91 @@ vclean(struct vnode *vp, int flags) } /* - * Eliminate all activity associated with the requested vnode + * Eliminate all activity associated with the requested vnode * and with all vnodes aliased to the requested vnode. */ -void -vgoneall(vp) - register struct vnode *vp; +int +vop_revoke(ap) + struct vop_revoke_args /* { + struct vnode *a_vp; + int a_flags; + } */ *ap; { - register struct vnode *vq; + struct vnode *vp, *vq; + struct proc *p = curproc; /* XXX */ + +#ifdef DIAGNOSTIC + if ((ap->a_flags & REVOKEALL) == 0) + panic("vop_revoke"); +#endif + + vp = ap->a_vp; + simple_lock(&vp->v_interlock); if (vp->v_flag & VALIASED) { /* - * If a vgone (or vclean) is already in progress, wait until - * it is done and return. + * If a vgone (or vclean) is already in progress, + * wait until it is done and return. */ if (vp->v_flag & VXLOCK) { vp->v_flag |= VXWANT; - (void) tsleep((caddr_t) vp, PINOD, "vgall", 0); - return; + simple_unlock(&vp->v_interlock); + tsleep((caddr_t)vp, PINOD, "vop_revokeall", 0); + return (0); } /* - * Ensure that vp will not be vgone'd while we are eliminating - * its aliases. + * Ensure that vp will not be vgone'd while we + * are eliminating its aliases. */ vp->v_flag |= VXLOCK; + simple_unlock(&vp->v_interlock); while (vp->v_flag & VALIASED) { + simple_lock(&spechash_slock); for (vq = *vp->v_hashchain; vq; vq = vq->v_specnext) { if (vq->v_rdev != vp->v_rdev || vq->v_type != vp->v_type || vp == vq) continue; + simple_unlock(&spechash_slock); vgone(vq); break; } + if (vq == NULLVP) { + simple_unlock(&spechash_slock); + } } /* - * Remove the lock so that vgone below will really eliminate - * the vnode after which time vgone will awaken any sleepers. + * Remove the lock so that vgone below will + * really eliminate the vnode after which time + * vgone will awaken any sleepers. 
*/ + simple_lock(&vp->v_interlock); vp->v_flag &= ~VXLOCK; } - vgone(vp); + vgonel(vp, p); + return (0); +} + +/* + * Recycle an unused vnode to the front of the free list. + * Release the passed interlock if the vnode will be recycled. + */ +int +vrecycle(vp, inter_lkp, p) + struct vnode *vp; + struct simplelock *inter_lkp; + struct proc *p; +{ + + simple_lock(&vp->v_interlock); + if (vp->v_usecount == 0) { + if (inter_lkp) { + simple_unlock(inter_lkp); + } + vgonel(vp, p); + return (1); + } + simple_unlock(&vp->v_interlock); + return (0); } /* @@ -1142,16 +1406,31 @@ void vgone(vp) register struct vnode *vp; { - register struct vnode *vq; + struct proc *p = curproc; /* XXX */ + + simple_lock(&vp->v_interlock); + vgonel(vp, p); +} + +/* + * vgone, with the vp interlock held. + */ +void +vgonel(vp, p) + struct vnode *vp; + struct proc *p; +{ + struct vnode *vq; struct vnode *vx; /* - * If a vgone (or vclean) is already in progress, wait until it is - * done and return. + * If a vgone (or vclean) is already in progress, + * wait until it is done and return. */ if (vp->v_flag & VXLOCK) { vp->v_flag |= VXWANT; - (void) tsleep((caddr_t) vp, PINOD, "vgone", 0); + simple_unlock(&vp->v_interlock); + tsleep((caddr_t)vp, PINOD, "vgone", 0); return; } @@ -1162,18 +1441,18 @@ vgone(vp) /* * Clean out the filesystem specific data. */ - vclean(vp, DOCLOSE); + vclean(vp, DOCLOSE, p); /* * Delete from old mount point vnode list, if on one. */ - if (vp->v_mount != NULL) { - LIST_REMOVE(vp, v_mntvnodes); - vp->v_mount = NULL; - } + if (vp->v_mount != NULL) + insmntque(vp, (struct mount *)0); /* - * If special device, remove it from special device alias list. + * If special device, remove it from special device alias list + * if it is on one. 
*/ - if (vp->v_type == VBLK || vp->v_type == VCHR) { + if ((vp->v_type == VBLK || vp->v_type == VCHR) && vp->v_specinfo != 0) { + simple_lock(&spechash_slock); if (*vp->v_hashchain == vp) { *vp->v_hashchain = vp->v_specnext; } else { @@ -1202,28 +1481,34 @@ vgone(vp) vx->v_flag &= ~VALIASED; vp->v_flag &= ~VALIASED; } + simple_unlock(&spechash_slock); FREE(vp->v_specinfo, M_VNODE); vp->v_specinfo = NULL; } + /* - * If it is on the freelist and not already at the head, move it to - * the head of the list. The test of the back pointer and the - * reference count of zero is because it will be removed from the free - * list by getnewvnode, but will not have its reference count - * incremented until after calling vgone. If the reference count were - * incremented first, vgone would (incorrectly) try to close the - * previous instance of the underlying object. So, the back pointer is - * explicitly set to `0xdeadb' in getnewvnode after removing it from - * the freelist to ensure that we do not try to move it here. + * If it is on the freelist and not already at the head, + * move it to the head of the list. The test of the back + * pointer and the reference count of zero is because + * it will be removed from the free list by getnewvnode, + * but will not have its reference count incremented until + * after calling vgone. If the reference count were + * incremented first, vgone would (incorrectly) try to + * close the previous instance of the underlying object. + * So, the back pointer is explicitly set to `0xdeadb' in + * getnewvnode after removing it from the freelist to ensure + * that we do not try to move it here. 
*/ - if (vp->v_usecount == 0 && - vp->v_freelist.tqe_prev != (struct vnode **) 0xdeadb && - vnode_free_list.tqh_first != vp) { - if(vp->v_tag != VT_TFS) { + if (vp->v_usecount == 0) { + simple_lock(&vnode_free_list_slock); + if ((vp->v_freelist.tqe_prev != (struct vnode **)0xdeadb) && + vnode_free_list.tqh_first != vp) { TAILQ_REMOVE(&vnode_free_list, vp, v_freelist); TAILQ_INSERT_HEAD(&vnode_free_list, vp, v_freelist); } + simple_unlock(&vnode_free_list_slock); } + vp->v_type = VBAD; } @@ -1254,7 +1539,7 @@ int vcount(vp) register struct vnode *vp; { - register struct vnode *vq, *vnext; + struct vnode *vq, *vnext; int count; loop: @@ -1354,6 +1639,7 @@ int kinfo_vgetfailed; static int sysctl_vnode SYSCTL_HANDLER_ARGS { + struct proc *p = curproc; /* XXX */ register struct mount *mp, *nmp; struct vnode *vp; int error; @@ -1368,7 +1654,7 @@ sysctl_vnode SYSCTL_HANDLER_ARGS for (mp = mountlist.cqh_first; mp != (void *)&mountlist; mp = nmp) { nmp = mp->mnt_list.cqe_next; - if (vfs_busy(mp)) + if (vfs_busy(mp, LK_NOWAIT, &mountlist_slock, p)) continue; again: for (vp = mp->mnt_vnodelist.lh_first; @@ -1386,11 +1672,11 @@ again: } if ((error = SYSCTL_OUT(req, &vp, VPTRSZ)) || (error = SYSCTL_OUT(req, vp, VNODESZ))) { - vfs_unbusy(mp); + vfs_unbusy(mp, p); return (error); } } - vfs_unbusy(mp); + vfs_unbusy(mp, p); } return (0); @@ -1404,22 +1690,63 @@ SYSCTL_PROC(_kern, KERN_VNODE, vnode, CTLTYPE_OPAQUE|CTLFLAG_RD, */ int vfs_mountedon(vp) - register struct vnode *vp; + struct vnode *vp; { - register struct vnode *vq; + struct vnode *vq; + int error = 0; if (vp->v_specflags & SI_MOUNTEDON) return (EBUSY); if (vp->v_flag & VALIASED) { + simple_lock(&spechash_slock); for (vq = *vp->v_hashchain; vq; vq = vq->v_specnext) { if (vq->v_rdev != vp->v_rdev || vq->v_type != vp->v_type) continue; - if (vq->v_specflags & SI_MOUNTEDON) - return (EBUSY); + if (vq->v_specflags & SI_MOUNTEDON) { + error = EBUSY; + break; + } } + simple_unlock(&spechash_slock); + } + return (error); +} + 
+/* + * Unmount all filesystems. The list is traversed in reverse order + * of mounting to avoid dependencies. Should only be called by halt(). + */ +void +vfs_unmountall() +{ + struct mount *mp, *nmp, *rootfs = NULL; + int error; + + /* unmount all but rootfs */ + for (mp = mountlist.cqh_last; mp != (void *)&mountlist; mp = nmp) { + nmp = mp->mnt_list.cqe_prev; + + if (mp->mnt_flag & MNT_ROOTFS) { + rootfs = mp; + continue; + } + error = dounmount(mp, MNT_FORCE, initproc); + if (error) { + printf("unmount of %s failed (", mp->mnt_stat.f_mntonname); + if (error == EBUSY) + printf("BUSY)\n"); + else + printf("%d)\n", error); + } + } + + /* and finally... */ + if (rootfs) { + vfs_unmountroot(rootfs); + } else { + printf("no root filesystem\n"); } - return (0); } /* @@ -1565,8 +1892,8 @@ vfs_export_lookup(mp, nep, nam) rnh = nep->ne_rtable[saddr->sa_family]; if (rnh != NULL) { np = (struct netcred *) - (*rnh->rnh_matchaddr) ((caddr_t) saddr, - rnh); + (*rnh->rnh_matchaddr)((caddr_t)saddr, + rnh); if (np && np->netc_rnodes->rn_flags & RNF_ROOT) np = NULL; } @@ -1580,7 +1907,6 @@ vfs_export_lookup(mp, nep, nam) return (np); } - /* * perform msync on all vnodes under a mount point * the mount point must be locked. 
@@ -1639,10 +1965,10 @@ retry: } else { if (object->flags & OBJ_DEAD) { if (waslocked) - VOP_UNLOCK(vp); + VOP_UNLOCK(vp, 0, p); tsleep(object, PVM, "vodead", 0); if (waslocked) - VOP_LOCK(vp); + vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p); goto retry; } if ((object->flags & OBJ_VFS_REF) == 0) { diff --git a/sys/kern/vfs_syscalls.c b/sys/kern/vfs_syscalls.c index 22e16d84..83b6dec 100644 --- a/sys/kern/vfs_syscalls.c +++ b/sys/kern/vfs_syscalls.c @@ -50,6 +50,7 @@ #include <sys/param.h> #include <sys/systm.h> +#include <sys/sysent.h> #include <sys/sysproto.h> #include <sys/namei.h> #include <sys/filedesc.h> @@ -64,9 +65,11 @@ #include <sys/malloc.h> #include <sys/dirent.h> +/* see if this is needed XXX JH #ifdef UNION #include <miscfs/union/union.h> #endif +*/ #include <vm/vm.h> #include <vm/vm_param.h> @@ -74,7 +77,8 @@ #include <vm/vm_extern.h> #include <sys/sysctl.h> -static int change_dir __P((struct nameidata *ndp, struct proc *p)); +static int change_dir __P((struct nameidata *ndp, struct proc *p)); +static void checkdirs __P((struct vnode *olddp)); /* * Virtual File System System Calls @@ -85,7 +89,7 @@ static int change_dir __P((struct nameidata *ndp, struct proc *p)); */ #ifndef _SYS_SYSPROTO_H_ struct mount_args { - int type; + char *type; char *path; int flags; caddr_t data; @@ -95,29 +99,32 @@ struct mount_args { int mount(p, uap, retval) struct proc *p; - register struct mount_args *uap; - int *retval; + register struct mount_args /* { + syscallarg(char *) type; + syscallarg(char *) path; + syscallarg(int) flags; + syscallarg(caddr_t) data; + } */ *uap; + register_t *retval; { - register struct vnode *vp; - register struct mount *mp; + struct vnode *vp; + struct mount *mp; + struct vfsconf *vfsp; int error, flag = 0; + struct vattr va; + u_long fstypenum; struct nameidata nd; + char fstypename[MFSNAMELEN]; /* - * Must be super user - */ - error = suser(p->p_ucred, &p->p_acflag); - if (error) - return (error); - /* * Get vnode to be covered */ - NDINIT(&nd, 
LOOKUP, FOLLOW | LOCKLEAF, UIO_USERSPACE, uap->path, p); - error = namei(&nd); - if (error) + NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_USERSPACE, + SCARG(uap, path), p); + if (error = namei(&nd)) return (error); vp = nd.ni_vp; - if (uap->flags & MNT_UPDATE) { + if (SCARG(uap, flags) & MNT_UPDATE) { if ((vp->v_flag & VROOT) == 0) { vput(vp); return (EINVAL); @@ -128,68 +135,135 @@ mount(p, uap, retval) * We only allow the filesystem to be reloaded if it * is currently mounted read-only. */ - if ((uap->flags & MNT_RELOAD) && + if ((SCARG(uap, flags) & MNT_RELOAD) && ((mp->mnt_flag & MNT_RDONLY) == 0)) { vput(vp); return (EOPNOTSUPP); /* Needs translation */ } mp->mnt_flag |= - uap->flags & (MNT_RELOAD | MNT_FORCE | MNT_UPDATE); - VOP_UNLOCK(vp); + SCARG(uap, flags) & (MNT_RELOAD | MNT_FORCE | MNT_UPDATE); + /* + * Only root, or the user that did the original mount is + * permitted to update it. + */ + if (mp->mnt_stat.f_owner != p->p_ucred->cr_uid && + (error = suser(p->p_ucred, &p->p_acflag))) { + vput(vp); + return (error); + } + /* + * Do not allow NFS export by non-root users. Silently + * enforce MNT_NOSUID and MNT_NODEV for non-root users. + */ + if (p->p_ucred->cr_uid != 0) { + if (SCARG(uap, flags) & MNT_EXPORTED) { + vput(vp); + return (EPERM); + } + SCARG(uap, flags) |= MNT_NOSUID | MNT_NODEV; + } + if (vfs_busy(mp, LK_NOWAIT, 0, p)) { + vput(vp); + return (EBUSY); + } + VOP_UNLOCK(vp, 0, p); goto update; } - error = vinvalbuf(vp, V_SAVE, p->p_ucred, p, 0, 0); - if (error) + /* + * If the user is not root, ensure that they own the directory + * onto which we are attempting to mount. + */ + if ((error = VOP_GETATTR(vp, &va, p->p_ucred, p)) || + (va.va_uid != p->p_ucred->cr_uid && + (error = suser(p->p_ucred, &p->p_acflag)))) { + vput(vp); + return (error); + } + /* + * Do not allow NFS export by non-root users. Silently + * enforce MNT_NOSUID and MNT_NODEV for non-root users. 
+ */ + if (p->p_ucred->cr_uid != 0) { + if (SCARG(uap, flags) & MNT_EXPORTED) { + vput(vp); + return (EPERM); + } + SCARG(uap, flags) |= MNT_NOSUID | MNT_NODEV; + } + if (error = vinvalbuf(vp, V_SAVE, p->p_ucred, p, 0, 0)) return (error); if (vp->v_type != VDIR) { vput(vp); return (ENOTDIR); } - if ((u_long)uap->type > MOUNT_MAXTYPE || vfssw[uap->type] == NULL) { - vput(vp); - return (ENODEV); - } - +#ifdef COMPAT_43 /* - * Allocate and initialize the file system. + * Historically filesystem types were identified by number. If we + * get an integer for the filesystem type instead of a string, we + * check to see if it matches one of the historic filesystem types. */ - mp = (struct mount *)malloc((u_long)sizeof(struct mount), - M_MOUNT, M_WAITOK); - bzero((char *)mp, (u_long)sizeof(struct mount)); - mp->mnt_op = vfssw[uap->type]; - mp->mnt_vfc = vfsconf[uap->type]; - error = vfs_lock(mp); - if (error) { - free((caddr_t)mp, M_MOUNT); + fstypenum = (u_long)SCARG(uap, type); + if (fstypenum < maxvfsconf) { + for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next) + if (vfsp->vfc_typenum == fstypenum) + break; + if (vfsp == NULL) { + vput(vp); + return (ENODEV); + } + strncpy(fstypename, vfsp->vfc_name, MFSNAMELEN); + } else +#endif /* COMPAT_43 */ + if (error = copyinstr(SCARG(uap, type), fstypename, MFSNAMELEN, NULL)) { vput(vp); return (error); } + for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next) + if (!strcmp(vfsp->vfc_name, fstypename)) + break; + if (vfsp == NULL) { + vput(vp); + return (ENODEV); + } if (vp->v_mountedhere != NULL) { - vfs_unlock(mp); - free((caddr_t)mp, M_MOUNT); vput(vp); return (EBUSY); } + + /* + * Allocate and initialize the filesystem. 
+ */ + mp = (struct mount *)malloc((u_long)sizeof(struct mount), + M_MOUNT, M_WAITOK); + bzero((char *)mp, (u_long)sizeof(struct mount)); + lockinit(&mp->mnt_lock, PVFS, "vfslock", 0, 0); + (void)vfs_busy(mp, LK_NOWAIT, 0, p); + mp->mnt_op = vfsp->vfc_vfsops; + mp->mnt_vfc = vfsp; + vfsp->vfc_refcount++; + mp->mnt_stat.f_type = vfsp->vfc_typenum; + mp->mnt_flag |= vfsp->vfc_flags & MNT_VISFLAGMASK; + strncpy(mp->mnt_stat.f_fstypename, vfsp->vfc_name, MFSNAMELEN); vp->v_mountedhere = mp; mp->mnt_vnodecovered = vp; - vfsconf[uap->type]->vfc_refcount++; - + mp->mnt_stat.f_owner = p->p_ucred->cr_uid; update: /* * Set the mount level flags. */ - if (uap->flags & MNT_RDONLY) + if (SCARG(uap, flags) & MNT_RDONLY) mp->mnt_flag |= MNT_RDONLY; else if (mp->mnt_flag & MNT_RDONLY) mp->mnt_flag |= MNT_WANTRDWR; mp->mnt_flag &=~ (MNT_NOSUID | MNT_NOEXEC | MNT_NODEV | MNT_SYNCHRONOUS | MNT_UNION | MNT_ASYNC | MNT_NOATIME); - mp->mnt_flag |= uap->flags & (MNT_NOSUID | MNT_NOEXEC | MNT_NODEV | - MNT_SYNCHRONOUS | MNT_UNION | MNT_ASYNC | MNT_FORCE | MNT_NOATIME); + mp->mnt_flag |= SCARG(uap, flags) & (MNT_NOSUID | MNT_NOEXEC | + MNT_NODEV | MNT_SYNCHRONOUS | MNT_UNION | MNT_ASYNC | MNT_FORCE | + MNT_NOATIME); /* * Mount the filesystem. 
*/ - error = VFS_MOUNT(mp, uap->path, uap->data, &nd, p); + error = VFS_MOUNT(mp, SCARG(uap, path), SCARG(uap, data), &nd, p); if (mp->mnt_flag & MNT_UPDATE) { vrele(vp); if (mp->mnt_flag & MNT_WANTRDWR) @@ -198,6 +272,7 @@ update: (MNT_UPDATE | MNT_RELOAD | MNT_FORCE | MNT_WANTRDWR); if (error) mp->mnt_flag = flag; + vfs_unbusy(mp, p); return (error); } /* @@ -205,23 +280,63 @@ update: */ cache_purge(vp); if (!error) { + simple_lock(&mountlist_slock); CIRCLEQ_INSERT_TAIL(&mountlist, mp, mnt_list); - VOP_UNLOCK(vp); - vfs_unlock(mp); - error = VFS_START(mp, 0, p); - if (error) + simple_unlock(&mountlist_slock); + checkdirs(vp); + VOP_UNLOCK(vp, 0, p); + vfs_unbusy(mp, p); + if (error = VFS_START(mp, 0, p)) vrele(vp); } else { mp->mnt_vnodecovered->v_mountedhere = (struct mount *)0; - vfs_unlock(mp); + mp->mnt_vfc->vfc_refcount--; + vfs_unbusy(mp, p); free((caddr_t)mp, M_MOUNT); vput(vp); - vfsconf[uap->type]->vfc_refcount--; } return (error); } /* + * Scan all active processes to see if any of them have a current + * or root directory onto which the new filesystem has just been + * mounted. If so, replace them with the new mount point. + */ +static void +checkdirs(olddp) + struct vnode *olddp; +{ + struct filedesc *fdp; + struct vnode *newdp; + struct proc *p; + + if (olddp->v_usecount == 1) + return; + if (VFS_ROOT(olddp->v_mountedhere, &newdp)) + panic("mount: lost mount"); + for (p = allproc.lh_first; p != 0; p = p->p_list.le_next) { + fdp = p->p_fd; + if (fdp->fd_cdir == olddp) { + vrele(fdp->fd_cdir); + VREF(newdp); + fdp->fd_cdir = newdp; + } + if (fdp->fd_rdir == olddp) { + vrele(fdp->fd_rdir); + VREF(newdp); + fdp->fd_rdir = newdp; + } + } + if (rootvnode == olddp) { + vrele(rootvnode); + VREF(newdp); + rootvnode = newdp; + } + vput(newdp); +} + +/* * Unmount a file system. 
* * Note: unmount takes a path to the vnode mounted on as argument, @@ -237,47 +352,51 @@ struct unmount_args { int unmount(p, uap, retval) struct proc *p; - register struct unmount_args *uap; - int *retval; + register struct unmount_args /* { + syscallarg(char *) path; + syscallarg(int) flags; + } */ *uap; + register_t *retval; { register struct vnode *vp; struct mount *mp; int error; struct nameidata nd; - NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_USERSPACE, uap->path, p); - error = namei(&nd); - if (error) + NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_USERSPACE, + SCARG(uap, path), p); + if (error = namei(&nd)) return (error); vp = nd.ni_vp; + mp = vp->v_mount; /* - * Unless this is a user mount, then must - * have suser privilege. + * Only root, or the user that did the original mount is + * permitted to unmount this filesystem. */ - if (((vp->v_mount->mnt_flag & MNT_USER) == 0) && + if ((mp->mnt_stat.f_owner != p->p_ucred->cr_uid) && (error = suser(p->p_ucred, &p->p_acflag))) { vput(vp); return (error); } /* - * Must be the root of the filesystem + * Don't allow unmounting the root file system. 
*/ - if ((vp->v_flag & VROOT) == 0) { + if (mp->mnt_flag & MNT_ROOTFS) { vput(vp); return (EINVAL); } - mp = vp->v_mount; - vput(vp); /* - * Don't allow unmount of the root filesystem + * Must be the root of the filesystem */ - if (mp->mnt_flag & MNT_ROOTFS) + if ((vp->v_flag & VROOT) == 0) { + vput(vp); return (EINVAL); - - return (dounmount(mp, uap->flags, p)); + } + vput(vp); + return (dounmount(mp, SCARG(uap, flags), p)); } /* @@ -292,74 +411,86 @@ dounmount(mp, flags, p) struct vnode *coveredvp; int error; - coveredvp = mp->mnt_vnodecovered; - if (vfs_busy(mp)) - return (EBUSY); + simple_lock(&mountlist_slock); mp->mnt_flag |= MNT_UNMOUNT; - error = vfs_lock(mp); - if (error) - return (error); - + lockmgr(&mp->mnt_lock, LK_DRAIN | LK_INTERLOCK, &mountlist_slock, p); mp->mnt_flag &=~ MNT_ASYNC; vfs_msync(mp, MNT_NOWAIT); vnode_pager_umount(mp); /* release cached vnodes */ cache_purgevfs(mp); /* remove cache entries for this file sys */ - if ((error = VFS_SYNC(mp, MNT_WAIT, p->p_ucred, p)) == 0 || + if (((mp->mnt_flag & MNT_RDONLY) || + (error = VFS_SYNC(mp, MNT_WAIT, p->p_ucred, p)) == 0) || (flags & MNT_FORCE)) error = VFS_UNMOUNT(mp, flags, p); - mp->mnt_flag &= ~MNT_UNMOUNT; - vfs_unbusy(mp); + simple_lock(&mountlist_slock); if (error) { - vfs_unlock(mp); - } else { + mp->mnt_flag &= ~MNT_UNMOUNT; + lockmgr(&mp->mnt_lock, LK_RELEASE | LK_INTERLOCK | LK_REENABLE, + &mountlist_slock, p); + return (error); + } + CIRCLEQ_REMOVE(&mountlist, mp, mnt_list); + if ((coveredvp = mp->mnt_vnodecovered) != NULLVP) { + coveredvp->v_mountedhere = (struct mount *)0; vrele(coveredvp); - CIRCLEQ_REMOVE(&mountlist, mp, mnt_list); - mp->mnt_vnodecovered->v_mountedhere = (struct mount *)0; - vfs_unlock(mp); - mp->mnt_vfc->vfc_refcount--; - if (mp->mnt_vnodelist.lh_first != NULL) - panic("unmount: dangling vnode"); - free((caddr_t)mp, M_MOUNT); } - return (error); + mp->mnt_vfc->vfc_refcount--; + if (mp->mnt_vnodelist.lh_first != NULL) + panic("unmount: dangling vnode"); + 
lockmgr(&mp->mnt_lock, LK_RELEASE | LK_INTERLOCK, &mountlist_slock, p); + if (mp->mnt_flag & MNT_MWAIT) + wakeup((caddr_t)mp); + free((caddr_t)mp, M_MOUNT); + return (0); } /* * Sync each mounted filesystem. */ - #ifndef _SYS_SYSPROTO_H_ struct sync_args { int dummy; }; #endif +#ifdef DEBUG +int syncprt = 0; +SYSCTL_INT(_debug, 0, syncprt, CTLFLAG_RW, &syncprt, 0, ""); +#endif + /* ARGSUSED */ int sync(p, uap, retval) struct proc *p; struct sync_args *uap; - int *retval; + register_t *retval; { - register struct mount *mp; + register struct mount *mp, *nmp; int asyncflag; - for (mp = mountlist.cqh_first; mp != (void *)&mountlist; mp = mp->mnt_list.cqe_next) { - /* - * The lock check below is to avoid races with mount - * and unmount. - */ - if ((mp->mnt_flag & (MNT_MLOCK|MNT_RDONLY|MNT_MPBUSY)) == 0 && - !vfs_busy(mp)) { + simple_lock(&mountlist_slock); + for (mp = mountlist.cqh_first; mp != (void *)&mountlist; mp = nmp) { + if (vfs_busy(mp, LK_NOWAIT, &mountlist_slock, p)) { + nmp = mp->mnt_list.cqe_next; + continue; + } + if ((mp->mnt_flag & MNT_RDONLY) == 0) { asyncflag = mp->mnt_flag & MNT_ASYNC; mp->mnt_flag &= ~MNT_ASYNC; vfs_msync(mp, MNT_NOWAIT); VFS_SYNC(mp, MNT_NOWAIT, p != NULL ? 
p->p_ucred : NOCRED, p); if (asyncflag) mp->mnt_flag |= MNT_ASYNC; - vfs_unbusy(mp); } + simple_lock(&mountlist_slock); + nmp = mp->mnt_list.cqe_next; + vfs_unbusy(mp, p); } + simple_unlock(&mountlist_slock); +#ifdef DIAGNOSTIC + if (syncprt) + vfs_bufstats(); +#endif /* DIAGNOSTIC */ return (0); } @@ -378,20 +509,25 @@ struct quotactl_args { int quotactl(p, uap, retval) struct proc *p; - register struct quotactl_args *uap; - int *retval; + register struct quotactl_args /* { + syscallarg(char *) path; + syscallarg(int) cmd; + syscallarg(int) uid; + syscallarg(caddr_t) arg; + } */ *uap; + register_t *retval; { register struct mount *mp; int error; struct nameidata nd; - NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, uap->path, p); - error = namei(&nd); - if (error) + NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, SCARG(uap, path), p); + if (error = namei(&nd)) return (error); mp = nd.ni_vp->v_mount; vrele(nd.ni_vp); - return (VFS_QUOTACTL(mp, uap->cmd, uap->uid, uap->arg, p)); + return (VFS_QUOTACTL(mp, SCARG(uap, cmd), SCARG(uap, uid), + SCARG(uap, arg), p)); } /* @@ -407,17 +543,19 @@ struct statfs_args { int statfs(p, uap, retval) struct proc *p; - register struct statfs_args *uap; - int *retval; + register struct statfs_args /* { + syscallarg(char *) path; + syscallarg(struct statfs *) buf; + } */ *uap; + register_t *retval; { register struct mount *mp; register struct statfs *sp; int error; struct nameidata nd; - NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, uap->path, p); - error = namei(&nd); - if (error) + NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, SCARG(uap, path), p); + if (error = namei(&nd)) return (error); mp = nd.ni_vp->v_mount; sp = &mp->mnt_stat; @@ -426,7 +564,7 @@ statfs(p, uap, retval) if (error) return (error); sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK; - return (copyout((caddr_t)sp, (caddr_t)uap->buf, sizeof(*sp))); + return (copyout((caddr_t)sp, (caddr_t)SCARG(uap, buf), sizeof(*sp))); } /* @@ -442,16 +580,18 @@ struct fstatfs_args { int fstatfs(p, 
uap, retval) struct proc *p; - register struct fstatfs_args *uap; - int *retval; + register struct fstatfs_args /* { + syscallarg(int) fd; + syscallarg(struct statfs *) buf; + } */ *uap; + register_t *retval; { struct file *fp; struct mount *mp; register struct statfs *sp; int error; - error = getvnode(p->p_fd, uap->fd, &fp); - if (error) + if (error = getvnode(p->p_fd, SCARG(uap, fd), &fp)) return (error); mp = ((struct vnode *)fp->f_data)->v_mount; sp = &mp->mnt_stat; @@ -459,7 +599,7 @@ fstatfs(p, uap, retval) if (error) return (error); sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK; - return (copyout((caddr_t)sp, (caddr_t)uap->buf, sizeof(*sp))); + return (copyout((caddr_t)sp, (caddr_t)SCARG(uap, buf), sizeof(*sp))); } /* @@ -475,48 +615,55 @@ struct getfsstat_args { int getfsstat(p, uap, retval) struct proc *p; - register struct getfsstat_args *uap; - int *retval; + register struct getfsstat_args /* { + syscallarg(struct statfs *) buf; + syscallarg(long) bufsize; + syscallarg(int) flags; + } */ *uap; + register_t *retval; { register struct mount *mp, *nmp; register struct statfs *sp; caddr_t sfsp; long count, maxcount, error; - maxcount = uap->bufsize / sizeof(struct statfs); - sfsp = (caddr_t)uap->buf; + maxcount = SCARG(uap, bufsize) / sizeof(struct statfs); + sfsp = (caddr_t)SCARG(uap, buf); count = 0; + simple_lock(&mountlist_slock); for (mp = mountlist.cqh_first; mp != (void *)&mountlist; mp = nmp) { - if (vfs_busy(mp)) { + if (vfs_busy(mp, LK_NOWAIT, &mountlist_slock, p)) { nmp = mp->mnt_list.cqe_next; continue; } - if (sfsp && count < maxcount && - ((mp->mnt_flag & MNT_MLOCK) == 0)) { + if (sfsp && count < maxcount) { sp = &mp->mnt_stat; /* * If MNT_NOWAIT is specified, do not refresh the * fsstat cache. MNT_WAIT overrides MNT_NOWAIT. 
*/ - if (((uap->flags & MNT_NOWAIT) == 0 || - (uap->flags & MNT_WAIT)) && + if (((SCARG(uap, flags) & MNT_NOWAIT) == 0 || + (SCARG(uap, flags) & MNT_WAIT)) && (error = VFS_STATFS(mp, sp, p))) { + simple_lock(&mountlist_slock); nmp = mp->mnt_list.cqe_next; - vfs_unbusy(mp); + vfs_unbusy(mp, p); continue; } sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK; error = copyout((caddr_t)sp, sfsp, sizeof(*sp)); if (error) { - vfs_unbusy(mp); + vfs_unbusy(mp, p); return (error); } sfsp += sizeof(*sp); } count++; + simple_lock(&mountlist_slock); nmp = mp->mnt_list.cqe_next; - vfs_unbusy(mp); + vfs_unbusy(mp, p); } + simple_unlock(&mountlist_slock); if (sfsp && count > maxcount) *retval = maxcount; else @@ -536,27 +683,41 @@ struct fchdir_args { int fchdir(p, uap, retval) struct proc *p; - struct fchdir_args *uap; - int *retval; + struct fchdir_args /* { + syscallarg(int) fd; + } */ *uap; + register_t *retval; { register struct filedesc *fdp = p->p_fd; - register struct vnode *vp; + struct vnode *vp, *tdp; + struct mount *mp; struct file *fp; int error; - error = getvnode(fdp, uap->fd, &fp); - if (error) + if (error = getvnode(fdp, SCARG(uap, fd), &fp)) return (error); vp = (struct vnode *)fp->f_data; - VOP_LOCK(vp); + VREF(vp); + vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p); if (vp->v_type != VDIR) error = ENOTDIR; else error = VOP_ACCESS(vp, VEXEC, p->p_ucred, p); - VOP_UNLOCK(vp); - if (error) + while (!error && (mp = vp->v_mountedhere) != NULL) { + if (vfs_busy(mp, 0, 0, p)) + continue; + error = VFS_ROOT(mp, &tdp); + vfs_unbusy(mp, p); + if (error) + break; + vput(vp); + vp = tdp; + } + if (error) { + vput(vp); return (error); - VREF(vp); + } + VOP_UNLOCK(vp, 0, p); vrele(fdp->fd_cdir); fdp->fd_cdir = vp; return (0); @@ -574,16 +735,18 @@ struct chdir_args { int chdir(p, uap, retval) struct proc *p; - struct chdir_args *uap; - int *retval; + struct chdir_args /* { + syscallarg(char *) path; + } */ *uap; + register_t *retval; { register struct filedesc *fdp = p->p_fd; int error; 
struct nameidata nd; - NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_USERSPACE, uap->path, p); - error = change_dir(&nd, p); - if (error) + NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_USERSPACE, + SCARG(uap, path), p); + if (error = change_dir(&nd, p)) return (error); vrele(fdp->fd_cdir); fdp->fd_cdir = nd.ni_vp; @@ -602,8 +765,10 @@ struct chroot_args { int chroot(p, uap, retval) struct proc *p; - struct chroot_args *uap; - int *retval; + struct chroot_args /* { + syscallarg(char *) path; + } */ *uap; + register_t *retval; { register struct filedesc *fdp = p->p_fd; int error; @@ -612,9 +777,9 @@ chroot(p, uap, retval) error = suser(p->p_ucred, &p->p_acflag); if (error) return (error); - NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_USERSPACE, uap->path, p); - error = change_dir(&nd, p); - if (error) + NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_USERSPACE, + SCARG(uap, path), p); + if (error = change_dir(&nd, p)) return (error); if (fdp->fd_rdir != NULL) vrele(fdp->fd_rdir); @@ -641,9 +806,10 @@ change_dir(ndp, p) error = ENOTDIR; else error = VOP_ACCESS(vp, VEXEC, p->p_ucred, p); - VOP_UNLOCK(vp); if (error) - vrele(vp); + vput(vp); + else + VOP_UNLOCK(vp, 0, p); return (error); } @@ -661,8 +827,12 @@ struct open_args { int open(p, uap, retval) struct proc *p; - register struct open_args *uap; - int *retval; + register struct open_args /* { + syscallarg(char *) path; + syscallarg(int) flags; + syscallarg(int) mode; + } */ *uap; + register_t *retval; { register struct filedesc *fdp = p->p_fd; register struct file *fp; @@ -677,17 +847,17 @@ open(p, uap, retval) if (error) return (error); fp = nfp; - flags = FFLAGS(uap->flags); - cmode = ((uap->mode &~ fdp->fd_cmask) & ALLPERMS) &~ S_ISTXT; - NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, uap->path, p); + flags = FFLAGS(SCARG(uap, flags)); + cmode = ((SCARG(uap, mode) &~ fdp->fd_cmask) & ALLPERMS) &~ S_ISTXT; + NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, SCARG(uap, path), p); p->p_dupfd = -indx - 1; /* XXX check for fdopen */ 
error = vn_open(&nd, flags, cmode); if (error) { ffree(fp); if ((error == ENODEV || error == ENXIO) && - p->p_dupfd >= 0 && /* XXX from fdopen */ + p->p_dupfd >= 0 && /* XXX from fdopen */ (error = - dupfdopen(fdp, indx, p->p_dupfd, flags, error)) == 0) { + dupfdopen(fdp, indx, p->p_dupfd, flags, error)) == 0) { *retval = indx; return (0); } @@ -714,18 +884,17 @@ open(p, uap, retval) type = F_FLOCK; if ((flags & FNONBLOCK) == 0) type |= F_WAIT; - VOP_UNLOCK(vp); - error = VOP_ADVLOCK(vp, (caddr_t)fp, F_SETLK, &lf, type); - if (error) { + VOP_UNLOCK(vp, 0, p); + if (error = VOP_ADVLOCK(vp, (caddr_t)fp, F_SETLK, &lf, type)) { (void) vn_close(vp, fp->f_flag, fp->f_cred, p); ffree(fp); fdp->fd_ofiles[indx] = NULL; return (error); } - VOP_LOCK(vp); + vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p); fp->f_flag |= FHASLOCK; } - VOP_UNLOCK(vp); + VOP_UNLOCK(vp, 0, p); *retval = indx; return (0); } @@ -743,15 +912,22 @@ struct ocreat_args { int ocreat(p, uap, retval) struct proc *p; - register struct ocreat_args *uap; - int *retval; + register struct ocreat_args /* { + syscallarg(char *) path; + syscallarg(int) mode; + } */ *uap; + register_t *retval; { - struct open_args openuap; - - openuap.path = uap->path; - openuap.mode = uap->mode; - openuap.flags = O_WRONLY | O_CREAT | O_TRUNC; - return (open(p, &openuap, retval)); + struct open_args /* { + syscallarg(char *) path; + syscallarg(int) flags; + syscallarg(int) mode; + } */ nuap; + + SCARG(&nuap, path) = SCARG(uap, path); + SCARG(&nuap, mode) = SCARG(uap, mode); + SCARG(&nuap, flags) = O_WRONLY | O_CREAT | O_TRUNC; + return (open(p, &nuap, retval)); } #endif /* COMPAT_43 */ @@ -769,30 +945,35 @@ struct mknod_args { int mknod(p, uap, retval) struct proc *p; - register struct mknod_args *uap; - int *retval; + register struct mknod_args /* { + syscallarg(char *) path; + syscallarg(int) mode; + syscallarg(int) dev; + } */ *uap; + register_t *retval; { register struct vnode *vp; struct vattr vattr; int error; + int whiteout; struct 
nameidata nd; error = suser(p->p_ucred, &p->p_acflag); if (error) return (error); - NDINIT(&nd, CREATE, LOCKPARENT, UIO_USERSPACE, uap->path, p); - error = namei(&nd); - if (error) + NDINIT(&nd, CREATE, LOCKPARENT, UIO_USERSPACE, SCARG(uap, path), p); + if (error = namei(&nd)) return (error); vp = nd.ni_vp; if (vp != NULL) error = EEXIST; else { VATTR_NULL(&vattr); - vattr.va_mode = (uap->mode & ALLPERMS) &~ p->p_fd->fd_cmask; - vattr.va_rdev = uap->dev; + vattr.va_mode = (SCARG(uap, mode) & ALLPERMS) &~ p->p_fd->fd_cmask; + vattr.va_rdev = SCARG(uap, dev); + whiteout = 0; - switch (uap->mode & S_IFMT) { + switch (SCARG(uap, mode) & S_IFMT) { case S_IFMT: /* used by badsect to flag bad sectors */ vattr.va_type = VBAD; break; @@ -802,14 +983,25 @@ mknod(p, uap, retval) case S_IFBLK: vattr.va_type = VBLK; break; + case S_IFWHT: + whiteout = 1; + break; default: error = EINVAL; break; } } if (!error) { - LEASE_CHECK(nd.ni_dvp, p, p->p_ucred, LEASE_WRITE); - error = VOP_MKNOD(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr); + VOP_LEASE(nd.ni_dvp, p, p->p_ucred, LEASE_WRITE); + if (whiteout) { + error = VOP_WHITEOUT(nd.ni_dvp, &nd.ni_cnd, CREATE); + if (error) + VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); + vput(nd.ni_dvp); + } else { + error = VOP_MKNOD(nd.ni_dvp, &nd.ni_vp, + &nd.ni_cnd, &vattr); + } } else { VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); if (nd.ni_dvp == vp) @@ -823,7 +1015,7 @@ mknod(p, uap, retval) } /* - * Create named pipe. + * Create a named pipe. 
*/ #ifndef _SYS_SYSPROTO_H_ struct mkfifo_args { @@ -835,16 +1027,21 @@ struct mkfifo_args { int mkfifo(p, uap, retval) struct proc *p; - register struct mkfifo_args *uap; - int *retval; + register struct mkfifo_args /* { + syscallarg(char *) path; + syscallarg(int) mode; + } */ *uap; + register_t *retval; { +#ifndef FIFO + return (EOPNOTSUPP); +#else struct vattr vattr; int error; struct nameidata nd; - NDINIT(&nd, CREATE, LOCKPARENT, UIO_USERSPACE, uap->path, p); - error = namei(&nd); - if (error) + NDINIT(&nd, CREATE, LOCKPARENT, UIO_USERSPACE, SCARG(uap, path), p); + if (error = namei(&nd)) return (error); if (nd.ni_vp != NULL) { VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); @@ -857,9 +1054,10 @@ mkfifo(p, uap, retval) } VATTR_NULL(&vattr); vattr.va_type = VFIFO; - vattr.va_mode = (uap->mode & ALLPERMS) &~ p->p_fd->fd_cmask; - LEASE_CHECK(nd.ni_dvp, p, p->p_ucred, LEASE_WRITE); + vattr.va_mode = (SCARG(uap, mode) & ALLPERMS) &~ p->p_fd->fd_cmask; + VOP_LEASE(nd.ni_dvp, p, p->p_ucred, LEASE_WRITE); return (VOP_MKNOD(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr)); +#endif /* FIFO */ } /* @@ -875,22 +1073,24 @@ struct link_args { int link(p, uap, retval) struct proc *p; - register struct link_args *uap; - int *retval; + register struct link_args /* { + syscallarg(char *) path; + syscallarg(char *) link; + } */ *uap; + register_t *retval; { register struct vnode *vp; struct nameidata nd; int error; - NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, uap->path, p); - error = namei(&nd); - if (error) + NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, SCARG(uap, path), p); + if (error = namei(&nd)) return (error); vp = nd.ni_vp; if (vp->v_type == VDIR) error = EPERM; /* POSIX */ else { - NDINIT(&nd, CREATE, LOCKPARENT, UIO_USERSPACE, uap->link, p); + NDINIT(&nd, CREATE, LOCKPARENT, UIO_USERSPACE, SCARG(uap, link), p); error = namei(&nd); if (!error) { if (nd.ni_vp != NULL) { @@ -903,10 +1103,9 @@ link(p, uap, retval) vrele(nd.ni_vp); error = EEXIST; } else { - LEASE_CHECK(nd.ni_dvp, - p, 
p->p_ucred, LEASE_WRITE); - LEASE_CHECK(vp, - p, p->p_ucred, LEASE_WRITE); + VOP_LEASE(nd.ni_dvp, p, p->p_ucred, + LEASE_WRITE); + VOP_LEASE(vp, p, p->p_ucred, LEASE_WRITE); error = VOP_LINK(nd.ni_dvp, vp, &nd.ni_cnd); } } @@ -928,8 +1127,11 @@ struct symlink_args { int symlink(p, uap, retval) struct proc *p; - register struct symlink_args *uap; - int *retval; + register struct symlink_args /* { + syscallarg(char *) path; + syscallarg(char *) link; + } */ *uap; + register_t *retval; { struct vattr vattr; char *path; @@ -937,12 +1139,10 @@ symlink(p, uap, retval) struct nameidata nd; MALLOC(path, char *, MAXPATHLEN, M_NAMEI, M_WAITOK); - error = copyinstr(uap->path, path, MAXPATHLEN, NULL); - if (error) + if (error = copyinstr(SCARG(uap, path), path, MAXPATHLEN, NULL)) goto out; - NDINIT(&nd, CREATE, LOCKPARENT, UIO_USERSPACE, uap->link, p); - error = namei(&nd); - if (error) + NDINIT(&nd, CREATE, LOCKPARENT, UIO_USERSPACE, SCARG(uap, link), p); + if (error = namei(&nd)) goto out; if (nd.ni_vp) { VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); @@ -956,7 +1156,7 @@ symlink(p, uap, retval) } VATTR_NULL(&vattr); vattr.va_mode = ACCESSPERMS &~ p->p_fd->fd_cmask; - LEASE_CHECK(nd.ni_dvp, p, p->p_ucred, LEASE_WRITE); + VOP_LEASE(nd.ni_dvp, p, p->p_ucred, LEASE_WRITE); error = VOP_SYMLINK(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr, path); out: FREE(path, M_NAMEI); @@ -964,6 +1164,45 @@ out: } /* + * Delete a whiteout from the filesystem. 
+ */ +/* ARGSUSED */ +int +undelete(p, uap, retval) + struct proc *p; + register struct undelete_args /* { + syscallarg(char *) path; + } */ *uap; + register_t *retval; +{ + int error; + struct nameidata nd; + + NDINIT(&nd, DELETE, LOCKPARENT|DOWHITEOUT, UIO_USERSPACE, + SCARG(uap, path), p); + error = namei(&nd); + if (error) + return (error); + + if (nd.ni_vp != NULLVP || !(nd.ni_cnd.cn_flags & ISWHITEOUT)) { + VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); + if (nd.ni_dvp == nd.ni_vp) + vrele(nd.ni_dvp); + else + vput(nd.ni_dvp); + if (nd.ni_vp) + vrele(nd.ni_vp); + return (EEXIST); + } + + VOP_LEASE(nd.ni_dvp, p, p->p_ucred, LEASE_WRITE); + if (error = VOP_WHITEOUT(nd.ni_dvp, &nd.ni_cnd, DELETE)) + VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); + vput(nd.ni_dvp); + return (error); +} + +/* * Delete a name from the filesystem. */ #ifndef _SYS_SYSPROTO_H_ @@ -975,20 +1214,21 @@ struct unlink_args { int unlink(p, uap, retval) struct proc *p; - struct unlink_args *uap; - int *retval; + struct unlink_args /* { + syscallarg(char *) path; + } */ *uap; + register_t *retval; { register struct vnode *vp; int error; struct nameidata nd; - NDINIT(&nd, DELETE, LOCKPARENT, UIO_USERSPACE, uap->path, p); - error = namei(&nd); - if (error) + NDINIT(&nd, DELETE, LOCKPARENT, UIO_USERSPACE, SCARG(uap, path), p); + if (error = namei(&nd)) return (error); vp = nd.ni_vp; - LEASE_CHECK(vp, p, p->p_ucred, LEASE_WRITE); - VOP_LOCK(vp); + VOP_LEASE(vp, p, p->p_ucred, LEASE_WRITE); + vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p); if (vp->v_type == VDIR) error = EPERM; /* POSIX */ @@ -1001,11 +1241,11 @@ unlink(p, uap, retval) if (vp->v_flag & VROOT) error = EBUSY; else - (void) vnode_pager_uncache(vp); + (void) vnode_pager_uncache(vp, p); } if (!error) { - LEASE_CHECK(nd.ni_dvp, p, p->p_ucred, LEASE_WRITE); + VOP_LEASE(nd.ni_dvp, p, p->p_ucred, LEASE_WRITE); error = VOP_REMOVE(nd.ni_dvp, nd.ni_vp, &nd.ni_cnd); } else { VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); @@ -1013,7 +1253,8 @@ unlink(p, uap, retval) vrele(nd.ni_dvp); 
else vput(nd.ni_dvp); - vput(vp); + if (vp != NULLVP) + vput(vp); } return (error); } @@ -1032,8 +1273,13 @@ struct lseek_args { int lseek(p, uap, retval) struct proc *p; - register struct lseek_args *uap; - int *retval; + register struct lseek_args /* { + syscallarg(int) fd; + syscallarg(int) pad; + syscallarg(off_t) offset; + syscallarg(int) whence; + } */ *uap; + register_t *retval; /* XXX */ { struct ucred *cred = p->p_ucred; register struct filedesc *fdp = p->p_fd; @@ -1041,23 +1287,23 @@ lseek(p, uap, retval) struct vattr vattr; int error; - if ((u_int)uap->fd >= fdp->fd_nfiles || - (fp = fdp->fd_ofiles[uap->fd]) == NULL) + if ((u_int)SCARG(uap, fd) >= fdp->fd_nfiles || + (fp = fdp->fd_ofiles[SCARG(uap, fd)]) == NULL) return (EBADF); if (fp->f_type != DTYPE_VNODE) return (ESPIPE); - switch (uap->whence) { + switch (SCARG(uap, whence)) { case L_INCR: - fp->f_offset += uap->offset; + fp->f_offset += SCARG(uap, offset); break; case L_XTND: error=VOP_GETATTR((struct vnode *)fp->f_data, &vattr, cred, p); if (error) return (error); - fp->f_offset = uap->offset + vattr.va_size; + fp->f_offset = SCARG(uap, offset) + vattr.va_size; break; case L_SET: - fp->f_offset = uap->offset; + fp->f_offset = SCARG(uap, offset); break; default: return (EINVAL); @@ -1080,17 +1326,26 @@ struct olseek_args { int olseek(p, uap, retval) struct proc *p; - register struct olseek_args *uap; - int *retval; + register struct olseek_args /* { + syscallarg(int) fd; + syscallarg(long) offset; + syscallarg(int) whence; + } */ *uap; + register_t *retval; { - struct lseek_args nuap; + struct lseek_args /* { + syscallarg(int) fd; + syscallarg(int) pad; + syscallarg(off_t) offset; + syscallarg(int) whence; + } */ nuap; off_t qret; int error; - nuap.fd = uap->fd; - nuap.offset = uap->offset; - nuap.whence = uap->whence; - error = lseek(p, &nuap, (int *)&qret); + SCARG(&nuap, fd) = SCARG(uap, fd); + SCARG(&nuap, offset) = SCARG(uap, offset); + SCARG(&nuap, whence) = SCARG(uap, whence); + error = 
lseek(p, &nuap, (register_t *) &qret); *(long *)retval = qret; return (error); } @@ -1108,8 +1363,11 @@ struct access_args { int access(p, uap, retval) struct proc *p; - register struct access_args *uap; - int *retval; + register struct access_args /* { + syscallarg(char *) path; + syscallarg(int) flags; + } */ *uap; + register_t *retval; { register struct ucred *cred = p->p_ucred; register struct vnode *vp; @@ -1120,20 +1378,20 @@ access(p, uap, retval) t_gid = cred->cr_groups[0]; cred->cr_uid = p->p_cred->p_ruid; cred->cr_groups[0] = p->p_cred->p_rgid; - NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_USERSPACE, uap->path, p); - error = namei(&nd); - if (error) + NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_USERSPACE, + SCARG(uap, path), p); + if (error = namei(&nd)) goto out1; vp = nd.ni_vp; /* Flags == 0 means only check for existence. */ - if (uap->flags) { + if (SCARG(uap, flags)) { flags = 0; - if (uap->flags & R_OK) + if (SCARG(uap, flags) & R_OK) flags |= VREAD; - if (uap->flags & W_OK) + if (SCARG(uap, flags) & W_OK) flags |= VWRITE; - if (uap->flags & X_OK) + if (SCARG(uap, flags) & X_OK) flags |= VEXEC; if ((flags & VWRITE) == 0 || (error = vn_writechk(vp)) == 0) error = VOP_ACCESS(vp, flags, cred, p); @@ -1159,24 +1417,27 @@ struct ostat_args { int ostat(p, uap, retval) struct proc *p; - register struct ostat_args *uap; - int *retval; + register struct ostat_args /* { + syscallarg(char *) path; + syscallarg(struct ostat *) ub; + } */ *uap; + register_t *retval; { struct stat sb; struct ostat osb; int error; struct nameidata nd; - NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_USERSPACE, uap->path, p); - error = namei(&nd); - if (error) + NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_USERSPACE, + SCARG(uap, path), p); + if (error = namei(&nd)) return (error); error = vn_stat(nd.ni_vp, &sb, p); vput(nd.ni_vp); if (error) return (error); cvtstat(&sb, &osb); - error = copyout((caddr_t)&osb, (caddr_t)uap->ub, sizeof (osb)); + error = copyout((caddr_t)&osb, 
(caddr_t)SCARG(uap, ub), sizeof (osb)); return (error); } @@ -1193,8 +1454,11 @@ struct olstat_args { int olstat(p, uap, retval) struct proc *p; - register struct olstat_args *uap; - int *retval; + register struct olstat_args /* { + syscallarg(char *) path; + syscallarg(struct ostat *) ub; + } */ *uap; + register_t *retval; { struct vnode *vp, *dvp; struct stat sb, sb1; @@ -1203,9 +1467,8 @@ olstat(p, uap, retval) struct nameidata nd; NDINIT(&nd, LOOKUP, NOFOLLOW | LOCKLEAF | LOCKPARENT, UIO_USERSPACE, - uap->path, p); - error = namei(&nd); - if (error) + SCARG(uap, path), p); + if (error = namei(&nd)) return (error); /* * For symbolic links, always return the attributes of its @@ -1240,7 +1503,7 @@ olstat(p, uap, retval) sb.st_blocks = sb1.st_blocks; } cvtstat(&sb, &osb); - error = copyout((caddr_t)&osb, (caddr_t)uap->ub, sizeof (osb)); + error = copyout((caddr_t)&osb, (caddr_t)SCARG(uap, ub), sizeof (osb)); return (error); } @@ -1287,22 +1550,25 @@ struct stat_args { int stat(p, uap, retval) struct proc *p; - register struct stat_args *uap; - int *retval; + register struct stat_args /* { + syscallarg(char *) path; + syscallarg(struct stat *) ub; + } */ *uap; + register_t *retval; { struct stat sb; int error; struct nameidata nd; - NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_USERSPACE, uap->path, p); - error = namei(&nd); - if (error) + NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_USERSPACE, + SCARG(uap, path), p); + if (error = namei(&nd)) return (error); error = vn_stat(nd.ni_vp, &sb, p); vput(nd.ni_vp); if (error) return (error); - error = copyout((caddr_t)&sb, (caddr_t)uap->ub, sizeof (sb)); + error = copyout((caddr_t)&sb, (caddr_t)SCARG(uap, ub), sizeof (sb)); return (error); } @@ -1319,8 +1585,11 @@ struct lstat_args { int lstat(p, uap, retval) struct proc *p; - register struct lstat_args *uap; - int *retval; + register struct lstat_args /* { + syscallarg(char *) path; + syscallarg(struct stat *) ub; + } */ *uap; + register_t *retval; { int error; struct vnode 
*vp, *dvp; @@ -1328,13 +1597,12 @@ lstat(p, uap, retval) struct nameidata nd; NDINIT(&nd, LOOKUP, NOFOLLOW | LOCKLEAF | LOCKPARENT, UIO_USERSPACE, - uap->path, p); - error = namei(&nd); - if (error) + SCARG(uap, path), p); + if (error = namei(&nd)) return (error); /* - * For symbolic links, always return the attributes of its - * containing directory, except for mode, size, and links. + * For symbolic links, always return the attributes of its containing + * directory, except for mode, size, inode number, and links. */ vp = nd.ni_vp; dvp = nd.ni_dvp; @@ -1363,8 +1631,9 @@ lstat(p, uap, retval) sb.st_nlink = sb1.st_nlink; sb.st_size = sb1.st_size; sb.st_blocks = sb1.st_blocks; + sb.st_ino = sb1.st_ino; } - error = copyout((caddr_t)&sb, (caddr_t)uap->ub, sizeof (sb)); + error = copyout((caddr_t)&sb, (caddr_t)SCARG(uap, ub), sizeof (sb)); return (error); } @@ -1381,17 +1650,20 @@ struct pathconf_args { int pathconf(p, uap, retval) struct proc *p; - register struct pathconf_args *uap; - int *retval; + register struct pathconf_args /* { + syscallarg(char *) path; + syscallarg(int) name; + } */ *uap; + register_t *retval; { int error; struct nameidata nd; - NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_USERSPACE, uap->path, p); - error = namei(&nd); - if (error) + NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_USERSPACE, + SCARG(uap, path), p); + if (error = namei(&nd)) return (error); - error = VOP_PATHCONF(nd.ni_vp, uap->name, retval); + error = VOP_PATHCONF(nd.ni_vp, SCARG(uap, name), retval); vput(nd.ni_vp); return (error); } @@ -1410,8 +1682,12 @@ struct readlink_args { int readlink(p, uap, retval) struct proc *p; - register struct readlink_args *uap; - int *retval; + register struct readlink_args /* { + syscallarg(char *) path; + syscallarg(char *) buf; + syscallarg(int) count; + } */ *uap; + register_t *retval; { register struct vnode *vp; struct iovec aiov; @@ -1419,27 +1695,27 @@ readlink(p, uap, retval) int error; struct nameidata nd; - NDINIT(&nd, LOOKUP, NOFOLLOW | 
LOCKLEAF, UIO_USERSPACE, uap->path, p); - error = namei(&nd); - if (error) + NDINIT(&nd, LOOKUP, NOFOLLOW | LOCKLEAF, UIO_USERSPACE, + SCARG(uap, path), p); + if (error = namei(&nd)) return (error); vp = nd.ni_vp; if (vp->v_type != VLNK) error = EINVAL; else { - aiov.iov_base = uap->buf; - aiov.iov_len = uap->count; + aiov.iov_base = SCARG(uap, buf); + aiov.iov_len = SCARG(uap, count); auio.uio_iov = &aiov; auio.uio_iovcnt = 1; auio.uio_offset = 0; auio.uio_rw = UIO_READ; auio.uio_segflg = UIO_USERSPACE; auio.uio_procp = p; - auio.uio_resid = uap->count; + auio.uio_resid = SCARG(uap, count); error = VOP_READLINK(vp, &auio, p->p_ucred); } vput(vp); - *retval = uap->count - auio.uio_resid; + *retval = SCARG(uap, count) - auio.uio_resid; return (error); } @@ -1456,23 +1732,25 @@ struct chflags_args { int chflags(p, uap, retval) struct proc *p; - register struct chflags_args *uap; - int *retval; + register struct chflags_args /* { + syscallarg(char *) path; + syscallarg(int) flags; + } */ *uap; + register_t *retval; { register struct vnode *vp; struct vattr vattr; int error; struct nameidata nd; - NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, uap->path, p); - error = namei(&nd); - if (error) + NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, SCARG(uap, path), p); + if (error = namei(&nd)) return (error); vp = nd.ni_vp; - LEASE_CHECK(vp, p, p->p_ucred, LEASE_WRITE); - VOP_LOCK(vp); + VOP_LEASE(vp, p, p->p_ucred, LEASE_WRITE); + vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p); VATTR_NULL(&vattr); - vattr.va_flags = uap->flags; + vattr.va_flags = SCARG(uap, flags); error = VOP_SETATTR(vp, &vattr, p->p_ucred, p); vput(vp); return (error); @@ -1491,24 +1769,26 @@ struct fchflags_args { int fchflags(p, uap, retval) struct proc *p; - register struct fchflags_args *uap; - int *retval; + register struct fchflags_args /* { + syscallarg(int) fd; + syscallarg(int) flags; + } */ *uap; + register_t *retval; { struct vattr vattr; struct vnode *vp; struct file *fp; int error; - error = 
getvnode(p->p_fd, uap->fd, &fp); - if (error) + if (error = getvnode(p->p_fd, SCARG(uap, fd), &fp)) return (error); vp = (struct vnode *)fp->f_data; - LEASE_CHECK(vp, p, p->p_ucred, LEASE_WRITE); - VOP_LOCK(vp); + VOP_LEASE(vp, p, p->p_ucred, LEASE_WRITE); + vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p); VATTR_NULL(&vattr); - vattr.va_flags = uap->flags; + vattr.va_flags = SCARG(uap, flags); error = VOP_SETATTR(vp, &vattr, p->p_ucred, p); - VOP_UNLOCK(vp); + VOP_UNLOCK(vp, 0, p); return (error); } @@ -1525,23 +1805,25 @@ struct chmod_args { int chmod(p, uap, retval) struct proc *p; - register struct chmod_args *uap; - int *retval; + register struct chmod_args /* { + syscallarg(char *) path; + syscallarg(int) mode; + } */ *uap; + register_t *retval; { register struct vnode *vp; struct vattr vattr; int error; struct nameidata nd; - NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, uap->path, p); - error = namei(&nd); - if (error) + NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, SCARG(uap, path), p); + if (error = namei(&nd)) return (error); vp = nd.ni_vp; - LEASE_CHECK(vp, p, p->p_ucred, LEASE_WRITE); - VOP_LOCK(vp); + VOP_LEASE(vp, p, p->p_ucred, LEASE_WRITE); + vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p); VATTR_NULL(&vattr); - vattr.va_mode = uap->mode & ALLPERMS; + vattr.va_mode = SCARG(uap, mode) & ALLPERMS; error = VOP_SETATTR(vp, &vattr, p->p_ucred, p); vput(vp); return (error); @@ -1560,24 +1842,26 @@ struct fchmod_args { int fchmod(p, uap, retval) struct proc *p; - register struct fchmod_args *uap; - int *retval; + register struct fchmod_args /* { + syscallarg(int) fd; + syscallarg(int) mode; + } */ *uap; + register_t *retval; { struct vattr vattr; struct vnode *vp; struct file *fp; int error; - error = getvnode(p->p_fd, uap->fd, &fp); - if (error) + if (error = getvnode(p->p_fd, SCARG(uap, fd), &fp)) return (error); vp = (struct vnode *)fp->f_data; - LEASE_CHECK(vp, p, p->p_ucred, LEASE_WRITE); - VOP_LOCK(vp); + VOP_LEASE(vp, p, p->p_ucred, LEASE_WRITE); + vn_lock(vp, 
LK_EXCLUSIVE | LK_RETRY, p); VATTR_NULL(&vattr); - vattr.va_mode = uap->mode & ALLPERMS; + vattr.va_mode = SCARG(uap, mode) & ALLPERMS; error = VOP_SETATTR(vp, &vattr, p->p_ucred, p); - VOP_UNLOCK(vp); + VOP_UNLOCK(vp, 0, p); return (error); } @@ -1595,24 +1879,27 @@ struct chown_args { int chown(p, uap, retval) struct proc *p; - register struct chown_args *uap; - int *retval; + register struct chown_args /* { + syscallarg(char *) path; + syscallarg(int) uid; + syscallarg(int) gid; + } */ *uap; + register_t *retval; { register struct vnode *vp; struct vattr vattr; int error; struct nameidata nd; - NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, uap->path, p); - error = namei(&nd); - if (error) + NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, SCARG(uap, path), p); + if (error = namei(&nd)) return (error); vp = nd.ni_vp; - LEASE_CHECK(vp, p, p->p_ucred, LEASE_WRITE); - VOP_LOCK(vp); + VOP_LEASE(vp, p, p->p_ucred, LEASE_WRITE); + vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p); VATTR_NULL(&vattr); - vattr.va_uid = uap->uid; - vattr.va_gid = uap->gid; + vattr.va_uid = SCARG(uap, uid); + vattr.va_gid = SCARG(uap, gid); error = VOP_SETATTR(vp, &vattr, p->p_ucred, p); vput(vp); return (error); @@ -1632,25 +1919,28 @@ struct fchown_args { int fchown(p, uap, retval) struct proc *p; - register struct fchown_args *uap; - int *retval; + register struct fchown_args /* { + syscallarg(int) fd; + syscallarg(int) uid; + syscallarg(int) gid; + } */ *uap; + register_t *retval; { struct vattr vattr; struct vnode *vp; struct file *fp; int error; - error = getvnode(p->p_fd, uap->fd, &fp); - if (error) + if (error = getvnode(p->p_fd, SCARG(uap, fd), &fp)) return (error); vp = (struct vnode *)fp->f_data; - LEASE_CHECK(vp, p, p->p_ucred, LEASE_WRITE); - VOP_LOCK(vp); + VOP_LEASE(vp, p, p->p_ucred, LEASE_WRITE); + vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p); VATTR_NULL(&vattr); - vattr.va_uid = uap->uid; - vattr.va_gid = uap->gid; + vattr.va_uid = SCARG(uap, uid); + vattr.va_gid = SCARG(uap, gid); error = 
VOP_SETATTR(vp, &vattr, p->p_ucred, p); - VOP_UNLOCK(vp); + VOP_UNLOCK(vp, 0, p); return (error); } @@ -1667,8 +1957,11 @@ struct utimes_args { int utimes(p, uap, retval) struct proc *p; - register struct utimes_args *uap; - int *retval; + register struct utimes_args /* { + syscallarg(char *) path; + syscallarg(struct timeval *) tptr; + } */ *uap; + register_t *retval; { register struct vnode *vp; struct timeval tv[2]; @@ -1677,22 +1970,19 @@ utimes(p, uap, retval) struct nameidata nd; VATTR_NULL(&vattr); - if (uap->tptr == NULL) { + if (SCARG(uap, tptr) == NULL) { microtime(&tv[0]); tv[1] = tv[0]; vattr.va_vaflags |= VA_UTIMES_NULL; - } else { - error = copyin((caddr_t)uap->tptr, (caddr_t)tv, sizeof (tv)); - if (error) - return (error); - } - NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, uap->path, p); - error = namei(&nd); - if (error) + } else if (error = copyin((caddr_t)SCARG(uap, tptr), (caddr_t)tv, + sizeof (tv))) + return (error); + NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, SCARG(uap, path), p); + if (error = namei(&nd)) return (error); vp = nd.ni_vp; - LEASE_CHECK(vp, p, p->p_ucred, LEASE_WRITE); - VOP_LOCK(vp); + VOP_LEASE(vp, p, p->p_ucred, LEASE_WRITE); + vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p); vattr.va_atime.tv_sec = tv[0].tv_sec; vattr.va_atime.tv_nsec = tv[0].tv_usec * 1000; vattr.va_mtime.tv_sec = tv[1].tv_sec; @@ -1716,8 +2006,12 @@ struct truncate_args { int truncate(p, uap, retval) struct proc *p; - register struct truncate_args *uap; - int *retval; + register struct truncate_args /* { + syscallarg(char *) path; + syscallarg(int) pad; + syscallarg(off_t) length; + } */ *uap; + register_t *retval; { register struct vnode *vp; struct vattr vattr; @@ -1726,19 +2020,18 @@ truncate(p, uap, retval) if (uap->length < 0) return(EINVAL); - NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, uap->path, p); - error = namei(&nd); - if (error) + NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, SCARG(uap, path), p); + if (error = namei(&nd)) return (error); vp = nd.ni_vp; - 
LEASE_CHECK(vp, p, p->p_ucred, LEASE_WRITE); - VOP_LOCK(vp); + VOP_LEASE(vp, p, p->p_ucred, LEASE_WRITE); + vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p); if (vp->v_type == VDIR) error = EISDIR; else if ((error = vn_writechk(vp)) == 0 && (error = VOP_ACCESS(vp, VWRITE, p->p_ucred, p)) == 0) { VATTR_NULL(&vattr); - vattr.va_size = uap->length; + vattr.va_size = SCARG(uap, length); error = VOP_SETATTR(vp, &vattr, p->p_ucred, p); } vput(vp); @@ -1759,8 +2052,12 @@ struct ftruncate_args { int ftruncate(p, uap, retval) struct proc *p; - register struct ftruncate_args *uap; - int *retval; + register struct ftruncate_args /* { + syscallarg(int) fd; + syscallarg(int) pad; + syscallarg(off_t) length; + } */ *uap; + register_t *retval; { struct vattr vattr; struct vnode *vp; @@ -1769,22 +2066,21 @@ ftruncate(p, uap, retval) if (uap->length < 0) return(EINVAL); - error = getvnode(p->p_fd, uap->fd, &fp); - if (error) + if (error = getvnode(p->p_fd, SCARG(uap, fd), &fp)) return (error); if ((fp->f_flag & FWRITE) == 0) return (EINVAL); vp = (struct vnode *)fp->f_data; - LEASE_CHECK(vp, p, p->p_ucred, LEASE_WRITE); - VOP_LOCK(vp); + VOP_LEASE(vp, p, p->p_ucred, LEASE_WRITE); + vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p); if (vp->v_type == VDIR) error = EISDIR; else if ((error = vn_writechk(vp)) == 0) { VATTR_NULL(&vattr); - vattr.va_size = uap->length; + vattr.va_size = SCARG(uap, length); error = VOP_SETATTR(vp, &vattr, fp->f_cred, p); } - VOP_UNLOCK(vp); + VOP_UNLOCK(vp, 0, p); return (error); } @@ -1802,13 +2098,20 @@ struct otruncate_args { int otruncate(p, uap, retval) struct proc *p; - register struct otruncate_args *uap; - int *retval; + register struct otruncate_args /* { + syscallarg(char *) path; + syscallarg(long) length; + } */ *uap; + register_t *retval; { - struct truncate_args nuap; - - nuap.path = uap->path; - nuap.length = uap->length; + struct truncate_args /* { + syscallarg(char *) path; + syscallarg(int) pad; + syscallarg(off_t) length; + } */ nuap; + + SCARG(&nuap, 
path) = SCARG(uap, path); + SCARG(&nuap, length) = SCARG(uap, length); return (truncate(p, &nuap, retval)); } @@ -1825,13 +2128,20 @@ struct oftruncate_args { int oftruncate(p, uap, retval) struct proc *p; - register struct oftruncate_args *uap; - int *retval; + register struct oftruncate_args /* { + syscallarg(int) fd; + syscallarg(long) length; + } */ *uap; + register_t *retval; { - struct ftruncate_args nuap; - - nuap.fd = uap->fd; - nuap.length = uap->length; + struct ftruncate_args /* { + syscallarg(int) fd; + syscallarg(int) pad; + syscallarg(off_t) length; + } */ nuap; + + SCARG(&nuap, fd) = SCARG(uap, fd); + SCARG(&nuap, length) = SCARG(uap, length); return (ftruncate(p, &nuap, retval)); } #endif /* COMPAT_43 || COMPAT_SUNOS */ @@ -1848,24 +2158,25 @@ struct fsync_args { int fsync(p, uap, retval) struct proc *p; - struct fsync_args *uap; - int *retval; + struct fsync_args /* { + syscallarg(int) fd; + } */ *uap; + register_t *retval; { register struct vnode *vp; struct file *fp; int error; - error = getvnode(p->p_fd, uap->fd, &fp); - if (error) + if (error = getvnode(p->p_fd, SCARG(uap, fd), &fp)) return (error); vp = (struct vnode *)fp->f_data; - VOP_LOCK(vp); + vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p); if (vp->v_object) { vm_object_page_clean(vp->v_object, 0, 0 ,0, FALSE); } error = VOP_FSYNC(vp, fp->f_cred, (vp->v_mount->mnt_flag & MNT_ASYNC) ? 
MNT_NOWAIT : MNT_WAIT, p); - VOP_UNLOCK(vp); + VOP_UNLOCK(vp, 0, p); return (error); } @@ -1883,29 +2194,29 @@ struct rename_args { int rename(p, uap, retval) struct proc *p; - register struct rename_args *uap; - int *retval; + register struct rename_args /* { + syscallarg(char *) from; + syscallarg(char *) to; + } */ *uap; + register_t *retval; { register struct vnode *tvp, *fvp, *tdvp; struct nameidata fromnd, tond; int error; NDINIT(&fromnd, DELETE, WANTPARENT | SAVESTART, UIO_USERSPACE, - uap->from, p); - error = namei(&fromnd); - if (error) + SCARG(uap, from), p); + if (error = namei(&fromnd)) return (error); fvp = fromnd.ni_vp; NDINIT(&tond, RENAME, LOCKPARENT | LOCKLEAF | NOCACHE | SAVESTART, - UIO_USERSPACE, uap->to, p); + UIO_USERSPACE, SCARG(uap, to), p); if (fromnd.ni_vp->v_type == VDIR) tond.ni_cnd.cn_flags |= WILLBEDIR; - error = namei(&tond); - if (error) { + if (error = namei(&tond)) { /* Translate error code for rename("dir1", "dir2/."). */ if (error == EISDIR && fvp->v_type == VDIR) error = EINVAL; - VOP_ABORTOP(fromnd.ni_dvp, &fromnd.ni_cnd); vrele(fromnd.ni_dvp); vrele(fvp); @@ -1936,13 +2247,12 @@ rename(p, uap, retval) error = -1; out: if (!error) { - LEASE_CHECK(tdvp, p, p->p_ucred, LEASE_WRITE); - if (fromnd.ni_dvp != tdvp) { - LEASE_CHECK(fromnd.ni_dvp, p, p->p_ucred, LEASE_WRITE); - } + VOP_LEASE(tdvp, p, p->p_ucred, LEASE_WRITE); + if (fromnd.ni_dvp != tdvp) + VOP_LEASE(fromnd.ni_dvp, p, p->p_ucred, LEASE_WRITE); if (tvp) { - LEASE_CHECK(tvp, p, p->p_ucred, LEASE_WRITE); - (void) vnode_pager_uncache(tvp); + VOP_LEASE(tvp, p, p->p_ucred, LEASE_WRITE); + (void) vnode_pager_uncache(tvp, p); } error = VOP_RENAME(fromnd.ni_dvp, fromnd.ni_vp, &fromnd.ni_cnd, tond.ni_dvp, tond.ni_vp, &tond.ni_cnd); @@ -1982,18 +2292,20 @@ struct mkdir_args { int mkdir(p, uap, retval) struct proc *p; - register struct mkdir_args *uap; - int *retval; + register struct mkdir_args /* { + syscallarg(char *) path; + syscallarg(int) mode; + } */ *uap; + register_t 
*retval; { register struct vnode *vp; struct vattr vattr; int error; struct nameidata nd; - NDINIT(&nd, CREATE, LOCKPARENT, UIO_USERSPACE, uap->path, p); + NDINIT(&nd, CREATE, LOCKPARENT, UIO_USERSPACE, SCARG(uap, path), p); nd.ni_cnd.cn_flags |= WILLBEDIR; - error = namei(&nd); - if (error) + if (error = namei(&nd)) return (error); vp = nd.ni_vp; if (vp != NULL) { @@ -2007,8 +2319,8 @@ mkdir(p, uap, retval) } VATTR_NULL(&vattr); vattr.va_type = VDIR; - vattr.va_mode = (uap->mode & ACCESSPERMS) &~ p->p_fd->fd_cmask; - LEASE_CHECK(nd.ni_dvp, p, p->p_ucred, LEASE_WRITE); + vattr.va_mode = (SCARG(uap, mode) & ACCESSPERMS) &~ p->p_fd->fd_cmask; + VOP_LEASE(nd.ni_dvp, p, p->p_ucred, LEASE_WRITE); error = VOP_MKDIR(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr); if (!error) vput(nd.ni_vp); @@ -2027,16 +2339,18 @@ struct rmdir_args { int rmdir(p, uap, retval) struct proc *p; - struct rmdir_args *uap; - int *retval; + struct rmdir_args /* { + syscallarg(char *) path; + } */ *uap; + register_t *retval; { register struct vnode *vp; int error; struct nameidata nd; - NDINIT(&nd, DELETE, LOCKPARENT | LOCKLEAF, UIO_USERSPACE, uap->path, p); - error = namei(&nd); - if (error) + NDINIT(&nd, DELETE, LOCKPARENT | LOCKLEAF, UIO_USERSPACE, + SCARG(uap, path), p); + if (error = namei(&nd)) return (error); vp = nd.ni_vp; if (vp->v_type != VDIR) { @@ -2057,8 +2371,8 @@ rmdir(p, uap, retval) error = EBUSY; out: if (!error) { - LEASE_CHECK(nd.ni_dvp, p, p->p_ucred, LEASE_WRITE); - LEASE_CHECK(vp, p, p->p_ucred, LEASE_WRITE); + VOP_LEASE(nd.ni_dvp, p, p->p_ucred, LEASE_WRITE); + VOP_LEASE(vp, p, p->p_ucred, LEASE_WRITE); error = VOP_RMDIR(nd.ni_dvp, nd.ni_vp, &nd.ni_cnd); } else { VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); @@ -2086,8 +2400,13 @@ struct ogetdirentries_args { int ogetdirentries(p, uap, retval) struct proc *p; - register struct ogetdirentries_args *uap; - int *retval; + register struct ogetdirentries_args /* { + syscallarg(int) fd; + syscallarg(char *) buf; + syscallarg(u_int) count; + 
syscallarg(long *) basep; + } */ *uap; + register_t *retval; { register struct vnode *vp; struct file *fp; @@ -2095,30 +2414,31 @@ ogetdirentries(p, uap, retval) struct iovec aiov, kiov; struct dirent *dp, *edp; caddr_t dirbuf; - int error, readcnt; + int error, eofflag, readcnt; long loff; - error = getvnode(p->p_fd, uap->fd, &fp); - if (error) + if (error = getvnode(p->p_fd, SCARG(uap, fd), &fp)) return (error); if ((fp->f_flag & FREAD) == 0) return (EBADF); vp = (struct vnode *)fp->f_data; +unionread: if (vp->v_type != VDIR) return (EINVAL); - aiov.iov_base = uap->buf; - aiov.iov_len = uap->count; + aiov.iov_base = SCARG(uap, buf); + aiov.iov_len = SCARG(uap, count); auio.uio_iov = &aiov; auio.uio_iovcnt = 1; auio.uio_rw = UIO_READ; auio.uio_segflg = UIO_USERSPACE; auio.uio_procp = p; - auio.uio_resid = uap->count; - VOP_LOCK(vp); + auio.uio_resid = SCARG(uap, count); + vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p); loff = auio.uio_offset = fp->f_offset; # if (BYTE_ORDER != LITTLE_ENDIAN) if (vp->v_mount->mnt_maxsymlinklen <= 0) { - error = VOP_READDIR(vp, &auio, fp->f_cred, NULL, NULL, NULL); + error = VOP_READDIR(vp, &auio, fp->f_cred, &eofflag, + NULL, NULL); fp->f_offset = auio.uio_offset; } else # endif @@ -2126,13 +2446,14 @@ ogetdirentries(p, uap, retval) kuio = auio; kuio.uio_iov = &kiov; kuio.uio_segflg = UIO_SYSSPACE; - kiov.iov_len = uap->count; - MALLOC(dirbuf, caddr_t, uap->count, M_TEMP, M_WAITOK); + kiov.iov_len = SCARG(uap, count); + MALLOC(dirbuf, caddr_t, SCARG(uap, count), M_TEMP, M_WAITOK); kiov.iov_base = dirbuf; - error = VOP_READDIR(vp, &kuio, fp->f_cred, NULL, NULL, NULL); + error = VOP_READDIR(vp, &kuio, fp->f_cred, &eofflag, + NULL, NULL); fp->f_offset = kuio.uio_offset; if (error == 0) { - readcnt = uap->count - kuio.uio_resid; + readcnt = SCARG(uap, count) - kuio.uio_resid; edp = (struct dirent *)&dirbuf[readcnt]; for (dp = (struct dirent *)dirbuf; dp < edp; ) { # if (BYTE_ORDER == LITTLE_ENDIAN) @@ -2165,14 +2486,70 @@ ogetdirentries(p, 
uap, retval) } FREE(dirbuf, M_TEMP); } - VOP_UNLOCK(vp); + VOP_UNLOCK(vp, 0, p); if (error) return (error); - error = copyout((caddr_t)&loff, (caddr_t)uap->basep, sizeof(long)); - *retval = uap->count - auio.uio_resid; + +#ifdef UNION +{ + extern int (**union_vnodeop_p)(); + extern struct vnode *union_dircache __P((struct vnode*, struct proc*)); + + if ((SCARG(uap, count) == auio.uio_resid) && + (vp->v_op == union_vnodeop_p)) { + struct vnode *lvp; + + lvp = union_dircache(vp, p); + if (lvp != NULLVP) { + struct vattr va; + + /* + * If the directory is opaque, + * then don't show lower entries + */ + error = VOP_GETATTR(vp, &va, fp->f_cred, p); + if (va.va_flags & OPAQUE) { + vput(lvp); + lvp = NULL; + } + } + + if (lvp != NULLVP) { + error = VOP_OPEN(lvp, FREAD, fp->f_cred, p); + if (error) { + vput(lvp); + return (error); + } + VOP_UNLOCK(lvp, 0, p); + fp->f_data = (caddr_t) lvp; + fp->f_offset = 0; + error = vn_close(vp, FREAD, fp->f_cred, p); + if (error) + return (error); + vp = lvp; + goto unionread; + } + } +} +#endif /* UNION */ + + if ((SCARG(uap, count) == auio.uio_resid) && + (vp->v_flag & VROOT) && + (vp->v_mount->mnt_flag & MNT_UNION)) { + struct vnode *tvp = vp; + vp = vp->v_mount->mnt_vnodecovered; + VREF(vp); + fp->f_data = (caddr_t) vp; + fp->f_offset = 0; + vrele(tvp); + goto unionread; + } + error = copyout((caddr_t)&loff, (caddr_t)SCARG(uap, basep), + sizeof(long)); + *retval = SCARG(uap, count) - auio.uio_resid; return (error); } -#endif +#endif /* COMPAT_43 */ /* * Read a block of directory entries in a file system independent format. 
@@ -2188,18 +2565,22 @@ struct getdirentries_args { int getdirentries(p, uap, retval) struct proc *p; - register struct getdirentries_args *uap; - int *retval; + register struct getdirentries_args /* { + syscallarg(int) fd; + syscallarg(char *) buf; + syscallarg(u_int) count; + syscallarg(long *) basep; + } */ *uap; + register_t *retval; { register struct vnode *vp; struct file *fp; struct uio auio; struct iovec aiov; long loff; - int error; + int error, eofflag; - error = getvnode(p->p_fd, uap->fd, &fp); - if (error) + if (error = getvnode(p->p_fd, SCARG(uap, fd), &fp)) return (error); if ((fp->f_flag & FREAD) == 0) return (EBADF); @@ -2207,51 +2588,66 @@ getdirentries(p, uap, retval) unionread: if (vp->v_type != VDIR) return (EINVAL); - aiov.iov_base = uap->buf; - aiov.iov_len = uap->count; + aiov.iov_base = SCARG(uap, buf); + aiov.iov_len = SCARG(uap, count); auio.uio_iov = &aiov; auio.uio_iovcnt = 1; auio.uio_rw = UIO_READ; auio.uio_segflg = UIO_USERSPACE; auio.uio_procp = p; - auio.uio_resid = uap->count; - VOP_LOCK(vp); + auio.uio_resid = SCARG(uap, count); + vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p); loff = auio.uio_offset = fp->f_offset; - error = VOP_READDIR(vp, &auio, fp->f_cred, NULL, NULL, NULL); + error = VOP_READDIR(vp, &auio, fp->f_cred, &eofflag, NULL, NULL); fp->f_offset = auio.uio_offset; - VOP_UNLOCK(vp); + VOP_UNLOCK(vp, 0, p); if (error) return (error); #ifdef UNION { - if ((uap->count == auio.uio_resid) && + extern int (**union_vnodeop_p)(); + extern struct vnode *union_dircache __P((struct vnode*, struct proc*)); + + if ((SCARG(uap, count) == auio.uio_resid) && (vp->v_op == union_vnodeop_p)) { - struct vnode *tvp = vp; + struct vnode *lvp; + + lvp = union_dircache(vp, p); + if (lvp != NULLVP) { + struct vattr va; - vp = union_lowervp(vp); - if (vp != NULLVP) { - VOP_LOCK(vp); - error = VOP_OPEN(vp, FREAD, fp->f_cred, p); - VOP_UNLOCK(vp); + /* + * If the directory is opaque, + * then don't show lower entries + */ + error = VOP_GETATTR(vp, &va, 
fp->f_cred, p); + if (va.va_flags & OPAQUE) { + vput(lvp); + lvp = NULL; + } + } + if (lvp != NULLVP) { + error = VOP_OPEN(lvp, FREAD, fp->f_cred, p); if (error) { - vrele(vp); + vput(lvp); return (error); } - fp->f_data = (caddr_t) vp; + VOP_UNLOCK(lvp, 0, p); + fp->f_data = (caddr_t) lvp; fp->f_offset = 0; - error = vn_close(tvp, FREAD, fp->f_cred, p); + error = vn_close(vp, FREAD, fp->f_cred, p); if (error) return (error); + vp = lvp; goto unionread; } } } -#endif +#endif /* UNION */ - if ((uap->count == auio.uio_resid) && - vp && + if ((SCARG(uap, count) == auio.uio_resid) && (vp->v_flag & VROOT) && (vp->v_mount->mnt_flag & MNT_UNION)) { struct vnode *tvp = vp; @@ -2262,8 +2658,9 @@ unionread: vrele(tvp); goto unionread; } - error = copyout((caddr_t)&loff, (caddr_t)uap->basep, sizeof(long)); - *retval = uap->count - auio.uio_resid; + error = copyout((caddr_t)&loff, (caddr_t)SCARG(uap, basep), + sizeof(long)); + *retval = SCARG(uap, count) - auio.uio_resid; return (error); } @@ -2275,17 +2672,19 @@ struct umask_args { int newmask; }; #endif -mode_t /* XXX */ +int umask(p, uap, retval) struct proc *p; - struct umask_args *uap; - int *retval; + struct umask_args /* { + syscallarg(int) newmask; + } */ *uap; + int *retval; /* XXX */ { register struct filedesc *fdp; fdp = p->p_fd; *retval = fdp->fd_cmask; - fdp->fd_cmask = uap->newmask & ALLPERMS; + fdp->fd_cmask = SCARG(uap, newmask) & ALLPERMS; return (0); } @@ -2302,31 +2701,27 @@ struct revoke_args { int revoke(p, uap, retval) struct proc *p; - register struct revoke_args *uap; - int *retval; + register struct revoke_args /* { + syscallarg(char *) path; + } */ *uap; + register_t *retval; { register struct vnode *vp; struct vattr vattr; int error; struct nameidata nd; - NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, uap->path, p); - error = namei(&nd); - if (error) + NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, SCARG(uap, path), p); + if (error = namei(&nd)) return (error); vp = nd.ni_vp; - if (vp->v_type != VCHR && 
vp->v_type != VBLK) { - error = EINVAL; - goto out; - } - error = VOP_GETATTR(vp, &vattr, p->p_ucred, p); - if (error) + if (error = VOP_GETATTR(vp, &vattr, p->p_ucred, p)) goto out; if (p->p_ucred->cr_uid != vattr.va_uid && (error = suser(p->p_ucred, &p->p_acflag))) goto out; if (vp->v_usecount > 1 || (vp->v_flag & VALIASED)) - vgoneall(vp); + VOP_REVOKE(vp, REVOKEALL); out: vrele(vp); return (error); diff --git a/sys/kern/vfs_vnops.c b/sys/kern/vfs_vnops.c index 0ba5c45..98842c6 100644 --- a/sys/kern/vfs_vnops.c +++ b/sys/kern/vfs_vnops.c @@ -96,10 +96,11 @@ vn_open(ndp, fmode, cmode) VATTR_NULL(vap); vap->va_type = VREG; vap->va_mode = cmode; - LEASE_CHECK(ndp->ni_dvp, p, cred, LEASE_WRITE); - error = VOP_CREATE(ndp->ni_dvp, &ndp->ni_vp, - &ndp->ni_cnd, vap); - if (error) + if (fmode & O_EXCL) + vap->va_vaflags |= VA_EXCLUSIVE; + VOP_LEASE(ndp->ni_dvp, p, cred, LEASE_WRITE); + if (error = VOP_CREATE(ndp->ni_dvp, &ndp->ni_vp, + &ndp->ni_cnd, vap)) return (error); fmode &= ~O_TRUNC; vp = ndp->ni_vp; @@ -149,9 +150,9 @@ vn_open(ndp, fmode, cmode) } } if (fmode & O_TRUNC) { - VOP_UNLOCK(vp); /* XXX */ - LEASE_CHECK(vp, p, cred, LEASE_WRITE); - VOP_LOCK(vp); /* XXX */ + VOP_UNLOCK(vp, 0, p); /* XXX */ + VOP_LEASE(vp, p, cred, LEASE_WRITE); + vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p); /* XXX */ VATTR_NULL(vap); vap->va_size = 0; error = VOP_SETATTR(vp, vap, cred, p); @@ -179,8 +180,7 @@ bad: /* * Check for write permissions on the specified vnode. - * The read-only status of the file system is checked. - * Also, prototype text segments cannot be written. + * Prototype text segments cannot be written. 
*/ int vn_writechk(vp) @@ -237,7 +237,7 @@ vn_rdwr(rw, vp, base, len, offset, segflg, ioflg, cred, aresid, p) int error; if ((ioflg & IO_NODELOCKED) == 0) - VOP_LOCK(vp); + vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p); auio.uio_iov = &aiov; auio.uio_iovcnt = 1; aiov.iov_base = base; @@ -258,7 +258,7 @@ vn_rdwr(rw, vp, base, len, offset, segflg, ioflg, cred, aresid, p) if (auio.uio_resid && error == 0) error = EIO; if ((ioflg & IO_NODELOCKED) == 0) - VOP_UNLOCK(vp); + VOP_UNLOCK(vp, 0, p); return (error); } @@ -271,12 +271,13 @@ vn_read(fp, uio, cred) struct uio *uio; struct ucred *cred; { - register struct vnode *vp = (struct vnode *)fp->f_data; + struct vnode *vp = (struct vnode *)fp->f_data; + struct proc *p = uio->uio_procp; int count, error; int flag, seq; - LEASE_CHECK(vp, uio->uio_procp, cred, LEASE_READ); - VOP_LOCK(vp); + VOP_LEASE(vp, p, cred, LEASE_READ); + vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p); uio->uio_offset = fp->f_offset; count = uio->uio_resid; flag = 0; @@ -313,7 +314,7 @@ vn_read(fp, uio, cred) error = VOP_READ(vp, uio, flag, cred); fp->f_offset += count - uio->uio_resid; fp->f_nextread = fp->f_offset; - VOP_UNLOCK(vp); + VOP_UNLOCK(vp, 0, p); return (error); } @@ -326,15 +327,19 @@ vn_write(fp, uio, cred) struct uio *uio; struct ucred *cred; { - register struct vnode *vp = (struct vnode *)fp->f_data; - int count, error, ioflag = 0; + struct vnode *vp = (struct vnode *)fp->f_data; + struct proc *p = uio->uio_procp; + int count, error, ioflag = IO_UNIT; if (vp->v_type == VREG && (fp->f_flag & O_APPEND)) ioflag |= IO_APPEND; if (fp->f_flag & FNONBLOCK) ioflag |= IO_NDELAY; - LEASE_CHECK(vp, uio->uio_procp, cred, LEASE_WRITE); - VOP_LOCK(vp); + if ((fp->f_flag & O_FSYNC) || + (vp->v_mount && (vp->v_mount->mnt_flag & MNT_SYNCHRONOUS))) + ioflag |= IO_SYNC; + VOP_LEASE(vp, p, cred, LEASE_WRITE); + vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p); uio->uio_offset = fp->f_offset; count = uio->uio_resid; error = VOP_WRITE(vp, uio, ioflag, cred); @@ -342,7 +347,7 @@ 
vn_write(fp, uio, cred) fp->f_offset = uio->uio_offset; else fp->f_offset += count - uio->uio_resid; - VOP_UNLOCK(vp); + VOP_UNLOCK(vp, 0, p); return (error); } @@ -402,7 +407,7 @@ vn_stat(vp, sb, p) sb->st_rdev = vap->va_rdev; sb->st_size = vap->va_size; sb->st_atimespec = vap->va_atime; - sb->st_mtimespec= vap->va_mtime; + sb->st_mtimespec = vap->va_mtime; sb->st_ctimespec = vap->va_ctime; sb->st_blksize = vap->va_blocksize; sb->st_flags = vap->va_flags; @@ -495,3 +500,34 @@ vn_closefile(fp, p) return (vn_close(((struct vnode *)fp->f_data), fp->f_flag, fp->f_cred, p)); } + +/* + * Check that the vnode is still valid, and if so + * acquire requested lock. + */ +int +vn_lock(vp, flags, p) + struct vnode *vp; + int flags; + struct proc *p; +{ + int error; + + do { + if ((flags & LK_INTERLOCK) == 0) { + simple_lock(&vp->v_interlock); + } + if (vp->v_flag & VXLOCK) { + vp->v_flag |= VXWANT; + simple_unlock(&vp->v_interlock); + tsleep((caddr_t)vp, PINOD, "vn_lock", 0); + error = ENOENT; + } else { + error = VOP_LOCK(vp, flags | LK_INTERLOCK, p); + if (error == 0) + return (error); + } + flags &= ~LK_INTERLOCK; + } while (flags & LK_RETRY); + return (error); +} diff --git a/sys/kern/vnode_if.src b/sys/kern/vnode_if.src index a8fb13b..7e3338f 100644 --- a/sys/kern/vnode_if.src +++ b/sys/kern/vnode_if.src @@ -30,8 +30,33 @@ # OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF # SUCH DAMAGE. # -# @(#)vnode_if.src 8.3 (Berkeley) 2/3/94 -# $FreeBSD$ +# @(#)vnode_if.src 8.12 (Berkeley) 5/14/95 +# $Id: vnode_if.src,v 1.9.2000.1 1996/09/17 14:32:01 peter Exp $ +# + +# +# Above each of the vop descriptors is a specification of the locking +# protocol used by each vop call. The first column is the name of +# the variable, the remaining three columns are in, out and error +# respectively. The "in" column defines the lock state on input, +# the "out" column defines the state on successful return, and the +# "error" column defines the locking state on error exit.
+# +# The locking value can take the following values: +# L: locked. +# U: unlocked. +# -: not applicable. vnode does not yet (or no longer) exist. +# =: the same on input and output, may be either L or U. +# X: locked if not nil. +# + +# +#% lookup dvp L ? ? +#% lookup vpp - L - +# +# XXX - the lookup locking protocol defies simple description and depends +# on the flags and operation fields in the (cnp) structure. Note +# especially that *vpp may equal dvp and both may be locked. # vop_lookup { IN struct vnode *dvp; @@ -39,6 +64,10 @@ vop_lookup { IN struct componentname *cnp; }; +# +#% create dvp L U U +#% create vpp - L - +# vop_create { IN WILLRELE struct vnode *dvp; OUT struct vnode **vpp; @@ -46,6 +75,21 @@ vop_create { IN struct vattr *vap; }; +# +#% whiteout dvp L L L +#% whiteout cnp - - - +#% whiteout flag - - - +# +vop_whiteout { + IN WILLRELE struct vnode *dvp; + IN struct componentname *cnp; + IN int flags; +}; + +# +#% mknod dvp L U U +#% mknod vpp - X - +# vop_mknod { IN WILLRELE struct vnode *dvp; OUT WILLRELE struct vnode **vpp; @@ -53,6 +97,9 @@ vop_mknod { IN struct vattr *vap; }; +# +#% open vp L L L +# vop_open { IN struct vnode *vp; IN int mode; @@ -60,6 +107,9 @@ vop_open { IN struct proc *p; }; +# +#% close vp U U U +# vop_close { IN struct vnode *vp; IN int fflag; @@ -67,6 +117,9 @@ vop_close { IN struct proc *p; }; +# +#% access vp L L L +# vop_access { IN struct vnode *vp; IN int mode; @@ -74,6 +127,9 @@ vop_access { IN struct proc *p; }; +# +#% getattr vp = = = +# vop_getattr { IN struct vnode *vp; IN struct vattr *vap; @@ -81,6 +137,9 @@ vop_getattr { IN struct proc *p; }; +# +#% setattr vp L L L +# vop_setattr { IN struct vnode *vp; IN struct vattr *vap; @@ -88,6 +147,9 @@ vop_setattr { IN struct proc *p; }; +# +#% read vp L L L +# vop_read { IN struct vnode *vp; INOUT struct uio *uio; @@ -95,6 +157,9 @@ vop_read { IN struct ucred *cred; }; +# +#% write vp L L L +# vop_write { IN struct vnode *vp; INOUT struct uio *uio; @@ -102,16
+167,33 @@ vop_write { IN struct ucred *cred; }; +# +#% lease vp = = = +# +vop_lease { + IN struct vnode *vp; + IN struct proc *p; + IN struct ucred *cred; + IN int flag; +}; + +# +#% ioctl vp U U U +# vop_ioctl { IN struct vnode *vp; - IN int command; + IN u_long command; IN caddr_t data; IN int fflag; IN struct ucred *cred; IN struct proc *p; }; +# +#% select vp U U U +# # Needs work? (fflags) +# vop_select { IN struct vnode *vp; IN int which; @@ -120,6 +202,17 @@ vop_select { IN struct proc *p; }; +# +#% revoke vp U U U +# +vop_revoke { + IN struct vnode *vp; + IN int flags; +}; + +# +# XXX - not used +# vop_mmap { IN struct vnode *vp; IN int fflags; @@ -127,6 +220,9 @@ vop_mmap { IN struct proc *p; }; +# +#% fsync vp L L L +# vop_fsync { IN struct vnode *vp; IN struct ucred *cred; @@ -134,7 +230,10 @@ vop_fsync { IN struct proc *p; }; -# Needs word: Is newoff right? What's it mean? +# +# XXX - not used +# Needs work: Is newoff right? What's it mean? +# vop_seek { IN struct vnode *vp; IN off_t oldoff; @@ -142,18 +241,32 @@ vop_seek { IN struct ucred *cred; }; +# +#% remove dvp L U U +#% remove vp L U U +# vop_remove { IN WILLRELE struct vnode *dvp; IN WILLRELE struct vnode *vp; IN struct componentname *cnp; }; +# +#% link vp U U U +#% link tdvp L U U +# vop_link { IN WILLRELE struct vnode *tdvp; IN struct vnode *vp; IN struct componentname *cnp; }; +# +#% rename fdvp U U U +#% rename fvp U U U +#% rename tdvp L U U +#% rename tvp X U U +# vop_rename { IN WILLRELE struct vnode *fdvp; IN WILLRELE struct vnode *fvp; @@ -163,6 +276,10 @@ vop_rename { IN struct componentname *tcnp; }; +# +#% mkdir dvp L U U +#% mkdir vpp - L - +# vop_mkdir { IN WILLRELE struct vnode *dvp; OUT struct vnode **vpp; @@ -170,12 +287,24 @@ vop_mkdir { IN struct vattr *vap; }; +# +#% rmdir dvp L U U +#% rmdir vp L U U +# vop_rmdir { IN WILLRELE struct vnode *dvp; IN WILLRELE struct vnode *vp; IN struct componentname *cnp; }; +# +#% symlink dvp L U U +#% symlink vpp - U - +# +# XXX - note 
that the return vnode has already been VRELE'ed +# by the filesystem layer. To use it you must use vget, +# possibly with a further namei. +# vop_symlink { IN WILLRELE struct vnode *dvp; OUT WILLRELE struct vnode **vpp; @@ -184,42 +313,73 @@ vop_symlink { IN char *target; }; +# +#% readdir vp L L L +# vop_readdir { IN struct vnode *vp; INOUT struct uio *uio; IN struct ucred *cred; INOUT int *eofflag; - INOUT int *ncookies; - INOUT u_int **cookies; + OUT int *ncookies; + INOUT u_long **cookies; }; +# +#% readlink vp L L L +# vop_readlink { IN struct vnode *vp; INOUT struct uio *uio; IN struct ucred *cred; }; +# +#% abortop dvp = = = +# vop_abortop { IN struct vnode *dvp; IN struct componentname *cnp; }; +# +#% inactive vp L U U +# vop_inactive { IN struct vnode *vp; + IN struct proc *p; }; +# +#% reclaim vp U U U +# vop_reclaim { IN struct vnode *vp; + IN struct proc *p; }; +# +#% lock vp U L U +# vop_lock { IN struct vnode *vp; + IN int flags; + IN struct proc *p; }; +# +#% unlock vp L U L +# vop_unlock { IN struct vnode *vp; + IN int flags; + IN struct proc *p; }; +# +#% bmap vp L L L +#% bmap vpp - U - +# vop_bmap { IN struct vnode *vp; IN daddr_t bn; @@ -229,24 +389,39 @@ vop_bmap { OUT int *runb; }; +# +# Needs work: no vp? 
+# #vop_strategy { # IN struct buf *bp; #}; +# +#% print vp = = = +# vop_print { IN struct vnode *vp; }; +# +#% islocked vp = = = +# vop_islocked { IN struct vnode *vp; }; +# +#% pathconf vp L L L +# vop_pathconf { IN struct vnode *vp; IN int name; - OUT int *retval; + OUT register_t *retval; }; +# +#% advlock vp U U U +# vop_advlock { IN struct vnode *vp; IN caddr_t id; @@ -255,6 +430,9 @@ vop_advlock { IN int flags; }; +# +#% blkatoff vp L L L +# vop_blkatoff { IN struct vnode *vp; IN off_t offset; @@ -262,6 +440,9 @@ vop_blkatoff { OUT struct buf **bpp; }; +# +#% valloc pvp L L L +# vop_valloc { IN struct vnode *pvp; IN int mode; @@ -269,17 +450,26 @@ vop_valloc { OUT struct vnode **vpp; }; +# +#% reallocblks vp L L L +# vop_reallocblks { IN struct vnode *vp; IN struct cluster_save *buflist; }; +# +#% vfree pvp L L L +# vop_vfree { IN struct vnode *pvp; IN ino_t ino; IN int mode; }; +# +#% truncate vp L L L +# vop_truncate { IN struct vnode *vp; IN off_t length; @@ -288,6 +478,9 @@ vop_truncate { IN struct proc *p; }; +# +#% update vp L L L +# vop_update { IN struct vnode *vp; IN struct timeval *access; @@ -312,7 +505,9 @@ vop_putpages { IN vm_ooffset_t offset; }; +# # Needs work: no vp? +# #vop_bwrite { # IN struct buf *bp; #}; |