diff options
author | phk <phk@FreeBSD.org> | 2004-12-06 08:31:32 +0000 |
---|---|---|
committer | phk <phk@FreeBSD.org> | 2004-12-06 08:31:32 +0000 |
commit | 753d615ec01e09d4509ce777a880341ea6feb39d (patch) | |
tree | 93ace8ca23553123c0dfd5066b9419aea9adb71e /sys/nfsclient | |
parent | b62f254b426d1e3871d394d61c4ae1997fca0a70 (diff) | |
download | FreeBSD-src-753d615ec01e09d4509ce777a880341ea6feb39d.zip FreeBSD-src-753d615ec01e09d4509ce777a880341ea6feb39d.tar.gz |
For reasons unknown, the nfs locking code used a fifo to send requests to
userland and a dedicated system call to get replies.
The vnode-bypass for fifos broke this, resulting in a panic.
Ditch all the magic and create a device /dev/nfslock instead, and
use that for both directions.  Apart from the shorter path, this is
also faster because the device driver runs Giant-free using the
vnode bypass.
Noticed by: marcel
Diffstat (limited to 'sys/nfsclient')
-rw-r--r-- | sys/nfsclient/nfs.h | 4 | ||||
-rw-r--r-- | sys/nfsclient/nfs_lock.c | 214 | ||||
-rw-r--r-- | sys/nfsclient/nfs_lock.h | 6 | ||||
-rw-r--r-- | sys/nfsclient/nfs_nfsiod.c | 13 | ||||
-rw-r--r-- | sys/nfsclient/nfs_subs.c | 10 |
5 files changed, 159 insertions, 88 deletions
diff --git a/sys/nfsclient/nfs.h b/sys/nfsclient/nfs.h index 6be9001..7e79980 100644 --- a/sys/nfsclient/nfs.h +++ b/sys/nfsclient/nfs.h @@ -101,10 +101,6 @@ #ifdef NFS_NPROCS #include <nfsclient/nfsstats.h> #endif -/* - * Flags for nfsclnt() system call. - */ -#define NFSCLNT_LOCKDANS 0x200 /* * vfs.nfs sysctl(3) identifiers diff --git a/sys/nfsclient/nfs_lock.c b/sys/nfsclient/nfs_lock.c index 88b9609..e23fd7a 100644 --- a/sys/nfsclient/nfs_lock.c +++ b/sys/nfsclient/nfs_lock.c @@ -33,6 +33,7 @@ __FBSDID("$FreeBSD$"); #include <sys/param.h> #include <sys/systm.h> +#include <sys/conf.h> #include <sys/fcntl.h> #include <sys/kernel.h> /* for hz */ #include <sys/limits.h> @@ -61,6 +62,150 @@ __FBSDID("$FreeBSD$"); #include <nfsclient/nfs_lock.h> #include <nfsclient/nlminfo.h> +MALLOC_DEFINE(M_NFSLOCK, "NFS lock", "NFS lock request"); + +static int nfslockdans(struct thread *td, struct lockd_ans *ansp); +/* + * -------------------------------------------------------------------- + * A miniature device driver which the userland uses to talk to us. 
+ * + */ + +static struct cdev *nfslock_dev; +static struct mtx nfslock_mtx; +static int nfslock_isopen; +static TAILQ_HEAD(,__lock_msg) nfslock_list; + +static int +nfslock_open(struct cdev *dev, int oflags, int devtype, struct thread *td) +{ + int error; + + mtx_lock(&nfslock_mtx); + if (!nfslock_isopen) { + error = 0; + nfslock_isopen = 1; + } else { + error = EOPNOTSUPP; + } + mtx_unlock(&nfslock_mtx); + + return (error); +} + +static int +nfslock_close(struct cdev *dev, int fflag, int devtype, struct thread *td) +{ + struct __lock_msg *lm; + + mtx_lock(&nfslock_mtx); + nfslock_isopen = 0; + while (!TAILQ_EMPTY(&nfslock_list)) { + lm = TAILQ_FIRST(&nfslock_list); + /* XXX: answer request */ + TAILQ_REMOVE(&nfslock_list, lm, lm_link); + free(lm, M_NFSLOCK); + } + mtx_unlock(&nfslock_mtx); + return (0); +} + +static int +nfslock_read(struct cdev *dev, struct uio *uio, int ioflag) +{ + int error; + struct __lock_msg *lm; + + if (uio->uio_resid != sizeof *lm) + return (EOPNOTSUPP); + lm = NULL; + error = 0; + mtx_lock(&nfslock_mtx); + while (TAILQ_EMPTY(&nfslock_list)) { + error = msleep(&nfslock_list, &nfslock_mtx, PSOCK | PCATCH, + "nfslockd", 0); + if (error) + break; + } + if (!error) { + lm = TAILQ_FIRST(&nfslock_list); + TAILQ_REMOVE(&nfslock_list, lm, lm_link); + } + mtx_unlock(&nfslock_mtx); + if (!error) { + error = uiomove(lm, sizeof *lm, uio); + free(lm, M_NFSLOCK); + } + return (error); +} + +static int +nfslock_write(struct cdev *dev, struct uio *uio, int ioflag) +{ + struct lockd_ans la; + int error; + + if (uio->uio_resid != sizeof la) + return (EOPNOTSUPP); + error = uiomove(&la, sizeof la, uio); + if (!error) + error = nfslockdans(curthread, &la); + return (error); +} + +static int +nfslock_send(struct __lock_msg *lm) +{ + struct __lock_msg *lm2; + int error; + + error = 0; + lm2 = malloc(sizeof *lm2, M_NFSLOCK, M_WAITOK); + mtx_lock(&nfslock_mtx); + if (nfslock_isopen) { + memcpy(lm2, lm, sizeof *lm2); + TAILQ_INSERT_TAIL(&nfslock_list, lm2, 
lm_link); + wakeup(&nfslock_list); + } else { + error = EOPNOTSUPP; + } + mtx_unlock(&nfslock_mtx); + if (error) + free(lm2, M_NFSLOCK); + return (error); +} + +static struct cdevsw nfslock_cdevsw = { + .d_version = D_VERSION, + .d_open = nfslock_open, + .d_close = nfslock_close, + .d_read = nfslock_read, + .d_write = nfslock_write, + .d_name = "nfslock" +}; + +static int +nfslock_modevent(module_t mod __unused, int type, void *data __unused) +{ + + switch (type) { + case MOD_LOAD: + if (bootverbose) + printf("nfslock: pseudo-device\n"); + mtx_init(&nfslock_mtx, "nfslock", NULL, MTX_DEF); + TAILQ_INIT(&nfslock_list); + nfslock_dev = make_dev(&nfslock_cdevsw, 0, + UID_ROOT, GID_KMEM, 0600, _PATH_NFSLCKDEV); + return (0); + default: + return (EOPNOTSUPP); + } +} + +DEV_MODULE(nfslock, nfslock_modevent, NULL); +MODULE_VERSION(nfslock, 1); + + /* * XXX * We have to let the process know if the call succeeded. I'm using an extra @@ -76,12 +221,11 @@ int nfs_dolock(struct vop_advlock_args *ap) { LOCKD_MSG msg; - struct nameidata nd; struct thread *td; - struct vnode *vp, *wvp; - int error, error1; + struct vnode *vp; + int error; struct flock *fl; - int fmode, ioflg; + int ioflg; struct proc *p; td = curthread; @@ -132,59 +276,14 @@ nfs_dolock(struct vop_advlock_args *ap) msg.lm_nfsv3 = NFS_ISV3(vp); cru2x(td->td_ucred, &msg.lm_cred); - /* - * Open the lock fifo. If for any reason we don't find the fifo, it - * means that the lock daemon isn't running. Translate any missing - * file error message for the user, otherwise the application will - * complain that the user's file is missing, which isn't the case. - * Note that we use proc0's cred, so the fifo is opened as root. - * - * XXX: Note that this behavior is relative to the root directory - * of the current process, and this may result in a variety of - * {functional, security} problems in chroot() environments. 
- */ - NDINIT(&nd, LOOKUP, FOLLOW, UIO_SYSSPACE, _PATH_LCKFIFO, td); - - fmode = FFLAGS(O_WRONLY | O_NONBLOCK); - error = vn_open_cred(&nd, &fmode, 0, thread0.td_ucred, -1); - switch (error) { - case ENOENT: - case ENXIO: - /* - * Map a failure to find the fifo or no listener on the - * fifo to locking not being supported. - */ - return (EOPNOTSUPP); - case 0: - break; - default: - return (error); - } - wvp = nd.ni_vp; - VOP_UNLOCK(wvp, 0, td); /* vn_open leaves it locked */ - - - ioflg = IO_UNIT | IO_NOMACCHECK; for (;;) { - VOP_LEASE(wvp, td, thread0.td_ucred, LEASE_WRITE); - - error = vn_rdwr(UIO_WRITE, wvp, (caddr_t)&msg, sizeof(msg), 0, - UIO_SYSSPACE, ioflg, thread0.td_ucred, NOCRED, NULL, td); + error = nfslock_send(&msg); + if (error) + return (error); - if (error && (((ioflg & IO_NDELAY) == 0) || error != EAGAIN)) { - break; - } - /* - * If we're locking a file, wait for an answer. Unlocks succeed - * immediately. - */ + /* Unlocks succeed immediately. */ if (fl->l_type == F_UNLCK) - /* - * XXX this isn't exactly correct. The client side - * needs to continue sending it's unlock until - * it gets a responce back. - */ - break; + return (error); /* * retry after 20 seconds if we haven't gotten a responce yet. @@ -227,16 +326,14 @@ nfs_dolock(struct vop_advlock_args *ap) break; } - error1 = vn_close(wvp, FWRITE, thread0.td_ucred, td); - /* prefer any previous 'error' to our vn_close 'error1'. */ - return (error != 0 ? error : error1); + return (error); } /* * nfslockdans -- * NFS advisory byte-level locks answer from the lock daemon. */ -int +static int nfslockdans(struct thread *td, struct lockd_ans *ansp) { struct proc *targetp; @@ -283,3 +380,4 @@ nfslockdans(struct thread *td, struct lockd_ans *ansp) PROC_UNLOCK(targetp); return (0); } + diff --git a/sys/nfsclient/nfs_lock.h b/sys/nfsclient/nfs_lock.h index e4bb794..b49d12c 100644 --- a/sys/nfsclient/nfs_lock.h +++ b/sys/nfsclient/nfs_lock.h @@ -44,7 +44,7 @@ * and where lockd reads these requests. 
* */ -#define _PATH_LCKFIFO "/var/run/lock" +#define _PATH_NFSLCKDEV "nfslock" /* * This structure is used to uniquely identify the process which originated @@ -58,12 +58,13 @@ struct lockd_msg_ident { int msg_seq; /* Sequence number of message */ }; -#define LOCKD_MSG_VERSION 2 +#define LOCKD_MSG_VERSION 3 /* * The structure that the kernel hands us for each lock request. */ typedef struct __lock_msg { + TAILQ_ENTRY(__lock_msg) lm_link; /* internal linkage */ int lm_version; /* which version is this */ struct lockd_msg_ident lm_msg_ident; /* originator of the message */ struct flock lm_fl; /* The lock request. */ @@ -88,5 +89,4 @@ struct lockd_ans { #ifdef _KERNEL int nfs_dolock(struct vop_advlock_args *ap); -int nfslockdans(struct thread *td, struct lockd_ans *ansp); #endif diff --git a/sys/nfsclient/nfs_nfsiod.c b/sys/nfsclient/nfs_nfsiod.c index d8d5913..d1ca197 100644 --- a/sys/nfsclient/nfs_nfsiod.c +++ b/sys/nfsclient/nfs_nfsiod.c @@ -198,19 +198,6 @@ SYSINIT(nfsiod, SI_SUB_KTHREAD_IDLE, SI_ORDER_ANY, nfsiod_setup, NULL); static int nfs_defect = 0; SYSCTL_INT(_vfs_nfs, OID_AUTO, defect, CTLFLAG_RW, &nfs_defect, 0, ""); -int -nfsclnt(struct thread *td, struct nfsclnt_args *uap) -{ - struct lockd_ans la; - int error; - - if ((uap->flag & NFSCLNT_LOCKDANS) != 0) { - error = copyin(uap->argp, &la, sizeof(la)); - return (error != 0 ? error : nfslockdans(td, &la)); - } - return EINVAL; -} - /* * Asynchronous I/O daemons for client nfs. * They do read-ahead and write-behind operations on the block I/O cache. 
diff --git a/sys/nfsclient/nfs_subs.c b/sys/nfsclient/nfs_subs.c index 986c74d..8467b76 100644 --- a/sys/nfsclient/nfs_subs.c +++ b/sys/nfsclient/nfs_subs.c @@ -96,9 +96,6 @@ int nfs_pbuf_freecnt = -1; /* start out unlimited */ struct nfs_reqq nfs_reqq; struct nfs_bufq nfs_bufq; -static int nfs_prev_nfsclnt_sy_narg; -static sy_call_t *nfs_prev_nfsclnt_sy_call; - /* * and the reverse mapping from generic to Version 2 procedure numbers */ @@ -416,11 +413,6 @@ nfs_init(struct vfsconf *vfsp) TAILQ_INIT(&nfs_reqq); callout_init(&nfs_callout, 0); - nfs_prev_nfsclnt_sy_narg = sysent[SYS_nfsclnt].sy_narg; - sysent[SYS_nfsclnt].sy_narg = 2; - nfs_prev_nfsclnt_sy_call = sysent[SYS_nfsclnt].sy_call; - sysent[SYS_nfsclnt].sy_call = (sy_call_t *)nfsclnt; - nfs_pbuf_freecnt = nswbuf / 2 + 1; return (0); @@ -432,8 +424,6 @@ nfs_uninit(struct vfsconf *vfsp) int i; callout_stop(&nfs_callout); - sysent[SYS_nfsclnt].sy_narg = nfs_prev_nfsclnt_sy_narg; - sysent[SYS_nfsclnt].sy_call = nfs_prev_nfsclnt_sy_call; KASSERT(TAILQ_EMPTY(&nfs_reqq), ("nfs_uninit: request queue not empty")); |