summaryrefslogtreecommitdiffstats
path: root/sys/nfsclient
diff options
context:
space:
mode:
author: phk <phk@FreeBSD.org> 2004-12-06 08:31:32 +0000
committer: phk <phk@FreeBSD.org> 2004-12-06 08:31:32 +0000
commit: 753d615ec01e09d4509ce777a880341ea6feb39d (patch)
tree: 93ace8ca23553123c0dfd5066b9419aea9adb71e /sys/nfsclient
parent: b62f254b426d1e3871d394d61c4ae1997fca0a70 (diff)
download: FreeBSD-src-753d615ec01e09d4509ce777a880341ea6feb39d.zip
download: FreeBSD-src-753d615ec01e09d4509ce777a880341ea6feb39d.tar.gz
For reasons unknown, the nfs locking code used a fifo to send requests to
userland and a dedicated system call to get replies. The vnode-bypass of fifos broke this into a panic. Ditch all the magic and create a device /dev/nfslock instead, and use that for both directions. Apart from the shorter path, this is also faster because the device driver runs Giant free using the vnode bypass. Noticed by: marcel
Diffstat (limited to 'sys/nfsclient')
-rw-r--r-- sys/nfsclient/nfs.h 4
-rw-r--r-- sys/nfsclient/nfs_lock.c 214
-rw-r--r-- sys/nfsclient/nfs_lock.h 6
-rw-r--r-- sys/nfsclient/nfs_nfsiod.c 13
-rw-r--r-- sys/nfsclient/nfs_subs.c 10
5 files changed, 159 insertions, 88 deletions
diff --git a/sys/nfsclient/nfs.h b/sys/nfsclient/nfs.h
index 6be9001..7e79980 100644
--- a/sys/nfsclient/nfs.h
+++ b/sys/nfsclient/nfs.h
@@ -101,10 +101,6 @@
#ifdef NFS_NPROCS
#include <nfsclient/nfsstats.h>
#endif
-/*
- * Flags for nfsclnt() system call.
- */
-#define NFSCLNT_LOCKDANS 0x200
/*
* vfs.nfs sysctl(3) identifiers
diff --git a/sys/nfsclient/nfs_lock.c b/sys/nfsclient/nfs_lock.c
index 88b9609..e23fd7a 100644
--- a/sys/nfsclient/nfs_lock.c
+++ b/sys/nfsclient/nfs_lock.c
@@ -33,6 +33,7 @@ __FBSDID("$FreeBSD$");
#include <sys/param.h>
#include <sys/systm.h>
+#include <sys/conf.h>
#include <sys/fcntl.h>
#include <sys/kernel.h> /* for hz */
#include <sys/limits.h>
@@ -61,6 +62,150 @@ __FBSDID("$FreeBSD$");
#include <nfsclient/nfs_lock.h>
#include <nfsclient/nlminfo.h>
+MALLOC_DEFINE(M_NFSLOCK, "NFS lock", "NFS lock request");
+
+static int nfslockdans(struct thread *td, struct lockd_ans *ansp);
+/*
+ * --------------------------------------------------------------------
+ * A miniature device driver which the userland uses to talk to us.
+ *
+ */
+
+static struct cdev *nfslock_dev;
+static struct mtx nfslock_mtx;
+static int nfslock_isopen;
+static TAILQ_HEAD(,__lock_msg) nfslock_list;
+
+static int
+nfslock_open(struct cdev *dev, int oflags, int devtype, struct thread *td)
+{
+ int error;
+
+ mtx_lock(&nfslock_mtx);
+ if (!nfslock_isopen) {
+ error = 0;
+ nfslock_isopen = 1;
+ } else {
+ error = EOPNOTSUPP;
+ }
+ mtx_unlock(&nfslock_mtx);
+
+ return (error);
+}
+
+static int
+nfslock_close(struct cdev *dev, int fflag, int devtype, struct thread *td)
+{
+ struct __lock_msg *lm;
+
+ mtx_lock(&nfslock_mtx);
+ nfslock_isopen = 0;
+ while (!TAILQ_EMPTY(&nfslock_list)) {
+ lm = TAILQ_FIRST(&nfslock_list);
+ /* XXX: answer request */
+ TAILQ_REMOVE(&nfslock_list, lm, lm_link);
+ free(lm, M_NFSLOCK);
+ }
+ mtx_unlock(&nfslock_mtx);
+ return (0);
+}
+
+static int
+nfslock_read(struct cdev *dev, struct uio *uio, int ioflag)
+{
+ int error;
+ struct __lock_msg *lm;
+
+ if (uio->uio_resid != sizeof *lm)
+ return (EOPNOTSUPP);
+ lm = NULL;
+ error = 0;
+ mtx_lock(&nfslock_mtx);
+ while (TAILQ_EMPTY(&nfslock_list)) {
+ error = msleep(&nfslock_list, &nfslock_mtx, PSOCK | PCATCH,
+ "nfslockd", 0);
+ if (error)
+ break;
+ }
+ if (!error) {
+ lm = TAILQ_FIRST(&nfslock_list);
+ TAILQ_REMOVE(&nfslock_list, lm, lm_link);
+ }
+ mtx_unlock(&nfslock_mtx);
+ if (!error) {
+ error = uiomove(lm, sizeof *lm, uio);
+ free(lm, M_NFSLOCK);
+ }
+ return (error);
+}
+
+static int
+nfslock_write(struct cdev *dev, struct uio *uio, int ioflag)
+{
+ struct lockd_ans la;
+ int error;
+
+ if (uio->uio_resid != sizeof la)
+ return (EOPNOTSUPP);
+ error = uiomove(&la, sizeof la, uio);
+ if (!error)
+ error = nfslockdans(curthread, &la);
+ return (error);
+}
+
+static int
+nfslock_send(struct __lock_msg *lm)
+{
+ struct __lock_msg *lm2;
+ int error;
+
+ error = 0;
+ lm2 = malloc(sizeof *lm2, M_NFSLOCK, M_WAITOK);
+ mtx_lock(&nfslock_mtx);
+ if (nfslock_isopen) {
+ memcpy(lm2, lm, sizeof *lm2);
+ TAILQ_INSERT_TAIL(&nfslock_list, lm2, lm_link);
+ wakeup(&nfslock_list);
+ } else {
+ error = EOPNOTSUPP;
+ }
+ mtx_unlock(&nfslock_mtx);
+ if (error)
+ free(lm2, M_NFSLOCK);
+ return (error);
+}
+
+static struct cdevsw nfslock_cdevsw = {
+ .d_version = D_VERSION,
+ .d_open = nfslock_open,
+ .d_close = nfslock_close,
+ .d_read = nfslock_read,
+ .d_write = nfslock_write,
+ .d_name = "nfslock"
+};
+
+static int
+nfslock_modevent(module_t mod __unused, int type, void *data __unused)
+{
+
+ switch (type) {
+ case MOD_LOAD:
+ if (bootverbose)
+ printf("nfslock: pseudo-device\n");
+ mtx_init(&nfslock_mtx, "nfslock", NULL, MTX_DEF);
+ TAILQ_INIT(&nfslock_list);
+ nfslock_dev = make_dev(&nfslock_cdevsw, 0,
+ UID_ROOT, GID_KMEM, 0600, _PATH_NFSLCKDEV);
+ return (0);
+ default:
+ return (EOPNOTSUPP);
+ }
+}
+
+DEV_MODULE(nfslock, nfslock_modevent, NULL);
+MODULE_VERSION(nfslock, 1);
+
+
/*
* XXX
* We have to let the process know if the call succeeded. I'm using an extra
@@ -76,12 +221,11 @@ int
nfs_dolock(struct vop_advlock_args *ap)
{
LOCKD_MSG msg;
- struct nameidata nd;
struct thread *td;
- struct vnode *vp, *wvp;
- int error, error1;
+ struct vnode *vp;
+ int error;
struct flock *fl;
- int fmode, ioflg;
+ int ioflg;
struct proc *p;
td = curthread;
@@ -132,59 +276,14 @@ nfs_dolock(struct vop_advlock_args *ap)
msg.lm_nfsv3 = NFS_ISV3(vp);
cru2x(td->td_ucred, &msg.lm_cred);
- /*
- * Open the lock fifo. If for any reason we don't find the fifo, it
- * means that the lock daemon isn't running. Translate any missing
- * file error message for the user, otherwise the application will
- * complain that the user's file is missing, which isn't the case.
- * Note that we use proc0's cred, so the fifo is opened as root.
- *
- * XXX: Note that this behavior is relative to the root directory
- * of the current process, and this may result in a variety of
- * {functional, security} problems in chroot() environments.
- */
- NDINIT(&nd, LOOKUP, FOLLOW, UIO_SYSSPACE, _PATH_LCKFIFO, td);
-
- fmode = FFLAGS(O_WRONLY | O_NONBLOCK);
- error = vn_open_cred(&nd, &fmode, 0, thread0.td_ucred, -1);
- switch (error) {
- case ENOENT:
- case ENXIO:
- /*
- * Map a failure to find the fifo or no listener on the
- * fifo to locking not being supported.
- */
- return (EOPNOTSUPP);
- case 0:
- break;
- default:
- return (error);
- }
- wvp = nd.ni_vp;
- VOP_UNLOCK(wvp, 0, td); /* vn_open leaves it locked */
-
-
- ioflg = IO_UNIT | IO_NOMACCHECK;
for (;;) {
- VOP_LEASE(wvp, td, thread0.td_ucred, LEASE_WRITE);
-
- error = vn_rdwr(UIO_WRITE, wvp, (caddr_t)&msg, sizeof(msg), 0,
- UIO_SYSSPACE, ioflg, thread0.td_ucred, NOCRED, NULL, td);
+ error = nfslock_send(&msg);
+ if (error)
+ return (error);
- if (error && (((ioflg & IO_NDELAY) == 0) || error != EAGAIN)) {
- break;
- }
- /*
- * If we're locking a file, wait for an answer. Unlocks succeed
- * immediately.
- */
+ /* Unlocks succeed immediately. */
if (fl->l_type == F_UNLCK)
- /*
- * XXX this isn't exactly correct. The client side
- * needs to continue sending it's unlock until
- * it gets a responce back.
- */
- break;
+ return (error);
/*
* retry after 20 seconds if we haven't gotten a responce yet.
@@ -227,16 +326,14 @@ nfs_dolock(struct vop_advlock_args *ap)
break;
}
- error1 = vn_close(wvp, FWRITE, thread0.td_ucred, td);
- /* prefer any previous 'error' to our vn_close 'error1'. */
- return (error != 0 ? error : error1);
+ return (error);
}
/*
* nfslockdans --
* NFS advisory byte-level locks answer from the lock daemon.
*/
-int
+static int
nfslockdans(struct thread *td, struct lockd_ans *ansp)
{
struct proc *targetp;
@@ -283,3 +380,4 @@ nfslockdans(struct thread *td, struct lockd_ans *ansp)
PROC_UNLOCK(targetp);
return (0);
}
+
diff --git a/sys/nfsclient/nfs_lock.h b/sys/nfsclient/nfs_lock.h
index e4bb794..b49d12c 100644
--- a/sys/nfsclient/nfs_lock.h
+++ b/sys/nfsclient/nfs_lock.h
@@ -44,7 +44,7 @@
* and where lockd reads these requests.
*
*/
-#define _PATH_LCKFIFO "/var/run/lock"
+#define _PATH_NFSLCKDEV "nfslock"
/*
* This structure is used to uniquely identify the process which originated
@@ -58,12 +58,13 @@ struct lockd_msg_ident {
int msg_seq; /* Sequence number of message */
};
-#define LOCKD_MSG_VERSION 2
+#define LOCKD_MSG_VERSION 3
/*
* The structure that the kernel hands us for each lock request.
*/
typedef struct __lock_msg {
+ TAILQ_ENTRY(__lock_msg) lm_link; /* internal linkage */
int lm_version; /* which version is this */
struct lockd_msg_ident lm_msg_ident; /* originator of the message */
struct flock lm_fl; /* The lock request. */
@@ -88,5 +89,4 @@ struct lockd_ans {
#ifdef _KERNEL
int nfs_dolock(struct vop_advlock_args *ap);
-int nfslockdans(struct thread *td, struct lockd_ans *ansp);
#endif
diff --git a/sys/nfsclient/nfs_nfsiod.c b/sys/nfsclient/nfs_nfsiod.c
index d8d5913..d1ca197 100644
--- a/sys/nfsclient/nfs_nfsiod.c
+++ b/sys/nfsclient/nfs_nfsiod.c
@@ -198,19 +198,6 @@ SYSINIT(nfsiod, SI_SUB_KTHREAD_IDLE, SI_ORDER_ANY, nfsiod_setup, NULL);
static int nfs_defect = 0;
SYSCTL_INT(_vfs_nfs, OID_AUTO, defect, CTLFLAG_RW, &nfs_defect, 0, "");
-int
-nfsclnt(struct thread *td, struct nfsclnt_args *uap)
-{
- struct lockd_ans la;
- int error;
-
- if ((uap->flag & NFSCLNT_LOCKDANS) != 0) {
- error = copyin(uap->argp, &la, sizeof(la));
- return (error != 0 ? error : nfslockdans(td, &la));
- }
- return EINVAL;
-}
-
/*
* Asynchronous I/O daemons for client nfs.
* They do read-ahead and write-behind operations on the block I/O cache.
diff --git a/sys/nfsclient/nfs_subs.c b/sys/nfsclient/nfs_subs.c
index 986c74d..8467b76 100644
--- a/sys/nfsclient/nfs_subs.c
+++ b/sys/nfsclient/nfs_subs.c
@@ -96,9 +96,6 @@ int nfs_pbuf_freecnt = -1; /* start out unlimited */
struct nfs_reqq nfs_reqq;
struct nfs_bufq nfs_bufq;
-static int nfs_prev_nfsclnt_sy_narg;
-static sy_call_t *nfs_prev_nfsclnt_sy_call;
-
/*
* and the reverse mapping from generic to Version 2 procedure numbers
*/
@@ -416,11 +413,6 @@ nfs_init(struct vfsconf *vfsp)
TAILQ_INIT(&nfs_reqq);
callout_init(&nfs_callout, 0);
- nfs_prev_nfsclnt_sy_narg = sysent[SYS_nfsclnt].sy_narg;
- sysent[SYS_nfsclnt].sy_narg = 2;
- nfs_prev_nfsclnt_sy_call = sysent[SYS_nfsclnt].sy_call;
- sysent[SYS_nfsclnt].sy_call = (sy_call_t *)nfsclnt;
-
nfs_pbuf_freecnt = nswbuf / 2 + 1;
return (0);
@@ -432,8 +424,6 @@ nfs_uninit(struct vfsconf *vfsp)
int i;
callout_stop(&nfs_callout);
- sysent[SYS_nfsclnt].sy_narg = nfs_prev_nfsclnt_sy_narg;
- sysent[SYS_nfsclnt].sy_call = nfs_prev_nfsclnt_sy_call;
KASSERT(TAILQ_EMPTY(&nfs_reqq),
("nfs_uninit: request queue not empty"));
OpenPOWER on IntegriCloud