Re-implement the client side of rpc.lockd in the kernel.

This implementation provides the correct semantics for flock(2) style locks, which are used by the lockf(1) command line tool and the pidfile(3) library. It also implements recovery from server restarts and ensures that dirty cache blocks are written to the server before obtaining locks (allowing multiple clients to use file locking to safely share data).

Sponsored by: Isilon Systems
PR: 94256
MFC after: 2 weeks
author: dfr <dfr@FreeBSD.org> 2008-06-26 10:21:54 +0000
committer: dfr <dfr@FreeBSD.org> 2008-06-26 10:21:54 +0000
commit: 41cea6d5ca71b8cf057f9face8055b218b30e18e (patch)
tree: 994a214037913bc4e44eaee5070c65aeadf53485
parent: ca3c788812715a263f83dcec4bdabaf6c10eb922 (diff)
download: FreeBSD-src-41cea6d5ca71b8cf057f9face8055b218b30e18e.zip
FreeBSD-src-41cea6d5ca71b8cf057f9face8055b218b30e18e.tar.gz
28 files changed, 3102 insertions, 776 deletions
diff --git a/sys/conf/files b/sys/conf/files
index 9261e2e..77bd32e 100644
--- a/sys/conf/files
+++ b/sys/conf/files
@@ -2059,6 +2059,7 @@ nfsserver/nfs_srvsock.c		optional nfsserver
 nfsserver/nfs_srvcache.c	optional nfsserver
 nfsserver/nfs_srvsubs.c		optional nfsserver
 nfsserver/nfs_syscalls.c	optional nfsserver
+nlm/nlm_advlock.c		optional nfslockd
 nlm/nlm_prot_clnt.c		optional nfslockd
 nlm/nlm_prot_impl.c		optional nfslockd
 nlm/nlm_prot_server.c		optional nfslockd
diff --git a/sys/kern/kern_lockf.c b/sys/kern/kern_lockf.c
index 8cab502..130e0b9 100644
--- a/sys/kern/kern_lockf.c
+++ b/sys/kern/kern_lockf.c
@@ -1354,7 +1354,8 @@ lf_setlock(struct lockf *state, struct lockf_entry *lock, struct vnode *vp,
 	priority = PLOCK;
 	if (lock->lf_type == F_WRLCK)
 		priority += 4;
-	priority |= PCATCH;
+	if (!(lock->lf_flags & F_NOINTR))
+		priority |= PCATCH;
 	/*
 	 * Scan lock list for this file looking for locks that would block us.
 	 */
@@ -1814,27 +1815,26 @@ lf_split(struct lockf *state, struct lockf_entry *lock1,
 	lf_insert_lock(state, splitlock);
 }
 
-struct clearlock {
-	STAILQ_ENTRY(clearlock) link;
+struct lockdesc {
+	STAILQ_ENTRY(lockdesc) link;
 	struct vnode *vp;
 	struct flock fl;
 };
-STAILQ_HEAD(clearlocklist, clearlock);
+STAILQ_HEAD(lockdesclist, lockdesc);
 
-void
-lf_clearremotesys(int sysid)
+int
+lf_iteratelocks_sysid(int sysid, lf_iterator *fn, void *arg)
 {
 	struct lockf *ls;
 	struct lockf_entry *lf;
-	struct clearlock *cl;
-	struct clearlocklist locks;
-
-	KASSERT(sysid != 0, ("Can't clear local locks with F_UNLCKSYS"));
+	struct lockdesc *ldesc;
+	struct lockdesclist locks;
+	int error;
 
 	/*
 	 * In order to keep the locking simple, we iterate over the
 	 * active lock lists to build a list of locks that need
-	 * releasing. We then call VOP_ADVLOCK for each one in turn.
+	 * releasing. We then call the iterator for each one in turn.
 	 *
 	 * We take an extra reference to the vnode for the duration to
 	 * make sure it doesn't go away before we are finished.
@@ -1847,32 +1847,116 @@ lf_clearremotesys(int sysid)
 			if (lf->lf_owner->lo_sysid != sysid)
 				continue;
 
-			cl = malloc(sizeof(struct clearlock), M_LOCKF,
+			ldesc = malloc(sizeof(struct lockdesc), M_LOCKF,
 			    M_WAITOK);
-			cl->vp = lf->lf_vnode;
-			vref(cl->vp);
-			cl->fl.l_start = lf->lf_start;
+			ldesc->vp = lf->lf_vnode;
+			vref(ldesc->vp);
+			ldesc->fl.l_start = lf->lf_start;
 			if (lf->lf_end == OFF_MAX)
-				cl->fl.l_len = 0;
+				ldesc->fl.l_len = 0;
 			else
-				cl->fl.l_len =
+				ldesc->fl.l_len =
 					lf->lf_end - lf->lf_start + 1;
-			cl->fl.l_whence = SEEK_SET;
-			cl->fl.l_type = F_UNLCK;
-			cl->fl.l_pid = lf->lf_owner->lo_pid;
-			cl->fl.l_sysid = sysid;
-			STAILQ_INSERT_TAIL(&locks, cl, link);
+			ldesc->fl.l_whence = SEEK_SET;
+			ldesc->fl.l_type = F_UNLCK;
+			ldesc->fl.l_pid = lf->lf_owner->lo_pid;
+			ldesc->fl.l_sysid = sysid;
+			STAILQ_INSERT_TAIL(&locks, ldesc, link);
 		}
 		sx_xunlock(&ls->ls_lock);
 	}
 	sx_xunlock(&lf_lock_states_lock);
 
-	while ((cl = STAILQ_FIRST(&locks)) != NULL) {
+	/*
+	 * Call the iterator function for each lock in turn. If the
+	 * iterator returns an error code, just free the rest of the
+	 * lockdesc structures.
+	 */
+	error = 0;
+	while ((ldesc = STAILQ_FIRST(&locks)) != NULL) {
+		STAILQ_REMOVE_HEAD(&locks, link);
+		if (!error)
+			error = fn(ldesc->vp, &ldesc->fl, arg);
+		vrele(ldesc->vp);
+		free(ldesc, M_LOCKF);
+	}
+
+	return (error);
+}
+
+int
+lf_iteratelocks_vnode(struct vnode *vp, lf_iterator *fn, void *arg)
+{
+	struct lockf *ls;
+	struct lockf_entry *lf;
+	struct lockdesc *ldesc;
+	struct lockdesclist locks;
+	int error;
+
+	/*
+	 * In order to keep the locking simple, we iterate over this
+	 * vnode's active lock list to build a list describing each
+	 * lock. We then call the iterator for each one in turn.
+	 *
+	 * We take an extra reference to the vnode for the duration to
+	 * make sure it doesn't go away before we are finished.
+	 */
+	STAILQ_INIT(&locks);
+	ls = vp->v_lockf;
+	if (!ls)
+		return (0);
+
+	sx_xlock(&ls->ls_lock);
+	LIST_FOREACH(lf, &ls->ls_active, lf_link) {
+		ldesc = malloc(sizeof(struct lockdesc), M_LOCKF,
+		    M_WAITOK);
+		ldesc->vp = lf->lf_vnode;
+		vref(ldesc->vp);
+		ldesc->fl.l_start = lf->lf_start;
+		if (lf->lf_end == OFF_MAX)
+			ldesc->fl.l_len = 0;
+		else
+			ldesc->fl.l_len =
+				lf->lf_end - lf->lf_start + 1;
+		ldesc->fl.l_whence = SEEK_SET;
+		ldesc->fl.l_type = F_UNLCK;
+		ldesc->fl.l_pid = lf->lf_owner->lo_pid;
+		ldesc->fl.l_sysid = lf->lf_owner->lo_sysid;
+		STAILQ_INSERT_TAIL(&locks, ldesc, link);
+	}
+	sx_xunlock(&ls->ls_lock);
+
+	/*
+	 * Call the iterator function for each lock in turn. If the
+	 * iterator returns an error code, just free the rest of the
+	 * lockdesc structures.
+	 */
+	error = 0;
+	while ((ldesc = STAILQ_FIRST(&locks)) != NULL) {
 		STAILQ_REMOVE_HEAD(&locks, link);
-		VOP_ADVLOCK(cl->vp, 0, F_UNLCK, &cl->fl, F_REMOTE);
-		vrele(cl->vp);
-		free(cl, M_LOCKF);
+		if (!error)
+			error = fn(ldesc->vp, &ldesc->fl, arg);
+		vrele(ldesc->vp);
+		free(ldesc, M_LOCKF);
 	}
+
+	return (error);
+}
+
+static int
+lf_clearremotesys_iterator(struct vnode *vp, struct flock *fl, void *arg)
+{
+
+	VOP_ADVLOCK(vp, 0, F_UNLCK, fl, F_REMOTE);
+	return (0);
+}
+
+void
+lf_clearremotesys(int sysid)
+{
+
+	KASSERT(sysid != 0, ("Can't clear local locks with F_UNLCKSYS"));
+	lf_iteratelocks_sysid(sysid, lf_clearremotesys_iterator, NULL);
 }
 
 int
diff --git a/sys/modules/nfslockd/Makefile b/sys/modules/nfslockd/Makefile
index 8c02c88..104925c 100644
--- a/sys/modules/nfslockd/Makefile
+++ b/sys/modules/nfslockd/Makefile
@@ -3,6 +3,7 @@
 .PATH: ${.CURDIR}/../../nlm ${.CURDIR}/../../rpc
 KMOD=	nfslockd
 SRCS=	vnode_if.h \
+	nlm_advlock.c \
 	nlm_prot_clnt.c \
 	nlm_prot_impl.c \
 	nlm_prot_server.c \
diff --git a/sys/nfsclient/nfs.h b/sys/nfsclient/nfs.h
index 29fb332..9e52420 100644
--- a/sys/nfsclient/nfs.h
+++ b/sys/nfsclient/nfs.h
@@ -93,6 +93,7 @@
 #define	NFSSTA_SNDLOCK		0x01000000  /* Send socket lock */
 #define	NFSSTA_WANTSND		0x02000000  /* Want above */
 #define	NFSSTA_TIMEO		0x10000000  /* Experiencing a timeout */
+#define	NFSSTA_LOCKTIMEO	0x20000000  /* Experiencing a lockd timeout */
 
 
 /*
diff --git a/sys/nfsclient/nfs_node.c b/sys/nfsclient/nfs_node.c
index 7876d32..03e672a 100644
--- a/sys/nfsclient/nfs_node.c
+++ b/sys/nfsclient/nfs_node.c
@@ -234,6 +234,13 @@ nfs_reclaim(struct vop_reclaim_args *ap)
 		vprint("nfs_reclaim: pushing active", vp);
 
 	/*
+	 * If the NLM is running, give it a chance to abort pending
+	 * locks.
+	 */
+	if (nfs_reclaim_p)
+		nfs_reclaim_p(ap);
+
+	/*
 	 * Destroy the vm object and flush associated pages.
 	 */
 	vnode_destroy_vobject(vp);
diff --git a/sys/nfsclient/nfs_vfsops.c b/sys/nfsclient/nfs_vfsops.c
index f342211..17536491 100644
--- a/sys/nfsclient/nfs_vfsops.c
+++ b/sys/nfsclient/nfs_vfsops.c
@@ -495,6 +495,7 @@ nfs_mountroot(struct mount *mp, struct thread *td)
 		(l >> 24) & 0xff, (l >> 16) & 0xff,
 		(l >>  8) & 0xff, (l >>  0) & 0xff, nd->root_hostnam);
 	printf("NFS ROOT: %s\n", buf);
+	nd->root_args.hostname = buf;
 	if ((error = nfs_mountdiskless(buf,
 	    &nd->root_saddr, &nd->root_args, td, &vp, mp)) != 0) {
 		return (error);
@@ -540,6 +541,7 @@ nfs_decode_args(struct mount *mp, struct nfsmount *nmp, struct nfs_args *argp)
 	int s;
 	int adjsock;
 	int maxio;
+	char *p;
 
 	s = splnet();
 
@@ -699,6 +701,11 @@ nfs_decode_args(struct mount *mp, struct nfsmount *nmp, struct nfs_args *argp)
 				(void) tsleep((caddr_t)&lbolt, PSOCK, "nfscon", 0);
 			}
 	}
+
+	strlcpy(nmp->nm_hostname, argp->hostname, sizeof(nmp->nm_hostname));
+	p = strchr(nmp->nm_hostname, ':');
+	if (p)
+		*p = '\0';
 }
 
 static const char *nfs_opts[] = { "from", "nfs_args",
diff --git a/sys/nfsclient/nfs_vnops.c b/sys/nfsclient/nfs_vnops.c
index 524c564..3711165 100644
--- a/sys/nfsclient/nfs_vnops.c
+++ b/sys/nfsclient/nfs_vnops.c
@@ -198,6 +198,8 @@ struct mtx 	nfs_iod_mtx;
 struct proc	*nfs_iodwant[NFS_MAXASYNCDAEMON];
 struct nfsmount *nfs_iodmount[NFS_MAXASYNCDAEMON];
 int		 nfs_numasync = 0;
+vop_advlock_t	*nfs_advlock_p = nfs_dolock;
+vop_reclaim_t	*nfs_reclaim_p = NULL;
 #define	DIRHDSIZ	(sizeof (struct dirent) - (MAXNAMLEN + 1))
 
 SYSCTL_DECL(_vfs_nfs);
@@ -3051,8 +3053,13 @@ nfs_advlock(struct vop_advlock_args *ap)
 		size = VTONFS(vp)->n_size;
 		VOP_UNLOCK(vp, 0);
 		error = lf_advlock(ap, &(vp->v_lockf), size);
-	} else
-		error = nfs_dolock(ap);
+	} else {
+		if (nfs_advlock_p)
+			error = nfs_advlock_p(ap);
+		else
+			error = ENOLCK;
+	}
+
 	return (error);
 }
 
diff --git a/sys/nfsclient/nfsmount.h b/sys/nfsclient/nfsmount.h
index 8615846..6fa7f8b 100644
--- a/sys/nfsclient/nfsmount.h
+++ b/sys/nfsclient/nfsmount.h
@@ -91,6 +91,7 @@ struct	nfsmount {
 	int	nm_tprintf_initial_delay;	/* initial delay */
 	int	nm_tprintf_delay;		/* interval for messages */
 	struct nfs_tcp_mountstate nm_nfstcpstate;
+	char	nm_hostname[MNAMELEN];	 /* server's name */
 
 	/* NFSv4 */
 	uint64_t nm_clientid;
diff --git a/sys/nfsclient/nfsnode.h b/sys/nfsclient/nfsnode.h
index f227361..03e5e7f 100644
--- a/sys/nfsclient/nfsnode.h
+++ b/sys/nfsclient/nfsnode.h
@@ -187,6 +187,9 @@ extern	struct vop_vector	nfs4_vnodeops;
 extern struct buf_ops buf_ops_nfs;
 extern struct buf_ops buf_ops_nfs4;
 
+extern vop_advlock_t *nfs_advlock_p;
+extern vop_reclaim_t *nfs_reclaim_p;
+
 /*
  * Prototypes for NFS vnode operations
  */
diff --git a/sys/nlm/nlm.h b/sys/nlm/nlm.h
index 32bb974..addd07e 100644
--- a/sys/nlm/nlm.h
+++ b/sys/nlm/nlm.h
@@ -36,7 +36,17 @@
 MALLOC_DECLARE(M_NLM);
 #endif
 
+/*
+ * This value is added to host system IDs when recording NFS client
+ * locks in the local lock manager.
+ */
+#define NLM_SYSID_CLIENT	0x1000000
+
 struct nlm_host;
+struct vnode;
+
+extern struct timeval nlm_zero_tv;
+extern int nlm_nsm_state;
 
 /*
  * Copy a struct netobj.
@@ -47,61 +57,140 @@ extern void nlm_copy_netobj(struct netobj *dst, struct netobj *src,
 /*
  * Search for an existing NLM host that matches the given name
  * (typically the caller_name element of an nlm4_lock).  If none is
- * found, create a new host. If 'rqstp' is non-NULL, record the remote
+ * found, create a new host. If 'addr' is non-NULL, record the remote
  * address of the host so that we can call it back for async
- * responses.
+ * responses. If 'vers' is greater than zero then record the NLM
+ * program version to use to communicate with this client. The host
+ * reference count is incremented - the caller must call
+ * nlm_host_release when it has finished using it.
  */
 extern struct nlm_host *nlm_find_host_by_name(const char *name,
-    struct svc_req *rqstp);
+    const struct sockaddr *addr, rpcvers_t vers);
 
 /*
  * Search for an existing NLM host that matches the given remote
  * address. If none is found, create a new host with the requested
  * address and remember 'vers' as the NLM protocol version to use for
- * that host.
+ * that host. The host reference count is incremented - the caller
+ * must call nlm_host_release when it has finished using it.
  */
 extern struct nlm_host *nlm_find_host_by_addr(const struct sockaddr *addr,
     int vers);
 
 /*
+ * Register this NLM host with the local NSM so that we can be
+ * notified if it reboots.
+ */
+extern void nlm_host_monitor(struct nlm_host *host, int state);
+
+/*
+ * Decrement the host reference count, freeing resources if the
+ * reference count reaches zero.
+ */
+extern void nlm_host_release(struct nlm_host *host);
+
+/*
  * Return an RPC client handle that can be used to talk to the NLM
  * running on the given host.
  */
 extern CLIENT *nlm_host_get_rpc(struct nlm_host *host);
 
 /*
+ * Return the system ID for a host.
+ */
+extern int nlm_host_get_sysid(struct nlm_host *host);
+
+/*
+ * Return the remote NSM state value for a host.
+ */
+extern int nlm_host_get_state(struct nlm_host *host);
+
+/*
+ * When sending a blocking lock request, we need to track the request
+ * in our waiting lock list. We add an entry to the waiting list
+ * before we send the lock RPC so that we can cope with a granted
+ * message arriving at any time. Call this function before sending the
+ * lock rpc. If the lock succeeds, call nlm_deregister_wait_lock with
+ * the handle this function returns, otherwise nlm_wait_lock. Both
+ * will remove the entry from the waiting list.
+ */
+extern void *nlm_register_wait_lock(struct nlm4_lock *lock, struct vnode *vp);
+
+/*
+ * Deregister a blocking lock request. Call this if the lock succeeded
+ * without blocking.
+ */
+extern void nlm_deregister_wait_lock(void *handle);
+
+/*
+ * Wait for a granted callback for a blocked lock request, waiting at
+ * most timo ticks. If no granted message is received within the
+ * timeout, return EWOULDBLOCK. If a signal interrupted the wait,
+ * return EINTR - the caller must arrange to send a cancellation to
+ * the server. In both cases, the request is removed from the waiting
+ * list.
+ */
+extern int nlm_wait_lock(void *handle, int timo);
+
+/*
+ * Cancel any pending waits for this vnode - called on forcible unmounts.
+ */
+extern void nlm_cancel_wait(struct vnode *vp);
+
+/*
  * Called when a host restarts.
  */
 extern void nlm_sm_notify(nlm_sm_status *argp);
 
 /*
- * Implementation for lock testing RPCs. Returns the NLM host that
- * matches the RPC arguments.
+ * Implementation for lock testing RPCs. If the request was handled
+ * successfully and rpcp is non-NULL, *rpcp is set to an RPC client
+ * handle which can be used to send an async rpc reply. Returns zero
+ * if the request was handled, or a suitable unix error code
+ * otherwise.
+ */
+extern int nlm_do_test(nlm4_testargs *argp, nlm4_testres *result,
+    struct svc_req *rqstp, CLIENT **rpcp);
+
+/*
+ * Implementation for lock setting RPCs. If the request was handled
+ * successfully and rpcp is non-NULL, *rpcp is set to an RPC client
+ * handle which can be used to send an async rpc reply. Returns zero
+ * if the request was handled, or a suitable unix error code
+ * otherwise.
  */
-extern struct nlm_host *nlm_do_test(nlm4_testargs *argp,
-    nlm4_testres *result, struct svc_req *rqstp);
+extern int nlm_do_lock(nlm4_lockargs *argp, nlm4_res *result,
+    struct svc_req *rqstp, bool_t monitor, CLIENT **rpcp); 
 
 /*
- * Implementation for lock setting RPCs. Returns the NLM host that
- * matches the RPC arguments. If monitor is TRUE, set up an NSM
- * monitor for this host.
+ * Implementation for cancelling a pending lock request. If the
+ * request was handled successfully and rpcp is non-NULL, *rpcp is set
+ * to an RPC client handle which can be used to send an async rpc
+ * reply. Returns zero if the request was handled, or a suitable unix
+ * error code otherwise.
  */
-extern struct nlm_host *nlm_do_lock(nlm4_lockargs *argp,
-    nlm4_res *result, struct svc_req *rqstp, bool_t monitor); 
+extern int nlm_do_cancel(nlm4_cancargs *argp, nlm4_res *result,
+    struct svc_req *rqstp, CLIENT **rpcp);
 
 /*
- * Implementation for cancelling a pending lock request. Returns the
- * NLM host that matches the RPC arguments.
+ * Implementation for unlocking RPCs. If the request was handled
+ * successfully and rpcp is non-NULL, *rpcp is set to an RPC client
+ * handle which can be used to send an async rpc reply. Returns zero
+ * if the request was handled, or a suitable unix error code
+ * otherwise.
  */
-extern struct nlm_host *nlm_do_cancel(nlm4_cancargs *argp,
-    nlm4_res *result, struct svc_req *rqstp);
+extern int nlm_do_unlock(nlm4_unlockargs *argp, nlm4_res *result,
+    struct svc_req *rqstp, CLIENT **rpcp);
 
 /*
- * Implementation for unlocking RPCs. Returns the NLM host that
- * matches the RPC arguments.
+ * Implementation for granted RPCs. If the request was handled
+ * successfully and rpcp is non-NULL, *rpcp is set to an RPC client
+ * handle which can be used to send an async rpc reply. Returns zero
+ * if the request was handled, or a suitable unix error code
+ * otherwise.
  */
-extern struct nlm_host *nlm_do_unlock(nlm4_unlockargs *argp,
-    nlm4_res *result, struct svc_req *rqstp);
+extern int nlm_do_granted(nlm4_testargs *argp, nlm4_res *result,
+    struct svc_req *rqstp, CLIENT **rpcp);
 
 /*
  * Free all locks associated with the hostname argp->name.
@@ -109,10 +198,17 @@ extern struct nlm_host *nlm_do_unlock(nlm4_unlockargs *argp,
 extern void nlm_do_free_all(nlm4_notify *argp);
 
 /*
- * Find an RPC transport that can be used to communicate with the
- * userland part of lockd.
+ * Recover client lock state after a server reboot.
+ */
+extern void nlm_client_recovery(struct nlm_host *);
+
+/*
+ * Interface from NFS client code to the NLM.
  */
-extern CLIENT *nlm_user_lockd(void);
+struct vop_advlock_args;
+struct vop_reclaim_args;
+extern int nlm_advlock(struct vop_advlock_args *ap);
+extern int nlm_reclaim(struct vop_reclaim_args *ap);
 
 #endif
 
diff --git a/sys/nlm/nlm_advlock.c b/sys/nlm/nlm_advlock.c
new file mode 100644
index 0000000..fb8b5a7
--- /dev/null
+++ b/sys/nlm/nlm_advlock.c
@@ -0,0 +1,1235 @@
+/*-
+ * Copyright (c) 2008 Isilon Inc http://www.isilon.com/
+ * Authors: Doug Rabson <dfr@rabson.org>
+ * Developed with Red Inc: Alfred Perlstein <alfred@freebsd.org>
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <sys/param.h>
+#include <sys/fcntl.h>
+#include <sys/kernel.h>
+#include <sys/limits.h>
+#include <sys/lock.h>
+#include <sys/lockf.h>
+#include <sys/malloc.h>
+#include <sys/mount.h>
+#include <sys/mutex.h>
+#include <sys/proc.h>
+#include <sys/syslog.h>
+#include <sys/systm.h>
+#include <sys/unistd.h>
+#include <sys/vnode.h>
+
+#include <rpc/rpcclnt.h>
+#include <nfs/nfsproto.h>
+#include <nfsclient/nfs.h>
+#include <nfsclient/nfsnode.h>
+#include <nfsclient/nfsmount.h>
+
+#include <nlm/nlm_prot.h>
+#include <nlm/nlm.h>
+
+/*
+ * We need to keep track of the svid values used for F_FLOCK locks.
+ */
+struct nlm_file_svid {
+	int		ns_refs;	/* thread count + 1 if active */
+	int		ns_svid;	/* on-the-wire SVID for this file */
+	struct ucred	*ns_ucred;	/* creds to use for lock recovery */
+	void		*ns_id;		/* local struct file pointer */
+	bool_t		ns_active;	/* TRUE if we own a lock */
+	LIST_ENTRY(nlm_file_svid) ns_link;
+};
+LIST_HEAD(nlm_file_svid_list, nlm_file_svid);
+
+#define NLM_SVID_HASH_SIZE	256
+struct nlm_file_svid_list nlm_file_svids[NLM_SVID_HASH_SIZE];
+
+struct mtx nlm_svid_lock;
+static struct unrhdr *nlm_svid_allocator;
+static volatile u_int nlm_xid = 1;
+
+static int nlm_setlock(struct nlm_host *host, struct rpc_callextra *ext,
+    rpcvers_t vers, struct timeval *timo, int retries,
+    struct vnode *vp, int op, struct flock *fl, int flags,
+    int svid, size_t fhlen, void *fh, off_t size, bool_t reclaim);
+static int nlm_clearlock(struct nlm_host *host,  struct rpc_callextra *ext,
+    rpcvers_t vers, struct timeval *timo, int retries,
+    struct vnode *vp, int op, struct flock *fl, int flags,
+    int svid, size_t fhlen, void *fh, off_t size);
+static int nlm_getlock(struct nlm_host *host, struct rpc_callextra *ext,
+    rpcvers_t vers, struct timeval *timo, int retries,
+    struct vnode *vp, int op, struct flock *fl, int flags,
+    int svid, size_t fhlen, void *fh, off_t size);
+static int nlm_map_status(nlm4_stats stat);
+static struct nlm_file_svid *nlm_find_svid(void *id);
+static void nlm_free_svid(struct nlm_file_svid *nf);
+static int nlm_init_lock(struct flock *fl, int flags, int svid,
+    rpcvers_t vers, size_t fhlen, void *fh, off_t size,
+    struct nlm4_lock *lock, char oh_space[32]);
+
+static void
+nlm_client_init(void *dummy)
+{
+	int i;
+
+	mtx_init(&nlm_svid_lock, "NLM svid lock", NULL, MTX_DEF);
+	nlm_svid_allocator = new_unrhdr(PID_MAX + 2, INT_MAX, &nlm_svid_lock);
+	for (i = 0; i < NLM_SVID_HASH_SIZE; i++)
+		LIST_INIT(&nlm_file_svids[i]);
+}
+SYSINIT(nlm_client_init, SI_SUB_LOCK, SI_ORDER_FIRST, nlm_client_init, NULL);
+
+static int
+nlm_msg(struct thread *td, const char *server, const char *msg, int error)
+{
+	struct proc *p;
+
+	p = td ? td->td_proc : NULL;
+	if (error) {
+		tprintf(p, LOG_INFO, "nfs server %s: %s, error %d\n", server,
+		    msg, error);
+	} else {
+		tprintf(p, LOG_INFO, "nfs server %s: %s\n", server, msg);
+	}
+	return (0);
+}
+
+struct nlm_feedback_arg {
+	bool_t	nf_printed;
+	struct nfsmount *nf_nmp;
+};
+
+static void
+nlm_down(struct nlm_feedback_arg *nf, struct thread *td,
+    const char *msg, int error)
+{
+	struct nfsmount *nmp = nf->nf_nmp;
+
+	if (nmp == NULL)
+		return;
+	mtx_lock(&nmp->nm_mtx);
+	if (!(nmp->nm_state & NFSSTA_LOCKTIMEO)) {
+		nmp->nm_state |= NFSSTA_LOCKTIMEO;
+		mtx_unlock(&nmp->nm_mtx);
+		vfs_event_signal(&nmp->nm_mountp->mnt_stat.f_fsid,
+		    VQ_NOTRESPLOCK, 0);
+	} else {
+		mtx_unlock(&nmp->nm_mtx);
+	}
+
+	nf->nf_printed = TRUE;
+	nlm_msg(td, nmp->nm_mountp->mnt_stat.f_mntfromname, msg, error);
+}
+
+static void
+nlm_up(struct nlm_feedback_arg *nf, struct thread *td,
+    const char *msg)
+{
+	struct nfsmount *nmp = nf->nf_nmp;
+
+	if (!nf->nf_printed)
+		return;
+
+	nlm_msg(td, nmp->nm_mountp->mnt_stat.f_mntfromname, msg, 0);
+
+	mtx_lock(&nmp->nm_mtx);
+	if (nmp->nm_state & NFSSTA_LOCKTIMEO) {
+		nmp->nm_state &= ~NFSSTA_LOCKTIMEO;
+		mtx_unlock(&nmp->nm_mtx);
+		vfs_event_signal(&nmp->nm_mountp->mnt_stat.f_fsid,
+		    VQ_NOTRESPLOCK, 1);
+	} else {
+		mtx_unlock(&nmp->nm_mtx);
+	}
+}
+
+static void
+nlm_feedback(int type, int proc, void *arg)
+{
+	struct thread *td = curthread;
+	struct nlm_feedback_arg *nf = (struct nlm_feedback_arg *) arg;
+
+	switch (type) {
+	case FEEDBACK_REXMIT2:
+	case FEEDBACK_RECONNECT:
+		nlm_down(nf, td, "lockd not responding", 0);
+		break;
+
+	case FEEDBACK_OK:
+		nlm_up(nf, td, "lockd is alive again");
+		break;
+	}
+}
+
+/*
+ * nlm_advlock --
+ *      NFS advisory byte-level locks.
+ */
+static int
+nlm_advlock_internal(struct vnode *vp, void *id, int op, struct flock *fl,
+    int flags, bool_t reclaim, bool_t unlock_vp)
+{
+	struct thread *td = curthread;
+	struct nfsmount *nmp;
+	struct nfsnode *np;
+	off_t size;
+	size_t fhlen;
+	union nfsfh fh;
+	struct sockaddr *sa;
+	struct sockaddr_storage ss;
+	char servername[MNAMELEN];
+	struct timeval timo;
+	int retries;
+	rpcvers_t vers;
+	struct nlm_host *host;
+	struct rpc_callextra ext;
+	struct nlm_feedback_arg nf;
+	AUTH *auth;
+	struct ucred *cred;
+	struct nlm_file_svid *ns;
+	int svid;
+	int error;
+
+	ASSERT_VOP_LOCKED(vp, "nlm_advlock_1");
+
+	/*
+	 * Push any pending writes to the server and flush our cache
+	 * so that if we are contending with another machine for a
+	 * file, we get whatever they wrote and vice-versa.
+	 */
+	if (op == F_SETLK || op == F_UNLCK)
+		nfs_vinvalbuf(vp, V_SAVE, td, 1);
+
+	np = VTONFS(vp);
+	nmp = VFSTONFS(vp->v_mount);
+	size = np->n_size;
+	sa = nmp->nm_nam;
+	memcpy(&ss, sa, sa->sa_len);
+	sa = (struct sockaddr *) &ss;
+	strcpy(servername, nmp->nm_hostname);
+	fhlen = np->n_fhsize;
+	memcpy(&fh.fh_bytes, np->n_fhp, fhlen);
+	timo.tv_sec = nmp->nm_timeo / NFS_HZ;
+	timo.tv_usec = (nmp->nm_timeo % NFS_HZ) * (1000000 / NFS_HZ);
+	if (NFS_ISV3(vp))
+		vers = NLM_VERS4;
+	else
+		vers = NLM_VERS;
+
+	if (nmp->nm_flag & NFSMNT_SOFT)
+		retries = nmp->nm_retry;
+	else
+		retries = INT_MAX;
+
+	if (unlock_vp)
+		VOP_UNLOCK(vp, 0);
+
+	/*
+	 * We need to switch to mount-point creds so that we can send
+	 * packets from a privileged port.
+	 */
+	cred = td->td_ucred;
+	td->td_ucred = vp->v_mount->mnt_cred;
+
+	host = nlm_find_host_by_name(servername, sa, vers);
+	auth = authunix_create(cred);
+	memset(&ext, 0, sizeof(ext));
+
+	nf.nf_printed = FALSE;
+	nf.nf_nmp = nmp;
+	ext.rc_auth = auth;
+
+	ext.rc_feedback = nlm_feedback;
+	ext.rc_feedback_arg = &nf;
+
+	ns = NULL;
+	if (flags & F_FLOCK) {
+		ns = nlm_find_svid(id);
+		KASSERT(fl->l_start == 0 && fl->l_len == 0,
+		    ("F_FLOCK lock requests must be whole-file locks"));
+		if (!ns->ns_ucred) {
+			/*
+			 * Remember the creds used for locking in case
+			 * we need to recover the lock later.
+			 */
+			ns->ns_ucred = crdup(cred);
+		}
+		svid = ns->ns_svid;
+	} else if (flags & F_REMOTE) {
+		/*
+		 * If we are recovering after a server restart or
+		 * trashing locks on a force unmount, use the same
+		 * svid as last time.
+		 */
+		svid = fl->l_pid;
+	} else {
+		svid = ((struct proc *) id)->p_pid;
+	}
+
+	switch(op) {
+	case F_SETLK:
+		if ((flags & (F_FLOCK|F_WAIT)) == (F_FLOCK|F_WAIT)
+		    && fl->l_type == F_WRLCK) {
+			/*
+			 * The semantics for flock(2) require that any
+			 * shared lock on the file must be released
+			 * before an exclusive lock is granted. The
+			 * local locking code interprets this by
+			 * unlocking the file before sleeping on a
+			 * blocked exclusive lock request. We
+			 * approximate this by first attempting
+			 * non-blocking and if that fails, we unlock
+			 * the file and block.
+			 */
+			error = nlm_setlock(host, &ext, vers, &timo, retries,
+			    vp, F_SETLK, fl, flags & ~F_WAIT,
+			    svid, fhlen, &fh.fh_bytes, size, reclaim);
+			if (error == EAGAIN) {
+				fl->l_type = F_UNLCK;
+				error = nlm_clearlock(host, &ext, vers, &timo,
+				    retries, vp, F_UNLCK, fl, flags,
+				    svid, fhlen, &fh.fh_bytes, size);
+				fl->l_type = F_WRLCK;
+				if (!error) {
+					mtx_lock(&nlm_svid_lock);
+					if (ns->ns_active) {
+						ns->ns_refs--;
+						ns->ns_active = FALSE;
+					}
+					mtx_unlock(&nlm_svid_lock);
+					flags |= F_WAIT;
+					error = nlm_setlock(host, &ext, vers,
+					    &timo, retries, vp, F_SETLK, fl,
+					    flags, svid, fhlen, &fh.fh_bytes,
+					    size, reclaim);
+				}
+			}
+		} else {
+			error = nlm_setlock(host, &ext, vers, &timo, retries,
+			    vp, op, fl, flags, svid, fhlen, &fh.fh_bytes,
+			    size, reclaim);
+		}
+		if (!error && ns) {
+			mtx_lock(&nlm_svid_lock);
+			if (!ns->ns_active) {
+				/*
+				 * Add one to the reference count to
+				 * hold onto the SVID for the lifetime
+				 * of the lock. Note that since
+				 * F_FLOCK only supports whole-file
+				 * locks, there can only be one active
+				 * lock for this SVID.
+				 */
+				ns->ns_refs++;
+				ns->ns_active = TRUE;
+			}
+			mtx_unlock(&nlm_svid_lock);
+		}
+		break;
+
+	case F_UNLCK:
+		error = nlm_clearlock(host, &ext, vers, &timo, retries,
+		    vp, op, fl, flags, svid, fhlen, &fh.fh_bytes, size);
+		if (!error && ns) {
+			mtx_lock(&nlm_svid_lock);
+			if (ns->ns_active) {
+				ns->ns_refs--;
+				ns->ns_active = FALSE;
+			}
+			mtx_unlock(&nlm_svid_lock);
+		}
+		break;
+
+	case F_GETLK:
+		error = nlm_getlock(host, &ext, vers, &timo, retries,
+		    vp, op, fl, flags, svid, fhlen, &fh.fh_bytes, size);
+		break;
+
+	default:
+		error = EINVAL;
+		break;
+	}
+
+	if (ns)
+		nlm_free_svid(ns);
+
+	td->td_ucred = cred;
+	AUTH_DESTROY(auth);
+
+	nlm_host_release(host);
+
+	return (error);
+}
+
+int
+nlm_advlock(struct vop_advlock_args *ap)
+{
+
+	return (nlm_advlock_internal(ap->a_vp, ap->a_id, ap->a_op, ap->a_fl,
+		ap->a_flags, FALSE, TRUE));
+}
+
+/*
+ * Set the creds of td to the creds of the given lock's owner. The new
+ * creds reference count will be incremented via crhold. The caller is
+ * responsible for calling crfree and restoring td's original creds.
+ */
+static void
+nlm_set_creds_for_lock(struct thread *td, struct flock *fl)
+{
+	int i;
+	struct nlm_file_svid *ns;
+	struct proc *p;
+	struct ucred *cred;
+
+	cred = NULL;
+	if (fl->l_pid > PID_MAX) {
+		/*
+		 * If this was originally a F_FLOCK-style lock, we
+		 * recorded the creds used when it was originally
+		 * locked in the nlm_file_svid structure.
+		 */
+		mtx_lock(&nlm_svid_lock);
+		for (i = 0; i < NLM_SVID_HASH_SIZE; i++) {
+			for (ns = LIST_FIRST(&nlm_file_svids[i]); ns;
+			     ns = LIST_NEXT(ns, ns_link)) {
+				if (ns->ns_svid == fl->l_pid) {
+					cred = crhold(ns->ns_ucred);
+					break;
+				}
+			}
+		}
+		mtx_unlock(&nlm_svid_lock);
+	} else {
+		/*
+		 * This lock is owned by a process. Get a reference to
+		 * the process creds.
+		 */
+		p = pfind(fl->l_pid);
+		if (p) {
+			cred = crhold(p->p_ucred);
+			PROC_UNLOCK(p);
+		}
+	}
+
+	/*
+	 * If we can't find a cred, fall back on the recovery
+	 * thread's cred.
+	 */
+	if (!cred) {
+		cred = crhold(td->td_ucred);
+	}
+
+	td->td_ucred = cred;
+}
+
+static int
+nlm_reclaim_free_lock(struct vnode *vp, struct flock *fl, void *arg)
+{
+	struct flock newfl;
+	struct thread *td = curthread;
+	struct ucred *oldcred;
+	int error;
+
+	newfl = *fl;
+	newfl.l_type = F_UNLCK;
+
+	oldcred = td->td_ucred;
+	nlm_set_creds_for_lock(td, &newfl);
+
+	error = nlm_advlock_internal(vp, NULL, F_UNLCK, &newfl, F_REMOTE,
+	    FALSE, FALSE);
+
+	crfree(td->td_ucred);
+	td->td_ucred = oldcred;
+
+	return (error);
+}
+
+int
+nlm_reclaim(struct vop_reclaim_args *ap)
+{
+
+	nlm_cancel_wait(ap->a_vp);
+	lf_iteratelocks_vnode(ap->a_vp, nlm_reclaim_free_lock, NULL);
+	return (0);
+}
+
+struct nlm_recovery_context {
+	struct nlm_host	*nr_host;	/* host we are recovering */
+	int		nr_state;	/* remote NSM state for recovery */
+};
+
+/*
+ * Per-lock callback for nlm_client_recovery(): re-issue the lock 'fl' on
+ * 'vp' as an F_SETLK request, using the credentials selected by
+ * nlm_set_creds_for_lock().  'arg' points at the struct
+ * nlm_recovery_context for the current pass.
+ *
+ * Returns ERESTART when the host's NSM state no longer matches the value
+ * sampled at the start of the pass, otherwise the error from vn_lock()
+ * or the result of nlm_advlock_internal().
+ *
+ * NOTE(review): the vnode lock taken here is not released in this
+ * function; presumably nlm_advlock_internal() drops it (final argument
+ * TRUE) - confirm against its definition.
+ */
+static int
+nlm_client_recover_lock(struct vnode *vp, struct flock *fl, void *arg)
+{
+	struct nlm_recovery_context *ctx = arg;
+	struct thread *self = curthread;
+	struct ucred *saved_cred;
+	int rc;
+
+	/*
+	 * A change in the remote NSM state part-way through recovery
+	 * means the host has rebooted a second time, so the whole
+	 * recovery has to be started over.
+	 */
+	if (nlm_host_get_state(ctx->nr_host) != ctx->nr_state)
+		return (ERESTART);
+
+	rc = vn_lock(vp, LK_SHARED);
+	if (rc != 0)
+		return (rc);
+
+	/* Borrow the lock owner's credentials for the request. */
+	saved_cred = self->td_ucred;
+	nlm_set_creds_for_lock(self, fl);
+	rc = nlm_advlock_internal(vp, NULL, F_SETLK, fl, F_REMOTE,
+	    TRUE, TRUE);
+
+	/* Drop the borrowed reference and restore our own credentials. */
+	crfree(self->td_ucred);
+	self->td_ucred = saved_cred;
+
+	return (rc);
+}
+
+/*
+ * Run nlm_client_recover_lock() over every lock recorded under the
+ * client sysid of 'host'.  Each pass samples the host's NSM state first;
+ * a pass that ends with ERESTART is repeated from the beginning, any
+ * other result ends the recovery.
+ */
+void
+nlm_client_recovery(struct nlm_host *host)
+{
+	struct nlm_recovery_context ctx;
+	int client_sysid, rc;
+
+	client_sysid = NLM_SYSID_CLIENT | nlm_host_get_sysid(host);
+	ctx.nr_host = host;
+
+	for (;;) {
+		ctx.nr_state = nlm_host_get_state(host);
+		rc = lf_iteratelocks_sysid(client_sysid,
+		    nlm_client_recover_lock, &ctx);
+		if (rc != ERESTART)
+			break;
+	}
+}
+
+/*
+ * Copy a version 4 lock description into the older nlm_lock layout,
+ * field by field.  Pointer-carrying members (caller_name, fh, oh) are
+ * copied as-is, so 'dst' refers to the same storage as 'src'.
+ */
+static void
+nlm_convert_to_nlm_lock(struct nlm_lock *dst, struct nlm4_lock *src)
+{
+
+	/* Range and owner id. */
+	dst->l_offset = src->l_offset;
+	dst->l_len = src->l_len;
+	dst->svid = src->svid;
+
+	/* Caller name, file handle and owner handle. */
+	dst->caller_name = src->caller_name;
+	dst->fh = src->fh;
+	dst->oh = src->oh;
+}
+
+/*
+ * Copy an older-protocol lock holder description into the version 4
+ * nlm4_holder layout, field by field.
+ */
+static void
+nlm_convert_to_nlm4_holder(struct nlm4_holder *dst, struct nlm_holder *src)
+{
+
+	/* Range held by the conflicting owner. */
+	dst->l_offset = src->l_offset;
+	dst->l_len = src->l_len;
+
+	/* Who holds it and in which mode. */
+	dst->svid = src->svid;
+	dst->oh = src->oh;
+	dst->exclusive = src->exclusive;
+}
+
+/*
+ * Copy an older-protocol result into the version 4 nlm4_res layout: the
+ * cookie is copied as-is and the status is cast to enum nlm4_stats.
+ */
+static void
+nlm_convert_to_nlm4_res(struct nlm4_res *dst, struct nlm_res *src)
+{
+
+	dst->stat.stat = (enum nlm4_stats) src->stat.stat;
+	dst->cookie = src->cookie;
+}
+
+/*
+ * Issue a TEST call using the protocol version 'vers'.  Version 4 is
+ * passed straight through; for any other version the arguments are
+ * converted to the older layout, nlm_test_1() is called, and on
+ * RPC_SUCCESS the reply (including the holder when the status is
+ * nlm_denied) is converted back into 'res'.
+ *
+ * Returns the RPC status of the underlying call.
+ */
+static enum clnt_stat
+nlm_test_rpc(rpcvers_t vers, nlm4_testargs *args, nlm4_testres *res, CLIENT *client,
+    struct rpc_callextra *ext, struct timeval timo)
+{
+	nlm_testargs v1_args;
+	nlm_testres v1_res;
+	enum clnt_stat rpc_stat;
+
+	if (vers == NLM_VERS4)
+		return (nlm4_test_4(args, res, client, ext, timo));
+
+	/* Build the older-format request. */
+	memset(&v1_res, 0, sizeof(v1_res));
+	v1_args.cookie = args->cookie;
+	v1_args.exclusive = args->exclusive;
+	nlm_convert_to_nlm_lock(&v1_args.alock, &args->alock);
+
+	rpc_stat = nlm_test_1(&v1_args, &v1_res, client, ext, timo);
+	if (rpc_stat != RPC_SUCCESS)
+		return (rpc_stat);
+
+	/* Translate the reply; only a denial carries a holder. */
+	res->cookie = v1_res.cookie;
+	res->stat.stat = (enum nlm4_stats) v1_res.stat.stat;
+	if (v1_res.stat.stat == nlm_denied) {
+		nlm_convert_to_nlm4_holder(
+			&res->stat.nlm4_testrply_u.holder,
+			&v1_res.stat.nlm_testrply_u.holder);
+	}
+
+	return (rpc_stat);
+}
+
+/*
+ * Issue a LOCK call using the protocol version 'vers'.  Version 4 is
+ * passed straight through; for any other version the arguments are
+ * converted to the older layout, nlm_lock_1() is called, and on
+ * RPC_SUCCESS the reply is converted back into 'res'.
+ *
+ * Returns the RPC status of the underlying call.
+ */
+static enum clnt_stat
+nlm_lock_rpc(rpcvers_t vers, nlm4_lockargs *args, nlm4_res *res, CLIENT *client,
+    struct rpc_callextra *ext, struct timeval timo)
+{
+	nlm_lockargs v1_args;
+	nlm_res v1_res;
+	enum clnt_stat rpc_stat;
+
+	if (vers == NLM_VERS4)
+		return (nlm4_lock_4(args, res, client, ext, timo));
+
+	/* Build the older-format request. */
+	memset(&v1_res, 0, sizeof(v1_res));
+	v1_args.cookie = args->cookie;
+	v1_args.block = args->block;
+	v1_args.exclusive = args->exclusive;
+	nlm_convert_to_nlm_lock(&v1_args.alock, &args->alock);
+	v1_args.reclaim = args->reclaim;
+	v1_args.state = args->state;
+
+	rpc_stat = nlm_lock_1(&v1_args, &v1_res, client, ext, timo);
+	if (rpc_stat == RPC_SUCCESS)
+		nlm_convert_to_nlm4_res(res, &v1_res);
+
+	return (rpc_stat);
+}
+
+/*
+ * Issue a CANCEL call using the protocol version 'vers'.  Version 4 is
+ * passed straight through; for any other version the arguments are
+ * converted to the older layout, nlm_cancel_1() is called, and on
+ * RPC_SUCCESS the reply is converted back into 'res'.
+ *
+ * Returns the RPC status of the underlying call.
+ */
+static enum clnt_stat
+nlm_cancel_rpc(rpcvers_t vers, nlm4_cancargs *args, nlm4_res *res, CLIENT *client,
+    struct rpc_callextra *ext, struct timeval timo)
+{
+	nlm_cancargs v1_args;
+	nlm_res v1_res;
+	enum clnt_stat rpc_stat;
+
+	if (vers == NLM_VERS4)
+		return (nlm4_cancel_4(args, res, client, ext, timo));
+
+	/* Build the older-format request. */
+	memset(&v1_res, 0, sizeof(v1_res));
+	v1_args.cookie = args->cookie;
+	v1_args.block = args->block;
+	v1_args.exclusive = args->exclusive;
+	nlm_convert_to_nlm_lock(&v1_args.alock, &args->alock);
+
+	rpc_stat = nlm_cancel_1(&v1_args, &v1_res, client, ext, timo);
+	if (rpc_stat == RPC_SUCCESS)
+		nlm_convert_to_nlm4_res(res, &v1_res);
+
+	return (rpc_stat);
+}
+
+/*
+ * Issue an UNLOCK call using the protocol version 'vers'.  Version 4 is
+ * passed straight through; for any other version the arguments are
+ * converted to the older layout, nlm_unlock_1() is called, and on
+ * RPC_SUCCESS the reply is converted back into 'res'.
+ *
+ * Returns the RPC status of the underlying call.
+ */
+static enum clnt_stat
+nlm_unlock_rpc(rpcvers_t vers, nlm4_unlockargs *args, nlm4_res *res, CLIENT *client,
+    struct rpc_callextra *ext, struct timeval timo)
+{
+	nlm_unlockargs v1_args;
+	nlm_res v1_res;
+	enum clnt_stat rpc_stat;
+
+	if (vers == NLM_VERS4)
+		return (nlm4_unlock_4(args, res, client, ext, timo));
+
+	/* Build the older-format request. */
+	memset(&v1_res, 0, sizeof(v1_res));
+	v1_args.cookie = args->cookie;
+	nlm_convert_to_nlm_lock(&v1_args.alock, &args->alock);
+
+	rpc_stat = nlm_unlock_1(&v1_args, &v1_res, client, ext, timo);
+	if (rpc_stat == RPC_SUCCESS)
+		nlm_convert_to_nlm4_res(res, &v1_res);
+
+	return (rpc_stat);
+}
+
+/*
+ * Record the outcome of a successful lock request (set or clear) in the
+ * local lock manager.  Because the remote server has already granted
+ * the request, it cannot conflict with any other lock we hold in the
+ * local lock manager.
+ *
+ * The same host may also be making NLM client requests to our own NLM
+ * server, so our client-side locks are recorded under a different sysid
+ * value (NLM_SYSID_CLIENT or'ed with the host's sysid).
+ *
+ * Replies from the server can reach us in a different order than the
+ * one in which the locks were granted (e.g. when many local threads are
+ * contending for the same lock), so registration with the local lock
+ * manager must be a blocking operation.  Any actual wait is expected to
+ * be rare and short, hence signals are ignored for it.
+ */
+static void
+nlm_record_lock(struct vnode *vp, int op, struct flock *fl,
+    int svid, int sysid, off_t size)
+{
+	struct vop_advlockasync_args lf_args;
+	struct flock local_fl;
+	int error;
+
+	/* Caller's range and type, owned by (svid, client sysid). */
+	local_fl.l_start = fl->l_start;
+	local_fl.l_len = fl->l_len;
+	local_fl.l_type = fl->l_type;
+	local_fl.l_whence = fl->l_whence;
+	local_fl.l_pid = svid;
+	local_fl.l_sysid = NLM_SYSID_CLIENT | sysid;
+
+	/* Blocking, uninterruptible request with no async completion. */
+	lf_args.a_vp = vp;
+	lf_args.a_id = NULL;
+	lf_args.a_op = op;
+	lf_args.a_fl = &local_fl;
+	lf_args.a_flags = F_REMOTE|F_WAIT|F_NOINTR;
+	lf_args.a_task = NULL;
+	lf_args.a_cookiep = NULL;
+
+	error = lf_advlockasync(&lf_args, &vp->v_lockf, size);
+	KASSERT(error == 0, ("Failed to register NFS lock locally - error=%d",
+		error));
+}
+
+/*
+ * Ask the server behind 'host' for the lock described by 'fl' and, on
+ * success, record it in the local lock manager.
+ *
+ * The request is blocking when F_WAIT is set in 'flags' and exclusive
+ * when fl->l_type is F_WRLCK.  'retries' bounds the number of times a
+ * failed RPC is re-sent.  'reclaim' is passed through to the server and
+ * also suppresses the local bookkeeping (nlm_record_lock() and
+ * nlm_host_monitor()).
+ *
+ * Returns 0 on success, otherwise:
+ *   - the error from nlm_init_lock() if the range is invalid,
+ *   - ENOLCK if no RPC client handle is available for the host,
+ *   - EINVAL if the RPC keeps failing after 'retries' re-sends,
+ *   - the error from tsleep()/nlm_wait_lock() if a wait is interrupted,
+ *   - the nlm_map_status() translation of the server's status.
+ */
+static int
+nlm_setlock(struct nlm_host *host, struct rpc_callextra *ext,
+    rpcvers_t vers, struct timeval *timo, int retries,
+    struct vnode *vp, int op, struct flock *fl, int flags,
+    int svid, size_t fhlen, void *fh, off_t size, bool_t reclaim)
+{
+	struct nlm4_lockargs args;
+	char oh_space[32];
+	struct nlm4_res res;
+	u_int xid;
+	CLIENT *client;
+	enum clnt_stat stat;
+	int retry, block, exclusive;
+	void *wait_handle = NULL;
+	int error;
+
+	memset(&args, 0, sizeof(args));
+	memset(&res, 0, sizeof(res));
+
+	block = (flags & F_WAIT) ? TRUE : FALSE;
+	exclusive = (fl->l_type == F_WRLCK);
+
+	error = nlm_init_lock(fl, flags, svid, vers, fhlen, fh, size,
+	    &args.alock, oh_space);
+	if (error)
+		return (error);
+	args.block = block;
+	args.exclusive = exclusive;
+	args.reclaim = reclaim;
+	args.state = nlm_nsm_state;
+
+	/* Back-off interval for re-tries: starts at 5s, capped at 30s. */
+	retry = 5*hz;
+	for (;;) {
+		client = nlm_host_get_rpc(host);
+		if (!client)
+			return (ENOLCK); /* XXX retry? */
+
+		/*
+		 * For a blocking request, register the wait before
+		 * sending the request.
+		 */
+		if (block)
+			wait_handle = nlm_register_wait_lock(&args.alock, vp);
+
+		/* Each attempt carries a fresh xid as its cookie. */
+		xid = atomic_fetchadd_int(&nlm_xid, 1);
+		args.cookie.n_len = sizeof(xid);
+		args.cookie.n_bytes = (char*) &xid;
+
+		stat = nlm_lock_rpc(vers, &args, &res, client, ext, *timo);
+
+		CLNT_RELEASE(client);
+
+		if (stat != RPC_SUCCESS) {
+			if (block)
+				nlm_deregister_wait_lock(wait_handle);
+			if (retries) {
+				retries--;
+				continue;
+			}
+			return (EINVAL);
+		}
+
+		/*
+		 * Free res.cookie.
+		 */
+		xdr_free((xdrproc_t) xdr_nlm4_res, &res);
+
+		/* The wait registration is only needed for nlm4_blocked. */
+		if (block && res.stat.stat != nlm4_blocked)
+			nlm_deregister_wait_lock(wait_handle);
+
+		if (res.stat.stat == nlm4_denied_grace_period) {
+			/*
+			 * The server has recently rebooted and is
+			 * giving old clients a chance to reclaim
+			 * their locks. Wait for a few seconds and try
+			 * again.
+			 */
+			error = tsleep(&args, PCATCH, "nlmgrace", retry);
+			if (error && error != EWOULDBLOCK)
+				return (error);
+			retry = 2*retry;
+			if (retry > 30*hz)
+				retry = 30*hz;
+			continue;
+		}
+
+		if (block && res.stat.stat == nlm4_blocked) {
+			/*
+			 * The server should call us back with a
+			 * granted message when the lock succeeds. In
+			 * order to deal with broken servers, lost
+			 * granted messages and server reboots, we
+			 * will also re-try every few seconds.
+			 */
+			error = nlm_wait_lock(wait_handle, retry);
+			if (error == EWOULDBLOCK) {
+				retry = 2*retry;
+				if (retry > 30*hz)
+					retry = 30*hz;
+				continue;
+			}
+			if (error) {
+				/*
+				 * We need to call the server to
+				 * cancel our lock request.
+				 */
+				nlm4_cancargs cancel;
+
+				memset(&cancel, 0, sizeof(cancel));
+
+				xid = atomic_fetchadd_int(&nlm_xid, 1);
+				cancel.cookie.n_len = sizeof(xid);
+				cancel.cookie.n_bytes = (char*) &xid;
+				cancel.block = block;
+				cancel.exclusive = exclusive;
+				cancel.alock = args.alock;
+
+				do {
+					client = nlm_host_get_rpc(host);
+					if (!client)
+						/* XXX retry? */
+						return (ENOLCK);
+
+					stat = nlm_cancel_rpc(vers, &cancel,
+					    &res, client, ext, *timo);
+
+					CLNT_RELEASE(client);
+
+					if (stat != RPC_SUCCESS) {
+						/*
+						 * We need to cope
+						 * with temporary
+						 * network partitions
+						 * as well as server
+						 * reboots. This means
+						 * we have to keep
+						 * trying to cancel
+						 * until the server
+						 * wakes up again.
+						 */
+						pause("nlmcancel", 10*hz);
+					}
+				} while (stat != RPC_SUCCESS);
+
+				/*
+				 * Free res.cookie.
+				 */
+				xdr_free((xdrproc_t) xdr_nlm4_res, &res);
+
+				/*
+				 * res is an nlm4_res, so compare against
+				 * the nlm4_stats constants only.
+				 */
+				switch (res.stat.stat) {
+				case nlm4_denied:
+					/*
+					 * There was nothing
+					 * to cancel. We are
+					 * going to go ahead
+					 * and assume we got
+					 * the lock.
+					 */
+					error = 0;
+					break;
+
+				case nlm4_denied_grace_period:
+					/*
+					 * The server has
+					 * recently rebooted -
+					 * treat this as a
+					 * successful
+					 * cancellation.
+					 */
+					break;
+
+				case nlm4_granted:
+					/*
+					 * We managed to
+					 * cancel.
+					 */
+					break;
+
+				default:
+					/*
+					 * Broken server
+					 * implementation -
+					 * can't really do
+					 * anything here.
+					 */
+					break;
+				}
+
+			}
+		} else {
+			error = nlm_map_status(res.stat.stat);
+		}
+
+		/*
+		 * A reclaimed lock is not recorded again here; only new
+		 * locks are registered locally and cause the host to be
+		 * monitored.
+		 */
+		if (!error && !reclaim) {
+			nlm_record_lock(vp, op, fl, args.alock.svid,
+			    nlm_host_get_sysid(host), size);
+			nlm_host_monitor(host, 0);
+		}
+
+		return (error);
+	}
+}
+
+/*
+ * Ask the server behind 'host' to release the range described by 'fl'
+ * and, unless F_REMOTE is set in 'flags', record the unlock in the local
+ * lock manager.  'retries' bounds the number of times a failed RPC is
+ * re-sent.
+ *
+ * Returns 0 once the server has answered with anything other than
+ * nlm4_denied_grace_period, otherwise:
+ *   - the error from nlm_init_lock() if the range is invalid,
+ *   - ENOLCK if no RPC client handle is available for the host,
+ *   - EINVAL if the RPC keeps failing after 'retries' re-sends,
+ *   - the error from tsleep() if the grace-period wait is interrupted.
+ */
+static int
+nlm_clearlock(struct nlm_host *host, struct rpc_callextra *ext,
+    rpcvers_t vers, struct timeval *timo, int retries,
+    struct vnode *vp, int op, struct flock *fl, int flags,
+    int svid, size_t fhlen, void *fh, off_t size)
+{
+	struct nlm4_unlockargs unlock_args;
+	struct nlm4_res reply;
+	char oh_space[32];
+	enum clnt_stat rpc_stat;
+	CLIENT *rpc;
+	u_int xid;
+	int rc;
+
+	memset(&unlock_args, 0, sizeof(unlock_args));
+	memset(&reply, 0, sizeof(reply));
+
+	rc = nlm_init_lock(fl, flags, svid, vers, fhlen, fh, size,
+	    &unlock_args.alock, oh_space);
+	if (rc != 0)
+		return (rc);
+
+	for (;;) {
+		rpc = nlm_host_get_rpc(host);
+		if (rpc == NULL)
+			return (ENOLCK); /* XXX retry? */
+
+		/* Each attempt carries a fresh xid as its cookie. */
+		xid = atomic_fetchadd_int(&nlm_xid, 1);
+		unlock_args.cookie.n_bytes = (char*) &xid;
+		unlock_args.cookie.n_len = sizeof(xid);
+
+		rpc_stat = nlm_unlock_rpc(vers, &unlock_args, &reply, rpc,
+		    ext, *timo);
+		CLNT_RELEASE(rpc);
+
+		if (rpc_stat != RPC_SUCCESS) {
+			if (retries == 0)
+				return (EINVAL);
+			retries--;
+			continue;
+		}
+
+		/* Free reply.cookie. */
+		xdr_free((xdrproc_t) xdr_nlm4_res, &reply);
+
+		if (reply.stat.stat != nlm4_denied_grace_period)
+			break;
+
+		/*
+		 * The server has recently rebooted and is giving old
+		 * clients a chance to reclaim their locks.  Wait for a
+		 * few seconds and then send the request again.
+		 */
+		rc = tsleep(&unlock_args, PCATCH, "nlmgrace", 5*hz);
+		if (rc != 0 && rc != EWOULDBLOCK)
+			return (rc);
+	}
+
+	/*
+	 * When called via nlm_reclaim (which uses the F_REMOTE flag), the
+	 * vnode is going away, so the operation is not recorded in the
+	 * local lock manager.
+	 */
+	if ((flags & F_REMOTE) == 0) {
+		nlm_record_lock(vp, op, fl, unlock_args.alock.svid,
+		    nlm_host_get_sysid(host), size);
+	}
+
+	return (0);
+}
+
+/*
+ * Ask the server behind 'host' whether the lock described by 'fl' could
+ * be granted (F_GETLK).  On a nlm4_denied reply 'fl' is overwritten with
+ * the conflicting holder's range, svid and type (SEEK_SET relative,
+ * l_sysid 0); on any other reply fl->l_type is set to F_UNLCK.
+ * 'retries' bounds the number of times a failed RPC is re-sent.
+ *
+ * Returns 0 once the server has answered with anything other than
+ * nlm4_denied_grace_period, otherwise:
+ *   - the error from nlm_init_lock() if the range is invalid,
+ *   - ENOLCK if no RPC client handle is available for the host,
+ *   - EINVAL if the RPC keeps failing after 'retries' re-sends,
+ *   - the error from tsleep() if the grace-period wait is interrupted.
+ */
+static int
+nlm_getlock(struct nlm_host *host, struct rpc_callextra *ext,
+    rpcvers_t vers, struct timeval *timo, int retries,
+    struct vnode *vp, int op, struct flock *fl, int flags,
+    int svid, size_t fhlen, void *fh, off_t size)
+{
+	struct nlm4_testargs test_args;
+	struct nlm4_testres reply;
+	struct nlm4_holder *holder;
+	char oh_space[32];
+	enum clnt_stat rpc_stat;
+	CLIENT *rpc;
+	u_int xid;
+	int rc;
+
+	KASSERT(!(flags & F_FLOCK), ("unexpected F_FLOCK for F_GETLK"));
+
+	memset(&test_args, 0, sizeof(test_args));
+	memset(&reply, 0, sizeof(reply));
+
+	rc = nlm_init_lock(fl, flags, svid, vers, fhlen, fh, size,
+	    &test_args.alock, oh_space);
+	if (rc != 0)
+		return (rc);
+	test_args.exclusive = (fl->l_type == F_WRLCK);
+
+	for (;;) {
+		rpc = nlm_host_get_rpc(host);
+		if (rpc == NULL)
+			return (ENOLCK); /* XXX retry? */
+
+		/* Each attempt carries a fresh xid as its cookie. */
+		xid = atomic_fetchadd_int(&nlm_xid, 1);
+		test_args.cookie.n_bytes = (char*) &xid;
+		test_args.cookie.n_len = sizeof(xid);
+
+		rpc_stat = nlm_test_rpc(vers, &test_args, &reply, rpc,
+		    ext, *timo);
+		CLNT_RELEASE(rpc);
+
+		if (rpc_stat != RPC_SUCCESS) {
+			if (retries == 0)
+				return (EINVAL);
+			retries--;
+			continue;
+		}
+
+		if (reply.stat.stat != nlm4_denied_grace_period)
+			break;
+
+		/*
+		 * The server has recently rebooted and is giving old
+		 * clients a chance to reclaim their locks.  Release
+		 * this reply, wait for a few seconds and then send the
+		 * request again.
+		 */
+		xdr_free((xdrproc_t) xdr_nlm4_testres, &reply);
+		rc = tsleep(&test_args, PCATCH, "nlmgrace", 5*hz);
+		if (rc != 0 && rc != EWOULDBLOCK)
+			return (rc);
+	}
+
+	if (reply.stat.stat == nlm4_denied) {
+		/* Report the conflicting holder back through 'fl'. */
+		holder = &reply.stat.nlm4_testrply_u.holder;
+		fl->l_start = holder->l_offset;
+		fl->l_len = holder->l_len;
+		fl->l_pid = holder->svid;
+		fl->l_type = holder->exclusive ? F_WRLCK : F_RDLCK;
+		fl->l_whence = SEEK_SET;
+		fl->l_sysid = 0;
+	} else {
+		fl->l_type = F_UNLCK;
+	}
+
+	/* The holder has been copied out; the reply can be released. */
+	xdr_free((xdrproc_t) xdr_nlm4_testres, &reply);
+
+	return (0);
+}
+
+/*
+ * Translate a version 4 NLM status code into an errno value.  A granted
+ * status maps to 0; any status without an explicit mapping yields
+ * EINVAL.
+ */
+static int
+nlm_map_status(nlm4_stats stat)
+{
+	int error;
+
+	switch (stat) {
+	case nlm4_granted:
+		error = 0;
+		break;
+	case nlm4_denied:
+		error = EAGAIN;
+		break;
+	case nlm4_denied_nolocks:
+		error = ENOLCK;
+		break;
+	case nlm4_deadlck:
+		error = EDEADLK;
+		break;
+	case nlm4_rofs:
+		error = EROFS;
+		break;
+	case nlm4_stale_fh:
+		error = ESTALE;
+		break;
+	case nlm4_fbig:
+		error = EFBIG;
+		break;
+	case nlm4_failed:
+		error = EACCES;
+		break;
+	default:
+		error = EINVAL;
+		break;
+	}
+
+	return (error);
+}
+
+/*
+ * Look up the svid entry for 'id', creating it if necessary, and return
+ * it with one additional reference held.  Entries live in a hash table
+ * protected by nlm_svid_lock.
+ *
+ * A new entry starts with one reference, a svid taken from
+ * nlm_svid_allocator, no credentials and ns_active FALSE.  It is
+ * allocated without the mutex held, so the table is searched a second
+ * time before insertion in case another thread added an entry for the
+ * same id in the meantime.
+ */
+static struct nlm_file_svid *
+nlm_find_svid(void *id)
+{
+	struct nlm_file_svid *entry, *fresh;
+	int bucket, new_svid;
+
+	bucket = (((uintptr_t) id) >> 7) % NLM_SVID_HASH_SIZE;
+
+	/* Common case: the id already has an entry. */
+	mtx_lock(&nlm_svid_lock);
+	LIST_FOREACH(entry, &nlm_file_svids[bucket], ns_link) {
+		if (entry->ns_id == id) {
+			entry->ns_refs++;
+			break;
+		}
+	}
+	mtx_unlock(&nlm_svid_lock);
+	if (entry != NULL)
+		return (entry);
+
+	/* Build a candidate entry outside the lock. */
+	new_svid = alloc_unr(nlm_svid_allocator);
+	fresh = malloc(sizeof(struct nlm_file_svid), M_NLM,
+	    M_WAITOK);
+	fresh->ns_id = id;
+	fresh->ns_svid = new_svid;
+	fresh->ns_refs = 1;
+	fresh->ns_ucred = NULL;
+	fresh->ns_active = FALSE;
+
+	/*
+	 * Check again: some other thread may have allocated a svid for
+	 * this file while we were not holding the lock.
+	 */
+	mtx_lock(&nlm_svid_lock);
+	LIST_FOREACH(entry, &nlm_file_svids[bucket], ns_link) {
+		if (entry->ns_id == id)
+			break;
+	}
+	if (entry == NULL) {
+		/* No competitor; publish our candidate. */
+		LIST_INSERT_HEAD(&nlm_file_svids[bucket], fresh,
+		    ns_link);
+		mtx_unlock(&nlm_svid_lock);
+		return (fresh);
+	}
+
+	/* Lost the race: use the existing entry, discard the candidate. */
+	entry->ns_refs++;
+	mtx_unlock(&nlm_svid_lock);
+	free_unr(nlm_svid_allocator, fresh->ns_svid);
+	free(fresh, M_NLM);
+
+	return (entry);
+}
+
+/*
+ * Drop one reference on a svid entry.  When the last reference goes
+ * away the entry is unlinked from the hash table under nlm_svid_lock,
+ * and then its svid number, its credential reference (if any) and the
+ * entry itself are released with the lock dropped.
+ */
+static void
+nlm_free_svid(struct nlm_file_svid *ns)
+{
+
+	mtx_lock(&nlm_svid_lock);
+	ns->ns_refs--;
+	if (ns->ns_refs != 0) {
+		/* Still referenced elsewhere. */
+		mtx_unlock(&nlm_svid_lock);
+		return;
+	}
+
+	KASSERT(!ns->ns_active, ("Freeing active SVID"));
+	LIST_REMOVE(ns, ns_link);
+	mtx_unlock(&nlm_svid_lock);
+
+	/* The entry is unlinked; release what it owns. */
+	free_unr(nlm_svid_allocator, ns->ns_svid);
+	if (ns->ns_ucred != NULL)
+		crfree(ns->ns_ucred);
+	free(ns, M_NLM);
+}
+
+/*
+ * Fill in the NLM lock description 'lock' from the flock 'fl'.
+ *
+ * The byte range is normalised to an absolute, non-negative start and a
+ * non-negative length: SEEK_END offsets are resolved against 'size', and
+ * a negative l_len selects the bytes immediately before l_start.  The
+ * owner handle is the text "<svid>@<hostname>" written into the caller
+ * supplied 32 byte buffer 'oh_space' (truncated if longer); 'lock'
+ * keeps pointers to 'oh_space', 'fh' and hostname rather than copies.
+ * 'flags' is not examined here.
+ *
+ * NOTE(review): SEEK_CUR is treated exactly like SEEK_SET; presumably
+ * callers have already resolved it to an absolute offset - confirm.
+ *
+ * Returns 0 on success, EINVAL for an unknown l_whence or a range that
+ * would start before offset 0, and EOVERFLOW when the range cannot be
+ * represented (SEEK_END overflow, or beyond 32 bits for NLM_VERS).
+ */
+static int
+nlm_init_lock(struct flock *fl, int flags, int svid,
+    rpcvers_t vers, size_t fhlen, void *fh, off_t size,
+    struct nlm4_lock *lock, char oh_space[32])
+{
+	size_t oh_len;
+	off_t start, len;
+
+	if (fl->l_whence == SEEK_END) {
+		if (size > OFF_MAX
+		    || (fl->l_start > 0 && size > OFF_MAX - fl->l_start))
+			return (EOVERFLOW);
+		start = size + fl->l_start;
+	} else if (fl->l_whence == SEEK_SET || fl->l_whence == SEEK_CUR) {
+		start = fl->l_start;
+	} else {
+		return (EINVAL);
+	}
+	if (start < 0)
+		return (EINVAL);
+	if (fl->l_len < 0) {
+		/*
+		 * Move the start back first: start >= 0 and l_len < 0,
+		 * so the sum cannot overflow.  Once it is known to be
+		 * non-negative, l_len >= -start and negating it is safe
+		 * (negating the most negative off_t would overflow).
+		 */
+		start += fl->l_len;
+		if (start < 0)
+			return (EINVAL);
+		len = -fl->l_len;
+	} else {
+		len = fl->l_len;
+	}
+
+	if (vers == NLM_VERS) {
+		/*
+		 * Enforce range limits on V1 locks
+		 */
+		if (start > 0xffffffffLL || len > 0xffffffffLL)
+			return (EOVERFLOW);
+	}
+
+	snprintf(oh_space, 32, "%d@%s", svid, hostname);
+	oh_len = strlen(oh_space);
+
+	memset(lock, 0, sizeof(*lock));
+	lock->caller_name = hostname;
+	lock->fh.n_len = fhlen;
+	lock->fh.n_bytes = fh;
+	lock->oh.n_len = oh_len;
+	lock->oh.n_bytes = oh_space;
+	lock->svid = svid;
+	lock->l_offset = start;
+	lock->l_len = len;
+
+	return (0);
+}
diff --git a/sys/nlm/nlm_prot.h b/sys/nlm/nlm_prot.h
index 6197189..98c5688 100644
--- a/sys/nlm/nlm_prot.h
+++ b/sys/nlm/nlm_prot.h
@@ -280,129 +280,129 @@ typedef struct nlm4_notify nlm4_notify;
 #define	NLM_SM ((unsigned long)(0))
 
 #define	NLM_SM_NOTIFY ((unsigned long)(1))
-extern  enum clnt_stat nlm_sm_notify_0(struct nlm_sm_status *, void *, CLIENT *);
+extern  enum clnt_stat nlm_sm_notify_0(struct nlm_sm_status *, void *, CLIENT *, struct rpc_callextra *, struct timeval);
 extern  bool_t nlm_sm_notify_0_svc(struct nlm_sm_status *, void *, struct svc_req *);
 #define	NLM_VERS ((unsigned long)(1))
 
 #define	NLM_TEST ((unsigned long)(1))
-extern  enum clnt_stat nlm_test_1(struct nlm_testargs *, nlm_testres *, CLIENT *);
+extern  enum clnt_stat nlm_test_1(struct nlm_testargs *, nlm_testres *, CLIENT *, struct rpc_callextra *, struct timeval);
 extern  bool_t nlm_test_1_svc(struct nlm_testargs *, nlm_testres *, struct svc_req *);
 #define	NLM_LOCK ((unsigned long)(2))
-extern  enum clnt_stat nlm_lock_1(struct nlm_lockargs *, nlm_res *, CLIENT *);
+extern  enum clnt_stat nlm_lock_1(struct nlm_lockargs *, nlm_res *, CLIENT *, struct rpc_callextra *, struct timeval);
 extern  bool_t nlm_lock_1_svc(struct nlm_lockargs *, nlm_res *, struct svc_req *);
 #define	NLM_CANCEL ((unsigned long)(3))
-extern  enum clnt_stat nlm_cancel_1(struct nlm_cancargs *, nlm_res *, CLIENT *);
+extern  enum clnt_stat nlm_cancel_1(struct nlm_cancargs *, nlm_res *, CLIENT *, struct rpc_callextra *, struct timeval);
 extern  bool_t nlm_cancel_1_svc(struct nlm_cancargs *, nlm_res *, struct svc_req *);
 #define	NLM_UNLOCK ((unsigned long)(4))
-extern  enum clnt_stat nlm_unlock_1(struct nlm_unlockargs *, nlm_res *, CLIENT *);
+extern  enum clnt_stat nlm_unlock_1(struct nlm_unlockargs *, nlm_res *, CLIENT *, struct rpc_callextra *, struct timeval);
 extern  bool_t nlm_unlock_1_svc(struct nlm_unlockargs *, nlm_res *, struct svc_req *);
 #define	NLM_GRANTED ((unsigned long)(5))
-extern  enum clnt_stat nlm_granted_1(struct nlm_testargs *, nlm_res *, CLIENT *);
+extern  enum clnt_stat nlm_granted_1(struct nlm_testargs *, nlm_res *, CLIENT *, struct rpc_callextra *, struct timeval);
 extern  bool_t nlm_granted_1_svc(struct nlm_testargs *, nlm_res *, struct svc_req *);
 #define	NLM_TEST_MSG ((unsigned long)(6))
-extern  enum clnt_stat nlm_test_msg_1(struct nlm_testargs *, void *, CLIENT *);
+extern  enum clnt_stat nlm_test_msg_1(struct nlm_testargs *, void *, CLIENT *, struct rpc_callextra *, struct timeval);
 extern  bool_t nlm_test_msg_1_svc(struct nlm_testargs *, void *, struct svc_req *);
 #define	NLM_LOCK_MSG ((unsigned long)(7))
-extern  enum clnt_stat nlm_lock_msg_1(struct nlm_lockargs *, void *, CLIENT *);
+extern  enum clnt_stat nlm_lock_msg_1(struct nlm_lockargs *, void *, CLIENT *, struct rpc_callextra *, struct timeval);
 extern  bool_t nlm_lock_msg_1_svc(struct nlm_lockargs *, void *, struct svc_req *);
 #define	NLM_CANCEL_MSG ((unsigned long)(8))
-extern  enum clnt_stat nlm_cancel_msg_1(struct nlm_cancargs *, void *, CLIENT *);
+extern  enum clnt_stat nlm_cancel_msg_1(struct nlm_cancargs *, void *, CLIENT *, struct rpc_callextra *, struct timeval);
 extern  bool_t nlm_cancel_msg_1_svc(struct nlm_cancargs *, void *, struct svc_req *);
 #define	NLM_UNLOCK_MSG ((unsigned long)(9))
-extern  enum clnt_stat nlm_unlock_msg_1(struct nlm_unlockargs *, void *, CLIENT *);
+extern  enum clnt_stat nlm_unlock_msg_1(struct nlm_unlockargs *, void *, CLIENT *, struct rpc_callextra *, struct timeval);
 extern  bool_t nlm_unlock_msg_1_svc(struct nlm_unlockargs *, void *, struct svc_req *);
 #define	NLM_GRANTED_MSG ((unsigned long)(10))
-extern  enum clnt_stat nlm_granted_msg_1(struct nlm_testargs *, void *, CLIENT *);
+extern  enum clnt_stat nlm_granted_msg_1(struct nlm_testargs *, void *, CLIENT *, struct rpc_callextra *, struct timeval);
 extern  bool_t nlm_granted_msg_1_svc(struct nlm_testargs *, void *, struct svc_req *);
 #define	NLM_TEST_RES ((unsigned long)(11))
-extern  enum clnt_stat nlm_test_res_1(nlm_testres *, void *, CLIENT *);
+extern  enum clnt_stat nlm_test_res_1(nlm_testres *, void *, CLIENT *, struct rpc_callextra *, struct timeval);
 extern  bool_t nlm_test_res_1_svc(nlm_testres *, void *, struct svc_req *);
 #define	NLM_LOCK_RES ((unsigned long)(12))
-extern  enum clnt_stat nlm_lock_res_1(nlm_res *, void *, CLIENT *);
+extern  enum clnt_stat nlm_lock_res_1(nlm_res *, void *, CLIENT *, struct rpc_callextra *, struct timeval);
 extern  bool_t nlm_lock_res_1_svc(nlm_res *, void *, struct svc_req *);
 #define	NLM_CANCEL_RES ((unsigned long)(13))
-extern  enum clnt_stat nlm_cancel_res_1(nlm_res *, void *, CLIENT *);
+extern  enum clnt_stat nlm_cancel_res_1(nlm_res *, void *, CLIENT *, struct rpc_callextra *, struct timeval);
 extern  bool_t nlm_cancel_res_1_svc(nlm_res *, void *, struct svc_req *);
 #define	NLM_UNLOCK_RES ((unsigned long)(14))
-extern  enum clnt_stat nlm_unlock_res_1(nlm_res *, void *, CLIENT *);
+extern  enum clnt_stat nlm_unlock_res_1(nlm_res *, void *, CLIENT *, struct rpc_callextra *, struct timeval);
 extern  bool_t nlm_unlock_res_1_svc(nlm_res *, void *, struct svc_req *);
 #define	NLM_GRANTED_RES ((unsigned long)(15))
-extern  enum clnt_stat nlm_granted_res_1(nlm_res *, void *, CLIENT *);
+extern  enum clnt_stat nlm_granted_res_1(nlm_res *, void *, CLIENT *, struct rpc_callextra *, struct timeval);
 extern  bool_t nlm_granted_res_1_svc(nlm_res *, void *, struct svc_req *);
 extern int nlm_prog_1_freeresult(SVCXPRT *, xdrproc_t, caddr_t);
 #define	NLM_VERSX ((unsigned long)(3))
 
 #define	NLM_SHARE ((unsigned long)(20))
-extern  enum clnt_stat nlm_share_3(nlm_shareargs *, nlm_shareres *, CLIENT *);
+extern  enum clnt_stat nlm_share_3(nlm_shareargs *, nlm_shareres *, CLIENT *, struct rpc_callextra *, struct timeval);
 extern  bool_t nlm_share_3_svc(nlm_shareargs *, nlm_shareres *, struct svc_req *);
 #define	NLM_UNSHARE ((unsigned long)(21))
-extern  enum clnt_stat nlm_unshare_3(nlm_shareargs *, nlm_shareres *, CLIENT *);
+extern  enum clnt_stat nlm_unshare_3(nlm_shareargs *, nlm_shareres *, CLIENT *, struct rpc_callextra *, struct timeval);
 extern  bool_t nlm_unshare_3_svc(nlm_shareargs *, nlm_shareres *, struct svc_req *);
 #define	NLM_NM_LOCK ((unsigned long)(22))
-extern  enum clnt_stat nlm_nm_lock_3(nlm_lockargs *, nlm_res *, CLIENT *);
+extern  enum clnt_stat nlm_nm_lock_3(nlm_lockargs *, nlm_res *, CLIENT *, struct rpc_callextra *, struct timeval);
 extern  bool_t nlm_nm_lock_3_svc(nlm_lockargs *, nlm_res *, struct svc_req *);
 #define	NLM_FREE_ALL ((unsigned long)(23))
-extern  enum clnt_stat nlm_free_all_3(nlm_notify *, void *, CLIENT *);
+extern  enum clnt_stat nlm_free_all_3(nlm_notify *, void *, CLIENT *, struct rpc_callextra *, struct timeval);
 extern  bool_t nlm_free_all_3_svc(nlm_notify *, void *, struct svc_req *);
 extern int nlm_prog_3_freeresult(SVCXPRT *, xdrproc_t, caddr_t);
 #define	NLM_VERS4 ((unsigned long)(4))
 
 #define	NLM4_TEST ((unsigned long)(1))
-extern  enum clnt_stat nlm4_test_4(nlm4_testargs *, nlm4_testres *, CLIENT *);
+extern  enum clnt_stat nlm4_test_4(nlm4_testargs *, nlm4_testres *, CLIENT *, struct rpc_callextra *, struct timeval);
 extern  bool_t nlm4_test_4_svc(nlm4_testargs *, nlm4_testres *, struct svc_req *);
 #define	NLM4_LOCK ((unsigned long)(2))
-extern  enum clnt_stat nlm4_lock_4(nlm4_lockargs *, nlm4_res *, CLIENT *);
+extern  enum clnt_stat nlm4_lock_4(nlm4_lockargs *, nlm4_res *, CLIENT *, struct rpc_callextra *, struct timeval);
 extern  bool_t nlm4_lock_4_svc(nlm4_lockargs *, nlm4_res *, struct svc_req *);
 #define	NLM4_CANCEL ((unsigned long)(3))
-extern  enum clnt_stat nlm4_cancel_4(nlm4_cancargs *, nlm4_res *, CLIENT *);
+extern  enum clnt_stat nlm4_cancel_4(nlm4_cancargs *, nlm4_res *, CLIENT *, struct rpc_callextra *, struct timeval);
 extern  bool_t nlm4_cancel_4_svc(nlm4_cancargs *, nlm4_res *, struct svc_req *);
 #define	NLM4_UNLOCK ((unsigned long)(4))
-extern  enum clnt_stat nlm4_unlock_4(nlm4_unlockargs *, nlm4_res *, CLIENT *);
+extern  enum clnt_stat nlm4_unlock_4(nlm4_unlockargs *, nlm4_res *, CLIENT *, struct rpc_callextra *, struct timeval);
 extern  bool_t nlm4_unlock_4_svc(nlm4_unlockargs *, nlm4_res *, struct svc_req *);
 #define	NLM4_GRANTED ((unsigned long)(5))
-extern  enum clnt_stat nlm4_granted_4(nlm4_testargs *, nlm4_res *, CLIENT *);
+extern  enum clnt_stat nlm4_granted_4(nlm4_testargs *, nlm4_res *, CLIENT *, struct rpc_callextra *, struct timeval);
 extern  bool_t nlm4_granted_4_svc(nlm4_testargs *, nlm4_res *, struct svc_req *);
 #define	NLM4_TEST_MSG ((unsigned long)(6))
-extern  enum clnt_stat nlm4_test_msg_4(nlm4_testargs *, void *, CLIENT *);
+extern  enum clnt_stat nlm4_test_msg_4(nlm4_testargs *, void *, CLIENT *, struct rpc_callextra *, struct timeval);
 extern  bool_t nlm4_test_msg_4_svc(nlm4_testargs *, void *, struct svc_req *);
 #define	NLM4_LOCK_MSG ((unsigned long)(7))
-extern  enum clnt_stat nlm4_lock_msg_4(nlm4_lockargs *, void *, CLIENT *);
+extern  enum clnt_stat nlm4_lock_msg_4(nlm4_lockargs *, void *, CLIENT *, struct rpc_callextra *, struct timeval);
 extern  bool_t nlm4_lock_msg_4_svc(nlm4_lockargs *, void *, struct svc_req *);
 #define	NLM4_CANCEL_MSG ((unsigned long)(8))
-extern  enum clnt_stat nlm4_cancel_msg_4(nlm4_cancargs *, void *, CLIENT *);
+extern  enum clnt_stat nlm4_cancel_msg_4(nlm4_cancargs *, void *, CLIENT *, struct rpc_callextra *, struct timeval);
 extern  bool_t nlm4_cancel_msg_4_svc(nlm4_cancargs *, void *, struct svc_req *);
 #define	NLM4_UNLOCK_MSG ((unsigned long)(9))
-extern  enum clnt_stat nlm4_unlock_msg_4(nlm4_unlockargs *, void *, CLIENT *);
+extern  enum clnt_stat nlm4_unlock_msg_4(nlm4_unlockargs *, void *, CLIENT *, struct rpc_callextra *, struct timeval);
 extern  bool_t nlm4_unlock_msg_4_svc(nlm4_unlockargs *, void *, struct svc_req *);
 #define	NLM4_GRANTED_MSG ((unsigned long)(10))
-extern  enum clnt_stat nlm4_granted_msg_4(nlm4_testargs *, void *, CLIENT *);
+extern  enum clnt_stat nlm4_granted_msg_4(nlm4_testargs *, void *, CLIENT *, struct rpc_callextra *, struct timeval);
 extern  bool_t nlm4_granted_msg_4_svc(nlm4_testargs *, void *, struct svc_req *);
 #define	NLM4_TEST_RES ((unsigned long)(11))
-extern  enum clnt_stat nlm4_test_res_4(nlm4_testres *, void *, CLIENT *);
+extern  enum clnt_stat nlm4_test_res_4(nlm4_testres *, void *, CLIENT *, struct rpc_callextra *, struct timeval);
 extern  bool_t nlm4_test_res_4_svc(nlm4_testres *, void *, struct svc_req *);
 #define	NLM4_LOCK_RES ((unsigned long)(12))
-extern  enum clnt_stat nlm4_lock_res_4(nlm4_res *, void *, CLIENT *);
+extern  enum clnt_stat nlm4_lock_res_4(nlm4_res *, void *, CLIENT *, struct rpc_callextra *, struct timeval);
 extern  bool_t nlm4_lock_res_4_svc(nlm4_res *, void *, struct svc_req *);
 #define	NLM4_CANCEL_RES ((unsigned long)(13))
-extern  enum clnt_stat nlm4_cancel_res_4(nlm4_res *, void *, CLIENT *);
+extern  enum clnt_stat nlm4_cancel_res_4(nlm4_res *, void *, CLIENT *, struct rpc_callextra *, struct timeval);
 extern  bool_t nlm4_cancel_res_4_svc(nlm4_res *, void *, struct svc_req *);
 #define	NLM4_UNLOCK_RES ((unsigned long)(14))
-extern  enum clnt_stat nlm4_unlock_res_4(nlm4_res *, void *, CLIENT *);
+extern  enum clnt_stat nlm4_unlock_res_4(nlm4_res *, void *, CLIENT *, struct rpc_callextra *, struct timeval);
 extern  bool_t nlm4_unlock_res_4_svc(nlm4_res *, void *, struct svc_req *);
 #define	NLM4_GRANTED_RES ((unsigned long)(15))
-extern  enum clnt_stat nlm4_granted_res_4(nlm4_res *, void *, CLIENT *);
+extern  enum clnt_stat nlm4_granted_res_4(nlm4_res *, void *, CLIENT *, struct rpc_callextra *, struct timeval);
 extern  bool_t nlm4_granted_res_4_svc(nlm4_res *, void *, struct svc_req *);
 #define	NLM4_SHARE ((unsigned long)(20))
-extern  enum clnt_stat nlm4_share_4(nlm4_shareargs *, nlm4_shareres *, CLIENT *);
+extern  enum clnt_stat nlm4_share_4(nlm4_shareargs *, nlm4_shareres *, CLIENT *, struct rpc_callextra *, struct timeval);
 extern  bool_t nlm4_share_4_svc(nlm4_shareargs *, nlm4_shareres *, struct svc_req *);
 #define	NLM4_UNSHARE ((unsigned long)(21))
-extern  enum clnt_stat nlm4_unshare_4(nlm4_shareargs *, nlm4_shareres *, CLIENT *);
+extern  enum clnt_stat nlm4_unshare_4(nlm4_shareargs *, nlm4_shareres *, CLIENT *, struct rpc_callextra *, struct timeval);
 extern  bool_t nlm4_unshare_4_svc(nlm4_shareargs *, nlm4_shareres *, struct svc_req *);
 #define	NLM4_NM_LOCK ((unsigned long)(22))
-extern  enum clnt_stat nlm4_nm_lock_4(nlm4_lockargs *, nlm4_res *, CLIENT *);
+extern  enum clnt_stat nlm4_nm_lock_4(nlm4_lockargs *, nlm4_res *, CLIENT *, struct rpc_callextra *, struct timeval);
 extern  bool_t nlm4_nm_lock_4_svc(nlm4_lockargs *, nlm4_res *, struct svc_req *);
 #define	NLM4_FREE_ALL ((unsigned long)(23))
-extern  enum clnt_stat nlm4_free_all_4(nlm4_notify *, void *, CLIENT *);
+extern  enum clnt_stat nlm4_free_all_4(nlm4_notify *, void *, CLIENT *, struct rpc_callextra *, struct timeval);
 extern  bool_t nlm4_free_all_4_svc(nlm4_notify *, void *, struct svc_req *);
 extern int nlm_prog_4_freeresult(SVCXPRT *, xdrproc_t, caddr_t);
 
diff --git a/sys/nlm/nlm_prot_clnt.c b/sys/nlm/nlm_prot_clnt.c
index 9a16e32..a268e63 100644
--- a/sys/nlm/nlm_prot_clnt.c
+++ b/sys/nlm/nlm_prot_clnt.c
@@ -17,356 +17,353 @@ __RCSID("$NetBSD: nlm_prot.x,v 1.6 2000/06/07 14:30:15 bouyer Exp $");
 #endif /* not lint */
 __FBSDID("$FreeBSD$");
 
-/* Default timeout can be changed using clnt_control() */
-static struct timeval TIMEOUT = { 25, 0 };
-
 enum clnt_stat 
-nlm_sm_notify_0(struct nlm_sm_status *argp, void *clnt_res, CLIENT *clnt)
+nlm_sm_notify_0(struct nlm_sm_status *argp, void *clnt_res, CLIENT *clnt, struct rpc_callextra *ext, struct timeval timo)
 {
-	return (clnt_call(clnt, NLM_SM_NOTIFY,
+	return (CLNT_CALL_EXT(clnt, ext, NLM_SM_NOTIFY,
 		(xdrproc_t) xdr_nlm_sm_status, (caddr_t) argp,
 		(xdrproc_t) xdr_void, (caddr_t) clnt_res,
-		TIMEOUT));
+		timo));
 }
 
 enum clnt_stat 
-nlm_test_1(struct nlm_testargs *argp, nlm_testres *clnt_res, CLIENT *clnt)
+nlm_test_1(struct nlm_testargs *argp, nlm_testres *clnt_res, CLIENT *clnt, struct rpc_callextra *ext, struct timeval timo)
 {
-	return (clnt_call(clnt, NLM_TEST,
+	return (CLNT_CALL_EXT(clnt, ext, NLM_TEST,
 		(xdrproc_t) xdr_nlm_testargs, (caddr_t) argp,
 		(xdrproc_t) xdr_nlm_testres, (caddr_t) clnt_res,
-		TIMEOUT));
+		timo));
 }
 
 enum clnt_stat 
-nlm_lock_1(struct nlm_lockargs *argp, nlm_res *clnt_res, CLIENT *clnt)
+nlm_lock_1(struct nlm_lockargs *argp, nlm_res *clnt_res, CLIENT *clnt, struct rpc_callextra *ext, struct timeval timo)
 {
-	return (clnt_call(clnt, NLM_LOCK,
+	return (CLNT_CALL_EXT(clnt, ext, NLM_LOCK,
 		(xdrproc_t) xdr_nlm_lockargs, (caddr_t) argp,
 		(xdrproc_t) xdr_nlm_res, (caddr_t) clnt_res,
-		TIMEOUT));
+		timo));
 }
 
 enum clnt_stat 
-nlm_cancel_1(struct nlm_cancargs *argp, nlm_res *clnt_res, CLIENT *clnt)
+nlm_cancel_1(struct nlm_cancargs *argp, nlm_res *clnt_res, CLIENT *clnt, struct rpc_callextra *ext, struct timeval timo)
 {
-	return (clnt_call(clnt, NLM_CANCEL,
+	return (CLNT_CALL_EXT(clnt, ext, NLM_CANCEL,
 		(xdrproc_t) xdr_nlm_cancargs, (caddr_t) argp,
 		(xdrproc_t) xdr_nlm_res, (caddr_t) clnt_res,
-		TIMEOUT));
+		timo));
 }
 
 enum clnt_stat 
-nlm_unlock_1(struct nlm_unlockargs *argp, nlm_res *clnt_res, CLIENT *clnt)
+nlm_unlock_1(struct nlm_unlockargs *argp, nlm_res *clnt_res, CLIENT *clnt, struct rpc_callextra *ext, struct timeval timo)
 {
-	return (clnt_call(clnt, NLM_UNLOCK,
+	return (CLNT_CALL_EXT(clnt, ext, NLM_UNLOCK,
 		(xdrproc_t) xdr_nlm_unlockargs, (caddr_t) argp,
 		(xdrproc_t) xdr_nlm_res, (caddr_t) clnt_res,
-		TIMEOUT));
+		timo));
 }
 
 enum clnt_stat 
-nlm_granted_1(struct nlm_testargs *argp, nlm_res *clnt_res, CLIENT *clnt)
+nlm_granted_1(struct nlm_testargs *argp, nlm_res *clnt_res, CLIENT *clnt, struct rpc_callextra *ext, struct timeval timo)
 {
-	return (clnt_call(clnt, NLM_GRANTED,
+	return (CLNT_CALL_EXT(clnt, ext, NLM_GRANTED,
 		(xdrproc_t) xdr_nlm_testargs, (caddr_t) argp,
 		(xdrproc_t) xdr_nlm_res, (caddr_t) clnt_res,
-		TIMEOUT));
+		timo));
 }
 
 enum clnt_stat 
-nlm_test_msg_1(struct nlm_testargs *argp, void *clnt_res, CLIENT *clnt)
+nlm_test_msg_1(struct nlm_testargs *argp, void *clnt_res, CLIENT *clnt, struct rpc_callextra *ext, struct timeval timo)
 {
-	return (clnt_call(clnt, NLM_TEST_MSG,
+	return (CLNT_CALL_EXT(clnt, ext, NLM_TEST_MSG,
 		(xdrproc_t) xdr_nlm_testargs, (caddr_t) argp,
 		(xdrproc_t) xdr_void, (caddr_t) clnt_res,
-		TIMEOUT));
+		timo));
 }
 
 enum clnt_stat 
-nlm_lock_msg_1(struct nlm_lockargs *argp, void *clnt_res, CLIENT *clnt)
+nlm_lock_msg_1(struct nlm_lockargs *argp, void *clnt_res, CLIENT *clnt, struct rpc_callextra *ext, struct timeval timo)
 {
-	return (clnt_call(clnt, NLM_LOCK_MSG,
+	return (CLNT_CALL_EXT(clnt, ext, NLM_LOCK_MSG,
 		(xdrproc_t) xdr_nlm_lockargs, (caddr_t) argp,
 		(xdrproc_t) xdr_void, (caddr_t) clnt_res,
-		TIMEOUT));
+		timo));
 }
 
 enum clnt_stat 
-nlm_cancel_msg_1(struct nlm_cancargs *argp, void *clnt_res, CLIENT *clnt)
+nlm_cancel_msg_1(struct nlm_cancargs *argp, void *clnt_res, CLIENT *clnt, struct rpc_callextra *ext, struct timeval timo)
 {
-	return (clnt_call(clnt, NLM_CANCEL_MSG,
+	return (CLNT_CALL_EXT(clnt, ext, NLM_CANCEL_MSG,
 		(xdrproc_t) xdr_nlm_cancargs, (caddr_t) argp,
 		(xdrproc_t) xdr_void, (caddr_t) clnt_res,
-		TIMEOUT));
+		timo));
 }
 
 enum clnt_stat 
-nlm_unlock_msg_1(struct nlm_unlockargs *argp, void *clnt_res, CLIENT *clnt)
+nlm_unlock_msg_1(struct nlm_unlockargs *argp, void *clnt_res, CLIENT *clnt, struct rpc_callextra *ext, struct timeval timo)
 {
-	return (clnt_call(clnt, NLM_UNLOCK_MSG,
+	return (CLNT_CALL_EXT(clnt, ext, NLM_UNLOCK_MSG,
 		(xdrproc_t) xdr_nlm_unlockargs, (caddr_t) argp,
 		(xdrproc_t) xdr_void, (caddr_t) clnt_res,
-		TIMEOUT));
+		timo));
 }
 
 enum clnt_stat 
-nlm_granted_msg_1(struct nlm_testargs *argp, void *clnt_res, CLIENT *clnt)
+nlm_granted_msg_1(struct nlm_testargs *argp, void *clnt_res, CLIENT *clnt, struct rpc_callextra *ext, struct timeval timo)
 {
-	return (clnt_call(clnt, NLM_GRANTED_MSG,
+	return (CLNT_CALL_EXT(clnt, ext, NLM_GRANTED_MSG,
 		(xdrproc_t) xdr_nlm_testargs, (caddr_t) argp,
 		(xdrproc_t) xdr_void, (caddr_t) clnt_res,
-		TIMEOUT));
+		timo));
 }
 
 enum clnt_stat 
-nlm_test_res_1(nlm_testres *argp, void *clnt_res, CLIENT *clnt)
+nlm_test_res_1(nlm_testres *argp, void *clnt_res, CLIENT *clnt, struct rpc_callextra *ext, struct timeval timo)
 {
-	return (clnt_call(clnt, NLM_TEST_RES,
+	return (CLNT_CALL_EXT(clnt, ext, NLM_TEST_RES,
 		(xdrproc_t) xdr_nlm_testres, (caddr_t) argp,
 		(xdrproc_t) xdr_void, (caddr_t) clnt_res,
-		TIMEOUT));
+		timo));
 }
 
 enum clnt_stat 
-nlm_lock_res_1(nlm_res *argp, void *clnt_res, CLIENT *clnt)
+nlm_lock_res_1(nlm_res *argp, void *clnt_res, CLIENT *clnt, struct rpc_callextra *ext, struct timeval timo)
 {
-	return (clnt_call(clnt, NLM_LOCK_RES,
+	return (CLNT_CALL_EXT(clnt, ext, NLM_LOCK_RES,
 		(xdrproc_t) xdr_nlm_res, (caddr_t) argp,
 		(xdrproc_t) xdr_void, (caddr_t) clnt_res,
-		TIMEOUT));
+		timo));
 }
 
 enum clnt_stat 
-nlm_cancel_res_1(nlm_res *argp, void *clnt_res, CLIENT *clnt)
+nlm_cancel_res_1(nlm_res *argp, void *clnt_res, CLIENT *clnt, struct rpc_callextra *ext, struct timeval timo)
 {
-	return (clnt_call(clnt, NLM_CANCEL_RES,
+	return (CLNT_CALL_EXT(clnt, ext, NLM_CANCEL_RES,
 		(xdrproc_t) xdr_nlm_res, (caddr_t) argp,
 		(xdrproc_t) xdr_void, (caddr_t) clnt_res,
-		TIMEOUT));
+		timo));
 }
 
 enum clnt_stat 
-nlm_unlock_res_1(nlm_res *argp, void *clnt_res, CLIENT *clnt)
+nlm_unlock_res_1(nlm_res *argp, void *clnt_res, CLIENT *clnt, struct rpc_callextra *ext, struct timeval timo)
 {
-	return (clnt_call(clnt, NLM_UNLOCK_RES,
+	return (CLNT_CALL_EXT(clnt, ext, NLM_UNLOCK_RES,
 		(xdrproc_t) xdr_nlm_res, (caddr_t) argp,
 		(xdrproc_t) xdr_void, (caddr_t) clnt_res,
-		TIMEOUT));
+		timo));
 }
 
 enum clnt_stat 
-nlm_granted_res_1(nlm_res *argp, void *clnt_res, CLIENT *clnt)
+nlm_granted_res_1(nlm_res *argp, void *clnt_res, CLIENT *clnt, struct rpc_callextra *ext, struct timeval timo)
 {
-	return (clnt_call(clnt, NLM_GRANTED_RES,
+	return (CLNT_CALL_EXT(clnt, ext, NLM_GRANTED_RES,
 		(xdrproc_t) xdr_nlm_res, (caddr_t) argp,
 		(xdrproc_t) xdr_void, (caddr_t) clnt_res,
-		TIMEOUT));
+		timo));
 }
 
 enum clnt_stat 
-nlm_share_3(nlm_shareargs *argp, nlm_shareres *clnt_res, CLIENT *clnt)
+nlm_share_3(nlm_shareargs *argp, nlm_shareres *clnt_res, CLIENT *clnt, struct rpc_callextra *ext, struct timeval timo)
 {
-	return (clnt_call(clnt, NLM_SHARE,
+	return (CLNT_CALL_EXT(clnt, ext, NLM_SHARE,
 		(xdrproc_t) xdr_nlm_shareargs, (caddr_t) argp,
 		(xdrproc_t) xdr_nlm_shareres, (caddr_t) clnt_res,
-		TIMEOUT));
+		timo));
 }
 
 enum clnt_stat 
-nlm_unshare_3(nlm_shareargs *argp, nlm_shareres *clnt_res, CLIENT *clnt)
+nlm_unshare_3(nlm_shareargs *argp, nlm_shareres *clnt_res, CLIENT *clnt, struct rpc_callextra *ext, struct timeval timo)
 {
-	return (clnt_call(clnt, NLM_UNSHARE,
+	return (CLNT_CALL_EXT(clnt, ext, NLM_UNSHARE,
 		(xdrproc_t) xdr_nlm_shareargs, (caddr_t) argp,
 		(xdrproc_t) xdr_nlm_shareres, (caddr_t) clnt_res,
-		TIMEOUT));
+		timo));
 }
 
 enum clnt_stat 
-nlm_nm_lock_3(nlm_lockargs *argp, nlm_res *clnt_res, CLIENT *clnt)
+nlm_nm_lock_3(nlm_lockargs *argp, nlm_res *clnt_res, CLIENT *clnt, struct rpc_callextra *ext, struct timeval timo)
 {
-	return (clnt_call(clnt, NLM_NM_LOCK,
+	return (CLNT_CALL_EXT(clnt, ext, NLM_NM_LOCK,
 		(xdrproc_t) xdr_nlm_lockargs, (caddr_t) argp,
 		(xdrproc_t) xdr_nlm_res, (caddr_t) clnt_res,
-		TIMEOUT));
+		timo));
 }
 
 enum clnt_stat 
-nlm_free_all_3(nlm_notify *argp, void *clnt_res, CLIENT *clnt)
+nlm_free_all_3(nlm_notify *argp, void *clnt_res, CLIENT *clnt, struct rpc_callextra *ext, struct timeval timo)
 {
-	return (clnt_call(clnt, NLM_FREE_ALL,
+	return (CLNT_CALL_EXT(clnt, ext, NLM_FREE_ALL,
 		(xdrproc_t) xdr_nlm_notify, (caddr_t) argp,
 		(xdrproc_t) xdr_void, (caddr_t) clnt_res,
-		TIMEOUT));
+		timo));
 }
 
 enum clnt_stat 
-nlm4_test_4(nlm4_testargs *argp, nlm4_testres *clnt_res, CLIENT *clnt)
+nlm4_test_4(nlm4_testargs *argp, nlm4_testres *clnt_res, CLIENT *clnt, struct rpc_callextra *ext, struct timeval timo)
 {
-	return (clnt_call(clnt, NLM4_TEST,
+	return (CLNT_CALL_EXT(clnt, ext, NLM4_TEST,
 		(xdrproc_t) xdr_nlm4_testargs, (caddr_t) argp,
 		(xdrproc_t) xdr_nlm4_testres, (caddr_t) clnt_res,
-		TIMEOUT));
+		timo));
 }
 
 enum clnt_stat 
-nlm4_lock_4(nlm4_lockargs *argp, nlm4_res *clnt_res, CLIENT *clnt)
+nlm4_lock_4(nlm4_lockargs *argp, nlm4_res *clnt_res, CLIENT *clnt, struct rpc_callextra *ext, struct timeval timo)
 {
-	return (clnt_call(clnt, NLM4_LOCK,
+	return (CLNT_CALL_EXT(clnt, ext, NLM4_LOCK,
 		(xdrproc_t) xdr_nlm4_lockargs, (caddr_t) argp,
 		(xdrproc_t) xdr_nlm4_res, (caddr_t) clnt_res,
-		TIMEOUT));
+		timo));
 }
 
 enum clnt_stat 
-nlm4_cancel_4(nlm4_cancargs *argp, nlm4_res *clnt_res, CLIENT *clnt)
+nlm4_cancel_4(nlm4_cancargs *argp, nlm4_res *clnt_res, CLIENT *clnt, struct rpc_callextra *ext, struct timeval timo)
 {
-	return (clnt_call(clnt, NLM4_CANCEL,
+	return (CLNT_CALL_EXT(clnt, ext, NLM4_CANCEL,
 		(xdrproc_t) xdr_nlm4_cancargs, (caddr_t) argp,
 		(xdrproc_t) xdr_nlm4_res, (caddr_t) clnt_res,
-		TIMEOUT));
+		timo));
 }
 
 enum clnt_stat 
-nlm4_unlock_4(nlm4_unlockargs *argp, nlm4_res *clnt_res, CLIENT *clnt)
+nlm4_unlock_4(nlm4_unlockargs *argp, nlm4_res *clnt_res, CLIENT *clnt, struct rpc_callextra *ext, struct timeval timo)
 {
-	return (clnt_call(clnt, NLM4_UNLOCK,
+	return (CLNT_CALL_EXT(clnt, ext, NLM4_UNLOCK,
 		(xdrproc_t) xdr_nlm4_unlockargs, (caddr_t) argp,
 		(xdrproc_t) xdr_nlm4_res, (caddr_t) clnt_res,
-		TIMEOUT));
+		timo));
 }
 
 enum clnt_stat 
-nlm4_granted_4(nlm4_testargs *argp, nlm4_res *clnt_res, CLIENT *clnt)
+nlm4_granted_4(nlm4_testargs *argp, nlm4_res *clnt_res, CLIENT *clnt, struct rpc_callextra *ext, struct timeval timo)
 {
-	return (clnt_call(clnt, NLM4_GRANTED,
+	return (CLNT_CALL_EXT(clnt, ext, NLM4_GRANTED,
 		(xdrproc_t) xdr_nlm4_testargs, (caddr_t) argp,
 		(xdrproc_t) xdr_nlm4_res, (caddr_t) clnt_res,
-		TIMEOUT));
+		timo));
 }
 
 enum clnt_stat 
-nlm4_test_msg_4(nlm4_testargs *argp, void *clnt_res, CLIENT *clnt)
+nlm4_test_msg_4(nlm4_testargs *argp, void *clnt_res, CLIENT *clnt, struct rpc_callextra *ext, struct timeval timo)
 {
-	return (clnt_call(clnt, NLM4_TEST_MSG,
+	return (CLNT_CALL_EXT(clnt, ext, NLM4_TEST_MSG,
 		(xdrproc_t) xdr_nlm4_testargs, (caddr_t) argp,
 		(xdrproc_t) xdr_void, (caddr_t) clnt_res,
-		TIMEOUT));
+		timo));
 }
 
 enum clnt_stat 
-nlm4_lock_msg_4(nlm4_lockargs *argp, void *clnt_res, CLIENT *clnt)
+nlm4_lock_msg_4(nlm4_lockargs *argp, void *clnt_res, CLIENT *clnt, struct rpc_callextra *ext, struct timeval timo)
 {
-	return (clnt_call(clnt, NLM4_LOCK_MSG,
+	return (CLNT_CALL_EXT(clnt, ext, NLM4_LOCK_MSG,
 		(xdrproc_t) xdr_nlm4_lockargs, (caddr_t) argp,
 		(xdrproc_t) xdr_void, (caddr_t) clnt_res,
-		TIMEOUT));
+		timo));
 }
 
 enum clnt_stat 
-nlm4_cancel_msg_4(nlm4_cancargs *argp, void *clnt_res, CLIENT *clnt)
+nlm4_cancel_msg_4(nlm4_cancargs *argp, void *clnt_res, CLIENT *clnt, struct rpc_callextra *ext, struct timeval timo)
 {
-	return (clnt_call(clnt, NLM4_CANCEL_MSG,
+	return (CLNT_CALL_EXT(clnt, ext, NLM4_CANCEL_MSG,
 		(xdrproc_t) xdr_nlm4_cancargs, (caddr_t) argp,
 		(xdrproc_t) xdr_void, (caddr_t) clnt_res,
-		TIMEOUT));
+		timo));
 }
 
 enum clnt_stat 
-nlm4_unlock_msg_4(nlm4_unlockargs *argp, void *clnt_res, CLIENT *clnt)
+nlm4_unlock_msg_4(nlm4_unlockargs *argp, void *clnt_res, CLIENT *clnt, struct rpc_callextra *ext, struct timeval timo)
 {
-	return (clnt_call(clnt, NLM4_UNLOCK_MSG,
+	return (CLNT_CALL_EXT(clnt, ext, NLM4_UNLOCK_MSG,
 		(xdrproc_t) xdr_nlm4_unlockargs, (caddr_t) argp,
 		(xdrproc_t) xdr_void, (caddr_t) clnt_res,
-		TIMEOUT));
+		timo));
 }
 
 enum clnt_stat 
-nlm4_granted_msg_4(nlm4_testargs *argp, void *clnt_res, CLIENT *clnt)
+nlm4_granted_msg_4(nlm4_testargs *argp, void *clnt_res, CLIENT *clnt, struct rpc_callextra *ext, struct timeval timo)
 {
-	return (clnt_call(clnt, NLM4_GRANTED_MSG,
+	return (CLNT_CALL_EXT(clnt, ext, NLM4_GRANTED_MSG,
 		(xdrproc_t) xdr_nlm4_testargs, (caddr_t) argp,
 		(xdrproc_t) xdr_void, (caddr_t) clnt_res,
-		TIMEOUT));
+		timo));
 }
 
 enum clnt_stat 
-nlm4_test_res_4(nlm4_testres *argp, void *clnt_res, CLIENT *clnt)
+nlm4_test_res_4(nlm4_testres *argp, void *clnt_res, CLIENT *clnt, struct rpc_callextra *ext, struct timeval timo)
 {
-	return (clnt_call(clnt, NLM4_TEST_RES,
+	return (CLNT_CALL_EXT(clnt, ext, NLM4_TEST_RES,
 		(xdrproc_t) xdr_nlm4_testres, (caddr_t) argp,
 		(xdrproc_t) xdr_void, (caddr_t) clnt_res,
-		TIMEOUT));
+		timo));
 }
 
 enum clnt_stat 
-nlm4_lock_res_4(nlm4_res *argp, void *clnt_res, CLIENT *clnt)
+nlm4_lock_res_4(nlm4_res *argp, void *clnt_res, CLIENT *clnt, struct rpc_callextra *ext, struct timeval timo)
 {
-	return (clnt_call(clnt, NLM4_LOCK_RES,
+	return (CLNT_CALL_EXT(clnt, ext, NLM4_LOCK_RES,
 		(xdrproc_t) xdr_nlm4_res, (caddr_t) argp,
 		(xdrproc_t) xdr_void, (caddr_t) clnt_res,
-		TIMEOUT));
+		timo));
 }
 
 enum clnt_stat 
-nlm4_cancel_res_4(nlm4_res *argp, void *clnt_res, CLIENT *clnt)
+nlm4_cancel_res_4(nlm4_res *argp, void *clnt_res, CLIENT *clnt, struct rpc_callextra *ext, struct timeval timo)
 {
-	return (clnt_call(clnt, NLM4_CANCEL_RES,
+	return (CLNT_CALL_EXT(clnt, ext, NLM4_CANCEL_RES,
 		(xdrproc_t) xdr_nlm4_res, (caddr_t) argp,
 		(xdrproc_t) xdr_void, (caddr_t) clnt_res,
-		TIMEOUT));
+		timo));
 }
 
 enum clnt_stat 
-nlm4_unlock_res_4(nlm4_res *argp, void *clnt_res, CLIENT *clnt)
+nlm4_unlock_res_4(nlm4_res *argp, void *clnt_res, CLIENT *clnt, struct rpc_callextra *ext, struct timeval timo)
 {
-	return (clnt_call(clnt, NLM4_UNLOCK_RES,
+	return (CLNT_CALL_EXT(clnt, ext, NLM4_UNLOCK_RES,
 		(xdrproc_t) xdr_nlm4_res, (caddr_t) argp,
 		(xdrproc_t) xdr_void, (caddr_t) clnt_res,
-		TIMEOUT));
+		timo));
 }
 
 enum clnt_stat 
-nlm4_granted_res_4(nlm4_res *argp, void *clnt_res, CLIENT *clnt)
+nlm4_granted_res_4(nlm4_res *argp, void *clnt_res, CLIENT *clnt, struct rpc_callextra *ext, struct timeval timo)
 {
-	return (clnt_call(clnt, NLM4_GRANTED_RES,
+	return (CLNT_CALL_EXT(clnt, ext, NLM4_GRANTED_RES,
 		(xdrproc_t) xdr_nlm4_res, (caddr_t) argp,
 		(xdrproc_t) xdr_void, (caddr_t) clnt_res,
-		TIMEOUT));
+		timo));
 }
 
 enum clnt_stat 
-nlm4_share_4(nlm4_shareargs *argp, nlm4_shareres *clnt_res, CLIENT *clnt)
+nlm4_share_4(nlm4_shareargs *argp, nlm4_shareres *clnt_res, CLIENT *clnt, struct rpc_callextra *ext, struct timeval timo)
 {
-	return (clnt_call(clnt, NLM4_SHARE,
+	return (CLNT_CALL_EXT(clnt, ext, NLM4_SHARE,
 		(xdrproc_t) xdr_nlm4_shareargs, (caddr_t) argp,
 		(xdrproc_t) xdr_nlm4_shareres, (caddr_t) clnt_res,
-		TIMEOUT));
+		timo));
 }
 
 enum clnt_stat 
-nlm4_unshare_4(nlm4_shareargs *argp, nlm4_shareres *clnt_res, CLIENT *clnt)
+nlm4_unshare_4(nlm4_shareargs *argp, nlm4_shareres *clnt_res, CLIENT *clnt, struct rpc_callextra *ext, struct timeval timo)
 {
-	return (clnt_call(clnt, NLM4_UNSHARE,
+	return (CLNT_CALL_EXT(clnt, ext, NLM4_UNSHARE,
 		(xdrproc_t) xdr_nlm4_shareargs, (caddr_t) argp,
 		(xdrproc_t) xdr_nlm4_shareres, (caddr_t) clnt_res,
-		TIMEOUT));
+		timo));
 }
 
 enum clnt_stat 
-nlm4_nm_lock_4(nlm4_lockargs *argp, nlm4_res *clnt_res, CLIENT *clnt)
+nlm4_nm_lock_4(nlm4_lockargs *argp, nlm4_res *clnt_res, CLIENT *clnt, struct rpc_callextra *ext, struct timeval timo)
 {
-	return (clnt_call(clnt, NLM4_NM_LOCK,
+	return (CLNT_CALL_EXT(clnt, ext, NLM4_NM_LOCK,
 		(xdrproc_t) xdr_nlm4_lockargs, (caddr_t) argp,
 		(xdrproc_t) xdr_nlm4_res, (caddr_t) clnt_res,
-		TIMEOUT));
+		timo));
 }
 
 enum clnt_stat 
-nlm4_free_all_4(nlm4_notify *argp, void *clnt_res, CLIENT *clnt)
+nlm4_free_all_4(nlm4_notify *argp, void *clnt_res, CLIENT *clnt, struct rpc_callextra *ext, struct timeval timo)
 {
-	return (clnt_call(clnt, NLM4_FREE_ALL,
+	return (CLNT_CALL_EXT(clnt, ext, NLM4_FREE_ALL,
 		(xdrproc_t) xdr_nlm4_notify, (caddr_t) argp,
 		(xdrproc_t) xdr_void, (caddr_t) clnt_res,
-		TIMEOUT));
+		timo));
 }
diff --git a/sys/nlm/nlm_prot_impl.c b/sys/nlm/nlm_prot_impl.c
index 4baa48f..7647ae5 100644
--- a/sys/nlm/nlm_prot_impl.c
+++ b/sys/nlm/nlm_prot_impl.c
@@ -33,6 +33,7 @@ __FBSDID("$FreeBSD$");
 #include <sys/param.h>
 #include <sys/fcntl.h>
 #include <sys/kernel.h>
+#include <sys/kthread.h>
 #include <sys/lockf.h>
 #include <sys/malloc.h>
 #include <sys/mount.h>
@@ -51,6 +52,10 @@ __FBSDID("$FreeBSD$");
 #include <sys/unistd.h>
 #include <sys/vnode.h>
 
+#include <nfs/nfsproto.h>
+#include <nfsclient/nfs.h>
+#include <nfsclient/nfsnode.h>
+
 #include <nlm/nlm_prot.h>
 #include <nlm/sm_inter.h>
 #include <nlm/nlm.h>
@@ -131,21 +136,53 @@ static struct socket *nlm_socket6;
 static CLIENT *nlm_nsm;
 
 /*
- * An RPC client handle that can be used to communicate with the
- * userland part of lockd.
+ * An AUTH handle for the server's creds.
  */
-static CLIENT *nlm_lockd;
+static AUTH *nlm_auth;
+
+/*
+ * A zero timeval for sending async RPC messages.
+ */
+struct timeval nlm_zero_tv = { 0, 0 };
+
+/*
+ * The local NSM state number
+ */
+int nlm_nsm_state;
+
+
+/*
+ * A lock to protect the host list and waiting lock list.
+ */
+static struct mtx nlm_global_lock;
 
 /*
  * Locks:
  * (l)		locked by nh_lock
  * (s)		only accessed via server RPC which is single threaded
+ * (g)		locked by nlm_global_lock
  * (c)		const until freeing
+ * (a)		modified using atomic ops
+ */
+
+/*
+ * A pending client-side lock request, stored on the nlm_waiting_locks
+ * list.
  */
+struct nlm_waiting_lock {
+	TAILQ_ENTRY(nlm_waiting_lock) nw_link; /* (g) */
+	bool_t		nw_waiting;	       /* (g) */
+	nlm4_lock	nw_lock;	       /* (c) */
+	union nfsfh	nw_fh;		       /* (c) */
+	struct vnode	*nw_vp;		       /* (c) */
+};
+TAILQ_HEAD(nlm_waiting_lock_list, nlm_waiting_lock);
+
+struct nlm_waiting_lock_list nlm_waiting_locks; /* (g) */
 
 /*
- * A pending asynchronous lock request, stored on the nh_pending list
- * of the NLM host.
+ * A pending server-side asynchronous lock request, stored on the
+ * nh_pending list of the NLM host.
  */
 struct nlm_async_lock {
 	TAILQ_ENTRY(nlm_async_lock) af_link; /* (l) host's list of locks */
@@ -154,6 +191,7 @@ struct nlm_async_lock {
 	struct vnode	*af_vp;		/* (l) vnode to lock */
 	struct flock	af_fl;		/* (c) lock details */
 	struct nlm_host *af_host;	/* (c) host which is locking */
+	CLIENT		*af_rpc;	/* (c) rpc client to send message */
 	nlm4_testargs	af_granted;	/* (c) notification details */
 };
 TAILQ_HEAD(nlm_async_lock_list, nlm_async_lock);
@@ -164,19 +202,21 @@ TAILQ_HEAD(nlm_async_lock_list, nlm_async_lock);
 enum nlm_host_state {
 	NLM_UNMONITORED,
 	NLM_MONITORED,
-	NLM_MONITOR_FAILED
+	NLM_MONITOR_FAILED,
+	NLM_RECOVERING
 };
 struct nlm_host {
 	struct mtx	nh_lock;
-	TAILQ_ENTRY(nlm_host) nh_link; /* (s) global list of hosts */
-	char		*nh_caller_name; /* (c) printable name of host */
+	volatile u_int	nh_refs;       /* (a) reference count */
+	TAILQ_ENTRY(nlm_host) nh_link; /* (g) global list of hosts */
+	char		nh_caller_name[MAXNAMELEN]; /* (c) printable name of host */
 	uint32_t	nh_sysid;	 /* (c) our allocaed system ID */
 	char		nh_sysid_string[10]; /* (c) string rep. of sysid */
 	struct sockaddr_storage	nh_addr; /* (s) remote address of host */
-	CLIENT		*nh_rpc;	 /* (s) RPC handle to send to host */
+	CLIENT		*nh_rpc;	 /* (l) RPC handle to send to host */
 	rpcvers_t	nh_vers;	 /* (s) NLM version of host */
 	int		nh_state;	 /* (s) last seen NSM state of host */
-	enum nlm_host_state nh_monstate; /* (s) local NSM monitoring state */
+	enum nlm_host_state nh_monstate; /* (l) local NSM monitoring state */
 	time_t		nh_idle_timeout; /* (s) Time at which host is idle */
 	time_t		nh_rpc_create_time; /* (s) Time we create RPC client */
 	struct sysctl_ctx_list nh_sysctl; /* (c) vfs.nlm.sysid nodes */
@@ -185,8 +225,8 @@ struct nlm_host {
 };
 TAILQ_HEAD(nlm_host_list, nlm_host);
 
-static struct nlm_host_list nlm_hosts;
-static uint32_t nlm_next_sysid = 1;
+static struct nlm_host_list nlm_hosts; /* (g) */
+static uint32_t nlm_next_sysid = 1;    /* (g) */
 
 static void	nlm_host_unmonitor(struct nlm_host *);
 
@@ -200,6 +240,8 @@ nlm_init(void *dummy)
 {
 	int error;
 
+	mtx_init(&nlm_global_lock, "nlm_global_lock", NULL, MTX_DEF);
+	TAILQ_INIT(&nlm_waiting_locks);
 	TAILQ_INIT(&nlm_hosts);
 
 	error = syscall_register(&nlm_syscall_offset, &nlm_syscall_sysent,
@@ -381,7 +423,7 @@ again:
 	CLNT_CONTROL(rpcb, CLSET_PROG, &prog);
 	CLNT_CONTROL(rpcb, CLSET_VERS, &vers);
 	CLNT_CONTROL(rpcb, CLSET_WAITCHAN, &wchan);
-	rpcb->cl_auth = authunix_create(curthread->td_ucred);
+	rpcb->cl_auth = nlm_auth;
 
 	return (rpcb);
 }
@@ -394,6 +436,7 @@ static void
 nlm_lock_callback(void *arg, int pending)
 {
 	struct nlm_async_lock *af = (struct nlm_async_lock *) arg;
+	struct rpc_callextra ext;
 
 	if (nlm_debug_level >= 2)
 		printf("NLM: async lock %p for %s (sysid %d) granted\n",
@@ -408,9 +451,11 @@ nlm_lock_callback(void *arg, int pending)
 	 * thing nlm_host_notify does is to cancel pending async lock
 	 * requests.
 	 */
+	memset(&ext, 0, sizeof(ext));
+	ext.rc_auth = nlm_auth;
 	if (af->af_host->nh_vers == NLM_VERS4) {
 		nlm4_granted_msg_4(&af->af_granted,
-		    NULL, af->af_host->nh_rpc);
+		    NULL, af->af_rpc, &ext, nlm_zero_tv);
 	} else {
 		/*
 		 * Back-convert to legacy protocol
@@ -429,7 +474,7 @@ nlm_lock_callback(void *arg, int pending)
 			af->af_granted.alock.l_len;
 
 		nlm_granted_msg_1(&granted,
-		    NULL, af->af_host->nh_rpc);
+		    NULL, af->af_rpc, &ext, nlm_zero_tv);
 	}
 
 	/*
@@ -456,6 +501,8 @@ nlm_free_async_lock(struct nlm_async_lock *af)
 	/*
 	 * Free an async lock.
 	 */
+	if (af->af_rpc)
+		CLNT_RELEASE(af->af_rpc);
 	xdr_free((xdrproc_t) xdr_nlm4_testargs, &af->af_granted);
 	if (af->af_vp)
 		vrele(af->af_vp);
@@ -527,11 +574,57 @@ nlm_free_finished_locks(struct nlm_host *host)
 }
 
 /*
- * This is called when we receive a host state change
- * notification. We unlock any active locks owned by the host.
+ * Free resources used by a host. This is called after the reference
+ * count has reached zero so it doesn't need to worry about locks.
  */
 static void
-nlm_host_notify(struct nlm_host *host, int newstate, bool_t destroy)
+nlm_host_destroy(struct nlm_host *host)
+{
+
+	mtx_lock(&nlm_global_lock);
+	TAILQ_REMOVE(&nlm_hosts, host, nh_link);
+	mtx_unlock(&nlm_global_lock);
+
+	if (host->nh_rpc)
+		CLNT_RELEASE(host->nh_rpc);
+	mtx_destroy(&host->nh_lock);
+	sysctl_ctx_free(&host->nh_sysctl);
+	free(host, M_NLM);
+}
+
+/*
+ * Thread start callback for client lock recovery
+ */
+static void
+nlm_client_recovery_start(void *arg)
+{
+	struct nlm_host *host = (struct nlm_host *) arg;
+
+	if (nlm_debug_level >= 1)
+		printf("NLM: client lock recovery for %s started\n",
+		    host->nh_caller_name);
+
+	nlm_client_recovery(host);
+
+	if (nlm_debug_level >= 1)
+		printf("NLM: client lock recovery for %s completed\n",
+		    host->nh_caller_name);
+
+	host->nh_monstate = NLM_MONITORED;
+	nlm_host_release(host);
+
+	kthread_exit();
+}
+
+/*
+ * This is called when we receive a host state change notification. We
+ * unlock any active locks owned by the host. When rpc.lockd is
+ * shutting down, this function is called with newstate set to zero
+ * which allows us to cancel any pending async locks and clear the
+ * locking state.
+ */
+static void
+nlm_host_notify(struct nlm_host *host, int newstate)
 {
 	struct nlm_async_lock *af;
 
@@ -557,28 +650,24 @@ nlm_host_notify(struct nlm_host *host, int newstate, bool_t destroy)
 	nlm_free_finished_locks(host);
 
 	/*
-	 * The host just rebooted - trash its locks and forget any
-	 * RPC client handle that we may have for it.
+	 * The host just rebooted - trash its locks.
 	 */
 	lf_clearremotesys(host->nh_sysid);
-	if (host->nh_rpc) {
-		AUTH_DESTROY(host->nh_rpc->cl_auth);
-		CLNT_DESTROY(host->nh_rpc);
-		host->nh_rpc = NULL;
-	}
 	host->nh_state = newstate;
 
 	/*
-	 * Destroy the host if the caller believes that it won't be
-	 * used again. This is safe enough - if we see the same name
-	 * again, we will just create a new host.
+	 * If we have any remote locks for this host (i.e. it
+	 * represents a remote NFS server that our local NFS client
+	 * has locks for), start a recovery thread.
 	 */
-	if (destroy) {
-		TAILQ_REMOVE(&nlm_hosts, host, nh_link);
-		mtx_destroy(&host->nh_lock);
-		sysctl_ctx_free(&host->nh_sysctl);
-		free(host->nh_caller_name, M_NLM);
-		free(host, M_NLM);
+	if (newstate != 0
+	    && host->nh_monstate != NLM_RECOVERING
+	    && lf_countlocks(NLM_SYSID_CLIENT | host->nh_sysid) > 0) {
+		struct thread *td;
+		host->nh_monstate = NLM_RECOVERING;
+		refcount_acquire(&host->nh_refs);
+		kthread_add(nlm_client_recovery_start, host, curproc, &td, 0, 0,
+		    "NFS lock recovery for %s", host->nh_caller_name);
 	}
 }
 
@@ -597,6 +686,20 @@ nlm_host_lock_count_sysctl(SYSCTL_HANDLER_ARGS)
 }
 
 /*
+ * Sysctl handler to count the number of client locks for a sysid.
+ */
+static int
+nlm_host_client_lock_count_sysctl(SYSCTL_HANDLER_ARGS)
+{
+	struct nlm_host *host;
+	int count;
+
+	host = oidp->oid_arg1;
+	count = lf_countlocks(NLM_SYSID_CLIENT | host->nh_sysid);
+	return sysctl_handle_int(oidp, &count, 0, req);
+}
+
+/*
  * Create a new NLM host.
  */
 static struct nlm_host *
@@ -605,12 +708,17 @@ nlm_create_host(const char* caller_name)
 	struct nlm_host *host;
 	struct sysctl_oid *oid;
 
+	mtx_assert(&nlm_global_lock, MA_OWNED);
+
 	if (nlm_debug_level >= 1)
 		printf("NLM: new host %s (sysid %d)\n",
 		    caller_name, nlm_next_sysid);
-	host = malloc(sizeof(struct nlm_host), M_NLM, M_WAITOK|M_ZERO);
+	host = malloc(sizeof(struct nlm_host), M_NLM, M_NOWAIT|M_ZERO);
+	if (!host)
+		return (NULL);
 	mtx_init(&host->nh_lock, "nh_lock", NULL, MTX_DEF);
-	host->nh_caller_name = strdup(caller_name, M_NLM);
+	host->nh_refs = 1;
+	strlcpy(host->nh_caller_name, caller_name, MAXNAMELEN);
 	host->nh_sysid = nlm_next_sysid++;
 	snprintf(host->nh_sysid_string, sizeof(host->nh_sysid_string),
 		"%d", host->nh_sysid);
@@ -622,6 +730,8 @@ nlm_create_host(const char* caller_name)
 	TAILQ_INIT(&host->nh_finished);
 	TAILQ_INSERT_TAIL(&nlm_hosts, host, nh_link);
 
+	mtx_unlock(&nlm_global_lock);
+
 	sysctl_ctx_init(&host->nh_sysctl);
 	oid = SYSCTL_ADD_NODE(&host->nh_sysctl,
 	    SYSCTL_STATIC_CHILDREN(_vfs_nlm_sysid),
@@ -635,6 +745,11 @@ nlm_create_host(const char* caller_name)
 	SYSCTL_ADD_PROC(&host->nh_sysctl, SYSCTL_CHILDREN(oid), OID_AUTO,
 	    "lock_count", CTLTYPE_INT | CTLFLAG_RD, host, 0,
 	    nlm_host_lock_count_sysctl, "I", "");
+	SYSCTL_ADD_PROC(&host->nh_sysctl, SYSCTL_CHILDREN(oid), OID_AUTO,
+	    "client_lock_count", CTLTYPE_INT | CTLFLAG_RD, host, 0,
+	    nlm_host_client_lock_count_sysctl, "I", "");
+
+	mtx_lock(&nlm_global_lock);
 
 	return (host);
 }
@@ -683,6 +798,8 @@ nlm_check_idle(void)
 {
 	struct nlm_host *host;
 
+	mtx_assert(&nlm_global_lock, MA_OWNED);
+
 	if (time_uptime <= nlm_next_idle_check)
 		return;
 
@@ -691,12 +808,17 @@ nlm_check_idle(void)
 	TAILQ_FOREACH(host, &nlm_hosts, nh_link) {
 		if (host->nh_monstate == NLM_MONITORED
 		    && time_uptime > host->nh_idle_timeout) {
-			if (lf_countlocks(host->nh_sysid) > 0) {
+			mtx_unlock(&nlm_global_lock);
+			if (lf_countlocks(host->nh_sysid) > 0
+			    || lf_countlocks(NLM_SYSID_CLIENT
+				| host->nh_sysid) > 0) {
 				host->nh_idle_timeout =
 					time_uptime + NLM_IDLE_TIMEOUT;
+				mtx_lock(&nlm_global_lock);
 				continue;
 			}
 			nlm_host_unmonitor(host);
+			mtx_lock(&nlm_global_lock);
 		} 
 	}
 }
@@ -704,16 +826,18 @@ nlm_check_idle(void)
 /*
  * Search for an existing NLM host that matches the given name
  * (typically the caller_name element of an nlm4_lock).  If none is
- * found, create a new host. If 'rqstp' is non-NULL, record the remote
+ * found, create a new host. If 'addr' is non-NULL, record the remote
  * address of the host so that we can call it back for async
- * responses.
+ * responses. If 'vers' is greater than zero then record the NLM
+ * program version to use to communicate with this client.
  */
 struct nlm_host *
-nlm_find_host_by_name(const char *name, struct svc_req *rqstp)
+nlm_find_host_by_name(const char *name, const struct sockaddr *addr,
+    rpcvers_t vers)
 {
 	struct nlm_host *host;
 
-	nlm_check_idle();
+	mtx_lock(&nlm_global_lock);
 
 	/*
 	 * The remote host is determined by caller_name.
@@ -723,18 +847,24 @@ nlm_find_host_by_name(const char *name, struct svc_req *rqstp)
 			break;
 	}
 
-	if (!host)
+	if (!host) {
 		host = nlm_create_host(name);
+		if (!host) {
+			mtx_unlock(&nlm_global_lock);
+			return (NULL);
+		}
+	}
+	refcount_acquire(&host->nh_refs);
+
 	host->nh_idle_timeout = time_uptime + NLM_IDLE_TIMEOUT;
 
 	/*
-	 * If we have an RPC request, record the remote address so
-	 * that can send async replies etc.
+	 * If we have an address for the host, record it so that we
+	 * can send async replies etc.
 	 */
-	if (rqstp) {
-		struct netbuf *addr = &rqstp->rq_xprt->xp_rtaddr;
+	if (addr) {
 		
-		KASSERT(addr->len < sizeof(struct sockaddr_storage),
+		KASSERT(addr->sa_len < sizeof(struct sockaddr_storage),
 		    ("Strange remote transport address length"));
 
 		/*
@@ -745,17 +875,26 @@ nlm_find_host_by_name(const char *name, struct svc_req *rqstp)
 		if (host->nh_addr.ss_len && host->nh_rpc) {
 			if (!nlm_compare_addr(
 				    (struct sockaddr *) &host->nh_addr,
-				    (struct sockaddr *) addr->buf)
-			    || host->nh_vers != rqstp->rq_vers) {
-				AUTH_DESTROY(host->nh_rpc->cl_auth);
-				CLNT_DESTROY(host->nh_rpc);
+				    addr)
+			    || host->nh_vers != vers) {
+				CLIENT *client;
+				mtx_lock(&host->nh_lock);
+				client = host->nh_rpc;
 				host->nh_rpc = NULL;
+				mtx_unlock(&host->nh_lock);
+				if (client) {
+					CLNT_RELEASE(client);
+				}
 			}
 		}
-		memcpy(&host->nh_addr, addr->buf, addr->len);
-		host->nh_vers = rqstp->rq_vers;
+		memcpy(&host->nh_addr, addr, addr->sa_len);
+		host->nh_vers = vers;
 	}
 
+	nlm_check_idle();
+
+	mtx_unlock(&nlm_global_lock);
+
 	return (host);
 }
 
@@ -768,9 +907,32 @@ nlm_find_host_by_name(const char *name, struct svc_req *rqstp)
 struct nlm_host *
 nlm_find_host_by_addr(const struct sockaddr *addr, int vers)
 {
+	/*
+	 * Fake up a name using inet_ntop. This buffer is
+	 * large enough for an IPv6 address.
+	 */
+	char tmp[sizeof "ffff:ffff:ffff:ffff:ffff:ffff:255.255.255.255"];
 	struct nlm_host *host;
 
-	nlm_check_idle();
+	switch (addr->sa_family) {
+	case AF_INET:
+		__rpc_inet_ntop(AF_INET,
+		    &((const struct sockaddr_in *) addr)->sin_addr,
+		    tmp, sizeof tmp);
+		break;
+#ifdef INET6
+	case AF_INET6:
+		__rpc_inet_ntop(AF_INET6,
+		    &((const struct sockaddr_in6 *) addr)->sin6_addr,
+		    tmp, sizeof tmp);
+		break;
+#endif
+	default:
+		/* Unknown address family: use a placeholder host name. */
+		strlcpy(tmp, "<unknown>", sizeof tmp);
+	}
+
+	mtx_lock(&nlm_global_lock);
 
 	/*
 	 * The remote host is determined by caller_name.
@@ -782,33 +944,22 @@ nlm_find_host_by_addr(const struct sockaddr *addr, int vers)
 	}
 
 	if (!host) {
-		/*
-		 * Fake up a name using inet_ntop. This buffer is
-		 * large enough for an IPv6 address.
-		 */
-		char tmp[sizeof "ffff:ffff:ffff:ffff:ffff:ffff:255.255.255.255"];
-		switch (addr->sa_family) {
-		case AF_INET:
-			__rpc_inet_ntop(AF_INET,
-			    &((const struct sockaddr_in *) addr)->sin_addr,
-			    tmp, sizeof tmp);
-			break;
-#ifdef INET6
-		case AF_INET6:
-			__rpc_inet_ntop(AF_INET6,
-			    &((const struct sockaddr_in6 *) addr)->sin6_addr,
-			    tmp, sizeof tmp);
-			break;
-#endif
-		default:
-			strcmp(tmp, "<unknown>");
-		}
 		host = nlm_create_host(tmp);
+		if (!host) {
+			mtx_unlock(&nlm_global_lock);
+			return (NULL);
+		}
 		memcpy(&host->nh_addr, addr, addr->sa_len);
 		host->nh_vers = vers;
 	}
+	refcount_acquire(&host->nh_refs);
+
 	host->nh_idle_timeout = time_uptime + NLM_IDLE_TIMEOUT;
 
+	nlm_check_idle();
+
+	mtx_unlock(&nlm_global_lock);
+
 	return (host);
 }
 
@@ -822,13 +973,25 @@ nlm_find_host_by_sysid(int sysid)
 	struct nlm_host *host;
 
 	TAILQ_FOREACH(host, &nlm_hosts, nh_link) {
-		if (host->nh_sysid == sysid)
+		if (host->nh_sysid == sysid) {
+			refcount_acquire(&host->nh_refs);
 			return (host);
+		}
 	}
 
 	return (NULL);
 }
 
+void nlm_host_release(struct nlm_host *host)
+{
+	if (refcount_release(&host->nh_refs)) {
+		/*
+		 * Free the host
+		 */
+		nlm_host_destroy(host);
+	}
+}
+
 /*
  * Unregister this NLM host with the local NSM due to idleness.
  */
@@ -878,7 +1041,7 @@ nlm_host_unmonitor(struct nlm_host *host)
  * Register this NLM host with the local NSM so that we can be
  * notified if it reboots.
  */
-static void
+void
 nlm_host_monitor(struct nlm_host *host, int state)
 {
 	mon smmon;
@@ -898,8 +1061,13 @@ nlm_host_monitor(struct nlm_host *host, int state)
 			    host->nh_caller_name, host->nh_sysid, state);
 	}
 
-	if (host->nh_monstate != NLM_UNMONITORED)
+	mtx_lock(&host->nh_lock);
+	if (host->nh_monstate != NLM_UNMONITORED) {
+		mtx_unlock(&host->nh_lock);
 		return;
+	}
+	host->nh_monstate = NLM_MONITORED;
+	mtx_unlock(&host->nh_lock);
 
 	if (nlm_debug_level >= 1)
 		printf("NLM: monitoring %s (sysid %d)\n",
@@ -930,7 +1098,9 @@ nlm_host_monitor(struct nlm_host *host, int state)
 	if (smstat.res_stat == stat_fail) {
 		printf("Local NSM refuses to monitor %s\n",
 		    host->nh_caller_name);
+		mtx_lock(&host->nh_lock);
 		host->nh_monstate = NLM_MONITOR_FAILED;
+		mtx_unlock(&host->nh_lock);
 		return;
 	}
 
@@ -944,10 +1114,12 @@ nlm_host_monitor(struct nlm_host *host, int state)
 CLIENT *
 nlm_host_get_rpc(struct nlm_host *host)
 {
-	struct timeval zero;
+	CLIENT *client;
+
+	mtx_lock(&host->nh_lock);
 
 	/*
- 	 * We can't hold onto RPC handles for too long - the async
+	 * We can't hold onto RPC handles for too long - the async
 	 * call/reply protocol used by some NLM clients makes it hard
 	 * to tell when they change port numbers (e.g. after a
 	 * reboot). Note that if a client reboots while it isn't
@@ -955,33 +1127,138 @@ nlm_host_get_rpc(struct nlm_host *host)
 	 * expire the RPC handles after two minutes.
 	 */
 	if (host->nh_rpc && time_uptime > host->nh_rpc_create_time + 2*60) {
-		CLIENT *client;
 		client = host->nh_rpc;
 		host->nh_rpc = NULL;
-		CLNT_DESTROY(client);
+		mtx_unlock(&host->nh_lock);
+		CLNT_RELEASE(client);
+		mtx_lock(&host->nh_lock);
 	}
 
-	if (host->nh_rpc)
-		return (host->nh_rpc);
+	if (!host->nh_rpc) {
+		mtx_unlock(&host->nh_lock);
+		client = nlm_get_rpc((struct sockaddr *)&host->nh_addr,
+		    NLM_PROG, host->nh_vers);
+		mtx_lock(&host->nh_lock);
+
+		if (client) {
+			if (host->nh_rpc) {
+				mtx_unlock(&host->nh_lock);
+				CLNT_DESTROY(client);
+				mtx_lock(&host->nh_lock);
+			} else {
+				host->nh_rpc = client;
+				host->nh_rpc_create_time = time_uptime;
+			}
+		}
+	}
+
+	client = host->nh_rpc;
+	if (client)
+		CLNT_ACQUIRE(client);
+	mtx_unlock(&host->nh_lock);
+
+	return (client);
+
+}
+
+int nlm_host_get_sysid(struct nlm_host *host)
+{
+
+	return (host->nh_sysid);
+}
+
+int
+nlm_host_get_state(struct nlm_host *host)
+{
+
+	return (host->nh_state);
+}
+
+void *
+nlm_register_wait_lock(struct nlm4_lock *lock, struct vnode *vp)
+{
+	struct nlm_waiting_lock *nw;
+
+	nw = malloc(sizeof(struct nlm_waiting_lock), M_NLM, M_WAITOK);
+	nw->nw_lock = *lock;
+	memcpy(&nw->nw_fh.fh_bytes, nw->nw_lock.fh.n_bytes,
+	    nw->nw_lock.fh.n_len);
+	nw->nw_lock.fh.n_bytes = nw->nw_fh.fh_bytes;
+	nw->nw_waiting = TRUE;
+	nw->nw_vp = vp;
+	mtx_lock(&nlm_global_lock);
+	TAILQ_INSERT_TAIL(&nlm_waiting_locks, nw, nw_link);
+	mtx_unlock(&nlm_global_lock);
+
+	return nw;
+}
+
+void
+nlm_deregister_wait_lock(void *handle)
+{
+	struct nlm_waiting_lock *nw = handle;
+
+	mtx_lock(&nlm_global_lock);
+	TAILQ_REMOVE(&nlm_waiting_locks, nw, nw_link);
+	mtx_unlock(&nlm_global_lock);
+	
+	free(nw, M_NLM);
+}
+
+int
+nlm_wait_lock(void *handle, int timo)
+{
+	struct nlm_waiting_lock *nw = handle;
+	int error;
 
 	/*
-	 * Set the send timeout to zero - we only use this rpc handle
-	 * for sending async replies which have no return value.
+	 * If the granted message arrived before we got here,
+	 * nw->nw_waiting will be FALSE - in that case, don't sleep.
 	 */
-	host->nh_rpc = nlm_get_rpc((struct sockaddr *)&host->nh_addr,
-	    NLM_PROG, host->nh_vers);
+	mtx_lock(&nlm_global_lock);
+	error = 0;
+	if (nw->nw_waiting)
+		error = msleep(nw, &nlm_global_lock, PCATCH, "nlmlock", timo);
+	TAILQ_REMOVE(&nlm_waiting_locks, nw, nw_link);
+	if (error) {
+		/*
+		 * The granted message may arrive after the
+		 * interrupt/timeout but before we manage to lock the
+		 * mutex. Detect this by examining nw_waiting.
+		 */
+		if (!nw->nw_waiting)
+			error = 0;
+	} else {
+		/*
+		 * If nlm_cancel_wait is called, then error will be
+		 * zero but nw_waiting will still be TRUE. We
+		 * translate this into EINTR.
+		 */
+		if (nw->nw_waiting)
+			error = EINTR;
+	}
+	mtx_unlock(&nlm_global_lock);
 
-	if (host->nh_rpc) {
-		zero.tv_sec = 0;
-		zero.tv_usec = 0;
-		CLNT_CONTROL(host->nh_rpc, CLSET_TIMEOUT, &zero);
+	free(nw, M_NLM);
 
-		host->nh_rpc_create_time = time_uptime;
-	}
+	return (error);
+}
+
+void
+nlm_cancel_wait(struct vnode *vp)
+{
+	struct nlm_waiting_lock *nw;
 
-	return (host->nh_rpc);
+	mtx_lock(&nlm_global_lock);
+	TAILQ_FOREACH(nw, &nlm_waiting_locks, nw_link) {
+		if (nw->nw_vp == vp) {
+			wakeup(nw);
+		}
+	}
+	mtx_unlock(&nlm_global_lock);
 }
 
+
 /**********************************************************************/
 
 /*
@@ -1099,7 +1376,11 @@ nlm_server_main(int addr_count, char **addrs)
 	sm_stat smstat;
 	struct timeval timo;
 	enum clnt_stat stat;
-	struct nlm_host *host;
+	struct nlm_host *host, *nhost;
+	struct nlm_waiting_lock *nw;
+	vop_advlock_t *old_nfs_advlock;
+	vop_reclaim_t *old_nfs_reclaim;
+	int v4_used, v6_used;
 
 	if (nlm_socket) {
 		printf("NLM: can't start server - it appears to be running already\n");
@@ -1129,6 +1410,7 @@ nlm_server_main(int addr_count, char **addrs)
 	    td->td_ucred, td);
 	if (error) {
 		printf("NLM: can't create IPv6 socket - error %d\n", error);
+		goto out;
 		return (error);
 	}
 	opt.sopt_dir = SOPT_SET;
@@ -1140,6 +1422,8 @@ nlm_server_main(int addr_count, char **addrs)
 	sosetopt(nlm_socket6, &opt);
 #endif
 
+	nlm_auth = authunix_create(curthread->td_ucred);
+
 #ifdef INET6
 	memset(&sin6, 0, sizeof(sin6));
 	sin6.sin6_len = sizeof(sin6);
@@ -1191,36 +1475,88 @@ nlm_server_main(int addr_count, char **addrs)
 
 	if (nlm_debug_level >= 1)
 		printf("NLM: local NSM state is %d\n", smstat.state);
+	nlm_nsm_state = smstat.state;
+
+	old_nfs_advlock = nfs_advlock_p;
+	nfs_advlock_p = nlm_advlock;
+	old_nfs_reclaim = nfs_reclaim_p;
+	nfs_reclaim_p = nlm_reclaim;
 
 	svc_run(pool);
 	error = 0;
 
+	nfs_advlock_p = old_nfs_advlock;
+	nfs_reclaim_p = old_nfs_reclaim;
+
 out:
 	if (pool)
 		svcpool_destroy(pool);
 
 	/*
-	 * Trash all the existing state so that if the server
-	 * restarts, it gets a clean slate.
+	 * We are finished communicating with the NSM.
 	 */
-	while ((host = TAILQ_FIRST(&nlm_hosts)) != NULL) {
-		nlm_host_notify(host, 0, TRUE);
-	}
 	if (nlm_nsm) {
-		AUTH_DESTROY(nlm_nsm->cl_auth);
-		CLNT_DESTROY(nlm_nsm);
+		CLNT_RELEASE(nlm_nsm);
 		nlm_nsm = NULL;
 	}
-	if (nlm_lockd) {
-		AUTH_DESTROY(nlm_lockd->cl_auth);
-		CLNT_DESTROY(nlm_lockd);
-		nlm_lockd = NULL;
+
+	/*
+	 * Trash all the existing state so that if the server
+	 * restarts, it gets a clean slate. This is complicated by the
+	 * possibility that there may be other threads trying to make
+	 * client locking requests.
+	 *
+	 * First we fake a client reboot notification which will
+	 * cancel any pending async locks and purge remote lock state
+	 * from the local lock manager. We release the reference from
+	 * nlm_hosts to the host (which may remove it from the list
+	 * and free it). After this phase, the only entries in the
+	 * nlm_host list should be from other threads performing
+	 * client lock requests. We arrange to defer closing the
+	 * sockets until the last RPC client handle is released.
+	 */
+	v4_used = 0;
+#ifdef INET6
+	v6_used = 0;
+#endif
+	mtx_lock(&nlm_global_lock);
+	TAILQ_FOREACH(nw, &nlm_waiting_locks, nw_link) {
+		wakeup(nw);
+	}
+	TAILQ_FOREACH_SAFE(host, &nlm_hosts, nh_link, nhost) {
+		mtx_unlock(&nlm_global_lock);
+		nlm_host_notify(host, 0);
+		nlm_host_release(host);
+		mtx_lock(&nlm_global_lock);
+	}
+	TAILQ_FOREACH_SAFE(host, &nlm_hosts, nh_link, nhost) {
+		mtx_lock(&host->nh_lock);
+		if (host->nh_rpc) {
+			if (host->nh_addr.ss_family == AF_INET)
+				v4_used++;
+#ifdef INET6
+			if (host->nh_addr.ss_family == AF_INET6)
+				v6_used++;
+#endif
+			/*
+			 * Note that the rpc over udp code copes
+			 * correctly with the fact that a socket may
+			 * be used by many rpc handles.
+			 */
+			CLNT_CONTROL(host->nh_rpc, CLSET_FD_CLOSE, 0);
+		}
+		mtx_unlock(&host->nh_lock);
 	}
+	mtx_unlock(&nlm_global_lock);
+
+	AUTH_DESTROY(nlm_auth);
 
-	soclose(nlm_socket);
+	if (!v4_used)
+		soclose(nlm_socket);
 	nlm_socket = NULL;
 #ifdef INET6
-	soclose(nlm_socket6);
+	if (!v6_used)
+		soclose(nlm_socket6);
 	nlm_socket6 = NULL;
 #endif
 
@@ -1264,8 +1600,10 @@ nlm_sm_notify(struct nlm_sm_status *argp)
 		printf("nlm_sm_notify(): mon_name = %s\n", argp->mon_name);
 	memcpy(&sysid, &argp->priv, sizeof(sysid));
 	host = nlm_find_host_by_sysid(sysid);
-	if (host)
-		nlm_host_notify(host, argp->state, FALSE);
+	if (host) {
+		nlm_host_notify(host, argp->state);
+		nlm_host_release(host);
+	}
 }
 
 static void
@@ -1372,8 +1710,9 @@ nlm_convert_error(int error)
 		return nlm4_failed;
 }
 
-struct nlm_host *
-nlm_do_test(nlm4_testargs *argp, nlm4_testres *result, struct svc_req *rqstp)
+int
+nlm_do_test(nlm4_testargs *argp, nlm4_testres *result, struct svc_req *rqstp,
+	CLIENT **rpcp)
 {
 	fhandle_t fh;
 	struct vfs_state vs;
@@ -1382,11 +1721,13 @@ nlm_do_test(nlm4_testargs *argp, nlm4_testres *result, struct svc_req *rqstp)
 	struct flock fl;
 	
 	memset(result, 0, sizeof(*result));
+	memset(&vs, 0, sizeof(vs));
 
-	host = nlm_find_host_by_name(argp->alock.caller_name, rqstp);
+	host = nlm_find_host_by_name(argp->alock.caller_name,
+	    (struct sockaddr *) rqstp->rq_xprt->xp_rtaddr.buf, rqstp->rq_vers);
 	if (!host) {
 		result->stat.stat = nlm4_denied_nolocks;
-		return (NULL);
+		return (ENOMEM);
 	}
 
 	if (nlm_debug_level >= 3)
@@ -1401,7 +1742,7 @@ nlm_do_test(nlm4_testargs *argp, nlm4_testres *result, struct svc_req *rqstp)
 
 	if (time_uptime < nlm_grace_threshold) {
 		result->stat.stat = nlm4_denied_grace_period;
-		return (host);
+		goto out;
 	}
 
 	error = nlm_get_vfs_state(host, rqstp, &fh, &vs);
@@ -1452,6 +1793,7 @@ nlm_do_test(nlm4_testargs *argp, nlm4_testres *result, struct svc_req *rqstp)
 			 * For the moment, return nothing in oh
 			 * (already zero'ed above).
 			 */
+			nlm_host_release(bhost);
 		}
 		result->stat.nlm4_testrply_u.holder.l_offset = fl.l_start;
 		result->stat.nlm4_testrply_u.holder.l_len = fl.l_len;
@@ -1459,12 +1801,15 @@ nlm_do_test(nlm4_testargs *argp, nlm4_testres *result, struct svc_req *rqstp)
 
 out:
 	nlm_release_vfs_state(&vs);
-	return (host);
+	if (rpcp)
+		*rpcp = nlm_host_get_rpc(host);
+	nlm_host_release(host);
+	return (0);
 }
 
-struct nlm_host *
+int
 nlm_do_lock(nlm4_lockargs *argp, nlm4_res *result, struct svc_req *rqstp,
-    bool_t monitor)
+    bool_t monitor, CLIENT **rpcp)
 {
 	fhandle_t fh;
 	struct vfs_state vs;
@@ -1473,11 +1818,13 @@ nlm_do_lock(nlm4_lockargs *argp, nlm4_res *result, struct svc_req *rqstp,
 	struct flock fl;
 	
 	memset(result, 0, sizeof(*result));
+	memset(&vs, 0, sizeof(vs));
 
-	host = nlm_find_host_by_name(argp->alock.caller_name, rqstp);
+	host = nlm_find_host_by_name(argp->alock.caller_name,
+	    (struct sockaddr *) rqstp->rq_xprt->xp_rtaddr.buf, rqstp->rq_vers);
 	if (!host) {
 		result->stat.stat = nlm4_denied_nolocks;
-		return (NULL);
+		return (ENOMEM);
 	}
 
 	if (nlm_debug_level >= 3)
@@ -1490,7 +1837,7 @@ nlm_do_lock(nlm4_lockargs *argp, nlm4_res *result, struct svc_req *rqstp,
 		 * The host rebooted without telling us. Trash its
 		 * locks.
 		 */
-		nlm_host_notify(host, argp->state, FALSE);
+		nlm_host_notify(host, argp->state);
 	}
 
 	nlm_free_finished_locks(host);
@@ -1501,7 +1848,7 @@ nlm_do_lock(nlm4_lockargs *argp, nlm4_res *result, struct svc_req *rqstp,
 
 	if (time_uptime < nlm_grace_threshold && !argp->reclaim) {
 		result->stat.stat = nlm4_denied_grace_period;
-		return (host);
+		goto out;
 	}
 
 	error = nlm_get_vfs_state(host, rqstp, &fh, &vs);
@@ -1521,11 +1868,13 @@ nlm_do_lock(nlm4_lockargs *argp, nlm4_res *result, struct svc_req *rqstp,
 		fl.l_type = F_RDLCK;
 	if (argp->block) {
 		struct nlm_async_lock *af;
+		CLIENT *client;
 
 		/*
 		 * First, make sure we can contact the host's NLM.
 		 */
-		if (!nlm_host_get_rpc(host)) {
+		client = nlm_host_get_rpc(host);
+		if (!client) {
 			result->stat.stat = nlm4_failed;
 			goto out;
 		}
@@ -1547,6 +1896,7 @@ nlm_do_lock(nlm4_lockargs *argp, nlm4_res *result, struct svc_req *rqstp,
 		}
 		mtx_unlock(&host->nh_lock);
 		if (af) {
+			CLNT_RELEASE(client);
 			result->stat.stat = nlm4_blocked;
 			goto out;
 		}
@@ -1557,6 +1907,7 @@ nlm_do_lock(nlm4_lockargs *argp, nlm4_res *result, struct svc_req *rqstp,
 		af->af_vp = vs.vs_vp;
 		af->af_fl = fl;
 		af->af_host = host;
+		af->af_rpc = client;
 		/*
 		 * We use M_RPC here so that we can xdr_free the thing
 		 * later.
@@ -1592,6 +1943,7 @@ nlm_do_lock(nlm4_lockargs *argp, nlm4_res *result, struct svc_req *rqstp,
 		 * tracking structure now.
 		 */
 		if (error != EINPROGRESS) {
+			CLNT_RELEASE(af->af_rpc);
 			mtx_lock(&host->nh_lock);
 			TAILQ_REMOVE(&host->nh_pending, af, af_link);
 			mtx_unlock(&host->nh_lock);
@@ -1632,12 +1984,15 @@ nlm_do_lock(nlm4_lockargs *argp, nlm4_res *result, struct svc_req *rqstp,
 
 out:
 	nlm_release_vfs_state(&vs);
-
-	return (host);
+	if (rpcp)
+		*rpcp = nlm_host_get_rpc(host);
+	nlm_host_release(host);
+	return (0);
 }
 
-struct nlm_host *
-nlm_do_cancel(nlm4_cancargs *argp, nlm4_res *result, struct svc_req *rqstp)
+int
+nlm_do_cancel(nlm4_cancargs *argp, nlm4_res *result, struct svc_req *rqstp,
+    CLIENT **rpcp)
 {
 	fhandle_t fh;
 	struct vfs_state vs;
@@ -1647,11 +2002,13 @@ nlm_do_cancel(nlm4_cancargs *argp, nlm4_res *result, struct svc_req *rqstp)
 	struct nlm_async_lock *af;
 	
 	memset(result, 0, sizeof(*result));
+	memset(&vs, 0, sizeof(vs));
 
-	host = nlm_find_host_by_name(argp->alock.caller_name, rqstp);
+	host = nlm_find_host_by_name(argp->alock.caller_name,
+	    (struct sockaddr *) rqstp->rq_xprt->xp_rtaddr.buf, rqstp->rq_vers);
 	if (!host) {
 		result->stat.stat = nlm4_denied_nolocks;
-		return (NULL);
+		return (ENOMEM);
 	}
 
 	if (nlm_debug_level >= 3)
@@ -1666,7 +2023,7 @@ nlm_do_cancel(nlm4_cancargs *argp, nlm4_res *result, struct svc_req *rqstp)
 
 	if (time_uptime < nlm_grace_threshold) {
 		result->stat.stat = nlm4_denied_grace_period;
-		return (host);
+		goto out;
 	}
 
 	error = nlm_get_vfs_state(host, rqstp, &fh, &vs);
@@ -1718,12 +2075,15 @@ nlm_do_cancel(nlm4_cancargs *argp, nlm4_res *result, struct svc_req *rqstp)
 
 out:
 	nlm_release_vfs_state(&vs);
-
-	return (host);
+	if (rpcp)
+		*rpcp = nlm_host_get_rpc(host);
+	nlm_host_release(host);
+	return (0);
 }
 
-struct nlm_host *
-nlm_do_unlock(nlm4_unlockargs *argp, nlm4_res *result, struct svc_req *rqstp)
+int
+nlm_do_unlock(nlm4_unlockargs *argp, nlm4_res *result, struct svc_req *rqstp,
+    CLIENT **rpcp)
 {
 	fhandle_t fh;
 	struct vfs_state vs;
@@ -1732,11 +2092,13 @@ nlm_do_unlock(nlm4_unlockargs *argp, nlm4_res *result, struct svc_req *rqstp)
 	struct flock fl;
 	
 	memset(result, 0, sizeof(*result));
+	memset(&vs, 0, sizeof(vs));
 
-	host = nlm_find_host_by_name(argp->alock.caller_name, rqstp);
+	host = nlm_find_host_by_name(argp->alock.caller_name,
+	    (struct sockaddr *) rqstp->rq_xprt->xp_rtaddr.buf, rqstp->rq_vers);
 	if (!host) {
 		result->stat.stat = nlm4_denied_nolocks;
-		return (NULL);
+		return (ENOMEM);
 	}
 
 	if (nlm_debug_level >= 3)
@@ -1751,7 +2113,7 @@ nlm_do_unlock(nlm4_unlockargs *argp, nlm4_res *result, struct svc_req *rqstp)
 
 	if (time_uptime < nlm_grace_threshold) {
 		result->stat.stat = nlm4_denied_grace_period;
-		return (host);
+		goto out;
 	}
 
 	error = nlm_get_vfs_state(host, rqstp, &fh, &vs);
@@ -1776,8 +2138,54 @@ nlm_do_unlock(nlm4_unlockargs *argp, nlm4_res *result, struct svc_req *rqstp)
 
 out:
 	nlm_release_vfs_state(&vs);
+	if (rpcp)
+		*rpcp = nlm_host_get_rpc(host);
+	nlm_host_release(host);
+	return (0);
+}
 
-	return (host);
+int
+nlm_do_granted(nlm4_testargs *argp, nlm4_res *result, struct svc_req *rqstp,
+
+    CLIENT **rpcp)
+{
+	struct nlm_host *host;
+	struct nlm_waiting_lock *nw;
+	
+	memset(result, 0, sizeof(*result));
+
+	host = nlm_find_host_by_addr(
+		(struct sockaddr *) rqstp->rq_xprt->xp_rtaddr.buf,
+		rqstp->rq_vers);
+	if (!host) {
+		result->stat.stat = nlm4_denied_nolocks;
+		return (ENOMEM);
+	}
+
+	nlm_copy_netobj(&result->cookie, &argp->cookie, M_RPC);
+	result->stat.stat = nlm4_denied;
+
+	mtx_lock(&nlm_global_lock);
+	TAILQ_FOREACH(nw, &nlm_waiting_locks, nw_link) {
+		if (!nw->nw_waiting)
+			continue;
+		if (argp->alock.svid == nw->nw_lock.svid
+		    && argp->alock.l_offset == nw->nw_lock.l_offset
+		    && argp->alock.l_len == nw->nw_lock.l_len
+		    && argp->alock.fh.n_len == nw->nw_lock.fh.n_len
+		    && !memcmp(argp->alock.fh.n_bytes, nw->nw_lock.fh.n_bytes,
+			nw->nw_lock.fh.n_len)) {
+			nw->nw_waiting = FALSE;
+			wakeup(nw);
+			result->stat.stat = nlm4_granted;
+			break;
+		}
+	}
+	mtx_unlock(&nlm_global_lock);
+	if (rpcp)
+		*rpcp = nlm_host_get_rpc(host);
+	nlm_host_release(host);
+	return (0);
 }
 
 void
@@ -1787,45 +2195,10 @@ nlm_do_free_all(nlm4_notify *argp)
 
 	TAILQ_FOREACH_SAFE(host, &nlm_hosts, nh_link, thost) {
 		if (!strcmp(host->nh_caller_name, argp->name))
-			nlm_host_notify(host, argp->state, FALSE);
+			nlm_host_notify(host, argp->state);
 	}
 }
 
-#define _PATH_RPCLOCKDSOCK	"/var/run/rpclockd.sock"
-
-/*
- * Make a connection to the userland lockd - we push anything we can't
- * handle out to userland.
- */
-CLIENT *
-nlm_user_lockd(void)
-{
-	struct sockaddr_un sun;
-	struct netconfig *nconf;
-	struct timeval zero;
-
-	if (nlm_lockd)
-		return (nlm_lockd);
-
-	sun.sun_family = AF_LOCAL;
-	strcpy(sun.sun_path, _PATH_RPCLOCKDSOCK);
-	sun.sun_len = SUN_LEN(&sun);
-
-	nconf = getnetconfigent("local");
-	nlm_lockd = clnt_reconnect_create(nconf, (struct sockaddr *) &sun,
-	    NLM_PROG, NLM_VERS4, RPC_MAXDATASIZE, RPC_MAXDATASIZE);
-
-	/*
-	 * Set the send timeout to zero - we only use this rpc handle
-	 * for sending async replies which have no return value.
-	 */
-	zero.tv_sec = 0;
-	zero.tv_usec = 0;
-	CLNT_CONTROL(nlm_lockd, CLSET_TIMEOUT, &zero);
-
-	return (nlm_lockd);
-}
-
 /*
  * Kernel module glue
  */
diff --git a/sys/nlm/nlm_prot_server.c b/sys/nlm/nlm_prot_server.c
index 320680a..fd6b449 100644
--- a/sys/nlm/nlm_prot_server.c
+++ b/sys/nlm/nlm_prot_server.c
@@ -232,7 +232,6 @@ nlm_test_msg_1_svc(struct nlm_testargs *argp, void *result, struct svc_req *rqst
 	nlm4_testargs args4;
 	nlm4_testres res4;
 	nlm_testres res;
-	struct nlm_host *host;
 	CLIENT *rpc;
 	char dummy;
 
@@ -240,7 +239,8 @@ nlm_test_msg_1_svc(struct nlm_testargs *argp, void *result, struct svc_req *rqst
 	args4.exclusive = argp->exclusive;
 	nlm_convert_to_nlm4_lock(&args4.alock, &argp->alock);
 
-	host = nlm_do_test(&args4, &res4, rqstp);
+	if (nlm_do_test(&args4, &res4, rqstp, &rpc))
+		return (FALSE);
 
 	res.cookie = res4.cookie;
 	res.stat.stat = nlm_convert_to_nlm_stats(res4.stat.stat);
@@ -249,9 +249,10 @@ nlm_test_msg_1_svc(struct nlm_testargs *argp, void *result, struct svc_req *rqst
 			&res.stat.nlm_testrply_u.holder,
 			&res4.stat.nlm4_testrply_u.holder);
 
-	rpc = nlm_host_get_rpc(host);
-	if (rpc)
-		nlm_test_res_1(&res, &dummy, rpc);
+	if (rpc) {
+		nlm_test_res_1(&res, &dummy, rpc, NULL, nlm_zero_tv);
+		CLNT_RELEASE(rpc);
+	}
 	xdr_free((xdrproc_t) xdr_nlm_testres, &res);
 
 	return (FALSE);
@@ -263,7 +264,6 @@ nlm_lock_msg_1_svc(struct nlm_lockargs *argp, void *result, struct svc_req *rqst
 	nlm4_lockargs args4;
 	nlm4_res res4;
 	nlm_res res;
-	struct nlm_host *host;
 	CLIENT *rpc;
 	char dummy;
 
@@ -274,13 +274,15 @@ nlm_lock_msg_1_svc(struct nlm_lockargs *argp, void *result, struct svc_req *rqst
 	args4.reclaim = argp->reclaim;
 	args4.state = argp->state;
 
-	host = nlm_do_lock(&args4, &res4, rqstp, TRUE);
+	if (nlm_do_lock(&args4, &res4, rqstp, TRUE, &rpc))
+		return (FALSE);
 
 	nlm_convert_to_nlm_res(&res, &res4);
 
-	rpc = nlm_host_get_rpc(host);
-	if (rpc)
-		nlm_lock_res_1(&res, &dummy, rpc);
+	if (rpc) {
+		nlm_lock_res_1(&res, &dummy, rpc, NULL, nlm_zero_tv);
+		CLNT_RELEASE(rpc);
+	}
 	xdr_free((xdrproc_t) xdr_nlm_res, &res);
 
 	return (FALSE);
@@ -292,7 +294,6 @@ nlm_cancel_msg_1_svc(struct nlm_cancargs *argp, void *result, struct svc_req *rq
 	nlm4_cancargs args4;
 	nlm4_res res4;
 	nlm_res res;
-	struct nlm_host *host;
 	CLIENT *rpc;
 	char dummy;
 
@@ -301,13 +302,15 @@ nlm_cancel_msg_1_svc(struct nlm_cancargs *argp, void *result, struct svc_req *rq
 	args4.exclusive = argp->exclusive;
 	nlm_convert_to_nlm4_lock(&args4.alock, &argp->alock);
 
-	host = nlm_do_cancel(&args4, &res4, rqstp);
+	if (nlm_do_cancel(&args4, &res4, rqstp, &rpc))
+		return (FALSE);
 
 	nlm_convert_to_nlm_res(&res, &res4);
 
-	rpc = nlm_host_get_rpc(host);
-	if (rpc)
-		nlm_cancel_res_1(&res, &dummy, rpc);
+	if (rpc) {
+		nlm_cancel_res_1(&res, &dummy, rpc, NULL, nlm_zero_tv);
+		CLNT_RELEASE(rpc);
+	}
 	xdr_free((xdrproc_t) xdr_nlm_res, &res);
 
 	return (FALSE);
@@ -319,20 +322,21 @@ nlm_unlock_msg_1_svc(struct nlm_unlockargs *argp, void *result, struct svc_req *
 	nlm4_unlockargs args4;
 	nlm4_res res4;
 	nlm_res res;
-	struct nlm_host *host;
 	CLIENT *rpc;
 	char dummy;
 
 	args4.cookie = argp->cookie;
 	nlm_convert_to_nlm4_lock(&args4.alock, &argp->alock);
 
-	host = nlm_do_unlock(&args4, &res4, rqstp);
+	if (nlm_do_unlock(&args4, &res4, rqstp, &rpc))
+		return (FALSE);
 
 	nlm_convert_to_nlm_res(&res, &res4);
 
-	rpc = nlm_host_get_rpc(host);
-	if (rpc)
-		nlm_unlock_res_1(&res, &dummy, rpc);
+	if (rpc) {
+		nlm_unlock_res_1(&res, &dummy, rpc, NULL, nlm_zero_tv);
+		CLNT_RELEASE(rpc);
+	}
 	xdr_free((xdrproc_t) xdr_nlm_res, &res);
 
 	return (FALSE);
@@ -344,7 +348,6 @@ nlm_granted_msg_1_svc(struct nlm_testargs *argp, void *result, struct svc_req *r
 	nlm4_testargs args4;
 	nlm4_res res4;
 	nlm_res res;
-	struct nlm_host *host;
 	CLIENT *rpc;
 	char dummy;
 
@@ -352,20 +355,15 @@ nlm_granted_msg_1_svc(struct nlm_testargs *argp, void *result, struct svc_req *r
 	args4.exclusive = argp->exclusive;
 	nlm_convert_to_nlm4_lock(&args4.alock, &argp->alock);
 
-	/*
-	 * We make a synchronous call to userland and send the reply
-	 * back async.
-	 */
-	nlm4_granted_4_svc(&args4, &res4, rqstp);
+	if (nlm_do_granted(&args4, &res4, rqstp, &rpc))
+		return (FALSE);
 
 	nlm_convert_to_nlm_res(&res, &res4);
 
-	host = nlm_find_host_by_addr(
-		(struct sockaddr *) rqstp->rq_xprt->xp_rtaddr.buf,
-		rqstp->rq_vers);
-	rpc = nlm_host_get_rpc(host);
-	if (rpc)
-		nlm_granted_res_1(&res, &dummy, rpc);
+	if (rpc) {
+		nlm_granted_res_1(&res, &dummy, rpc, NULL, nlm_zero_tv);
+		CLNT_RELEASE(rpc);
+	}
 	xdr_free((xdrproc_t) xdr_nlm_res, &res);
 
 	return (FALSE);
@@ -515,7 +513,7 @@ bool_t
 nlm4_test_4_svc(nlm4_testargs *argp, nlm4_testres *result, struct svc_req *rqstp)
 {
 
-	nlm_do_test(argp, result, rqstp);
+	nlm_do_test(argp, result, rqstp, NULL);
 	return (TRUE);
 }
 
@@ -523,7 +521,7 @@ bool_t
 nlm4_lock_4_svc(nlm4_lockargs *argp, nlm4_res *result, struct svc_req *rqstp)
 {
 
-	nlm_do_lock(argp, result, rqstp, TRUE);
+	nlm_do_lock(argp, result, rqstp, TRUE, NULL);
 	return (TRUE);
 }
 
@@ -531,7 +529,7 @@ bool_t
 nlm4_cancel_4_svc(nlm4_cancargs *argp, nlm4_res *result, struct svc_req *rqstp)
 {
 
-	nlm_do_cancel(argp, result, rqstp);
+	nlm_do_cancel(argp, result, rqstp, NULL);
 	return (TRUE);
 }
 
@@ -539,35 +537,15 @@ bool_t
 nlm4_unlock_4_svc(nlm4_unlockargs *argp, nlm4_res *result, struct svc_req *rqstp)
 {
 
-	nlm_do_unlock(argp, result, rqstp);
+	nlm_do_unlock(argp, result, rqstp, NULL);
 	return (TRUE);
 }
 
 bool_t
 nlm4_granted_4_svc(nlm4_testargs *argp, nlm4_res *result, struct svc_req *rqstp)
 {
-	CLIENT* lockd;
-	struct timeval tv;
-
-	memset(result, 0, sizeof(*result));
-	nlm_copy_netobj(&result->cookie, &argp->cookie, M_RPC);
-
-	/*
-	 * Set a non-zero timeout to give the userland a chance to reply.
-	 */
-	lockd = nlm_user_lockd();
-	if (!lockd) {
-		result->stat.stat = nlm4_failed;
-		return (TRUE);
-	}
-	tv.tv_sec = 20;
-	tv.tv_usec = 0;
-	CLNT_CONTROL(lockd, CLSET_TIMEOUT, &tv);
-	nlm4_granted_4(argp, result, lockd);
-	tv.tv_sec = 0;
-	tv.tv_usec = 0;
-	CLNT_CONTROL(lockd, CLSET_TIMEOUT, &tv);
 
+	nlm_do_granted(argp, result, rqstp, NULL);
 	return (TRUE);
 }
 
@@ -575,14 +553,15 @@ bool_t
 nlm4_test_msg_4_svc(nlm4_testargs *argp, void *result, struct svc_req *rqstp)
 {
 	nlm4_testres res4;
-	struct nlm_host *host;
 	CLIENT *rpc;
 	char dummy;
 
-	host = nlm_do_test(argp, &res4, rqstp);
-	rpc = nlm_host_get_rpc(host);
-	if (rpc)
-		nlm4_test_res_4(&res4, &dummy, rpc);
+	if (nlm_do_test(argp, &res4, rqstp, &rpc))
+		return (FALSE);
+	if (rpc) {
+		nlm4_test_res_4(&res4, &dummy, rpc, NULL, nlm_zero_tv);
+		CLNT_RELEASE(rpc);
+	}
 	xdr_free((xdrproc_t) xdr_nlm4_testres, &res4);
 
 	return (FALSE);
@@ -592,14 +571,15 @@ bool_t
 nlm4_lock_msg_4_svc(nlm4_lockargs *argp, void *result, struct svc_req *rqstp)
 {
 	nlm4_res res4;
-	struct nlm_host *host;
 	CLIENT *rpc;
 	char dummy;
 
-	host = nlm_do_lock(argp, &res4, rqstp, TRUE);
-	rpc = nlm_host_get_rpc(host);
-	if (rpc)
-		nlm4_lock_res_4(&res4, &dummy, rpc);
+	if (nlm_do_lock(argp, &res4, rqstp, TRUE, &rpc))
+		return (FALSE);
+	if (rpc) {
+		nlm4_lock_res_4(&res4, &dummy, rpc, NULL, nlm_zero_tv);
+		CLNT_RELEASE(rpc);
+	}
 	xdr_free((xdrproc_t) xdr_nlm4_res, &res4);
 
 	return (FALSE);
@@ -609,14 +589,15 @@ bool_t
 nlm4_cancel_msg_4_svc(nlm4_cancargs *argp, void *result, struct svc_req *rqstp)
 {
 	nlm4_res res4;
-	struct nlm_host *host;
 	CLIENT *rpc;
 	char dummy;
 
-	host = nlm_do_cancel(argp, &res4, rqstp);
-	rpc = nlm_host_get_rpc(host);
-	if (rpc)
-		nlm4_cancel_res_4(&res4, &dummy, rpc);
+	if (nlm_do_cancel(argp, &res4, rqstp, &rpc))
+		return (FALSE);
+	if (rpc) {
+		nlm4_cancel_res_4(&res4, &dummy, rpc, NULL, nlm_zero_tv);
+		CLNT_RELEASE(rpc);
+	}
 	xdr_free((xdrproc_t) xdr_nlm4_res, &res4);
 
 	return (FALSE);
@@ -626,14 +607,15 @@ bool_t
 nlm4_unlock_msg_4_svc(nlm4_unlockargs *argp, void *result, struct svc_req *rqstp)
 {
 	nlm4_res res4;
-	struct nlm_host *host;
 	CLIENT *rpc;
 	char dummy;
 
-	host = nlm_do_unlock(argp, &res4, rqstp);
-	rpc = nlm_host_get_rpc(host);
-	if (rpc)
-		nlm4_unlock_res_4(&res4, &dummy, rpc);
+	if (nlm_do_unlock(argp, &res4, rqstp, &rpc))
+		return (FALSE);
+	if (rpc) {
+		nlm4_unlock_res_4(&res4, &dummy, rpc, NULL, nlm_zero_tv);
+		CLNT_RELEASE(rpc);
+	}
 	xdr_free((xdrproc_t) xdr_nlm4_res, &res4);
 
 	return (FALSE);
@@ -642,23 +623,16 @@ nlm4_unlock_msg_4_svc(nlm4_unlockargs *argp, void *result, struct svc_req *rqstp
 bool_t
 nlm4_granted_msg_4_svc(nlm4_testargs *argp, void *result, struct svc_req *rqstp)
 {
-	struct nlm_host *host;
-	CLIENT *rpc;
 	nlm4_res res4;
+	CLIENT *rpc;
 	char dummy;
 
-	/*
-	 * We make a synchronous call to userland and send the reply
-	 * back async.
-	 */
-	nlm4_granted_4_svc(argp, &res4, rqstp);
-
-	host = nlm_find_host_by_addr(
-		(struct sockaddr *) rqstp->rq_xprt->xp_rtaddr.buf,
-		rqstp->rq_vers);
-	rpc = nlm_host_get_rpc(host);
-	if (rpc)
-		nlm4_granted_res_4(&res4, &dummy, rpc);
+	if (nlm_do_granted(argp, &res4, rqstp, &rpc))
+		return (FALSE);
+	if (rpc) {
+		nlm4_granted_res_4(&res4, &dummy, rpc, NULL, nlm_zero_tv);
+		CLNT_RELEASE(rpc);
+	}
 	xdr_free((xdrproc_t) xdr_nlm4_res, &res4);
 
 	return (FALSE);
@@ -667,11 +641,6 @@ nlm4_granted_msg_4_svc(nlm4_testargs *argp, void *result, struct svc_req *rqstp)
 bool_t
 nlm4_test_res_4_svc(nlm4_testres *argp, void *result, struct svc_req *rqstp)
 {
-	CLIENT* lockd;
-
-	lockd = nlm_user_lockd();
-	if (lockd)
-		nlm4_test_res_4(argp, result, lockd);
 
 	return (FALSE);
 }
@@ -679,11 +648,6 @@ nlm4_test_res_4_svc(nlm4_testres *argp, void *result, struct svc_req *rqstp)
 bool_t
 nlm4_lock_res_4_svc(nlm4_res *argp, void *result, struct svc_req *rqstp)
 {
-	CLIENT* lockd;
-
-	lockd = nlm_user_lockd();
-	if (lockd)
-		nlm4_lock_res_4(argp, result, lockd);
 
 	return (FALSE);
 }
@@ -691,11 +655,6 @@ nlm4_lock_res_4_svc(nlm4_res *argp, void *result, struct svc_req *rqstp)
 bool_t
 nlm4_cancel_res_4_svc(nlm4_res *argp, void *result, struct svc_req *rqstp)
 {
-	CLIENT* lockd;
-
-	lockd = nlm_user_lockd();
-	if (lockd)
-		nlm4_cancel_res_4(argp, result, lockd);
 
 	return (FALSE);
 }
@@ -703,11 +662,6 @@ nlm4_cancel_res_4_svc(nlm4_res *argp, void *result, struct svc_req *rqstp)
 bool_t
 nlm4_unlock_res_4_svc(nlm4_res *argp, void *result, struct svc_req *rqstp)
 {
-	CLIENT* lockd;
-
-	lockd = nlm_user_lockd();
-	if (lockd)
-		nlm4_unlock_res_4(argp, result, lockd);
 
 	return (FALSE);
 }
@@ -741,7 +695,7 @@ bool_t
 nlm4_nm_lock_4_svc(nlm4_lockargs *argp, nlm4_res *result, struct svc_req *rqstp)
 {
 
-	nlm_do_lock(argp, result, rqstp, FALSE);
+	nlm_do_lock(argp, result, rqstp, FALSE, NULL);
 	return (TRUE);
 }
 
diff --git a/sys/rpc/auth_unix.c b/sys/rpc/auth_unix.c
index a2f5fd2..757d2dd 100644
--- a/sys/rpc/auth_unix.c
+++ b/sys/rpc/auth_unix.c
@@ -50,9 +50,11 @@ __FBSDID("$FreeBSD$");
 
 #include <sys/param.h>
 #include <sys/systm.h>
+#include <sys/hash.h>
+#include <sys/kernel.h>
 #include <sys/lock.h>
 #include <sys/malloc.h>
-#include <sys/mutex.h>
+#include <sys/sx.h>
 #include <sys/ucred.h>
 
 #include <rpc/types.h>
@@ -81,14 +83,39 @@ static struct auth_ops authunix_ops = {
  * This struct is pointed to by the ah_private field of an auth_handle.
  */
 struct audata {
+	TAILQ_ENTRY(audata)	au_link;
+	TAILQ_ENTRY(audata)	au_alllink;
+	int			au_refs;
+	struct xucred		au_xcred;
 	struct opaque_auth	au_origcred;	/* original credentials */
 	struct opaque_auth	au_shcred;	/* short hand cred */
 	u_long			au_shfaults;	/* short hand cache faults */
 	char			au_marshed[MAX_AUTH_BYTES];
 	u_int			au_mpos;	/* xdr pos at end of marshed */
+	AUTH			*au_auth;	/* link back to AUTH */
 };
+TAILQ_HEAD(audata_list, audata);
 #define	AUTH_PRIVATE(auth)	((struct audata *)auth->ah_private)
 
+#define AUTH_UNIX_HASH_SIZE	16
+#define AUTH_UNIX_MAX		256
+static struct audata_list auth_unix_cache[AUTH_UNIX_HASH_SIZE];
+static struct audata_list auth_unix_all;
+static struct sx auth_unix_lock;
+static int auth_unix_count;
+
+static void
+authunix_init(void *dummy)
+{
+	int i;
+
+	for (i = 0; i < AUTH_UNIX_HASH_SIZE; i++)
+		TAILQ_INIT(&auth_unix_cache[i]);
+	TAILQ_INIT(&auth_unix_all);
+	sx_init(&auth_unix_lock, "auth_unix_lock");
+}
+SYSINIT(authunix_init, SI_SUB_KMEM, SI_ORDER_ANY, authunix_init, NULL);
+
 /*
  * Create a unix style authenticator.
  * Returns an auth handle with the given stuff in it.
@@ -96,38 +123,70 @@ struct audata {
 AUTH *
 authunix_create(struct ucred *cred)
 {
+	uint32_t h, th;
 	struct xucred xcr;
 	char mymem[MAX_AUTH_BYTES];
 	XDR xdrs;
 	AUTH *auth;
-	struct audata *au;
+	struct audata *au, *tau;
 	struct timeval now;
 	uint32_t time;
 	int len;
 
+	if (auth_unix_count > AUTH_UNIX_MAX) {
+		while (auth_unix_count > AUTH_UNIX_MAX) {
+			sx_xlock(&auth_unix_lock);
+			tau = TAILQ_FIRST(&auth_unix_all);
+			th = HASHSTEP(HASHINIT, tau->au_xcred.cr_uid)
+				% AUTH_UNIX_HASH_SIZE;
+			TAILQ_REMOVE(&auth_unix_cache[th], tau, au_link);
+			TAILQ_REMOVE(&auth_unix_all, tau, au_alllink);
+			auth_unix_count--;
+			sx_xunlock(&auth_unix_lock);
+			AUTH_DESTROY(tau->au_auth);
+		}
+	}
+
+	/*
+	 * Hash the uid to see if we already have an AUTH with this cred.
+	 */
+	h = HASHSTEP(HASHINIT, cred->cr_uid) % AUTH_UNIX_HASH_SIZE;
+	cru2x(cred, &xcr);
+again:
+	sx_slock(&auth_unix_lock);
+	TAILQ_FOREACH(au, &auth_unix_cache[h], au_link) {
+		if (!memcmp(&xcr, &au->au_xcred, sizeof(xcr))) {
+			if (sx_try_upgrade(&auth_unix_lock)) {
+				/*
+				 * Keep auth_unix_all LRU sorted.
+				 */
+				TAILQ_REMOVE(&auth_unix_all, au, au_alllink);
+				TAILQ_INSERT_TAIL(&auth_unix_all, au,
+				    au_alllink);
+				au->au_refs++;
+				sx_xunlock(&auth_unix_lock);
+				return (au->au_auth);
+			} else {
+				sx_sunlock(&auth_unix_lock);
+				goto again;
+			}
+		}
+	}
+
 	/*
 	 * Allocate and set up auth handle
 	 */
 	au = NULL;
 	auth = mem_alloc(sizeof(*auth));
-#ifndef _KERNEL
-	if (auth == NULL) {
-		printf("authunix_create: out of memory");
-		goto cleanup_authunix_create;
-	}
-#endif
 	au = mem_alloc(sizeof(*au));
-#ifndef _KERNEL
-	if (au == NULL) {
-		printf("authunix_create: out of memory");
-		goto cleanup_authunix_create;
-	}
-#endif
 	auth->ah_ops = &authunix_ops;
 	auth->ah_private = (caddr_t)au;
 	auth->ah_verf = au->au_shcred = _null_auth;
+	au->au_refs = 1;
+	au->au_xcred = xcr;
 	au->au_shfaults = 0;
 	au->au_origcred.oa_base = NULL;
+	au->au_auth = auth;
 
 	getmicrotime(&now);
 	time = now.tv_sec;
@@ -141,14 +200,7 @@ authunix_create(struct ucred *cred)
 		panic("authunix_create: failed to encode creds");
 	au->au_origcred.oa_length = len = XDR_GETPOS(&xdrs);
 	au->au_origcred.oa_flavor = AUTH_UNIX;
-#ifdef _KERNEL
 	au->au_origcred.oa_base = mem_alloc((u_int) len);
-#else
-	if ((au->au_origcred.oa_base = mem_alloc((u_int) len)) == NULL) {
-		printf("authunix_create: out of memory");
-		goto cleanup_authunix_create;
-	}
-#endif
 	memcpy(au->au_origcred.oa_base, mymem, (size_t)len);
 
 	/*
@@ -156,18 +208,19 @@ authunix_create(struct ucred *cred)
 	 */
 	auth->ah_cred = au->au_origcred;
 	marshal_new_auth(auth);
-	return (auth);
-#ifndef _KERNEL
- cleanup_authunix_create:
-	if (auth)
-		mem_free(auth, sizeof(*auth));
-	if (au) {
-		if (au->au_origcred.oa_base)
-			mem_free(au->au_origcred.oa_base, (u_int)len);
-		mem_free(au, sizeof(*au));
+
+	if (sx_try_upgrade(&auth_unix_lock)) {
+		auth_unix_count++;
+		TAILQ_INSERT_TAIL(&auth_unix_cache[h], au, au_link);
+		TAILQ_INSERT_TAIL(&auth_unix_all, au, au_alllink);
+		au->au_refs++;	/* one for the cache, one for user */
+		sx_xunlock(&auth_unix_lock);
+		return (auth);
+	} else {
+		sx_sunlock(&auth_unix_lock);
+		AUTH_DESTROY(auth);
+		goto again;
 	}
-	return (NULL);
-#endif
 }
 
 /*
@@ -262,8 +315,18 @@ static void
 authunix_destroy(AUTH *auth)
 {
 	struct audata *au;
+	int refs;
 
 	au = AUTH_PRIVATE(auth);
+
+	sx_xlock(&auth_unix_lock);
+	au->au_refs--;
+	refs = au->au_refs;
+	sx_xunlock(&auth_unix_lock);
+
+	if (refs > 0)
+		return;
+
 	mem_free(au->au_origcred.oa_base, au->au_origcred.oa_length);
 
 	if (au->au_shcred.oa_base != NULL)
diff --git a/sys/rpc/authunix_prot.c b/sys/rpc/authunix_prot.c
index 3092b1f..141f594 100644
--- a/sys/rpc/authunix_prot.c
+++ b/sys/rpc/authunix_prot.c
@@ -68,7 +68,12 @@ xdr_authunix_parms(XDR *xdrs, uint32_t *time, struct xucred *cred)
 	uint32_t junk;
 
 	if (xdrs->x_op == XDR_ENCODE) {
+		/*
+		 * Restrict name length to 255 according to RFC 1057.
+		 */
 		namelen = strlen(hostname);
+		if (namelen > 255)
+			namelen = 255;
 	} else {
 		namelen = 0;
 	}
diff --git a/sys/rpc/clnt.h b/sys/rpc/clnt.h
index 4d6a778..03e3112 100644
--- a/sys/rpc/clnt.h
+++ b/sys/rpc/clnt.h
@@ -62,6 +62,7 @@
 #include <rpc/clnt_stat.h>
 #include <sys/cdefs.h>
 #ifdef _KERNEL
+#include <sys/refcount.h>
 #include <rpc/netconfig.h>
 #else
 #include <netconfig.h>
@@ -109,6 +110,23 @@ struct rpc_err {
 #define	re_lb		ru.RE_lb
 };
 
+#ifdef _KERNEL
+/*
+ * Functions of this type may be used to receive notification when RPC
+ * calls have to be re-transmitted etc.
+ */
+typedef void rpc_feedback(int cmd, int procnum, void *);
+
+/*
+ * A structure used with CLNT_CALL_EXT to pass extra information used
+ * while processing an RPC call.
+ */
+struct rpc_callextra {
+	AUTH		*rc_auth;	/* auth handle to use for this call */
+	rpc_feedback	*rc_feedback;	/* callback for retransmits etc. */
+	void		*rc_feedback_arg; /* argument for callback */
+};
+#endif
 
 /*
  * Client rpc handle.
@@ -116,12 +134,35 @@ struct rpc_err {
  * Client is responsible for initializing auth, see e.g. auth_none.c.
  */
 typedef struct __rpc_client {
+#ifdef _KERNEL
+	volatile u_int cl_refs;			/* reference count */
+	AUTH	*cl_auth;			/* authenticator */
+	struct clnt_ops {
+		/* call remote procedure */
+		enum clnt_stat	(*cl_call)(struct __rpc_client *,
+		    struct rpc_callextra *, rpcproc_t, xdrproc_t, void *,
+		    xdrproc_t, void *, struct timeval);
+		/* abort a call */
+		void		(*cl_abort)(struct __rpc_client *);
+		/* get specific error code */
+		void		(*cl_geterr)(struct __rpc_client *,
+					struct rpc_err *);
+		/* frees results */
+		bool_t		(*cl_freeres)(struct __rpc_client *,
+					xdrproc_t, void *);
+		/* destroy this structure */
+		void		(*cl_destroy)(struct __rpc_client *);
+		/* the ioctl() of rpc */
+		bool_t          (*cl_control)(struct __rpc_client *, u_int,
+				    void *);
+	} *cl_ops;
+#else
 	AUTH	*cl_auth;			/* authenticator */
 	struct clnt_ops {
 		/* call remote procedure */
 		enum clnt_stat	(*cl_call)(struct __rpc_client *,
-				    rpcproc_t, xdrproc_t, void *, xdrproc_t,
-				        void *, struct timeval);
+		    rpcproc_t, xdrproc_t, void *, xdrproc_t,
+		    void *, struct timeval);
 		/* abort a call */
 		void		(*cl_abort)(struct __rpc_client *);
 		/* get specific error code */
@@ -136,12 +177,12 @@ typedef struct __rpc_client {
 		bool_t          (*cl_control)(struct __rpc_client *, u_int,
 				    void *);
 	} *cl_ops;
+#endif
 	void 			*cl_private;	/* private stuff */
 	char			*cl_netid;	/* network token */
 	char			*cl_tp;		/* device name */
 } CLIENT;
 
-
 /*
  * Timers used for the pseudo-transport protocol when using datagrams
  */
@@ -154,8 +195,10 @@ struct rpc_timers {
 /*      
  * Feedback values used for possible congestion and rate control
  */
-#define FEEDBACK_REXMIT1	1	/* first retransmit */
-#define FEEDBACK_OK		2	/* no retransmits */    
+#define FEEDBACK_OK		1	/* no retransmits */    
+#define FEEDBACK_REXMIT1	2	/* first retransmit */
+#define FEEDBACK_REXMIT2	3	/* second and subsequent retransmit */
+#define FEEDBACK_RECONNECT	4	/* client reconnect */
 
 /* Used to set version of portmapper used in broadcast */
   
@@ -171,6 +214,30 @@ struct rpc_timers {
  *
  */
 
+#ifdef _KERNEL
+#define CLNT_ACQUIRE(rh)			\
+	refcount_acquire(&(rh)->cl_refs)
+#define CLNT_RELEASE(rh) do {	/* drop a ref, destroy on last */ \
+	if (refcount_release(&(rh)->cl_refs))	\
+		CLNT_DESTROY(rh); } while (0)
+
+/*
+ * enum clnt_stat
+ * CLNT_CALL_EXT(rh, ext, proc, xargs, argsp, xres, resp, timeout)
+ * 	CLIENT *rh;
+ *	struct rpc_callextra *ext;
+ *	rpcproc_t proc;
+ *	xdrproc_t xargs;
+ *	void *argsp;
+ *	xdrproc_t xres;
+ *	void *resp;
+ *	struct timeval timeout;
+ */
+#define	CLNT_CALL_EXT(rh, ext, proc, xargs, argsp, xres, resp, secs)	\
+	((*(rh)->cl_ops->cl_call)(rh, ext, proc, xargs,		\
+		argsp, xres, resp, secs))
+#endif
+
 /*
  * enum clnt_stat
  * CLNT_CALL(rh, proc, xargs, argsp, xres, resp, timeout)
@@ -182,12 +249,21 @@ struct rpc_timers {
  *	void *resp;
  *	struct timeval timeout;
  */
-#define	CLNT_CALL(rh, proc, xargs, argsp, xres, resp, secs) \
-	((*(rh)->cl_ops->cl_call)(rh, proc, xargs, \
+#ifdef _KERNEL
+#define	CLNT_CALL(rh, proc, xargs, argsp, xres, resp, secs)		\
+	((*(rh)->cl_ops->cl_call)(rh, NULL, proc, xargs,	\
 		argsp, xres, resp, secs))
-#define	clnt_call(rh, proc, xargs, argsp, xres, resp, secs) \
-	((*(rh)->cl_ops->cl_call)(rh, proc, xargs, \
+#define	clnt_call(rh, proc, xargs, argsp, xres, resp, secs)		\
+	((*(rh)->cl_ops->cl_call)(rh, NULL, proc, xargs,	\
 		argsp, xres, resp, secs))
+#else
+#define	CLNT_CALL(rh, proc, xargs, argsp, xres, resp, secs)		\
+	((*(rh)->cl_ops->cl_call)(rh, proc, xargs,	\
+		argsp, xres, resp, secs))
+#define	clnt_call(rh, proc, xargs, argsp, xres, resp, secs)		\
+	((*(rh)->cl_ops->cl_call)(rh, proc, xargs,	\
+		argsp, xres, resp, secs))
+#endif
 
 /*
  * void
@@ -262,6 +338,8 @@ struct rpc_timers {
 #define CLGET_WAITCHAN		22	/* get string used in msleep call */
 #define CLSET_INTERRUPTIBLE	23	/* set interruptible flag */
 #define CLGET_INTERRUPTIBLE	24	/* set interruptible flag */
+#define CLSET_RETRIES		25	/* set retry count for reconnect */
+#define CLGET_RETRIES		26	/* get retry count for reconnect */
 #endif
 
 
@@ -534,6 +612,7 @@ __END_DECLS
 #define rpc_createerr		(*(__rpc_createerr()))
 #endif
 
+#ifndef _KERNEL
 /*
  * The simplified interface:
  * enum clnt_stat
@@ -612,7 +691,6 @@ extern enum clnt_stat rpc_broadcast_exp(const rpcprog_t, const rpcvers_t,
 					const int, const char *);
 __END_DECLS
 
-#ifndef _KERNEL
 /* For backward compatibility */
 #include <rpc/clnt_soc.h>
 #endif
diff --git a/sys/rpc/clnt_dg.c b/sys/rpc/clnt_dg.c
index c66ac50..f14e1d6 100644
--- a/sys/rpc/clnt_dg.c
+++ b/sys/rpc/clnt_dg.c
@@ -45,6 +45,7 @@ __FBSDID("$FreeBSD$");
 
 #include <sys/param.h>
 #include <sys/systm.h>
+#include <sys/kernel.h>
 #include <sys/lock.h>
 #include <sys/malloc.h>
 #include <sys/mbuf.h>
@@ -70,8 +71,8 @@ __FBSDID("$FreeBSD$");
 #endif
 
 static bool_t time_not_ok(struct timeval *);
-static enum clnt_stat clnt_dg_call(CLIENT *, rpcproc_t, xdrproc_t, void *,
-	    xdrproc_t, void *, struct timeval);
+static enum clnt_stat clnt_dg_call(CLIENT *, struct rpc_callextra *,
+    rpcproc_t, xdrproc_t, void *, xdrproc_t, void *, struct timeval);
 static void clnt_dg_geterr(CLIENT *, struct rpc_err *);
 static bool_t clnt_dg_freeres(CLIENT *, xdrproc_t, void *);
 static void clnt_dg_abort(CLIENT *);
@@ -91,10 +92,13 @@ static struct clnt_ops clnt_dg_ops = {
 static const char mem_err_clnt_dg[] = "clnt_dg_create: out of memory";
 
 /*
- * A pending RPC request which awaits a reply.
+ * A pending RPC request which awaits a reply. Requests which have
+ * received their reply will have cr_xid set to zero and cr_mrep to
+ * the mbuf chain of the reply.
  */
 struct cu_request {
 	TAILQ_ENTRY(cu_request) cr_link;
+	CLIENT			*cr_client;	/* owner */
 	uint32_t		cr_xid;		/* XID of request */
 	struct mbuf		*cr_mrep;	/* reply received by upcall */
 	int			cr_error;	/* any error from upcall */
@@ -123,6 +127,8 @@ struct cu_socket {
  * Private data kept per client handle
  */
 struct cu_data {
+	int			cu_threads;	/* # threads in clnt_dg_call */
+	bool_t			cu_closing;	/* TRUE if we are destroying */
 	struct socket		*cu_socket;	/* connection socket */
 	bool_t			cu_closeit;	/* opened by library */
 	struct sockaddr_storage	cu_raddr;	/* remote address */
@@ -203,10 +209,12 @@ clnt_dg_create(
 	sendsz = ((sendsz + 3) / 4) * 4;
 	recvsz = ((recvsz + 3) / 4) * 4;
 	cu = mem_alloc(sizeof (*cu));
+	cu->cu_threads = 0;
+	cu->cu_closing = FALSE;
 	(void) memcpy(&cu->cu_raddr, svcaddr, (size_t)svcaddr->sa_len);
 	cu->cu_rlen = svcaddr->sa_len;
 	/* Other values can also be set through clnt_control() */
-	cu->cu_wait.tv_sec = 15;	/* heuristically chosen */
+	cu->cu_wait.tv_sec = 3;	/* heuristically chosen */
 	cu->cu_wait.tv_usec = 0;
 	cu->cu_total.tv_sec = -1;
 	cu->cu_total.tv_usec = -1;
@@ -237,6 +245,7 @@ clnt_dg_create(
 	 */
 	cu->cu_closeit = FALSE;
 	cu->cu_socket = so;
+	soreserve(so, 256*1024, 256*1024);
 
 	SOCKBUF_LOCK(&so->so_rcv);
 recheck_socket:
@@ -274,6 +283,7 @@ recheck_socket:
 	}
 	SOCKBUF_UNLOCK(&so->so_rcv);
 
+	cl->cl_refs = 1;
 	cl->cl_ops = &clnt_dg_ops;
 	cl->cl_private = (caddr_t)(void *)cu;
 	cl->cl_auth = authnone_create();
@@ -291,7 +301,8 @@ err2:
 
 static enum clnt_stat
 clnt_dg_call(
-	CLIENT	*cl,			/* client handle */
+	CLIENT		*cl,		/* client handle */
+	struct rpc_callextra *ext,	/* call metadata */
 	rpcproc_t	proc,		/* procedure number */
 	xdrproc_t	xargs,		/* xdr routine for args */
 	void		*argsp,		/* pointer to args */
@@ -301,30 +312,52 @@ clnt_dg_call(
 {
 	struct cu_data *cu = (struct cu_data *)cl->cl_private;
 	struct cu_socket *cs = (struct cu_socket *) cu->cu_socket->so_upcallarg;
+	AUTH *auth;
 	XDR xdrs;
 	struct rpc_msg reply_msg;
 	bool_t ok;
+	int retrans;			/* number of re-transmits so far */
 	int nrefreshes = 2;		/* number of times to refresh cred */
-	struct timeval timeout;
-	struct timeval retransmit_time;
-	struct timeval next_sendtime, starttime, time_waited, tv;
+	struct timeval *tvp;
+	int timeout;
+	int retransmit_time;
+	int next_sendtime, starttime, time_waited, tv;
 	struct sockaddr *sa;
 	socklen_t salen;
 	uint32_t xid;
 	struct mbuf *mreq = NULL;
-	struct cu_request cr;
+	struct cu_request *cr;
 	int error;
 
+	cr = malloc(sizeof(struct cu_request), M_RPC, M_WAITOK);
+
 	mtx_lock(&cs->cs_lock);
 
-	cr.cr_mrep = NULL;
-	cr.cr_error = 0;
+	if (cu->cu_closing) {
+		mtx_unlock(&cs->cs_lock);
+		free(cr, M_RPC);
+		return (RPC_CANTSEND);
+	}
+	cu->cu_threads++;
+
+	if (ext)
+		auth = ext->rc_auth;
+	else
+		auth = cl->cl_auth;
+
+	cr->cr_client = cl;
+	cr->cr_mrep = NULL;
+	cr->cr_error = 0;
 
 	if (cu->cu_total.tv_usec == -1) {
-		timeout = utimeout;	/* use supplied timeout */
+		tvp = &utimeout; /* use supplied timeout */
 	} else {
-		timeout = cu->cu_total;	/* use default timeout */
+		tvp = &cu->cu_total; /* use default timeout */
 	}
+	if (tvp->tv_sec || tvp->tv_usec)
+		timeout = tvtohz(tvp);
+	else
+		timeout = 0;
 
 	if (cu->cu_connect && !cu->cu_connected) {
 		mtx_unlock(&cs->cs_lock);
@@ -345,11 +378,11 @@ clnt_dg_call(
 		sa = (struct sockaddr *)&cu->cu_raddr;
 		salen = cu->cu_rlen;
 	}
-	time_waited.tv_sec = 0;
-	time_waited.tv_usec = 0;
-	retransmit_time = next_sendtime = cu->cu_wait;
+	time_waited = 0;
+	retrans = 0;
+	retransmit_time = next_sendtime = tvtohz(&cu->cu_wait);
 
-	getmicrotime(&starttime);
+	starttime = ticks;
 
 call_again:
 	mtx_assert(&cs->cs_lock, MA_OWNED);
@@ -376,7 +409,7 @@ send_again:
 		goto get_reply;
 
 	if ((! XDR_PUTINT32(&xdrs, &proc)) ||
-	    (! AUTH_MARSHALL(cl->cl_auth, &xdrs)) ||
+	    (! AUTH_MARSHALL(auth, &xdrs)) ||
 	    (! (*xargs)(&xdrs, argsp))) {
 		cu->cu_error.re_status = RPC_CANTENCODEARGS;
 		mtx_lock(&cs->cs_lock);
@@ -384,9 +417,9 @@ send_again:
 	}
 	m_fixhdr(mreq);
 
-	cr.cr_xid = xid;
+	cr->cr_xid = xid;
 	mtx_lock(&cs->cs_lock);
-	TAILQ_INSERT_TAIL(&cs->cs_pending, &cr, cr_link);
+	TAILQ_INSERT_TAIL(&cs->cs_pending, cr, cr_link);
 	mtx_unlock(&cs->cs_lock);
 
 	/*
@@ -406,8 +439,7 @@ send_again:
 
 	mtx_lock(&cs->cs_lock);
 	if (error) {
-		TAILQ_REMOVE(&cs->cs_pending, &cr, cr_link);
-
+		TAILQ_REMOVE(&cs->cs_pending, cr, cr_link);
 		cu->cu_error.re_errno = error;
 		cu->cu_error.re_status = RPC_CANTSEND;
 		goto out;
@@ -415,24 +447,24 @@ send_again:
 
 	/*
 	 * Check to see if we got an upcall while waiting for the
-	 * lock. In both these cases, the request has been removed
-	 * from cs->cs_pending.
+	 * lock.
 	 */
-	if (cr.cr_error) {
-		cu->cu_error.re_errno = cr.cr_error;
+	if (cr->cr_error) {
+		TAILQ_REMOVE(&cs->cs_pending, cr, cr_link);
+		cu->cu_error.re_errno = cr->cr_error;
 		cu->cu_error.re_status = RPC_CANTRECV;
 		goto out;
 	}
-	if (cr.cr_mrep) {
+	if (cr->cr_mrep) {
+		TAILQ_REMOVE(&cs->cs_pending, cr, cr_link);
 		goto got_reply;
 	}
 
 	/*
 	 * Hack to provide rpc-based message passing
 	 */
-	if (timeout.tv_sec == 0 && timeout.tv_usec == 0) {
-		if (cr.cr_xid)
-			TAILQ_REMOVE(&cs->cs_pending, &cr, cr_link);
+	if (timeout == 0) {
+		TAILQ_REMOVE(&cs->cs_pending, cr, cr_link);
 		cu->cu_error.re_status = RPC_TIMEDOUT;
 		goto out;
 	}
@@ -440,17 +472,23 @@ send_again:
 get_reply:
 	for (;;) {
 		/* Decide how long to wait. */
-		if (timevalcmp(&next_sendtime, &timeout, <)) {
+		if (next_sendtime < timeout)
 			tv = next_sendtime;
-		} else {
+		else
 			tv = timeout;
+		tv -= time_waited;
+
+		if (tv > 0) {
+			if (cu->cu_closing)
+				error = 0;
+			else
+				error = msleep(cr, &cs->cs_lock,
+				    cu->cu_waitflag, cu->cu_waitchan, tv);
+		} else {
+			error = EWOULDBLOCK;
 		}
-		timevalsub(&tv, &time_waited);
-		if (tv.tv_sec < 0 || tv.tv_usec < 0)
-			tv.tv_sec = tv.tv_usec = 0;
 
-		error = msleep(&cr, &cs->cs_lock, cu->cu_waitflag,
-		    cu->cu_waitchan, tvtohz(&tv));
+		TAILQ_REMOVE(&cs->cs_pending, cr, cr_link);
 
 		if (!error) {
 			/*
@@ -458,8 +496,8 @@ get_reply:
 			 * upcall had a receive error, report that,
 			 * otherwise we have a reply.
 			 */
-			if (cr.cr_error) {
-				cu->cu_error.re_errno = cr.cr_error;
+			if (cr->cr_error) {
+				cu->cu_error.re_errno = cr->cr_error;
 				cu->cu_error.re_status = RPC_CANTRECV;
 				goto out;
 			}
@@ -472,8 +510,6 @@ get_reply:
 		 * re-send the request.
 		 */
 		if (error != EWOULDBLOCK) {
-			if (cr.cr_xid)
-				TAILQ_REMOVE(&cs->cs_pending, &cr, cr_link);
 			cu->cu_error.re_errno = error;
 			if (error == EINTR)
 				cu->cu_error.re_status = RPC_INTR;
@@ -482,29 +518,40 @@ get_reply:
 			goto out;
 		}
 
-		getmicrotime(&tv);
-		time_waited = tv;
-		timevalsub(&time_waited, &starttime);
+		time_waited = ticks - starttime;
 
 		/* Check for timeout. */
-		if (timevalcmp(&time_waited, &timeout, >)) {
-			if (cr.cr_xid)
-				TAILQ_REMOVE(&cs->cs_pending, &cr, cr_link);
+		if (time_waited > timeout) {
 			cu->cu_error.re_errno = EWOULDBLOCK;
 			cu->cu_error.re_status = RPC_TIMEDOUT;
 			goto out;
 		}
 
 		/* Retransmit if necessary. */		
-		if (timevalcmp(&time_waited, &next_sendtime, >)) {
-			if (cr.cr_xid)
-				TAILQ_REMOVE(&cs->cs_pending, &cr, cr_link);
+		if (time_waited >= next_sendtime) {
+			if (ext && ext->rc_feedback) {
+				mtx_unlock(&cs->cs_lock);
+				if (retrans == 0)
+					ext->rc_feedback(FEEDBACK_REXMIT1,
+					    proc, ext->rc_feedback_arg);
+				else
+					ext->rc_feedback(FEEDBACK_REXMIT2,
+					    proc, ext->rc_feedback_arg);
+				mtx_lock(&cs->cs_lock);
+			}
+			if (cu->cu_closing) {
+				cu->cu_error.re_errno = ESHUTDOWN;
+				cu->cu_error.re_status = RPC_CANTRECV;
+				goto out;
+			}
+			retrans++;
 			/* update retransmit_time */
-			if (retransmit_time.tv_sec < RPC_MAX_BACKOFF)
-				timevaladd(&retransmit_time, &retransmit_time);
-			timevaladd(&next_sendtime, &retransmit_time);
+			if (retransmit_time < RPC_MAX_BACKOFF * hz)
+				retransmit_time = 2 * retransmit_time;
+			next_sendtime += retransmit_time;
 			goto send_again;
 		}
+		TAILQ_INSERT_TAIL(&cs->cs_pending, cr, cr_link);
 	}
 
 got_reply:
@@ -514,10 +561,13 @@ got_reply:
 	 */
 	mtx_unlock(&cs->cs_lock);
 
-	xdrmbuf_create(&xdrs, cr.cr_mrep, XDR_DECODE);
+	if (ext && ext->rc_feedback)
+		ext->rc_feedback(FEEDBACK_OK, proc, ext->rc_feedback_arg);
+
+	xdrmbuf_create(&xdrs, cr->cr_mrep, XDR_DECODE);
 	ok = xdr_replymsg(&xdrs, &reply_msg);
 	XDR_DESTROY(&xdrs);
-	cr.cr_mrep = NULL;
+	cr->cr_mrep = NULL;
 
 	mtx_lock(&cs->cs_lock);
 
@@ -562,10 +612,17 @@ out:
 
 	if (mreq)
 		m_freem(mreq);
-	if (cr.cr_mrep)
-		m_freem(cr.cr_mrep);
+	if (cr->cr_mrep)
+		m_freem(cr->cr_mrep);
 
+	cu->cu_threads--;
+	if (cu->cu_closing)
+		wakeup(cu);
+		
 	mtx_unlock(&cs->cs_lock);
+
+	free(cr, M_RPC);
+
 	return (cu->cu_error.re_status);
 }
 
@@ -732,30 +789,44 @@ clnt_dg_destroy(CLIENT *cl)
 {
 	struct cu_data *cu = (struct cu_data *)cl->cl_private;
 	struct cu_socket *cs = (struct cu_socket *) cu->cu_socket->so_upcallarg;
+	struct cu_request *cr;
 	struct socket *so = NULL;
 	bool_t lastsocketref;
 
-	SOCKBUF_LOCK(&cu->cu_socket->so_rcv);
-
 	mtx_lock(&cs->cs_lock);
+
+	/*
+	 * Abort any pending requests and wait until everyone
+	 * has finished with clnt_dg_call.
+	 */
+	cu->cu_closing = TRUE;
+	TAILQ_FOREACH(cr, &cs->cs_pending, cr_link) {
+		if (cr->cr_client == cl) {
+			cr->cr_xid = 0;
+			cr->cr_error = ESHUTDOWN;
+			wakeup(cr);
+		}
+	}
+
+	while (cu->cu_threads)
+		msleep(cu, &cs->cs_lock, 0, "rpcclose", 0);
+
 	cs->cs_refs--;
 	if (cs->cs_refs == 0) {
+		mtx_destroy(&cs->cs_lock);
+		SOCKBUF_LOCK(&cu->cu_socket->so_rcv);
 		cu->cu_socket->so_upcallarg = NULL;
 		cu->cu_socket->so_upcall = NULL;
 		cu->cu_socket->so_rcv.sb_flags &= ~SB_UPCALL;
-		mtx_destroy(&cs->cs_lock);
 		SOCKBUF_UNLOCK(&cu->cu_socket->so_rcv);
 		mem_free(cs, sizeof(*cs));
 		lastsocketref = TRUE;
 	} else {
 		mtx_unlock(&cs->cs_lock);
-		SOCKBUF_UNLOCK(&cu->cu_socket->so_rcv);
 		lastsocketref = FALSE;
 	}
 
-	if (cu->cu_closeit) {
-		KASSERT(lastsocketref, ("clnt_dg_destroy(): closing a socket "
-			"shared with other clients"));
+	if (cu->cu_closeit && lastsocketref) {
 		so = cu->cu_socket;
 		cu->cu_socket = NULL;
 	}
@@ -812,10 +883,10 @@ clnt_dg_soupcall(struct socket *so, void *arg, int waitflag)
 		if (error) {
 			mtx_lock(&cs->cs_lock);
 			TAILQ_FOREACH(cr, &cs->cs_pending, cr_link) {
+				cr->cr_xid = 0;
 				cr->cr_error = error;
 				wakeup(cr);
 			}
-			TAILQ_INIT(&cs->cs_pending);
 			mtx_unlock(&cs->cs_lock);
 			break;
 		}
@@ -825,7 +896,11 @@ clnt_dg_soupcall(struct socket *so, void *arg, int waitflag)
 		 */
 		m = m_pullup(m, sizeof(xid));
 		if (!m)
-			break;
+			/*
+			 * Should never happen.
+			 */
+			continue;
+
 		xid = ntohl(*mtod(m, uint32_t *));
 
 		/*
@@ -836,14 +911,13 @@ clnt_dg_soupcall(struct socket *so, void *arg, int waitflag)
 		TAILQ_FOREACH(cr, &cs->cs_pending, cr_link) {
 			if (cr->cr_xid == xid) {
 				/*
-				 * This one matches. We snip it out of
-				 * the pending list and leave the
+				 * This one matches. We leave the
 				 * reply mbuf in cr->cr_mrep. Set the
-				 * XID to zero so that clnt_dg_call
-				 * can know not to repeat the
-				 * TAILQ_REMOVE.
+				 * XID to zero so that we will ignore
+				 * any duplicated replies that arrive
+				 * before clnt_dg_call removes it from
+				 * the queue.
 				 */
-				TAILQ_REMOVE(&cs->cs_pending, cr, cr_link);
 				cr->cr_xid = 0;
 				cr->cr_mrep = m;
 				cr->cr_error = 0;
diff --git a/sys/rpc/clnt_rc.c b/sys/rpc/clnt_rc.c
index 8b5fc26..a6b2dfd 100644
--- a/sys/rpc/clnt_rc.c
+++ b/sys/rpc/clnt_rc.c
@@ -30,6 +30,7 @@ __FBSDID("$FreeBSD$");
 
 #include <sys/param.h>
 #include <sys/systm.h>
+#include <sys/limits.h>
 #include <sys/lock.h>
 #include <sys/malloc.h>
 #include <sys/mbuf.h>
@@ -44,8 +45,8 @@ __FBSDID("$FreeBSD$");
 #include <rpc/rpc.h>
 #include <rpc/rpc_com.h>
 
-static enum clnt_stat clnt_reconnect_call(CLIENT *, rpcproc_t,
-    xdrproc_t, void *, xdrproc_t, void *, struct timeval);
+static enum clnt_stat clnt_reconnect_call(CLIENT *, struct rpc_callextra *,
+    rpcproc_t, xdrproc_t, void *, xdrproc_t, void *, struct timeval);
 static void clnt_reconnect_geterr(CLIENT *, struct rpc_err *);
 static bool_t clnt_reconnect_freeres(CLIENT *, xdrproc_t, void *);
 static void clnt_reconnect_abort(CLIENT *);
@@ -62,6 +63,7 @@ static struct clnt_ops clnt_reconnect_ops = {
 };
 
 struct rc_data {
+	struct mtx		rc_lock;
 	struct sockaddr_storage	rc_addr; /* server address */
 	struct netconfig*	rc_nconf; /* network type */
 	rpcprog_t		rc_prog;  /* program number */
@@ -70,8 +72,10 @@ struct rc_data {
 	size_t			rc_recvsz;
 	struct timeval		rc_timeout;
 	struct timeval		rc_retry;
+	int			rc_retries;
 	const char		*rc_waitchan;
 	int			rc_intr;
+	int			rc_connecting;
 	CLIENT*			rc_client; /* underlying RPC client */
 };
 
@@ -94,6 +98,7 @@ clnt_reconnect_create(
 
 	cl = mem_alloc(sizeof (CLIENT));
 	rc = mem_alloc(sizeof (*rc));
+	mtx_init(&rc->rc_lock, "rc->rc_lock", NULL, MTX_DEF);
 	(void) memcpy(&rc->rc_addr, svcaddr, (size_t)svcaddr->sa_len);
 	rc->rc_nconf = nconf;
 	rc->rc_prog = program;
@@ -102,12 +107,15 @@ clnt_reconnect_create(
 	rc->rc_recvsz = recvsz;
 	rc->rc_timeout.tv_sec = -1;
 	rc->rc_timeout.tv_usec = -1;
-	rc->rc_retry.tv_sec = 15;
+	rc->rc_retry.tv_sec = 3;
 	rc->rc_retry.tv_usec = 0;
+	rc->rc_retries = INT_MAX;
 	rc->rc_waitchan = "rpcrecv";
 	rc->rc_intr = 0;
+	rc->rc_connecting = FALSE;
 	rc->rc_client = NULL;
 
+	cl->cl_refs = 1;
 	cl->cl_ops = &clnt_reconnect_ops;
 	cl->cl_private = (caddr_t)(void *)rc;
 	cl->cl_auth = authnone_create();
@@ -121,13 +129,39 @@ clnt_reconnect_connect(CLIENT *cl)
 {
 	struct rc_data *rc = (struct rc_data *)cl->cl_private;
 	struct socket *so;
+	enum clnt_stat stat;
+	int error;
 	int one = 1;
 
+	mtx_lock(&rc->rc_lock);
+again:
+	if (rc->rc_connecting) {
+		while (rc->rc_connecting && !rc->rc_client) {
+			error = msleep(rc, &rc->rc_lock,
+			    rc->rc_intr ? PCATCH : 0, "rpcrecon", 0);
+			if (error) {
+				mtx_unlock(&rc->rc_lock);
+				return (RPC_INTR);
+			}
+		}
+		/*
+		 * The connecting thread has finished. If it failed (no
+		 * client was installed), take over and have another go.
+		 */
+		if (!rc->rc_client && !rc->rc_connecting)
+			goto again;
+		mtx_unlock(&rc->rc_lock);
+		return (RPC_SUCCESS);
+	} else {
+		rc->rc_connecting = TRUE;
+	}
+	mtx_unlock(&rc->rc_lock);
+
 	so = __rpc_nconf2socket(rc->rc_nconf);
 	if (!so) {
-		rpc_createerr.cf_stat = RPC_TLIERROR;
+		stat = rpc_createerr.cf_stat = RPC_TLIERROR;
 		rpc_createerr.cf_error.re_errno = 0;
-		return (RPC_TLIERROR);
+		goto out;
 	}
 
 	if (rc->rc_nconf->nc_semantics == NC_TPI_CLTS)
@@ -139,8 +173,10 @@ clnt_reconnect_connect(CLIENT *cl)
 		    (struct sockaddr *) &rc->rc_addr, rc->rc_prog, rc->rc_vers,
 		    rc->rc_sendsz, rc->rc_recvsz);
 
-	if (!rc->rc_client)
-		return (rpc_createerr.cf_stat);
+	if (!rc->rc_client) {
+		stat = rpc_createerr.cf_stat;
+		goto out;
+	}
 
 	CLNT_CONTROL(rc->rc_client, CLSET_FD_CLOSE, 0);
 	CLNT_CONTROL(rc->rc_client, CLSET_CONNECT, &one);
@@ -148,13 +184,21 @@ clnt_reconnect_connect(CLIENT *cl)
 	CLNT_CONTROL(rc->rc_client, CLSET_RETRY_TIMEOUT, &rc->rc_retry);
 	CLNT_CONTROL(rc->rc_client, CLSET_WAITCHAN, &rc->rc_waitchan);
 	CLNT_CONTROL(rc->rc_client, CLSET_INTERRUPTIBLE, &rc->rc_intr);
+	stat = RPC_SUCCESS;
+
+out:
+	mtx_lock(&rc->rc_lock);
+	rc->rc_connecting = FALSE;
+	wakeup(rc);
+	mtx_unlock(&rc->rc_lock);
 
-	return (RPC_SUCCESS);
+	return (stat);
 }
 
 static enum clnt_stat
 clnt_reconnect_call(
-	CLIENT	*cl,			/* client handle */
+	CLIENT		*cl,		/* client handle */
+	struct rpc_callextra *ext,	/* call metadata */
 	rpcproc_t	proc,		/* procedure number */
 	xdrproc_t	xargs,		/* xdr routine for args */
 	void		*argsp,		/* pointer to args */
@@ -163,8 +207,11 @@ clnt_reconnect_call(
 	struct timeval	utimeout)	/* seconds to wait before giving up */
 {
 	struct rc_data *rc = (struct rc_data *)cl->cl_private;
+	CLIENT *client;
 	enum clnt_stat stat;
+	int tries;
 
+	tries = 0;
 	do {
 		if (!rc->rc_client) {
 			stat = clnt_reconnect_connect(cl);
@@ -172,9 +219,14 @@ clnt_reconnect_call(
 				return (stat);
 		}
 
-		stat = CLNT_CALL(rc->rc_client, proc, xargs, argsp,
+		mtx_lock(&rc->rc_lock);
+		CLNT_ACQUIRE(rc->rc_client);
+		client = rc->rc_client;
+		mtx_unlock(&rc->rc_lock);
+		stat = CLNT_CALL_EXT(client, ext, proc, xargs, argsp,
 		    xresults, resultsp, utimeout);
 
+		CLNT_RELEASE(client);
 		if (stat == RPC_TIMEDOUT) {
 			/*
 			 * Check for async send misfeature for NLM
@@ -184,16 +236,33 @@ clnt_reconnect_call(
 				&& rc->rc_timeout.tv_usec == 0)
 			    || (rc->rc_timeout.tv_sec == -1
 				&& utimeout.tv_sec == 0
-				&& utimeout.tv_usec == 0))
+				&& utimeout.tv_usec == 0)) {
 				break;
+			}
 		}
 
 		if (stat == RPC_INTR)
 			break;
 
 		if (stat != RPC_SUCCESS) {
-			CLNT_DESTROY(rc->rc_client);
-			rc->rc_client = NULL;
+			tries++;
+			if (tries >= rc->rc_retries)
+				break;
+
+			if (ext && ext->rc_feedback)
+				ext->rc_feedback(FEEDBACK_RECONNECT, proc,
+				    ext->rc_feedback_arg);
+
+			mtx_lock(&rc->rc_lock);
+			/*
+			 * Make sure that someone else hasn't already
+			 * reconnected.
+			 */
+			if (rc->rc_client == client) {
+				CLNT_RELEASE(rc->rc_client);
+				rc->rc_client = NULL;
+			}
+			mtx_unlock(&rc->rc_lock);
 		}
 	} while (stat != RPC_SUCCESS);
 
@@ -294,6 +363,14 @@ clnt_reconnect_control(CLIENT *cl, u_int request, void *info)
 		*(int *) info = rc->rc_intr;
 		break;
 
+	case CLSET_RETRIES:
+		rc->rc_retries = *(int *) info;
+		break;
+
+	case CLGET_RETRIES:
+		*(int *) info = rc->rc_retries;
+		break;
+
 	default:
 		return (FALSE);
 	}
diff --git a/sys/rpc/clnt_vc.c b/sys/rpc/clnt_vc.c
index 5731e1e..cb09352 100644
--- a/sys/rpc/clnt_vc.c
+++ b/sys/rpc/clnt_vc.c
@@ -80,8 +80,8 @@ struct cmessage {
         struct cmsgcred cmcred;
 };
 
-static enum clnt_stat clnt_vc_call(CLIENT *, rpcproc_t, xdrproc_t, void *,
-    xdrproc_t, void *, struct timeval);
+static enum clnt_stat clnt_vc_call(CLIENT *, struct rpc_callextra *,
+    rpcproc_t, xdrproc_t, void *, xdrproc_t, void *, struct timeval);
 static void clnt_vc_geterr(CLIENT *, struct rpc_err *);
 static bool_t clnt_vc_freeres(CLIENT *, xdrproc_t, void *);
 static void clnt_vc_abort(CLIENT *);
@@ -100,7 +100,9 @@ static struct clnt_ops clnt_vc_ops = {
 };
 
 /*
- * A pending RPC request which awaits a reply.
+ * A pending RPC request which awaits a reply. Requests which have
+ * received their reply will have cr_xid set to zero and cr_mrep to
+ * the mbuf chain of the reply.
  */
 struct ct_request {
 	TAILQ_ENTRY(ct_request) cr_link;
@@ -113,6 +115,8 @@ TAILQ_HEAD(ct_request_list, ct_request);
 
 struct ct_data {
 	struct mtx	ct_lock;
+	int		ct_threads;	/* number of threads in clnt_vc_call */
+	bool_t		ct_closing;	/* TRUE if we are destroying client */
 	struct socket	*ct_socket;	/* connection socket */
 	bool_t		ct_closeit;	/* close it on destroy */
 	struct timeval	ct_wait;	/* wait interval in milliseconds */
@@ -161,7 +165,7 @@ clnt_vc_create(
 	static uint32_t disrupt;
 	struct __rpc_sockinfo si;
 	XDR xdrs;
-	int error;
+	int error, interrupted;
 
 	if (disrupt == 0)
 		disrupt = (uint32_t)(long)raddr;
@@ -170,10 +174,31 @@ clnt_vc_create(
 	ct = (struct ct_data *)mem_alloc(sizeof (*ct));
 
 	mtx_init(&ct->ct_lock, "ct->ct_lock", NULL, MTX_DEF);
+	ct->ct_threads = 0;
+	ct->ct_closing = FALSE;
 
 	if ((so->so_state & (SS_ISCONNECTED|SS_ISCONFIRMING)) == 0) {
 		error = soconnect(so, raddr, curthread);
+		SOCK_LOCK(so);
+		interrupted = 0;
+		while ((so->so_state & SS_ISCONNECTING)
+		    && so->so_error == 0) {
+			error = msleep(&so->so_timeo, SOCK_MTX(so),
+			    PSOCK | PCATCH, "connec", 0);
+			if (error) {
+				if (error == EINTR || error == ERESTART)
+					interrupted = 1;
+				break;
+			}
+		}
+		if (error == 0) {
+			error = so->so_error;
+			so->so_error = 0;
+		}
+		SOCK_UNLOCK(so);
 		if (error) {
+			if (!interrupted)
+				so->so_state &= ~SS_ISCONNECTING;
 			rpc_createerr.cf_stat = RPC_SYSTEMERROR;
 			rpc_createerr.cf_error.re_errno = error;
 			goto err;
@@ -224,6 +249,7 @@ clnt_vc_create(
 	 * Create a client handle which uses xdrrec for serialization
 	 * and authnone for authentication.
 	 */
+	cl->cl_refs = 1;
 	cl->cl_ops = &clnt_vc_ops;
 	cl->cl_private = ct;
 	cl->cl_auth = authnone_create();
@@ -255,6 +281,7 @@ err:
 static enum clnt_stat
 clnt_vc_call(
 	CLIENT *cl,
+	struct rpc_callextra *ext,
 	rpcproc_t proc,
 	xdrproc_t xdr_args,
 	void *args_ptr,
@@ -263,6 +290,7 @@ clnt_vc_call(
 	struct timeval utimeout)
 {
 	struct ct_data *ct = (struct ct_data *) cl->cl_private;
+	AUTH *auth;
 	XDR xdrs;
 	struct rpc_msg reply_msg;
 	bool_t ok;
@@ -270,13 +298,27 @@ clnt_vc_call(
 	struct timeval timeout;
 	uint32_t xid;
 	struct mbuf *mreq = NULL;
-	struct ct_request cr;
+	struct ct_request *cr;
 	int error;
 
+	cr = malloc(sizeof(struct ct_request), M_RPC, M_WAITOK);
+
 	mtx_lock(&ct->ct_lock);
 
-	cr.cr_mrep = NULL;
-	cr.cr_error = 0;
+	if (ct->ct_closing) {
+		mtx_unlock(&ct->ct_lock);
+		free(cr, M_RPC);
+		return (RPC_CANTSEND);
+	}
+	ct->ct_threads++;
+
+	if (ext)
+		auth = ext->rc_auth;
+	else
+		auth = cl->cl_auth;
+
+	cr->cr_mrep = NULL;
+	cr->cr_error = 0;
 
 	if (ct->ct_wait.tv_usec == -1) {
 		timeout = utimeout;	/* use supplied timeout */
@@ -311,12 +353,12 @@ call_again:
 	ct->ct_error.re_status = RPC_SUCCESS;
 
 	if ((! XDR_PUTINT32(&xdrs, &proc)) ||
-	    (! AUTH_MARSHALL(cl->cl_auth, &xdrs)) ||
+	    (! AUTH_MARSHALL(auth, &xdrs)) ||
 	    (! (*xdr_args)(&xdrs, args_ptr))) {
 		if (ct->ct_error.re_status == RPC_SUCCESS)
 			ct->ct_error.re_status = RPC_CANTENCODEARGS;
-		m_freem(mreq);
-		return (ct->ct_error.re_status);
+		mtx_lock(&ct->ct_lock);
+		goto out;
 	}
 	m_fixhdr(mreq);
 
@@ -327,9 +369,9 @@ call_again:
 	*mtod(mreq, uint32_t *) =
 		htonl(0x80000000 | (mreq->m_pkthdr.len - sizeof(uint32_t)));
 
-	cr.cr_xid = xid;
+	cr->cr_xid = xid;
 	mtx_lock(&ct->ct_lock);
-	TAILQ_INSERT_TAIL(&ct->ct_pending, &cr, cr_link);
+	TAILQ_INSERT_TAIL(&ct->ct_pending, cr, cr_link);
 	mtx_unlock(&ct->ct_lock);
 
 	/*
@@ -343,10 +385,8 @@ call_again:
 	reply_msg.acpted_rply.ar_results.proc = xdr_results;
 
 	mtx_lock(&ct->ct_lock);
-
 	if (error) {
-		TAILQ_REMOVE(&ct->ct_pending, &cr, cr_link);
-
+		TAILQ_REMOVE(&ct->ct_pending, cr, cr_link);
 		ct->ct_error.re_errno = error;
 		ct->ct_error.re_status = RPC_CANTSEND;
 		goto out;
@@ -357,12 +397,14 @@ call_again:
 	 * lock. In both these cases, the request has been removed
 	 * from ct->ct_pending.
 	 */
-	if (cr.cr_error) {
-		ct->ct_error.re_errno = cr.cr_error;
+	if (cr->cr_error) {
+		TAILQ_REMOVE(&ct->ct_pending, cr, cr_link);
+		ct->ct_error.re_errno = cr->cr_error;
 		ct->ct_error.re_status = RPC_CANTRECV;
 		goto out;
 	}
-	if (cr.cr_mrep) {
+	if (cr->cr_mrep) {
+		TAILQ_REMOVE(&ct->ct_pending, cr, cr_link);
 		goto got_reply;
 	}
 
@@ -370,23 +412,22 @@ call_again:
 	 * Hack to provide rpc-based message passing
 	 */
 	if (timeout.tv_sec == 0 && timeout.tv_usec == 0) {
-		if (cr.cr_xid)
-			TAILQ_REMOVE(&ct->ct_pending, &cr, cr_link);
+		TAILQ_REMOVE(&ct->ct_pending, cr, cr_link);
 		ct->ct_error.re_status = RPC_TIMEDOUT;
 		goto out;
 	}
 
-	error = msleep(&cr, &ct->ct_lock, ct->ct_waitflag, ct->ct_waitchan,
+	error = msleep(cr, &ct->ct_lock, ct->ct_waitflag, ct->ct_waitchan,
 	    tvtohz(&timeout));
 
+	TAILQ_REMOVE(&ct->ct_pending, cr, cr_link);
+
 	if (error) {
 		/*
 		 * The sleep returned an error so our request is still
 		 * on the list. Turn the error code into an
 		 * appropriate client status.
 		 */
-		if (cr.cr_xid)
-			TAILQ_REMOVE(&ct->ct_pending, &cr, cr_link);
 		ct->ct_error.re_errno = error;
 		switch (error) {
 		case EINTR:
@@ -405,8 +446,8 @@ call_again:
 		 * upcall had a receive error, report that,
 		 * otherwise we have a reply.
 		 */
-		if (cr.cr_error) {
-			ct->ct_error.re_errno = cr.cr_error;
+		if (cr->cr_error) {
+			ct->ct_error.re_errno = cr->cr_error;
 			ct->ct_error.re_status = RPC_CANTRECV;
 			goto out;
 		}
@@ -419,10 +460,10 @@ got_reply:
 	 */
 	mtx_unlock(&ct->ct_lock);
 
-	xdrmbuf_create(&xdrs, cr.cr_mrep, XDR_DECODE);
+	xdrmbuf_create(&xdrs, cr->cr_mrep, XDR_DECODE);
 	ok = xdr_replymsg(&xdrs, &reply_msg);
 	XDR_DESTROY(&xdrs);
-	cr.cr_mrep = NULL;
+	cr->cr_mrep = NULL;
 
 	mtx_lock(&ct->ct_lock);
 
@@ -466,10 +507,17 @@ out:
 
 	if (mreq)
 		m_freem(mreq);
-	if (cr.cr_mrep)
-		m_freem(cr.cr_mrep);
+	if (cr->cr_mrep)
+		m_freem(cr->cr_mrep);
 
+	ct->ct_threads--;
+	if (ct->ct_closing)
+		wakeup(ct);
+		
 	mtx_unlock(&ct->ct_lock);
+
+	free(cr, M_RPC);
+
 	return (ct->ct_error.re_status);
 }
 
@@ -628,6 +676,7 @@ static void
 clnt_vc_destroy(CLIENT *cl)
 {
 	struct ct_data *ct = (struct ct_data *) cl->cl_private;
+	struct ct_request *cr;
 	struct socket *so = NULL;
 
 	mtx_lock(&ct->ct_lock);
@@ -639,8 +688,19 @@ clnt_vc_destroy(CLIENT *cl)
 		ct->ct_socket->so_rcv.sb_flags &= ~SB_UPCALL;
 		SOCKBUF_UNLOCK(&ct->ct_socket->so_rcv);
 
-		KASSERT(!TAILQ_FIRST(&ct->ct_pending),
-		    ("Destroying RPC client with pending RPC requests"));
+		/*
+		 * Abort any pending requests and wait until everyone
+		 * has finished with clnt_vc_call.
+		 */
+		ct->ct_closing = TRUE;
+		TAILQ_FOREACH(cr, &ct->ct_pending, cr_link) {
+			cr->cr_xid = 0;
+			cr->cr_error = ESHUTDOWN;
+			wakeup(cr);
+		}
+
+		while (ct->ct_threads)
+			msleep(ct, &ct->ct_lock, 0, "rpcclose", 0);
 
 		if (ct->ct_closeit) {
 			so = ct->ct_socket;
@@ -732,7 +792,6 @@ clnt_vc_soupcall(struct socket *so, void *arg, int waitflag)
 					cr->cr_error = error;
 					wakeup(cr);
 				}
-				TAILQ_INIT(&ct->ct_pending);
 				mtx_unlock(&ct->ct_lock);
 				break;
 			}
@@ -795,19 +854,14 @@ clnt_vc_soupcall(struct socket *so, void *arg, int waitflag)
 					if (cr->cr_xid == xid) {
 						/*
 						 * This one
-						 * matches. We snip it
-						 * out of the pending
-						 * list and leave the
-						 * reply mbuf in
+						 * matches. We leave
+						 * the reply mbuf in
 						 * cr->cr_mrep. Set
 						 * the XID to zero so
-						 * that clnt_vc_call
-						 * can know not to
-						 * repeat the
-						 * TAILQ_REMOVE.
+						 * that we will ignore
+						 * any duplicated
+						 * replies.
 						 */
-						TAILQ_REMOVE(&ct->ct_pending,
-						    cr, cr_link);
 						cr->cr_xid = 0;
 						cr->cr_mrep = ct->ct_record;
 						cr->cr_error = 0;
diff --git a/sys/rpc/svc_vc.c b/sys/rpc/svc_vc.c
index 54edfd0..47530da 100644
--- a/sys/rpc/svc_vc.c
+++ b/sys/rpc/svc_vc.c
@@ -132,6 +132,15 @@ svc_vc_create(SVCPOOL *pool, struct socket *so, size_t sendsize,
 	struct sockaddr* sa;
 	int error;
 
+	if (so->so_state & SS_ISCONNECTED) {
+		error = so->so_proto->pr_usrreqs->pru_peeraddr(so, &sa);
+		if (error)
+			return (NULL);
+		xprt = svc_vc_create_conn(pool, so, sa);
+		free(sa, M_SONAME);
+		return (xprt);
+	}
+
 	xprt = mem_alloc(sizeof(SVCXPRT));
 	mtx_init(&xprt->xp_lock, "xprt->xp_lock", NULL, MTX_DEF);
 	xprt->xp_pool = pool;
@@ -180,8 +189,32 @@ svc_vc_create_conn(SVCPOOL *pool, struct socket *so, struct sockaddr *raddr)
 	SVCXPRT *xprt = NULL;
 	struct cf_conn *cd = NULL;
 	struct sockaddr* sa = NULL;
+	struct sockopt opt;
+	int one = 1;
 	int error;
 
+	bzero(&opt, sizeof(struct sockopt));
+	opt.sopt_dir = SOPT_SET;
+	opt.sopt_level = SOL_SOCKET;
+	opt.sopt_name = SO_KEEPALIVE;
+	opt.sopt_val = &one;
+	opt.sopt_valsize = sizeof(one);
+	error = sosetopt(so, &opt);
+	if (error)
+		return (NULL);
+
+	if (so->so_proto->pr_protocol == IPPROTO_TCP) {
+		bzero(&opt, sizeof(struct sockopt));
+		opt.sopt_dir = SOPT_SET;
+		opt.sopt_level = IPPROTO_TCP;
+		opt.sopt_name = TCP_NODELAY;
+		opt.sopt_val = &one;
+		opt.sopt_valsize = sizeof(one);
+		error = sosetopt(so, &opt);
+		if (error)
+			return (NULL);
+	}
+
 	cd = mem_alloc(sizeof(*cd));
 	cd->strm_stat = XPRT_IDLE;
 
@@ -306,8 +339,6 @@ svc_vc_rendezvous_recv(SVCXPRT *xprt, struct rpc_msg *msg)
 {
 	struct socket *so = NULL;
 	struct sockaddr *sa = NULL;
-	struct sockopt opt;
-	int one = 1;
 	int error;
 
 	/*
@@ -351,16 +382,6 @@ svc_vc_rendezvous_recv(SVCXPRT *xprt, struct rpc_msg *msg)
 	sa = 0;
 	error = soaccept(so, &sa);
 
-	if (!error) {
-		bzero(&opt, sizeof(struct sockopt));
-		opt.sopt_dir = SOPT_SET;
-		opt.sopt_level = IPPROTO_TCP;
-		opt.sopt_name = TCP_NODELAY;
-		opt.sopt_val = &one;
-		opt.sopt_valsize = sizeof(one);
-		error = sosetopt(so, &opt);
-	}
-
 	if (error) {
 		/*
 		 * XXX not sure if I need to call sofree or soclose here.
@@ -374,7 +395,9 @@ svc_vc_rendezvous_recv(SVCXPRT *xprt, struct rpc_msg *msg)
 	 * svc_vc_create_conn will call xprt_register - we don't need
 	 * to do anything with the new connection.
 	 */
-	svc_vc_create_conn(xprt->xp_pool, so, sa);
+	if (!svc_vc_create_conn(xprt->xp_pool, so, sa))
+		soclose(so);
+
 	free(sa, M_SONAME);
 
 	return (FALSE); /* there is never an rpc msg to be processed */
diff --git a/sys/sys/fcntl.h b/sys/sys/fcntl.h
index f6f5b8d..600ddd8 100644
--- a/sys/sys/fcntl.h
+++ b/sys/sys/fcntl.h
@@ -227,6 +227,7 @@ typedef	__pid_t		pid_t;
 #define	F_FLOCK		0x020	 	/* Use flock(2) semantics for lock */
 #define	F_POSIX		0x040	 	/* Use POSIX semantics for lock */
 #define	F_REMOTE	0x080		/* Lock owner is remote NFS client */
+#define F_NOINTR	0x100		/* Ignore signals when waiting */
 #endif
 
 /*
diff --git a/sys/sys/lockf.h b/sys/sys/lockf.h
index ee20a7d..3239066 100644
--- a/sys/sys/lockf.h
+++ b/sys/sys/lockf.h
@@ -40,6 +40,7 @@
 #include <sys/_lock.h>
 #include <sys/_sx.h>
 
+struct flock;
 struct vop_advlock_args;
 struct vop_advlockasync_args;
 
@@ -118,9 +119,13 @@ struct lockf {
 };
 LIST_HEAD(lockf_list, lockf);
 
+typedef int lf_iterator(struct vnode *, struct flock *, void *);
+
 int	 lf_advlock(struct vop_advlock_args *, struct lockf **, u_quad_t);
 int	 lf_advlockasync(struct vop_advlockasync_args *, struct lockf **, u_quad_t);
 void	 lf_purgelocks(struct vnode *vp, struct lockf **statep);
+int	 lf_iteratelocks_sysid(int sysid, lf_iterator *, void *);
+int	 lf_iteratelocks_vnode(struct vnode *vp, lf_iterator *, void *);
 int	 lf_countlocks(int sysid);
 void	 lf_clearremotesys(int sysid);
 
diff --git a/sys/sys/param.h b/sys/sys/param.h
index 8b19a64..280a34b 100644
--- a/sys/sys/param.h
+++ b/sys/sys/param.h
@@ -57,7 +57,7 @@
  *		is created, otherwise 1.
  */
 #undef __FreeBSD_version
-#define __FreeBSD_version 800039	/* Master, propagated to newvers */
+#define __FreeBSD_version 800040	/* Master, propagated to newvers */
 
 #ifndef LOCORE
 #include <sys/types.h>
diff --git a/tools/regression/file/flock/flock.c b/tools/regression/file/flock/flock.c
index 9068b08..2b42290 100644
--- a/tools/regression/file/flock/flock.c
+++ b/tools/regression/file/flock/flock.c
@@ -31,6 +31,7 @@
 #ifdef __FreeBSD__
 #include <sys/mount.h>
 #endif
+#include <sys/stat.h>
 #include <sys/wait.h>
 
 #include <err.h>
@@ -56,16 +57,28 @@
 int verbose = 0;
 
 static int
-make_file(const char *dir, off_t sz)
+make_file(const char *pathname, off_t sz)
 {
+	struct stat st;
 	const char *template = "/flocktempXXXXXX";
 	size_t len;
 	char *filename;
 	int fd;
 
-	len = strlen(dir) + strlen(template) + 1;
+	if (stat(pathname, &st) == 0) {
+		if (S_ISREG(st.st_mode)) {
+			fd = open(pathname, O_RDWR);
+			if (fd < 0)
+				err(1, "open(%s)", pathname);
+			if (ftruncate(fd, sz) < 0)
+				err(1, "ftruncate");
+			return (fd);
+		}
+	}
+
+	len = strlen(pathname) + strlen(template) + 1;
 	filename = malloc(len);
-	strcpy(filename, dir);
+	strcpy(filename, pathname);
 	strcat(filename, template);
 	fd = mkstemp(filename);
 	if (fd < 0)
@@ -84,6 +97,24 @@ ignore_alarm(int __unused sig)
 {
 }
 
+static int
+safe_waitpid(pid_t pid)
+{
+	int save_errno;
+	int status;
+
+	save_errno = errno;
+	errno = 0;
+	while (waitpid(pid, &status, 0) != pid) {
+		if (errno == EINTR)
+			continue;
+		err(1, "waitpid");
+	}
+	errno = save_errno;
+
+	return (status);
+}
+
 #define FAIL(test)					\
 	do {						\
 		if (test) {				\
@@ -103,7 +134,7 @@ ignore_alarm(int __unused sig)
  * except for the lock type which is set to F_UNLCK.
  */
 static int
-test1(int fd)
+test1(int fd, __unused int argc, const __unused char **argv)
 {
 	struct flock fl1, fl2;
 
@@ -128,24 +159,6 @@ test1(int fd)
 	SUCCEED;
 }
 
-static int
-safe_waitpid(pid_t pid)
-{
-	int save_errno;
-	int stat;
-
-	save_errno = errno;
-	errno = 0;
-	while (waitpid(pid, &stat, 0) != pid) {
-		if (errno == EINTR)
-			continue;
-		err(1, "waitpid");
-	}
-	errno = save_errno;
-
-	return (stat);
-}
-
 /*
  * Test 2 - F_SETLK on locked region
  *
@@ -153,7 +166,7 @@ safe_waitpid(pid_t pid)
  * immediately with EACCES or EAGAIN.
  */
 static int
-test2(int fd)
+test2(int fd, __unused int argc, const __unused char **argv)
 {
 	/*
 	 * We create a child process to hold the lock which we will
@@ -224,7 +237,7 @@ test2(int fd)
  * in FreeBSD's client (and server) lockd implementation.
  */
 static int
-test3(int fd)
+test3(int fd, __unused int argc, const __unused char **argv)
 {
 	/*
 	 * We create a child process to hold the lock which we will
@@ -294,7 +307,7 @@ test3(int fd)
  * Get the first lock that blocks the lock.
  */
 static int
-test4(int fd)
+test4(int fd, __unused int argc, const __unused char **argv)
 {
 	/*
 	 * We create a child process to hold the lock which we will
@@ -371,7 +384,7 @@ test4(int fd)
  * EDEADLK is returned.
  */
 static int
-test5(int fd)
+test5(int fd, __unused int argc, const __unused char **argv)
 {
 	/*
 	 * We create a child process to hold the lock which we will
@@ -426,8 +439,11 @@ test5(int fd)
 	sleep(1);
 
 	/*
-	 * fcntl should immediately return -1 with errno set to EDEADLK.
+	 * fcntl should immediately return -1 with errno set to
+	 * EDEADLK. If the alarm fires, we failed to detect the
+	 * deadlock.
 	 */
+	alarm(1);
 	printf("5 - F_SETLKW simple deadlock: ");
 
 	fl.l_start = 1;
@@ -444,6 +460,11 @@ test5(int fd)
 	if (fcntl(fd, F_SETLK, &fl) < 0)
 		err(1, "F_UNLCK");
 
+	/*
+	 * Cancel the alarm to avoid confusing later tests.
+	 */
+	alarm(0);
+
 	SUCCEED;
 }
 
@@ -457,7 +478,7 @@ test5(int fd)
  * (due to C2's blocking attempt to lock byte zero).
  */
 static int
-test6(int fd)
+test6(int fd, __unused int argc, const __unused char **argv)
 {
 	/*
 	 * Because our test relies on the child process being blocked
@@ -560,7 +581,7 @@ test6(int fd)
  * immediately with EACCES or EAGAIN.
  */
 static int
-test7(int fd)
+test7(int fd, __unused int argc, const __unused char **argv)
 {
 	/*
 	 * We create a child process to hold the lock which we will
@@ -632,7 +653,7 @@ test7(int fd)
  * it.
  */
 static int
-test8(int fd)
+test8(int fd, __unused int argc, const __unused char **argv)
 {
 	/*
 	 * We create a child process to hold the lock which we will
@@ -709,7 +730,7 @@ test8(int fd)
  * immediately with EACCES or EAGAIN.
  */
 static int
-test9(int fd)
+test9(int fd, __unused int argc, const __unused char **argv)
 {
 	/*
 	 * We create a child process to hold the lock which we will
@@ -781,7 +802,7 @@ test9(int fd)
  * system ID of the system that owns that process
  */
 static int
-test10(int fd)
+test10(int fd, __unused int argc, const __unused char **argv)
 {
 	/*
 	 * We create a child process to hold the lock which we will
@@ -854,7 +875,7 @@ test10(int fd)
  * is added.
  */
 static int
-test11(int fd)
+test11(int fd, __unused int argc, const __unused char **argv)
 {
 #ifdef F_SETLK_REMOTE
 	struct flock fl;
@@ -934,7 +955,7 @@ test11(int fd)
  * process waits until the request can be satisfied.
  */
 static int
-test12(int fd)
+test12(int fd, __unused int argc, const __unused char **argv)
 {
 	/*
 	 * We create a child process to hold the lock which we will
@@ -1011,7 +1032,7 @@ test12(int fd)
  * process waits until the request can be satisfied.
  */
 static int
-test13(int fd)
+test13(int fd, __unused int argc, const __unused char **argv)
 {
 	/*
 	 * We create a child process to hold the lock which we will
@@ -1096,14 +1117,14 @@ test13(int fd)
  * Test 14 - soak test
  */
 static int
-test14(int fd)
+test14(int fd, int argc, const char **argv)
 {
 #define CHILD_COUNT 20
 	/*
 	 * We create a set of child processes and let each one run
 	 * through a random sequence of locks and unlocks.
 	 */
-	int i, j, id;
+	int i, j, id, id_base;
 	int pids[CHILD_COUNT], pid;
 	char buf[128];
 	char tbuf[128];
@@ -1113,11 +1134,13 @@ test14(int fd)
 	struct itimerval itv;
 	int status;
 
+	id_base = 0;
+	if (argc >= 2)
+		id_base = strtol(argv[1], NULL, 0);
+
 	printf("14 - soak test: ");
 	fflush(stdout);
 
-	memset(buf, 255, sizeof(buf));
-	pwrite(fd, buf, sizeof(buf), 0);
 	for (i = 0; i < 128; i++)
 		map[i] = F_UNLCK;
 
@@ -1137,8 +1160,8 @@ test14(int fd)
 		/*
 		 * Child - do some work and exit.
 		 */
-		id = getpid();
-		srandom(id);
+		id = id_base + i;
+		srandom(getpid());
 
 		for (j = 0; j < 50; j++) {
 			int start, end, len;
@@ -1277,8 +1300,109 @@ test14(int fd)
 	SUCCEED;
 }
 
+/*
+ * Test 15 - flock(2) semantics
+ *
+ * When a lock holder has a shared lock and attempts to upgrade that
+ * shared lock to exclusive, it must drop the shared lock before
+ * blocking on the exclusive lock.
+ *
+ * To test this, we first arrange for two shared locks on the file,
+ * and then attempt to upgrade one of them to exclusive. This should
+ * drop one of the shared locks and block. We interrupt the blocking
+ * lock request and examine the lock state of the file after dropping
+ * the other shared lock - there should be no active locks at this
+ * point.
+ */
+static int
+test15(int fd, __unused int argc, const __unused char **argv)
+{
+#ifdef LOCK_EX
+	/*
+	 * We create a child process to hold the lock which we will
+	 * test. We use a pipe to communicate with the child.
+	 *
+	 * Since we only have one file descriptor and lock ownership
+	 * for flock(2) goes with the file descriptor, we use fcntl to
+	 * set the child's shared lock.
+	 */
+	int pid;
+	int pfd[2];
+	int fd2;
+	struct flock fl;
+	char ch;
+	int res;
+
+	if (pipe(pfd) < 0)
+		err(1, "pipe");
+
+	pid = fork();
+	if (pid < 0)
+		err(1, "fork");
+
+	if (pid == 0) {
+		/*
+		 * We are the child. We set a shared lock and then
+		 * write one byte back to the parent to tell it. The
+		 * parent will kill us when it's done.
+		 */
+		fl.l_start = 0;
+		fl.l_len = 0;
+		fl.l_type = F_RDLCK;
+		fl.l_whence = SEEK_SET;
+		if (fcntl(fd, F_SETLK, &fl) < 0)
+			err(1, "fcntl(F_SETLK) (child)");
+		if (write(pfd[1], "a", 1) < 0)
+			err(1, "writing to pipe (child)");
+		pause();
+		exit(0);
+	}
+
+	/*
+	 * Wait until the child has set its lock and then perform the
+	 * test.
+	 */
+	if (read(pfd[0], &ch, 1) != 1)
+		err(1, "reading from pipe (child)");
+
+	fd2 = dup(fd);
+	if (flock(fd, LOCK_SH) < 0)
+		err(1, "flock shared");
+
+	/*
+	 * flock should wait until the alarm and then return -1 with
+	 * errno set to EINTR.
+	 */
+	printf("15 - flock(2) semantics: ");
+
+	alarm(1);
+	flock(fd, LOCK_EX);
+
+	/*
+	 * Kill the child to force it to drop its locks.
+	 */
+	kill(pid, SIGTERM);
+	safe_waitpid(pid);
+
+	fl.l_start = 0;
+	fl.l_len = 0;
+	fl.l_type = F_WRLCK;
+	fl.l_whence = SEEK_SET;
+	res = fcntl(fd, F_GETLK, &fl);
+
+	close(pfd[0]);
+	close(pfd[1]);
+	FAIL(res != 0);
+	FAIL(fl.l_type != F_UNLCK);
+
+	SUCCEED;
+#else
+	return 0;
+#endif
+}
+
 struct test {
-	int (*testfn)(int);	/* function to perform the test */
+	int (*testfn)(int, int, const char **);	/* function to perform the test */
 	int num;		/* test number */
 	int intr;		/* non-zero if the test interrupts a lock */
 };
@@ -1298,6 +1422,7 @@ struct test tests[] = {
 	{	test12,		12,	0	},
 	{	test13,		13,	1	},
 	{	test14,		14,	0	},
+	{	test15,		15,	1	},
 };
 int test_count = sizeof(tests) / sizeof(tests[0]);
 
@@ -1309,16 +1434,23 @@ main(int argc, const char *argv[])
 	int nointr;
 	int i;
 	struct sigaction sa;
+	int test_argc;
+	const char **test_argv;
 
-	if (argc < 2 || argc > 3) {
-		errx(1, "usage: flock <directory> [test number]");
+	if (argc < 2) {
+		errx(1, "usage: flock <directory> [test number] ...");
 	}
 
 	fd = make_file(argv[1], 1024);
-	if (argc == 3)
+	if (argc >= 3) {
 		testnum = strtol(argv[2], NULL, 0);
-	else
+		test_argc = argc - 2;
+		test_argv = argv + 2;
+	} else {
 		testnum = 0;
+		test_argc = 0;
+		test_argv = 0;
+	}
 
 	sa.sa_handler = ignore_alarm;
 	sigemptyset(&sa.sa_mask);
@@ -1326,11 +1458,11 @@ main(int argc, const char *argv[])
 	sigaction(SIGALRM, &sa, 0);
 
 	nointr = 0;
-#ifdef __FreeBSD__
+#if defined(__FreeBSD__) && __FreeBSD_version < 800040
 	{
 		/*
-		 * FreeBSD can't interrupt a blocked lock request on
-		 * an NFS mounted filesystem.
+		 * FreeBSD with userland NLM can't interrupt a blocked
+		 * lock request on an NFS mounted filesystem.
 		 */
 		struct statfs st;
 		fstatfs(fd, &st);
@@ -1342,7 +1474,7 @@ main(int argc, const char *argv[])
 		if (tests[i].intr && nointr)
 			continue;
 		if (!testnum || tests[i].num == testnum)
-			tests[i].testfn(fd);
+			tests[i].testfn(fd, test_argc, test_argv);
 	}
 
 	return 0;
diff --git a/usr.sbin/rpc.lockd/lockd.c b/usr.sbin/rpc.lockd/lockd.c
index f2fedce..c111b58 100644
--- a/usr.sbin/rpc.lockd/lockd.c
+++ b/usr.sbin/rpc.lockd/lockd.c
@@ -80,6 +80,7 @@ int		_rpcsvcdirty = 0;
 int grace_expired;
 int nsm_state;
 int kernel_lockd;
+int kernel_lockd_client;
 pid_t client_pid;
 struct mon mon_host;
 char **hosts, *svcport_str = NULL;
@@ -175,6 +176,7 @@ main(int argc, char **argv)
         }
 
 	kernel_lockd = FALSE;
+	kernel_lockd_client = FALSE;
 	if (modfind("nfslockd") < 0) {
 		if (kldload("nfslockd") < 0) {
 			fprintf(stderr, "Can't find or load kernel support for rpc.lockd - using non-kernel implementation\n");
@@ -184,6 +186,10 @@ main(int argc, char **argv)
 	} else {
 		kernel_lockd = TRUE;
 	}
+	if (kernel_lockd) {
+		if (getosreldate() >= 800040)
+			kernel_lockd_client = TRUE;
+	}
 
 	(void)rpcb_unset(NLM_PROG, NLM_SM, NULL);
 	(void)rpcb_unset(NLM_PROG, NLM_VERS, NULL);
@@ -245,41 +251,42 @@ main(int argc, char **argv)
 	}
 
 	if (kernel_lockd) {
-		/*
-		 * For the kernel lockd case, we run a cut-down RPC
-		 * service on a local-domain socket. The kernel's RPC
-		 * server will pass what it can't handle (mainly
-		 * client replies) down to us. This can go away
-		 * entirely if/when we move the client side of NFS
-		 * locking into the kernel.
-		 */
-		struct sockaddr_un sun;
-		int fd, oldmask;
-		SVCXPRT *xprt;
-
-		memset(&sun, 0, sizeof sun);
-		sun.sun_family = AF_LOCAL;
-		unlink(_PATH_RPCLOCKDSOCK);
-		strcpy(sun.sun_path, _PATH_RPCLOCKDSOCK);
-		sun.sun_len = SUN_LEN(&sun);
-		fd = socket(AF_LOCAL, SOCK_STREAM, 0);
-		if (!fd) {
-			err(1, "Can't create local lockd socket");
-		}
-		oldmask = umask(S_IXUSR|S_IRWXG|S_IRWXO);
-		if (bind(fd, (struct sockaddr *) &sun, sun.sun_len) < 0) {
-			err(1, "Can't bind local lockd socket");
-		}
-		umask(oldmask);
-		if (listen(fd, SOMAXCONN) < 0) {
-			err(1, "Can't listen on local lockd socket");
-		}
-		xprt = svc_vc_create(fd, RPC_MAXDATASIZE, RPC_MAXDATASIZE);
-		if (!xprt) {
-			err(1, "Can't create transport for local lockd socket");
-		}
-		if (!svc_reg(xprt, NLM_PROG, NLM_VERS4, nlm_prog_4, NULL)) {
-			err(1, "Can't register service for local lockd socket");
+		if (!kernel_lockd_client) {
+			/*
+			 * For the case where we have a kernel lockd but it
+			 * doesn't provide client locking, we run a cut-down
+			 * RPC service on a local-domain socket. The kernel's
+			 * RPC server will pass what it can't handle (mainly
+			 * client replies) down to us.
+			 */
+			struct sockaddr_un sun;
+			int fd, oldmask;
+			SVCXPRT *xprt;
+
+			memset(&sun, 0, sizeof sun);
+			sun.sun_family = AF_LOCAL;
+			unlink(_PATH_RPCLOCKDSOCK);
+			strcpy(sun.sun_path, _PATH_RPCLOCKDSOCK);
+			sun.sun_len = SUN_LEN(&sun);
+			fd = socket(AF_LOCAL, SOCK_STREAM, 0);
+			if (!fd) {
+				err(1, "Can't create local lockd socket");
+			}
+			oldmask = umask(S_IXUSR|S_IRWXG|S_IRWXO);
+			if (bind(fd, (struct sockaddr *) &sun, sun.sun_len) < 0) {
+				err(1, "Can't bind local lockd socket");
+			}
+			umask(oldmask);
+			if (listen(fd, SOMAXCONN) < 0) {
+				err(1, "Can't listen on local lockd socket");
+			}
+			xprt = svc_vc_create(fd, RPC_MAXDATASIZE, RPC_MAXDATASIZE);
+			if (!xprt) {
+				err(1, "Can't create transport for local lockd socket");
+			}
+			if (!svc_reg(xprt, NLM_PROG, NLM_VERS4, nlm_prog_4, NULL)) {
+				err(1, "Can't register service for local lockd socket");
+			}
 		}
 
 		/*
@@ -342,17 +349,27 @@ main(int argc, char **argv)
 	}
 
 	if (kernel_lockd) {
-		init_nsm();
-		client_pid = client_request();
-
-		/*
-		 * Create a child process to enter the kernel and then
-		 * wait for RPCs on our local domain socket.
-		 */
-		if (!fork())
+		if (!kernel_lockd_client) {
+			init_nsm();
+			client_pid = client_request();
+
+			/*
+			 * Create a child process to enter the kernel and then
+			 * wait for RPCs on our local domain socket.
+			 */
+			if (!fork())
+				nlm_syscall(debug_level, grace_period,
+				    naddrs, addrs);
+			else
+				svc_run();
+		} else {
+			/*
+			 * The kernel lockd implementation provides
+			 * both client and server so we don't need to
+			 * do anything else.
+			 */
 			nlm_syscall(debug_level, grace_period, naddrs, addrs);
-		else
-			svc_run();
+		}
 	} else {
 		grace_expired = 0;
 		alarm(grace_period);
diff --git a/usr.sbin/rpc.statd/file.c b/usr.sbin/rpc.statd/file.c
index efcaaaf..0625e30 100644
--- a/usr.sbin/rpc.statd/file.c
+++ b/usr.sbin/rpc.statd/file.c
@@ -36,6 +36,7 @@
 #include <err.h>
 #include <errno.h>
 #include <fcntl.h>
+#include <netdb.h>
 #include <stdio.h>
 #include <string.h>
 #include <unistd.h>
@@ -78,8 +79,11 @@ HostInfo *find_host(char *hostname, int create)
   HostInfo *hp;
   HostInfo *spare_slot = NULL;
   HostInfo *result = NULL;
+  struct addrinfo *ai1, *ai2;
   int i;
 
+  if (getaddrinfo(hostname, NULL, NULL, &ai1) != 0)
+    ai1 = NULL;
   for (i = 0, hp = status_info->hosts; i < status_info->noOfHosts; i++, hp++)
   {
     if (!strncasecmp(hostname, hp->hostname, SM_MAXSTRLEN))
@@ -87,9 +91,35 @@ HostInfo *find_host(char *hostname, int create)
       result = hp;
       break;
     }
+    if (hp->hostname[0] && 
+	getaddrinfo(hp->hostname, NULL, NULL, &ai2) != 0)
+      ai2 = NULL;
+    if (ai1 && ai2)
+    {
+       struct addrinfo *p1, *p2;
+       for (p1 = ai1; !result && p1; p1 = p1->ai_next)
+       {
+	 for (p2 = ai2; !result && p2; p2 = p2->ai_next)
+	 {
+	   if (p1->ai_family == p2->ai_family
+	       && p1->ai_addrlen == p2->ai_addrlen
+	       && !memcmp(p1->ai_addr, p2->ai_addr, p1->ai_addrlen))
+	   {
+	     result = hp;
+	     break;
+	   }
+	 }
+       }
+       if (result)
+	 break;
+    }
+    if (ai2)
+      freeaddrinfo(ai2);
     if (!spare_slot && !hp->monList && !hp->notifyReqd)
       spare_slot = hp;
   }
+  if (ai1)
+    freeaddrinfo(ai1);
 
   /* Return if entry found, or if not asked to create one.		*/
   if (result || !create) return (result);
author	dfr <dfr@FreeBSD.org>	2008-06-26 10:21:54 +0000
committer	dfr <dfr@FreeBSD.org>	2008-06-26 10:21:54 +0000
commit	41cea6d5ca71b8cf057f9face8055b218b30e18e (patch)
tree	994a214037913bc4e44eaee5070c65aeadf53485
parent	ca3c788812715a263f83dcec4bdabaf6c10eb922 (diff)
download	FreeBSD-src-41cea6d5ca71b8cf057f9face8055b218b30e18e.zip FreeBSD-src-41cea6d5ca71b8cf057f9face8055b218b30e18e.tar.gz