jwd@ reported a problem via email where the old NFS client would get a reply of EEXIST from an NFS server when a Mkdir RPC was retried, for an NFS over UDP mount. Upon investigation, it was found that the client was retransmitting the Mkdir RPC request over UDP, but with a different xid. As such, the retransmitted message would miss the Duplicate Request Cache in the server, causing it to reply EEXIST.

The kernel client side UDP rpc code has two timers. The first one causes a retransmit using the same xid and socket and was set to a fixed value of 3 seconds. (The default can be overridden via CLSET_RETRY_TIMEOUT.) The second one creates a new socket and xid and should be larger than the first. However, both NFS clients were setting the second timer to nm_timeo ("timeout=<value>" mount argument), which defaulted to 1 second, so the first timer would never time out.

This patch fixes both NFS clients so that they set the first timer using nm_timeo and makes the second timer larger than the first one.

Reported by: jwd
Tested by: jwd
Reviewed by: jhb
MFC after: 2 weeks
author: rmacklem <rmacklem@FreeBSD.org> 2011-12-21 02:45:51 +0000
committer: rmacklem <rmacklem@FreeBSD.org> 2011-12-21 02:45:51 +0000
commit: 9a222cbb4c06161a806d8912cf934d60493ba235 (patch)
tree: 7bd580195a28371e048832986507c7cc4fd2ba82 /sys/fs/nfs
parent: d4ce8a6024f35a5f56ef5e86222ace3e3340d15d (diff)
download: FreeBSD-src-9a222cbb4c06161a806d8912cf934d60493ba235.zip
FreeBSD-src-9a222cbb4c06161a806d8912cf934d60493ba235.tar.gz
1 file changed, 57 insertions, 6 deletions
diff --git a/sys/fs/nfs/nfs_commonkrpc.c b/sys/fs/nfs/nfs_commonkrpc.c
index 1f65380..eb15881 100644
--- a/sys/fs/nfs/nfs_commonkrpc.c
+++ b/sys/fs/nfs/nfs_commonkrpc.c
@@ -168,6 +168,7 @@ newnfs_connect(struct nfsmount *nmp, struct nfssockreq *nrp,
 	struct socket *so;
 	int one = 1, retries, error = 0;
 	struct thread *td = curthread;
+	struct timeval timo;
 
 	/*
 	 * We need to establish the socket using the credentials of
@@ -264,9 +265,18 @@ newnfs_connect(struct nfsmount *nmp, struct nfssockreq *nrp,
 			CLNT_CONTROL(client, CLSET_INTERRUPTIBLE, &one);
 		if ((nmp->nm_flag & NFSMNT_RESVPORT))
 			CLNT_CONTROL(client, CLSET_PRIVPORT, &one);
-		if (NFSHASSOFT(nmp))
-			retries = nmp->nm_retry;
-		else
+		if (NFSHASSOFT(nmp)) {
+			if (nmp->nm_sotype == SOCK_DGRAM)
+				/*
+				 * For UDP, the large timeout for a reconnect
+				 * will be set to "nm_retry * nm_timeo / 2", so
+				 * we only want to do 2 reconnect timeout
+				 * retries.
+				 */
+				retries = 2;
+			else
+				retries = nmp->nm_retry;
+		} else
 			retries = INT_MAX;
 	} else {
 		/*
@@ -284,6 +294,27 @@ newnfs_connect(struct nfsmount *nmp, struct nfssockreq *nrp,
 	}
 	CLNT_CONTROL(client, CLSET_RETRIES, &retries);
 
+	if (nmp != NULL) {
+		/*
+		 * For UDP, there are 2 timeouts:
+		 * - CLSET_RETRY_TIMEOUT sets the initial timeout for the timer
+		 *   that does a retransmit of an RPC request using the same 
+		 *   socket and xid. This is what you normally want to do,
+		 *   since NFS servers depend on "same xid" for their
+		 *   Duplicate Request Cache.
+		 * - timeout specified in CLNT_CALL_MBUF(), which specifies when
+		 *   retransmits on the same socket should fail and a fresh
+		 *   socket created. Each of these timeouts counts as one
+		 *   CLSET_RETRIES as set above.
+		 * Set the initial retransmit timeout for UDP. This timeout
+		 * doesn't exist for TCP and the following call just fails,
+		 * which is ok.
+		 */
+		timo.tv_sec = nmp->nm_timeo / NFS_HZ;
+		timo.tv_usec = (nmp->nm_timeo % NFS_HZ) * 1000000 / NFS_HZ;
+		CLNT_CONTROL(client, CLSET_RETRY_TIMEOUT, &timo);
+	}
+
 	mtx_lock(&nrp->nr_mtx);
 	if (nrp->nr_client != NULL) {
 		/*
@@ -442,7 +473,7 @@ newnfs_request(struct nfsrv_descript *nd, struct nfsmount *nmp,
 {
 	u_int32_t *tl;
 	time_t waituntil;
-	int i, j, set_uid = 0, set_sigset = 0;
+	int i, j, set_uid = 0, set_sigset = 0, timeo;
 	int trycnt, error = 0, usegssname = 0, secflavour = AUTH_SYS;
 	u_int16_t procnum;
 	u_int trylater_delay = 1;
@@ -628,6 +659,12 @@ newnfs_request(struct nfsrv_descript *nd, struct nfsmount *nmp,
 	}
 	trycnt = 0;
 tryagain:
+	/*
+	 * This timeout specifies when a new socket should be created,
+	 * along with new xid values. For UDP, this should be done
+	 * infrequently, since retransmits of RPC requests should normally
+	 * use the same xid.
+	 */
 	if (nmp == NULL) {
 		timo.tv_usec = 0;
 		if (clp == NULL)
@@ -642,8 +679,22 @@ tryagain:
 			else
 				timo.tv_sec = NFS_TCPTIMEO;
 		} else {
-			timo.tv_sec = nmp->nm_timeo / NFS_HZ;
-			timo.tv_usec = (nmp->nm_timeo * 1000000) / NFS_HZ;
+			if (NFSHASSOFT(nmp)) {
+				/*
+				 * CLSET_RETRIES is set to 2, so this should be
+				 * half of the total timeout required.
+				 */
+				timeo = nmp->nm_retry * nmp->nm_timeo / 2;
+				if (timeo < 1)
+					timeo = 1;
+				timo.tv_sec = timeo / NFS_HZ;
+				timo.tv_usec = (timeo % NFS_HZ) * 1000000 /
+				    NFS_HZ;
+			} else {
+				/* For UDP hard mounts, use a large value. */
+				timo.tv_sec = NFS_MAXTIMEO / NFS_HZ;
+				timo.tv_usec = 0;
+			}
 		}
 
 		if (rep != NULL) {
author	rmacklem <rmacklem@FreeBSD.org>	2011-12-21 02:45:51 +0000
committer	rmacklem <rmacklem@FreeBSD.org>	2011-12-21 02:45:51 +0000
commit	9a222cbb4c06161a806d8912cf934d60493ba235 (patch)
tree	7bd580195a28371e048832986507c7cc4fd2ba82 /sys/fs/nfs
parent	d4ce8a6024f35a5f56ef5e86222ace3e3340d15d (diff)
download	FreeBSD-src-9a222cbb4c06161a806d8912cf934d60493ba235.zip FreeBSD-src-9a222cbb4c06161a806d8912cf934d60493ba235.tar.gz