summaryrefslogtreecommitdiffstats
path: root/sys/nfs/nfs_socket.c
diff options
context:
space:
mode:
authordfr <dfr@FreeBSD.org>1996-10-11 10:15:33 +0000
committerdfr <dfr@FreeBSD.org>1996-10-11 10:15:33 +0000
commitde60fb9205631ede6a176edd5d010140cfa266d3 (patch)
tree7d089c2d16ff47871a314141686d876a406a3741 /sys/nfs/nfs_socket.c
parent2d340b514c77a95e6ab924510db5f79b7d3f448a (diff)
downloadFreeBSD-src-de60fb9205631ede6a176edd5d010140cfa266d3.zip
FreeBSD-src-de60fb9205631ede6a176edd5d010140cfa266d3.tar.gz
This fixes a problem with the nfs socket handling code which happens
if a single process is performing a large number of requests (in this case writing a large file). The writing process could monopolise the receive lock and prevent any other processes from receiving their replies. It also adds a new sysctl variable 'vfs.nfs.dwrite' which controls the behaviour which originally pointed out the problem. When a process writes to a file over NFS, it usually arranges for another process (the 'iod') to perform the request. If no iods are available, then it turns the write into a 'delayed write' which is later picked up by the next iod to do a write request for that file. This can cause that particular iod to do a disproportionate number of requests from a single process which can harm performance on some NFS servers. The alternative is to perform the write synchronously in the context of the original writing process if no iod is available for asynchronous writing. The 'delayed write' behaviour is selected when vfs.nfs.dwrite=1 and the non-delayed behaviour is selected when vfs.nfs.dwrite=0. The default is vfs.nfs.dwrite=1; if many people tell me that performance is better if vfs.nfs.dwrite=0 then I will change the default. Submitted by: Hidetoshi Shimokawa <simokawa@sat.t.u-tokyo.ac.jp>
Diffstat (limited to 'sys/nfs/nfs_socket.c')
-rw-r--r--sys/nfs/nfs_socket.c22
1 files changed, 16 insertions, 6 deletions
diff --git a/sys/nfs/nfs_socket.c b/sys/nfs/nfs_socket.c
index fb29864..6cafa8a 100644
--- a/sys/nfs/nfs_socket.c
+++ b/sys/nfs/nfs_socket.c
@@ -34,7 +34,7 @@
* SUCH DAMAGE.
*
* @(#)nfs_socket.c 8.3 (Berkeley) 1/12/94
- * $Id: nfs_socket.c,v 1.16 1996/06/14 11:13:18 phk Exp $
+ * $Id: nfs_socket.c,v 1.17 1996/07/11 16:32:45 wollman Exp $
*/
/*
@@ -681,15 +681,17 @@ nfs_reply(myrep)
* sbwait() after someone else has received my reply for me.
* Also necessary for connection based protocols to avoid
* race conditions during a reconnect.
+ * If nfs_rcvlock() returns EALREADY, that means that
+ * the reply has already been received by another
+ * process and we can return immediately. In this
+ * case, the lock is not taken to avoid races with
+ * other processes.
*/
error = nfs_rcvlock(myrep);
+ if (error == EALREADY)
+ return (0);
if (error)
return (error);
- /* Already received, bye bye */
- if (myrep->r_mrep != NULL) {
- nfs_rcvunlock(&nmp->nm_flag);
- return (0);
- }
/*
* Get the next Rpc reply off the socket
*/
@@ -1494,6 +1496,14 @@ nfs_rcvlock(rep)
*flagp |= NFSMNT_WANTRCV;
(void) tsleep((caddr_t)flagp, slpflag | (PZERO - 1), "nfsrcvlk",
slptimeo);
+ /*
+ * If our reply was received while we were sleeping,
+ * then just return without taking the lock to avoid a
+ * situation where a single iod could 'capture' the
+ * receive lock.
+ */
+ if (rep->r_mrep != NULL)
+ return (EALREADY);
if (slpflag == PCATCH) {
slpflag = 0;
slptimeo = 2 * hz;
OpenPOWER on IntegriCloud