Improve the queuing algorithms used by NFS' asynchronous i/o.

The existing mechanism uses a global queue for some buffers and the vp->b_dirtyblkhd queue for others. This turns sequential writes into randomly ordered writes to the server, affecting both read and write performance. The existing mechanism also copes badly with hung servers, tending to block accesses to other servers when all the iods are waiting for a hung server. The new mechanism uses a queue for each mount point. All asynchronous i/o goes through this queue, which preserves the ordering of requests. A simple mechanism ensures that the iods are shared out fairly between active mount points. This removes the sysctl variable vfs.nfs.dwrite since the new queueing mechanism removes the old delayed write code completely. This should go into the 2.2 branch.
author: dfr <dfr@FreeBSD.org> 1996-11-06 10:53:16 +0000
committer: dfr <dfr@FreeBSD.org> 1996-11-06 10:53:16 +0000
commit: 4c8f7388e578751aab31ce0b8a982373a002aa7e (patch)
tree: b99fa8ba7a7ef13dafb400913ede1cef59f59743 /sys/nfsclient/nfs_bio.c
parent: d7d2cfcbf0e341b9b9672dcde6e1be024ed506b9 (diff)
download: FreeBSD-src-4c8f7388e578751aab31ce0b8a982373a002aa7e.zip
FreeBSD-src-4c8f7388e578751aab31ce0b8a982373a002aa7e.tar.gz
1 files changed, 85 insertions, 35 deletions
diff --git a/sys/nfsclient/nfs_bio.c b/sys/nfsclient/nfs_bio.c
index c57db67..da03a9d 100644
--- a/sys/nfsclient/nfs_bio.c
+++ b/sys/nfsclient/nfs_bio.c
@@ -34,7 +34,7 @@
  * SUCH DAMAGE.
  *
  *	@(#)nfs_bio.c	8.5 (Berkeley) 1/4/94
- * $Id: nfs_bio.c,v 1.27 1996/10/12 17:39:39 bde Exp $
+ * $Id: nfs_bio.c,v 1.28 1996/10/21 10:07:48 dfr Exp $
  */
 
 #include <sys/param.h>
@@ -62,13 +62,9 @@
 static struct buf *nfs_getcacheblk __P((struct vnode *vp, daddr_t bn, int size,
 					struct proc *p));
 
-extern struct proc *nfs_iodwant[NFS_MAXASYNCDAEMON];
 extern int nfs_numasync;
 extern struct nfsstats nfsstats;
 
-static int nfs_dwrite = 1;
-SYSCTL_INT(_vfs_nfs, OID_AUTO, dwrite, CTLFLAG_RW, &nfs_dwrite, 0, "");
-
 /*
  * Ifdefs for FreeBSD-current's merged VM/buffer cache. It is unfortunate
  * that this isn't done inside getblk() and brelse() so these calls
@@ -727,12 +723,87 @@ nfs_asyncio(bp, cred)
 	register struct buf *bp;
 	struct ucred *cred;
 {
-	register int i;
+	struct nfsmount *nmp;
+	int i;
+	int gotiod;
+	int slpflag = 0;
+	int slptimeo = 0;
+	int error;
 
 	if (nfs_numasync == 0)
 		return (EIO);
+	
+	nmp = VFSTONFS(bp->b_vp->v_mount);
+again:
+	if (nmp->nm_flag & NFSMNT_INT)
+		slpflag = PCATCH;
+	gotiod = FALSE;
+
+	/*
+	 * Find a free iod to process this request.
+	 */
 	for (i = 0; i < NFS_MAXASYNCDAEMON; i++)
-	    if (nfs_iodwant[i]) {
+		if (nfs_iodwant[i]) {
+			/*
+			 * Found one, so wake it up and tell it which
+			 * mount to process.
+			 */
+			NFS_DPF(ASYNCIO,
+				("nfs_asyncio: waking iod %d for mount %p\n",
+				 i, nmp));
+			nfs_iodwant[i] = (struct proc *)0;
+			nfs_iodmount[i] = nmp;
+			nmp->nm_bufqiods++;
+			wakeup((caddr_t)&nfs_iodwant[i]);
+			gotiod = TRUE;
+		}
+
+	/*
+	 * If none are free, we may already have an iod working on this mount
+	 * point.  If so, it will process our request.
+	 */
+	if (!gotiod) {
+		if (nmp->nm_bufqiods > 0) {
+			NFS_DPF(ASYNCIO,
+				("nfs_asyncio: %d iods are already processing mount %p\n",
+				 nmp->nm_bufqiods, nmp));
+			gotiod = TRUE;
+		}
+	}
+
+	/*
+	 * If we have an iod which can process the request, then queue
+	 * the buffer.
+	 */
+	if (gotiod) {
+		/*
+		 * Ensure that the queue never grows too large.
+		 */
+		while (nmp->nm_bufqlen >= 2*nfs_numasync) {
+			NFS_DPF(ASYNCIO,
+				("nfs_asyncio: waiting for mount %p queue to drain\n", nmp));
+			nmp->nm_bufqwant = TRUE;
+			error = tsleep(&nmp->nm_bufq, slpflag | PRIBIO,
+				       "nfsaio", slptimeo);
+			if (error) {
+				if (nfs_sigintr(nmp, NULL, bp->b_proc))
+					return (EINTR);
+				if (slpflag == PCATCH) {
+					slpflag = 0;
+					slptimeo = 2 * hz;
+				}
+			}
+			/*
+			 * We might have lost our iod while sleeping,
+			 * so check and loop if nescessary.
+			 */
+			if (nmp->nm_bufqiods == 0) {
+				NFS_DPF(ASYNCIO,
+					("nfs_asyncio: no iods after mount %p queue was drained, looping\n", nmp));
+				goto again;
+			}
+		}
+
 		if (bp->b_flags & B_READ) {
 			if (bp->b_rcred == NOCRED && cred != NOCRED) {
 				crhold(cred);
@@ -746,38 +817,17 @@ nfs_asyncio(bp, cred)
 			}
 		}
 
-		TAILQ_INSERT_TAIL(&nfs_bufq, bp, b_freelist);
-		nfs_iodwant[i] = (struct proc *)0;
-		wakeup((caddr_t)&nfs_iodwant[i]);
+		TAILQ_INSERT_TAIL(&nmp->nm_bufq, bp, b_freelist);
+		nmp->nm_bufqlen++;
 		return (0);
-	    }
-
-	/*
-	 * If it is a read or a write already marked B_WRITEINPROG or B_NOCACHE
-	 * return EIO so the process will call nfs_doio() and do it
-	 * synchronously.
-	 */
-	if (bp->b_flags & (B_READ | B_WRITEINPROG | B_NOCACHE))
-		return (EIO);
-
-	/*
-	 * Allow the administrator to override the choice of using a delayed
-	 * write since it is a pessimization for some servers, notably some
-	 * Solaris servers.
-	 */
-	if (!nfs_dwrite)
-		return (EIO);
+	}
 
 	/*
-	 * Just turn the async write into a delayed write, instead of
-	 * doing in synchronously. Hopefully, at least one of the nfsiods
-	 * is currently doing a write for this file and will pick up the
-	 * delayed writes before going back to sleep.
+	 * All the iods are busy on other mounts, so return EIO to
+	 * force the caller to process the i/o synchronously.
 	 */
-	bp->b_flags |= B_DELWRI;
-	reassignbuf(bp, bp->b_vp);
-	biodone(bp);
-	return (0);
+	NFS_DPF(ASYNCIO, ("nfs_asyncio: no iods available, i/o is synchronous\n"));
+	return (EIO);
 }
 
 /*
author	dfr <dfr@FreeBSD.org>	1996-11-06 10:53:16 +0000
committer	dfr <dfr@FreeBSD.org>	1996-11-06 10:53:16 +0000
commit	4c8f7388e578751aab31ce0b8a982373a002aa7e (patch)
tree	b99fa8ba7a7ef13dafb400913ede1cef59f59743 /sys/nfsclient/nfs_bio.c
parent	d7d2cfcbf0e341b9b9672dcde6e1be024ed506b9 (diff)
download	FreeBSD-src-4c8f7388e578751aab31ce0b8a982373a002aa7e.zip FreeBSD-src-4c8f7388e578751aab31ce0b8a982373a002aa7e.tar.gz