summaryrefslogtreecommitdiffstats
path: root/sys/nfsclient/nfs_bio.c
diff options
context:
space:
mode:
authordfr <dfr@FreeBSD.org>1996-11-06 10:53:16 +0000
committerdfr <dfr@FreeBSD.org>1996-11-06 10:53:16 +0000
commit4c8f7388e578751aab31ce0b8a982373a002aa7e (patch)
treeb99fa8ba7a7ef13dafb400913ede1cef59f59743 /sys/nfsclient/nfs_bio.c
parentd7d2cfcbf0e341b9b9672dcde6e1be024ed506b9 (diff)
downloadFreeBSD-src-4c8f7388e578751aab31ce0b8a982373a002aa7e.zip
FreeBSD-src-4c8f7388e578751aab31ce0b8a982373a002aa7e.tar.gz
Improve the queuing algorithms used by NFS' asynchronous i/o. The
existing mechanism uses a global queue for some buffers and the vp->b_dirtyblkhd queue for others. This turns sequential writes into randomly ordered writes to the server, affecting both read and write performance. The existing mechanism also copes badly with hung servers, tending to block accesses to other servers when all the iods are waiting for a hung server. The new mechanism uses a queue for each mount point. All asynchronous i/o goes through this queue which preserves the ordering of requests. A simple mechanism ensures that the iods are shared out fairly between active mount points. This removes the sysctl variable vfs.nfs.dwrite since the new queueing mechanism removes the old delayed write code completely. This should go into the 2.2 branch.
Diffstat (limited to 'sys/nfsclient/nfs_bio.c')
-rw-r--r--sys/nfsclient/nfs_bio.c120
1 files changed, 85 insertions, 35 deletions
diff --git a/sys/nfsclient/nfs_bio.c b/sys/nfsclient/nfs_bio.c
index c57db67..da03a9d 100644
--- a/sys/nfsclient/nfs_bio.c
+++ b/sys/nfsclient/nfs_bio.c
@@ -34,7 +34,7 @@
* SUCH DAMAGE.
*
* @(#)nfs_bio.c 8.5 (Berkeley) 1/4/94
- * $Id: nfs_bio.c,v 1.27 1996/10/12 17:39:39 bde Exp $
+ * $Id: nfs_bio.c,v 1.28 1996/10/21 10:07:48 dfr Exp $
*/
#include <sys/param.h>
@@ -62,13 +62,9 @@
static struct buf *nfs_getcacheblk __P((struct vnode *vp, daddr_t bn, int size,
struct proc *p));
-extern struct proc *nfs_iodwant[NFS_MAXASYNCDAEMON];
extern int nfs_numasync;
extern struct nfsstats nfsstats;
-static int nfs_dwrite = 1;
-SYSCTL_INT(_vfs_nfs, OID_AUTO, dwrite, CTLFLAG_RW, &nfs_dwrite, 0, "");
-
/*
* Ifdefs for FreeBSD-current's merged VM/buffer cache. It is unfortunate
* that this isn't done inside getblk() and brelse() so these calls
@@ -727,12 +723,87 @@ nfs_asyncio(bp, cred)
register struct buf *bp;
struct ucred *cred;
{
- register int i;
+ struct nfsmount *nmp;
+ int i;
+ int gotiod;
+ int slpflag = 0;
+ int slptimeo = 0;
+ int error;
if (nfs_numasync == 0)
return (EIO);
+
+ nmp = VFSTONFS(bp->b_vp->v_mount);
+again:
+ if (nmp->nm_flag & NFSMNT_INT)
+ slpflag = PCATCH;
+ gotiod = FALSE;
+
+ /*
+ * Find a free iod to process this request.
+ */
for (i = 0; i < NFS_MAXASYNCDAEMON; i++)
- if (nfs_iodwant[i]) {
+ if (nfs_iodwant[i]) {
+ /*
+ * Found one, so wake it up and tell it which
+ * mount to process.
+ */
+ NFS_DPF(ASYNCIO,
+ ("nfs_asyncio: waking iod %d for mount %p\n",
+ i, nmp));
+ nfs_iodwant[i] = (struct proc *)0;
+ nfs_iodmount[i] = nmp;
+ nmp->nm_bufqiods++;
+ wakeup((caddr_t)&nfs_iodwant[i]);
+ gotiod = TRUE;
+ }
+
+ /*
+ * If none are free, we may already have an iod working on this mount
+ * point. If so, it will process our request.
+ */
+ if (!gotiod) {
+ if (nmp->nm_bufqiods > 0) {
+ NFS_DPF(ASYNCIO,
+ ("nfs_asyncio: %d iods are already processing mount %p\n",
+ nmp->nm_bufqiods, nmp));
+ gotiod = TRUE;
+ }
+ }
+
+ /*
+ * If we have an iod which can process the request, then queue
+ * the buffer.
+ */
+ if (gotiod) {
+ /*
+ * Ensure that the queue never grows too large.
+ */
+ while (nmp->nm_bufqlen >= 2*nfs_numasync) {
+ NFS_DPF(ASYNCIO,
+ ("nfs_asyncio: waiting for mount %p queue to drain\n", nmp));
+ nmp->nm_bufqwant = TRUE;
+ error = tsleep(&nmp->nm_bufq, slpflag | PRIBIO,
+ "nfsaio", slptimeo);
+ if (error) {
+ if (nfs_sigintr(nmp, NULL, bp->b_proc))
+ return (EINTR);
+ if (slpflag == PCATCH) {
+ slpflag = 0;
+ slptimeo = 2 * hz;
+ }
+ }
+ /*
+ * We might have lost our iod while sleeping,
+ * so check and loop if nescessary.
+ */
+ if (nmp->nm_bufqiods == 0) {
+ NFS_DPF(ASYNCIO,
+ ("nfs_asyncio: no iods after mount %p queue was drained, looping\n", nmp));
+ goto again;
+ }
+ }
+
if (bp->b_flags & B_READ) {
if (bp->b_rcred == NOCRED && cred != NOCRED) {
crhold(cred);
@@ -746,38 +817,17 @@ nfs_asyncio(bp, cred)
}
}
- TAILQ_INSERT_TAIL(&nfs_bufq, bp, b_freelist);
- nfs_iodwant[i] = (struct proc *)0;
- wakeup((caddr_t)&nfs_iodwant[i]);
+ TAILQ_INSERT_TAIL(&nmp->nm_bufq, bp, b_freelist);
+ nmp->nm_bufqlen++;
return (0);
- }
-
- /*
- * If it is a read or a write already marked B_WRITEINPROG or B_NOCACHE
- * return EIO so the process will call nfs_doio() and do it
- * synchronously.
- */
- if (bp->b_flags & (B_READ | B_WRITEINPROG | B_NOCACHE))
- return (EIO);
-
- /*
- * Allow the administrator to override the choice of using a delayed
- * write since it is a pessimization for some servers, notably some
- * Solaris servers.
- */
- if (!nfs_dwrite)
- return (EIO);
+ }
/*
- * Just turn the async write into a delayed write, instead of
- * doing in synchronously. Hopefully, at least one of the nfsiods
- * is currently doing a write for this file and will pick up the
- * delayed writes before going back to sleep.
+ * All the iods are busy on other mounts, so return EIO to
+ * force the caller to process the i/o synchronously.
*/
- bp->b_flags |= B_DELWRI;
- reassignbuf(bp, bp->b_vp);
- biodone(bp);
- return (0);
+ NFS_DPF(ASYNCIO, ("nfs_asyncio: no iods available, i/o is synchronous\n"));
+ return (EIO);
}
/*
OpenPOWER on IntegriCloud