diff options
author | dfr <dfr@FreeBSD.org> | 1996-11-06 10:53:16 +0000 |
---|---|---|
committer | dfr <dfr@FreeBSD.org> | 1996-11-06 10:53:16 +0000 |
commit | 4c8f7388e578751aab31ce0b8a982373a002aa7e (patch) | |
tree | b99fa8ba7a7ef13dafb400913ede1cef59f59743 /sys/nfsserver | |
parent | d7d2cfcbf0e341b9b9672dcde6e1be024ed506b9 (diff) | |
download | FreeBSD-src-4c8f7388e578751aab31ce0b8a982373a002aa7e.zip FreeBSD-src-4c8f7388e578751aab31ce0b8a982373a002aa7e.tar.gz |
Improve the queueing algorithms used by NFS' asynchronous i/o. The
existing mechanism uses a global queue for some buffers and the
vp->v_dirtyblkhd queue for others. This turns sequential writes into
randomly ordered writes to the server, degrading both read and write
performance. The existing mechanism also copes badly with hung
servers, tending to block accesses to other servers when all the iods
are waiting for a hung server.
The new mechanism uses a queue for each mount point. All asynchronous
i/o goes through this queue, which preserves the ordering of requests.
A simple mechanism ensures that the iods are shared out fairly between
active mount points. This change also removes the sysctl variable
vfs.nfs.dwrite, since the new queueing mechanism removes the old
delayed-write code completely.
This should go into the 2.2 branch.
Diffstat (limited to 'sys/nfsserver')
-rw-r--r-- | sys/nfsserver/nfs.h | 4 | ||||
-rw-r--r-- | sys/nfsserver/nfs_srvsubs.c | 8 | ||||
-rw-r--r-- | sys/nfsserver/nfs_syscalls.c | 80 | ||||
-rw-r--r-- | sys/nfsserver/nfsrvstats.h | 4 |
4 files changed, 48 insertions, 48 deletions
diff --git a/sys/nfsserver/nfs.h b/sys/nfsserver/nfs.h index 3d9d801..bc2edeb 100644 --- a/sys/nfsserver/nfs.h +++ b/sys/nfsserver/nfs.h @@ -34,7 +34,7 @@ * SUCH DAMAGE. * * @(#)nfs.h 8.1 (Berkeley) 6/10/93 - * $Id: nfs.h,v 1.17 1996/01/30 22:59:39 mpp Exp $ + * $Id: nfs.h,v 1.18 1996/08/21 21:55:44 dyson Exp $ */ #ifndef _NFS_NFS_H_ @@ -335,7 +335,7 @@ extern TAILQ_HEAD(nfs_reqq, nfsreq) nfs_reqq; #define NWDELAYHASH(sock, f) \ (&(sock)->ns_wdelayhashtbl[(*((u_long *)(f))) % NFS_WDELAYHASHSIZ]) #ifndef NFS_MUIDHASHSIZ -#define NFS_MUIDHASHSIZ 67 /* Tune the size of nfsmount with this */ +#define NFS_MUIDHASHSIZ 63 /* Tune the size of nfsmount with this */ #endif #define NMUIDHASH(nmp, uid) \ (&(nmp)->nm_uidhashtbl[(uid) % NFS_MUIDHASHSIZ]) diff --git a/sys/nfsserver/nfs_srvsubs.c b/sys/nfsserver/nfs_srvsubs.c index 7d97f09..2566fe6 100644 --- a/sys/nfsserver/nfs_srvsubs.c +++ b/sys/nfsserver/nfs_srvsubs.c @@ -34,7 +34,7 @@ * SUCH DAMAGE. * * @(#)nfs_subs.c 8.3 (Berkeley) 1/4/94 - * $Id: nfs_subs.c,v 1.32 1996/08/21 21:55:51 dyson Exp $ + * $Id: nfs_subs.c,v 1.33 1996/09/19 18:20:59 nate Exp $ */ /* @@ -538,7 +538,6 @@ static short *nfsrv_v3errmap[] = { #endif /* NFS_NOSERVER */ -extern struct proc *nfs_iodwant[NFS_MAXASYNCDAEMON]; extern struct nfsrtt nfsrtt; extern time_t nqnfsstarttime; extern int nqsrv_clockskew; @@ -1136,9 +1135,10 @@ nfs_init() if (nfs_ticks < 1) nfs_ticks = 1; /* Ensure async daemons disabled */ - for (i = 0; i < NFS_MAXASYNCDAEMON; i++) + for (i = 0; i < NFS_MAXASYNCDAEMON; i++) { nfs_iodwant[i] = (struct proc *)0; - TAILQ_INIT(&nfs_bufq); + nfs_iodmount[i] = (struct nfsmount *)0; + } nfs_nhinit(); /* Init the nfsnode table */ #ifndef NFS_NOSERVER nfsrv_init(0); /* Init server data structures */ diff --git a/sys/nfsserver/nfs_syscalls.c b/sys/nfsserver/nfs_syscalls.c index d94b7e6..e2f3435 100644 --- a/sys/nfsserver/nfs_syscalls.c +++ b/sys/nfsserver/nfs_syscalls.c @@ -34,13 +34,14 @@ * SUCH DAMAGE. 
* * @(#)nfs_syscalls.c 8.3 (Berkeley) 1/4/94 - * $Id: nfs_syscalls.c,v 1.13 1996/01/13 23:27:58 phk Exp $ + * $Id: nfs_syscalls.c,v 1.14 1996/04/30 23:26:52 bde Exp $ */ #include <sys/param.h> #include <sys/systm.h> #include <sys/sysproto.h> #include <sys/kernel.h> +#include <sys/sysctl.h> #include <sys/file.h> #include <sys/filedesc.h> #include <sys/stat.h> @@ -79,7 +80,6 @@ extern int (*nfsrv3_procs[NFS_NPROCS]) __P((struct nfsrv_descript *nd, struct nfssvc_sock *slp, struct proc *procp, struct mbuf **mreqp)); -extern struct proc *nfs_iodwant[NFS_MAXASYNCDAEMON]; extern int nfs_numasync; extern time_t nqnfsstarttime; extern int nqsrv_writeslack; @@ -715,6 +715,9 @@ done: } #endif /* NFS_NOSERVER */ +int nfs_defect = 0; +SYSCTL_INT(_vfs_nfs, OID_AUTO, defect, CTLFLAG_RW, &nfs_defect, 0, ""); + /* * Asynchronous I/O daemons for client nfs. * They do read-ahead and write-behind operations on the block I/O cache. @@ -727,6 +730,7 @@ nfssvc_iod(p) register struct buf *bp, *nbp; register int i, myiod; struct vnode *vp; + struct nfsmount *nmp; int error = 0, s; /* @@ -746,53 +750,49 @@ nfssvc_iod(p) * Just loop around doin our stuff until SIGKILL */ for (;;) { - while (nfs_bufq.tqh_first == NULL && error == 0) { + while (((nmp = nfs_iodmount[myiod]) == NULL + || nmp->nm_bufq.tqh_first == NULL) + && error == 0) { + if (nmp) + nmp->nm_bufqiods--; nfs_iodwant[myiod] = p; + nfs_iodmount[myiod] = NULL; error = tsleep((caddr_t)&nfs_iodwant[myiod], PWAIT | PCATCH, "nfsidl", 0); } - while ((bp = nfs_bufq.tqh_first) != NULL) { - /* Take one off the front of the list */ - TAILQ_REMOVE(&nfs_bufq, bp, b_freelist); - if (bp->b_flags & B_READ) - (void) nfs_doio(bp, bp->b_rcred, (struct proc *)0); - else do { - /* - * Look for a delayed write for the same vnode, so I can do - * it now. We must grab it before calling nfs_doio() to - * avoid any risk of the vnode getting vclean()'d while - * we are doing the write rpc. 
- */ - vp = bp->b_vp; - s = splbio(); - for (nbp = vp->v_dirtyblkhd.lh_first; nbp; - nbp = nbp->b_vnbufs.le_next) { - if ((nbp->b_flags & - (B_BUSY|B_DELWRI|B_NEEDCOMMIT|B_NOCACHE))!=B_DELWRI) - continue; - bremfree(nbp); - vfs_busy_pages(nbp, 1); - nbp->b_flags |= (B_BUSY|B_ASYNC); - break; - } - splx(s); - /* - * For the delayed write, do the first part of nfs_bwrite() - * up to, but not including nfs_strategy(). - */ - if (nbp) { - nbp->b_flags &= ~(B_READ|B_DONE|B_ERROR|B_DELWRI); - reassignbuf(nbp, nbp->b_vp); - nbp->b_vp->v_numoutput++; - } - (void) nfs_doio(bp, bp->b_wcred, (struct proc *)0); - } while (bp = nbp); - } if (error) { nfs_asyncdaemon[myiod] = 0; + if (nmp) nmp->nm_bufqiods--; + nfs_iodmount[myiod] = NULL; nfs_numasync--; return (error); } + while ((bp = nmp->nm_bufq.tqh_first) != NULL) { + /* Take one off the front of the list */ + TAILQ_REMOVE(&nmp->nm_bufq, bp, b_freelist); + nmp->nm_bufqlen--; + if (nmp->nm_bufqwant && nmp->nm_bufqlen < 2 * nfs_numasync) { + nmp->nm_bufqwant = FALSE; + wakeup(&nmp->nm_bufq); + } + if (bp->b_flags & B_READ) + (void) nfs_doio(bp, bp->b_rcred, (struct proc *)0); + else + (void) nfs_doio(bp, bp->b_wcred, (struct proc *)0); + + /* + * If there are more than one iod on this mount, then defect + * so that the iods can be shared out fairly between the mounts + */ + if (nfs_defect && nmp->nm_bufqiods > 1) { + NFS_DPF(ASYNCIO, + ("nfssvc_iod: iod %d defecting from mount %p\n", + myiod, nmp)); + nfs_iodmount[myiod] = NULL; + nmp->nm_bufqiods--; + break; + } + } } } diff --git a/sys/nfsserver/nfsrvstats.h b/sys/nfsserver/nfsrvstats.h index 3d9d801..bc2edeb 100644 --- a/sys/nfsserver/nfsrvstats.h +++ b/sys/nfsserver/nfsrvstats.h @@ -34,7 +34,7 @@ * SUCH DAMAGE. 
* * @(#)nfs.h 8.1 (Berkeley) 6/10/93 - * $Id: nfs.h,v 1.17 1996/01/30 22:59:39 mpp Exp $ + * $Id: nfs.h,v 1.18 1996/08/21 21:55:44 dyson Exp $ */ #ifndef _NFS_NFS_H_ @@ -335,7 +335,7 @@ extern TAILQ_HEAD(nfs_reqq, nfsreq) nfs_reqq; #define NWDELAYHASH(sock, f) \ (&(sock)->ns_wdelayhashtbl[(*((u_long *)(f))) % NFS_WDELAYHASHSIZ]) #ifndef NFS_MUIDHASHSIZ -#define NFS_MUIDHASHSIZ 67 /* Tune the size of nfsmount with this */ +#define NFS_MUIDHASHSIZ 63 /* Tune the size of nfsmount with this */ #endif #define NMUIDHASH(nmp, uid) \ (&(nmp)->nm_uidhashtbl[(uid) % NFS_MUIDHASHSIZ]) |