summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authordillon <dillon@FreeBSD.org>2000-01-05 05:11:37 +0000
committerdillon <dillon@FreeBSD.org>2000-01-05 05:11:37 +0000
commitc6689c797daca2e96b0ccaf727e86ebf09e00e37 (patch)
tree5cdfd2f2b906cb0407fd5a329f091623735a9701
parente112622878175672c0eaa5a87ccdf0e882ccc784 (diff)
downloadFreeBSD-src-c6689c797daca2e96b0ccaf727e86ebf09e00e37.zip
FreeBSD-src-c6689c797daca2e96b0ccaf727e86ebf09e00e37.tar.gz
Enhance reassignbuf(). When a buffer cannot be time-optimally inserted
into vnode dirtyblkhd we append it to the list instead of prepend it to the list in order to maintain a 'forward' locality of reference, which is arguably better than 'reverse'. The original algorithm did things this way too, but at a huge time cost. Enhance the append interlock for NFS writes to handle intr/soft mounts better. Fix the hysteresis for NFS async daemon I/O requests to reduce the number of unnecessary context switches. Modify handling of NFS mount options. Any given user option that is too high now defaults to the kernel maximum for that option rather than the kernel default for that option. Reviewed by: Alfred Perlstein <bright@wintelcom.net>
-rw-r--r--sys/kern/vfs_export.c21
-rw-r--r--sys/kern/vfs_subr.c21
-rw-r--r--sys/nfs/nfs_bio.c48
-rw-r--r--sys/nfs/nfs_node.c14
-rw-r--r--sys/nfs/nfs_syscalls.c2
-rw-r--r--sys/nfs/nfs_vfsops.c36
-rw-r--r--sys/nfsclient/nfs_bio.c48
-rw-r--r--sys/nfsclient/nfs_nfsiod.c2
-rw-r--r--sys/nfsclient/nfs_node.c14
-rw-r--r--sys/nfsclient/nfs_vfsops.c36
-rw-r--r--sys/nfsserver/nfs_syscalls.c2
11 files changed, 201 insertions, 43 deletions
diff --git a/sys/kern/vfs_export.c b/sys/kern/vfs_export.c
index 76bd584..059ca2a 100644
--- a/sys/kern/vfs_export.c
+++ b/sys/kern/vfs_export.c
@@ -1208,6 +1208,7 @@ reassignbuf(bp, newvp)
tbp = TAILQ_FIRST(listheadp);
if (tbp == NULL ||
bp->b_lblkno == 0 ||
+ (bp->b_lblkno > 0 && tbp->b_lblkno < 0) ||
(bp->b_lblkno > 0 && bp->b_lblkno < tbp->b_lblkno)) {
TAILQ_INSERT_HEAD(listheadp, bp, b_vnbufs);
++reassignbufsortgood;
@@ -1217,14 +1218,30 @@ reassignbuf(bp, newvp)
} else if (reassignbufmethod == 1) {
/*
* New sorting algorithm, only handle sequential case,
- * otherwise guess.
+ * otherwise append to end (but before metadata)
*/
if ((tbp = gbincore(newvp, bp->b_lblkno - 1)) != NULL &&
(tbp->b_xflags & BX_VNDIRTY)) {
+ /*
+ * Found the best place to insert the buffer
+ */
TAILQ_INSERT_AFTER(listheadp, tbp, bp, b_vnbufs);
++reassignbufsortgood;
} else {
- TAILQ_INSERT_HEAD(listheadp, bp, b_vnbufs);
+ /*
+ * Missed, append to end, but before meta-data.
+ * We know that the head buffer in the list is
+ * not meta-data due to prior conditionals.
+ *
+ * Indirect effects: NFS second stage write
+ * tends to wind up here, giving maximum
+ * distance between the unstable write and the
+ * commit rpc.
+ */
+ tbp = TAILQ_LAST(listheadp, buflists);
+ while (tbp && tbp->b_lblkno < 0)
+ tbp = TAILQ_PREV(tbp, buflists, b_vnbufs);
+ TAILQ_INSERT_AFTER(listheadp, tbp, bp, b_vnbufs);
++reassignbufsortbad;
}
} else {
diff --git a/sys/kern/vfs_subr.c b/sys/kern/vfs_subr.c
index 76bd584..059ca2a 100644
--- a/sys/kern/vfs_subr.c
+++ b/sys/kern/vfs_subr.c
@@ -1208,6 +1208,7 @@ reassignbuf(bp, newvp)
tbp = TAILQ_FIRST(listheadp);
if (tbp == NULL ||
bp->b_lblkno == 0 ||
+ (bp->b_lblkno > 0 && tbp->b_lblkno < 0) ||
(bp->b_lblkno > 0 && bp->b_lblkno < tbp->b_lblkno)) {
TAILQ_INSERT_HEAD(listheadp, bp, b_vnbufs);
++reassignbufsortgood;
@@ -1217,14 +1218,30 @@ reassignbuf(bp, newvp)
} else if (reassignbufmethod == 1) {
/*
* New sorting algorithm, only handle sequential case,
- * otherwise guess.
+ * otherwise append to end (but before metadata)
*/
if ((tbp = gbincore(newvp, bp->b_lblkno - 1)) != NULL &&
(tbp->b_xflags & BX_VNDIRTY)) {
+ /*
+ * Found the best place to insert the buffer
+ */
TAILQ_INSERT_AFTER(listheadp, tbp, bp, b_vnbufs);
++reassignbufsortgood;
} else {
- TAILQ_INSERT_HEAD(listheadp, bp, b_vnbufs);
+ /*
+ * Missed, append to end, but before meta-data.
+ * We know that the head buffer in the list is
+ * not meta-data due to prior conditionals.
+ *
+ * Indirect effects: NFS second stage write
+ * tends to wind up here, giving maximum
+ * distance between the unstable write and the
+ * commit rpc.
+ */
+ tbp = TAILQ_LAST(listheadp, buflists);
+ while (tbp && tbp->b_lblkno < 0)
+ tbp = TAILQ_PREV(tbp, buflists, b_vnbufs);
+ TAILQ_INSERT_AFTER(listheadp, tbp, bp, b_vnbufs);
++reassignbufsortbad;
}
} else {
diff --git a/sys/nfs/nfs_bio.c b/sys/nfs/nfs_bio.c
index 8e99d98..4b9dcec 100644
--- a/sys/nfs/nfs_bio.c
+++ b/sys/nfs/nfs_bio.c
@@ -471,9 +471,11 @@ nfs_bioread(vp, uio, ioflag, cred)
rabp->b_flags |= B_INVAL|B_ERROR;
vfs_unbusy_pages(rabp);
brelse(rabp);
+ break;
}
- } else
+ } else {
brelse(rabp);
+ }
}
}
}
@@ -497,8 +499,19 @@ again:
} else if ((off_t)(lbn + 1) * biosize > np->n_size) {
bcount = np->n_size - (off_t)lbn * biosize;
}
- if (bcount != biosize && nfs_rslock(np, p) == ENOLCK)
- goto again;
+ if (bcount != biosize) {
+ switch(nfs_rslock(np, p)) {
+ case ENOLCK:
+ goto again;
+ /* not reached */
+ case EINTR:
+ case ERESTART:
+ return(EINTR);
+ /* not reached */
+ default:
+ break;
+ }
+ }
bp = nfs_getcacheblk(vp, lbn, bcount, p);
@@ -785,8 +798,17 @@ restart:
*/
if ((ioflag & IO_APPEND) ||
uio->uio_offset + uio->uio_resid > np->n_size) {
- if (nfs_rslock(np, p) == ENOLCK)
+ switch(nfs_rslock(np, p)) {
+ case ENOLCK:
goto restart;
+ /* not reached */
+ case EINTR:
+ case ERESTART:
+ return(EINTR);
+ /* not reached */
+ default:
+ break;
+ }
haverslock = 1;
}
@@ -1196,10 +1218,25 @@ nfs_asyncio(bp, cred, procp)
int slptimeo = 0;
int error;
+ /*
+ * If no async daemons then return EIO to force caller to run the rpc
+ * synchronously.
+ */
if (nfs_numasync == 0)
return (EIO);
nmp = VFSTONFS(bp->b_vp->v_mount);
+
+ /*
+ * Commits are usually short and sweet so lets save some cpu and
+ * leave the async daemons for more important rpc's (such as reads
+ * and writes).
+ */
+ if ((bp->b_flags & (B_READ|B_NEEDCOMMIT)) == B_NEEDCOMMIT &&
+ (nmp->nm_bufqiods > nfs_numasync / 2)) {
+ return(EIO);
+ }
+
again:
if (nmp->nm_flag & NFSMNT_INT)
slpflag = PCATCH;
@@ -1244,7 +1281,8 @@ again:
*/
if (gotiod) {
/*
- * Ensure that the queue never grows too large.
+ * Ensure that the queue never grows too large. We still want
+ * to asynchronize so we block rather than return EIO.
*/
while (nmp->nm_bufqlen >= 2*nfs_numasync) {
NFS_DPF(ASYNCIO,
diff --git a/sys/nfs/nfs_node.c b/sys/nfs/nfs_node.c
index 89cbdce..1de8739 100644
--- a/sys/nfs/nfs_node.c
+++ b/sys/nfs/nfs_node.c
@@ -112,6 +112,18 @@ nfs_nget(mntp, fhp, fhsize, npp)
register struct vnode *vp;
struct vnode *nvp;
int error;
+ int rsflags;
+ struct nfsmount *nmp;
+
+ /*
+ * Calculate nfs mount point and figure out whether the rslock should
+ * be interruptible or not.
+ */
+ nmp = VFSTONFS(mntp);
+ if (nmp->nm_flag & NFSMNT_INT)
+ rsflags = PCATCH;
+ else
+ rsflags = 0;
retry:
nhpp = NFSNOHASH(nfs_hash(fhp, fhsize));
@@ -180,7 +192,7 @@ loop:
np->n_fhp = &np->n_fh;
bcopy((caddr_t)fhp, (caddr_t)np->n_fhp, fhsize);
np->n_fhsize = fhsize;
- lockinit(&np->n_rslock, PVFS, "nfrslk", 0, LK_NOPAUSE);
+ lockinit(&np->n_rslock, PVFS | rsflags, "nfrslk", 0, LK_NOPAUSE);
*npp = np;
if (nfs_node_hash_lock < 0)
diff --git a/sys/nfs/nfs_syscalls.c b/sys/nfs/nfs_syscalls.c
index da18842..15c6231 100644
--- a/sys/nfs/nfs_syscalls.c
+++ b/sys/nfs/nfs_syscalls.c
@@ -969,7 +969,7 @@ nfssvc_iod(p)
/* Take one off the front of the list */
TAILQ_REMOVE(&nmp->nm_bufq, bp, b_freelist);
nmp->nm_bufqlen--;
- if (nmp->nm_bufqwant && nmp->nm_bufqlen < 2 * nfs_numasync) {
+ if (nmp->nm_bufqwant && nmp->nm_bufqlen <= nfs_numasync) {
nmp->nm_bufqwant = FALSE;
wakeup(&nmp->nm_bufq);
}
diff --git a/sys/nfs/nfs_vfsops.c b/sys/nfs/nfs_vfsops.c
index 9ddb428..c2d365b 100644
--- a/sys/nfs/nfs_vfsops.c
+++ b/sys/nfs/nfs_vfsops.c
@@ -698,18 +698,30 @@ nfs_decode_args(nmp, argp)
if (nmp->nm_acregmin > nmp->nm_acregmax)
nmp->nm_acregmin = nmp->nm_acregmax;
- if ((argp->flags & NFSMNT_MAXGRPS) && argp->maxgrouplist >= 0 &&
- argp->maxgrouplist <= NFS_MAXGRPS)
- nmp->nm_numgrps = argp->maxgrouplist;
- if ((argp->flags & NFSMNT_READAHEAD) && argp->readahead >= 0 &&
- argp->readahead <= NFS_MAXRAHEAD)
- nmp->nm_readahead = argp->readahead;
- if ((argp->flags & NFSMNT_LEASETERM) && argp->leaseterm >= 2 &&
- argp->leaseterm <= NQ_MAXLEASE)
- nmp->nm_leaseterm = argp->leaseterm;
- if ((argp->flags & NFSMNT_DEADTHRESH) && argp->deadthresh >= 1 &&
- argp->deadthresh <= NQ_NEVERDEAD)
- nmp->nm_deadthresh = argp->deadthresh;
+ if ((argp->flags & NFSMNT_MAXGRPS) && argp->maxgrouplist >= 0) {
+ if (argp->maxgrouplist <= NFS_MAXGRPS)
+ nmp->nm_numgrps = argp->maxgrouplist;
+ else
+ nmp->nm_numgrps = NFS_MAXGRPS;
+ }
+ if ((argp->flags & NFSMNT_READAHEAD) && argp->readahead >= 0) {
+ if (argp->readahead <= NFS_MAXRAHEAD)
+ nmp->nm_readahead = argp->readahead;
+ else
+ nmp->nm_readahead = NFS_MAXRAHEAD;
+ }
+ if ((argp->flags & NFSMNT_LEASETERM) && argp->leaseterm >= 2) {
+ if (argp->leaseterm <= NQ_MAXLEASE)
+ nmp->nm_leaseterm = argp->leaseterm;
+ else
+ nmp->nm_leaseterm = NQ_MAXLEASE;
+ }
+ if ((argp->flags & NFSMNT_DEADTHRESH) && argp->deadthresh >= 1) {
+ if (argp->deadthresh <= NQ_NEVERDEAD)
+ nmp->nm_deadthresh = argp->deadthresh;
+ else
+ nmp->nm_deadthresh = NQ_NEVERDEAD;
+ }
adjsock |= ((nmp->nm_sotype != argp->sotype) ||
(nmp->nm_soproto != argp->proto));
diff --git a/sys/nfsclient/nfs_bio.c b/sys/nfsclient/nfs_bio.c
index 8e99d98..4b9dcec 100644
--- a/sys/nfsclient/nfs_bio.c
+++ b/sys/nfsclient/nfs_bio.c
@@ -471,9 +471,11 @@ nfs_bioread(vp, uio, ioflag, cred)
rabp->b_flags |= B_INVAL|B_ERROR;
vfs_unbusy_pages(rabp);
brelse(rabp);
+ break;
}
- } else
+ } else {
brelse(rabp);
+ }
}
}
}
@@ -497,8 +499,19 @@ again:
} else if ((off_t)(lbn + 1) * biosize > np->n_size) {
bcount = np->n_size - (off_t)lbn * biosize;
}
- if (bcount != biosize && nfs_rslock(np, p) == ENOLCK)
- goto again;
+ if (bcount != biosize) {
+ switch(nfs_rslock(np, p)) {
+ case ENOLCK:
+ goto again;
+ /* not reached */
+ case EINTR:
+ case ERESTART:
+ return(EINTR);
+ /* not reached */
+ default:
+ break;
+ }
+ }
bp = nfs_getcacheblk(vp, lbn, bcount, p);
@@ -785,8 +798,17 @@ restart:
*/
if ((ioflag & IO_APPEND) ||
uio->uio_offset + uio->uio_resid > np->n_size) {
- if (nfs_rslock(np, p) == ENOLCK)
+ switch(nfs_rslock(np, p)) {
+ case ENOLCK:
goto restart;
+ /* not reached */
+ case EINTR:
+ case ERESTART:
+ return(EINTR);
+ /* not reached */
+ default:
+ break;
+ }
haverslock = 1;
}
@@ -1196,10 +1218,25 @@ nfs_asyncio(bp, cred, procp)
int slptimeo = 0;
int error;
+ /*
+ * If no async daemons then return EIO to force caller to run the rpc
+ * synchronously.
+ */
if (nfs_numasync == 0)
return (EIO);
nmp = VFSTONFS(bp->b_vp->v_mount);
+
+ /*
+ * Commits are usually short and sweet so lets save some cpu and
+ * leave the async daemons for more important rpc's (such as reads
+ * and writes).
+ */
+ if ((bp->b_flags & (B_READ|B_NEEDCOMMIT)) == B_NEEDCOMMIT &&
+ (nmp->nm_bufqiods > nfs_numasync / 2)) {
+ return(EIO);
+ }
+
again:
if (nmp->nm_flag & NFSMNT_INT)
slpflag = PCATCH;
@@ -1244,7 +1281,8 @@ again:
*/
if (gotiod) {
/*
- * Ensure that the queue never grows too large.
+ * Ensure that the queue never grows too large. We still want
+ * to asynchronize so we block rather than return EIO.
*/
while (nmp->nm_bufqlen >= 2*nfs_numasync) {
NFS_DPF(ASYNCIO,
diff --git a/sys/nfsclient/nfs_nfsiod.c b/sys/nfsclient/nfs_nfsiod.c
index da18842..15c6231 100644
--- a/sys/nfsclient/nfs_nfsiod.c
+++ b/sys/nfsclient/nfs_nfsiod.c
@@ -969,7 +969,7 @@ nfssvc_iod(p)
/* Take one off the front of the list */
TAILQ_REMOVE(&nmp->nm_bufq, bp, b_freelist);
nmp->nm_bufqlen--;
- if (nmp->nm_bufqwant && nmp->nm_bufqlen < 2 * nfs_numasync) {
+ if (nmp->nm_bufqwant && nmp->nm_bufqlen <= nfs_numasync) {
nmp->nm_bufqwant = FALSE;
wakeup(&nmp->nm_bufq);
}
diff --git a/sys/nfsclient/nfs_node.c b/sys/nfsclient/nfs_node.c
index 89cbdce..1de8739 100644
--- a/sys/nfsclient/nfs_node.c
+++ b/sys/nfsclient/nfs_node.c
@@ -112,6 +112,18 @@ nfs_nget(mntp, fhp, fhsize, npp)
register struct vnode *vp;
struct vnode *nvp;
int error;
+ int rsflags;
+ struct nfsmount *nmp;
+
+ /*
+ * Calculate nfs mount point and figure out whether the rslock should
+ * be interruptible or not.
+ */
+ nmp = VFSTONFS(mntp);
+ if (nmp->nm_flag & NFSMNT_INT)
+ rsflags = PCATCH;
+ else
+ rsflags = 0;
retry:
nhpp = NFSNOHASH(nfs_hash(fhp, fhsize));
@@ -180,7 +192,7 @@ loop:
np->n_fhp = &np->n_fh;
bcopy((caddr_t)fhp, (caddr_t)np->n_fhp, fhsize);
np->n_fhsize = fhsize;
- lockinit(&np->n_rslock, PVFS, "nfrslk", 0, LK_NOPAUSE);
+ lockinit(&np->n_rslock, PVFS | rsflags, "nfrslk", 0, LK_NOPAUSE);
*npp = np;
if (nfs_node_hash_lock < 0)
diff --git a/sys/nfsclient/nfs_vfsops.c b/sys/nfsclient/nfs_vfsops.c
index 9ddb428..c2d365b 100644
--- a/sys/nfsclient/nfs_vfsops.c
+++ b/sys/nfsclient/nfs_vfsops.c
@@ -698,18 +698,30 @@ nfs_decode_args(nmp, argp)
if (nmp->nm_acregmin > nmp->nm_acregmax)
nmp->nm_acregmin = nmp->nm_acregmax;
- if ((argp->flags & NFSMNT_MAXGRPS) && argp->maxgrouplist >= 0 &&
- argp->maxgrouplist <= NFS_MAXGRPS)
- nmp->nm_numgrps = argp->maxgrouplist;
- if ((argp->flags & NFSMNT_READAHEAD) && argp->readahead >= 0 &&
- argp->readahead <= NFS_MAXRAHEAD)
- nmp->nm_readahead = argp->readahead;
- if ((argp->flags & NFSMNT_LEASETERM) && argp->leaseterm >= 2 &&
- argp->leaseterm <= NQ_MAXLEASE)
- nmp->nm_leaseterm = argp->leaseterm;
- if ((argp->flags & NFSMNT_DEADTHRESH) && argp->deadthresh >= 1 &&
- argp->deadthresh <= NQ_NEVERDEAD)
- nmp->nm_deadthresh = argp->deadthresh;
+ if ((argp->flags & NFSMNT_MAXGRPS) && argp->maxgrouplist >= 0) {
+ if (argp->maxgrouplist <= NFS_MAXGRPS)
+ nmp->nm_numgrps = argp->maxgrouplist;
+ else
+ nmp->nm_numgrps = NFS_MAXGRPS;
+ }
+ if ((argp->flags & NFSMNT_READAHEAD) && argp->readahead >= 0) {
+ if (argp->readahead <= NFS_MAXRAHEAD)
+ nmp->nm_readahead = argp->readahead;
+ else
+ nmp->nm_readahead = NFS_MAXRAHEAD;
+ }
+ if ((argp->flags & NFSMNT_LEASETERM) && argp->leaseterm >= 2) {
+ if (argp->leaseterm <= NQ_MAXLEASE)
+ nmp->nm_leaseterm = argp->leaseterm;
+ else
+ nmp->nm_leaseterm = NQ_MAXLEASE;
+ }
+ if ((argp->flags & NFSMNT_DEADTHRESH) && argp->deadthresh >= 1) {
+ if (argp->deadthresh <= NQ_NEVERDEAD)
+ nmp->nm_deadthresh = argp->deadthresh;
+ else
+ nmp->nm_deadthresh = NQ_NEVERDEAD;
+ }
adjsock |= ((nmp->nm_sotype != argp->sotype) ||
(nmp->nm_soproto != argp->proto));
diff --git a/sys/nfsserver/nfs_syscalls.c b/sys/nfsserver/nfs_syscalls.c
index da18842..15c6231 100644
--- a/sys/nfsserver/nfs_syscalls.c
+++ b/sys/nfsserver/nfs_syscalls.c
@@ -969,7 +969,7 @@ nfssvc_iod(p)
/* Take one off the front of the list */
TAILQ_REMOVE(&nmp->nm_bufq, bp, b_freelist);
nmp->nm_bufqlen--;
- if (nmp->nm_bufqwant && nmp->nm_bufqlen < 2 * nfs_numasync) {
+ if (nmp->nm_bufqwant && nmp->nm_bufqlen <= nfs_numasync) {
nmp->nm_bufqwant = FALSE;
wakeup(&nmp->nm_bufq);
}
OpenPOWER on IntegriCloud