diff options
author | julian <julian@FreeBSD.org> | 1999-03-12 02:24:58 +0000 |
---|---|---|
committer | julian <julian@FreeBSD.org> | 1999-03-12 02:24:58 +0000 |
commit | f27c95753fda68261265d744a5d8a9e59144d460 (patch) | |
tree | a77fd9dc92b2fd307830f703a502e5f52025ed29 /sys/nfs | |
parent | 10962136cf7fbc75a1bd22a787b0d92c5b8a655b (diff) | |
download | FreeBSD-src-f27c95753fda68261265d744a5d8a9e59144d460.zip FreeBSD-src-f27c95753fda68261265d744a5d8a9e59144d460.tar.gz |
Reviewed by: Many at differnt times in differnt parts,
including alan, john, me, luoqi, and kirk
Submitted by: Matt Dillon <dillon@frebsd.org>
This change implements a relatively sophisticated fix to getnewbuf().
There were two problems with getnewbuf(). First, the writerecursion
can lead to a system stack overflow when you have NFS and/or VN
devices in the system. Second, the free/dirty buffer accounting was
completely broken. Not only did the nfs routines blow it trying to
manually account for the buffer state, but the accounting that was
done did not work well with the purpose of their existance: figuring
out when getnewbuf() needs to sleep.
The meat of the change is to kern/vfs_bio.c. The remaining diffs are
all minor except for NFS, which includes both the fixes for bp
interaction AND fixes for a 'biodone(): buffer already done' lockup.
Sys/buf.h also contains a chaining structure which is not used by
this patchset but is used by other patches that are coming soon.
This patch deliniated by tags PRE_MAT_GETBUF and POST_MAT_GETBUF.
(sorry for the missing T matt)
Diffstat (limited to 'sys/nfs')
-rw-r--r-- | sys/nfs/nfs_bio.c | 57 | ||||
-rw-r--r-- | sys/nfs/nfs_vnops.c | 81 |
2 files changed, 87 insertions, 51 deletions
diff --git a/sys/nfs/nfs_bio.c b/sys/nfs/nfs_bio.c index fb437a5..2fb5353 100644 --- a/sys/nfs/nfs_bio.c +++ b/sys/nfs/nfs_bio.c @@ -34,7 +34,7 @@ * SUCH DAMAGE. * * @(#)nfs_bio.c 8.9 (Berkeley) 3/30/95 - * $Id: nfs_bio.c,v 1.65 1998/12/14 17:51:30 dt Exp $ + * $Id: nfs_bio.c,v 1.66 1999/01/21 08:29:07 dillon Exp $ */ @@ -418,6 +418,7 @@ nfs_bioread(vp, uio, ioflag, cred, getpages) return (EINTR); if ((rabp->b_flags & (B_CACHE|B_DELWRI)) == 0) { rabp->b_flags |= (B_READ | B_ASYNC); + rabp->b_flags &= ~B_DONE; vfs_busy_pages(rabp, 0); if (nfs_asyncio(rabp, cred)) { rabp->b_flags |= B_INVAL|B_ERROR; @@ -513,6 +514,7 @@ again: return (EINTR); if ((bp->b_flags & B_CACHE) == 0) { bp->b_flags |= B_READ; + bp->b_flags &= ~B_DONE; vfs_busy_pages(bp, 0); error = nfs_doio(bp, cred, p); if (error) { @@ -537,6 +539,7 @@ again: return (EINTR); if ((bp->b_flags & B_CACHE) == 0) { bp->b_flags |= B_READ; + bp->b_flags &= ~B_DONE; vfs_busy_pages(bp, 0); error = nfs_doio(bp, cred, p); if (error) { @@ -560,6 +563,7 @@ again: return (EINTR); if ((bp->b_flags & B_DONE) == 0) { bp->b_flags |= B_READ; + bp->b_flags &= ~B_DONE; vfs_busy_pages(bp, 0); error = nfs_doio(bp, cred, p); if (error == 0 && (bp->b_flags & B_INVAL)) @@ -591,6 +595,7 @@ again: if (rabp) { if ((rabp->b_flags & (B_CACHE|B_DELWRI)) == 0) { rabp->b_flags |= (B_READ | B_ASYNC); + rabp->b_flags &= ~B_DONE; vfs_busy_pages(rabp, 0); if (nfs_asyncio(rabp, cred)) { rabp->b_flags |= B_INVAL|B_ERROR; @@ -840,6 +845,12 @@ again: bp->b_dirtyoff = on; bp->b_dirtyend = on + n; } + /* + * To avoid code complexity, we may have to throw away + * previously valid ranges when merging the new dirty range + * into the valid range. As long as we do not *ADD* an + * invalid valid range, we are ok. + */ if (bp->b_validend == 0 || bp->b_validend < bp->b_dirtyoff || bp->b_validoff > bp->b_dirtyend) { bp->b_validoff = bp->b_dirtyoff; @@ -1004,7 +1015,7 @@ nfs_asyncio(bp, cred) if (nfs_numasync == 0) return (EIO); - + nmp = VFSTONFS(bp->b_vp->v_mount); again: if (nmp->nm_flag & NFSMNT_INT) @@ -1109,12 +1120,12 @@ again: */ int nfs_doio(bp, cr, p) - register struct buf *bp; + struct buf *bp; struct ucred *cr; struct proc *p; { - register struct uio *uiop; - register struct vnode *vp; + struct uio *uiop; + struct vnode *vp; struct nfsnode *np; struct nfsmount *nmp; int error = 0, diff, len, iomode, must_commit = 0; @@ -1130,6 +1141,8 @@ nfs_doio(bp, cr, p) uiop->uio_segflg = UIO_SYSSPACE; uiop->uio_procp = p; + KASSERT(!(bp->b_flags & B_DONE), ("nfs_doio: bp %p already marked done", bp)); + /* * Historically, paging was done with physio, but no more. */ @@ -1236,10 +1249,12 @@ nfs_doio(bp, cr, p) io.iov_base = (char *)bp->b_data + bp->b_dirtyoff; uiop->uio_rw = UIO_WRITE; nfsstats.write_bios++; + if ((bp->b_flags & (B_ASYNC | B_NEEDCOMMIT | B_NOCACHE | B_CLUSTER)) == B_ASYNC) iomode = NFSV3WRITE_UNSTABLE; else iomode = NFSV3WRITE_FILESYNC; + bp->b_flags |= B_WRITEINPROG; error = nfs_writerpc(vp, uiop, cr, &iomode, &must_commit); if (!error && iomode == NFSV3WRITE_UNSTABLE) { @@ -1247,8 +1262,9 @@ nfs_doio(bp, cr, p) if (bp->b_dirtyoff == 0 && bp->b_dirtyend == bp->b_bufsize) bp->b_flags |= B_CLUSTEROK; - } else + } else { bp->b_flags &= ~B_NEEDCOMMIT; + } bp->b_flags &= ~B_WRITEINPROG; /* @@ -1265,31 +1281,30 @@ nfs_doio(bp, cr, p) * the B_DELWRI and B_NEEDCOMMIT flags. * * If the buffer is marked B_PAGING, it does not reside on - * the vp's paging queues so we do not ( and cannot ) reassign - * it. XXX numdirtybuffers should be integrated into - * reassignbuf() call. + * the vp's paging queues so we cannot call bdirty(). The + * bp in this case is not an NFS cache block so we should + * be safe. XXX */ if (error == EINTR || (!error && (bp->b_flags & B_NEEDCOMMIT))) { int s; + s = splbio(); bp->b_flags &= ~(B_INVAL|B_NOCACHE); if ((bp->b_flags & B_PAGING) == 0) { - ++numdirtybuffers; - bp->b_flags |= B_DELWRI; - s = splbio(); - reassignbuf(bp, vp); - splx(s); + bdirty(bp); + bp->b_flags &= ~B_DONE; } if ((bp->b_flags & B_ASYNC) == 0) bp->b_flags |= B_EINTR; + splx(s); } else { - if (error) { - bp->b_flags |= B_ERROR; - bp->b_error = np->n_error = error; - np->n_flag |= NWRITEERR; - } - bp->b_dirtyoff = bp->b_dirtyend = 0; + if (error) { + bp->b_flags |= B_ERROR; + bp->b_error = np->n_error = error; + np->n_flag |= NWRITEERR; + } + bp->b_dirtyoff = bp->b_dirtyend = 0; } } else { bp->b_resid = 0; @@ -1299,7 +1314,7 @@ nfs_doio(bp, cr, p) } bp->b_resid = uiop->uio_resid; if (must_commit) - nfs_clearcommit(vp->v_mount); + nfs_clearcommit(vp->v_mount); biodone(bp); return (error); } diff --git a/sys/nfs/nfs_vnops.c b/sys/nfs/nfs_vnops.c index 4afb469..a92bb22 100644 --- a/sys/nfs/nfs_vnops.c +++ b/sys/nfs/nfs_vnops.c @@ -34,7 +34,7 @@ * SUCH DAMAGE. * * @(#)nfs_vnops.c 8.16 (Berkeley) 5/27/95 - * $Id: nfs_vnops.c,v 1.122 1999/02/13 09:47:30 dillon Exp $ + * $Id: nfs_vnops.c,v 1.123 1999/02/16 10:49:54 dfr Exp $ */ @@ -2648,6 +2648,9 @@ nfs_strategy(ap) struct proc *p; int error = 0; + KASSERT(!(bp->b_flags & B_DONE), ("nfs_strategy: buffer %p unexpectedly marked B_DONE", bp)); + KASSERT((bp->b_flags & B_BUSY), ("nfs_strategy: buffer %p not B_BUSY", bp)); + if (bp->b_flags & B_PHYS) panic("nfs physio"); @@ -2797,6 +2800,10 @@ again: /* * Work out if all buffers are using the same cred * so we can deal with them all with one commit. + * + * NOTE: we are not clearing B_DONE here, so we have + * to do it later on in this routine if we intend to + * initiate I/O on the bp. */ if (wcred == NULL) wcred = bp->b_wcred; @@ -2804,6 +2811,14 @@ again: wcred = NOCRED; bp->b_flags |= (B_BUSY | B_WRITEINPROG); vfs_busy_pages(bp, 1); + + /* + * bp is protected by being B_BUSY, but nbp is not + * and vfs_busy_pages() may sleep. We have to + * recalculate nbp. + */ + nbp = TAILQ_NEXT(bp, b_vnbufs); + /* * A list of these buffers is kept so that the * second loop knows which buffers have actually @@ -2849,6 +2864,7 @@ again: if (retv == NFSERR_STALEWRITEVERF) nfs_clearcommit(vp->v_mount); + /* * Now, either mark the blocks I/O done or mark the * blocks dirty, depending on whether the commit @@ -2858,23 +2874,27 @@ again: bp = bvec[i]; bp->b_flags &= ~(B_NEEDCOMMIT | B_WRITEINPROG); if (retv) { - vfs_unbusy_pages(bp); - brelse(bp); + /* + * Error, leave B_DELWRI intact + */ + vfs_unbusy_pages(bp); + brelse(bp); } else { - s = splbio(); /* XXX check this positionning */ - vp->v_numoutput++; - bp->b_flags |= B_ASYNC; - if (bp->b_flags & B_DELWRI) { - --numdirtybuffers; - if (needsbuffer) { - vfs_bio_need_satisfy(); - } - } - bp->b_flags &= ~(B_READ|B_DONE|B_ERROR|B_DELWRI); - bp->b_dirtyoff = bp->b_dirtyend = 0; - reassignbuf(bp, vp); - splx(s); - biodone(bp); + /* + * Success, remove B_DELWRI ( bundirty() ). + * + * b_dirtyoff/b_dirtyend seem to be NFS + * specific. We should probably move that + * into bundirty(). XXX + */ + s = splbio(); + vp->v_numoutput++; + bp->b_flags |= B_ASYNC; + bundirty(bp); + bp->b_flags &= ~(B_READ|B_DONE|B_ERROR); + bp->b_dirtyoff = bp->b_dirtyend = 0; + splx(s); + biodone(bp); } } } @@ -2999,6 +3019,8 @@ nfs_print(ap) /* * Just call nfs_writebp() with the force argument set to 1. + * + * NOTE: B_DONE may or may not be set in a_bp on call. */ static int nfs_bwrite(ap) @@ -3020,26 +3042,24 @@ nfs_writebp(bp, force) int force; { int s; - register int oldflags = bp->b_flags, retv = 1; + int oldflags = bp->b_flags; + int retv = 1; off_t off; if(!(bp->b_flags & B_BUSY)) panic("bwrite: buffer is not busy???"); if (bp->b_flags & B_INVAL) - bp->b_flags |= B_INVAL | B_NOCACHE; + bp->b_flags |= B_NOCACHE; - if (bp->b_flags & B_DELWRI) { - --numdirtybuffers; - if (needsbuffer) - vfs_bio_need_satisfy(); - } - s = splbio(); /* XXX check if needed */ - bp->b_flags &= ~(B_READ|B_DONE|B_ERROR|B_DELWRI); + /* + * XXX we bundirty() the bp here. Shouldn't we do it later after + * the I/O has completed?? + */ - if ((oldflags & (B_ASYNC|B_DELWRI)) == (B_ASYNC|B_DELWRI)) { - reassignbuf(bp, bp->b_vp); - } + s = splbio(); + bundirty(bp); + bp->b_flags &= ~(B_READ|B_DONE|B_ERROR); bp->b_vp->v_numoutput++; curproc->p_stats->p_ru.ru_oublock++; @@ -3061,8 +3081,9 @@ nfs_writebp(bp, force) bp->b_dirtyoff = bp->b_dirtyend = 0; bp->b_flags &= ~B_NEEDCOMMIT; biodone(bp); - } else if (retv == NFSERR_STALEWRITEVERF) + } else if (retv == NFSERR_STALEWRITEVERF) { nfs_clearcommit(bp->b_vp->v_mount); + } } if (retv) { if (force) |