summaryrefslogtreecommitdiffstats
path: root/sys/nfs
diff options
context:
space:
mode:
authorjulian <julian@FreeBSD.org>1999-03-12 02:24:58 +0000
committerjulian <julian@FreeBSD.org>1999-03-12 02:24:58 +0000
commitf27c95753fda68261265d744a5d8a9e59144d460 (patch)
treea77fd9dc92b2fd307830f703a502e5f52025ed29 /sys/nfs
parent10962136cf7fbc75a1bd22a787b0d92c5b8a655b (diff)
downloadFreeBSD-src-f27c95753fda68261265d744a5d8a9e59144d460.zip
FreeBSD-src-f27c95753fda68261265d744a5d8a9e59144d460.tar.gz
Reviewed by: Many at differnt times in differnt parts,
including alan, john, me, luoqi, and kirk Submitted by: Matt Dillon <dillon@frebsd.org> This change implements a relatively sophisticated fix to getnewbuf(). There were two problems with getnewbuf(). First, the writerecursion can lead to a system stack overflow when you have NFS and/or VN devices in the system. Second, the free/dirty buffer accounting was completely broken. Not only did the nfs routines blow it trying to manually account for the buffer state, but the accounting that was done did not work well with the purpose of their existance: figuring out when getnewbuf() needs to sleep. The meat of the change is to kern/vfs_bio.c. The remaining diffs are all minor except for NFS, which includes both the fixes for bp interaction AND fixes for a 'biodone(): buffer already done' lockup. Sys/buf.h also contains a chaining structure which is not used by this patchset but is used by other patches that are coming soon. This patch deliniated by tags PRE_MAT_GETBUF and POST_MAT_GETBUF. (sorry for the missing T matt)
Diffstat (limited to 'sys/nfs')
-rw-r--r--sys/nfs/nfs_bio.c57
-rw-r--r--sys/nfs/nfs_vnops.c81
2 files changed, 87 insertions, 51 deletions
diff --git a/sys/nfs/nfs_bio.c b/sys/nfs/nfs_bio.c
index fb437a5..2fb5353 100644
--- a/sys/nfs/nfs_bio.c
+++ b/sys/nfs/nfs_bio.c
@@ -34,7 +34,7 @@
* SUCH DAMAGE.
*
* @(#)nfs_bio.c 8.9 (Berkeley) 3/30/95
- * $Id: nfs_bio.c,v 1.65 1998/12/14 17:51:30 dt Exp $
+ * $Id: nfs_bio.c,v 1.66 1999/01/21 08:29:07 dillon Exp $
*/
@@ -418,6 +418,7 @@ nfs_bioread(vp, uio, ioflag, cred, getpages)
return (EINTR);
if ((rabp->b_flags & (B_CACHE|B_DELWRI)) == 0) {
rabp->b_flags |= (B_READ | B_ASYNC);
+ rabp->b_flags &= ~B_DONE;
vfs_busy_pages(rabp, 0);
if (nfs_asyncio(rabp, cred)) {
rabp->b_flags |= B_INVAL|B_ERROR;
@@ -513,6 +514,7 @@ again:
return (EINTR);
if ((bp->b_flags & B_CACHE) == 0) {
bp->b_flags |= B_READ;
+ bp->b_flags &= ~B_DONE;
vfs_busy_pages(bp, 0);
error = nfs_doio(bp, cred, p);
if (error) {
@@ -537,6 +539,7 @@ again:
return (EINTR);
if ((bp->b_flags & B_CACHE) == 0) {
bp->b_flags |= B_READ;
+ bp->b_flags &= ~B_DONE;
vfs_busy_pages(bp, 0);
error = nfs_doio(bp, cred, p);
if (error) {
@@ -560,6 +563,7 @@ again:
return (EINTR);
if ((bp->b_flags & B_DONE) == 0) {
bp->b_flags |= B_READ;
+ bp->b_flags &= ~B_DONE;
vfs_busy_pages(bp, 0);
error = nfs_doio(bp, cred, p);
if (error == 0 && (bp->b_flags & B_INVAL))
@@ -591,6 +595,7 @@ again:
if (rabp) {
if ((rabp->b_flags & (B_CACHE|B_DELWRI)) == 0) {
rabp->b_flags |= (B_READ | B_ASYNC);
+ rabp->b_flags &= ~B_DONE;
vfs_busy_pages(rabp, 0);
if (nfs_asyncio(rabp, cred)) {
rabp->b_flags |= B_INVAL|B_ERROR;
@@ -840,6 +845,12 @@ again:
bp->b_dirtyoff = on;
bp->b_dirtyend = on + n;
}
+ /*
+ * To avoid code complexity, we may have to throw away
+ * previously valid ranges when merging the new dirty range
+ * into the valid range. As long as we do not *ADD* an
+ * invalid valid range, we are ok.
+ */
if (bp->b_validend == 0 || bp->b_validend < bp->b_dirtyoff ||
bp->b_validoff > bp->b_dirtyend) {
bp->b_validoff = bp->b_dirtyoff;
@@ -1004,7 +1015,7 @@ nfs_asyncio(bp, cred)
if (nfs_numasync == 0)
return (EIO);
-
+
nmp = VFSTONFS(bp->b_vp->v_mount);
again:
if (nmp->nm_flag & NFSMNT_INT)
@@ -1109,12 +1120,12 @@ again:
*/
int
nfs_doio(bp, cr, p)
- register struct buf *bp;
+ struct buf *bp;
struct ucred *cr;
struct proc *p;
{
- register struct uio *uiop;
- register struct vnode *vp;
+ struct uio *uiop;
+ struct vnode *vp;
struct nfsnode *np;
struct nfsmount *nmp;
int error = 0, diff, len, iomode, must_commit = 0;
@@ -1130,6 +1141,8 @@ nfs_doio(bp, cr, p)
uiop->uio_segflg = UIO_SYSSPACE;
uiop->uio_procp = p;
+ KASSERT(!(bp->b_flags & B_DONE), ("nfs_doio: bp %p already marked done", bp));
+
/*
* Historically, paging was done with physio, but no more.
*/
@@ -1236,10 +1249,12 @@ nfs_doio(bp, cr, p)
io.iov_base = (char *)bp->b_data + bp->b_dirtyoff;
uiop->uio_rw = UIO_WRITE;
nfsstats.write_bios++;
+
if ((bp->b_flags & (B_ASYNC | B_NEEDCOMMIT | B_NOCACHE | B_CLUSTER)) == B_ASYNC)
iomode = NFSV3WRITE_UNSTABLE;
else
iomode = NFSV3WRITE_FILESYNC;
+
bp->b_flags |= B_WRITEINPROG;
error = nfs_writerpc(vp, uiop, cr, &iomode, &must_commit);
if (!error && iomode == NFSV3WRITE_UNSTABLE) {
@@ -1247,8 +1262,9 @@ nfs_doio(bp, cr, p)
if (bp->b_dirtyoff == 0
&& bp->b_dirtyend == bp->b_bufsize)
bp->b_flags |= B_CLUSTEROK;
- } else
+ } else {
bp->b_flags &= ~B_NEEDCOMMIT;
+ }
bp->b_flags &= ~B_WRITEINPROG;
/*
@@ -1265,31 +1281,30 @@ nfs_doio(bp, cr, p)
* the B_DELWRI and B_NEEDCOMMIT flags.
*
* If the buffer is marked B_PAGING, it does not reside on
- * the vp's paging queues so we do not ( and cannot ) reassign
- * it. XXX numdirtybuffers should be integrated into
- * reassignbuf() call.
+ * the vp's paging queues so we cannot call bdirty(). The
+ * bp in this case is not an NFS cache block so we should
+ * be safe. XXX
*/
if (error == EINTR
|| (!error && (bp->b_flags & B_NEEDCOMMIT))) {
int s;
+ s = splbio();
bp->b_flags &= ~(B_INVAL|B_NOCACHE);
if ((bp->b_flags & B_PAGING) == 0) {
- ++numdirtybuffers;
- bp->b_flags |= B_DELWRI;
- s = splbio();
- reassignbuf(bp, vp);
- splx(s);
+ bdirty(bp);
+ bp->b_flags &= ~B_DONE;
}
if ((bp->b_flags & B_ASYNC) == 0)
bp->b_flags |= B_EINTR;
+ splx(s);
} else {
- if (error) {
- bp->b_flags |= B_ERROR;
- bp->b_error = np->n_error = error;
- np->n_flag |= NWRITEERR;
- }
- bp->b_dirtyoff = bp->b_dirtyend = 0;
+ if (error) {
+ bp->b_flags |= B_ERROR;
+ bp->b_error = np->n_error = error;
+ np->n_flag |= NWRITEERR;
+ }
+ bp->b_dirtyoff = bp->b_dirtyend = 0;
}
} else {
bp->b_resid = 0;
@@ -1299,7 +1314,7 @@ nfs_doio(bp, cr, p)
}
bp->b_resid = uiop->uio_resid;
if (must_commit)
- nfs_clearcommit(vp->v_mount);
+ nfs_clearcommit(vp->v_mount);
biodone(bp);
return (error);
}
diff --git a/sys/nfs/nfs_vnops.c b/sys/nfs/nfs_vnops.c
index 4afb469..a92bb22 100644
--- a/sys/nfs/nfs_vnops.c
+++ b/sys/nfs/nfs_vnops.c
@@ -34,7 +34,7 @@
* SUCH DAMAGE.
*
* @(#)nfs_vnops.c 8.16 (Berkeley) 5/27/95
- * $Id: nfs_vnops.c,v 1.122 1999/02/13 09:47:30 dillon Exp $
+ * $Id: nfs_vnops.c,v 1.123 1999/02/16 10:49:54 dfr Exp $
*/
@@ -2648,6 +2648,9 @@ nfs_strategy(ap)
struct proc *p;
int error = 0;
+ KASSERT(!(bp->b_flags & B_DONE), ("nfs_strategy: buffer %p unexpectedly marked B_DONE", bp));
+ KASSERT((bp->b_flags & B_BUSY), ("nfs_strategy: buffer %p not B_BUSY", bp));
+
if (bp->b_flags & B_PHYS)
panic("nfs physio");
@@ -2797,6 +2800,10 @@ again:
/*
* Work out if all buffers are using the same cred
* so we can deal with them all with one commit.
+ *
+ * NOTE: we are not clearing B_DONE here, so we have
+ * to do it later on in this routine if we intend to
+ * initiate I/O on the bp.
*/
if (wcred == NULL)
wcred = bp->b_wcred;
@@ -2804,6 +2811,14 @@ again:
wcred = NOCRED;
bp->b_flags |= (B_BUSY | B_WRITEINPROG);
vfs_busy_pages(bp, 1);
+
+ /*
+ * bp is protected by being B_BUSY, but nbp is not
+ * and vfs_busy_pages() may sleep. We have to
+ * recalculate nbp.
+ */
+ nbp = TAILQ_NEXT(bp, b_vnbufs);
+
/*
* A list of these buffers is kept so that the
* second loop knows which buffers have actually
@@ -2849,6 +2864,7 @@ again:
if (retv == NFSERR_STALEWRITEVERF)
nfs_clearcommit(vp->v_mount);
+
/*
* Now, either mark the blocks I/O done or mark the
* blocks dirty, depending on whether the commit
@@ -2858,23 +2874,27 @@ again:
bp = bvec[i];
bp->b_flags &= ~(B_NEEDCOMMIT | B_WRITEINPROG);
if (retv) {
- vfs_unbusy_pages(bp);
- brelse(bp);
+ /*
+ * Error, leave B_DELWRI intact
+ */
+ vfs_unbusy_pages(bp);
+ brelse(bp);
} else {
- s = splbio(); /* XXX check this positionning */
- vp->v_numoutput++;
- bp->b_flags |= B_ASYNC;
- if (bp->b_flags & B_DELWRI) {
- --numdirtybuffers;
- if (needsbuffer) {
- vfs_bio_need_satisfy();
- }
- }
- bp->b_flags &= ~(B_READ|B_DONE|B_ERROR|B_DELWRI);
- bp->b_dirtyoff = bp->b_dirtyend = 0;
- reassignbuf(bp, vp);
- splx(s);
- biodone(bp);
+ /*
+ * Success, remove B_DELWRI ( bundirty() ).
+ *
+ * b_dirtyoff/b_dirtyend seem to be NFS
+ * specific. We should probably move that
+ * into bundirty(). XXX
+ */
+ s = splbio();
+ vp->v_numoutput++;
+ bp->b_flags |= B_ASYNC;
+ bundirty(bp);
+ bp->b_flags &= ~(B_READ|B_DONE|B_ERROR);
+ bp->b_dirtyoff = bp->b_dirtyend = 0;
+ splx(s);
+ biodone(bp);
}
}
}
@@ -2999,6 +3019,8 @@ nfs_print(ap)
/*
* Just call nfs_writebp() with the force argument set to 1.
+ *
+ * NOTE: B_DONE may or may not be set in a_bp on call.
*/
static int
nfs_bwrite(ap)
@@ -3020,26 +3042,24 @@ nfs_writebp(bp, force)
int force;
{
int s;
- register int oldflags = bp->b_flags, retv = 1;
+ int oldflags = bp->b_flags;
+ int retv = 1;
off_t off;
if(!(bp->b_flags & B_BUSY))
panic("bwrite: buffer is not busy???");
if (bp->b_flags & B_INVAL)
- bp->b_flags |= B_INVAL | B_NOCACHE;
+ bp->b_flags |= B_NOCACHE;
- if (bp->b_flags & B_DELWRI) {
- --numdirtybuffers;
- if (needsbuffer)
- vfs_bio_need_satisfy();
- }
- s = splbio(); /* XXX check if needed */
- bp->b_flags &= ~(B_READ|B_DONE|B_ERROR|B_DELWRI);
+ /*
+ * XXX we bundirty() the bp here. Shouldn't we do it later after
+ * the I/O has completed??
+ */
- if ((oldflags & (B_ASYNC|B_DELWRI)) == (B_ASYNC|B_DELWRI)) {
- reassignbuf(bp, bp->b_vp);
- }
+ s = splbio();
+ bundirty(bp);
+ bp->b_flags &= ~(B_READ|B_DONE|B_ERROR);
bp->b_vp->v_numoutput++;
curproc->p_stats->p_ru.ru_oublock++;
@@ -3061,8 +3081,9 @@ nfs_writebp(bp, force)
bp->b_dirtyoff = bp->b_dirtyend = 0;
bp->b_flags &= ~B_NEEDCOMMIT;
biodone(bp);
- } else if (retv == NFSERR_STALEWRITEVERF)
+ } else if (retv == NFSERR_STALEWRITEVERF) {
nfs_clearcommit(bp->b_vp->v_mount);
+ }
}
if (retv) {
if (force)
OpenPOWER on IntegriCloud