author	jeff <jeff@FreeBSD.org>	2003-08-28 06:55:18 +0000
committer	jeff <jeff@FreeBSD.org>	2003-08-28 06:55:18 +0000
commit	fc1a2c40168d9e5004c6e3e211ee92912e3cee88 (patch)
tree	62d0a8c997a551fc4ecce9c91ba30590420b95dc /sys
parent	d947ee0dfe9e494fe035ac012605623293334958 (diff)
- Move BX_BKGRDWAIT and BX_BKGRDINPROG to BV_ and the b_vflags field.
- Surround all accesses of the BKGRD{WAIT,INPROG} flags with the vnode interlock.
- Don't use the B_LOCKED flag and QUEUE_LOCKED for background write buffers. Instead, check for the BKGRDINPROG flag before recycling or throwing away a buffer. We do this because it is not safe for us to move the original buffer to a new queue from the callback on the background write buffer.
- Remove the B_LOCKED flag and the locked buffer queue. They are no longer used.
- The vnode interlock is used around checks for BKGRDINPROG where it may not be strictly necessary. If we hold the buf lock, a background write will not be started without our knowledge; one may only be completed while we're not looking. Rather than remove the code, document two of the places where this extra locking is done. A pass should be done to verify and minimize the locking later.
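
In outline, the new discipline is that b_vflags may only be examined or modified with the vnode interlock held, and waiters hand off through BV_BKGRDWAIT. A minimal sketch of the waiter side, assuming the stock FreeBSD interlock KPIs (VI_LOCK/VI_MTX, msleep); the helper name is hypothetical, and the committed bwrite() panics rather than loops, so this illustrates the pattern, not the committed code:

	/*
	 * Hypothetical helper: wait until any background write of bp's
	 * original buffer has completed.  BV_BKGRDINPROG and BV_BKGRDWAIT
	 * live in b_vflags, which the vnode interlock protects.
	 */
	static void
	wait_for_bkgrd_write(struct buf *bp)
	{
		VI_LOCK(bp->b_vp);
		while (bp->b_vflags & BV_BKGRDINPROG) {
			bp->b_vflags |= BV_BKGRDWAIT;
			/* msleep() drops and retakes the interlock atomically. */
			msleep(&bp->b_xflags, VI_MTX(bp->b_vp), PRIBIO,
			    "bwrbg", 0);
		}
		VI_UNLOCK(bp->b_vp);
	}

vfs_backgroundwritedone() forms the other half of the handshake: it clears BV_BKGRDINPROG and, while still holding the interlock, wakes &origbp->b_xflags if BV_BKGRDWAIT was set, so a waiter sleeping through the same interlock cannot miss the wakeup.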
Diffstat (limited to 'sys')
-rw-r--r--	sys/kern/vfs_bio.c	151
-rw-r--r--	sys/kern/vfs_cluster.c	18
-rw-r--r--	sys/sys/buf.h	13
-rw-r--r--	sys/ufs/ffs/ffs_softdep.c	32
4 files changed, 117 insertions, 97 deletions
diff --git a/sys/kern/vfs_bio.c b/sys/kern/vfs_bio.c
index 663a504..296c9a6 100644
--- a/sys/kern/vfs_bio.c
+++ b/sys/kern/vfs_bio.c
@@ -164,7 +164,7 @@ SYSCTL_INT(_vfs, OID_AUTO, getnewbufrestarts, CTLFLAG_RW, &getnewbufrestarts, 0,
"Number of times getnewbuf has had to restart a buffer aquisition");
static int dobkgrdwrite = 1;
SYSCTL_INT(_debug, OID_AUTO, dobkgrdwrite, CTLFLAG_RW, &dobkgrdwrite, 0,
- "Do background writes (honoring the BX_BKGRDWRITE flag)?");
+ "Do background writes (honoring the BV_BKGRDWRITE flag)?");
/*
* Wakeup point for bufdaemon, as well as indicator of whether it is already
@@ -223,14 +223,13 @@ static struct mtx bdonelock;
/*
* Definitions for the buffer free lists.
*/
-#define BUFFER_QUEUES 6 /* number of free buffer queues */
+#define BUFFER_QUEUES 5 /* number of free buffer queues */
#define QUEUE_NONE 0 /* on no queue */
-#define QUEUE_LOCKED 1 /* locked buffers */
-#define QUEUE_CLEAN 2 /* non-B_DELWRI buffers */
-#define QUEUE_DIRTY 3 /* B_DELWRI buffers */
-#define QUEUE_EMPTYKVA 4 /* empty buffer headers w/KVA assignment */
-#define QUEUE_EMPTY 5 /* empty buffer headers */
+#define QUEUE_CLEAN 1 /* non-B_DELWRI buffers */
+#define QUEUE_DIRTY 2 /* B_DELWRI buffers */
+#define QUEUE_EMPTYKVA 3 /* empty buffer headers w/KVA assignment */
+#define QUEUE_EMPTY 4 /* empty buffer headers */
/* Queues for free buffers with various properties */
static TAILQ_HEAD(bqueues, buf) bufqueues[BUFFER_QUEUES] = { { 0 } };
@@ -777,17 +776,19 @@ bwrite(struct buf * bp)
* writing this block if it is asynchronous. Otherwise
* wait for the background write to complete.
*/
- if (bp->b_xflags & BX_BKGRDINPROG) {
+ VI_LOCK(bp->b_vp);
+ if (bp->b_vflags & BV_BKGRDINPROG) {
if (bp->b_flags & B_ASYNC) {
splx(s);
bdwrite(bp);
return (0);
}
- bp->b_xflags |= BX_BKGRDWAIT;
- tsleep(&bp->b_xflags, PRIBIO, "bwrbg", 0);
- if (bp->b_xflags & BX_BKGRDINPROG)
+ bp->b_vflags |= BV_BKGRDWAIT;
+ msleep(&bp->b_xflags, VI_MTX(bp->b_vp), PRIBIO, "bwrbg", 0);
+ if (bp->b_vflags & BV_BKGRDINPROG)
panic("bwrite: still writing");
}
+ VI_UNLOCK(bp->b_vp);
/* Mark the buffer clean */
bundirty(bp);
@@ -820,8 +821,8 @@ bwrite(struct buf * bp)
memcpy(newbp->b_data, bp->b_data, bp->b_bufsize);
newbp->b_lblkno = bp->b_lblkno;
newbp->b_xflags |= BX_BKGRDMARKER;
- /* XXX The BX_ flags need to be protected as well */
VI_LOCK(bp->b_vp);
+ bp->b_vflags |= BV_BKGRDINPROG;
bgetvp(bp->b_vp, newbp);
VI_UNLOCK(bp->b_vp);
newbp->b_blkno = bp->b_blkno;
@@ -842,8 +843,6 @@ bwrite(struct buf * bp)
* If the reconstituted buffer were written, we could end up
* with two background copies being written at the same time.
*/
- bp->b_xflags |= BX_BKGRDINPROG;
- bp->b_flags |= B_LOCKED;
bqrelse(bp);
bp = newbp;
}
@@ -906,38 +905,29 @@ vfs_backgroundwritedone(bp)
VI_LOCK(bp->b_vp);
if ((origbp = gbincore(bp->b_vp, bp->b_lblkno)) == NULL)
panic("backgroundwritedone: lost buffer");
- VI_UNLOCK(bp->b_vp);
- /*
- * Process dependencies then return any unfinished ones.
- */
- if (LIST_FIRST(&bp->b_dep) != NULL)
- buf_complete(bp);
- if (LIST_FIRST(&bp->b_dep) != NULL)
- buf_movedeps(bp, origbp);
- /* XXX Find out if origbp can disappear or get inconsistent */
/*
- * Clear the BX_BKGRDINPROG flag in the original buffer
+ * Clear the BV_BKGRDINPROG flag in the original buffer
* and awaken it if it is waiting for the write to complete.
- * If BX_BKGRDINPROG is not set in the original buffer it must
+ * If BV_BKGRDINPROG is not set in the original buffer it must
* have been released and re-instantiated - which is not legal.
*/
- KASSERT((origbp->b_xflags & BX_BKGRDINPROG),
+ KASSERT((origbp->b_vflags & BV_BKGRDINPROG),
("backgroundwritedone: lost buffer2"));
- origbp->b_xflags &= ~BX_BKGRDINPROG;
- if (origbp->b_xflags & BX_BKGRDWAIT) {
- origbp->b_xflags &= ~BX_BKGRDWAIT;
+ origbp->b_vflags &= ~BV_BKGRDINPROG;
+ if (origbp->b_vflags & BV_BKGRDWAIT) {
+ origbp->b_vflags &= ~BV_BKGRDWAIT;
wakeup(&origbp->b_xflags);
}
+ VI_UNLOCK(bp->b_vp);
/*
- * Clear the B_LOCKED flag and remove it from the locked
- * queue if it currently resides there.
+ * Process dependencies then return any unfinished ones.
*/
- origbp->b_flags &= ~B_LOCKED;
- if (BUF_LOCK(origbp, LK_EXCLUSIVE | LK_NOWAIT, NULL) == 0) {
- bremfree(origbp);
- bqrelse(origbp);
- }
+ if (LIST_FIRST(&bp->b_dep) != NULL)
+ buf_complete(bp);
+ if (LIST_FIRST(&bp->b_dep) != NULL)
+ buf_movedeps(bp, origbp);
+
/*
* This buffer is marked B_NOCACHE, so when it is released
* by biodone, it will be tossed. We mark it with BIO_READ
@@ -997,7 +987,7 @@ bdwrite(struct buf * bp)
* Try to find a buffer to flush.
*/
TAILQ_FOREACH(nbp, &vp->v_dirtyblkhd, b_vnbufs) {
- if ((nbp->b_xflags & BX_BKGRDINPROG) ||
+ if ((nbp->b_vflags & BV_BKGRDINPROG) ||
buf_countdeps(nbp, 0) ||
BUF_LOCK(nbp, LK_EXCLUSIVE | LK_NOWAIT, NULL))
continue;
@@ -1207,9 +1197,6 @@ brelse(struct buf * bp)
s = splbio();
- if (bp->b_flags & B_LOCKED)
- bp->b_ioflags &= ~BIO_ERROR;
-
if (bp->b_iocmd == BIO_WRITE &&
(bp->b_ioflags & BIO_ERROR) &&
!(bp->b_flags & B_INVAL)) {
@@ -1259,8 +1246,17 @@ brelse(struct buf * bp)
*/
if (bp->b_flags & B_DELWRI)
bp->b_flags &= ~B_RELBUF;
- else if (vm_page_count_severe() && !(bp->b_xflags & BX_BKGRDINPROG))
- bp->b_flags |= B_RELBUF;
+ else if (vm_page_count_severe()) {
+ /*
+ * XXX This lock may not be necessary since BKGRDINPROG
+ * cannot be set while we hold the buf lock, it can only be
+ * cleared if it is already pending.
+ */
+ VI_LOCK(bp->b_vp);
+ if (!(bp->b_vflags & BV_BKGRDINPROG))
+ bp->b_flags |= B_RELBUF;
+ VI_UNLOCK(bp->b_vp);
+ }
/*
* VMIO buffer rundown. It is not very necessary to keep a VMIO buffer
@@ -1389,7 +1385,7 @@ brelse(struct buf * bp)
if (bp->b_bufsize == 0) {
bp->b_flags |= B_INVAL;
bp->b_xflags &= ~(BX_BKGRDWRITE | BX_ALTDATA);
- if (bp->b_xflags & BX_BKGRDINPROG)
+ if (bp->b_vflags & BV_BKGRDINPROG)
panic("losing buffer 1");
if (bp->b_kvasize) {
bp->b_qindex = QUEUE_EMPTYKVA;
@@ -1403,17 +1399,11 @@ brelse(struct buf * bp)
(bp->b_ioflags & BIO_ERROR)) {
bp->b_flags |= B_INVAL;
bp->b_xflags &= ~(BX_BKGRDWRITE | BX_ALTDATA);
- if (bp->b_xflags & BX_BKGRDINPROG)
+ if (bp->b_vflags & BV_BKGRDINPROG)
panic("losing buffer 2");
bp->b_qindex = QUEUE_CLEAN;
TAILQ_INSERT_HEAD(&bufqueues[QUEUE_CLEAN], bp, b_freelist);
bp->b_dev = NODEV;
-
- /* buffers that are locked */
- } else if (bp->b_flags & B_LOCKED) {
- bp->b_qindex = QUEUE_LOCKED;
- TAILQ_INSERT_TAIL(&bufqueues[QUEUE_LOCKED], bp, b_freelist);
-
/* remaining buffers */
} else {
if (bp->b_flags & B_DELWRI)
@@ -1447,7 +1437,7 @@ brelse(struct buf * bp)
* if B_INVAL is set ).
*/
- if ((bp->b_flags & B_LOCKED) == 0 && !(bp->b_flags & B_DELWRI))
+ if (!(bp->b_flags & B_DELWRI))
bufcountwakeup();
/*
@@ -1493,34 +1483,38 @@ bqrelse(struct buf * bp)
return;
}
mtx_lock(&bqlock);
- if (bp->b_flags & B_LOCKED) {
- bp->b_ioflags &= ~BIO_ERROR;
- bp->b_qindex = QUEUE_LOCKED;
- TAILQ_INSERT_TAIL(&bufqueues[QUEUE_LOCKED], bp, b_freelist);
- /* buffers with stale but valid contents */
- } else if (bp->b_flags & B_DELWRI) {
+ /* buffers with stale but valid contents */
+ if (bp->b_flags & B_DELWRI) {
bp->b_qindex = QUEUE_DIRTY;
TAILQ_INSERT_TAIL(&bufqueues[QUEUE_DIRTY], bp, b_freelist);
- } else if (vm_page_count_severe()) {
+ } else {
/*
- * We are too low on memory, we have to try to free the
- * buffer (most importantly: the wired pages making up its
- * backing store) *now*.
+ * XXX This lock may not be necessary since BKGRDINPROG
+ * cannot be set while we hold the buf lock, it can only be
+ * cleared if it is already pending.
*/
- mtx_unlock(&bqlock);
- splx(s);
- brelse(bp);
- return;
- } else {
- bp->b_qindex = QUEUE_CLEAN;
- TAILQ_INSERT_TAIL(&bufqueues[QUEUE_CLEAN], bp, b_freelist);
+ VI_LOCK(bp->b_vp);
+ if (!vm_page_count_severe() || bp->b_vflags & BV_BKGRDINPROG) {
+ VI_UNLOCK(bp->b_vp);
+ bp->b_qindex = QUEUE_CLEAN;
+ TAILQ_INSERT_TAIL(&bufqueues[QUEUE_CLEAN], bp,
+ b_freelist);
+ } else {
+ /*
+ * We are too low on memory, we have to try to free
+ * the buffer (most importantly: the wired pages
+ * making up its backing store) *now*.
+ */
+ mtx_unlock(&bqlock);
+ splx(s);
+ brelse(bp);
+ return;
+ }
}
mtx_unlock(&bqlock);
- if ((bp->b_flags & B_LOCKED) == 0 &&
- ((bp->b_flags & B_INVAL) || !(bp->b_flags & B_DELWRI))) {
+ if ((bp->b_flags & B_INVAL) || !(bp->b_flags & B_DELWRI))
bufcountwakeup();
- }
/*
* Something we can maybe free or reuse.
@@ -1826,6 +1820,14 @@ restart:
break;
}
}
+ if (bp->b_vp) {
+ VI_LOCK(bp->b_vp);
+ if (bp->b_vflags & BV_BKGRDINPROG) {
+ VI_UNLOCK(bp->b_vp);
+ continue;
+ }
+ VI_UNLOCK(bp->b_vp);
+ }
/*
* Sanity Checks
@@ -1887,7 +1889,7 @@ restart:
}
if (LIST_FIRST(&bp->b_dep) != NULL)
buf_deallocate(bp);
- if (bp->b_xflags & BX_BKGRDINPROG)
+ if (bp->b_vflags & BV_BKGRDINPROG)
panic("losing buffer 3");
if (bp->b_bufsize)
@@ -2136,10 +2138,13 @@ flushbufqueues(int flushdeps)
continue;
KASSERT((bp->b_flags & B_DELWRI),
("unexpected clean buffer %p", bp));
- if ((bp->b_xflags & BX_BKGRDINPROG) != 0) {
+ VI_LOCK(bp->b_vp);
+ if ((bp->b_vflags & BV_BKGRDINPROG) != 0) {
+ VI_UNLOCK(bp->b_vp);
BUF_UNLOCK(bp);
continue;
}
+ VI_UNLOCK(bp->b_vp);
if (bp->b_flags & B_INVAL) {
bremfreel(bp);
mtx_unlock(&bqlock);
diff --git a/sys/kern/vfs_cluster.c b/sys/kern/vfs_cluster.c
index ad0b55d..89de2ef 100644
--- a/sys/kern/vfs_cluster.c
+++ b/sys/kern/vfs_cluster.c
@@ -399,11 +399,15 @@ cluster_rbuild(vp, filesize, lbn, blkno, size, run, fbp)
* VMIO backed. The clustering code can only deal
* with VMIO-backed buffers.
*/
- if ((tbp->b_flags & (B_CACHE|B_LOCKED)) ||
- (tbp->b_flags & B_VMIO) == 0) {
+ VI_LOCK(bp->b_vp);
+ if ((tbp->b_vflags & BV_BKGRDINPROG) ||
+ (tbp->b_flags & B_CACHE) ||
+ (tbp->b_flags & B_VMIO) == 0) {
+ VI_UNLOCK(bp->b_vp);
bqrelse(tbp);
break;
}
+ VI_UNLOCK(bp->b_vp);
/*
* The buffer must be completely invalid in order to
@@ -768,7 +772,8 @@ cluster_wbuild(vp, size, start_lbn, len)
* partake in the clustered write.
*/
VI_LOCK(vp);
- if ((tbp = gbincore(vp, start_lbn)) == NULL) {
+ if ((tbp = gbincore(vp, start_lbn)) == NULL ||
+ (tbp->b_vflags & BV_BKGRDINPROG)) {
VI_UNLOCK(vp);
++start_lbn;
--len;
@@ -782,8 +787,7 @@ cluster_wbuild(vp, size, start_lbn, len)
splx(s);
continue;
}
- if ((tbp->b_flags & (B_LOCKED | B_INVAL | B_DELWRI)) !=
- B_DELWRI) {
+ if ((tbp->b_flags & (B_INVAL | B_DELWRI)) != B_DELWRI) {
BUF_UNLOCK(tbp);
++start_lbn;
--len;
@@ -857,7 +861,8 @@ cluster_wbuild(vp, size, start_lbn, len)
* can't need to be written.
*/
VI_LOCK(vp);
- if ((tbp = gbincore(vp, start_lbn)) == NULL) {
+ if ((tbp = gbincore(vp, start_lbn)) == NULL ||
+ (tbp->b_vflags & BV_BKGRDINPROG)) {
VI_UNLOCK(vp);
splx(s);
break;
@@ -881,7 +886,6 @@ cluster_wbuild(vp, size, start_lbn, len)
B_INVAL | B_DELWRI | B_NEEDCOMMIT))
!= (B_DELWRI | B_CLUSTEROK |
(bp->b_flags & (B_VMIO | B_NEEDCOMMIT))) ||
- (tbp->b_flags & B_LOCKED) ||
tbp->b_wcred != bp->b_wcred) {
BUF_UNLOCK(tbp);
splx(s);
diff --git a/sys/sys/buf.h b/sys/sys/buf.h
index 73baef1..3965764 100644
--- a/sys/sys/buf.h
+++ b/sys/sys/buf.h
@@ -217,7 +217,7 @@ struct buf {
#define B_00000800 0x00000800 /* Availabel flag. */
#define B_00001000 0x00001000 /* Available flag. */
#define B_INVAL 0x00002000 /* Does not contain valid info. */
-#define B_LOCKED 0x00004000 /* Locked in core (not reusable). */
+#define B_00004000 0x00004000 /* Available flag. */
#define B_NOCACHE 0x00008000 /* Do not cache block after use. */
#define B_MALLOC 0x00010000 /* malloced b_data */
#define B_CLUSTEROK 0x00020000 /* Pagein op, so swap() can count it. */
@@ -247,15 +247,18 @@ struct buf {
*/
#define BX_VNDIRTY 0x00000001 /* On vnode dirty list */
#define BX_VNCLEAN 0x00000002 /* On vnode clean list */
-#define BX_BKGRDWRITE 0x00000004 /* Do writes in background */
-#define BX_BKGRDINPROG 0x00000008 /* Background write in progress */
-#define BX_BKGRDWAIT 0x00000010 /* Background write waiting */
+#define BX_BKGRDWRITE 0x00000010 /* Do writes in background */
#define BX_BKGRDMARKER 0x00000020 /* Mark buffer for splay tree */
#define BX_ALTDATA 0x00000040 /* Holds extended data */
#define NOOFFSET (-1LL) /* No buffer offset calculated yet */
-#define BV_SCANNED 0x00001000 /* VOP_FSYNC funcs mark written bufs */
+/*
+ * These flags are kept in b_vflags.
+ */
+#define BV_SCANNED 0x00000001 /* VOP_FSYNC funcs mark written bufs */
+#define BV_BKGRDINPROG 0x00000002 /* Background write in progress */
+#define BV_BKGRDWAIT 0x00000004 /* Background write waiting */
#ifdef _KERNEL
/*
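
The buf.h hunk above splits the flag namespaces by lock domain: the BX_* flags in b_xflags remain protected by the buffer lock, while the BV_* flags live in the b_vflags word under the vnode interlock. A hedged illustration of the resulting discipline (the surrounding code and comments are ours, not part of the commit; ASSERT_VI_LOCKED is the stock interlock assertion, assuming it is available in this tree):

	/* b_xflags: the holder of the buf lock may set these directly. */
	bp->b_xflags |= BX_BKGRDWRITE;

	/* b_vflags: the vnode interlock must be held across any access. */
	VI_LOCK(bp->b_vp);
	ASSERT_VI_LOCKED(bp->b_vp, "b_vflags update");
	bp->b_vflags |= BV_BKGRDINPROG;
	VI_UNLOCK(bp->b_vp);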
diff --git a/sys/ufs/ffs/ffs_softdep.c b/sys/ufs/ffs/ffs_softdep.c
index df14940..9ca5686 100644
--- a/sys/ufs/ffs/ffs_softdep.c
+++ b/sys/ufs/ffs/ffs_softdep.c
@@ -4892,11 +4892,9 @@ softdep_fsync_mountdev(vp)
/*
* If it is already scheduled, skip to the next buffer.
*/
- if (BUF_LOCK(bp, LK_EXCLUSIVE | LK_NOWAIT | LK_INTERLOCK,
- VI_MTX(vp))) {
- VI_LOCK(vp);
+ if (BUF_LOCK(bp, LK_EXCLUSIVE | LK_NOWAIT, NULL))
continue;
- }
+
if ((bp->b_flags & B_DELWRI) == 0) {
FREE_LOCK(&lk);
panic("softdep_fsync_mountdev: not dirty");
@@ -4907,11 +4905,11 @@ softdep_fsync_mountdev(vp)
*/
if ((wk = LIST_FIRST(&bp->b_dep)) == NULL ||
wk->wk_type != D_BMSAFEMAP ||
- (bp->b_xflags & BX_BKGRDINPROG)) {
+ (bp->b_vflags & BV_BKGRDINPROG)) {
BUF_UNLOCK(bp);
- VI_LOCK(vp);
continue;
}
+ VI_UNLOCK(vp);
bremfree(bp);
FREE_LOCK(&lk);
(void) bawrite(bp);
@@ -5803,21 +5801,31 @@ getdirtybuf(bpp, waitfor)
struct buf *bp;
int error;
+ /*
+ * XXX This code and the code that calls it need to be reviewed to
+ * verify its use of the vnode interlock.
+ */
+
for (;;) {
if ((bp = *bpp) == NULL)
return (0);
- /* XXX Probably needs interlock */
+ VI_LOCK(bp->b_vp);
if (BUF_LOCK(bp, LK_EXCLUSIVE | LK_NOWAIT, NULL) == 0) {
- if ((bp->b_xflags & BX_BKGRDINPROG) == 0)
+ if ((bp->b_vflags & BV_BKGRDINPROG) == 0) {
+ VI_UNLOCK(bp->b_vp);
break;
+ }
BUF_UNLOCK(bp);
- if (waitfor != MNT_WAIT)
+ if (waitfor != MNT_WAIT) {
+ VI_UNLOCK(bp->b_vp);
return (0);
- bp->b_xflags |= BX_BKGRDWAIT;
- interlocked_sleep(&lk, SLEEP, &bp->b_xflags, NULL,
- PRIBIO, "getbuf", 0);
+ }
+ bp->b_vflags |= BV_BKGRDWAIT;
+ interlocked_sleep(&lk, SLEEP, &bp->b_xflags,
+ VI_MTX(bp->b_vp), PRIBIO|PDROP, "getbuf", 0);
continue;
}
+ VI_UNLOCK(bp->b_vp);
if (waitfor != MNT_WAIT)
return (0);
error = interlocked_sleep(&lk, LOCKBUF, bp, NULL,
OpenPOWER on IntegriCloud