-rw-r--r--  sys/kern/vfs_bio.c         151
-rw-r--r--  sys/kern/vfs_cluster.c      18
-rw-r--r--  sys/sys/buf.h               13
-rw-r--r--  sys/ufs/ffs/ffs_softdep.c   32
4 files changed, 117 insertions, 97 deletions
diff --git a/sys/kern/vfs_bio.c b/sys/kern/vfs_bio.c
index 663a504..296c9a6 100644
--- a/sys/kern/vfs_bio.c
+++ b/sys/kern/vfs_bio.c
@@ -164,7 +164,7 @@ SYSCTL_INT(_vfs, OID_AUTO, getnewbufrestarts, CTLFLAG_RW, &getnewbufrestarts, 0,
"Number of times getnewbuf has had to restart a buffer aquisition");
static int dobkgrdwrite = 1;
SYSCTL_INT(_debug, OID_AUTO, dobkgrdwrite, CTLFLAG_RW, &dobkgrdwrite, 0,
- "Do background writes (honoring the BX_BKGRDWRITE flag)?");
+ "Do background writes (honoring the BV_BKGRDWRITE flag)?");
/*
* Wakeup point for bufdaemon, as well as indicator of whether it is already
@@ -223,14 +223,13 @@ static struct mtx bdonelock;
/*
* Definitions for the buffer free lists.
*/
-#define BUFFER_QUEUES 6 /* number of free buffer queues */
+#define BUFFER_QUEUES 5 /* number of free buffer queues */
#define QUEUE_NONE 0 /* on no queue */
-#define QUEUE_LOCKED 1 /* locked buffers */
-#define QUEUE_CLEAN 2 /* non-B_DELWRI buffers */
-#define QUEUE_DIRTY 3 /* B_DELWRI buffers */
-#define QUEUE_EMPTYKVA 4 /* empty buffer headers w/KVA assignment */
-#define QUEUE_EMPTY 5 /* empty buffer headers */
+#define QUEUE_CLEAN 1 /* non-B_DELWRI buffers */
+#define QUEUE_DIRTY 2 /* B_DELWRI buffers */
+#define QUEUE_EMPTYKVA 3 /* empty buffer headers w/KVA assignment */
+#define QUEUE_EMPTY 4 /* empty buffer headers */
/* Queues for free buffers with various properties */
static TAILQ_HEAD(bqueues, buf) bufqueues[BUFFER_QUEUES] = { { 0 } };
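[With QUEUE_LOCKED gone, every release path files a buffer on one of the
five remaining queues. A minimal sketch of the insertion idiom used
throughout this file (the helper name bq_insert_clean is hypothetical;
it assumes bqlock is held, as in the brelse()/bqrelse() hunks below):

    /* Hypothetical helper; mirrors the QUEUE_CLEAN insertions below. */
    static void
    bq_insert_clean(struct buf *bp)
    {
            mtx_assert(&bqlock, MA_OWNED);  /* caller holds the queue lock */
            bp->b_qindex = QUEUE_CLEAN;
            TAILQ_INSERT_TAIL(&bufqueues[QUEUE_CLEAN], bp, b_freelist);
    }
]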
@@ -777,17 +776,19 @@ bwrite(struct buf * bp)
* writing this block if it is asynchronous. Otherwise
* wait for the background write to complete.
*/
- if (bp->b_xflags & BX_BKGRDINPROG) {
+ VI_LOCK(bp->b_vp);
+ if (bp->b_vflags & BV_BKGRDINPROG) {
if (bp->b_flags & B_ASYNC) {
splx(s);
bdwrite(bp);
return (0);
}
- bp->b_xflags |= BX_BKGRDWAIT;
- tsleep(&bp->b_xflags, PRIBIO, "bwrbg", 0);
- if (bp->b_xflags & BX_BKGRDINPROG)
+ bp->b_vflags |= BV_BKGRDWAIT;
+ msleep(&bp->b_xflags, VI_MTX(bp->b_vp), PRIBIO, "bwrbg", 0);
+ if (bp->b_vflags & BV_BKGRDINPROG)
panic("bwrite: still writing");
}
+ VI_UNLOCK(bp->b_vp);
/* Mark the buffer clean */
bundirty(bp);
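[This hunk is the waiter side of the new protocol: the background-write
flags move from b_xflags to b_vflags, which is protected by the vnode
interlock, so the old tsleep() becomes an msleep(9) that atomically drops
VI_MTX() while asleep. A condensed sketch of the pattern, assuming kernel
context; note the sleep channel stays &bp->b_xflags so it matches the
wakeup() in vfs_backgroundwritedone():

    VI_LOCK(bp->b_vp);
    if (bp->b_vflags & BV_BKGRDINPROG) {
            bp->b_vflags |= BV_BKGRDWAIT;
            /* msleep() releases and reacquires VI_MTX(bp->b_vp) atomically. */
            msleep(&bp->b_xflags, VI_MTX(bp->b_vp), PRIBIO, "bwrbg", 0);
    }
    VI_UNLOCK(bp->b_vp);
]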
@@ -820,8 +821,8 @@ bwrite(struct buf * bp)
memcpy(newbp->b_data, bp->b_data, bp->b_bufsize);
newbp->b_lblkno = bp->b_lblkno;
newbp->b_xflags |= BX_BKGRDMARKER;
- /* XXX The BX_ flags need to be protected as well */
VI_LOCK(bp->b_vp);
+ bp->b_vflags |= BV_BKGRDINPROG;
bgetvp(bp->b_vp, newbp);
VI_UNLOCK(bp->b_vp);
newbp->b_blkno = bp->b_blkno;
@@ -842,8 +843,6 @@ bwrite(struct buf * bp)
* If the reconstituted buffer were written, we could end up
* with two background copies being written at the same time.
*/
- bp->b_xflags |= BX_BKGRDINPROG;
- bp->b_flags |= B_LOCKED;
bqrelse(bp);
bp = newbp;
}
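[Setting BV_BKGRDINPROG now happens under the vnode interlock, in the same
critical section that attaches the background copy to the vnode; this
closes the race the old "XXX The BX_ flags need to be protected" comment
pointed at. Condensed from the hunks above (allocation and data copy of
newbp elided):

    VI_LOCK(bp->b_vp);
    bp->b_vflags |= BV_BKGRDINPROG;  /* original now busy in background */
    bgetvp(bp->b_vp, newbp);         /* attach the copy to the vnode */
    VI_UNLOCK(bp->b_vp);
    /* ... set up newbp's block number, I/O command, etc. ... */
    bqrelse(bp);                     /* requeue the original ... */
    bp = newbp;                      /* ... and write the copy instead */
]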
@@ -906,38 +905,29 @@ vfs_backgroundwritedone(bp)
VI_LOCK(bp->b_vp);
if ((origbp = gbincore(bp->b_vp, bp->b_lblkno)) == NULL)
panic("backgroundwritedone: lost buffer");
- VI_UNLOCK(bp->b_vp);
- /*
- * Process dependencies then return any unfinished ones.
- */
- if (LIST_FIRST(&bp->b_dep) != NULL)
- buf_complete(bp);
- if (LIST_FIRST(&bp->b_dep) != NULL)
- buf_movedeps(bp, origbp);
- /* XXX Find out if origbp can disappear or get inconsistent */
/*
- * Clear the BX_BKGRDINPROG flag in the original buffer
+ * Clear the BV_BKGRDINPROG flag in the original buffer
* and awaken it if it is waiting for the write to complete.
- * If BX_BKGRDINPROG is not set in the original buffer it must
+ * If BV_BKGRDINPROG is not set in the original buffer it must
* have been released and re-instantiated - which is not legal.
*/
- KASSERT((origbp->b_xflags & BX_BKGRDINPROG),
+ KASSERT((origbp->b_vflags & BV_BKGRDINPROG),
("backgroundwritedone: lost buffer2"));
- origbp->b_xflags &= ~BX_BKGRDINPROG;
- if (origbp->b_xflags & BX_BKGRDWAIT) {
- origbp->b_xflags &= ~BX_BKGRDWAIT;
+ origbp->b_vflags &= ~BV_BKGRDINPROG;
+ if (origbp->b_vflags & BV_BKGRDWAIT) {
+ origbp->b_vflags &= ~BV_BKGRDWAIT;
wakeup(&origbp->b_xflags);
}
+ VI_UNLOCK(bp->b_vp);
/*
- * Clear the B_LOCKED flag and remove it from the locked
- * queue if it currently resides there.
+ * Process dependencies then return any unfinished ones.
*/
- origbp->b_flags &= ~B_LOCKED;
- if (BUF_LOCK(origbp, LK_EXCLUSIVE | LK_NOWAIT, NULL) == 0) {
- bremfree(origbp);
- bqrelse(origbp);
- }
+ if (LIST_FIRST(&bp->b_dep) != NULL)
+ buf_complete(bp);
+ if (LIST_FIRST(&bp->b_dep) != NULL)
+ buf_movedeps(bp, origbp);
+
/*
* This buffer is marked B_NOCACHE, so when it is released
* by biodone, it will be tossed. We mark it with BIO_READ
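[This is the wakeup half that pairs with the msleep() in bwrite(): the
flags are cleared and any waiter is awakened while the interlock is still
held, so a waiter that saw BV_BKGRDINPROG set cannot miss the wakeup.
A condensed sketch, assuming kernel context:

    VI_LOCK(bp->b_vp);
    origbp->b_vflags &= ~BV_BKGRDINPROG;
    if (origbp->b_vflags & BV_BKGRDWAIT) {
            origbp->b_vflags &= ~BV_BKGRDWAIT;
            wakeup(&origbp->b_xflags);  /* same channel bwrite() sleeps on */
    }
    VI_UNLOCK(bp->b_vp);
]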
@@ -997,7 +987,7 @@ bdwrite(struct buf * bp)
* Try to find a buffer to flush.
*/
TAILQ_FOREACH(nbp, &vp->v_dirtyblkhd, b_vnbufs) {
- if ((nbp->b_xflags & BX_BKGRDINPROG) ||
+ if ((nbp->b_vflags & BV_BKGRDINPROG) ||
buf_countdeps(nbp, 0) ||
BUF_LOCK(nbp, LK_EXCLUSIVE | LK_NOWAIT, NULL))
continue;
@@ -1207,9 +1197,6 @@ brelse(struct buf * bp)
s = splbio();
- if (bp->b_flags & B_LOCKED)
- bp->b_ioflags &= ~BIO_ERROR;
-
if (bp->b_iocmd == BIO_WRITE &&
(bp->b_ioflags & BIO_ERROR) &&
!(bp->b_flags & B_INVAL)) {
@@ -1259,8 +1246,17 @@ brelse(struct buf * bp)
*/
if (bp->b_flags & B_DELWRI)
bp->b_flags &= ~B_RELBUF;
- else if (vm_page_count_severe() && !(bp->b_xflags & BX_BKGRDINPROG))
- bp->b_flags |= B_RELBUF;
+ else if (vm_page_count_severe()) {
+ /*
+ * XXX This lock may not be necessary since BKGRDINPROG
+ * cannot be set while we hold the buf lock, it can only be
+ * cleared if it is already pending.
+ */
+ VI_LOCK(bp->b_vp);
+ if (!(bp->b_vflags & BV_BKGRDINPROG))
+ bp->b_flags |= B_RELBUF;
+ VI_UNLOCK(bp->b_vp);
+ }
/*
* VMIO buffer rundown. It is not very necessary to keep a VMIO buffer
@@ -1389,7 +1385,7 @@ brelse(struct buf * bp)
if (bp->b_bufsize == 0) {
bp->b_flags |= B_INVAL;
bp->b_xflags &= ~(BX_BKGRDWRITE | BX_ALTDATA);
- if (bp->b_xflags & BX_BKGRDINPROG)
+ if (bp->b_vflags & BV_BKGRDINPROG)
panic("losing buffer 1");
if (bp->b_kvasize) {
bp->b_qindex = QUEUE_EMPTYKVA;
@@ -1403,17 +1399,11 @@ brelse(struct buf * bp)
(bp->b_ioflags & BIO_ERROR)) {
bp->b_flags |= B_INVAL;
bp->b_xflags &= ~(BX_BKGRDWRITE | BX_ALTDATA);
- if (bp->b_xflags & BX_BKGRDINPROG)
+ if (bp->b_vflags & BV_BKGRDINPROG)
panic("losing buffer 2");
bp->b_qindex = QUEUE_CLEAN;
TAILQ_INSERT_HEAD(&bufqueues[QUEUE_CLEAN], bp, b_freelist);
bp->b_dev = NODEV;
-
- /* buffers that are locked */
- } else if (bp->b_flags & B_LOCKED) {
- bp->b_qindex = QUEUE_LOCKED;
- TAILQ_INSERT_TAIL(&bufqueues[QUEUE_LOCKED], bp, b_freelist);
-
/* remaining buffers */
} else {
if (bp->b_flags & B_DELWRI)
@@ -1447,7 +1437,7 @@ brelse(struct buf * bp)
* if B_INVAL is set ).
*/
- if ((bp->b_flags & B_LOCKED) == 0 && !(bp->b_flags & B_DELWRI))
+ if (!(bp->b_flags & B_DELWRI))
bufcountwakeup();
/*
@@ -1493,34 +1483,38 @@ bqrelse(struct buf * bp)
return;
}
mtx_lock(&bqlock);
- if (bp->b_flags & B_LOCKED) {
- bp->b_ioflags &= ~BIO_ERROR;
- bp->b_qindex = QUEUE_LOCKED;
- TAILQ_INSERT_TAIL(&bufqueues[QUEUE_LOCKED], bp, b_freelist);
- /* buffers with stale but valid contents */
- } else if (bp->b_flags & B_DELWRI) {
+ /* buffers with stale but valid contents */
+ if (bp->b_flags & B_DELWRI) {
bp->b_qindex = QUEUE_DIRTY;
TAILQ_INSERT_TAIL(&bufqueues[QUEUE_DIRTY], bp, b_freelist);
- } else if (vm_page_count_severe()) {
+ } else {
/*
- * We are too low on memory, we have to try to free the
- * buffer (most importantly: the wired pages making up its
- * backing store) *now*.
+ * XXX This lock may not be necessary since BKGRDINPROG
+ * cannot be set while we hold the buf lock, it can only be
+ * cleared if it is already pending.
*/
- mtx_unlock(&bqlock);
- splx(s);
- brelse(bp);
- return;
- } else {
- bp->b_qindex = QUEUE_CLEAN;
- TAILQ_INSERT_TAIL(&bufqueues[QUEUE_CLEAN], bp, b_freelist);
+ VI_LOCK(bp->b_vp);
+ if (!vm_page_count_severe() || bp->b_vflags & BV_BKGRDINPROG) {
+ VI_UNLOCK(bp->b_vp);
+ bp->b_qindex = QUEUE_CLEAN;
+ TAILQ_INSERT_TAIL(&bufqueues[QUEUE_CLEAN], bp,
+ b_freelist);
+ } else {
+ /*
+ * We are too low on memory, we have to try to free
+ * the buffer (most importantly: the wired pages
+ * making up its backing store) *now*.
+ */
+ mtx_unlock(&bqlock);
+ splx(s);
+ brelse(bp);
+ return;
+ }
}
mtx_unlock(&bqlock);
- if ((bp->b_flags & B_LOCKED) == 0 &&
- ((bp->b_flags & B_INVAL) || !(bp->b_flags & B_DELWRI))) {
+ if ((bp->b_flags & B_INVAL) || !(bp->b_flags & B_DELWRI))
bufcountwakeup();
- }
/*
* Something we can maybe free or reuse.
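[The check-then-act idiom introduced here and in the brelse() hunk above
only reads the flag while the interlock is held; the XXX comments note the
lock may be redundant, since BV_BKGRDINPROG cannot become set while the
caller holds the buf lock. A minimal sketch of the idiom as a hypothetical
helper (the returned value can go stale once the interlock is dropped):

    /* Hypothetical helper: is a background write in flight? */
    static int
    buf_bkgrd_inprog(struct buf *bp)
    {
            int inprog;

            VI_LOCK(bp->b_vp);
            inprog = (bp->b_vflags & BV_BKGRDINPROG) != 0;
            VI_UNLOCK(bp->b_vp);
            return (inprog);    /* advisory once the interlock is dropped */
    }
]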
@@ -1826,6 +1820,14 @@ restart:
break;
}
}
+ if (bp->b_vp) {
+ VI_LOCK(bp->b_vp);
+ if (bp->b_vflags & BV_BKGRDINPROG) {
+ VI_UNLOCK(bp->b_vp);
+ continue;
+ }
+ VI_UNLOCK(bp->b_vp);
+ }
/*
* Sanity Checks
@@ -1887,7 +1889,7 @@ restart:
}
if (LIST_FIRST(&bp->b_dep) != NULL)
buf_deallocate(bp);
- if (bp->b_xflags & BX_BKGRDINPROG)
+ if (bp->b_vflags & BV_BKGRDINPROG)
panic("losing buffer 3");
if (bp->b_bufsize)
@@ -2136,10 +2138,13 @@ flushbufqueues(int flushdeps)
continue;
KASSERT((bp->b_flags & B_DELWRI),
("unexpected clean buffer %p", bp));
- if ((bp->b_xflags & BX_BKGRDINPROG) != 0) {
+ VI_LOCK(bp->b_vp);
+ if ((bp->b_vflags & BV_BKGRDINPROG) != 0) {
+ VI_UNLOCK(bp->b_vp);
BUF_UNLOCK(bp);
continue;
}
+ VI_UNLOCK(bp->b_vp);
if (bp->b_flags & B_INVAL) {
bremfreel(bp);
mtx_unlock(&bqlock);
diff --git a/sys/kern/vfs_cluster.c b/sys/kern/vfs_cluster.c
index ad0b55d..89de2ef 100644
--- a/sys/kern/vfs_cluster.c
+++ b/sys/kern/vfs_cluster.c
@@ -399,11 +399,15 @@ cluster_rbuild(vp, filesize, lbn, blkno, size, run, fbp)
* VMIO backed. The clustering code can only deal
* with VMIO-backed buffers.
*/
- if ((tbp->b_flags & (B_CACHE|B_LOCKED)) ||
- (tbp->b_flags & B_VMIO) == 0) {
+ VI_LOCK(bp->b_vp);
+ if ((tbp->b_vflags & BV_BKGRDINPROG) ||
+ (tbp->b_flags & B_CACHE) ||
+ (tbp->b_flags & B_VMIO) == 0) {
+ VI_UNLOCK(bp->b_vp);
bqrelse(tbp);
break;
}
+ VI_UNLOCK(bp->b_vp);
/*
* The buffer must be completely invalid in order to
@@ -768,7 +772,8 @@ cluster_wbuild(vp, size, start_lbn, len)
* partake in the clustered write.
*/
VI_LOCK(vp);
- if ((tbp = gbincore(vp, start_lbn)) == NULL) {
+ if ((tbp = gbincore(vp, start_lbn)) == NULL ||
+ (tbp->b_vflags & BV_BKGRDINPROG)) {
VI_UNLOCK(vp);
++start_lbn;
--len;
@@ -782,8 +787,7 @@ cluster_wbuild(vp, size, start_lbn, len)
splx(s);
continue;
}
- if ((tbp->b_flags & (B_LOCKED | B_INVAL | B_DELWRI)) !=
- B_DELWRI) {
+ if ((tbp->b_flags & (B_INVAL | B_DELWRI)) != B_DELWRI) {
BUF_UNLOCK(tbp);
++start_lbn;
--len;
@@ -857,7 +861,8 @@ cluster_wbuild(vp, size, start_lbn, len)
* can't need to be written.
*/
VI_LOCK(vp);
- if ((tbp = gbincore(vp, start_lbn)) == NULL) {
+ if ((tbp = gbincore(vp, start_lbn)) == NULL ||
+ (tbp->b_vflags & BV_BKGRDINPROG)) {
VI_UNLOCK(vp);
splx(s);
break;
@@ -881,7 +886,6 @@ cluster_wbuild(vp, size, start_lbn, len)
B_INVAL | B_DELWRI | B_NEEDCOMMIT))
!= (B_DELWRI | B_CLUSTEROK |
(bp->b_flags & (B_VMIO | B_NEEDCOMMIT))) ||
- (tbp->b_flags & B_LOCKED) ||
tbp->b_wcred != bp->b_wcred) {
BUF_UNLOCK(tbp);
splx(s);
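[With B_LOCKED gone, the cluster code instead skips any buffer whose
interlock-protected flag says a background write is in flight. Condensed
from the cluster_wbuild() hunks above, assuming kernel context; gbincore()
requires the vnode interlock to be held across the lookup:

    VI_LOCK(vp);
    if ((tbp = gbincore(vp, start_lbn)) == NULL ||
        (tbp->b_vflags & BV_BKGRDINPROG)) {
            VI_UNLOCK(vp);      /* no buffer here, or busy in background */
            ++start_lbn;
            --len;
            continue;
    }
]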
diff --git a/sys/sys/buf.h b/sys/sys/buf.h
index 73baef1..3965764 100644
--- a/sys/sys/buf.h
+++ b/sys/sys/buf.h
@@ -217,7 +217,7 @@ struct buf {
#define B_00000800 0x00000800 /* Available flag. */
#define B_00001000 0x00001000 /* Available flag. */
#define B_INVAL 0x00002000 /* Does not contain valid info. */
-#define B_LOCKED 0x00004000 /* Locked in core (not reusable). */
+#define B_00004000 0x00004000 /* Available flag. */
#define B_NOCACHE 0x00008000 /* Do not cache block after use. */
#define B_MALLOC 0x00010000 /* malloced b_data */
#define B_CLUSTEROK 0x00020000 /* Pagein op, so swap() can count it. */
@@ -247,15 +247,18 @@ struct buf {
*/
#define BX_VNDIRTY 0x00000001 /* On vnode dirty list */
#define BX_VNCLEAN 0x00000002 /* On vnode clean list */
-#define BX_BKGRDWRITE 0x00000004 /* Do writes in background */
-#define BX_BKGRDINPROG 0x00000008 /* Background write in progress */
-#define BX_BKGRDWAIT 0x00000010 /* Background write waiting */
+#define BX_BKGRDWRITE 0x00000010 /* Do writes in background */
#define BX_BKGRDMARKER 0x00000020 /* Mark buffer for splay tree */
#define BX_ALTDATA 0x00000040 /* Holds extended data */
#define NOOFFSET (-1LL) /* No buffer offset calculated yet */
-#define BV_SCANNED 0x00001000 /* VOP_FSYNC funcs mark written bufs */
+/*
+ * These flags are kept in b_vflags.
+ */
+#define BV_SCANNED 0x00000001 /* VOP_FSYNC funcs mark written bufs */
+#define BV_BKGRDINPROG 0x00000002 /* Background write in progress */
+#define BV_BKGRDWAIT 0x00000004 /* Background write waiting */
#ifdef _KERNEL
/*
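[The split now follows the locking rule: b_flags and b_xflags are owned by
the holder of the buffer lock, while the new b_vflags field is protected
by the vnode interlock, VI_MTX(bp->b_vp). A sketch of how each class is
meant to be touched, assuming kernel context:

    /* b_flags / b_xflags: caller holds the buffer lock. */
    bp->b_xflags |= BX_BKGRDWRITE;

    /* b_vflags: caller holds the vnode interlock. */
    VI_LOCK(bp->b_vp);
    bp->b_vflags |= BV_BKGRDWAIT;
    VI_UNLOCK(bp->b_vp);
]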
diff --git a/sys/ufs/ffs/ffs_softdep.c b/sys/ufs/ffs/ffs_softdep.c
index df14940..9ca5686 100644
--- a/sys/ufs/ffs/ffs_softdep.c
+++ b/sys/ufs/ffs/ffs_softdep.c
@@ -4892,11 +4892,9 @@ softdep_fsync_mountdev(vp)
/*
* If it is already scheduled, skip to the next buffer.
*/
- if (BUF_LOCK(bp, LK_EXCLUSIVE | LK_NOWAIT | LK_INTERLOCK,
- VI_MTX(vp))) {
- VI_LOCK(vp);
+ if (BUF_LOCK(bp, LK_EXCLUSIVE | LK_NOWAIT, NULL))
continue;
- }
+
if ((bp->b_flags & B_DELWRI) == 0) {
FREE_LOCK(&lk);
panic("softdep_fsync_mountdev: not dirty");
@@ -4907,11 +4905,11 @@ softdep_fsync_mountdev(vp)
*/
if ((wk = LIST_FIRST(&bp->b_dep)) == NULL ||
wk->wk_type != D_BMSAFEMAP ||
- (bp->b_xflags & BX_BKGRDINPROG)) {
+ (bp->b_vflags & BV_BKGRDINPROG)) {
BUF_UNLOCK(bp);
- VI_LOCK(vp);
continue;
}
+ VI_UNLOCK(vp);
bremfree(bp);
FREE_LOCK(&lk);
(void) bawrite(bp);
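[Because the scheduled-check now reads b_vflags, the trylock no longer
needs LK_INTERLOCK gymnastics: the vnode interlock stays held across the
LK_NOWAIT attempt (which cannot sleep) and is dropped only once a buffer
has been chosen for writing. A condensed sketch of the loop body above,
assuming kernel context:

    if (BUF_LOCK(bp, LK_EXCLUSIVE | LK_NOWAIT, NULL))
            continue;                       /* interlock still held */
    if ((wk = LIST_FIRST(&bp->b_dep)) == NULL ||
        wk->wk_type != D_BMSAFEMAP ||
        (bp->b_vflags & BV_BKGRDINPROG)) {
            BUF_UNLOCK(bp);
            continue;                       /* not ours to flush */
    }
    VI_UNLOCK(vp);                          /* drop interlock to do I/O */
    bremfree(bp);
    (void) bawrite(bp);
]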
@@ -5803,21 +5801,31 @@ getdirtybuf(bpp, waitfor)
struct buf *bp;
int error;
+ /*
+ * XXX This code and the code that calls it need to be reviewed to
+ * verify its use of the vnode interlock.
+ */
+
for (;;) {
if ((bp = *bpp) == NULL)
return (0);
- /* XXX Probably needs interlock */
+ VI_LOCK(bp->b_vp);
if (BUF_LOCK(bp, LK_EXCLUSIVE | LK_NOWAIT, NULL) == 0) {
- if ((bp->b_xflags & BX_BKGRDINPROG) == 0)
+ if ((bp->b_vflags & BV_BKGRDINPROG) == 0) {
+ VI_UNLOCK(bp->b_vp);
break;
+ }
BUF_UNLOCK(bp);
- if (waitfor != MNT_WAIT)
+ if (waitfor != MNT_WAIT) {
+ VI_UNLOCK(bp->b_vp);
return (0);
- bp->b_xflags |= BX_BKGRDWAIT;
- interlocked_sleep(&lk, SLEEP, &bp->b_xflags, NULL,
- PRIBIO, "getbuf", 0);
+ }
+ bp->b_vflags |= BV_BKGRDWAIT;
+ interlocked_sleep(&lk, SLEEP, &bp->b_xflags,
+ VI_MTX(bp->b_vp), PRIBIO|PDROP, "getbuf", 0);
continue;
}
+ VI_UNLOCK(bp->b_vp);
if (waitfor != MNT_WAIT)
return (0);
error = interlocked_sleep(&lk, LOCKBUF, bp, NULL,