summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorjeff <jeff@FreeBSD.org>2003-03-13 07:19:23 +0000
committerjeff <jeff@FreeBSD.org>2003-03-13 07:19:23 +0000
commitae3c8799daed67832db23a2957d5f5e47250cad9 (patch)
tree42c64ebfa800452a8af9d33feb3e347c047240a9
parent8ee94afd309724ae0e8434dc6ff0158da0d7addf (diff)
downloadFreeBSD-src-ae3c8799daed67832db23a2957d5f5e47250cad9.zip
FreeBSD-src-ae3c8799daed67832db23a2957d5f5e47250cad9.tar.gz
- Remove a race between fsync like functions and flushbufqueues() by
  requiring locked bufs in vfs_bio_awrite(). Previously the buf could
  have been written out by fsync before we acquired the buf lock if it
  weren't for giant. The cluster_wbuild() handles this race properly but
  the single write at the end of vfs_bio_awrite() would not.
- Modify flushbufqueues() so there is only one copy of the loop. Pass a
  parameter in that says whether or not we should sync bufs with deps.
- Call flushbufqueues() a second time and then break if we couldn't find
  any bufs without deps.
-rw-r--r--sys/kern/vfs_bio.c76
-rw-r--r--sys/kern/vfs_default.c1
-rw-r--r--sys/kern/vfs_subr.c1
-rw-r--r--sys/ufs/ffs/ffs_vnops.c6
4 files changed, 34 insertions, 50 deletions
diff --git a/sys/kern/vfs_bio.c b/sys/kern/vfs_bio.c
index a2807e9..7785e9d 100644
--- a/sys/kern/vfs_bio.c
+++ b/sys/kern/vfs_bio.c
@@ -81,7 +81,7 @@ static void vfs_vmio_release(struct buf *bp);
static void vfs_backgroundwritedone(struct buf *bp);
static int vfs_bio_clcheck(struct vnode *vp, int size,
daddr_t lblkno, daddr_t blkno);
-static int flushbufqueues(void);
+static int flushbufqueues(int flushdeps);
static void buf_daemon(void);
void bremfreel(struct buf * bp);
@@ -994,7 +994,6 @@ bdwrite(struct buf * bp)
panic("bdwrite: found ourselves");
VI_UNLOCK(vp);
if (nbp->b_flags & B_CLUSTEROK) {
- BUF_UNLOCK(nbp);
vfs_bio_awrite(nbp);
} else {
bremfree(nbp);
@@ -1672,13 +1671,13 @@ vfs_bio_awrite(struct buf * bp)
* this is a possible cluster write
*/
if (ncl != 1) {
+ BUF_UNLOCK(bp);
nwritten = cluster_wbuild(vp, size, lblkno - j, ncl);
splx(s);
return nwritten;
}
}
- BUF_LOCK(bp, LK_EXCLUSIVE, NULL);
bremfree(bp);
bp->b_flags |= B_ASYNC;
@@ -2051,8 +2050,15 @@ buf_daemon()
* normally would so they can run in parallel with our drain.
*/
while (numdirtybuffers > lodirtybuffers) {
- if (flushbufqueues() == 0)
+ if (flushbufqueues(0) == 0) {
+ /*
+ * Could not find any buffers without rollback
+ * dependencies, so just write the first one
+ * in the hopes of eventually making progress.
+ */
+ flushbufqueues(1);
break;
+ }
waitrunningbufspace();
numdirtywakeup((lodirtybuffers + hidirtybuffers) / 2);
}
@@ -2098,66 +2104,47 @@ int flushwithdeps = 0;
SYSCTL_INT(_vfs, OID_AUTO, flushwithdeps, CTLFLAG_RW, &flushwithdeps,
0, "Number of buffers flushed with dependecies that require rollbacks");
static int
-flushbufqueues(void)
+flushbufqueues(int flushdeps)
{
struct thread *td = curthread;
struct vnode *vp;
struct buf *bp;
+ int hasdeps;
mtx_lock(&bqlock);
TAILQ_FOREACH(bp, &bufqueues[QUEUE_DIRTY], b_freelist) {
- KASSERT((bp->b_flags & B_DELWRI),
- ("unexpected clean buffer %p", bp));
- if ((bp->b_xflags & BX_BKGRDINPROG) != 0)
- continue;
- if (bp->b_flags & B_INVAL) {
- if (BUF_LOCK(bp, LK_EXCLUSIVE | LK_NOWAIT, NULL) != 0)
- panic("flushbufqueues: locked buf");
- bremfreel(bp);
- mtx_unlock(&bqlock);
- brelse(bp);
- return (1);
- }
- if (LIST_FIRST(&bp->b_dep) != NULL && buf_countdeps(bp, 0))
+ if (BUF_LOCK(bp, LK_EXCLUSIVE | LK_NOWAIT, NULL) != 0)
continue;
- /*
- * We must hold the lock on a vnode before writing
- * one of its buffers. Otherwise we may confuse, or
- * in the case of a snapshot vnode, deadlock the
- * system.
- */
- if ((vp = bp->b_vp) == NULL ||
- vn_lock(vp, LK_EXCLUSIVE | LK_NOWAIT, td) == 0) {
- mtx_unlock(&bqlock);
- vfs_bio_awrite(bp);
- if (vp != NULL)
- VOP_UNLOCK(vp, 0, td);
- return (1);
- }
- }
- /*
- * Could not find any buffers without rollback dependencies,
- * so just write the first one in the hopes of eventually
- * making progress.
- */
- TAILQ_FOREACH(bp, &bufqueues[QUEUE_DIRTY], b_freelist) {
KASSERT((bp->b_flags & B_DELWRI),
("unexpected clean buffer %p", bp));
- if ((bp->b_xflags & BX_BKGRDINPROG) != 0)
+ if ((bp->b_xflags & BX_BKGRDINPROG) != 0) {
+ BUF_UNLOCK(bp);
continue;
+ }
if (bp->b_flags & B_INVAL) {
- if (BUF_LOCK(bp, LK_EXCLUSIVE | LK_NOWAIT, NULL) != 0)
- panic("flushbufqueues: locked buf");
bremfreel(bp);
mtx_unlock(&bqlock);
brelse(bp);
return (1);
}
+
+ if (LIST_FIRST(&bp->b_dep) != NULL && buf_countdeps(bp, 0)) {
+ if (flushdeps == 0) {
+ BUF_UNLOCK(bp);
+ continue;
+ }
+ hasdeps = 1;
+ } else
+ hasdeps = 0;
/*
* We must hold the lock on a vnode before writing
* one of its buffers. Otherwise we may confuse, or
* in the case of a snapshot vnode, deadlock the
* system.
+ *
+ * The lock order here is the reverse of the normal
+ * of vnode followed by buf lock. This is ok because
+ * the NOWAIT will prevent deadlock.
*/
if ((vp = bp->b_vp) == NULL ||
vn_lock(vp, LK_EXCLUSIVE | LK_NOWAIT, td) == 0) {
@@ -2165,9 +2152,10 @@ flushbufqueues(void)
vfs_bio_awrite(bp);
if (vp != NULL)
VOP_UNLOCK(vp, 0, td);
- flushwithdeps += 1;
- return (0);
+ flushwithdeps += hasdeps;
+ return (1);
}
+ BUF_UNLOCK(bp);
}
mtx_unlock(&bqlock);
return (0);
diff --git a/sys/kern/vfs_default.c b/sys/kern/vfs_default.c
index 1921abf..58f1a86 100644
--- a/sys/kern/vfs_default.c
+++ b/sys/kern/vfs_default.c
@@ -749,7 +749,6 @@ loop2:
if ((bp->b_flags & B_DELWRI) == 0)
panic("fsync: not dirty");
if ((vp->v_vflag & VV_OBJBUF) && (bp->b_flags & B_CLUSTEROK)) {
- BUF_UNLOCK(bp);
vfs_bio_awrite(bp);
splx(s);
} else {
diff --git a/sys/kern/vfs_subr.c b/sys/kern/vfs_subr.c
index 5f4b678..3cedcb4 100644
--- a/sys/kern/vfs_subr.c
+++ b/sys/kern/vfs_subr.c
@@ -1240,7 +1240,6 @@ flushbuflist(blist, flags, vp, slpflag, slptimeo, errorp)
if (bp->b_vp == vp) {
if (bp->b_flags & B_CLUSTEROK) {
- BUF_UNLOCK(bp);
vfs_bio_awrite(bp);
} else {
bremfree(bp);
diff --git a/sys/ufs/ffs/ffs_vnops.c b/sys/ufs/ffs/ffs_vnops.c
index cc6debc..8aa3db4 100644
--- a/sys/ufs/ffs/ffs_vnops.c
+++ b/sys/ufs/ffs/ffs_vnops.c
@@ -227,7 +227,6 @@ loop:
*/
if (passes > 0 || !wait) {
if ((bp->b_flags & B_CLUSTEROK) && !wait) {
- BUF_UNLOCK(bp);
(void) vfs_bio_awrite(bp);
} else {
bremfree(bp);
@@ -252,10 +251,9 @@ loop:
splx(s);
brelse(bp);
s = splbio();
- } else {
- BUF_UNLOCK(bp);
+ } else
vfs_bio_awrite(bp);
- }
+
/*
* Since we may have slept during the I/O, we need
* to start from a known point.
OpenPOWER on IntegriCloud