path: root/sys/kern/vfs_bio.c
author    jeff <jeff@FreeBSD.org>  2003-03-13 07:19:23 +0000
committer jeff <jeff@FreeBSD.org>  2003-03-13 07:19:23 +0000
commit    ae3c8799daed67832db23a2957d5f5e47250cad9 (patch)
tree      42c64ebfa800452a8af9d33feb3e347c047240a9 /sys/kern/vfs_bio.c
parent    8ee94afd309724ae0e8434dc6ff0158da0d7addf (diff)
- Remove a race between fsync like functions and flushbufqueues() by
  requiring locked bufs in vfs_bio_awrite(). Previously the buf could have
  been written out by fsync before we acquired the buf lock if it weren't
  for giant. The cluster_wbuild() handles this race properly but the
  single write at the end of vfs_bio_awrite() would not.
- Modify flushbufqueues() so there is only one copy of the loop. Pass a
  parameter in that says whether or not we should sync bufs with deps.
- Call flushbufqueues() a second time and then break if we couldn't find
  any bufs without deps.
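
To make the first point concrete, here is a minimal user-space sketch of
the locking contract this commit adopts: the caller acquires the buf lock
before calling the async-write path, so a concurrent fsync can no longer
write the buffer out between the caller's decision to flush and the actual
I/O. All names here (xbuf, awrite_locked) are illustrative stand-ins, not
the kernel API.

/*
 * User-space analogue of the race this commit closes: the flusher must
 * hold the buffer lock *before* deciding to write, or a concurrent
 * fsync can write (and clean) the buffer between the check and the I/O.
 */
#include <pthread.h>
#include <stdio.h>

struct xbuf {
	pthread_mutex_t lock;
	int             dirty;
};

static void
awrite_locked(struct xbuf *bp)		/* caller holds bp->lock */
{
	if (bp->dirty) {
		bp->dirty = 0;		/* "write" it out */
		printf("flushed\n");
	}
	pthread_mutex_unlock(&bp->lock);
}

int
main(void)
{
	struct xbuf b = { PTHREAD_MUTEX_INITIALIZER, 1 };

	pthread_mutex_lock(&b.lock);	/* lock first, then write */
	awrite_locked(&b);
	return (0);
}

In the kernel change below, the same handoff shows up as bdwrite() no
longer dropping the buf lock before vfs_bio_awrite(), and vfs_bio_awrite()
no longer taking BUF_LOCK itself.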
Diffstat (limited to 'sys/kern/vfs_bio.c')
-rw-r--r--  sys/kern/vfs_bio.c | 76
1 file changed, 32 insertions(+), 44 deletions(-)
diff --git a/sys/kern/vfs_bio.c b/sys/kern/vfs_bio.c
index a2807e9..7785e9d 100644
--- a/sys/kern/vfs_bio.c
+++ b/sys/kern/vfs_bio.c
@@ -81,7 +81,7 @@ static void vfs_vmio_release(struct buf *bp);
static void vfs_backgroundwritedone(struct buf *bp);
static int vfs_bio_clcheck(struct vnode *vp, int size,
daddr_t lblkno, daddr_t blkno);
-static int flushbufqueues(void);
+static int flushbufqueues(int flushdeps);
static void buf_daemon(void);
void bremfreel(struct buf * bp);
@@ -994,7 +994,6 @@ bdwrite(struct buf * bp)
panic("bdwrite: found ourselves");
VI_UNLOCK(vp);
if (nbp->b_flags & B_CLUSTEROK) {
- BUF_UNLOCK(nbp);
vfs_bio_awrite(nbp);
} else {
bremfree(nbp);
@@ -1672,13 +1671,13 @@ vfs_bio_awrite(struct buf * bp)
* this is a possible cluster write
*/
if (ncl != 1) {
+ BUF_UNLOCK(bp);
nwritten = cluster_wbuild(vp, size, lblkno - j, ncl);
splx(s);
return nwritten;
}
}
- BUF_LOCK(bp, LK_EXCLUSIVE, NULL);
bremfree(bp);
bp->b_flags |= B_ASYNC;
@@ -2051,8 +2050,15 @@ buf_daemon()
* normally would so they can run in parallel with our drain.
*/
while (numdirtybuffers > lodirtybuffers) {
- if (flushbufqueues() == 0)
+ if (flushbufqueues(0) == 0) {
+ /*
+ * Could not find any buffers without rollback
+ * dependencies, so just write the first one
+ * in the hopes of eventually making progress.
+ */
+ flushbufqueues(1);
break;
+ }
waitrunningbufspace();
numdirtywakeup((lodirtybuffers + hidirtybuffers) / 2);
}
@@ -2098,66 +2104,47 @@ int flushwithdeps = 0;
SYSCTL_INT(_vfs, OID_AUTO, flushwithdeps, CTLFLAG_RW, &flushwithdeps,
0, "Number of buffers flushed with dependecies that require rollbacks");
static int
-flushbufqueues(void)
+flushbufqueues(int flushdeps)
{
struct thread *td = curthread;
struct vnode *vp;
struct buf *bp;
+ int hasdeps;
mtx_lock(&bqlock);
TAILQ_FOREACH(bp, &bufqueues[QUEUE_DIRTY], b_freelist) {
- KASSERT((bp->b_flags & B_DELWRI),
- ("unexpected clean buffer %p", bp));
- if ((bp->b_xflags & BX_BKGRDINPROG) != 0)
- continue;
- if (bp->b_flags & B_INVAL) {
- if (BUF_LOCK(bp, LK_EXCLUSIVE | LK_NOWAIT, NULL) != 0)
- panic("flushbufqueues: locked buf");
- bremfreel(bp);
- mtx_unlock(&bqlock);
- brelse(bp);
- return (1);
- }
- if (LIST_FIRST(&bp->b_dep) != NULL && buf_countdeps(bp, 0))
+ if (BUF_LOCK(bp, LK_EXCLUSIVE | LK_NOWAIT, NULL) != 0)
continue;
- /*
- * We must hold the lock on a vnode before writing
- * one of its buffers. Otherwise we may confuse, or
- * in the case of a snapshot vnode, deadlock the
- * system.
- */
- if ((vp = bp->b_vp) == NULL ||
- vn_lock(vp, LK_EXCLUSIVE | LK_NOWAIT, td) == 0) {
- mtx_unlock(&bqlock);
- vfs_bio_awrite(bp);
- if (vp != NULL)
- VOP_UNLOCK(vp, 0, td);
- return (1);
- }
- }
- /*
- * Could not find any buffers without rollback dependencies,
- * so just write the first one in the hopes of eventually
- * making progress.
- */
- TAILQ_FOREACH(bp, &bufqueues[QUEUE_DIRTY], b_freelist) {
KASSERT((bp->b_flags & B_DELWRI),
("unexpected clean buffer %p", bp));
- if ((bp->b_xflags & BX_BKGRDINPROG) != 0)
+ if ((bp->b_xflags & BX_BKGRDINPROG) != 0) {
+ BUF_UNLOCK(bp);
continue;
+ }
if (bp->b_flags & B_INVAL) {
- if (BUF_LOCK(bp, LK_EXCLUSIVE | LK_NOWAIT, NULL) != 0)
- panic("flushbufqueues: locked buf");
bremfreel(bp);
mtx_unlock(&bqlock);
brelse(bp);
return (1);
}
+
+ if (LIST_FIRST(&bp->b_dep) != NULL && buf_countdeps(bp, 0)) {
+ if (flushdeps == 0) {
+ BUF_UNLOCK(bp);
+ continue;
+ }
+ hasdeps = 1;
+ } else
+ hasdeps = 0;
/*
* We must hold the lock on a vnode before writing
* one of its buffers. Otherwise we may confuse, or
* in the case of a snapshot vnode, deadlock the
* system.
+ *
+ * The lock order here is the reverse of the normal
+ * of vnode followed by buf lock. This is ok because
+ * the NOWAIT will prevent deadlock.
*/
if ((vp = bp->b_vp) == NULL ||
vn_lock(vp, LK_EXCLUSIVE | LK_NOWAIT, td) == 0) {
@@ -2165,9 +2152,10 @@ flushbufqueues(void)
vfs_bio_awrite(bp);
if (vp != NULL)
VOP_UNLOCK(vp, 0, td);
- flushwithdeps += 1;
- return (0);
+ flushwithdeps += hasdeps;
+ return (1);
}
+ BUF_UNLOCK(bp);
}
mtx_unlock(&bqlock);
return (0);
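
The buf_daemon() hunk above encodes a two-pass strategy: flush only
dependency-free buffers first, and only when none remain write a single
buffer with dependencies so the daemon still makes forward progress. Below
is a runnable toy model of that control flow; tbuf, flushqueues, and the
queue contents are invented for illustration and are not the kernel code.

#include <stdio.h>

/* Toy model: each dirty "buffer" may carry rollback dependencies. */
struct tbuf { int dirty, hasdeps; };
static struct tbuf queue[] = { {1, 1}, {1, 1}, {1, 0} };
#define NBUF ((int)(sizeof(queue) / sizeof(queue[0])))

/* Flush one dirty buffer; skip dep'd buffers unless flushdeps is set. */
static int
flushqueues(int flushdeps)
{
	for (int i = 0; i < NBUF; i++) {
		if (!queue[i].dirty)
			continue;
		if (queue[i].hasdeps && !flushdeps)
			continue;
		queue[i].dirty = 0;
		printf("flushed buf %d (deps=%d)\n", i, queue[i].hasdeps);
		return (1);
	}
	return (0);
}

int
main(void)
{
	/* Two-pass loop mirroring the buf_daemon() change. */
	for (;;) {
		if (flushqueues(0) == 0) {
			/* No dep-free bufs: write one with deps, then stop. */
			flushqueues(1);
			break;
		}
	}
	return (0);
}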
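
The new comment in flushbufqueues() also notes that the buf lock is now
taken before the vnode lock, reversing the usual vnode-then-buf order, and
that this is safe only because the vnode acquisition uses LK_NOWAIT. A
small pthread analogue of that trylock back-off pattern, assuming nothing
beyond POSIX threads (try_flush and the lock names are illustrative):

#include <pthread.h>

static pthread_mutex_t buf_lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_mutex_t vnode_lock = PTHREAD_MUTEX_INITIALIZER;

static int
try_flush(void)
{
	if (pthread_mutex_trylock(&buf_lock) != 0)
		return (0);
	/*
	 * Reverse of the usual vnode-then-buf order. This cannot
	 * deadlock because we never block on vnode_lock while holding
	 * buf_lock; on contention we back off and release everything.
	 */
	if (pthread_mutex_trylock(&vnode_lock) != 0) {
		pthread_mutex_unlock(&buf_lock);
		return (0);
	}
	/* ... write the buffer here ... */
	pthread_mutex_unlock(&vnode_lock);
	pthread_mutex_unlock(&buf_lock);
	return (1);
}

int
main(void)
{
	return (try_flush() ? 0 : 1);
}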