Change the write-behind code to take more care when starting

async I/O's. The sequential read heuristic has been extended to cover writes as well. We continue to call cluster_write() normally, thus blocks in the file will still be reallocated for large (but still random) I/O's, but I/O will only be initiated for truly sequential writes. This solves a number of annoying situations, especially with DBM (hash method) writes, and also has the side effect of fixing a number of (stupid) benchmarks. Reviewed-by: mckusick
author: dillon <dillon@FreeBSD.org> 2000-04-02 00:55:28 +0000
committer: dillon <dillon@FreeBSD.org> 2000-04-02 00:55:28 +0000
commit: 057e33d02ca1d636be1b99e212ebb7911cf4fc62 (patch)
tree: c8d43beb4d47e12029b09bb06c919f2a25c818e1 /sys/kern/vfs_cluster.c
parent: 0589fa18882d56c333936da3d552f6f1fb8a3fab (diff)
download: FreeBSD-src-057e33d02ca1d636be1b99e212ebb7911cf4fc62.zip
FreeBSD-src-057e33d02ca1d636be1b99e212ebb7911cf4fc62.tar.gz
1 files changed, 29 insertions, 8 deletions
diff --git a/sys/kern/vfs_cluster.c b/sys/kern/vfs_cluster.c
index ac14bc4..d7e6f61 100644
--- a/sys/kern/vfs_cluster.c
+++ b/sys/kern/vfs_cluster.c
@@ -533,9 +533,10 @@ cluster_wbuild_wb(struct vnode *vp, long size, daddr_t start_lbn, int len)
  *	4.	end of a cluster - asynchronously write cluster
  */
 void
-cluster_write(bp, filesize)
+cluster_write(bp, filesize, seqcount)
 	struct buf *bp;
 	u_quad_t filesize;
+	int seqcount;
 {
 	struct vnode *vp;
 	daddr_t lbn;
@@ -570,13 +571,21 @@ cluster_write(bp, filesize)
 			 * write, or we have reached our maximum cluster size,
 			 * then push the previous cluster. Otherwise try
 			 * reallocating to make it sequential.
+			 *
+			 * Change to algorithm: only push previous cluster if
+			 * it was sequential from the point of view of the
+			 * seqcount heuristic, otherwise leave the buffer 
+			 * intact so we can potentially optimize the I/O
+			 * later on in the buf_daemon or update daemon
+			 * flush.
 			 */
 			cursize = vp->v_lastw - vp->v_cstart + 1;
 			if (((u_quad_t) bp->b_offset + lblocksize) != filesize ||
 			    lbn != vp->v_lastw + 1 || vp->v_clen <= cursize) {
-				if (!async)
+				if (!async && seqcount > 0) {
 					cluster_wbuild_wb(vp, lblocksize,
 						vp->v_cstart, cursize);
+				}
 			} else {
 				struct buf **bpp, **endbp;
 				struct cluster_save *buflist;
@@ -586,14 +595,22 @@ cluster_write(bp, filesize)
 				    [buflist->bs_nchildren - 1];
 				if (VOP_REALLOCBLKS(vp, buflist)) {
 					/*
-					 * Failed, push the previous cluster.
+					 * Failed, push the previous cluster
+					 * if *really* writing sequentially
+					 * in the logical file (seqcount > 1),
+					 * otherwise delay it in the hopes that
+					 * the low level disk driver can
+					 * optimize the write ordering.
 					 */
 					for (bpp = buflist->bs_children;
 					     bpp < endbp; bpp++)
 						brelse(*bpp);
 					free(buflist, M_SEGMENT);
-					cluster_wbuild_wb(vp, lblocksize,
-					    vp->v_cstart, cursize);
+					if (seqcount > 1) {
+						cluster_wbuild_wb(vp, 
+						    lblocksize, vp->v_cstart, 
+						    cursize);
+					}
 				} else {
 					/*
 					 * Succeeded, keep building cluster.
@@ -635,17 +652,21 @@ cluster_write(bp, filesize)
 		}
 	} else if (lbn == vp->v_cstart + vp->v_clen) {
 		/*
-		 * At end of cluster, write it out.
+		 * At end of cluster, write it out if seqcount tells us we
+		 * are operating sequentially, otherwise let the buf or
+		 * update daemon handle it.
 		 */
 		bdwrite(bp);
-		cluster_wbuild_wb(vp, lblocksize, vp->v_cstart, vp->v_clen + 1);
+		if (seqcount > 1)
+			cluster_wbuild_wb(vp, lblocksize, vp->v_cstart, vp->v_clen + 1);
 		vp->v_clen = 0;
 		vp->v_cstart = lbn + 1;
-	} else
+	} else {
 		/*
 		 * In the middle of a cluster, so just delay the I/O for now.
 		 */
 		bdwrite(bp);
+	}
 	vp->v_lastw = lbn;
 	vp->v_lasta = bp->b_blkno;
 }
author	dillon <dillon@FreeBSD.org>	2000-04-02 00:55:28 +0000
committer	dillon <dillon@FreeBSD.org>	2000-04-02 00:55:28 +0000
commit	057e33d02ca1d636be1b99e212ebb7911cf4fc62 (patch)
tree	c8d43beb4d47e12029b09bb06c919f2a25c818e1 /sys/kern/vfs_cluster.c
parent	0589fa18882d56c333936da3d552f6f1fb8a3fab (diff)
download	FreeBSD-src-057e33d02ca1d636be1b99e212ebb7911cf4fc62.zip FreeBSD-src-057e33d02ca1d636be1b99e212ebb7911cf4fc62.tar.gz