author		mav <mav@FreeBSD.org>	2015-05-24 17:16:30 +0000
committer	mav <mav@FreeBSD.org>	2015-05-24 17:16:30 +0000
commit		5562aba4db3f186e83fe2e06149c0cb25b88173b
tree		25fd30840d988b4706fc708ec0fecdb52a9843e9 /sys/fs
parent		e7adedb422865fbd63ed6f10518eb0a9e16abe7b
MFC r282881: Do not promote large async writes to sync.
The present implementation of large sync writes is too strict and so can be quite slow. Instead of doing that, execute a large async write in chunks, syncing each chunk separately. It would be good to fix large sync writes too, but I leave that to somebody with more skill in this area.
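
The change caps how much uncommitted async data can accumulate: each buffer-sized chunk is charged against nmp->nm_wcommitsize, and once the running total would exceed that limit the dirty data is flushed and the counter restarts at the current chunk. Below is a minimal user-space sketch of that pattern, not the kernel code itself; BIOSIZE, WCOMMITSIZE and flush_dirty() are stand-ins for the mount's buffer size, nmp->nm_wcommitsize and ncl_vinvalbuf(vp, V_SAVE, td, 1).

#include <stdio.h>

#define BIOSIZE		8192		/* stand-in for the per-mount buffer size */
#define WCOMMITSIZE	(64 * 1024)	/* stand-in for nmp->nm_wcommitsize */

static int
flush_dirty(FILE *fp)
{
	/* Stand-in for ncl_vinvalbuf(): push accumulated dirty data out. */
	return (fflush(fp) == 0 ? 0 : -1);
}

/* Write resid bytes in block-sized chunks, flushing periodically. */
static int
write_chunked(FILE *fp, const char *buf, size_t resid)
{
	size_t n, wouldcommit = 0;

	while (resid > 0) {
		n = resid < BIOSIZE ? resid : BIOSIZE;

		/*
		 * Charge a full block for this chunk; if the running
		 * total would exceed the commit-size limit, flush what
		 * is already dirty and count from the current chunk.
		 */
		wouldcommit += BIOSIZE;
		if (wouldcommit > WCOMMITSIZE) {
			if (flush_dirty(fp) != 0)
				return (-1);
			wouldcommit = BIOSIZE;
		}

		if (fwrite(buf, 1, n, fp) != n)
			return (-1);
		buf += n;
		resid -= n;
	}
	return (0);
}
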
Diffstat (limited to 'sys/fs')
-rw-r--r--	sys/fs/nfsclient/nfs_clbio.c	49
1 file changed, 15 insertions(+), 34 deletions(-)
diff --git a/sys/fs/nfsclient/nfs_clbio.c b/sys/fs/nfsclient/nfs_clbio.c
index 576fd59..5f85eb2 100644
--- a/sys/fs/nfsclient/nfs_clbio.c
+++ b/sys/fs/nfsclient/nfs_clbio.c
@@ -875,7 +875,7 @@ ncl_write(struct vop_write_args *ap)
 	struct nfsmount *nmp = VFSTONFS(vp->v_mount);
 	daddr_t lbn;
 	int bcount, noncontig_write, obcount;
-	int bp_cached, n, on, error = 0, error1;
+	int bp_cached, n, on, error = 0, error1, wouldcommit;
 	size_t orig_resid, local_resid;
 	off_t orig_size, tmp_off;
 
@@ -919,7 +919,6 @@ ncl_write(struct vop_write_args *ap)
 			if (ioflag & IO_NDELAY)
 				return (EAGAIN);
 #endif
-flush_and_restart:
 			np->n_attrstamp = 0;
 			KDTRACE_NFS_ATTRCACHE_FLUSH_DONE(vp);
 			error = ncl_vinvalbuf(vp, V_SAVE, td, 1);
@@ -976,27 +975,14 @@ flush_and_restart:
 	 * IO_UNIT -- we just make all writes atomic anyway, as there's
 	 * no point optimizing for something that really won't ever happen.
 	 */
+	wouldcommit = 0;
 	if (!(ioflag & IO_SYNC)) {
 		int nflag;
 
 		mtx_lock(&np->n_mtx);
 		nflag = np->n_flag;
 		mtx_unlock(&np->n_mtx);
-		int needrestart = 0;
-		if (nmp->nm_wcommitsize < uio->uio_resid) {
-			/*
-			 * If this request could not possibly be completed
-			 * without exceeding the maximum outstanding write
-			 * commit size, see if we can convert it into a
-			 * synchronous write operation.
-			 */
-			if (ioflag & IO_NDELAY)
-				return (EAGAIN);
-			ioflag |= IO_SYNC;
-			if (nflag & NMODIFIED)
-				needrestart = 1;
-		} else if (nflag & NMODIFIED) {
-			int wouldcommit = 0;
+		if (nflag & NMODIFIED) {
 			BO_LOCK(&vp->v_bufobj);
 			if (vp->v_bufobj.bo_dirty.bv_cnt != 0) {
 				TAILQ_FOREACH(bp, &vp->v_bufobj.bo_dirty.bv_hd,
@@ -1006,27 +992,22 @@ flush_and_restart:
 				}
 			}
 			BO_UNLOCK(&vp->v_bufobj);
-			/*
-			 * Since we're not operating synchronously and
-			 * bypassing the buffer cache, we are in a commit
-			 * and holding all of these buffers whether
-			 * transmitted or not.  If not limited, this
-			 * will lead to the buffer cache deadlocking,
-			 * as no one else can flush our uncommitted buffers.
-			 */
-			wouldcommit += uio->uio_resid;
-			/*
-			 * If we would initially exceed the maximum
-			 * outstanding write commit size, flush and restart.
-			 */
-			if (wouldcommit > nmp->nm_wcommitsize)
-				needrestart = 1;
 		}
-		if (needrestart)
-			goto flush_and_restart;
 	}
 
 	do {
+		if (!(ioflag & IO_SYNC)) {
+			wouldcommit += biosize;
+			if (wouldcommit > nmp->nm_wcommitsize) {
+				np->n_attrstamp = 0;
+				KDTRACE_NFS_ATTRCACHE_FLUSH_DONE(vp);
+				error = ncl_vinvalbuf(vp, V_SAVE, td, 1);
+				if (error)
+					return (error);
+				wouldcommit = biosize;
+			}
+		}
+
 		NFSINCRGLOBAL(newnfsstats.biocache_writes);
 		lbn = uio->uio_offset / biosize;
 		on = uio->uio_offset - (lbn * biosize);