summary refs log tree commit diff stats
diff options
context:
space:
mode:
-rw-r--r-- sys/kern/vfs_bio.c 2
-rw-r--r-- sys/nfsclient/nfs_bio.c 65
-rw-r--r-- sys/nfsclient/nfs_vfsops.c 9
-rw-r--r-- sys/nfsclient/nfsargs.h 4
-rw-r--r-- sys/nfsclient/nfsmount.h 1
-rw-r--r-- sys/sys/buf.h 1
-rw-r--r-- sys/sys/bufobj.h 4
7 files changed, 81 insertions, 5 deletions
diff --git a/sys/kern/vfs_bio.c b/sys/kern/vfs_bio.c
index 110ba79..b9dd64c 100644
--- a/sys/kern/vfs_bio.c
+++ b/sys/kern/vfs_bio.c
@@ -127,7 +127,7 @@ SYSCTL_INT(_vfs, OID_AUTO, maxmallocbufspace, CTLFLAG_RW, &maxbufmallocspace, 0,
static int lobufspace;
SYSCTL_INT(_vfs, OID_AUTO, lobufspace, CTLFLAG_RD, &lobufspace, 0,
"Minimum amount of buffers we want to have");
-static int hibufspace;
+int hibufspace;
SYSCTL_INT(_vfs, OID_AUTO, hibufspace, CTLFLAG_RD, &hibufspace, 0,
"Maximum allowed value of bufspace (excluding buf_daemon)");
static int bufreusecnt;
diff --git a/sys/nfsclient/nfs_bio.c b/sys/nfsclient/nfs_bio.c
index 17c3c3d..4eb461d 100644
--- a/sys/nfsclient/nfs_bio.c
+++ b/sys/nfsclient/nfs_bio.c
@@ -873,6 +873,14 @@ nfs_write(struct vop_write_args *ap)
*/
if (ioflag & (IO_APPEND | IO_SYNC)) {
if (np->n_flag & NMODIFIED) {
+ /*
+ * Fail non-blocking, synchronous writes to dirty
+ * files with EAGAIN, informing the program that it
+ * needs to fsync(2) explicitly.
+ */
+ if (ioflag & IO_NDELAY)
+ return (EAGAIN);
+flush_and_restart:
np->n_attrstamp = 0;
error = nfs_vinvalbuf(vp, V_SAVE, td, 1);
if (error)
@@ -953,6 +961,63 @@ restart:
}
biosize = vp->v_mount->mnt_stat.f_iosize;
+ /*
+ * Find all of this file's B_NEEDCOMMIT buffers. If our writes
+ * would exceed the local maximum per-file write commit size when
+ * combined with those, we must decide whether to flush,
+ * go synchronous, or return error. We don't bother checking
+ * IO_UNIT -- we just make all writes atomic anyway, as there's
+ * no point optimizing for something that really won't ever happen.
+ */
+ if (!(ioflag & IO_SYNC)) {
+ int needrestart = 0;
+ if (nmp->nm_wcommitsize < uio->uio_resid) {
+ /*
+ * If this request could not possibly be completed
+ * without exceeding the maximum outstanding write
+ * commit size, see if we can convert it into a
+ * synchronous write operation.
+ */
+ if (ioflag & IO_NDELAY)
+ return (EAGAIN);
+ ioflag |= IO_SYNC;
+ if (np->n_flag & NMODIFIED)
+ needrestart = 1;
+ } else if (np->n_flag & NMODIFIED) {
+ int wouldcommit = 0;
+ BO_LOCK(&vp->v_bufobj);
+ if (vp->v_bufobj.bo_dirty.bv_cnt != 0) {
+ TAILQ_FOREACH(bp, &vp->v_bufobj.bo_dirty.bv_hd,
+ b_bobufs) {
+ if (bp->b_flags & B_NEEDCOMMIT)
+ wouldcommit += bp->b_bcount;
+ }
+ }
+ BO_UNLOCK(&vp->v_bufobj);
+ /*
+ * Since this write is neither synchronous nor
+ * bypassing the buffer cache, it takes part in a
+ * commit and holds all of these buffers, whether
+ * transmitted or not. If not limited, this will
+ * lead to the buffer cache deadlocking, as no one
+ * else can flush our uncommitted buffers.
+ */
+ wouldcommit += uio->uio_resid;
+ /*
+ * If we would initially exceed the maximum
+ * outstanding write commit size, flush and restart.
+ */
+ if (wouldcommit > nmp->nm_wcommitsize)
+ needrestart = 1;
+ }
+ if (needrestart) {
+ if (haverslock) {
+ nfs_rsunlock(np, td);
+ haverslock = 0;
+ }
+ goto flush_and_restart;
+ }
+ }
do {
nfsstats.biocache_writes++;
diff --git a/sys/nfsclient/nfs_vfsops.c b/sys/nfsclient/nfs_vfsops.c
index e7a3a1a..9cf9b2b 100644
--- a/sys/nfsclient/nfs_vfsops.c
+++ b/sys/nfsclient/nfs_vfsops.c
@@ -41,6 +41,8 @@ __FBSDID("$FreeBSD$");
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
+#include <sys/bio.h>
+#include <sys/buf.h>
#include <sys/lock.h>
#include <sys/malloc.h>
#include <sys/mbuf.h>
@@ -633,6 +635,12 @@ nfs_decode_args(struct mount *mp, struct nfsmount *nmp, struct nfs_args *argp)
else
nmp->nm_readahead = NFS_MAXRAHEAD;
}
+ if ((argp->flags & NFSMNT_WCOMMITSIZE) && argp->wcommitsize >= 0) {
+ if (argp->wcommitsize < nmp->nm_wsize)
+ nmp->nm_wcommitsize = nmp->nm_wsize;
+ else
+ nmp->nm_wcommitsize = argp->wcommitsize;
+ }
if ((argp->flags & NFSMNT_DEADTHRESH) && argp->deadthresh >= 0) {
if (argp->deadthresh <= NFS_MAXDEADTHRESH)
nmp->nm_deadthresh = argp->deadthresh;
@@ -815,6 +823,7 @@ mountnfs(struct nfs_args *argp, struct mount *mp, struct sockaddr *nam,
nmp->nm_wsize = NFS_WSIZE;
nmp->nm_rsize = NFS_RSIZE;
}
+ nmp->nm_wcommitsize = hibufspace / (desiredvnodes / 1000);
nmp->nm_readdirsize = NFS_READDIRSIZE;
nmp->nm_numgrps = NFS_MAXGRPS;
nmp->nm_readahead = NFS_DEFRAHEAD;
diff --git a/sys/nfsclient/nfsargs.h b/sys/nfsclient/nfsargs.h
index 8aa2392..7ebf1a0 100644
--- a/sys/nfsclient/nfsargs.h
+++ b/sys/nfsclient/nfsargs.h
@@ -56,7 +56,7 @@ struct nfs_args {
int retrans; /* times to retry send */
int maxgrouplist; /* Max. size of group list */
int readahead; /* # of blocks to readahead */
- int __pad1; /* was "leaseterm" */
+ int wcommitsize; /* Max. write commit size in bytes */
int deadthresh; /* Retrans threshold */
char *hostname; /* server's name */
int acregmin; /* cache attrs for reg files min time */
@@ -80,7 +80,7 @@ struct nfs_args {
#define NFSMNT_NFSV3 0x00000200 /* Use NFS Version 3 protocol */
/* 0x400 free, was NFSMNT_KERB */
#define NFSMNT_DUMBTIMR 0x00000800 /* Don't estimate rtt dynamically */
-/* 0x1000 free, was NFSMNT_LEASETERM */
+#define NFSMNT_WCOMMITSIZE 0x00001000 /* set max write commit size */
#define NFSMNT_READAHEAD 0x00002000 /* set read ahead */
#define NFSMNT_DEADTHRESH 0x00004000 /* set dead server retry thresh */
#define NFSMNT_RESVPORT 0x00008000 /* Allocate a reserved port */
diff --git a/sys/nfsclient/nfsmount.h b/sys/nfsclient/nfsmount.h
index 7f23aaa..a8bcc14 100644
--- a/sys/nfsclient/nfsmount.h
+++ b/sys/nfsclient/nfsmount.h
@@ -74,6 +74,7 @@ struct nfsmount {
int nm_wsize; /* Max size of write rpc */
int nm_readdirsize; /* Size of a readdir rpc */
int nm_readahead; /* Num. of blocks to readahead */
+ int nm_wcommitsize; /* Max size of commit for write */
int nm_acdirmin; /* Directory attr cache min lifetime */
int nm_acdirmax; /* Directory attr cache max lifetime */
int nm_acregmin; /* Reg file attr cache min lifetime */
diff --git a/sys/sys/buf.h b/sys/sys/buf.h
index 8f68ff2..9f8fc2f 100644
--- a/sys/sys/buf.h
+++ b/sys/sys/buf.h
@@ -465,6 +465,7 @@ extern int nbuf; /* The number of buffer headers */
extern int maxswzone; /* Max KVA for swap structures */
extern int maxbcache; /* Max KVA for buffer cache */
extern int runningbufspace;
+extern int hibufspace;
extern int buf_maxio; /* nominal maximum I/O for buffer */
extern struct buf *buf; /* The buffer headers. */
extern char *buffers; /* The buffer contents. */
diff --git a/sys/sys/bufobj.h b/sys/sys/bufobj.h
index f938c80..cab7fb0 100644
--- a/sys/sys/bufobj.h
+++ b/sys/sys/bufobj.h
@@ -109,13 +109,13 @@ struct bufobj {
#define BO_LOCK(bo) \
do { \
- KASSERT (bo->bo_mtx != NULL, ("No lock in bufobj")); \
+ KASSERT((bo)->bo_mtx != NULL, ("No lock in bufobj")); \
mtx_lock((bo)->bo_mtx); \
} while (0)
#define BO_UNLOCK(bo) \
do { \
- KASSERT (bo->bo_mtx != NULL, ("No lock in bufobj")); \
+ KASSERT((bo)->bo_mtx != NULL, ("No lock in bufobj")); \
mtx_unlock((bo)->bo_mtx); \
} while (0)
OpenPOWER on IntegriCloud