summaryrefslogtreecommitdiffstats
path: root/sys/nfsserver
diff options
context:
space:
mode:
authoralc <alc@FreeBSD.org>1999-05-02 23:57:16 +0000
committeralc <alc@FreeBSD.org>1999-05-02 23:57:16 +0000
commit5cb08a2652f36ddab7172faf6b766038472c1647 (patch)
treec47eaa3332628f6c725ca32dda81aa44d24e2ac2 /sys/nfsserver
parentc75d7e89c3e63bc9b8e9863a5cc985649edf5f9a (diff)
downloadFreeBSD-src-5cb08a2652f36ddab7172faf6b766038472c1647.zip
FreeBSD-src-5cb08a2652f36ddab7172faf6b766038472c1647.tar.gz
The VFS/BIO subsystem contained a number of hacks in order to optimize
piecemeal, middle-of-file writes for NFS. These hacks have caused no end of trouble, especially when combined with mmap(). I've removed them. Instead, NFS will issue a read-before-write to fully instantiate the struct buf containing the write. NFS does, however, optimize piecemeal appends to files. For most common file operations, you will not notice the difference. The sole remaining fragment in the VFS/BIO system is b_dirtyoff/end, which NFS uses to avoid cache coherency issues with read-merge-write style operations. NFS also optimizes the write-covers-entire-buffer case by avoiding the read-before-write. There is quite a bit of room for further optimization in these areas. The VM system marks pages fully-valid (AKA vm_page_t->valid = VM_PAGE_BITS_ALL) in several places, most noteably in vm_fault. This is not correct operation. The vm_pager_get_pages() code is now responsible for marking VM pages all-valid. A number of VM helper routines have been added to aid in zeroing-out the invalid portions of a VM page prior to the page being marked all-valid. This operation is necessary to properly support mmap(). The zeroing occurs most often when dealing with file-EOF situations. Several bugs have been fixed in the NFS subsystem, including bits handling file and directory EOF situations and buf->b_flags consistancy issues relating to clearing B_ERROR & B_INVAL, and handling B_DONE. getblk() and allocbuf() have been rewritten. B_CACHE operation is now formally defined in comments and more straightforward in implementation. B_CACHE for VMIO buffers is based on the validity of the backing store. B_CACHE for non-VMIO buffers is based simply on whether the buffer is B_INVAL or not (B_CACHE set if B_INVAL clear, and vise-versa). biodone() is now responsible for setting B_CACHE when a successful read completes. B_CACHE is also set when a bdwrite() is initiated and when a bwrite() is initiated. VFS VOP_BWRITE routines (there are only two - nfs_bwrite() and bwrite()) are now expected to set B_CACHE. This means that bowrite() and bawrite() also set B_CACHE indirectly. There are a number of places in the code which were previously using buf->b_bufsize (which is DEV_BSIZE aligned) when they should have been using buf->b_bcount. These have been fixed. getblk() now clears B_DONE on return because the rest of the system is so bad about dealing with B_DONE. Major fixes to NFS/TCP have been made. A server-side bug could cause requests to be lost by the server due to nfs_realign() overwriting other rpc's in the same TCP mbuf chain. The server's kernel must be recompiled to get the benefit of the fixes. Submitted by: Matthew Dillon <dillon@apollo.backplane.com>
Diffstat (limited to 'sys/nfsserver')
-rw-r--r--sys/nfsserver/nfs.h5
-rw-r--r--sys/nfsserver/nfs_srvsock.c136
-rw-r--r--sys/nfsserver/nfsrvstats.h5
3 files changed, 59 insertions, 87 deletions
diff --git a/sys/nfsserver/nfs.h b/sys/nfsserver/nfs.h
index bc15a7c..78a54a2 100644
--- a/sys/nfsserver/nfs.h
+++ b/sys/nfsserver/nfs.h
@@ -34,7 +34,7 @@
* SUCH DAMAGE.
*
* @(#)nfs.h 8.4 (Berkeley) 5/1/95
- * $Id: nfs.h,v 1.44 1998/09/07 05:42:15 bde Exp $
+ * $Id: nfs.h,v 1.45 1999/02/25 00:03:50 peter Exp $
*/
#ifndef _NFS_NFS_H_
@@ -651,8 +651,7 @@ void nfs_disconnect __P((struct nfsmount *));
void nfs_safedisconnect __P((struct nfsmount *));
int nfs_getattrcache __P((struct vnode *, struct vattr *));
int nfsm_strtmbuf __P((struct mbuf **, char **, const char *, long));
-int nfs_bioread __P((struct vnode *, struct uio *, int, struct ucred *,
- int));
+int nfs_bioread __P((struct vnode *, struct uio *, int, struct ucred *));
int nfsm_uiotombuf __P((struct uio *, struct mbuf **, int, caddr_t *));
void nfsrv_init __P((int));
void nfs_clearcommit __P((struct mount *));
diff --git a/sys/nfsserver/nfs_srvsock.c b/sys/nfsserver/nfs_srvsock.c
index 1490f72..2267629 100644
--- a/sys/nfsserver/nfs_srvsock.c
+++ b/sys/nfsserver/nfs_srvsock.c
@@ -34,7 +34,7 @@
* SUCH DAMAGE.
*
* @(#)nfs_socket.c 8.5 (Berkeley) 3/30/95
- * $Id: nfs_socket.c,v 1.50 1999/02/25 00:03:51 peter Exp $
+ * $Id: nfs_socket.c,v 1.51 1999/04/24 11:29:48 dt Exp $
*/
/*
@@ -54,6 +54,7 @@
#include <sys/socketvar.h>
#include <sys/syslog.h>
#include <sys/tprintf.h>
+#include <sys/sysctl.h>
#include <netinet/in.h>
#include <netinet/tcp.h>
@@ -115,6 +116,15 @@ static int proct[NFS_NPROCS] = {
0, 0, 0,
};
+static int nfs_realign_test;
+static int nfs_realign_count;
+
+SYSCTL_DECL(_vfs_nfs);
+
+SYSCTL_INT(_vfs_nfs, OID_AUTO, realign_test, CTLFLAG_RD, &nfs_realign_test, 0, "");
+SYSCTL_INT(_vfs_nfs, OID_AUTO, realign_count, CTLFLAG_RD, &nfs_realign_count, 0, "");
+
+
/*
* There is a congestion window for outstanding rpcs maintained per mount
* point. The cwnd size is adjusted in roughly the way that:
@@ -138,7 +148,7 @@ struct callout_handle nfs_timer_handle;
static int nfs_msg __P((struct proc *,char *,char *));
static int nfs_rcvlock __P((struct nfsreq *));
static void nfs_rcvunlock __P((struct nfsreq *));
-static void nfs_realign __P((struct mbuf *m, int hsiz));
+static void nfs_realign __P((struct mbuf **pm, int hsiz));
static int nfs_receive __P((struct nfsreq *rep, struct sockaddr **aname,
struct mbuf **mp));
static int nfs_reconnect __P((struct nfsreq *rep));
@@ -702,7 +712,7 @@ errout:
* These could cause pointer alignment problems, so copy them to
* well aligned mbufs.
*/
- nfs_realign(*mp, 5 * NFSX_UNSIGNED);
+ nfs_realign(mp, 5 * NFSX_UNSIGNED);
return (error);
}
@@ -1589,92 +1599,56 @@ nfs_rcvunlock(rep)
}
/*
- * Check for badly aligned mbuf data areas and
- * realign data in an mbuf list by copying the data areas up, as required.
+ * nfs_realign:
+ *
+ * Check for badly aligned mbuf data and realign by copying the unaligned
+ * portion of the data into a new mbuf chain and freeing the portions
+ * of the old chain that were replaced.
+ *
+ * We cannot simply realign the data within the existing mbuf chain
+ * because the underlying buffers may contain other rpc commands and
+ * we cannot afford to overwrite them.
+ *
+ * We would prefer to avoid this situation entirely. The situation does
+ * not occur with NFS/UDP and is supposed to only occassionally occur
+ * with TCP. Use vfs.nfs.realign_count and realign_test to check this.
*/
static void
-nfs_realign(m, hsiz)
- register struct mbuf *m;
+nfs_realign(pm, hsiz)
+ register struct mbuf **pm;
int hsiz;
{
- register struct mbuf *m2;
- register int siz, mlen, olen;
- register caddr_t tcp, fcp;
- struct mbuf *mnew;
+ struct mbuf *m;
+ struct mbuf *n = NULL;
+ int off = 0;
- while (m) {
- /*
- * This never happens for UDP, rarely happens for TCP
- * but frequently happens for iso transport.
- */
- if ((m->m_len & 0x3) || (mtod(m, intptr_t) & 0x3)) {
- olen = m->m_len;
- fcp = mtod(m, caddr_t);
- if ((intptr_t)fcp & 0x3) {
- m->m_flags &= ~M_PKTHDR;
- if (m->m_flags & M_EXT)
- m->m_data = m->m_ext.ext_buf +
- ((m->m_ext.ext_size - olen) & ~0x3);
- else
- m->m_data = m->m_dat;
- }
- m->m_len = 0;
- tcp = mtod(m, caddr_t);
- mnew = m;
- m2 = m->m_next;
+ ++nfs_realign_test;
- /*
- * If possible, only put the first invariant part
- * of the RPC header in the first mbuf.
- */
- mlen = M_TRAILINGSPACE(m);
- if (olen <= hsiz && mlen > hsiz)
- mlen = hsiz;
-
- /*
- * Loop through the mbuf list consolidating data.
- */
- while (m) {
- while (olen > 0) {
- if (mlen == 0) {
- m2->m_flags &= ~M_PKTHDR;
- if (m2->m_flags & M_EXT)
- m2->m_data = m2->m_ext.ext_buf;
- else
- m2->m_data = m2->m_dat;
- m2->m_len = 0;
- mlen = M_TRAILINGSPACE(m2);
- tcp = mtod(m2, caddr_t);
- mnew = m2;
- m2 = m2->m_next;
- }
- siz = min(mlen, olen);
- if (tcp != fcp)
- bcopy(fcp, tcp, siz);
- mnew->m_len += siz;
- mlen -= siz;
- olen -= siz;
- tcp += siz;
- fcp += siz;
- }
- m = m->m_next;
- if (m) {
- olen = m->m_len;
- fcp = mtod(m, caddr_t);
+ while ((m = *pm) != NULL) {
+ if ((m->m_len & 0x3) || (mtod(m, intptr_t) & 0x3)) {
+ MGET(n, M_WAIT, MT_DATA);
+ if (m->m_len >= MINCLSIZE) {
+ MCLGET(n, M_WAIT);
}
+ n->m_len = 0;
+ break;
}
+ pm = &m->m_next;
+ }
- /*
- * Finally, set m_len == 0 for any trailing mbufs that have
- * been copied out of.
- */
- while (m2) {
- m2->m_len = 0;
- m2 = m2->m_next;
+ /*
+ * If n is non-NULL, loop on m copying data, then replace the
+ * portion of the chain that had to be realigned.
+ */
+ if (n != NULL) {
+ ++nfs_realign_count;
+ while (m) {
+ m_copyback(n, off, m->m_len, mtod(m, caddr_t));
+ off += m->m_len;
+ m = m->m_next;
}
- return;
- }
- m = m->m_next;
+ m_freem(*pm);
+ *pm = n;
}
}
@@ -2040,7 +2014,7 @@ nfsrv_rcv(so, arg, waitflag)
m_freem(mp);
continue;
}
- nfs_realign(mp, 10 * NFSX_UNSIGNED);
+ nfs_realign(&mp, 10 * NFSX_UNSIGNED);
rec->nr_address = nam;
rec->nr_packet = mp;
STAILQ_INSERT_TAIL(&slp->ns_rec, rec, nr_link);
@@ -2182,7 +2156,7 @@ nfsrv_getstream(slp, waitflag)
if (!rec) {
m_freem(slp->ns_frag);
} else {
- nfs_realign(slp->ns_frag, 10 * NFSX_UNSIGNED);
+ nfs_realign(&slp->ns_frag, 10 * NFSX_UNSIGNED);
rec->nr_address = (struct sockaddr *)0;
rec->nr_packet = slp->ns_frag;
STAILQ_INSERT_TAIL(&slp->ns_rec, rec, nr_link);
diff --git a/sys/nfsserver/nfsrvstats.h b/sys/nfsserver/nfsrvstats.h
index bc15a7c..78a54a2 100644
--- a/sys/nfsserver/nfsrvstats.h
+++ b/sys/nfsserver/nfsrvstats.h
@@ -34,7 +34,7 @@
* SUCH DAMAGE.
*
* @(#)nfs.h 8.4 (Berkeley) 5/1/95
- * $Id: nfs.h,v 1.44 1998/09/07 05:42:15 bde Exp $
+ * $Id: nfs.h,v 1.45 1999/02/25 00:03:50 peter Exp $
*/
#ifndef _NFS_NFS_H_
@@ -651,8 +651,7 @@ void nfs_disconnect __P((struct nfsmount *));
void nfs_safedisconnect __P((struct nfsmount *));
int nfs_getattrcache __P((struct vnode *, struct vattr *));
int nfsm_strtmbuf __P((struct mbuf **, char **, const char *, long));
-int nfs_bioread __P((struct vnode *, struct uio *, int, struct ucred *,
- int));
+int nfs_bioread __P((struct vnode *, struct uio *, int, struct ucred *));
int nfsm_uiotombuf __P((struct uio *, struct mbuf **, int, caddr_t *));
void nfsrv_init __P((int));
void nfs_clearcommit __P((struct mount *));
OpenPOWER on IntegriCloud