summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--sys/nfs4client/nfs4_vfsops.c1
-rw-r--r--sys/nfs4client/nfs4_vnops.c1
-rw-r--r--sys/nfsclient/nfs.h22
-rw-r--r--sys/nfsclient/nfs_bio.c10
-rw-r--r--sys/nfsclient/nfs_socket.c931
-rw-r--r--sys/nfsclient/nfs_subs.c4
-rw-r--r--sys/nfsclient/nfs_vfsops.c9
-rw-r--r--sys/nfsclient/nfs_vnops.c1
-rw-r--r--sys/nfsclient/nfsm_subs.h3
-rw-r--r--sys/nfsclient/nfsmount.h9
10 files changed, 573 insertions, 418 deletions
diff --git a/sys/nfs4client/nfs4_vfsops.c b/sys/nfs4client/nfs4_vfsops.c
index 7749146..14a2720 100644
--- a/sys/nfs4client/nfs4_vfsops.c
+++ b/sys/nfs4client/nfs4_vfsops.c
@@ -82,6 +82,7 @@ __FBSDID("$FreeBSD$");
#include <sys/sysctl.h>
#include <sys/unistd.h>
#include <sys/vnode.h>
+#include <sys/signalvar.h>
#include <vm/vm.h>
#include <vm/vm_extern.h>
diff --git a/sys/nfs4client/nfs4_vnops.c b/sys/nfs4client/nfs4_vnops.c
index 5b9b582..985303a 100644
--- a/sys/nfs4client/nfs4_vnops.c
+++ b/sys/nfs4client/nfs4_vnops.c
@@ -87,6 +87,7 @@ __FBSDID("$FreeBSD$");
#include <sys/stat.h>
#include <sys/sysctl.h>
#include <sys/lockmgr.h>
+#include <sys/signalvar.h>
#include <vm/vm.h>
#include <vm/vm_extern.h>
diff --git a/sys/nfsclient/nfs.h b/sys/nfsclient/nfs.h
index 648120b..4743ea2 100644
--- a/sys/nfsclient/nfs.h
+++ b/sys/nfsclient/nfs.h
@@ -90,8 +90,6 @@
#define NFSSTA_GOTFSINFO 0x00100000 /* Got the V3 fsinfo */
#define NFSSTA_SNDLOCK 0x01000000 /* Send socket lock */
#define NFSSTA_WANTSND 0x02000000 /* Want above */
-#define NFSSTA_RCVLOCK 0x04000000 /* Rcv socket lock */
-#define NFSSTA_WANTRCV 0x08000000 /* Want above */
#define NFSSTA_TIMEO 0x10000000 /* Experiencing a timeout */
@@ -151,19 +149,6 @@ struct vattr;
struct nameidata;
/*
- * The set of signals that interrupt an I/O in progress for NFSMNT_INT mounts.
- * What should be in this set is open to debate, but I believe that since
- * I/O system calls on ufs are never interrupted by signals the set should
- * be minimal. My reasoning is that many current programs that use signals
- * such as SIGALRM will not expect file I/O system calls to be interrupted
- * by them and break.
- */
-#define NFSINT_SIGMASK(set) \
- (SIGISMEMBER(set, SIGINT) || SIGISMEMBER(set, SIGTERM) || \
- SIGISMEMBER(set, SIGHUP) || SIGISMEMBER(set, SIGKILL) || \
- SIGISMEMBER(set, SIGQUIT))
-
-/*
* Socket errors ignored for connectionless sockets??
* For now, ignore them all
*/
@@ -321,6 +306,13 @@ int nfs_fsinfo(struct nfsmount *, struct vnode *, struct ucred *,
int nfs_meta_setsize (struct vnode *, struct ucred *,
struct thread *, u_quad_t);
+void nfs_set_sigmask __P((struct thread *td, sigset_t *oldset));
+void nfs_restore_sigmask __P((struct thread *td, sigset_t *set));
+int nfs_tsleep __P((struct thread *td, void *ident, int priority, char *wmesg,
+ int timo));
+int nfs_msleep __P((struct thread *td, void *ident, struct mtx *mtx, int priority,
+ char *wmesg, int timo));
+
#endif /* _KERNEL */
#endif
diff --git a/sys/nfsclient/nfs_bio.c b/sys/nfsclient/nfs_bio.c
index bc2cf53..9e9af49 100644
--- a/sys/nfsclient/nfs_bio.c
+++ b/sys/nfsclient/nfs_bio.c
@@ -1036,7 +1036,11 @@ nfs_getcacheblk(struct vnode *vp, daddr_t bn, int size, struct thread *td)
nmp = VFSTONFS(mp);
if (nmp->nm_flag & NFSMNT_INT) {
+ sigset_t oldset;
+
+ nfs_set_sigmask(td, &oldset);
bp = getblk(vp, bn, size, PCATCH, 0, 0);
+ nfs_restore_sigmask(td, &oldset);
while (bp == NULL) {
if (nfs_sigintr(nmp, NULL, td))
return (NULL);
@@ -1208,8 +1212,8 @@ again:
NFS_DPF(ASYNCIO,
("nfs_asyncio: waiting for mount %p queue to drain\n", nmp));
nmp->nm_bufqwant = TRUE;
- error = tsleep(&nmp->nm_bufq, slpflag | PRIBIO,
- "nfsaio", slptimeo);
+ error = nfs_tsleep(td, &nmp->nm_bufq, slpflag | PRIBIO,
+ "nfsaio", slptimeo);
if (error) {
error2 = nfs_sigintr(nmp, NULL, td);
if (error2)
@@ -1511,6 +1515,8 @@ nfs_meta_setsize(struct vnode *vp, struct ucred *cred, struct thread *td, u_quad
lbn = nsize / biosize;
bufsize = nsize & (biosize - 1);
bp = nfs_getcacheblk(vp, lbn, bufsize, td);
+ if (!bp)
+ return EINTR;
if (bp->b_dirtyoff > bp->b_bcount)
bp->b_dirtyoff = bp->b_bcount;
if (bp->b_dirtyend > bp->b_bcount)
diff --git a/sys/nfsclient/nfs_socket.c b/sys/nfsclient/nfs_socket.c
index 094a2e1..8002908 100644
--- a/sys/nfsclient/nfs_socket.c
+++ b/sys/nfsclient/nfs_socket.c
@@ -52,6 +52,7 @@ __FBSDID("$FreeBSD$");
#include <sys/proc.h>
#include <sys/protosw.h>
#include <sys/signalvar.h>
+#include <sys/syscallsubr.h>
#include <sys/socket.h>
#include <sys/socketvar.h>
#include <sys/sysctl.h>
@@ -142,14 +143,15 @@ static int nfs_backoff[NFS_NBACKOFF] = { 2, 4, 8, 16, 32, 64, 128, 256, };
struct callout nfs_callout;
static int nfs_msg(struct thread *, const char *, const char *, int);
-static int nfs_rcvlock(struct nfsreq *);
-static void nfs_rcvunlock(struct nfsreq *);
-static void nfs_realign(struct mbuf **pm, int hsiz);
-static int nfs_receive(struct nfsreq *rep, struct sockaddr **aname,
- struct mbuf **mp);
+static int nfs_realign(struct mbuf **pm, int hsiz);
static int nfs_reply(struct nfsreq *);
static void nfs_softterm(struct nfsreq *rep);
static int nfs_reconnect(struct nfsreq *rep);
+static void nfs_clnt_tcp_soupcall(struct socket *so, void *arg, int waitflag);
+static void nfs_clnt_udp_soupcall(struct socket *so, void *arg, int waitflag);
+
+extern struct mtx nfs_reqq_mtx;
+extern struct mtx nfs_reply_mtx;
/*
* Initialize sockets and congestion for a new NFS connection.
@@ -166,6 +168,12 @@ nfs_connect(struct nfsmount *nmp, struct nfsreq *rep)
NET_ASSERT_GIANT();
+ if (nmp->nm_sotype == SOCK_STREAM) {
+ mtx_lock(&nmp->nm_nfstcpstate.mtx);
+ nmp->nm_nfstcpstate.flags |= NFS_TCP_EXPECT_RPCMARKER;
+ nmp->nm_nfstcpstate.rpcresid = 0;
+ mtx_unlock(&nmp->nm_nfstcpstate.mtx);
+ }
nmp->nm_so = NULL;
saddr = nmp->nm_nam;
error = socreate(saddr->sa_family, &nmp->nm_so, nmp->nm_sotype,
@@ -324,6 +332,12 @@ nfs_connect(struct nfsmount *nmp, struct nfsreq *rep)
goto bad;
SOCKBUF_LOCK(&so->so_rcv);
so->so_rcv.sb_flags |= SB_NOINTR;
+ so->so_upcallarg = (caddr_t)nmp;
+ if (so->so_type == SOCK_STREAM)
+ so->so_upcall = nfs_clnt_tcp_soupcall;
+ else
+ so->so_upcall = nfs_clnt_udp_soupcall;
+ so->so_rcv.sb_flags |= SB_UPCALL;
SOCKBUF_UNLOCK(&so->so_rcv);
SOCKBUF_LOCK(&so->so_snd);
so->so_snd.sb_flags |= SB_NOINTR;
@@ -370,14 +384,29 @@ nfs_reconnect(struct nfsreq *rep)
(void) tsleep(&lbolt, PSOCK, "nfscon", 0);
}
+ /*
+ * Clear the FORCE_RECONNECT flag only after the connect
+	 * succeeds, to prevent races between multiple processes
+	 * waiting on the mountpoint where the connection is being
+	 * torn down. The first one to acquire the sndlock will
+	 * retry the connection. The others block on the sndlock
+	 * until the connection is established successfully, and
+	 * then re-transmit the request.
+ */
+ mtx_lock(&nmp->nm_nfstcpstate.mtx);
+ nmp->nm_nfstcpstate.flags &= ~NFS_TCP_FORCE_RECONNECT;
+ mtx_unlock(&nmp->nm_nfstcpstate.mtx);
+
/*
* Loop through outstanding request list and fix up all requests
* on old socket.
*/
+ mtx_lock(&nfs_reqq_mtx);
TAILQ_FOREACH(rp, &nfs_reqq, r_chain) {
if (rp->r_nmp == nmp)
rp->r_flags |= R_MUSTRESEND;
}
+ mtx_unlock(&nfs_reqq_mtx);
return (0);
}
@@ -394,7 +423,12 @@ nfs_disconnect(struct nfsmount *nmp)
if (nmp->nm_so) {
so = nmp->nm_so;
nmp->nm_so = NULL;
- soshutdown(so, SHUT_RDWR);
+ SOCKBUF_LOCK(&so->so_rcv);
+ so->so_upcallarg = NULL;
+ so->so_upcall = NULL;
+ so->so_rcv.sb_flags &= ~SB_UPCALL;
+ SOCKBUF_UNLOCK(&so->so_rcv);
+ soshutdown(so, SHUT_WR);
soclose(so);
}
}
@@ -406,9 +440,7 @@ nfs_safedisconnect(struct nfsmount *nmp)
bzero(&dummyreq, sizeof(dummyreq));
dummyreq.r_nmp = nmp;
- nfs_rcvlock(&dummyreq);
nfs_disconnect(nmp);
- nfs_rcvunlock(&dummyreq);
}
/*
@@ -488,79 +520,54 @@ nfs_send(struct socket *so, struct sockaddr *nam, struct mbuf *top,
return (error);
}
-/*
- * Receive a Sun RPC Request/Reply. For SOCK_DGRAM, the work is all
- * done by soreceive(), but for SOCK_STREAM we must deal with the Record
- * Mark and consolidate the data into a new mbuf list.
- * nb: Sometimes TCP passes the data up to soreceive() in long lists of
- * small mbufs.
- * For SOCK_STREAM we must be very careful to read an entire record once
- * we have read any of it, even if the system call has been interrupted.
- */
-static int
-nfs_receive(struct nfsreq *rep, struct sockaddr **aname, struct mbuf **mp)
+int nfs_mrep_before_tsleep = 0;
+
+int
+nfs_reply(struct nfsreq *rep)
{
- struct socket *so;
- struct uio auio;
- struct iovec aio;
- struct mbuf *m;
- struct mbuf *control;
- u_int32_t len;
- struct sockaddr **getnam;
- int error, error2, sotype, rcvflg;
- struct thread *td = curthread; /* XXX */
+ register struct socket *so;
+ register struct mbuf *m;
+ int error, sotype, slpflag;
NET_ASSERT_GIANT();
- /*
- * Set up arguments for soreceive()
- */
- *mp = NULL;
- *aname = NULL;
sotype = rep->r_nmp->nm_sotype;
-
/*
* For reliable protocols, lock against other senders/receivers
* in case a reconnect is necessary.
- * For SOCK_STREAM, first get the Record Mark to find out how much
- * more there is to get.
- * We must lock the socket against other receivers
- * until we have an entire rpc request/reply.
*/
if (sotype != SOCK_DGRAM) {
error = nfs_sndlock(rep);
if (error)
return (error);
tryagain:
- /*
- * Check for fatal errors and resending request.
- */
- /*
- * Ugh: If a reconnect attempt just happened, nm_so
- * would have changed. NULL indicates a failed
- * attempt that has essentially shut down this
- * mount point.
- */
- if (rep->r_mrep || (error = NFS_SIGREP(rep)) != 0) {
+ if (rep->r_mrep) {
nfs_sndunlock(rep);
- return (error == 0 ? EINTR : error);
+ return (0);
+ }
+ if (rep->r_flags & R_SOFTTERM) {
+ nfs_sndunlock(rep);
+ return (EINTR);
}
so = rep->r_nmp->nm_so;
- if (!so) {
+ mtx_lock(&rep->r_nmp->nm_nfstcpstate.mtx);
+ if (!so ||
+ (rep->r_nmp->nm_nfstcpstate.flags & NFS_TCP_FORCE_RECONNECT)) {
+ mtx_unlock(&rep->r_nmp->nm_nfstcpstate.mtx);
error = nfs_reconnect(rep);
if (error) {
nfs_sndunlock(rep);
return (error);
}
goto tryagain;
- }
+ } else
+ mtx_unlock(&rep->r_nmp->nm_nfstcpstate.mtx);
while (rep->r_flags & R_MUSTRESEND) {
- m = m_copym(rep->r_mreq, 0, M_COPYALL, M_TRYWAIT);
+ m = m_copym(rep->r_mreq, 0, M_COPYALL, M_WAIT);
nfsstats.rpcretries++;
error = nfs_send(so, rep->r_nmp->nm_nam, m, rep);
if (error) {
if (error == EINTR || error == ERESTART ||
- error == EIO ||
(error = nfs_reconnect(rep)) != 0) {
nfs_sndunlock(rep);
return (error);
@@ -569,325 +576,330 @@ tryagain:
}
}
nfs_sndunlock(rep);
- if (sotype == SOCK_STREAM) {
- aio.iov_base = (caddr_t) &len;
- aio.iov_len = sizeof(u_int32_t);
- auio.uio_iov = &aio;
- auio.uio_iovcnt = 1;
- auio.uio_segflg = UIO_SYSSPACE;
- auio.uio_rw = UIO_READ;
- auio.uio_offset = 0;
- auio.uio_resid = sizeof(u_int32_t);
- auio.uio_td = td;
- do {
- rcvflg = MSG_WAITALL;
- error = so->so_proto->pr_usrreqs->pru_soreceive
- (so, NULL, &auio, NULL, NULL, &rcvflg);
- if (error == EWOULDBLOCK) {
- error2 = NFS_SIGREP(rep);
- if (error2)
- return (error2);
- }
- } while (0);
- if (!error && auio.uio_resid > 0) {
- /*
- * Don't log a 0 byte receive; it means
- * that the socket has been closed, and
- * can happen during normal operation
- * (forcible unmount or Solaris server).
- */
- if (auio.uio_resid != sizeof (u_int32_t))
- log(LOG_INFO,
- "short receive (%d/%d) from nfs server %s\n",
- (int)(sizeof(u_int32_t) - auio.uio_resid),
- (int)sizeof(u_int32_t),
- rep->r_nmp->nm_mountp->mnt_stat.f_mntfromname);
- error = EPIPE;
- }
- if (error)
- goto errout;
- len = ntohl(len) & ~0x80000000;
- /*
- * This is SERIOUS! We are out of sync with the sender
- * and forcing a disconnect/reconnect is all I can do.
- */
- if (len > NFS_MAXPACKET) {
- log(LOG_ERR, "%s (%d) from nfs server %s\n",
- "impossible packet length",
- len,
- rep->r_nmp->nm_mountp->mnt_stat.f_mntfromname);
- error = EFBIG;
- goto errout;
- }
- auio.uio_resid = len;
- do {
- rcvflg = MSG_WAITALL;
- error = so->so_proto->pr_usrreqs->pru_soreceive
- (so, NULL,
- &auio, mp, NULL, &rcvflg);
- } while (0);
- if (!error && auio.uio_resid > 0) {
- if (len != auio.uio_resid)
- log(LOG_INFO,
- "short receive (%d/%d) from nfs server %s\n",
- len - auio.uio_resid, len,
- rep->r_nmp->nm_mountp->mnt_stat.f_mntfromname);
- error = EPIPE;
- }
- } else {
- /*
- * NB: Since uio_resid is big, MSG_WAITALL is ignored
- * and soreceive() will return when it has either a
- * control msg or a data msg.
- * We have no use for control msg., but must grab them
- * and then throw them away so we know what is going
- * on.
- */
- auio.uio_resid = len = 100000000; /* Anything Big */
- auio.uio_td = td;
- do {
- rcvflg = 0;
- error = so->so_proto->pr_usrreqs->pru_soreceive
- (so, NULL,
- &auio, mp, &control, &rcvflg);
- if (control)
- m_freem(control);
- if (error == EWOULDBLOCK && rep) {
- error2 = NFS_SIGREP(rep);
- if (error2)
- return (error2);
- }
- } while (!error && *mp == NULL && control);
- if ((rcvflg & MSG_EOR) == 0)
- printf("Egad!!\n");
- if (!error && *mp == NULL)
- error = EPIPE;
- len -= auio.uio_resid;
- }
-errout:
- if (error && error != EINTR && error != EIO &&
- error != ERESTART) {
- m_freem(*mp);
- *mp = NULL;
- if (error != EPIPE && error != EWOULDBLOCK)
- log(LOG_INFO,
- "receive error %d from nfs server %s\n",
- error,
- rep->r_nmp->nm_mountp->mnt_stat.f_mntfromname);
- error = nfs_sndlock(rep);
- if (!error) {
- error = nfs_reconnect(rep);
- if (!error)
- goto tryagain;
- else
- nfs_sndunlock(rep);
- }
- }
- } else {
+ }
+ slpflag = 0;
+ if (rep->r_nmp->nm_flag & NFSMNT_INT)
+ slpflag = PCATCH;
+ mtx_lock(&nfs_reply_mtx);
+ if (rep->r_mrep != NULL) {
/*
- * We may have failed while rebinding the datagram socket
- * so attempt a rebind here.
+ * This is a very rare race, but it does occur. The reply
+ * could come in and the wakeup could happen before the
+ * process tsleeps(). Blocking here without checking for
+ * this results in a missed wakeup(), blocking this request
+ * forever. The 2 reasons why this could happen are a context
+ * switch in the stack after the request is sent out, or heavy
+ * interrupt activity pinning down the process within the window.
+ * (after the request is sent).
*/
- if ((so = rep->r_nmp->nm_so) == NULL) {
+ mtx_unlock(&nfs_reply_mtx);
+ nfs_mrep_before_tsleep++;
+ return (0);
+ }
+ error = msleep((caddr_t)rep, &nfs_reply_mtx,
+ slpflag | (PZERO - 1), "nfsreq", 0);
+ mtx_unlock(&nfs_reply_mtx);
+ if (error == EINTR || error == ERESTART)
+ /* NFS operations aren't restartable. Map ERESTART to EINTR */
+ return (EINTR);
+ if (rep->r_flags & R_SOFTTERM)
+ /* Request was terminated because we exceeded the retries (soft mount) */
+ return (ETIMEDOUT);
+ if (sotype == SOCK_STREAM) {
+ mtx_lock(&rep->r_nmp->nm_nfstcpstate.mtx);
+ if (((rep->r_nmp->nm_nfstcpstate.flags & NFS_TCP_FORCE_RECONNECT) ||
+ (rep->r_flags & R_MUSTRESEND))) {
+ mtx_unlock(&rep->r_nmp->nm_nfstcpstate.mtx);
error = nfs_sndlock(rep);
- if (!error) {
- error = nfs_reconnect(rep);
- nfs_sndunlock(rep);
- }
if (error)
return (error);
- so = rep->r_nmp->nm_so;
- }
- if (so->so_state & SS_ISCONNECTED)
- getnam = NULL;
- else
- getnam = aname;
- auio.uio_resid = len = 1000000;
- auio.uio_td = td;
- do {
- rcvflg = 0;
- error = so->so_proto->pr_usrreqs->pru_soreceive
- (so, getnam, &auio, mp,
- NULL, &rcvflg);
- if (error) {
- error2 = NFS_SIGREP(rep);
- if (error2) {
- error = error2;
- goto dgramout;
- }
- }
- if (error) {
- error2 = nfs_sndlock(rep);
- if (!error2) {
- error2 = nfs_reconnect(rep);
- if (error2)
- error = error2;
- else
- so = rep->r_nmp->nm_so;
- nfs_sndunlock(rep);
- } else {
- error = error2;
- }
- }
- } while (error == EWOULDBLOCK);
-dgramout:
- len -= auio.uio_resid;
- }
- if (error) {
- m_freem(*mp);
- *mp = NULL;
+ goto tryagain;
+ } else
+ mtx_unlock(&rep->r_nmp->nm_nfstcpstate.mtx);
}
- /*
- * Search for any mbufs that are not a multiple of 4 bytes long
- * or with m_data not longword aligned.
- * These could cause pointer alignment problems, so copy them to
- * well aligned mbufs.
- */
- nfs_realign(mp, 5 * NFSX_UNSIGNED);
return (error);
}
/*
- * Implement receipt of reply on a socket.
- * We must search through the list of received datagrams matching them
- * with outstanding requests using the xid, until ours is found.
+ * XXX TO DO
+ * Make nfs_realign() non-blocking. Also make nfsm_dissect() nonblocking.
*/
-/* ARGSUSED */
-static int
-nfs_reply(struct nfsreq *myrep)
+static void
+nfs_clnt_match_xid(struct socket *so,
+ struct nfsmount *nmp,
+ struct mbuf *mrep)
{
- struct nfsreq *rep;
- struct nfsmount *nmp = myrep->r_nmp;
- int32_t t1;
- struct mbuf *mrep, *md;
- struct sockaddr *nam;
- u_int32_t rxid, *tl;
+ struct mbuf *md;
caddr_t dpos;
+ u_int32_t rxid, *tl;
+ struct nfsreq *rep;
+ register int32_t t1;
int error;
-
+
/*
- * Loop around until we get our own reply
+ * Search for any mbufs that are not a multiple of 4 bytes long
+ * or with m_data not longword aligned.
+ * These could cause pointer alignment problems, so copy them to
+ * well aligned mbufs.
*/
- for (;;) {
- /*
- * Lock against other receivers so that I don't get stuck in
- * sbwait() after someone else has received my reply for me.
- * Also necessary for connection based protocols to avoid
- * race conditions during a reconnect.
- * If nfs_rcvlock() returns EALREADY, that means that
- * the reply has already been recieved by another
- * process and we can return immediately. In this
- * case, the lock is not taken to avoid races with
- * other processes.
- */
- error = nfs_rcvlock(myrep);
- if (error == EALREADY)
- return (0);
- if (error)
- return (error);
- /*
- * Get the next Rpc reply off the socket
- */
- error = nfs_receive(myrep, &nam, &mrep);
- nfs_rcvunlock(myrep);
- if (error) {
+ if (nfs_realign(&mrep, 5 * NFSX_UNSIGNED) == ENOMEM) {
+ m_freem(mrep);
+ nfsstats.rpcinvalid++;
+ return;
+ }
+
+ /*
+ * Get the xid and check that it is an rpc reply
+ */
+ md = mrep;
+ dpos = mtod(md, caddr_t);
+ tl = nfsm_dissect_nonblock(u_int32_t *, 2*NFSX_UNSIGNED);
+ rxid = *tl++;
+ if (*tl != rpc_reply) {
+ m_freem(mrep);
+nfsmout:
+ nfsstats.rpcinvalid++;
+ return;
+ }
+ mtx_lock(&nfs_reqq_mtx);
+ /*
+ * Loop through the request list to match up the reply
+ * Iff no match, just drop the datagram
+ */
+ TAILQ_FOREACH(rep, &nfs_reqq, r_chain) {
+ if (rep->r_mrep == NULL && rxid == rep->r_xid) {
+ /* Found it.. */
+ rep->r_mrep = mrep;
+ rep->r_md = md;
+ rep->r_dpos = dpos;
/*
- * Ignore routing errors on connectionless protocols??
+ * Update congestion window.
+ * Do the additive increase of
+ * one rpc/rtt.
*/
- if (NFSIGNORE_SOERROR(nmp->nm_soflags, error)) {
- nmp->nm_so->so_error = 0;
- if (myrep->r_flags & R_GETONEREP)
- return (0);
- continue;
+ if (nmp->nm_cwnd <= nmp->nm_sent) {
+ nmp->nm_cwnd +=
+ (NFS_CWNDSCALE * NFS_CWNDSCALE +
+ (nmp->nm_cwnd >> 1)) / nmp->nm_cwnd;
+ if (nmp->nm_cwnd > NFS_MAXCWND)
+ nmp->nm_cwnd = NFS_MAXCWND;
+ }
+ if (rep->r_flags & R_SENT) {
+ rep->r_flags &= ~R_SENT;
+ nmp->nm_sent -= NFS_CWNDSCALE;
}
- return (error);
+ /*
+ * Update rtt using a gain of 0.125 on the mean
+ * and a gain of 0.25 on the deviation.
+ */
+ if (rep->r_flags & R_TIMING) {
+ /*
+ * Since the timer resolution of
+ * NFS_HZ is so course, it can often
+ * result in r_rtt == 0. Since
+ * r_rtt == N means that the actual
+ * rtt is between N+dt and N+2-dt ticks,
+ * add 1.
+ */
+ t1 = rep->r_rtt + 1;
+ t1 -= (NFS_SRTT(rep) >> 3);
+ NFS_SRTT(rep) += t1;
+ if (t1 < 0)
+ t1 = -t1;
+ t1 -= (NFS_SDRTT(rep) >> 2);
+ NFS_SDRTT(rep) += t1;
+ }
+ nmp->nm_timeouts = 0;
+ break;
}
- if (nam)
- FREE(nam, M_SONAME);
+ }
+ /*
+ * If not matched to a request, drop it.
+ * If it's mine, wake up requestor.
+ */
+ if (rep == 0) {
+ nfsstats.rpcunexpected++;
+ m_freem(mrep);
+ } else {
+ mtx_lock(&nfs_reply_mtx);
+ wakeup((caddr_t)rep);
+ mtx_unlock(&nfs_reply_mtx);
+ }
+ mtx_unlock(&nfs_reqq_mtx);
+}
- /*
- * Get the xid and check that it is an rpc reply
- */
- md = mrep;
- dpos = mtod(md, caddr_t);
- tl = nfsm_dissect(u_int32_t *, 2 * NFSX_UNSIGNED);
- rxid = *tl++;
- if (*tl != rpc_reply) {
- nfsstats.rpcinvalid++;
- m_freem(mrep);
-nfsmout:
- if (myrep->r_flags & R_GETONEREP)
- return (0);
- continue;
- }
+static void
+nfs_mark_for_reconnect(struct nfsmount *nmp)
+{
+ struct nfsreq *rp;
- /*
- * Loop through the request list to match up the reply
- * Iff no match, just drop the datagram
- */
- TAILQ_FOREACH(rep, &nfs_reqq, r_chain) {
- if (rep->r_mrep == NULL && rxid == rep->r_xid) {
- /* Found it.. */
- rep->r_mrep = mrep;
- rep->r_md = md;
- rep->r_dpos = dpos;
- /*
- * Update congestion window.
- * Do the additive increase of
- * one rpc/rtt.
- */
- if (nmp->nm_cwnd <= nmp->nm_sent) {
- nmp->nm_cwnd +=
- (NFS_CWNDSCALE * NFS_CWNDSCALE +
- (nmp->nm_cwnd >> 1)) / nmp->nm_cwnd;
- if (nmp->nm_cwnd > NFS_MAXCWND)
- nmp->nm_cwnd = NFS_MAXCWND;
- }
- if (rep->r_flags & R_SENT) {
- rep->r_flags &= ~R_SENT;
- nmp->nm_sent -= NFS_CWNDSCALE;
- }
- /*
- * Update rtt using a gain of 0.125 on the mean
- * and a gain of 0.25 on the deviation.
- */
- if (rep->r_flags & R_TIMING) {
- /*
- * Since the timer resolution of
- * NFS_HZ is so course, it can often
- * result in r_rtt == 0. Since
- * r_rtt == N means that the actual
- * rtt is between N+dt and N+2-dt ticks,
- * add 1.
- */
- t1 = rep->r_rtt + 1;
- t1 -= (NFS_SRTT(rep) >> 3);
- NFS_SRTT(rep) += t1;
- if (t1 < 0)
- t1 = -t1;
- t1 -= (NFS_SDRTT(rep) >> 2);
- NFS_SDRTT(rep) += t1;
+ mtx_lock(&nmp->nm_nfstcpstate.mtx);
+ nmp->nm_nfstcpstate.flags |= NFS_TCP_FORCE_RECONNECT;
+ mtx_unlock(&nmp->nm_nfstcpstate.mtx);
+ /*
+ * Wakeup all processes that are waiting for replies
+ * on this mount point. One of them does the reconnect.
+ */
+ mtx_lock(&nfs_reqq_mtx);
+ TAILQ_FOREACH(rp, &nfs_reqq, r_chain) {
+ if (rp->r_nmp == nmp)
+ wakeup(rp);
+ }
+ mtx_unlock(&nfs_reqq_mtx);
+}
+
+static int
+nfstcp_readable(struct socket *so, int bytes)
+{
+ int retval;
+
+ SOCKBUF_LOCK(&so->so_rcv);
+ retval = (so->so_rcv.sb_cc >= (bytes) ||
+ (so->so_state & SBS_CANTRCVMORE) ||
+ so->so_error);
+ SOCKBUF_UNLOCK(&so->so_rcv);
+ return (retval);
+}
+
+#define nfstcp_marker_readable(so) nfstcp_readable(so, sizeof(u_int32_t))
+
+static void
+nfs_clnt_tcp_soupcall(struct socket *so, void *arg, int waitflag)
+{
+ struct nfsmount *nmp = (struct nfsmount *)arg;
+ struct mbuf *mp = NULL;
+ struct uio auio;
+ int error;
+ u_int32_t len;
+ int rcvflg;
+
+ /*
+ * Don't pick any more data from the socket if we've marked the
+ * mountpoint for reconnect.
+ */
+ mtx_lock(&nmp->nm_nfstcpstate.mtx);
+ if (nmp->nm_nfstcpstate.flags & NFS_TCP_FORCE_RECONNECT) {
+ mtx_unlock(&nmp->nm_nfstcpstate.mtx);
+ return;
+ } else
+ mtx_unlock(&nmp->nm_nfstcpstate.mtx);
+ auio.uio_td = curthread;
+ auio.uio_segflg = UIO_SYSSPACE;
+ auio.uio_rw = UIO_READ;
+ for ( ; ; ) {
+ if (nmp->nm_nfstcpstate.flags & NFS_TCP_EXPECT_RPCMARKER) {
+ if (!nfstcp_marker_readable(so)) {
+ /* Marker is not readable */
+ return;
+ }
+ auio.uio_resid = sizeof(u_int32_t);
+ auio.uio_iov = NULL;
+ auio.uio_iovcnt = 0;
+ mp = NULL;
+ rcvflg = (MSG_DONTWAIT | MSG_SOCALLBCK);
+ error = so->so_proto->pr_usrreqs->pru_soreceive
+ (so, (struct sockaddr **)0,
+ &auio, &mp, (struct mbuf **)0, &rcvflg);
+ /*
+ * We've already tested that the socket is readable. 2 cases
+			 * here, we either read 0 bytes (peer closed connection),
+ * or got some other error. In both cases, we tear down the
+ * connection.
+ */
+ if (error || auio.uio_resid > 0) {
+ if (auio.uio_resid > 0) {
+ log(LOG_ERR,
+ "nfs/tcp clnt: Peer closed connection, tearing down TCP connection\n");
+ } else {
+ log(LOG_ERR,
+ "nfs/tcp clnt: Error %d reading socket, tearing down TCP connection\n",
+ error);
}
- nmp->nm_timeouts = 0;
- break;
+ goto mark_reconnect;
}
+ if (mp == NULL)
+ panic("nfs_clnt_tcp_soupcall: Got empty mbuf chain from sorecv\n");
+ len = ntohl(*mtod(mp, u_int32_t *)) & ~0x80000000;
+ m_freem(mp);
+ /*
+ * This is SERIOUS! We are out of sync with the sender
+ * and forcing a disconnect/reconnect is all I can do.
+ */
+ if (len > NFS_MAXPACKET) {
+ log(LOG_ERR, "%s (%d) from nfs server %s\n",
+ "impossible packet length",
+ len,
+ nmp->nm_mountp->mnt_stat.f_mntfromname);
+ goto mark_reconnect;
+ }
+ nmp->nm_nfstcpstate.rpcresid = len;
+ nmp->nm_nfstcpstate.flags &= ~(NFS_TCP_EXPECT_RPCMARKER);
}
- /*
- * If not matched to a request, drop it.
- * If it's mine, get out.
+ /*
+ * Processed RPC marker or no RPC marker to process.
+ * Pull in and process data.
*/
- if (rep == 0) {
- nfsstats.rpcunexpected++;
- m_freem(mrep);
- } else if (rep == myrep) {
- if (rep->r_mrep == NULL)
- panic("nfsreply nil");
- return (0);
+ if (nmp->nm_nfstcpstate.rpcresid > 0) {
+ if (!nfstcp_readable(so, nmp->nm_nfstcpstate.rpcresid)) {
+ /* All data not readable */
+ return;
+ }
+ auio.uio_resid = nmp->nm_nfstcpstate.rpcresid;
+ auio.uio_iov = NULL;
+ auio.uio_iovcnt = 0;
+ mp = NULL;
+ rcvflg = (MSG_DONTWAIT | MSG_SOCALLBCK);
+ error = so->so_proto->pr_usrreqs->pru_soreceive
+ (so, (struct sockaddr **)0,
+ &auio, &mp, (struct mbuf **)0, &rcvflg);
+ if (error || auio.uio_resid > 0) {
+ if (auio.uio_resid > 0) {
+ log(LOG_ERR,
+ "nfs/tcp clnt: Peer closed connection, tearing down TCP connection\n");
+ } else {
+ log(LOG_ERR,
+ "nfs/tcp clnt: Error %d reading socket, tearing down TCP connection\n",
+ error);
+ }
+ goto mark_reconnect;
+ }
+ if (mp == NULL)
+ panic("nfs_clnt_tcp_soupcall: Got empty mbuf chain from sorecv\n");
+ nmp->nm_nfstcpstate.rpcresid = 0;
+ nmp->nm_nfstcpstate.flags |= NFS_TCP_EXPECT_RPCMARKER;
+ /* We got the entire RPC reply. Match XIDs and wake up requestor */
+ nfs_clnt_match_xid(so, nmp, mp);
}
- if (myrep->r_flags & R_GETONEREP)
- return (0);
}
+
+mark_reconnect:
+ nfs_mark_for_reconnect(nmp);
+}
+
+static void
+nfs_clnt_udp_soupcall(struct socket *so, void *arg, int waitflag)
+{
+ struct nfsmount *nmp = (struct nfsmount *)arg;
+ struct uio auio;
+ struct mbuf *mp = NULL;
+ struct mbuf *control = NULL;
+ int error, rcvflag;
+
+ auio.uio_resid = 1000000;
+ auio.uio_td = curthread;
+ rcvflag = MSG_DONTWAIT;
+ auio.uio_resid = 1000000000;
+ do {
+ mp = control = NULL;
+ error = so->so_proto->pr_usrreqs->pru_soreceive(so,
+ NULL, &auio, &mp,
+ &control, &rcvflag);
+ if (control)
+ m_freem(control);
+ if (mp)
+ nfs_clnt_match_xid(so, nmp, mp);
+ } while (mp && !error);
}
/*
@@ -930,6 +942,7 @@ nfs_request(struct vnode *vp, struct mbuf *mrest, int procnum,
if ((nmp->nm_flag & NFSMNT_NFSV4) != 0)
return nfs4_request(vp, mrest, procnum, td, cred, mrp, mdp, dposp);
MALLOC(rep, struct nfsreq *, sizeof(struct nfsreq), M_NFSREQ, M_WAITOK);
+ rep->r_mrep = rep->r_md = NULL;
rep->r_nmp = nmp;
rep->r_vp = vp;
rep->r_td = td;
@@ -983,9 +996,11 @@ tryagain:
* to put it LAST so timer finds oldest requests first.
*/
s = splsoftclock();
+ mtx_lock(&nfs_reqq_mtx);
if (TAILQ_EMPTY(&nfs_reqq))
callout_reset(&nfs_callout, nfs_ticks, nfs_timer, NULL);
TAILQ_INSERT_TAIL(&nfs_reqq, rep, r_chain);
+ mtx_unlock(&nfs_reqq_mtx);
/*
* If backing off another request or avoiding congestion, don't
@@ -1021,9 +1036,11 @@ tryagain:
* RPC done, unlink the request.
*/
s = splsoftclock();
+ mtx_lock(&nfs_reqq_mtx);
TAILQ_REMOVE(&nfs_reqq, rep, r_chain);
if (TAILQ_EMPTY(&nfs_reqq))
callout_stop(&nfs_callout);
+ mtx_unlock(&nfs_reqq_mtx);
splx(s);
/*
@@ -1044,11 +1061,22 @@ tryagain:
md = rep->r_md;
dpos = rep->r_dpos;
if (error) {
+ /*
+ * If we got interrupted by a signal in nfs_reply(), there's
+ * a very small window where the reply could've come in before
+ * this process got scheduled in. To handle that case, we need
+ * to free the reply if it was delivered.
+ */
+ if (rep->r_mrep != NULL)
+ m_freem(rep->r_mrep);
m_freem(rep->r_mreq);
free((caddr_t)rep, M_NFSREQ);
return (error);
}
+ if (rep->r_mrep == NULL)
+ panic("nfs_request: rep->r_mrep shouldn't be NULL if no error\n");
+
/*
* break down the rpc header and check if ok
*/
@@ -1129,6 +1157,20 @@ nfsmout:
* Scan the nfsreq list and retranmit any requests that have timed out
* To avoid retransmission attempts on STREAM sockets (in the future) make
* sure to set the r_retry field to 0 (implies nm_retry == 0).
+ *
+ * XXX -
+ * For now, since we don't register MPSAFE callouts for the NFS client -
+ * softclock() acquires Giant before calling us. That prevents req entries
+ * from being removed from the list (from nfs_request()). But we still
+ * acquire the nfs reqq mutex to make sure the state of individual req
+ * entries is not modified from RPC reply handling (from socket callback)
+ * while nfs_timer is walking the list of reqs.
+ * The nfs reqq lock cannot be held while we do the pru_send() because of a
+ * lock ordering violation. The NFS client socket callback acquires
+ * inp_lock->nfsreq mutex and pru_send acquires inp_lock. So we drop the
+ * reqq mutex (and reacquire it after the pru_send()). This won't work
+ * when we move to fine grained locking for NFS. When we get to that point,
+ * a rewrite of nfs_timer() will be needed.
*/
void
nfs_timer(void *arg)
@@ -1143,6 +1185,7 @@ nfs_timer(void *arg)
getmicrouptime(&now);
s = splnet();
+ mtx_lock(&nfs_reqq_mtx);
TAILQ_FOREACH(rep, &nfs_reqq, r_chain) {
nmp = rep->r_nmp;
if (rep->r_mrep || (rep->r_flags & R_SOFTTERM))
@@ -1202,12 +1245,14 @@ nfs_timer(void *arg)
(rep->r_flags & R_SENT) ||
nmp->nm_sent < nmp->nm_cwnd) &&
(m = m_copym(rep->r_mreq, 0, M_COPYALL, M_DONTWAIT))){
+ mtx_unlock(&nfs_reqq_mtx);
if ((nmp->nm_flag & NFSMNT_NOCONN) == 0)
error = (*so->so_proto->pr_usrreqs->pru_send)
(so, 0, m, NULL, NULL, curthread);
else
error = (*so->so_proto->pr_usrreqs->pru_send)
(so, 0, m, nmp->nm_nam, NULL, curthread);
+ mtx_lock(&nfs_reqq_mtx);
if (error) {
if (NFSIGNORE_SOERROR(nmp->nm_soflags, error))
so->so_error = 0;
@@ -1235,6 +1280,7 @@ nfs_timer(void *arg)
}
}
}
+ mtx_unlock(&nfs_reqq_mtx);
splx(s);
callout_reset(&nfs_callout, nfs_ticks, nfs_timer, NULL);
}
@@ -1252,20 +1298,24 @@ nfs_nmcancelreqs(nmp)
int i, s;
s = splnet();
+ mtx_lock(&nfs_reqq_mtx);
TAILQ_FOREACH(req, &nfs_reqq, r_chain) {
if (nmp != req->r_nmp || req->r_mrep != NULL ||
(req->r_flags & R_SOFTTERM))
continue;
nfs_softterm(req);
}
+ mtx_unlock(&nfs_reqq_mtx);
splx(s);
for (i = 0; i < 30; i++) {
s = splnet();
+ mtx_lock(&nfs_reqq_mtx);
TAILQ_FOREACH(req, &nfs_reqq, r_chain) {
if (nmp == req->r_nmp)
break;
}
+ mtx_unlock(&nfs_reqq_mtx);
splx(s);
if (req == NULL)
return (0);
@@ -1289,6 +1339,125 @@ nfs_softterm(struct nfsreq *rep)
rep->r_nmp->nm_sent -= NFS_CWNDSCALE;
rep->r_flags &= ~R_SENT;
}
+ /*
+ * Request terminated, wakeup the blocked process, so that we
+ * can return EINTR back.
+ */
+ wakeup((caddr_t)rep);
+}
+
+/*
+ * Any signal that can interrupt an NFS operation in an intr mount
+ * should be added to this set.
+ */
+int nfs_sig_set[] = {
+ SIGINT,
+ SIGTERM,
+ SIGHUP,
+ SIGKILL,
+ SIGQUIT
+};
+
+/*
+ * Check to see if one of the signals in our subset is pending on
+ * the process (in an intr mount).
+ */
+static int
+nfs_sig_pending(sigset_t set)
+{
+ int i;
+
+ for (i = 0 ; i < sizeof(nfs_sig_set)/sizeof(int) ; i++)
+ if (SIGISMEMBER(set, nfs_sig_set[i]))
+ return (1);
+ return (0);
+}
+
+/*
+ * The set/restore sigmask functions are used to (temporarily) overwrite
+ * the process p_sigmask during an RPC call (for example). These are also
+ * used in other places in the NFS client that might tsleep().
+ */
+void
+nfs_set_sigmask(struct thread *td, sigset_t *oldset)
+{
+ sigset_t newset;
+ int i;
+ struct proc *p;
+
+ SIGFILLSET(newset);
+ if (td == NULL)
+ td = curthread; /* XXX */
+ p = td->td_proc;
+ /* Remove the NFS set of signals from newset */
+ PROC_LOCK(p);
+ mtx_lock(&p->p_sigacts->ps_mtx);
+ for (i = 0 ; i < sizeof(nfs_sig_set)/sizeof(int) ; i++) {
+ /*
+ * But make sure we leave the ones already masked
+ * by the process, ie. remove the signal from the
+ * temporary signalmask only if it wasn't already
+ * in p_sigmask.
+ */
+ if (!SIGISMEMBER(td->td_sigmask, nfs_sig_set[i]) &&
+ !SIGISMEMBER(p->p_sigacts->ps_sigignore, nfs_sig_set[i]))
+ SIGDELSET(newset, nfs_sig_set[i]);
+ }
+ mtx_unlock(&p->p_sigacts->ps_mtx);
+ PROC_UNLOCK(p);
+ kern_sigprocmask(td, SIG_SETMASK, &newset, oldset, 0);
+}
+
+void
+nfs_restore_sigmask(struct thread *td, sigset_t *set)
+{
+ if (td == NULL)
+ td = curthread; /* XXX */
+ kern_sigprocmask(td, SIG_SETMASK, set, NULL, 0);
+}
+
+/*
+ * NFS wrapper to msleep(), that shoves a new p_sigmask and restores the
+ * old one after msleep() returns.
+ */
+int
+nfs_msleep(struct thread *td, void *ident, struct mtx *mtx, int priority, char *wmesg, int timo)
+{
+ sigset_t oldset;
+ int error;
+ struct proc *p;
+
+ if ((priority & PCATCH) == 0)
+ return msleep(ident, mtx, priority, wmesg, timo);
+ if (td == NULL)
+ td = curthread; /* XXX */
+ nfs_set_sigmask(td, &oldset);
+ error = msleep(ident, mtx, priority, wmesg, timo);
+ nfs_restore_sigmask(td, &oldset);
+ p = td->td_proc;
+ return (error);
+}
+
+/*
+ * NFS wrapper to tsleep(), that shoves a new p_sigmask and restores the
+ * old one after tsleep() returns.
+ */
+int
+nfs_tsleep(struct thread *td, void *ident, int priority, char *wmesg, int timo)
+{
+ sigset_t oldset;
+ int error;
+ struct proc *p;
+
+ if ((priority & PCATCH) == 0)
+ return tsleep(ident, priority, wmesg, timo);
+ if (td == NULL)
+ td = curthread; /* XXX */
+ nfs_set_sigmask(td, &oldset);
+ error = tsleep(ident, priority, wmesg, timo);
+ nfs_restore_sigmask(td, &oldset);
+ p = td->td_proc;
+ return (error);
}
/*
@@ -1320,7 +1489,7 @@ nfs_sigintr(struct nfsmount *nmp, struct nfsreq *rep, struct thread *td)
mtx_lock(&p->p_sigacts->ps_mtx);
SIGSETNAND(tmpset, p->p_sigacts->ps_sigignore);
mtx_unlock(&p->p_sigacts->ps_mtx);
- if (SIGNOTEMPTY(p->p_siglist) && NFSINT_SIGMASK(tmpset)) {
+ if (SIGNOTEMPTY(p->p_siglist) && nfs_sig_pending(tmpset)) {
PROC_UNLOCK(p);
return (EINTR);
}
@@ -1378,60 +1547,6 @@ nfs_sndunlock(struct nfsreq *rep)
}
}
-static int
-nfs_rcvlock(struct nfsreq *rep)
-{
- int *statep = &rep->r_nmp->nm_state;
- int error, slpflag, slptimeo = 0;
-
- if (rep->r_nmp->nm_flag & NFSMNT_INT)
- slpflag = PCATCH;
- else
- slpflag = 0;
- while (*statep & NFSSTA_RCVLOCK) {
- error = nfs_sigintr(rep->r_nmp, rep, rep->r_td);
- if (error)
- return (error);
- *statep |= NFSSTA_WANTRCV;
- (void) tsleep(statep, slpflag | (PZERO - 1), "nfsrcvlk",
- slptimeo);
- /*
- * If our reply was recieved while we were sleeping,
- * then just return without taking the lock to avoid a
- * situation where a single iod could 'capture' the
- * recieve lock.
- */
- if (rep->r_mrep != NULL)
- return (EALREADY);
- if (slpflag == PCATCH) {
- slpflag = 0;
- slptimeo = 2 * hz;
- }
- }
- /* Always fail if our request has been cancelled. */
- if (rep != NULL && (error = NFS_SIGREP(rep)) != 0)
- return (error);
- *statep |= NFSSTA_RCVLOCK;
- return (0);
-}
-
-/*
- * Unlock the stream socket for others.
- */
-static void
-nfs_rcvunlock(struct nfsreq *rep)
-{
- int *statep = &rep->r_nmp->nm_state;
-
- if ((*statep & NFSSTA_RCVLOCK) == 0)
- panic("nfs rcvunlock");
- *statep &= ~NFSSTA_RCVLOCK;
- if (*statep & NFSSTA_WANTRCV) {
- *statep &= ~NFSSTA_WANTRCV;
- wakeup(statep);
- }
-}
-
/*
* nfs_realign:
*
@@ -1446,8 +1561,15 @@ nfs_rcvunlock(struct nfsreq *rep)
* We would prefer to avoid this situation entirely. The situation does
* not occur with NFS/UDP and is supposed to only occassionally occur
* with TCP. Use vfs.nfs.realign_count and realign_test to check this.
+ *
+ * XXX - This still looks buggy. If there are multiple unaligned mbufs in the
+ * chain passed in, the first loop will allocate a new mbuf for each of them,
+ * but it never seems to chain these replacements together. So, with multiple
+ * unaligned mbufs, we are looking at a fairly serious mbuf leak. However,
+ * this is how it has always been; perhaps misalignment only ever occurs in
+ * the head of the chain.
*/
-static void
+static int
nfs_realign(struct mbuf **pm, int hsiz)
{
struct mbuf *m;
@@ -1457,9 +1579,15 @@ nfs_realign(struct mbuf **pm, int hsiz)
++nfs_realign_test;
while ((m = *pm) != NULL) {
if ((m->m_len & 0x3) || (mtod(m, intptr_t) & 0x3)) {
- MGET(n, M_TRYWAIT, MT_DATA);
+ MGET(n, M_DONTWAIT, MT_DATA);
+ if (n == NULL)
+ return (ENOMEM);
if (m->m_len >= MINCLSIZE) {
- MCLGET(n, M_TRYWAIT);
+ MCLGET(n, M_DONTWAIT);
+ if (n->m_ext.ext_buf == NULL) {
+ m_freem(n);
+ return (ENOMEM);
+ }
}
n->m_len = 0;
break;
@@ -1480,6 +1608,7 @@ nfs_realign(struct mbuf **pm, int hsiz)
m_freem(*pm);
*pm = n;
}
+ return (0);
}
diff --git a/sys/nfsclient/nfs_subs.c b/sys/nfsclient/nfs_subs.c
index 8668a99..9552f95 100644
--- a/sys/nfsclient/nfs_subs.c
+++ b/sys/nfsclient/nfs_subs.c
@@ -94,6 +94,8 @@ int nfs_ticks;
int nfs_pbuf_freecnt = -1; /* start out unlimited */
struct nfs_reqq nfs_reqq;
+struct mtx nfs_reqq_mtx;
+struct mtx nfs_reply_mtx;
struct nfs_bufq nfs_bufq;
/*
@@ -412,6 +414,8 @@ nfs_init(struct vfsconf *vfsp)
*/
TAILQ_INIT(&nfs_reqq);
callout_init(&nfs_callout, 0);
+ mtx_init(&nfs_reqq_mtx, "NFS reqq lock", NULL, MTX_DEF);
+ mtx_init(&nfs_reply_mtx, "Synch NFS reply posting", NULL, MTX_DEF);
nfs_pbuf_freecnt = nswbuf / 2 + 1;
diff --git a/sys/nfsclient/nfs_vfsops.c b/sys/nfsclient/nfs_vfsops.c
index 801335b..ff15eb1 100644
--- a/sys/nfsclient/nfs_vfsops.c
+++ b/sys/nfsclient/nfs_vfsops.c
@@ -817,6 +817,10 @@ mountnfs(struct nfs_args *argp, struct mount *mp, struct sockaddr *nam,
nfs_decode_args(nmp, argp);
+ if (nmp->nm_sotype == SOCK_STREAM)
+ mtx_init(&nmp->nm_nfstcpstate.mtx, "NFS/TCP state lock",
+ NULL, MTX_DEF);
+
/*
* For Connection based sockets (TCP,...) defer the connect until
* the first request, in case the server is not responding.
@@ -862,6 +866,8 @@ mountnfs(struct nfs_args *argp, struct mount *mp, struct sockaddr *nam,
return (0);
bad:
+ if (nmp->nm_sotype == SOCK_STREAM)
+ mtx_destroy(&nmp->nm_nfstcpstate.mtx);
nfs_disconnect(nmp);
uma_zfree(nfsmount_zone, nmp);
FREE(nam, M_SONAME);
@@ -903,6 +909,9 @@ nfs_unmount(struct mount *mp, int mntflags, struct thread *td)
nfs_disconnect(nmp);
FREE(nmp->nm_nam, M_SONAME);
+ if (nmp->nm_sotype == SOCK_STREAM)
+ mtx_destroy(&nmp->nm_nfstcpstate.mtx);
+
uma_zfree(nfsmount_zone, nmp);
return (0);
}
diff --git a/sys/nfsclient/nfs_vnops.c b/sys/nfsclient/nfs_vnops.c
index a4dbe18..93d1853 100644
--- a/sys/nfsclient/nfs_vnops.c
+++ b/sys/nfsclient/nfs_vnops.c
@@ -59,6 +59,7 @@ __FBSDID("$FreeBSD$");
#include <sys/lockf.h>
#include <sys/stat.h>
#include <sys/sysctl.h>
+#include <sys/signalvar.h>
#include <vm/vm.h>
#include <vm/vm_object.h>
diff --git a/sys/nfsclient/nfsm_subs.h b/sys/nfsclient/nfsm_subs.h
index a91777e..9ab9b9e 100644
--- a/sys/nfsclient/nfsm_subs.h
+++ b/sys/nfsclient/nfsm_subs.h
@@ -134,7 +134,10 @@ do { \
#define nfsm_request(v, t, p, c) \
do { \
+ sigset_t oldset; \
+ nfs_set_sigmask(p, &oldset); \
error = nfs_request((v), mreq, (t), (p), (c), &mrep, &md, &dpos); \
+ nfs_restore_sigmask(p, &oldset); \
if (error != 0) { \
if (error & NFSERR_RETERR) \
error &= ~NFSERR_RETERR; \
diff --git a/sys/nfsclient/nfsmount.h b/sys/nfsclient/nfsmount.h
index 5064370..7bc593c 100644
--- a/sys/nfsclient/nfsmount.h
+++ b/sys/nfsclient/nfsmount.h
@@ -36,6 +36,14 @@
#ifndef _NFSCLIENT_NFSMOUNT_H_
#define _NFSCLIENT_NFSMOUNT_H_
+struct nfs_tcp_mountstate {
+ int rpcresid;
+#define NFS_TCP_EXPECT_RPCMARKER 0x0001 /* Expect to see a RPC/TCP marker next */
+#define NFS_TCP_FORCE_RECONNECT 0x0002 /* Force a TCP reconnect */
+ int flags;
+ struct mtx mtx;
+};
+
/*
* Mount structure.
* One allocated on every NFS mount.
@@ -79,6 +87,7 @@ struct nfsmount {
struct nfs_rpcops *nm_rpcops;
int nm_tprintf_initial_delay; /* initial delay */
int nm_tprintf_delay; /* interval for messages */
+ struct nfs_tcp_mountstate nm_nfstcpstate;
/* NFSv4 */
uint64_t nm_clientid;
OpenPOWER on IntegriCloud