summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--sys/nfsclient/nfs.h25
-rw-r--r--sys/nfsclient/nfs_socket.c191
-rw-r--r--sys/nfsclient/nfsmount.h4
3 files changed, 158 insertions, 62 deletions
diff --git a/sys/nfsclient/nfs.h b/sys/nfsclient/nfs.h
index c3c54b2..9dc34a7 100644
--- a/sys/nfsclient/nfs.h
+++ b/sys/nfsclient/nfs.h
@@ -257,6 +257,31 @@ extern int nfs_debug;
#endif
+/*
+ * On fast networks, the estimator will try to reduce the
+ * timeout lower than the latency of the server's disks,
+ * which results in too many timeouts, so cap the lower
+ * bound.
+ */
+#define NFS_MINRTO (NFS_HZ >> 2)
+
+/*
+ * Keep the RTO from increasing to unreasonably large values
+ * when a server is not responding.
+ */
+#define NFS_MAXRTO (20 * NFS_HZ)
+
+enum nfs_rto_timer_t {
+ NFS_DEFAULT_TIMER,
+ NFS_GETATTR_TIMER,
+ NFS_LOOKUP_TIMER,
+ NFS_READ_TIMER,
+ NFS_WRITE_TIMER,
+};
+#define NFS_MAX_TIMER (NFS_WRITE_TIMER)
+
+#define NFS_INITRTT (NFS_HZ << 3)
+
vfs_init_t nfs_init;
vfs_uninit_t nfs_uninit;
int nfs_mountroot(struct mount *mp, struct thread *td);
diff --git a/sys/nfsclient/nfs_socket.c b/sys/nfsclient/nfs_socket.c
index d2fd025..5e12939 100644
--- a/sys/nfsclient/nfs_socket.c
+++ b/sys/nfsclient/nfs_socket.c
@@ -79,38 +79,6 @@ __FBSDID("$FreeBSD$");
extern u_int32_t nfs_xid;
-/*
- * Estimate rto for an nfs rpc sent via. an unreliable datagram.
- * Use the mean and mean deviation of rtt for the appropriate type of rpc
- * for the frequent rpcs and a default for the others.
- * The justification for doing "other" this way is that these rpcs
- * happen so infrequently that timer est. would probably be stale.
- * Also, since many of these rpcs are
- * non-idempotent, a conservative timeout is desired.
- * getattr, lookup - A+2D
- * read, write - A+4D
- * other - nm_timeo
- */
-#define NFS_RTO(n, t) \
- ((t) == 0 ? (n)->nm_timeo : \
- ((t) < 3 ? \
- (((((n)->nm_srtt[t-1] + 3) >> 2) + (n)->nm_sdrtt[t-1] + 1) >> 1) : \
- ((((n)->nm_srtt[t-1] + 7) >> 3) + (n)->nm_sdrtt[t-1] + 1)))
-#define NFS_SRTT(r) (r)->r_nmp->nm_srtt[proct[(r)->r_procnum] - 1]
-#define NFS_SDRTT(r) (r)->r_nmp->nm_sdrtt[proct[(r)->r_procnum] - 1]
-
-/*
- * Defines which timer to use for the procnum.
- * 0 - default
- * 1 - getattr
- * 2 - lookup
- * 3 - read
- * 4 - write
- */
-static int proct[NFS_NPROCS] = {
- 0, 1, 0, 2, 1, 3, 3, 4, 0, 0, 0, 0, 0, 0, 0, 0, 3, 3, 0, 0, 0, 0, 0,
-};
-
static int nfs_realign_test;
static int nfs_realign_count;
static int nfs_bufpackets = 4;
@@ -157,6 +125,132 @@ static void nfs_clnt_udp_soupcall(struct socket *so, void *arg, int waitflag);
extern struct mtx nfs_reqq_mtx;
/*
+ * RTT estimator
+ */
+
+static enum nfs_rto_timer_t nfs_proct[NFS_NPROCS] = {
+ NFS_DEFAULT_TIMER, /* NULL */
+ NFS_GETATTR_TIMER, /* GETATTR */
+ NFS_DEFAULT_TIMER, /* SETATTR */
+ NFS_LOOKUP_TIMER, /* LOOKUP */
+ NFS_GETATTR_TIMER, /* ACCESS */
+ NFS_READ_TIMER, /* READLINK */
+ NFS_READ_TIMER, /* READ */
+ NFS_WRITE_TIMER, /* WRITE */
+ NFS_DEFAULT_TIMER, /* CREATE */
+ NFS_DEFAULT_TIMER, /* MKDIR */
+ NFS_DEFAULT_TIMER, /* SYMLINK */
+ NFS_DEFAULT_TIMER, /* MKNOD */
+ NFS_DEFAULT_TIMER, /* REMOVE */
+ NFS_DEFAULT_TIMER, /* RMDIR */
+ NFS_DEFAULT_TIMER, /* RENAME */
+ NFS_DEFAULT_TIMER, /* LINK */
+ NFS_READ_TIMER, /* READDIR */
+ NFS_READ_TIMER, /* READDIRPLUS */
+ NFS_DEFAULT_TIMER, /* FSSTAT */
+ NFS_DEFAULT_TIMER, /* FSINFO */
+ NFS_DEFAULT_TIMER, /* PATHCONF */
+ NFS_DEFAULT_TIMER, /* COMMIT */
+ NFS_DEFAULT_TIMER, /* NOOP */
+};
+
+/*
+ * Choose the correct RTT timer for this NFS procedure.
+ */
+static inline enum nfs_rto_timer_t
+nfs_rto_timer(u_int32_t procnum)
+{
+ return nfs_proct[procnum];
+}
+
+/*
+ * Initialize the RTT estimator state for a new mount point.
+ */
+static void
+nfs_init_rtt(struct nfsmount *nmp)
+{
+ int i;
+
+ for (i = 0; i < NFS_MAX_TIMER; i++)
+ nmp->nm_srtt[i] = NFS_INITRTT;
+ for (i = 0; i < NFS_MAX_TIMER; i++)
+ nmp->nm_sdrtt[i] = 0;
+}
+
+/*
+ * Update a mount point's RTT estimator state using data from the
+ * passed-in request.
+ *
+ * Use a gain of 0.125 on the mean and a gain of 0.25 on the deviation.
+ *
+ * NB: Since the timer resolution of NFS_HZ is so course, it can often
+ * result in r_rtt == 0. Since r_rtt == N means that the actual RTT is
+ * between N + dt and N + 2 - dt ticks, add 1 before calculating the
+ * update values.
+ */
+static void
+nfs_update_rtt(struct nfsreq *rep)
+{
+ int t1 = rep->r_rtt + 1;
+ int index = nfs_rto_timer(rep->r_procnum) - 1;
+ int *srtt = &rep->r_nmp->nm_srtt[index];
+ int *sdrtt = &rep->r_nmp->nm_sdrtt[index];
+
+ t1 -= *srtt >> 3;
+ *srtt += t1;
+ if (t1 < 0)
+ t1 = -t1;
+ t1 -= *sdrtt >> 2;
+ *sdrtt += t1;
+}
+
+/*
+ * Estimate RTO for an NFS RPC sent via an unreliable datagram.
+ *
+ * Use the mean and mean deviation of RTT for the appropriate type
+ * of RPC for the frequent RPCs and a default for the others.
+ * The justification for doing "other" this way is that these RPCs
+ * happen so infrequently that timer est. would probably be stale.
+ * Also, since many of these RPCs are non-idempotent, a conservative
+ * timeout is desired.
+ *
+ * getattr, lookup - A+2D
+ * read, write - A+4D
+ * other - nm_timeo
+ */
+static int
+nfs_estimate_rto(struct nfsmount *nmp, u_int32_t procnum)
+{
+ enum nfs_rto_timer_t timer = nfs_rto_timer(procnum);
+ int index = timer - 1;
+ int rto;
+
+ switch (timer) {
+ case NFS_GETATTR_TIMER:
+ case NFS_LOOKUP_TIMER:
+ rto = ((nmp->nm_srtt[index] + 3) >> 2) +
+ ((nmp->nm_sdrtt[index] + 1) >> 1);
+ break;
+ case NFS_READ_TIMER:
+ case NFS_WRITE_TIMER:
+ rto = ((nmp->nm_srtt[index] + 7) >> 3) +
+ (nmp->nm_sdrtt[index] + 1);
+ break;
+ default:
+ rto = nmp->nm_timeo;
+ return (rto);
+ }
+
+ if (rto < NFS_MINRTO)
+ rto = NFS_MINRTO;
+ else if (rto > NFS_MAXRTO)
+ rto = NFS_MAXRTO;
+
+ return (rto);
+}
+
+
+/*
* Initialize sockets and congestion for a new NFS connection.
* We do not free the sockaddr if error.
*/
@@ -357,10 +451,7 @@ nfs_connect(struct nfsmount *nmp, struct nfsreq *rep)
mtx_lock(&nmp->nm_mtx);
/* Initialize other non-zero congestion variables */
- nmp->nm_srtt[0] = nmp->nm_srtt[1] = nmp->nm_srtt[2] =
- nmp->nm_srtt[3] = (NFS_TIMEO << 3);
- nmp->nm_sdrtt[0] = nmp->nm_sdrtt[1] = nmp->nm_sdrtt[2] =
- nmp->nm_sdrtt[3] = 0;
+ nfs_init_rtt(nmp);
nmp->nm_cwnd = NFS_MAXCWND / 2; /* Initial send window */
nmp->nm_sent = 0;
nmp->nm_timeouts = 0;
@@ -685,7 +776,6 @@ nfs_clnt_match_xid(struct socket *so,
caddr_t dpos;
u_int32_t rxid, *tl;
struct nfsreq *rep;
- register int32_t t1;
int error;
/*
@@ -743,27 +833,8 @@ nfsmout:
rep->r_flags &= ~R_SENT;
nmp->nm_sent -= NFS_CWNDSCALE;
}
- /*
- * Update rtt using a gain of 0.125 on the mean
- * and a gain of 0.25 on the deviation.
- */
- if (rep->r_flags & R_TIMING) {
- /*
- * Since the timer resolution of
- * NFS_HZ is so course, it can often
- * result in r_rtt == 0. Since
- * r_rtt == N means that the actual
- * rtt is between N+dt and N+2-dt ticks,
- * add 1.
- */
- t1 = rep->r_rtt + 1;
- t1 -= (NFS_SRTT(rep) >> 3);
- NFS_SRTT(rep) += t1;
- if (t1 < 0)
- t1 = -t1;
- t1 -= (NFS_SDRTT(rep) >> 2);
- NFS_SDRTT(rep) += t1;
- }
+ if (rep->r_flags & R_TIMING)
+ nfs_update_rtt(rep);
nmp->nm_timeouts = 0;
wakeup((caddr_t)rep);
mtx_unlock(&rep->r_mtx);
@@ -1073,7 +1144,7 @@ tryagain:
else
rep->r_retry = NFS_MAXREXMIT + 1; /* past clip limit */
rep->r_rtt = rep->r_rexmit = 0;
- if (proct[procnum] > 0)
+ if (nfs_rto_timer(procnum) != NFS_DEFAULT_TIMER)
rep->r_flags = R_TIMING;
else
rep->r_flags = 0;
@@ -1328,7 +1399,7 @@ nfs_timer(void *arg)
if (nmp->nm_flag & NFSMNT_DUMBTIMR)
timeo = nmp->nm_timeo;
else
- timeo = NFS_RTO(nmp, proct[rep->r_procnum]);
+ timeo = nfs_estimate_rto(nmp, rep->r_procnum);
if (nmp->nm_timeouts > 0)
timeo *= nfs_backoff[nmp->nm_timeouts - 1];
if (rep->r_rtt <= timeo) {
diff --git a/sys/nfsclient/nfsmount.h b/sys/nfsclient/nfsmount.h
index e7b9d0f..4fd2afe 100644
--- a/sys/nfsclient/nfsmount.h
+++ b/sys/nfsclient/nfsmount.h
@@ -64,8 +64,8 @@ struct nfsmount {
struct sockaddr *nm_nam; /* Addr of server */
int nm_timeo; /* Init timer for NFSMNT_DUMBTIMR */
int nm_retry; /* Max retries */
- int nm_srtt[4]; /* Timers for rpcs */
- int nm_sdrtt[4];
+ int nm_srtt[NFS_MAX_TIMER], /* RTT Timers for rpcs */
+ nm_sdrtt[NFS_MAX_TIMER];
int nm_sent; /* Request send count */
int nm_cwnd; /* Request send window */
int nm_timeouts; /* Request timeouts */
OpenPOWER on IntegriCloud