summary refs log tree commit diff stats
diff options
context:
space:
mode:
author glebius <glebius@FreeBSD.org> 2012-02-05 16:53:02 +0000
committer glebius <glebius@FreeBSD.org> 2012-02-05 16:53:02 +0000
commit 4326beb059fb2e0786289b19b1bf9a8b7a2b824b (patch)
tree dcb36bc519c1f170cc8c8a32ba0dd1c8c7acfc78
parent 96baefc0cb3bd4bac8a5c692c335abaf29650d71 (diff)
download FreeBSD-src-4326beb059fb2e0786289b19b1bf9a8b7a2b824b.zip
FreeBSD-src-4326beb059fb2e0786289b19b1bf9a8b7a2b824b.tar.gz
Add new socket options: TCP_KEEPINIT, TCP_KEEPIDLE, TCP_KEEPINTVL and
TCP_KEEPCNT, which allow control of the initial timeout, idle time, idle re-send interval and idle send count on a per-socket basis. Reviewed by: andre, bz, lstewart
-rw-r--r-- share/man/man4/tcp.4 71
-rw-r--r-- sys/netinet/tcp.h 4
-rw-r--r-- sys/netinet/tcp_input.c 16
-rw-r--r-- sys/netinet/tcp_syncache.c 10
-rw-r--r-- sys/netinet/tcp_timer.c 21
-rw-r--r-- sys/netinet/tcp_timer.h 8
-rw-r--r-- sys/netinet/tcp_usrreq.c 61
-rw-r--r-- sys/netinet/tcp_var.h 7
-rw-r--r-- sys/sys/param.h 2
9 files changed, 171 insertions, 29 deletions
diff --git a/share/man/man4/tcp.4 b/share/man/man4/tcp.4
index e2d4ad3..34c2a28 100644
--- a/share/man/man4/tcp.4
+++ b/share/man/man4/tcp.4
@@ -38,7 +38,7 @@
.\" From: @(#)tcp.4 8.1 (Berkeley) 6/5/93
.\" $FreeBSD$
.\"
-.Dd November 14, 2011
+.Dd February 5, 2012
.Dt TCP 4
.Os
.Sh NAME
@@ -146,6 +146,65 @@ connection.
See
.Xr mod_cc 4
for details.
+.It Dv TCP_KEEPINIT
+This write-only
+.Xr setsockopt 2
+option accepts a per-socket timeout argument of
+.Vt "u_int"
+in seconds, for new, non-established
+.Tn TCP
+connections.
+For the global default in milliseconds see
+.Va keepinit
+in the
+.Sx MIB Variables
+section further down.
+.It Dv TCP_KEEPIDLE
+This write-only
+.Xr setsockopt 2
+option accepts an argument of
+.Vt "u_int"
+for the amount of time, in seconds, that the connection must be idle
+before keepalive probes (if enabled) are sent for the connection of this
+socket.
+If set on a listening socket, the value is inherited by the newly created
+socket upon
+.Xr accept 2 .
+For the global default in milliseconds see
+.Va keepidle
+in the
+.Sx MIB Variables
+section further down.
+.It Dv TCP_KEEPINTVL
+This write-only
+.Xr setsockopt 2
+option accepts an argument of
+.Vt "u_int"
+to set the per-socket interval, in seconds, between keepalive probes sent
+to a peer.
+If set on a listening socket, the value is inherited by the newly created
+socket upon
+.Xr accept 2 .
+For the global default in milliseconds see
+.Va keepintvl
+in the
+.Sx MIB Variables
+section further down.
+.It Dv TCP_KEEPCNT
+This write-only
+.Xr setsockopt 2
+option accepts an argument of
+.Vt "u_int"
+and allows a per-socket tuning of the number of probes sent, with no response,
+before the connection will be dropped.
+If set on a listening socket, the value is inherited by the newly created
+socket upon
+.Xr accept 2 .
+For the global default see
+.Va keepcnt
+in the
+.Sx MIB Variables
+section further down.
.It Dv TCP_NODELAY
Under most circumstances,
.Tn TCP
@@ -296,17 +355,21 @@ The Maximum Segment Lifetime, in milliseconds, for a packet.
Timeout, in milliseconds, for new, non-established
.Tn TCP
connections.
+The default is 75000 msec.
.It Va keepidle
Amount of time, in milliseconds, that the connection must be idle
before keepalive probes (if enabled) are sent.
+The default is 7200000 msec (2 hours).
.It Va keepintvl
The interval, in milliseconds, between keepalive probes sent to remote
machines, when no response is received on a
.Va keepidle
probe.
-After
-.Dv TCPTV_KEEPCNT
-(default 8) probes are sent, with no response, the connection is dropped.
+The default is 75000 msec.
+.It Va keepcnt
+Number of probes sent, with no response, before a connection
+is dropped.
+The default is 8 packets.
.It Va always_keepalive
Assume that
.Dv SO_KEEPALIVE
diff --git a/sys/netinet/tcp.h b/sys/netinet/tcp.h
index 805a561..c714360 100644
--- a/sys/netinet/tcp.h
+++ b/sys/netinet/tcp.h
@@ -159,6 +159,10 @@ struct tcphdr {
#define TCP_MD5SIG 0x10 /* use MD5 digests (RFC2385) */
#define TCP_INFO 0x20 /* retrieve tcp_info structure */
#define TCP_CONGESTION 0x40 /* get/set congestion control algorithm */
+#define TCP_KEEPINIT 0x80 /* N, time to establish connection */
+#define TCP_KEEPIDLE 0x100 /* L,N,X start keepalives after this period */
+#define TCP_KEEPINTVL 0x200 /* L,N interval between keepalives */
+#define TCP_KEEPCNT 0x400 /* L,N number of keepalives before close */
#define TCP_CA_NAME_MAX 16 /* max congestion control name length */
diff --git a/sys/netinet/tcp_input.c b/sys/netinet/tcp_input.c
index db373cd..0fadf87 100644
--- a/sys/netinet/tcp_input.c
+++ b/sys/netinet/tcp_input.c
@@ -1446,7 +1446,7 @@ tcp_do_segment(struct mbuf *m, struct tcphdr *th, struct socket *so,
*/
tp->t_rcvtime = ticks;
if (TCPS_HAVEESTABLISHED(tp->t_state))
- tcp_timer_activate(tp, TT_KEEP, tcp_keepidle);
+ tcp_timer_activate(tp, TT_KEEP, TP_KEEPIDLE(tp));
/*
* Unscale the window into a 32-bit value.
@@ -1889,7 +1889,8 @@ tcp_do_segment(struct mbuf *m, struct tcphdr *th, struct socket *so,
} else {
tp->t_state = TCPS_ESTABLISHED;
cc_conn_init(tp);
- tcp_timer_activate(tp, TT_KEEP, tcp_keepidle);
+ tcp_timer_activate(tp, TT_KEEP,
+ TP_KEEPIDLE(tp));
}
} else {
/*
@@ -2293,7 +2294,7 @@ tcp_do_segment(struct mbuf *m, struct tcphdr *th, struct socket *so,
} else {
tp->t_state = TCPS_ESTABLISHED;
cc_conn_init(tp);
- tcp_timer_activate(tp, TT_KEEP, tcp_keepidle);
+ tcp_timer_activate(tp, TT_KEEP, TP_KEEPIDLE(tp));
}
/*
* If segment contains data or ACK, will call tcp_reass()
@@ -2630,12 +2631,11 @@ process_ACK:
* compressed state.
*/
if (so->so_rcv.sb_state & SBS_CANTRCVMORE) {
- int timeout;
-
soisdisconnected(so);
- timeout = (tcp_fast_finwait2_recycle) ?
- tcp_finwait2_timeout : tcp_maxidle;
- tcp_timer_activate(tp, TT_2MSL, timeout);
+ tcp_timer_activate(tp, TT_2MSL,
+ (tcp_fast_finwait2_recycle ?
+ tcp_finwait2_timeout :
+ TP_MAXIDLE(tp)));
}
tp->t_state = TCPS_FIN_WAIT_2;
}
diff --git a/sys/netinet/tcp_syncache.c b/sys/netinet/tcp_syncache.c
index 2e9c37c..baa0f92 100644
--- a/sys/netinet/tcp_syncache.c
+++ b/sys/netinet/tcp_syncache.c
@@ -845,7 +845,15 @@ syncache_socket(struct syncache *sc, struct socket *lso, struct mbuf *m)
*/
if (sc->sc_rxmits > 1)
tp->snd_cwnd = tp->t_maxseg;
- tcp_timer_activate(tp, TT_KEEP, tcp_keepinit);
+
+ /*
+ * Copy and activate timers.
+ */
+ tp->t_keepinit = sototcpcb(lso)->t_keepinit;
+ tp->t_keepidle = sototcpcb(lso)->t_keepidle;
+ tp->t_keepintvl = sototcpcb(lso)->t_keepintvl;
+ tp->t_keepcnt = sototcpcb(lso)->t_keepcnt;
+ tcp_timer_activate(tp, TT_KEEP, TP_KEEPINIT(tp));
INP_WUNLOCK(inp);
diff --git a/sys/netinet/tcp_timer.c b/sys/netinet/tcp_timer.c
index 1651d69..9c3c749 100644
--- a/sys/netinet/tcp_timer.c
+++ b/sys/netinet/tcp_timer.c
@@ -111,12 +111,12 @@ int tcp_finwait2_timeout;
SYSCTL_PROC(_net_inet_tcp, OID_AUTO, finwait2_timeout, CTLTYPE_INT|CTLFLAG_RW,
&tcp_finwait2_timeout, 0, sysctl_msec_to_ticks, "I", "FIN-WAIT2 timeout");
+int tcp_keepcnt = TCPTV_KEEPCNT;
+SYSCTL_INT(_net_inet_tcp, OID_AUTO, keepcnt, CTLFLAG_RW, &tcp_keepcnt, 0,
+ "Number of keepalive probes to send");
-static int tcp_keepcnt = TCPTV_KEEPCNT;
/* max idle probes */
int tcp_maxpersistidle;
- /* max idle time in persist */
-int tcp_maxidle;
static int per_cpu_timers = 0;
SYSCTL_INT(_net_inet_tcp, OID_AUTO, per_cpu_timers, CTLFLAG_RW,
@@ -138,7 +138,6 @@ tcp_slowtimo(void)
VNET_LIST_RLOCK_NOSLEEP();
VNET_FOREACH(vnet_iter) {
CURVNET_SET(vnet_iter);
- tcp_maxidle = tcp_keepcnt * tcp_keepintvl;
INP_INFO_WLOCK(&V_tcbinfo);
(void) tcp_tw_2msl_scan(0);
INP_INFO_WUNLOCK(&V_tcbinfo);
@@ -255,9 +254,9 @@ tcp_timer_2msl(void *xtp)
tp = tcp_close(tp);
} else {
if (tp->t_state != TCPS_TIME_WAIT &&
- ticks - tp->t_rcvtime <= tcp_maxidle)
- callout_reset_on(&tp->t_timers->tt_2msl, tcp_keepintvl,
- tcp_timer_2msl, tp, INP_CPU(inp));
+ ticks - tp->t_rcvtime <= TP_MAXIDLE(tp))
+ callout_reset_on(&tp->t_timers->tt_2msl,
+ TP_KEEPINTVL(tp), tcp_timer_2msl, tp, INP_CPU(inp));
else
tp = tcp_close(tp);
}
@@ -318,7 +317,7 @@ tcp_timer_keep(void *xtp)
goto dropit;
if ((always_keepalive || inp->inp_socket->so_options & SO_KEEPALIVE) &&
tp->t_state <= TCPS_CLOSING) {
- if (ticks - tp->t_rcvtime >= tcp_keepidle + tcp_maxidle)
+ if (ticks - tp->t_rcvtime >= TP_KEEPIDLE(tp) + TP_MAXIDLE(tp))
goto dropit;
/*
* Send a packet designed to force a response
@@ -340,9 +339,11 @@ tcp_timer_keep(void *xtp)
tp->rcv_nxt, tp->snd_una - 1, 0);
free(t_template, M_TEMP);
}
- callout_reset_on(&tp->t_timers->tt_keep, tcp_keepintvl, tcp_timer_keep, tp, INP_CPU(inp));
+ callout_reset_on(&tp->t_timers->tt_keep, TP_KEEPINTVL(tp),
+ tcp_timer_keep, tp, INP_CPU(inp));
} else
- callout_reset_on(&tp->t_timers->tt_keep, tcp_keepidle, tcp_timer_keep, tp, INP_CPU(inp));
+ callout_reset_on(&tp->t_timers->tt_keep, TP_KEEPIDLE(tp),
+ tcp_timer_keep, tp, INP_CPU(inp));
#ifdef TCPDEBUG
if (inp->inp_socket->so_options & SO_DEBUG)
diff --git a/sys/netinet/tcp_timer.h b/sys/netinet/tcp_timer.h
index 4bfcdf6..2fc8f7b 100644
--- a/sys/netinet/tcp_timer.h
+++ b/sys/netinet/tcp_timer.h
@@ -153,10 +153,16 @@ struct tcp_timer {
#define TT_KEEP 0x08
#define TT_2MSL 0x10
+#define TP_KEEPINIT(tp) ((tp)->t_keepinit ? (tp)->t_keepinit : tcp_keepinit)
+#define TP_KEEPIDLE(tp) ((tp)->t_keepidle ? (tp)->t_keepidle : tcp_keepidle)
+#define TP_KEEPINTVL(tp) ((tp)->t_keepintvl ? (tp)->t_keepintvl : tcp_keepintvl)
+#define TP_KEEPCNT(tp) ((tp)->t_keepcnt ? (tp)->t_keepcnt : tcp_keepcnt)
+#define TP_MAXIDLE(tp) (TP_KEEPCNT(tp) * TP_KEEPINTVL(tp))
+
extern int tcp_keepinit; /* time to establish connection */
extern int tcp_keepidle; /* time before keepalive probes begin */
extern int tcp_keepintvl; /* time between keepalive probes */
-extern int tcp_maxidle; /* time to drop after starting probes */
+extern int tcp_keepcnt; /* number of keepalives */
extern int tcp_delacktime; /* time before sending a delayed ACK */
extern int tcp_maxpersistidle;
extern int tcp_rexmit_min;
diff --git a/sys/netinet/tcp_usrreq.c b/sys/netinet/tcp_usrreq.c
index 5e2af8f..a9045f3 100644
--- a/sys/netinet/tcp_usrreq.c
+++ b/sys/netinet/tcp_usrreq.c
@@ -45,6 +45,7 @@ __FBSDID("$FreeBSD$");
#include <sys/param.h>
#include <sys/systm.h>
+#include <sys/limits.h>
#include <sys/malloc.h>
#include <sys/kernel.h>
#include <sys/sysctl.h>
@@ -1118,7 +1119,7 @@ tcp_connect(struct tcpcb *tp, struct sockaddr *nam, struct thread *td)
soisconnecting(so);
TCPSTAT_INC(tcps_connattempt);
tp->t_state = TCPS_SYN_SENT;
- tcp_timer_activate(tp, TT_KEEP, tcp_keepinit);
+ tcp_timer_activate(tp, TT_KEEP, TP_KEEPINIT(tp));
tp->iss = tcp_new_isn(tp);
tcp_sendseqinit(tp);
@@ -1191,7 +1192,7 @@ tcp6_connect(struct tcpcb *tp, struct sockaddr *nam, struct thread *td)
soisconnecting(so);
TCPSTAT_INC(tcps_connattempt);
tp->t_state = TCPS_SYN_SENT;
- tcp_timer_activate(tp, TT_KEEP, tcp_keepinit);
+ tcp_timer_activate(tp, TT_KEEP, TP_KEEPINIT(tp));
tp->iss = tcp_new_isn(tp);
tcp_sendseqinit(tp);
@@ -1272,6 +1273,7 @@ int
tcp_ctloutput(struct socket *so, struct sockopt *sopt)
{
int error, opt, optval;
+ u_int ui;
struct inpcb *inp;
struct tcpcb *tp;
struct tcp_info ti;
@@ -1439,6 +1441,59 @@ tcp_ctloutput(struct socket *so, struct sockopt *sopt)
INP_WUNLOCK(inp);
break;
+ case TCP_KEEPIDLE:
+ case TCP_KEEPINTVL:
+ case TCP_KEEPCNT:
+ case TCP_KEEPINIT:
+ INP_WUNLOCK(inp);
+ error = sooptcopyin(sopt, &ui, sizeof(ui), sizeof(ui));
+ if (error)
+ return (error);
+
+ if (ui > (UINT_MAX / hz)) {
+ error = EINVAL;
+ break;
+ }
+ ui *= hz;
+
+ INP_WLOCK_RECHECK(inp);
+ switch (sopt->sopt_name) {
+ case TCP_KEEPIDLE:
+ tp->t_keepidle = ui;
+ /*
+ * XXX: better check current remaining
+ * timeout and "merge" it with new value.
+ */
+ if ((tp->t_state > TCPS_LISTEN) &&
+ (tp->t_state <= TCPS_CLOSING))
+ tcp_timer_activate(tp, TT_KEEP,
+ TP_KEEPIDLE(tp));
+ break;
+ case TCP_KEEPINTVL:
+ tp->t_keepintvl = ui;
+ if ((tp->t_state == TCPS_FIN_WAIT_2) &&
+ (TP_MAXIDLE(tp) > 0))
+ tcp_timer_activate(tp, TT_2MSL,
+ TP_MAXIDLE(tp));
+ break;
+ case TCP_KEEPCNT:
+ tp->t_keepcnt = ui;
+ if ((tp->t_state == TCPS_FIN_WAIT_2) &&
+ (TP_MAXIDLE(tp) > 0))
+ tcp_timer_activate(tp, TT_2MSL,
+ TP_MAXIDLE(tp));
+ break;
+ case TCP_KEEPINIT:
+ tp->t_keepinit = ui;
+ if (tp->t_state == TCPS_SYN_RECEIVED ||
+ tp->t_state == TCPS_SYN_SENT)
+ tcp_timer_activate(tp, TT_KEEP,
+ TP_KEEPINIT(tp));
+ break;
+ }
+ INP_WUNLOCK(inp);
+ break;
+
default:
INP_WUNLOCK(inp);
error = ENOPROTOOPT;
@@ -1636,7 +1691,7 @@ tcp_usrclosed(struct tcpcb *tp)
int timeout;
timeout = (tcp_fast_finwait2_recycle) ?
- tcp_finwait2_timeout : tcp_maxidle;
+ tcp_finwait2_timeout : TP_MAXIDLE(tp);
tcp_timer_activate(tp, TT_2MSL, timeout);
}
}
diff --git a/sys/netinet/tcp_var.h b/sys/netinet/tcp_var.h
index bc64ac2..b602c27 100644
--- a/sys/netinet/tcp_var.h
+++ b/sys/netinet/tcp_var.h
@@ -203,7 +203,12 @@ struct tcpcb {
struct cc_var *ccv; /* congestion control specific vars */
struct osd *osd; /* storage for Khelp module data */
- uint32_t t_ispare[12]; /* 4 keep timers, 5 UTO, 3 TBD */
+ u_int t_keepinit; /* time to establish connection */
+ u_int t_keepidle; /* time before keepalive probes begin */
+ u_int t_keepintvl; /* interval between keepalives */
+ u_int t_keepcnt; /* number of keepalives before close */
+
+ uint32_t t_ispare[8]; /* 5 UTO, 3 TBD */
void *t_pspare2[4]; /* 4 TBD */
uint64_t _pad[6]; /* 6 TBD (1-2 CC/RTT?) */
};
diff --git a/sys/sys/param.h b/sys/sys/param.h
index 35bca55..11421a5 100644
--- a/sys/sys/param.h
+++ b/sys/sys/param.h
@@ -58,7 +58,7 @@
* in the range 5 to 9.
*/
#undef __FreeBSD_version
-#define __FreeBSD_version 1000006 /* Master, propagated to newvers */
+#define __FreeBSD_version 1000007 /* Master, propagated to newvers */
/*
* __FreeBSD_kernel__ indicates that this system uses the kernel of FreeBSD,
OpenPOWER on IntegriCloud