From 4326beb059fb2e0786289b19b1bf9a8b7a2b824b Mon Sep 17 00:00:00 2001 From: glebius Date: Sun, 5 Feb 2012 16:53:02 +0000 Subject: Add new socket options: TCP_KEEPINIT, TCP_KEEPIDLE, TCP_KEEPINTVL and TCP_KEEPCNT, that allow to control initial timeout, idle time, idle re-send interval and idle send count on a per-socket basis. Reviewed by: andre, bz, lstewart --- share/man/man4/tcp.4 | 71 +++++++++++++++++++++++++++++++++++++++++++--- sys/netinet/tcp.h | 4 +++ sys/netinet/tcp_input.c | 16 +++++------ sys/netinet/tcp_syncache.c | 10 ++++++- sys/netinet/tcp_timer.c | 21 +++++++------- sys/netinet/tcp_timer.h | 8 +++++- sys/netinet/tcp_usrreq.c | 61 +++++++++++++++++++++++++++++++++++++-- sys/netinet/tcp_var.h | 7 ++++- sys/sys/param.h | 2 +- 9 files changed, 171 insertions(+), 29 deletions(-) diff --git a/share/man/man4/tcp.4 b/share/man/man4/tcp.4 index e2d4ad3..34c2a28 100644 --- a/share/man/man4/tcp.4 +++ b/share/man/man4/tcp.4 @@ -38,7 +38,7 @@ .\" From: @(#)tcp.4 8.1 (Berkeley) 6/5/93 .\" $FreeBSD$ .\" -.Dd November 14, 2011 +.Dd February 5, 2012 .Dt TCP 4 .Os .Sh NAME @@ -146,6 +146,65 @@ connection. See .Xr mod_cc 4 for details. +.It Dv TCP_KEEPINIT +This write-only +.Xr setsockopt 2 +option accepts a per-socket timeout argument of +.Vt "u_int" +in seconds, for new, non-established +.Tn TCP +connections. +For the global default in milliseconds see +.Va keepinit +in the +.Sx MIB Variables +section further down. +.It Dv TCP_KEEPIDLE +This write-only +.Xr setsockopt 2 +option accepts an argument of +.Vt "u_int" +for the amount of time, in seconds, that the connection must be idle +before keepalive probes (if enabled) are sent for the connection of this +socket. +If set on a listening socket, the value is inherited by the newly created +socket upon +.Xr accept 2 . +For the global default in milliseconds see +.Va keepidle +in the +.Sx MIB Variables +section further down. +.It Dv TCP_KEEPINTVL +This write-only +.Xr setsockopt 2 +option accepts an argument of +.Vt "u_int" +to set the per-socket interval, in seconds, between keepalive probes sent +to a peer. +If set on a listening socket, the value is inherited by the newly created +socket upon +.Xr accept 2 . +For the global default in milliseconds see +.Va keepintvl +in the +.Sx MIB Variables +section further down. +.It Dv TCP_KEEPCNT +This write-only +.Xr setsockopt 2 +option accepts an argument of +.Vt "u_int" +and allows a per-socket tuning of the number of probes sent, with no response, +before the connection will be dropped. +If set on a listening socket, the value is inherited by the newly created +socket upon +.Xr accept 2 . +For the global default see the +.Va keepcnt +in the +.Sx MIB Variables +section further down. .It Dv TCP_NODELAY Under most circumstances, .Tn TCP @@ -296,17 +355,21 @@ The Maximum Segment Lifetime, in milliseconds, for a packet. Timeout, in milliseconds, for new, non-established .Tn TCP connections. +The default is 75000 msec. .It Va keepidle Amount of time, in milliseconds, that the connection must be idle before keepalive probes (if enabled) are sent. +The default is 7200000 msec (2 hours). .It Va keepintvl The interval, in milliseconds, between keepalive probes sent to remote machines, when no response is received on a .Va keepidle probe. -After -.Dv TCPTV_KEEPCNT -(default 8) probes are sent, with no response, the connection is dropped. +The default is 75000 msec. +.It Va keepcnt +Number of probes sent, with no response, before a connection +is dropped. +The default is 8 packets. .It Va always_keepalive Assume that .Dv SO_KEEPALIVE diff --git a/sys/netinet/tcp.h b/sys/netinet/tcp.h index 805a561..c714360 100644 --- a/sys/netinet/tcp.h +++ b/sys/netinet/tcp.h @@ -159,6 +159,10 @@ struct tcphdr { #define TCP_MD5SIG 0x10 /* use MD5 digests (RFC2385) */ #define TCP_INFO 0x20 /* retrieve tcp_info structure */ #define TCP_CONGESTION 0x40 /* get/set congestion control algorithm */ +#define TCP_KEEPINIT 0x80 /* N, time to establish connection */ +#define TCP_KEEPIDLE 0x100 /* L,N,X start keeplives after this period */ +#define TCP_KEEPINTVL 0x200 /* L,N interval between keepalives */ +#define TCP_KEEPCNT 0x400 /* L,N number of keepalives before close */ #define TCP_CA_NAME_MAX 16 /* max congestion control name length */ diff --git a/sys/netinet/tcp_input.c b/sys/netinet/tcp_input.c index db373cd..0fadf87 100644 --- a/sys/netinet/tcp_input.c +++ b/sys/netinet/tcp_input.c @@ -1446,7 +1446,7 @@ tcp_do_segment(struct mbuf *m, struct tcphdr *th, struct socket *so, */ tp->t_rcvtime = ticks; if (TCPS_HAVEESTABLISHED(tp->t_state)) - tcp_timer_activate(tp, TT_KEEP, tcp_keepidle); + tcp_timer_activate(tp, TT_KEEP, TP_KEEPIDLE(tp)); /* * Unscale the window into a 32-bit value. @@ -1889,7 +1889,8 @@ tcp_do_segment(struct mbuf *m, struct tcphdr *th, struct socket *so, } else { tp->t_state = TCPS_ESTABLISHED; cc_conn_init(tp); - tcp_timer_activate(tp, TT_KEEP, tcp_keepidle); + tcp_timer_activate(tp, TT_KEEP, + TP_KEEPIDLE(tp)); } } else { /* @@ -2293,7 +2294,7 @@ tcp_do_segment(struct mbuf *m, struct tcphdr *th, struct socket *so, } else { tp->t_state = TCPS_ESTABLISHED; cc_conn_init(tp); - tcp_timer_activate(tp, TT_KEEP, tcp_keepidle); + tcp_timer_activate(tp, TT_KEEP, TP_KEEPIDLE(tp)); } /* * If segment contains data or ACK, will call tcp_reass() @@ -2630,12 +2631,11 @@ process_ACK: * compressed state. */ if (so->so_rcv.sb_state & SBS_CANTRCVMORE) { - int timeout; - soisdisconnected(so); - timeout = (tcp_fast_finwait2_recycle) ? - tcp_finwait2_timeout : tcp_maxidle; - tcp_timer_activate(tp, TT_2MSL, timeout); + tcp_timer_activate(tp, TT_2MSL, + (tcp_fast_finwait2_recycle ? + tcp_finwait2_timeout : + TP_MAXIDLE(tp))); } tp->t_state = TCPS_FIN_WAIT_2; } diff --git a/sys/netinet/tcp_syncache.c b/sys/netinet/tcp_syncache.c index 2e9c37c..baa0f92 100644 --- a/sys/netinet/tcp_syncache.c +++ b/sys/netinet/tcp_syncache.c @@ -845,7 +845,15 @@ syncache_socket(struct syncache *sc, struct socket *lso, struct mbuf *m) */ if (sc->sc_rxmits > 1) tp->snd_cwnd = tp->t_maxseg; - tcp_timer_activate(tp, TT_KEEP, tcp_keepinit); + + /* + * Copy and activate timers. + */ + tp->t_keepinit = sototcpcb(lso)->t_keepinit; + tp->t_keepidle = sototcpcb(lso)->t_keepidle; + tp->t_keepintvl = sototcpcb(lso)->t_keepintvl; + tp->t_keepcnt = sototcpcb(lso)->t_keepcnt; + tcp_timer_activate(tp, TT_KEEP, TP_KEEPINIT(tp)); INP_WUNLOCK(inp); diff --git a/sys/netinet/tcp_timer.c b/sys/netinet/tcp_timer.c index 1651d69..9c3c749 100644 --- a/sys/netinet/tcp_timer.c +++ b/sys/netinet/tcp_timer.c @@ -111,12 +111,12 @@ int tcp_finwait2_timeout; SYSCTL_PROC(_net_inet_tcp, OID_AUTO, finwait2_timeout, CTLTYPE_INT|CTLFLAG_RW, &tcp_finwait2_timeout, 0, sysctl_msec_to_ticks, "I", "FIN-WAIT2 timeout"); +int tcp_keepcnt = TCPTV_KEEPCNT; +SYSCTL_INT(_net_inet_tcp, OID_AUTO, keepcnt, CTLFLAG_RW, &tcp_keepcnt, 0, + "Number of keepalive probes to send"); -static int tcp_keepcnt = TCPTV_KEEPCNT; /* max idle probes */ int tcp_maxpersistidle; - /* max idle time in persist */ -int tcp_maxidle; static int per_cpu_timers = 0; SYSCTL_INT(_net_inet_tcp, OID_AUTO, per_cpu_timers, CTLFLAG_RW, @@ -138,7 +138,6 @@ tcp_slowtimo(void) VNET_LIST_RLOCK_NOSLEEP(); VNET_FOREACH(vnet_iter) { CURVNET_SET(vnet_iter); - tcp_maxidle = tcp_keepcnt * tcp_keepintvl; INP_INFO_WLOCK(&V_tcbinfo); (void) tcp_tw_2msl_scan(0); INP_INFO_WUNLOCK(&V_tcbinfo); @@ -255,9 +254,9 @@ tcp_timer_2msl(void *xtp) tp = tcp_close(tp); } else { if (tp->t_state != TCPS_TIME_WAIT && - ticks - tp->t_rcvtime <= tcp_maxidle) - callout_reset_on(&tp->t_timers->tt_2msl, tcp_keepintvl, - tcp_timer_2msl, tp, INP_CPU(inp)); + ticks - tp->t_rcvtime <= TP_MAXIDLE(tp)) + callout_reset_on(&tp->t_timers->tt_2msl, + TP_KEEPINTVL(tp), tcp_timer_2msl, tp, INP_CPU(inp)); else tp = tcp_close(tp); } @@ -318,7 +317,7 @@ tcp_timer_keep(void *xtp) goto dropit; if ((always_keepalive || inp->inp_socket->so_options & SO_KEEPALIVE) && tp->t_state <= TCPS_CLOSING) { - if (ticks - tp->t_rcvtime >= tcp_keepidle + tcp_maxidle) + if (ticks - tp->t_rcvtime >= TP_KEEPIDLE(tp) + TP_MAXIDLE(tp)) goto dropit; /* * Send a packet designed to force a response @@ -340,9 +339,11 @@ tcp_timer_keep(void *xtp) tp->rcv_nxt, tp->snd_una - 1, 0); free(t_template, M_TEMP); } - callout_reset_on(&tp->t_timers->tt_keep, tcp_keepintvl, tcp_timer_keep, tp, INP_CPU(inp)); + callout_reset_on(&tp->t_timers->tt_keep, TP_KEEPINTVL(tp), + tcp_timer_keep, tp, INP_CPU(inp)); } else - callout_reset_on(&tp->t_timers->tt_keep, tcp_keepidle, tcp_timer_keep, tp, INP_CPU(inp)); + callout_reset_on(&tp->t_timers->tt_keep, TP_KEEPIDLE(tp), + tcp_timer_keep, tp, INP_CPU(inp)); #ifdef TCPDEBUG if (inp->inp_socket->so_options & SO_DEBUG) diff --git a/sys/netinet/tcp_timer.h b/sys/netinet/tcp_timer.h index 4bfcdf6..2fc8f7b 100644 --- a/sys/netinet/tcp_timer.h +++ b/sys/netinet/tcp_timer.h @@ -153,10 +153,16 @@ struct tcp_timer { #define TT_KEEP 0x08 #define TT_2MSL 0x10 +#define TP_KEEPINIT(tp) ((tp)->t_keepinit ? (tp)->t_keepinit : tcp_keepinit) +#define TP_KEEPIDLE(tp) ((tp)->t_keepidle ? (tp)->t_keepidle : tcp_keepidle) +#define TP_KEEPINTVL(tp) ((tp)->t_keepintvl ? (tp)->t_keepintvl : tcp_keepintvl) +#define TP_KEEPCNT(tp) ((tp)->t_keepcnt ? (tp)->t_keepcnt : tcp_keepcnt) +#define TP_MAXIDLE(tp) (TP_KEEPCNT(tp) * TP_KEEPINTVL(tp)) + extern int tcp_keepinit; /* time to establish connection */ extern int tcp_keepidle; /* time before keepalive probes begin */ extern int tcp_keepintvl; /* time between keepalive probes */ -extern int tcp_maxidle; /* time to drop after starting probes */ +extern int tcp_keepcnt; /* number of keepalives */ extern int tcp_delacktime; /* time before sending a delayed ACK */ extern int tcp_maxpersistidle; extern int tcp_rexmit_min; diff --git a/sys/netinet/tcp_usrreq.c b/sys/netinet/tcp_usrreq.c index 5e2af8f..a9045f3 100644 --- a/sys/netinet/tcp_usrreq.c +++ b/sys/netinet/tcp_usrreq.c @@ -45,6 +45,7 @@ __FBSDID("$FreeBSD$"); #include #include +#include #include #include #include @@ -1118,7 +1119,7 @@ tcp_connect(struct tcpcb *tp, struct sockaddr *nam, struct thread *td) soisconnecting(so); TCPSTAT_INC(tcps_connattempt); tp->t_state = TCPS_SYN_SENT; - tcp_timer_activate(tp, TT_KEEP, tcp_keepinit); + tcp_timer_activate(tp, TT_KEEP, TP_KEEPINIT(tp)); tp->iss = tcp_new_isn(tp); tcp_sendseqinit(tp); @@ -1191,7 +1192,7 @@ tcp6_connect(struct tcpcb *tp, struct sockaddr *nam, struct thread *td) soisconnecting(so); TCPSTAT_INC(tcps_connattempt); tp->t_state = TCPS_SYN_SENT; - tcp_timer_activate(tp, TT_KEEP, tcp_keepinit); + tcp_timer_activate(tp, TT_KEEP, TP_KEEPINIT(tp)); tp->iss = tcp_new_isn(tp); tcp_sendseqinit(tp); @@ -1272,6 +1273,7 @@ int tcp_ctloutput(struct socket *so, struct sockopt *sopt) { int error, opt, optval; + u_int ui; struct inpcb *inp; struct tcpcb *tp; struct tcp_info ti; @@ -1439,6 +1441,59 @@ tcp_ctloutput(struct socket *so, struct sockopt *sopt) INP_WUNLOCK(inp); break; + case TCP_KEEPIDLE: + case TCP_KEEPINTVL: + case TCP_KEEPCNT: + case TCP_KEEPINIT: + INP_WUNLOCK(inp); + error = sooptcopyin(sopt, &ui, sizeof(ui), sizeof(ui)); + if (error) + return (error); + + if (ui > (UINT_MAX / hz)) { + error = EINVAL; + break; + } + ui *= hz; + + INP_WLOCK_RECHECK(inp); + switch (sopt->sopt_name) { + case TCP_KEEPIDLE: + tp->t_keepidle = ui; + /* + * XXX: better check current remaining + * timeout and "merge" it with new value. + */ + if ((tp->t_state > TCPS_LISTEN) && + (tp->t_state <= TCPS_CLOSING)) + tcp_timer_activate(tp, TT_KEEP, + TP_KEEPIDLE(tp)); + break; + case TCP_KEEPINTVL: + tp->t_keepintvl = ui; + if ((tp->t_state == TCPS_FIN_WAIT_2) && + (TP_MAXIDLE(tp) > 0)) + tcp_timer_activate(tp, TT_2MSL, + TP_MAXIDLE(tp)); + break; + case TCP_KEEPCNT: + tp->t_keepcnt = ui; + if ((tp->t_state == TCPS_FIN_WAIT_2) && + (TP_MAXIDLE(tp) > 0)) + tcp_timer_activate(tp, TT_2MSL, + TP_MAXIDLE(tp)); + break; + case TCP_KEEPINIT: + tp->t_keepinit = ui; + if (tp->t_state == TCPS_SYN_RECEIVED || + tp->t_state == TCPS_SYN_SENT) + tcp_timer_activate(tp, TT_KEEP, + TP_KEEPINIT(tp)); + break; + } + INP_WUNLOCK(inp); + break; + default: INP_WUNLOCK(inp); error = ENOPROTOOPT; @@ -1636,7 +1691,7 @@ tcp_usrclosed(struct tcpcb *tp) int timeout; timeout = (tcp_fast_finwait2_recycle) ? - tcp_finwait2_timeout : tcp_maxidle; + tcp_finwait2_timeout : TP_MAXIDLE(tp); tcp_timer_activate(tp, TT_2MSL, timeout); } } diff --git a/sys/netinet/tcp_var.h b/sys/netinet/tcp_var.h index bc64ac2..b602c27 100644 --- a/sys/netinet/tcp_var.h +++ b/sys/netinet/tcp_var.h @@ -203,7 +203,12 @@ struct tcpcb { struct cc_var *ccv; /* congestion control specific vars */ struct osd *osd; /* storage for Khelp module data */ - uint32_t t_ispare[12]; /* 4 keep timers, 5 UTO, 3 TBD */ + u_int t_keepinit; /* time to establish connection */ + u_int t_keepidle; /* time before keepalive probes begin */ + u_int t_keepintvl; /* interval between keepalives */ + u_int t_keepcnt; /* number of keepalives before close */ + + uint32_t t_ispare[8]; /* 5 UTO, 3 TBD */ void *t_pspare2[4]; /* 4 TBD */ uint64_t _pad[6]; /* 6 TBD (1-2 CC/RTT?) */ }; diff --git a/sys/sys/param.h b/sys/sys/param.h index 35bca55..11421a5 100644 --- a/sys/sys/param.h +++ b/sys/sys/param.h @@ -58,7 +58,7 @@ * in the range 5 to 9. */ #undef __FreeBSD_version -#define __FreeBSD_version 1000006 /* Master, propagated to newvers */ +#define __FreeBSD_version 1000007 /* Master, propagated to newvers */ /* * __FreeBSD_kernel__ indicates that this system uses the kernel of FreeBSD, -- cgit v1.1