diff options
author | rwatson <rwatson@FreeBSD.org> | 2004-11-26 18:58:46 +0000 |
---|---|---|
committer | rwatson <rwatson@FreeBSD.org> | 2004-11-26 18:58:46 +0000 |
commit | 0aa3c6f81730f2718d8c61f1d34d33bfe921c3f0 (patch) | |
tree | 4caf2c37378509b28e849b5a6086de1c5d497ede | |
parent | c76a6d58b3f2dc81bf3900ace4a6919a5d6521d4 (diff) | |
download | FreeBSD-src-0aa3c6f81730f2718d8c61f1d34d33bfe921c3f0.zip FreeBSD-src-0aa3c6f81730f2718d8c61f1d34d33bfe921c3f0.tar.gz |
Implement parts of the TCP_INFO socket option as found in Linux 2.6.
This socket option allows processes to query a TCP socket for some low
level transmission details, such as the current send, bandwidth, and
congestion windows. Linux provides a 'struct tcp_info' structure
containing various variables, rather than separate socket options;
this makes the API somewhat fragile as it makes it difficult to add
new entries of interest as requirements and implementation evolve.
As such, I've included a large pad at the end of the structure.
Right now, relatively few of the Linux API fields are filled in, and
some contain no logical equivalent on FreeBSD. I've included __'d
entries in the structure to make it easier to figure out what is and
isn't omitted. This API/ABI should be considered unstable for the
time being.
-rw-r--r-- | sys/netinet/tcp.h | 66 | ||||
-rw-r--r-- | sys/netinet/tcp_usrreq.c | 56 |
2 files changed, 120 insertions, 2 deletions
diff --git a/sys/netinet/tcp.h b/sys/netinet/tcp.h index 443b9b2..a312078 100644 --- a/sys/netinet/tcp.h +++ b/sys/netinet/tcp.h @@ -159,6 +159,72 @@ struct tcphdr { #define TCP_NOPUSH 0x04 /* don't push last block of write */ #define TCP_NOOPT 0x08 /* don't use TCP options */ #define TCP_MD5SIG 0x10 /* use MD5 digests (RFC2385) */ +#define TCP_INFO 0x20 /* retrieve tcp_info structure */ + +#define TCPI_OPT_TIMESTAMPS 0x01 +#define TCPI_OPT_SACK 0x02 +#define TCPI_OPT_WSCALE 0x04 +#define TCPI_OPT_ECN 0x08 + +/* + * The TCP_INFO socket option comes from the Linux 2.6 TCP API, and permits + * the caller to query certain information about the state of a TCP + * connection. We provide an overlapping set of fields with the Linux + * implementation, but since this is a fixed size structure, room has been + * left for growth. In order to maximize potential future compatibility with + * the Linux API, the same variable names and order have been adopted, and + * padding left to make room for omitted fields in case they are added later. + * + * XXX: This is currently an unstable ABI/API, in that it is expected to + * change. + */ +struct tcp_info { + u_int8_t tcpi_state; /* TCP FSM state. */ + u_int8_t __tcpi_ca_state; + u_int8_t __tcpi_retransmits; + u_int8_t __tcpi_probes; + u_int8_t __tcpi_backoff; + u_int8_t tcpi_options; /* Options enabled on conn. */ + u_int8_t tcpi_snd_wscale:4, /* RFC1323 send shift value. */ + tcpi_rcv_wscale:4; /* RFC1323 recv shift value. */ + + u_int32_t __tcpi_rto; + u_int32_t __tcpi_ato; + u_int32_t __tcpi_snd_mss; + u_int32_t __tcpi_rcv_mss; + + u_int32_t __tcpi_unacked; + u_int32_t __tcpi_sacked; + u_int32_t __tcpi_lost; + u_int32_t __tcpi_retrans; + u_int32_t __tcpi_fackets; + + /* Times; measurements in usecs. */ + u_int32_t __tcpi_last_data_sent; + u_int32_t __tcpi_last_ack_sent; /* Also unimpl. on Linux? */ + u_int32_t __tcpi_last_data_recv; + u_int32_t __tcpi_last_ack_recv; + + /* Metrics; variable units. 
*/ + u_int32_t __tcpi_pmtu; + u_int32_t __tcpi_rcv_ssthresh; + u_int32_t __tcpi_rtt; + u_int32_t __tcpi_rttvar; + u_int32_t tcpi_snd_ssthresh; /* Slow start threshold. */ + u_int32_t tcpi_snd_cwnd; /* Send congestion window. */ + u_int32_t __tcpi_advmss; + u_int32_t __tcpi_reordering; + + u_int32_t __tcpi_rcv_rtt; + u_int32_t __tcpi_rcv_space; + + /* FreeBSD extensions to tcp_info. */ + u_int32_t tcpi_snd_wnd; /* Advertised send window. */ + u_int32_t tcpi_snd_bwnd; /* Bandwidth send window. */ + + /* Padding to grow without breaking ABI. */ + u_int32_t __tcpi_pad[32]; /* Padding. */ +}; #endif #endif /* !_NETINET_TCP_H_ */ diff --git a/sys/netinet/tcp_usrreq.c b/sys/netinet/tcp_usrreq.c index 55c5b3d..b07458b 100644 --- a/sys/netinet/tcp_usrreq.c +++ b/sys/netinet/tcp_usrreq.c @@ -97,6 +97,7 @@ static struct tcpcb * tcp_disconnect(struct tcpcb *); static struct tcpcb * tcp_usrclosed(struct tcpcb *); +static void tcp_fill_info(struct tcpcb *, struct tcp_info *); #ifdef TCPDEBUG #define TCPDEBUG0 int ostate = 0 @@ -942,11 +943,50 @@ tcp6_connect(tp, nam, td) #endif /* INET6 */ /* + * Export TCP internal state information via a struct tcp_info, based on the + * Linux 2.6 API. Not ABI compatible as our constants are mapped differently + * (TCP state machine, etc). We export all information using FreeBSD-native + * constants -- for example, the numeric values for tcpi_state will differ + * from Linux. 
+ */ +static void +tcp_fill_info(tp, ti) + struct tcpcb *tp; + struct tcp_info *ti; +{ + + INP_LOCK_ASSERT(tp->t_inpcb); + bzero(ti, sizeof(*ti)); + + ti->tcpi_state = tp->t_state; + if ((tp->t_flags & TF_REQ_TSTMP) && (tp->t_flags & TF_RCVD_TSTMP)) + ti->tcpi_options |= TCPI_OPT_TIMESTAMPS; + if (tp->sack_enable) + ti->tcpi_options |= TCPI_OPT_SACK; + if ((tp->t_flags & TF_REQ_SCALE) && (tp->t_flags & TF_RCVD_SCALE)) { + ti->tcpi_options |= TCPI_OPT_WSCALE; + ti->tcpi_snd_wscale = tp->snd_scale; + ti->tcpi_rcv_wscale = tp->rcv_scale; + } + ti->tcpi_snd_ssthresh = tp->snd_ssthresh; + ti->tcpi_snd_cwnd = tp->snd_cwnd; + + /* + * FreeBSD-specific extension fields for tcp_info. + */ + ti->tcpi_snd_wnd = tp->snd_wnd; + ti->tcpi_snd_bwnd = tp->snd_bwnd; +} + +/* * The new sockopt interface makes it possible for us to block in the * copyin/out step (if we take a page fault). Taking a page fault at * splnet() is probably a Bad Thing. (Since sockets and pcbs both now * use TSM, there probably isn't any need for this function to run at * splnet() any more. This needs more examination.) + * + * XXXRW: The locking here is wrong; we may take a page fault while holding + * the inpcb lock. */ int tcp_ctloutput(so, sopt) @@ -956,6 +996,7 @@ tcp_ctloutput(so, sopt) int error, opt, optval; struct inpcb *inp; struct tcpcb *tp; + struct tcp_info ti; error = 0; INP_INFO_RLOCK(&tcbinfo); @@ -1046,6 +1087,10 @@ tcp_ctloutput(so, sopt) error = EINVAL; break; + case TCP_INFO: + error = EINVAL; + break; + default: error = ENOPROTOOPT; break; @@ -1057,26 +1102,33 @@ tcp_ctloutput(so, sopt) #ifdef TCP_SIGNATURE case TCP_MD5SIG: optval = (tp->t_flags & TF_SIGNATURE) ? 
1 : 0; + error = sooptcopyout(sopt, &optval, sizeof optval); break; #endif case TCP_NODELAY: optval = tp->t_flags & TF_NODELAY; + error = sooptcopyout(sopt, &optval, sizeof optval); break; case TCP_MAXSEG: optval = tp->t_maxseg; + error = sooptcopyout(sopt, &optval, sizeof optval); break; case TCP_NOOPT: optval = tp->t_flags & TF_NOOPT; + error = sooptcopyout(sopt, &optval, sizeof optval); break; case TCP_NOPUSH: optval = tp->t_flags & TF_NOPUSH; + error = sooptcopyout(sopt, &optval, sizeof optval); + break; + case TCP_INFO: + tcp_fill_info(tp, &ti); + error = sooptcopyout(sopt, &ti, sizeof ti); break; default: error = ENOPROTOOPT; break; } - if (error == 0) - error = sooptcopyout(sopt, &optval, sizeof optval); break; } INP_UNLOCK(inp); |