diff options
Diffstat (limited to 'sys/netinet')
-rw-r--r-- | sys/netinet/tcp_input.c | 86 | ||||
-rw-r--r-- | sys/netinet/tcp_reass.c | 86 | ||||
-rw-r--r-- | sys/netinet/tcp_subr.c | 9 | ||||
-rw-r--r-- | sys/netinet/tcp_timewait.c | 9 | ||||
-rw-r--r-- | sys/netinet/tcp_var.h | 9 |
5 files changed, 174 insertions, 25 deletions
diff --git a/sys/netinet/tcp_input.c b/sys/netinet/tcp_input.c index 39d2dc1..151e083 100644 --- a/sys/netinet/tcp_input.c +++ b/sys/netinet/tcp_input.c @@ -58,6 +58,8 @@ #include <machine/cpu.h> /* before tcp_seq.h, for tcp_random18() */ +#include <vm/uma.h> + #include <net/if.h> #include <net/route.h> @@ -98,8 +100,6 @@ #include <machine/in_cksum.h> -MALLOC_DEFINE(M_TSEGQ, "tseg_qent", "TCP segment queue entry"); - static const int tcprexmtthresh = 3; tcp_cc tcp_ccgen; @@ -135,6 +135,29 @@ SYSCTL_INT(_net_inet_tcp, OID_AUTO, rfc3390, CTLFLAG_RW, &tcp_do_rfc3390, 0, "Enable RFC 3390 (Increasing TCP's Initial Congestion Window)"); +SYSCTL_NODE(_net_inet_tcp, OID_AUTO, reass, CTLFLAG_RW, 0, + "TCP Segment Reassembly Queue"); + +static int tcp_reass_maxseg = 0; +SYSCTL_INT(_net_inet_tcp_reass, OID_AUTO, maxsegments, CTLFLAG_RDTUN, + &tcp_reass_maxseg, 0, + "Global maximum number of TCP Segments in Reassembly Queue"); + +int tcp_reass_qsize = 0; +SYSCTL_INT(_net_inet_tcp_reass, OID_AUTO, cursegments, CTLFLAG_RD, + &tcp_reass_qsize, 0, + "Global number of TCP Segments currently in Reassembly Queue"); + +static int tcp_reass_maxqlen = 48; +SYSCTL_INT(_net_inet_tcp_reass, OID_AUTO, maxqlen, CTLFLAG_RW, + &tcp_reass_maxqlen, 0, + "Maximum number of TCP Segments per individual Reassembly Queue"); + +static int tcp_reass_overflows = 0; +SYSCTL_INT(_net_inet_tcp_reass, OID_AUTO, overflows, CTLFLAG_RD, + &tcp_reass_overflows, 0, + "Global number of TCP Segment Reassembly Queue Overflows"); + struct inpcbhead tcb; #define tcb6 tcb /* for KAME src sync over BSD*'s */ struct inpcbinfo tcbinfo; @@ -175,6 +198,19 @@ do { \ (tp->t_flags & TF_RXWIN0SENT) == 0) && \ (tcp_delack_enabled || (tp->t_flags & TF_NEEDSYN))) +/* Initialize TCP reassembly queue */ +uma_zone_t tcp_reass_zone; +void +tcp_reass_init() +{ + tcp_reass_maxseg = nmbclusters / 16; + TUNABLE_INT_FETCH("net.inet.tcp.reass.maxsegments", + &tcp_reass_maxseg); + tcp_reass_zone = uma_zcreate("tcpreass", sizeof (struct tseg_qent), + NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_NOFREE); + uma_zone_set_max(tcp_reass_zone, tcp_reass_maxseg); +} + static int tcp_reass(tp, th, tlenp, m) register struct tcpcb *tp; @@ -185,25 +221,51 @@ tcp_reass(tp, th, tlenp, m) struct tseg_qent *q; struct tseg_qent *p = NULL; struct tseg_qent *nq; - struct tseg_qent *te; + struct tseg_qent *te = NULL; struct socket *so = tp->t_inpcb->inp_socket; int flags; /* + * XXX: tcp_reass() is rather inefficient with its data structures + * and should be rewritten (see NetBSD for optimizations). While + * doing that it should move to its own file tcp_reass.c. + */ + + /* * Call with th==0 after become established to * force pre-ESTABLISHED data up to user socket. */ if (th == 0) goto present; - /* Allocate a new queue entry. If we can't, just drop the pkt. XXX */ - MALLOC(te, struct tseg_qent *, sizeof (struct tseg_qent), M_TSEGQ, - M_NOWAIT); + /* + * Limit the number of segments in the reassembly queue to prevent + * holding on to too many segments (and thus running out of mbufs). + * Make sure to let the missing segment through which caused this + * queue. Always keep one global queue entry spare to be able to + * process the missing segment. + */ + if (th->th_seq != tp->rcv_nxt && + (tcp_reass_qsize + 1 >= tcp_reass_maxseg || + tp->t_segqlen >= tcp_reass_maxqlen)) { + tcp_reass_overflows++; + tcpstat.tcps_rcvmemdrop++; + m_freem(m); + return (0); + } + + /* + * Allocate a new queue entry. If we can't, or hit the zone limit + * just drop the pkt. + */ + te = uma_zalloc(tcp_reass_zone, M_NOWAIT); if (te == NULL) { tcpstat.tcps_rcvmemdrop++; m_freem(m); return (0); } + tp->t_segqlen++; + tcp_reass_qsize++; /* * Find a segment which begins after this one does. @@ -228,7 +290,9 @@ tcp_reass(tp, th, tlenp, m) tcpstat.tcps_rcvduppack++; tcpstat.tcps_rcvdupbyte += *tlenp; m_freem(m); - FREE(te, M_TSEGQ); + uma_zfree(tcp_reass_zone, te); + tp->t_segqlen--; + tcp_reass_qsize--; /* * Try to present any queued data * at the left window edge to the user. @@ -263,7 +327,9 @@ tcp_reass(tp, th, tlenp, m) nq = LIST_NEXT(q, tqe_q); LIST_REMOVE(q, tqe_q); m_freem(q->tqe_m); - FREE(q, M_TSEGQ); + uma_zfree(tcp_reass_zone, q); + tp->t_segqlen--; + tcp_reass_qsize--; q = nq; } @@ -297,7 +363,9 @@ present: m_freem(q->tqe_m); else sbappendstream(&so->so_rcv, q->tqe_m); - FREE(q, M_TSEGQ); + uma_zfree(tcp_reass_zone, q); + tp->t_segqlen--; + tcp_reass_qsize--; q = nq; } while (q && q->tqe_th->th_seq == tp->rcv_nxt); ND6_HINT(tp); diff --git a/sys/netinet/tcp_reass.c b/sys/netinet/tcp_reass.c index 39d2dc1..151e083 100644 --- a/sys/netinet/tcp_reass.c +++ b/sys/netinet/tcp_reass.c @@ -58,6 +58,8 @@ #include <machine/cpu.h> /* before tcp_seq.h, for tcp_random18() */ +#include <vm/uma.h> + #include <net/if.h> #include <net/route.h> @@ -98,8 +100,6 @@ #include <machine/in_cksum.h> -MALLOC_DEFINE(M_TSEGQ, "tseg_qent", "TCP segment queue entry"); - static const int tcprexmtthresh = 3; tcp_cc tcp_ccgen; @@ -135,6 +135,29 @@ SYSCTL_INT(_net_inet_tcp, OID_AUTO, rfc3390, CTLFLAG_RW, &tcp_do_rfc3390, 0, "Enable RFC 3390 (Increasing TCP's Initial Congestion Window)"); +SYSCTL_NODE(_net_inet_tcp, OID_AUTO, reass, CTLFLAG_RW, 0, + "TCP Segment Reassembly Queue"); + +static int tcp_reass_maxseg = 0; +SYSCTL_INT(_net_inet_tcp_reass, OID_AUTO, maxsegments, CTLFLAG_RDTUN, + &tcp_reass_maxseg, 0, + "Global maximum number of TCP Segments in Reassembly Queue"); + +int tcp_reass_qsize = 0; +SYSCTL_INT(_net_inet_tcp_reass, OID_AUTO, cursegments, CTLFLAG_RD, + &tcp_reass_qsize, 0, + "Global number of TCP Segments currently in Reassembly Queue"); + +static int tcp_reass_maxqlen = 48; +SYSCTL_INT(_net_inet_tcp_reass, OID_AUTO, maxqlen, CTLFLAG_RW, + &tcp_reass_maxqlen, 0, + "Maximum number of TCP Segments per individual Reassembly Queue"); + +static int tcp_reass_overflows = 0; +SYSCTL_INT(_net_inet_tcp_reass, OID_AUTO, overflows, CTLFLAG_RD, + &tcp_reass_overflows, 0, + "Global number of TCP Segment Reassembly Queue Overflows"); + struct inpcbhead tcb; #define tcb6 tcb /* for KAME src sync over BSD*'s */ struct inpcbinfo tcbinfo; @@ -175,6 +198,19 @@ do { \ (tp->t_flags & TF_RXWIN0SENT) == 0) && \ (tcp_delack_enabled || (tp->t_flags & TF_NEEDSYN))) +/* Initialize TCP reassembly queue */ +uma_zone_t tcp_reass_zone; +void +tcp_reass_init() +{ + tcp_reass_maxseg = nmbclusters / 16; + TUNABLE_INT_FETCH("net.inet.tcp.reass.maxsegments", + &tcp_reass_maxseg); + tcp_reass_zone = uma_zcreate("tcpreass", sizeof (struct tseg_qent), + NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_NOFREE); + uma_zone_set_max(tcp_reass_zone, tcp_reass_maxseg); +} + static int tcp_reass(tp, th, tlenp, m) register struct tcpcb *tp; @@ -185,25 +221,51 @@ tcp_reass(tp, th, tlenp, m) struct tseg_qent *q; struct tseg_qent *p = NULL; struct tseg_qent *nq; - struct tseg_qent *te; + struct tseg_qent *te = NULL; struct socket *so = tp->t_inpcb->inp_socket; int flags; /* + * XXX: tcp_reass() is rather inefficient with its data structures + * and should be rewritten (see NetBSD for optimizations). While + * doing that it should move to its own file tcp_reass.c. + */ + + /* * Call with th==0 after become established to * force pre-ESTABLISHED data up to user socket. */ if (th == 0) goto present; - /* Allocate a new queue entry. If we can't, just drop the pkt. XXX */ - MALLOC(te, struct tseg_qent *, sizeof (struct tseg_qent), M_TSEGQ, - M_NOWAIT); + /* + * Limit the number of segments in the reassembly queue to prevent + * holding on to too many segments (and thus running out of mbufs). + * Make sure to let the missing segment through which caused this + * queue. Always keep one global queue entry spare to be able to + * process the missing segment. + */ + if (th->th_seq != tp->rcv_nxt && + (tcp_reass_qsize + 1 >= tcp_reass_maxseg || + tp->t_segqlen >= tcp_reass_maxqlen)) { + tcp_reass_overflows++; + tcpstat.tcps_rcvmemdrop++; + m_freem(m); + return (0); + } + + /* + * Allocate a new queue entry. If we can't, or hit the zone limit + * just drop the pkt. + */ + te = uma_zalloc(tcp_reass_zone, M_NOWAIT); if (te == NULL) { tcpstat.tcps_rcvmemdrop++; m_freem(m); return (0); } + tp->t_segqlen++; + tcp_reass_qsize++; /* * Find a segment which begins after this one does. @@ -228,7 +290,9 @@ tcp_reass(tp, th, tlenp, m) tcpstat.tcps_rcvduppack++; tcpstat.tcps_rcvdupbyte += *tlenp; m_freem(m); - FREE(te, M_TSEGQ); + uma_zfree(tcp_reass_zone, te); + tp->t_segqlen--; + tcp_reass_qsize--; /* * Try to present any queued data * at the left window edge to the user. @@ -263,7 +327,9 @@ tcp_reass(tp, th, tlenp, m) nq = LIST_NEXT(q, tqe_q); LIST_REMOVE(q, tqe_q); m_freem(q->tqe_m); - FREE(q, M_TSEGQ); + uma_zfree(tcp_reass_zone, q); + tp->t_segqlen--; + tcp_reass_qsize--; q = nq; } @@ -297,7 +363,9 @@ present: m_freem(q->tqe_m); else sbappendstream(&so->so_rcv, q->tqe_m); - FREE(q, M_TSEGQ); + uma_zfree(tcp_reass_zone, q); + tp->t_segqlen--; + tcp_reass_qsize--; q = nq; } while (q && q->tqe_th->th_seq == tp->rcv_nxt); ND6_HINT(tp); diff --git a/sys/netinet/tcp_subr.c b/sys/netinet/tcp_subr.c index 0878744..210a582 100644 --- a/sys/netinet/tcp_subr.c +++ b/sys/netinet/tcp_subr.c @@ -289,6 +289,7 @@ tcp_init() tcp_timer_init(); syncache_init(); tcp_hc_init(); + tcp_reass_init(); } /* @@ -711,7 +712,9 @@ tcp_discardcb(tp) while ((q = LIST_FIRST(&tp->t_segq)) != NULL) { LIST_REMOVE(q, tqe_q); m_freem(q->tqe_m); - FREE(q, M_TSEGQ); + uma_zfree(tcp_reass_zone, q); + tp->t_segqlen--; + tcp_reass_qsize--; } inp->inp_ppcb = NULL; tp->t_inpcb = NULL; @@ -772,7 +775,9 @@ tcp_drain() != NULL) { LIST_REMOVE(te, tqe_q); m_freem(te->tqe_m); - FREE(te, M_TSEGQ); + uma_zfree(tcp_reass_zone, te); + tcpb->t_segqlen--; + tcp_reass_qsize--; } } INP_UNLOCK(inpb); diff --git a/sys/netinet/tcp_timewait.c b/sys/netinet/tcp_timewait.c index 0878744..210a582 100644 --- a/sys/netinet/tcp_timewait.c +++ b/sys/netinet/tcp_timewait.c @@ -289,6 +289,7 @@ tcp_init() tcp_timer_init(); syncache_init(); tcp_hc_init(); + tcp_reass_init(); } /* @@ -711,7 +712,9 @@ tcp_discardcb(tp) while ((q = LIST_FIRST(&tp->t_segq)) != NULL) { LIST_REMOVE(q, tqe_q); m_freem(q->tqe_m); - FREE(q, M_TSEGQ); + uma_zfree(tcp_reass_zone, q); + tp->t_segqlen--; + tcp_reass_qsize--; } inp->inp_ppcb = NULL; tp->t_inpcb = NULL; @@ -772,7 +775,9 @@ tcp_drain() != NULL) { LIST_REMOVE(te, tqe_q); m_freem(te->tqe_m); - FREE(te, M_TSEGQ); + uma_zfree(tcp_reass_zone, te); + tcpb->t_segqlen--; + tcp_reass_qsize--; } } INP_UNLOCK(inpb); diff --git a/sys/netinet/tcp_var.h b/sys/netinet/tcp_var.h index e8aa435..07ebd06 100644 --- a/sys/netinet/tcp_var.h +++ b/sys/netinet/tcp_var.h @@ -54,8 +54,9 @@ struct tseg_qent { struct mbuf *tqe_m; /* mbuf contains packet */ }; LIST_HEAD(tsegqe_head, tseg_qent); -#ifdef MALLOC_DECLARE -MALLOC_DECLARE(M_TSEGQ); +extern int tcp_reass_qsize; +#ifdef VM_UMA_H +extern uma_zone_t tcp_reass_zone; #endif struct tcptemp { @@ -70,7 +71,8 @@ struct tcptemp { * Organized for 16 byte cacheline efficiency. */ struct tcpcb { - struct tsegqe_head t_segq; + struct tsegqe_head t_segq; /* segment reassembly queue */ + int t_segqlen; /* segment reassembly queue length */ int t_dupacks; /* consecutive dup acks recd */ struct tcptemp *unused; /* unused */ @@ -519,6 +521,7 @@ struct tcpcb * void tcp_drain(void); void tcp_fasttimo(void); void tcp_init(void); +void tcp_reass_init(void); void tcp_input(struct mbuf *, int); u_long tcp_maxmtu(struct in_conninfo *); u_long tcp_maxmtu6(struct in_conninfo *); |