summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
-rw-r--r-- sys/netinet/tcp_input.c 86
-rw-r--r-- sys/netinet/tcp_reass.c 86
-rw-r--r-- sys/netinet/tcp_subr.c 9
-rw-r--r-- sys/netinet/tcp_timewait.c 9
-rw-r--r-- sys/netinet/tcp_var.h 9
5 files changed, 174 insertions, 25 deletions
diff --git a/sys/netinet/tcp_input.c b/sys/netinet/tcp_input.c
index 39d2dc1..151e083 100644
--- a/sys/netinet/tcp_input.c
+++ b/sys/netinet/tcp_input.c
@@ -58,6 +58,8 @@
#include <machine/cpu.h> /* before tcp_seq.h, for tcp_random18() */
+#include <vm/uma.h>
+
#include <net/if.h>
#include <net/route.h>
@@ -98,8 +100,6 @@
#include <machine/in_cksum.h>
-MALLOC_DEFINE(M_TSEGQ, "tseg_qent", "TCP segment queue entry");
-
static const int tcprexmtthresh = 3;
tcp_cc tcp_ccgen;
@@ -135,6 +135,29 @@ SYSCTL_INT(_net_inet_tcp, OID_AUTO, rfc3390, CTLFLAG_RW,
&tcp_do_rfc3390, 0,
"Enable RFC 3390 (Increasing TCP's Initial Congestion Window)");
+SYSCTL_NODE(_net_inet_tcp, OID_AUTO, reass, CTLFLAG_RW, 0,
+ "TCP Segment Reassembly Queue");
+
+static int tcp_reass_maxseg = 0;
+SYSCTL_INT(_net_inet_tcp_reass, OID_AUTO, maxsegments, CTLFLAG_RDTUN,
+ &tcp_reass_maxseg, 0,
+ "Global maximum number of TCP Segments in Reassembly Queue");
+
+int tcp_reass_qsize = 0;
+SYSCTL_INT(_net_inet_tcp_reass, OID_AUTO, cursegments, CTLFLAG_RD,
+ &tcp_reass_qsize, 0,
+ "Global number of TCP Segments currently in Reassembly Queue");
+
+static int tcp_reass_maxqlen = 48;
+SYSCTL_INT(_net_inet_tcp_reass, OID_AUTO, maxqlen, CTLFLAG_RW,
+ &tcp_reass_maxqlen, 0,
+ "Maximum number of TCP Segments per individual Reassembly Queue");
+
+static int tcp_reass_overflows = 0;
+SYSCTL_INT(_net_inet_tcp_reass, OID_AUTO, overflows, CTLFLAG_RD,
+ &tcp_reass_overflows, 0,
+ "Global number of TCP Segment Reassembly Queue Overflows");
+
struct inpcbhead tcb;
#define tcb6 tcb /* for KAME src sync over BSD*'s */
struct inpcbinfo tcbinfo;
@@ -175,6 +198,19 @@ do { \
(tp->t_flags & TF_RXWIN0SENT) == 0) && \
(tcp_delack_enabled || (tp->t_flags & TF_NEEDSYN)))
+/*
+ * Initialize TCP reassembly queue.
+ *
+ * Seeds the global segment limit tcp_reass_maxseg with nmbclusters / 16,
+ * hands it to TUNABLE_INT_FETCH under "net.inet.tcp.reass.maxsegments"
+ * (presumably so a boot-time tunable can override the default; the
+ * matching sysctl is declared CTLFLAG_RDTUN), then creates the "tcpreass"
+ * UMA zone sized for struct tseg_qent and applies tcp_reass_maxseg to it
+ * with uma_zone_set_max().  Invoked from tcp_init().
+ *
+ * tcp_reass_zone is the zone that tcp_reass(), tcp_discardcb() and
+ * tcp_drain() allocate queue entries from and free them back to.
+ */
+uma_zone_t tcp_reass_zone;
+void
+tcp_reass_init()
+{
+ tcp_reass_maxseg = nmbclusters / 16;
+ TUNABLE_INT_FETCH("net.inet.tcp.reass.maxsegments",
+ &tcp_reass_maxseg);
+ tcp_reass_zone = uma_zcreate("tcpreass", sizeof (struct tseg_qent),
+ NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_NOFREE);
+ uma_zone_set_max(tcp_reass_zone, tcp_reass_maxseg);
+}
+
static int
tcp_reass(tp, th, tlenp, m)
register struct tcpcb *tp;
@@ -185,25 +221,51 @@ tcp_reass(tp, th, tlenp, m)
struct tseg_qent *q;
struct tseg_qent *p = NULL;
struct tseg_qent *nq;
- struct tseg_qent *te;
+ struct tseg_qent *te = NULL;
struct socket *so = tp->t_inpcb->inp_socket;
int flags;
/*
+ * XXX: tcp_reass() is rather inefficient with its data structures
+ * and should be rewritten (see NetBSD for optimizations). While
+ * doing that it should move to its own file tcp_reass.c.
+ */
+
+ /*
* Call with th==0 after become established to
* force pre-ESTABLISHED data up to user socket.
*/
if (th == 0)
goto present;
- /* Allocate a new queue entry. If we can't, just drop the pkt. XXX */
- MALLOC(te, struct tseg_qent *, sizeof (struct tseg_qent), M_TSEGQ,
- M_NOWAIT);
+ /*
+ * Limit the number of segments in the reassembly queue to prevent
+ * holding on to too many segments (and thus running out of mbufs).
+ * Make sure to let the missing segment through which caused this
+ * queue. Always keep one global queue entry spare to be able to
+ * process the missing segment.
+ */
+ if (th->th_seq != tp->rcv_nxt &&
+ (tcp_reass_qsize + 1 >= tcp_reass_maxseg ||
+ tp->t_segqlen >= tcp_reass_maxqlen)) {
+ tcp_reass_overflows++;
+ tcpstat.tcps_rcvmemdrop++;
+ m_freem(m);
+ return (0);
+ }
+
+ /*
+ * Allocate a new queue entry. If we can't, or hit the zone limit
+ * just drop the pkt.
+ */
+ te = uma_zalloc(tcp_reass_zone, M_NOWAIT);
if (te == NULL) {
tcpstat.tcps_rcvmemdrop++;
m_freem(m);
return (0);
}
+ tp->t_segqlen++;
+ tcp_reass_qsize++;
/*
* Find a segment which begins after this one does.
@@ -228,7 +290,9 @@ tcp_reass(tp, th, tlenp, m)
tcpstat.tcps_rcvduppack++;
tcpstat.tcps_rcvdupbyte += *tlenp;
m_freem(m);
- FREE(te, M_TSEGQ);
+ uma_zfree(tcp_reass_zone, te);
+ tp->t_segqlen--;
+ tcp_reass_qsize--;
/*
* Try to present any queued data
* at the left window edge to the user.
@@ -263,7 +327,9 @@ tcp_reass(tp, th, tlenp, m)
nq = LIST_NEXT(q, tqe_q);
LIST_REMOVE(q, tqe_q);
m_freem(q->tqe_m);
- FREE(q, M_TSEGQ);
+ uma_zfree(tcp_reass_zone, q);
+ tp->t_segqlen--;
+ tcp_reass_qsize--;
q = nq;
}
@@ -297,7 +363,9 @@ present:
m_freem(q->tqe_m);
else
sbappendstream(&so->so_rcv, q->tqe_m);
- FREE(q, M_TSEGQ);
+ uma_zfree(tcp_reass_zone, q);
+ tp->t_segqlen--;
+ tcp_reass_qsize--;
q = nq;
} while (q && q->tqe_th->th_seq == tp->rcv_nxt);
ND6_HINT(tp);
diff --git a/sys/netinet/tcp_reass.c b/sys/netinet/tcp_reass.c
index 39d2dc1..151e083 100644
--- a/sys/netinet/tcp_reass.c
+++ b/sys/netinet/tcp_reass.c
@@ -58,6 +58,8 @@
#include <machine/cpu.h> /* before tcp_seq.h, for tcp_random18() */
+#include <vm/uma.h>
+
#include <net/if.h>
#include <net/route.h>
@@ -98,8 +100,6 @@
#include <machine/in_cksum.h>
-MALLOC_DEFINE(M_TSEGQ, "tseg_qent", "TCP segment queue entry");
-
static const int tcprexmtthresh = 3;
tcp_cc tcp_ccgen;
@@ -135,6 +135,29 @@ SYSCTL_INT(_net_inet_tcp, OID_AUTO, rfc3390, CTLFLAG_RW,
&tcp_do_rfc3390, 0,
"Enable RFC 3390 (Increasing TCP's Initial Congestion Window)");
+SYSCTL_NODE(_net_inet_tcp, OID_AUTO, reass, CTLFLAG_RW, 0,
+ "TCP Segment Reassembly Queue");
+
+static int tcp_reass_maxseg = 0;
+SYSCTL_INT(_net_inet_tcp_reass, OID_AUTO, maxsegments, CTLFLAG_RDTUN,
+ &tcp_reass_maxseg, 0,
+ "Global maximum number of TCP Segments in Reassembly Queue");
+
+int tcp_reass_qsize = 0;
+SYSCTL_INT(_net_inet_tcp_reass, OID_AUTO, cursegments, CTLFLAG_RD,
+ &tcp_reass_qsize, 0,
+ "Global number of TCP Segments currently in Reassembly Queue");
+
+static int tcp_reass_maxqlen = 48;
+SYSCTL_INT(_net_inet_tcp_reass, OID_AUTO, maxqlen, CTLFLAG_RW,
+ &tcp_reass_maxqlen, 0,
+ "Maximum number of TCP Segments per individual Reassembly Queue");
+
+static int tcp_reass_overflows = 0;
+SYSCTL_INT(_net_inet_tcp_reass, OID_AUTO, overflows, CTLFLAG_RD,
+ &tcp_reass_overflows, 0,
+ "Global number of TCP Segment Reassembly Queue Overflows");
+
struct inpcbhead tcb;
#define tcb6 tcb /* for KAME src sync over BSD*'s */
struct inpcbinfo tcbinfo;
@@ -175,6 +198,19 @@ do { \
(tp->t_flags & TF_RXWIN0SENT) == 0) && \
(tcp_delack_enabled || (tp->t_flags & TF_NEEDSYN)))
+/*
+ * Initialize TCP reassembly queue.
+ *
+ * Seeds the global segment limit tcp_reass_maxseg with nmbclusters / 16,
+ * hands it to TUNABLE_INT_FETCH under "net.inet.tcp.reass.maxsegments"
+ * (presumably so a boot-time tunable can override the default; the
+ * matching sysctl is declared CTLFLAG_RDTUN), then creates the "tcpreass"
+ * UMA zone sized for struct tseg_qent and applies tcp_reass_maxseg to it
+ * with uma_zone_set_max().  Invoked from tcp_init().
+ *
+ * tcp_reass_zone is the zone that tcp_reass(), tcp_discardcb() and
+ * tcp_drain() allocate queue entries from and free them back to.
+ */
+uma_zone_t tcp_reass_zone;
+void
+tcp_reass_init()
+{
+ tcp_reass_maxseg = nmbclusters / 16;
+ TUNABLE_INT_FETCH("net.inet.tcp.reass.maxsegments",
+ &tcp_reass_maxseg);
+ tcp_reass_zone = uma_zcreate("tcpreass", sizeof (struct tseg_qent),
+ NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_NOFREE);
+ uma_zone_set_max(tcp_reass_zone, tcp_reass_maxseg);
+}
+
static int
tcp_reass(tp, th, tlenp, m)
register struct tcpcb *tp;
@@ -185,25 +221,51 @@ tcp_reass(tp, th, tlenp, m)
struct tseg_qent *q;
struct tseg_qent *p = NULL;
struct tseg_qent *nq;
- struct tseg_qent *te;
+ struct tseg_qent *te = NULL;
struct socket *so = tp->t_inpcb->inp_socket;
int flags;
/*
+ * XXX: tcp_reass() is rather inefficient with its data structures
+ * and should be rewritten (see NetBSD for optimizations). While
+ * doing that it should move to its own file tcp_reass.c.
+ */
+
+ /*
* Call with th==0 after become established to
* force pre-ESTABLISHED data up to user socket.
*/
if (th == 0)
goto present;
- /* Allocate a new queue entry. If we can't, just drop the pkt. XXX */
- MALLOC(te, struct tseg_qent *, sizeof (struct tseg_qent), M_TSEGQ,
- M_NOWAIT);
+ /*
+ * Limit the number of segments in the reassembly queue to prevent
+ * holding on to too many segments (and thus running out of mbufs).
+ * Make sure to let the missing segment through which caused this
+ * queue. Always keep one global queue entry spare to be able to
+ * process the missing segment.
+ */
+ if (th->th_seq != tp->rcv_nxt &&
+ (tcp_reass_qsize + 1 >= tcp_reass_maxseg ||
+ tp->t_segqlen >= tcp_reass_maxqlen)) {
+ tcp_reass_overflows++;
+ tcpstat.tcps_rcvmemdrop++;
+ m_freem(m);
+ return (0);
+ }
+
+ /*
+ * Allocate a new queue entry. If we can't, or hit the zone limit
+ * just drop the pkt.
+ */
+ te = uma_zalloc(tcp_reass_zone, M_NOWAIT);
if (te == NULL) {
tcpstat.tcps_rcvmemdrop++;
m_freem(m);
return (0);
}
+ tp->t_segqlen++;
+ tcp_reass_qsize++;
/*
* Find a segment which begins after this one does.
@@ -228,7 +290,9 @@ tcp_reass(tp, th, tlenp, m)
tcpstat.tcps_rcvduppack++;
tcpstat.tcps_rcvdupbyte += *tlenp;
m_freem(m);
- FREE(te, M_TSEGQ);
+ uma_zfree(tcp_reass_zone, te);
+ tp->t_segqlen--;
+ tcp_reass_qsize--;
/*
* Try to present any queued data
* at the left window edge to the user.
@@ -263,7 +327,9 @@ tcp_reass(tp, th, tlenp, m)
nq = LIST_NEXT(q, tqe_q);
LIST_REMOVE(q, tqe_q);
m_freem(q->tqe_m);
- FREE(q, M_TSEGQ);
+ uma_zfree(tcp_reass_zone, q);
+ tp->t_segqlen--;
+ tcp_reass_qsize--;
q = nq;
}
@@ -297,7 +363,9 @@ present:
m_freem(q->tqe_m);
else
sbappendstream(&so->so_rcv, q->tqe_m);
- FREE(q, M_TSEGQ);
+ uma_zfree(tcp_reass_zone, q);
+ tp->t_segqlen--;
+ tcp_reass_qsize--;
q = nq;
} while (q && q->tqe_th->th_seq == tp->rcv_nxt);
ND6_HINT(tp);
diff --git a/sys/netinet/tcp_subr.c b/sys/netinet/tcp_subr.c
index 0878744..210a582 100644
--- a/sys/netinet/tcp_subr.c
+++ b/sys/netinet/tcp_subr.c
@@ -289,6 +289,7 @@ tcp_init()
tcp_timer_init();
syncache_init();
tcp_hc_init();
+ tcp_reass_init();
}
/*
@@ -711,7 +712,9 @@ tcp_discardcb(tp)
while ((q = LIST_FIRST(&tp->t_segq)) != NULL) {
LIST_REMOVE(q, tqe_q);
m_freem(q->tqe_m);
- FREE(q, M_TSEGQ);
+ uma_zfree(tcp_reass_zone, q);
+ tp->t_segqlen--;
+ tcp_reass_qsize--;
}
inp->inp_ppcb = NULL;
tp->t_inpcb = NULL;
@@ -772,7 +775,9 @@ tcp_drain()
!= NULL) {
LIST_REMOVE(te, tqe_q);
m_freem(te->tqe_m);
- FREE(te, M_TSEGQ);
+ uma_zfree(tcp_reass_zone, te);
+ tcpb->t_segqlen--;
+ tcp_reass_qsize--;
}
}
INP_UNLOCK(inpb);
diff --git a/sys/netinet/tcp_timewait.c b/sys/netinet/tcp_timewait.c
index 0878744..210a582 100644
--- a/sys/netinet/tcp_timewait.c
+++ b/sys/netinet/tcp_timewait.c
@@ -289,6 +289,7 @@ tcp_init()
tcp_timer_init();
syncache_init();
tcp_hc_init();
+ tcp_reass_init();
}
/*
@@ -711,7 +712,9 @@ tcp_discardcb(tp)
while ((q = LIST_FIRST(&tp->t_segq)) != NULL) {
LIST_REMOVE(q, tqe_q);
m_freem(q->tqe_m);
- FREE(q, M_TSEGQ);
+ uma_zfree(tcp_reass_zone, q);
+ tp->t_segqlen--;
+ tcp_reass_qsize--;
}
inp->inp_ppcb = NULL;
tp->t_inpcb = NULL;
@@ -772,7 +775,9 @@ tcp_drain()
!= NULL) {
LIST_REMOVE(te, tqe_q);
m_freem(te->tqe_m);
- FREE(te, M_TSEGQ);
+ uma_zfree(tcp_reass_zone, te);
+ tcpb->t_segqlen--;
+ tcp_reass_qsize--;
}
}
INP_UNLOCK(inpb);
diff --git a/sys/netinet/tcp_var.h b/sys/netinet/tcp_var.h
index e8aa435..07ebd06 100644
--- a/sys/netinet/tcp_var.h
+++ b/sys/netinet/tcp_var.h
@@ -54,8 +54,9 @@ struct tseg_qent {
struct mbuf *tqe_m; /* mbuf contains packet */
};
LIST_HEAD(tsegqe_head, tseg_qent);
-#ifdef MALLOC_DECLARE
-MALLOC_DECLARE(M_TSEGQ);
+extern int tcp_reass_qsize;
+#ifdef VM_UMA_H
+extern uma_zone_t tcp_reass_zone;
#endif
struct tcptemp {
@@ -70,7 +71,8 @@ struct tcptemp {
* Organized for 16 byte cacheline efficiency.
*/
struct tcpcb {
- struct tsegqe_head t_segq;
+ struct tsegqe_head t_segq; /* segment reassembly queue */
+ int t_segqlen; /* segment reassembly queue length */
int t_dupacks; /* consecutive dup acks recd */
struct tcptemp *unused; /* unused */
@@ -519,6 +521,7 @@ struct tcpcb *
void tcp_drain(void);
void tcp_fasttimo(void);
void tcp_init(void);
+void tcp_reass_init(void);
void tcp_input(struct mbuf *, int);
u_long tcp_maxmtu(struct in_conninfo *);
u_long tcp_maxmtu6(struct in_conninfo *);
OpenPOWER on IntegriCloud