author     andre <andre@FreeBSD.org>    2004-02-24 15:27:41 +0000
committer  andre <andre@FreeBSD.org>    2004-02-24 15:27:41 +0000
commit     5ef70fe223ecca1ecc385a720742f0b16159e859 (patch)
tree       eda4f0335f73851268852863dcbbd967de416d1f /sys/netinet
parent     e94b95684a67ec548cafe6ed6e753eecec3c60ec (diff)
Convert the tcp segment reassembly queue to UMA and limit the maximum
amount of segments it will hold.

The following tuneables and sysctls control the behaviour of the tcp
segment reassembly queue:

 net.inet.tcp.reass.maxsegments (loader tuneable)
  specifies the maximum number of segments all tcp reassembly queues
  can hold (defaults to 1/16 of nmbclusters).

 net.inet.tcp.reass.maxqlen
  specifies the maximum number of segments any individual tcp session
  queue can hold (defaults to 48).

 net.inet.tcp.reass.cursegments (readonly)
  counts the number of segments currently in all reassembly queues.

 net.inet.tcp.reass.overflows (readonly)
  counts how often either the global or local queue limit has been
  reached.

Tested by:	bms, silby
Reviewed by:	bms, silby
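Since maxsegments is a loader tuneable (CTLFLAG_RDTUN) it can only be set
at boot, e.g. from /boot/loader.conf, while maxqlen may be changed at
runtime with sysctl(8). Purely as an illustration (not part of this
commit), the two read-only counters can be polled from userland with
sysctlbyname(3):

	/* Illustrative only: poll the new read-only reassembly counters. */
	#include <sys/types.h>
	#include <sys/sysctl.h>
	#include <stdio.h>

	int
	main(void)
	{
		int cur, over;
		size_t len;

		len = sizeof(cur);
		if (sysctlbyname("net.inet.tcp.reass.cursegments", &cur,
		    &len, NULL, 0) == -1) {
			perror("cursegments");
			return (1);
		}
		len = sizeof(over);
		if (sysctlbyname("net.inet.tcp.reass.overflows", &over,
		    &len, NULL, 0) == -1) {
			perror("overflows");
			return (1);
		}
		printf("segments queued: %d, overflows: %d\n", cur, over);
		return (0);
	}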
Diffstat (limited to 'sys/netinet')
-rw-r--r--   sys/netinet/tcp_input.c   86
-rw-r--r--   sys/netinet/tcp_subr.c     9
-rw-r--r--   sys/netinet/tcp_var.h      9
3 files changed, 90 insertions, 14 deletions
diff --git a/sys/netinet/tcp_input.c b/sys/netinet/tcp_input.c
index 39d2dc1..151e083 100644
--- a/sys/netinet/tcp_input.c
+++ b/sys/netinet/tcp_input.c
@@ -58,6 +58,8 @@
#include <machine/cpu.h> /* before tcp_seq.h, for tcp_random18() */
+#include <vm/uma.h>
+
#include <net/if.h>
#include <net/route.h>
@@ -98,8 +100,6 @@
#include <machine/in_cksum.h>
-MALLOC_DEFINE(M_TSEGQ, "tseg_qent", "TCP segment queue entry");
-
static const int tcprexmtthresh = 3;
tcp_cc tcp_ccgen;
@@ -135,6 +135,29 @@ SYSCTL_INT(_net_inet_tcp, OID_AUTO, rfc3390, CTLFLAG_RW,
&tcp_do_rfc3390, 0,
"Enable RFC 3390 (Increasing TCP's Initial Congestion Window)");
+SYSCTL_NODE(_net_inet_tcp, OID_AUTO, reass, CTLFLAG_RW, 0,
+ "TCP Segment Reassembly Queue");
+
+static int tcp_reass_maxseg = 0;
+SYSCTL_INT(_net_inet_tcp_reass, OID_AUTO, maxsegments, CTLFLAG_RDTUN,
+ &tcp_reass_maxseg, 0,
+ "Global maximum number of TCP Segments in Reassembly Queue");
+
+int tcp_reass_qsize = 0;
+SYSCTL_INT(_net_inet_tcp_reass, OID_AUTO, cursegments, CTLFLAG_RD,
+ &tcp_reass_qsize, 0,
+ "Global number of TCP Segments currently in Reassembly Queue");
+
+static int tcp_reass_maxqlen = 48;
+SYSCTL_INT(_net_inet_tcp_reass, OID_AUTO, maxqlen, CTLFLAG_RW,
+ &tcp_reass_maxqlen, 0,
+ "Maximum number of TCP Segments per individual Reassembly Queue");
+
+static int tcp_reass_overflows = 0;
+SYSCTL_INT(_net_inet_tcp_reass, OID_AUTO, overflows, CTLFLAG_RD,
+ &tcp_reass_overflows, 0,
+ "Global number of TCP Segment Reassembly Queue Overflows");
+
struct inpcbhead tcb;
#define tcb6 tcb /* for KAME src sync over BSD*'s */
struct inpcbinfo tcbinfo;
@@ -175,6 +198,19 @@ do { \
(tp->t_flags & TF_RXWIN0SENT) == 0) && \
(tcp_delack_enabled || (tp->t_flags & TF_NEEDSYN)))
+/* Initialize TCP reassembly queue */
+uma_zone_t tcp_reass_zone;
+void
+tcp_reass_init()
+{
+ tcp_reass_maxseg = nmbclusters / 16;
+ TUNABLE_INT_FETCH("net.inet.tcp.reass.maxsegments",
+ &tcp_reass_maxseg);
+ tcp_reass_zone = uma_zcreate("tcpreass", sizeof (struct tseg_qent),
+ NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_NOFREE);
+ uma_zone_set_max(tcp_reass_zone, tcp_reass_maxseg);
+}
+
static int
tcp_reass(tp, th, tlenp, m)
register struct tcpcb *tp;
@@ -185,25 +221,51 @@ tcp_reass(tp, th, tlenp, m)
struct tseg_qent *q;
struct tseg_qent *p = NULL;
struct tseg_qent *nq;
- struct tseg_qent *te;
+ struct tseg_qent *te = NULL;
struct socket *so = tp->t_inpcb->inp_socket;
int flags;
/*
+ * XXX: tcp_reass() is rather inefficient with its data structures
+ * and should be rewritten (see NetBSD for optimizations). While
+ * doing that it should move to its own file tcp_reass.c.
+ */
+
+ /*
* Call with th==0 after become established to
* force pre-ESTABLISHED data up to user socket.
*/
if (th == 0)
goto present;
- /* Allocate a new queue entry. If we can't, just drop the pkt. XXX */
- MALLOC(te, struct tseg_qent *, sizeof (struct tseg_qent), M_TSEGQ,
- M_NOWAIT);
+ /*
+ * Limit the number of segments in the reassembly queue to prevent
+ * holding on to too many segments (and thus running out of mbufs).
+ * Make sure to let the missing segment through which caused this
+ * queue. Always keep one global queue entry spare to be able to
+ * process the missing segment.
+ */
+ if (th->th_seq != tp->rcv_nxt &&
+ (tcp_reass_qsize + 1 >= tcp_reass_maxseg ||
+ tp->t_segqlen >= tcp_reass_maxqlen)) {
+ tcp_reass_overflows++;
+ tcpstat.tcps_rcvmemdrop++;
+ m_freem(m);
+ return (0);
+ }
+
+ /*
+ * Allocate a new queue entry. If we can't, or hit the zone limit
+ * just drop the pkt.
+ */
+ te = uma_zalloc(tcp_reass_zone, M_NOWAIT);
if (te == NULL) {
tcpstat.tcps_rcvmemdrop++;
m_freem(m);
return (0);
}
+ tp->t_segqlen++;
+ tcp_reass_qsize++;
/*
* Find a segment which begins after this one does.
@@ -228,7 +290,9 @@ tcp_reass(tp, th, tlenp, m)
tcpstat.tcps_rcvduppack++;
tcpstat.tcps_rcvdupbyte += *tlenp;
m_freem(m);
- FREE(te, M_TSEGQ);
+ uma_zfree(tcp_reass_zone, te);
+ tp->t_segqlen--;
+ tcp_reass_qsize--;
/*
* Try to present any queued data
* at the left window edge to the user.
@@ -263,7 +327,9 @@ tcp_reass(tp, th, tlenp, m)
nq = LIST_NEXT(q, tqe_q);
LIST_REMOVE(q, tqe_q);
m_freem(q->tqe_m);
- FREE(q, M_TSEGQ);
+ uma_zfree(tcp_reass_zone, q);
+ tp->t_segqlen--;
+ tcp_reass_qsize--;
q = nq;
}
@@ -297,7 +363,9 @@ present:
m_freem(q->tqe_m);
else
sbappendstream(&so->so_rcv, q->tqe_m);
- FREE(q, M_TSEGQ);
+ uma_zfree(tcp_reass_zone, q);
+ tp->t_segqlen--;
+ tcp_reass_qsize--;
q = nq;
} while (q && q->tqe_th->th_seq == tp->rcv_nxt);
ND6_HINT(tp);
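The core of the change above is the switch from per-packet MALLOC/FREE to
a dedicated UMA zone capped with uma_zone_set_max(), so that once the cap
is reached an M_NOWAIT allocation fails instead of sleeping in the input
path. A minimal kernel-side sketch of that pattern (the example_ent type,
example_zone, and limit value are hypothetical, not code from this
commit):

	/* Hedged sketch of the capped-zone pattern; kernel-only API. */
	#include <sys/param.h>
	#include <sys/errno.h>
	#include <sys/malloc.h>		/* M_NOWAIT */
	#include <vm/uma.h>

	struct example_ent {
		int	ee_data;
	};

	static uma_zone_t example_zone;

	static void
	example_zone_init(int maxitems)
	{
		/* Fixed-size items; UMA_ZONE_NOFREE keeps slabs allocated. */
		example_zone = uma_zcreate("example",
		    sizeof(struct example_ent),
		    NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_NOFREE);
		/* Cap the zone; past this uma_zalloc(M_NOWAIT) returns NULL. */
		uma_zone_set_max(example_zone, maxitems);
	}

	static int
	example_enqueue(void)
	{
		struct example_ent *ee;

		ee = uma_zalloc(example_zone, M_NOWAIT);
		if (ee == NULL)
			return (ENOBUFS);	/* limit hit: drop, as above */
		/* ... link ee into a queue ... */
		uma_zfree(example_zone, ee);
		return (0);
	}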
diff --git a/sys/netinet/tcp_subr.c b/sys/netinet/tcp_subr.c
index 0878744..210a582 100644
--- a/sys/netinet/tcp_subr.c
+++ b/sys/netinet/tcp_subr.c
@@ -289,6 +289,7 @@ tcp_init()
tcp_timer_init();
syncache_init();
tcp_hc_init();
+ tcp_reass_init();
}
/*
@@ -711,7 +712,9 @@ tcp_discardcb(tp)
while ((q = LIST_FIRST(&tp->t_segq)) != NULL) {
LIST_REMOVE(q, tqe_q);
m_freem(q->tqe_m);
- FREE(q, M_TSEGQ);
+ uma_zfree(tcp_reass_zone, q);
+ tp->t_segqlen--;
+ tcp_reass_qsize--;
}
inp->inp_ppcb = NULL;
tp->t_inpcb = NULL;
@@ -772,7 +775,9 @@ tcp_drain()
!= NULL) {
LIST_REMOVE(te, tqe_q);
m_freem(te->tqe_m);
- FREE(te, M_TSEGQ);
+ uma_zfree(tcp_reass_zone, te);
+ tcpb->t_segqlen--;
+ tcp_reass_qsize--;
}
}
INP_UNLOCK(inpb);
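The tcp_discardcb() and tcp_drain() hunks repeat one idiom: pop every
entry off the per-connection list, free it back to the zone, and keep
the per-queue and global counters in step with every free. A
self-contained illustration of that idiom (hypothetical example_ent
type; userland free() standing in for uma_zfree()):

	/* The drain idiom from the hunks above, via <sys/queue.h>. */
	#include <sys/queue.h>
	#include <stdlib.h>

	struct example_ent {
		LIST_ENTRY(example_ent)	ee_link;
	};
	LIST_HEAD(example_head, example_ent);

	static int example_qsize;	/* global, cf. tcp_reass_qsize */

	static void
	example_drain(struct example_head *head, int *qlen)
	{
		struct example_ent *ee;

		while ((ee = LIST_FIRST(head)) != NULL) {
			LIST_REMOVE(ee, ee_link);  /* unlink before freeing */
			free(ee);		/* uma_zfree() in the kernel */
			(*qlen)--;		/* per-queue, cf. t_segqlen */
			example_qsize--;	/* global counter */
		}
	}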
diff --git a/sys/netinet/tcp_var.h b/sys/netinet/tcp_var.h
index e8aa435..07ebd06 100644
--- a/sys/netinet/tcp_var.h
+++ b/sys/netinet/tcp_var.h
@@ -54,8 +54,9 @@ struct tseg_qent {
struct mbuf *tqe_m; /* mbuf contains packet */
};
LIST_HEAD(tsegqe_head, tseg_qent);
-#ifdef MALLOC_DECLARE
-MALLOC_DECLARE(M_TSEGQ);
+extern int tcp_reass_qsize;
+#ifdef VM_UMA_H
+extern uma_zone_t tcp_reass_zone;
#endif
struct tcptemp {
@@ -70,7 +71,8 @@ struct tcptemp {
* Organized for 16 byte cacheline efficiency.
*/
struct tcpcb {
- struct tsegqe_head t_segq;
+ struct tsegqe_head t_segq; /* segment reassembly queue */
+ int t_segqlen; /* segment reassembly queue length */
int t_dupacks; /* consecutive dup acks recd */
struct tcptemp *unused; /* unused */
@@ -519,6 +521,7 @@ struct tcpcb *
void tcp_drain(void);
void tcp_fasttimo(void);
void tcp_init(void);
+void tcp_reass_init(void);
void tcp_input(struct mbuf *, int);
u_long tcp_maxmtu(struct in_conninfo *);
u_long tcp_maxmtu6(struct in_conninfo *);
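One detail in the tcp_var.h hunk: the uma_zone_t type is only known once
<vm/uma.h> has been included, so the new extern is wrapped in #ifdef
VM_UMA_H (the guard macro that header defines), while the plain int
counter stays visible to every includer. A hypothetical header using the
same idiom:

	/* example_var.h -- same guard idiom as the tcp_var.h hunk above */
	#ifdef VM_UMA_H				/* defined by <vm/uma.h> */
	extern uma_zone_t example_zone;		/* UMA-aware files only */
	#endif
	extern int example_qsize;		/* always visible */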