path: root/sys/netinet/tcp_reass.c
author     andre <andre@FreeBSD.org>  2004-02-24 15:27:41 +0000
committer  andre <andre@FreeBSD.org>  2004-02-24 15:27:41 +0000
commit     5ef70fe223ecca1ecc385a720742f0b16159e859 (patch)
tree       eda4f0335f73851268852863dcbbd967de416d1f /sys/netinet/tcp_reass.c
parent     e94b95684a67ec548cafe6ed6e753eecec3c60ec (diff)
Convert the tcp segment reassembly queue to UMA and limit the maximum
amount of segments it will hold.

The following tuneables and sysctls control the behaviour of the tcp
segment reassembly queue:

net.inet.tcp.reass.maxsegments (loader tuneable)
    specifies the maximum number of segments all tcp reassembly queues
    can hold (defaults to 1/16 of nmbclusters).

net.inet.tcp.reass.maxqlen
    specifies the maximum number of segments any individual tcp session
    queue can hold (defaults to 48).

net.inet.tcp.reass.cursegments (readonly)
    counts the number of segments currently in all reassembly queues.

net.inet.tcp.reass.overflows (readonly)
    counts how often either the global or local queue limit has been
    reached.

Tested by:    bms, silby
Reviewed by:  bms, silby
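As an illustrative aside (not part of the commit): the loader tuneable would
be set in /boot/loader.conf, e.g. net.inet.tcp.reass.maxsegments="4096" (value
hypothetical), while all four OIDs can be read at runtime via sysctl(8) or
sysctlbyname(3). A minimal userland sketch, assuming only the OIDs introduced
above; the program name is made up:

/* reass_stat.c -- hypothetical monitor for the new reassembly sysctls. */
#include <sys/types.h>
#include <sys/sysctl.h>

#include <stdio.h>
#include <stdlib.h>

static int
fetch(const char *oid)
{
	int val;
	size_t len = sizeof(val);

	/* sysctlbyname(3) copies the integer-valued OID out into val. */
	if (sysctlbyname(oid, &val, &len, NULL, 0) == -1) {
		perror(oid);
		exit(1);
	}
	return (val);
}

int
main(void)
{
	printf("maxsegments: %d\n", fetch("net.inet.tcp.reass.maxsegments"));
	printf("cursegments: %d\n", fetch("net.inet.tcp.reass.cursegments"));
	printf("maxqlen:     %d\n", fetch("net.inet.tcp.reass.maxqlen"));
	printf("overflows:   %d\n", fetch("net.inet.tcp.reass.overflows"));
	return (0);
}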
Diffstat (limited to 'sys/netinet/tcp_reass.c')
-rw-r--r--  sys/netinet/tcp_reass.c | 86
1 file changed, 77 insertions(+), 9 deletions(-)
diff --git a/sys/netinet/tcp_reass.c b/sys/netinet/tcp_reass.c
index 39d2dc1..151e083 100644
--- a/sys/netinet/tcp_reass.c
+++ b/sys/netinet/tcp_reass.c
@@ -58,6 +58,8 @@
#include <machine/cpu.h> /* before tcp_seq.h, for tcp_random18() */
+#include <vm/uma.h>
+
#include <net/if.h>
#include <net/route.h>
@@ -98,8 +100,6 @@
#include <machine/in_cksum.h>
-MALLOC_DEFINE(M_TSEGQ, "tseg_qent", "TCP segment queue entry");
-
static const int tcprexmtthresh = 3;
tcp_cc tcp_ccgen;
@@ -135,6 +135,29 @@ SYSCTL_INT(_net_inet_tcp, OID_AUTO, rfc3390, CTLFLAG_RW,
&tcp_do_rfc3390, 0,
"Enable RFC 3390 (Increasing TCP's Initial Congestion Window)");
+SYSCTL_NODE(_net_inet_tcp, OID_AUTO, reass, CTLFLAG_RW, 0,
+ "TCP Segment Reassembly Queue");
+
+static int tcp_reass_maxseg = 0;
+SYSCTL_INT(_net_inet_tcp_reass, OID_AUTO, maxsegments, CTLFLAG_RDTUN,
+ &tcp_reass_maxseg, 0,
+ "Global maximum number of TCP Segments in Reassembly Queue");
+
+int tcp_reass_qsize = 0;
+SYSCTL_INT(_net_inet_tcp_reass, OID_AUTO, cursegments, CTLFLAG_RD,
+ &tcp_reass_qsize, 0,
+ "Global number of TCP Segments currently in Reassembly Queue");
+
+static int tcp_reass_maxqlen = 48;
+SYSCTL_INT(_net_inet_tcp_reass, OID_AUTO, maxqlen, CTLFLAG_RW,
+ &tcp_reass_maxqlen, 0,
+ "Maximum number of TCP Segments per individual Reassembly Queue");
+
+static int tcp_reass_overflows = 0;
+SYSCTL_INT(_net_inet_tcp_reass, OID_AUTO, overflows, CTLFLAG_RD,
+ &tcp_reass_overflows, 0,
+ "Global number of TCP Segment Reassembly Queue Overflows");
+
struct inpcbhead tcb;
#define tcb6 tcb /* for KAME src sync over BSD*'s */
struct inpcbinfo tcbinfo;
@@ -175,6 +198,19 @@ do { \
(tp->t_flags & TF_RXWIN0SENT) == 0) && \
(tcp_delack_enabled || (tp->t_flags & TF_NEEDSYN)))
+/* Initialize TCP reassembly queue */
+uma_zone_t tcp_reass_zone;
+void
+tcp_reass_init()
+{
+ tcp_reass_maxseg = nmbclusters / 16;
+ TUNABLE_INT_FETCH("net.inet.tcp.reass.maxsegments",
+ &tcp_reass_maxseg);
+ tcp_reass_zone = uma_zcreate("tcpreass", sizeof (struct tseg_qent),
+ NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_NOFREE);
+ uma_zone_set_max(tcp_reass_zone, tcp_reass_maxseg);
+}
+
static int
tcp_reass(tp, th, tlenp, m)
register struct tcpcb *tp;
@@ -185,25 +221,51 @@ tcp_reass(tp, th, tlenp, m)
struct tseg_qent *q;
struct tseg_qent *p = NULL;
struct tseg_qent *nq;
- struct tseg_qent *te;
+ struct tseg_qent *te = NULL;
struct socket *so = tp->t_inpcb->inp_socket;
int flags;
/*
+ * XXX: tcp_reass() is rather inefficient with its data structures
+ * and should be rewritten (see NetBSD for optimizations). While
+ * doing that it should move to its own file tcp_reass.c.
+ */
+
+ /*
* Call with th==0 after becoming established to
* force pre-ESTABLISHED data up to user socket.
*/
if (th == 0)
goto present;
- /* Allocate a new queue entry. If we can't, just drop the pkt. XXX */
- MALLOC(te, struct tseg_qent *, sizeof (struct tseg_qent), M_TSEGQ,
- M_NOWAIT);
+ /*
+ * Limit the number of segments in the reassembly queue to prevent
+ * holding on to too many segments (and thus running out of mbufs).
+ * Make sure to let the missing segment through which caused this
+ * queue to form.  Always keep one global queue entry spare to be
+ * able to process the missing segment.
+ */
+ if (th->th_seq != tp->rcv_nxt &&
+ (tcp_reass_qsize + 1 >= tcp_reass_maxseg ||
+ tp->t_segqlen >= tcp_reass_maxqlen)) {
+ tcp_reass_overflows++;
+ tcpstat.tcps_rcvmemdrop++;
+ m_freem(m);
+ return (0);
+ }
+
+ /*
+ * Allocate a new queue entry.  If we can't, or hit the zone limit,
+ * just drop the pkt.
+ */
+ te = uma_zalloc(tcp_reass_zone, M_NOWAIT);
if (te == NULL) {
tcpstat.tcps_rcvmemdrop++;
m_freem(m);
return (0);
}
+ tp->t_segqlen++;
+ tcp_reass_qsize++;
/*
* Find a segment which begins after this one does.
@@ -228,7 +290,9 @@ tcp_reass(tp, th, tlenp, m)
tcpstat.tcps_rcvduppack++;
tcpstat.tcps_rcvdupbyte += *tlenp;
m_freem(m);
- FREE(te, M_TSEGQ);
+ uma_zfree(tcp_reass_zone, te);
+ tp->t_segqlen--;
+ tcp_reass_qsize--;
/*
* Try to present any queued data
* at the left window edge to the user.
@@ -263,7 +327,9 @@ tcp_reass(tp, th, tlenp, m)
nq = LIST_NEXT(q, tqe_q);
LIST_REMOVE(q, tqe_q);
m_freem(q->tqe_m);
- FREE(q, M_TSEGQ);
+ uma_zfree(tcp_reass_zone, q);
+ tp->t_segqlen--;
+ tcp_reass_qsize--;
q = nq;
}
@@ -297,7 +363,9 @@ present:
m_freem(q->tqe_m);
else
sbappendstream(&so->so_rcv, q->tqe_m);
- FREE(q, M_TSEGQ);
+ uma_zfree(tcp_reass_zone, q);
+ tp->t_segqlen--;
+ tcp_reass_qsize--;
q = nq;
} while (q && q->tqe_th->th_seq == tp->rcv_nxt);
ND6_HINT(tp);
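To make the drop policy above concrete, here is a userland toy model (not the
kernel code; the limits, sequence numbers, and struct names are made up). It
mirrors the check added before the allocation: a segment other than the
expected one (seq != rcv_nxt) is refused once the global or per-connection
limit is hit, while the missing segment itself always gets through:

/*
 * toy_reass.c -- userland model of the queue-limit check added above.
 */
#include <stdio.h>

#define GLOBAL_MAX	8	/* stand-in for tcp_reass_maxseg */
#define PERQ_MAX	3	/* stand-in for tcp_reass_maxqlen */

static int global_qsize;	/* stand-in for tcp_reass_qsize */

struct conn {
	unsigned rcv_nxt;	/* next expected sequence number */
	int	 qlen;		/* stand-in for tp->t_segqlen */
};

/* Returns 1 if the segment is queued, 0 if the limits force a drop. */
static int
queue_segment(struct conn *c, unsigned seq)
{
	if (seq != c->rcv_nxt &&
	    (global_qsize + 1 >= GLOBAL_MAX || c->qlen >= PERQ_MAX))
		return (0);	/* counted as an overflow in the kernel */
	c->qlen++;
	global_qsize++;
	return (1);
}

int
main(void)
{
	struct conn c = { .rcv_nxt = 100, .qlen = 0 };
	unsigned seqs[] = { 200, 300, 400, 500, 100 };

	for (size_t i = 0; i < sizeof(seqs) / sizeof(seqs[0]); i++)
		printf("seq %u: %s\n", seqs[i],
		    queue_segment(&c, seqs[i]) ? "queued" : "dropped");
	/* seq 500 is dropped (per-queue limit); seq 100 still gets in. */
	return (0);
}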