summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: glebius <glebius@FreeBSD.org> 2016-01-22 02:07:48 +0000
committer: glebius <glebius@FreeBSD.org> 2016-01-22 02:07:48 +0000
commit: 40ba1ae95a65027a63267d57c7b0fee6e030bbd2 (patch)
tree: dddc38b9e03f3559cc94d54c711282c48c319bd8
parent: 0840cf26409ce91dd72208f7439fe1b11f4204c8 (diff)
download: FreeBSD-src-40ba1ae95a65027a63267d57c7b0fee6e030bbd2.zip
          FreeBSD-src-40ba1ae95a65027a63267d57c7b0fee6e030bbd2.tar.gz
Provide new socket option TCP_CCALGOOPT, which stands for TCP congestion
control algorithm options. The argument is variable length and is opaque
to TCP, forwarded directly to the algorithm's ctl_output method.

Provide new includes directory netinet/cc, where algorithm specific
headers can be installed.

The new API doesn't yet have any in tree consumers.

The original code was written by lstewart.

Reviewed by: rrs, emax
Sponsored by: Netflix
Differential Revision: https://reviews.freebsd.org/D711
-rw-r--r-- etc/mtree/BSD.include.dist 2
-rw-r--r-- include/Makefile 1
-rw-r--r-- share/man/man4/mod_cc.4 14
-rw-r--r-- share/man/man4/tcp.4 7
-rw-r--r-- share/man/man9/mod_cc.9 20
-rw-r--r-- sys/netinet/tcp.h 1
-rw-r--r-- sys/netinet/tcp_cc.h 3
-rw-r--r-- sys/netinet/tcp_usrreq.c 28
8 files changed, 70 insertions, 6 deletions
diff --git a/etc/mtree/BSD.include.dist b/etc/mtree/BSD.include.dist
index bb0b26d..88e80e6 100644
--- a/etc/mtree/BSD.include.dist
+++ b/etc/mtree/BSD.include.dist
@@ -270,6 +270,8 @@
..
..
netinet
+ cc
+ ..
..
netinet6
..
diff --git a/include/Makefile b/include/Makefile
index c75de6e..8033de1 100644
--- a/include/Makefile
+++ b/include/Makefile
@@ -53,6 +53,7 @@ LSUBDIRS= cam/ata cam/scsi \
geom/raid geom/raid3 geom/shsec geom/stripe geom/virstor \
net/altq \
netgraph/atm netgraph/netflow \
+ netinet/cc \
security/audit \
security/mac_biba security/mac_bsdextended security/mac_lomac \
security/mac_mls security/mac_partition \
diff --git a/share/man/man4/mod_cc.4 b/share/man/man4/mod_cc.4
index f5f4493..4712a39 100644
--- a/share/man/man4/mod_cc.4
+++ b/share/man/man4/mod_cc.4
@@ -30,7 +30,7 @@
.\"
.\" $FreeBSD$
.\"
-.Dd January 12, 2015
+.Dd January 21, 2016
.Dt MOD_CC 4
.Os
.Sh NAME
@@ -49,7 +49,9 @@ using the
facility.
.Pp
The default algorithm is NewReno, and all connections use the default unless
-explicitly overridden using the TCP_CONGESTION socket option (see
+explicitly overridden using the
+.Dv TCP_CONGESTION
+socket option (see
.Xr tcp 4
for details).
The default can be changed using a
@@ -57,6 +59,14 @@ The default can be changed using a
MIB variable detailed in the
.Sx MIB Variables
section below.
+.Pp
+Algorithm specific parameters can be set or queried using the
+.Dv TCP_CCALGOOPT
+socket option (see
+.Xr tcp 4
+for details).
+Callers must pass a pointer to algorithm specific data, and specify
+its size.
.Sh MIB Variables
The framework exposes the following variables in the
.Va net.inet.tcp.cc
diff --git a/share/man/man4/tcp.4 b/share/man/man4/tcp.4
index 8c5887f..de993e7 100644
--- a/share/man/man4/tcp.4
+++ b/share/man/man4/tcp.4
@@ -34,7 +34,7 @@
.\" From: @(#)tcp.4 8.1 (Berkeley) 6/5/93
.\" $FreeBSD$
.\"
-.Dd October 27, 2015
+.Dd January 21, 2016
.Dt TCP 4
.Os
.Sh NAME
@@ -137,6 +137,11 @@ send window size,
receive window size,
and
bandwidth-controlled window space.
+.It Dv TCP_CCALGOOPT
+Set or query congestion control algorithm specific parameters.
+See
+.Xr mod_cc 4
+for details.
.It Dv TCP_CONGESTION
Select or query the congestion control algorithm that TCP will use for the
connection.
diff --git a/share/man/man9/mod_cc.9 b/share/man/man9/mod_cc.9
index f1cd5be..05205ed 100644
--- a/share/man/man9/mod_cc.9
+++ b/share/man/man9/mod_cc.9
@@ -31,7 +31,7 @@
.\"
.\" $FreeBSD$
.\"
-.Dd December 26, 2014
+.Dd January 21, 2016
.Dt MOD_CC 9
.Os
.Sh NAME
@@ -40,7 +40,8 @@
.Nm CCV
.Nd Modular Congestion Control
.Sh SYNOPSIS
-.In netinet/cc.h
+.In netinet/tcp.h
+.In netinet/tcp_cc.h
.In netinet/cc/cc_module.h
.Fn DECLARE_CC_MODULE "ccname" "ccalgo"
.Fn CCV "ccv" "what"
@@ -74,6 +75,7 @@ struct cc_algo {
void (*cong_signal) (struct cc_var *ccv, uint32_t type);
void (*post_recovery) (struct cc_var *ccv);
void (*after_idle) (struct cc_var *ccv);
+ int (*ctl_output)(struct cc_var *, struct sockopt *, void *);
};
.Ed
.Pp
@@ -166,6 +168,20 @@ function is called when data transfer resumes after an idle period.
It should be implemented to adjust state as required.
.Pp
The
+.Va ctl_output
+function is called when
+.Xr getsockopt 2
+or
+.Xr setsockopt 2
+is called on a
+.Xr tcp 4
+socket with the
+.Va struct sockopt
+pointer forwarded unmodified from the TCP control, and a
+.Va void *
+pointer to an algorithm specific argument.
+.Pp
+The
.Fn DECLARE_CC_MODULE
macro provides a convenient wrapper around the
.Xr DECLARE_MODULE 9
diff --git a/sys/netinet/tcp.h b/sys/netinet/tcp.h
index bfc8073..4703810 100644
--- a/sys/netinet/tcp.h
+++ b/sys/netinet/tcp.h
@@ -165,6 +165,7 @@ struct tcphdr {
#define TCP_MD5SIG 16 /* use MD5 digests (RFC2385) */
#define TCP_INFO 32 /* retrieve tcp_info structure */
#define TCP_CONGESTION 64 /* get/set congestion control algorithm */
+#define TCP_CCALGOOPT 65 /* get/set cc algorithm specific options */
#define TCP_KEEPINIT 128 /* N, time to establish connection */
#define TCP_KEEPIDLE 256 /* L,N,X start keepalives after this period */
#define TCP_KEEPINTVL 512 /* L,N interval between keepalives */
diff --git a/sys/netinet/tcp_cc.h b/sys/netinet/tcp_cc.h
index 4a2b0c8..d90cd19 100644
--- a/sys/netinet/tcp_cc.h
+++ b/sys/netinet/tcp_cc.h
@@ -151,6 +151,9 @@ struct cc_algo {
/* Called for an additional ECN processing apart from RFC3168. */
void (*ecnpkt_handler)(struct cc_var *ccv);
+ /* Called for {get|set}sockopt() on a TCP socket with TCP_CCALGOOPT. */
+ int (*ctl_output)(struct cc_var *, struct sockopt *, void *);
+
STAILQ_ENTRY (cc_algo) entries;
};
diff --git a/sys/netinet/tcp_usrreq.c b/sys/netinet/tcp_usrreq.c
index 29e92b2..4b3150b 100644
--- a/sys/netinet/tcp_usrreq.c
+++ b/sys/netinet/tcp_usrreq.c
@@ -1480,7 +1480,33 @@ tcp_default_ctloutput(struct socket *so, struct sockopt *sopt, struct inpcb *inp
struct tcp_info ti;
struct cc_algo *algo;
char *buf;
-
+
+ /*
+ * For TCP_CCALGOOPT forward the control to CC module, for both
+ * SOPT_SET and SOPT_GET.
+ */
+ switch (sopt->sopt_name) {
+ case TCP_CCALGOOPT:
+ INP_WUNLOCK(inp);
+ buf = malloc(sopt->sopt_valsize, M_TEMP, M_WAITOK | M_ZERO);
+ error = sooptcopyin(sopt, buf, sopt->sopt_valsize,
+ sopt->sopt_valsize);
+ if (error) {
+ free(buf, M_TEMP);
+ return (error);
+ }
+ INP_WLOCK_RECHECK(inp);
+ if (CC_ALGO(tp)->ctl_output != NULL)
+ error = CC_ALGO(tp)->ctl_output(tp->ccv, sopt, buf);
+ else
+ error = ENOENT;
+ INP_WUNLOCK(inp);
+ if (error == 0 && sopt->sopt_dir == SOPT_GET)
+ error = sooptcopyout(sopt, buf, sopt->sopt_valsize);
+ free(buf, M_TEMP);
+ return (error);
+ }
+
switch (sopt->sopt_dir) {
case SOPT_SET:
switch (sopt->sopt_name) {
OpenPOWER on IntegriCloud