This commit marks the first formal contribution of the "Five New TCP Congestion

Control Algorithms for FreeBSD" FreeBSD Foundation funded project. More details about the project are available at: http://caia.swin.edu.au/freebsd/5cc/ - Add a KPI and supporting infrastructure to allow modular congestion control algorithms to be used in the net stack. Algorithms can maintain per-connection state if required, and connections maintain their own algorithm pointer, which allows different connections to concurrently use different algorithms. The TCP_CONGESTION socket option can be used with getsockopt()/setsockopt() to programmatically query or change the congestion control algorithm respectively from within an application at runtime. - Integrate the framework with the TCP stack in as least intrusive a manner as possible. Care was also taken to develop the framework in a way that should allow integration with other congestion aware transport protocols (e.g. SCTP) in the future. The hope is that we will one day be able to share a single set of congestion control algorithm modules between all congestion aware transport protocols. - Introduce a new congestion recovery (TF_CONGRECOVERY) state into the TCP stack and use it to decouple the meaning of recovery from a congestion event and recovery from packet loss (TF_FASTRECOVERY) a la RFC2581. ECN and delay based congestion control protocols don't generally need to recover from packet loss and need a different way to note a congestion recovery episode within the stack. - Remove the net.inet.tcp.newreno sysctl, which simplifies some portions of code and ensures the stack always uses the appropriate mechanisms for recovering from packet loss during a congestion recovery episode. - Extract the NewReno congestion control algorithm from the TCP stack and massage it into module form. NewReno is always built into the kernel and will remain the default algorithm for the foreseeable future. Implementations of additional different algorithms will become available in the near future. 
- Bump __FreeBSD_version to 900025 and note in UPDATING that rebuilding code that relies on the size of "struct tcpcb" is required. Many thanks go to the Cisco University Research Program Fund at Community Foundation Silicon Valley and the FreeBSD Foundation. Their support of our work at the Centre for Advanced Internet Architectures, Swinburne University of Technology is greatly appreciated. In collaboration with: David Hayes <dahayes at swin edu au> and Grenville Armitage <garmitage at swin edu au> Sponsored by: Cisco URP, FreeBSD Foundation Reviewed by: rpaulo Tested by: David Hayes (and many others over the years) MFC after: 3 months
author: lstewart <lstewart@FreeBSD.org> 2010-11-12 06:41:55 +0000
committer: lstewart <lstewart@FreeBSD.org> 2010-11-12 06:41:55 +0000
commit: df9f23bf3f9344eb84a9561e04b32f758166ffac (patch)
tree: 51f95e34f4dbcfcea13f1a10b6895a71aa3845e8 /sys/netinet/tcp_usrreq.c
parent: 358939cf2851c9d7833953cc0f31bbec62619b92 (diff)
download: FreeBSD-src-df9f23bf3f9344eb84a9561e04b32f758166ffac.zip
FreeBSD-src-df9f23bf3f9344eb84a9561e04b32f758166ffac.tar.gz
1 files changed, 61 insertions, 1 deletions
diff --git a/sys/netinet/tcp_usrreq.c b/sys/netinet/tcp_usrreq.c
index f35890b..a28ddef 100644
--- a/sys/netinet/tcp_usrreq.c
+++ b/sys/netinet/tcp_usrreq.c
@@ -62,6 +62,7 @@ __FBSDID("$FreeBSD$");
 #include <net/route.h>
 #include <net/vnet.h>
 
+#include <netinet/cc.h>
 #include <netinet/in.h>
 #include <netinet/in_systm.h>
 #ifdef INET6
@@ -77,7 +78,6 @@ __FBSDID("$FreeBSD$");
 #include <netinet6/ip6_var.h>
 #include <netinet6/scope6_var.h>
 #endif
-#include <netinet/tcp.h>
 #include <netinet/tcp_fsm.h>
 #include <netinet/tcp_seq.h>
 #include <netinet/tcp_timer.h>
@@ -1242,6 +1242,8 @@ tcp_ctloutput(struct socket *so, struct sockopt *sopt)
 	struct	inpcb *inp;
 	struct	tcpcb *tp;
 	struct	tcp_info ti;
+	char buf[TCP_CA_NAME_MAX];
+	struct cc_algo *algo;
 
 	error = 0;
 	inp = sotoinpcb(so);
@@ -1351,6 +1353,54 @@ tcp_ctloutput(struct socket *so, struct sockopt *sopt)
 			error = EINVAL;
 			break;
 
+		case TCP_CONGESTION:
+			INP_WUNLOCK(inp);
+			bzero(buf, sizeof(buf));
+			error = sooptcopyin(sopt, &buf, sizeof(buf), 1);
+			if (error)
+				break;
+			INP_WLOCK_RECHECK(inp);
+			/*
+			 * Return EINVAL if we can't find the requested cc algo.
+			 */
+			error = EINVAL;
+			CC_LIST_RLOCK();
+			STAILQ_FOREACH(algo, &cc_list, entries) {
+				if (strncmp(buf, algo->name, TCP_CA_NAME_MAX)
+				    == 0) {
+					/* We've found the requested algo. */
+					error = 0;
+					/*
+					 * We hold a write lock over the tcb
+					 * so it's safe to do these things
+					 * without ordering concerns.
+					 */
+					if (CC_ALGO(tp)->cb_destroy != NULL)
+						CC_ALGO(tp)->cb_destroy(tp->ccv);
+					CC_ALGO(tp) = algo;
+					/*
+					 * If something goes pear shaped
+					 * initialising the new algo,
+					 * fall back to newreno (which
+					 * does not require initialisation).
+					 */
+					if (algo->cb_init != NULL)
+						if (algo->cb_init(tp->ccv) > 0) {
+							CC_ALGO(tp) = &newreno_cc_algo;
+							/*
+							 * The only reason init
+							 * should fail is
+							 * because of malloc.
+							 */
+							error = ENOMEM;
+						}
+					break; /* Break the STAILQ_FOREACH. */
+				}
+			}
+			CC_LIST_RUNLOCK();
+			INP_WUNLOCK(inp);
+			break;
+
 		default:
 			INP_WUNLOCK(inp);
 			error = ENOPROTOOPT;
@@ -1394,6 +1444,12 @@ tcp_ctloutput(struct socket *so, struct sockopt *sopt)
 			INP_WUNLOCK(inp);
 			error = sooptcopyout(sopt, &ti, sizeof ti);
 			break;
+		case TCP_CONGESTION:
+			bzero(buf, sizeof(buf));
+			strlcpy(buf, CC_ALGO(tp)->name, TCP_CA_NAME_MAX);
+			INP_WUNLOCK(inp);
+			error = sooptcopyout(sopt, buf, TCP_CA_NAME_MAX);
+			break;
 		default:
 			INP_WUNLOCK(inp);
 			error = ENOPROTOOPT;
@@ -1707,6 +1763,10 @@ db_print_tflags(u_int t_flags)
 		db_printf("%sTF_FASTRECOVERY", comma ? ", " : "");
 		comma = 1;
 	}
+	if (t_flags & TF_CONGRECOVERY) {
+		db_printf("%sTF_CONGRECOVERY", comma ? ", " : "");
+		comma = 1;
+	}
 	if (t_flags & TF_WASFRECOVERY) {
 		db_printf("%sTF_WASFRECOVERY", comma ? ", " : "");
 		comma = 1;
author	lstewart <lstewart@FreeBSD.org>	2010-11-12 06:41:55 +0000
committer	lstewart <lstewart@FreeBSD.org>	2010-11-12 06:41:55 +0000
commit	df9f23bf3f9344eb84a9561e04b32f758166ffac (patch)
tree	51f95e34f4dbcfcea13f1a10b6895a71aa3845e8 /sys/netinet/tcp_usrreq.c
parent	358939cf2851c9d7833953cc0f31bbec62619b92 (diff)
download	FreeBSD-src-df9f23bf3f9344eb84a9561e04b32f758166ffac.zip FreeBSD-src-df9f23bf3f9344eb84a9561e04b32f758166ffac.tar.gz