diff options
Diffstat (limited to 'sys/netinet')
-rw-r--r-- | sys/netinet/cc.h | 9 | ||||
-rw-r--r-- | sys/netinet/cc/cc.c | 112 | ||||
-rw-r--r-- | sys/netinet/tcp_subr.c | 65 | ||||
-rw-r--r-- | sys/netinet/tcp_var.h | 1 |
4 files changed, 110 insertions, 77 deletions
diff --git a/sys/netinet/cc.h b/sys/netinet/cc.h index 6f24f11..aaa1d67 100644 --- a/sys/netinet/cc.h +++ b/sys/netinet/cc.h @@ -58,11 +58,14 @@ extern STAILQ_HEAD(cc_head, cc_algo) cc_list; extern const int tcprexmtthresh; extern struct cc_algo newreno_cc_algo; +/* Per-netstack bits. */ +VNET_DECLARE(struct cc_algo *, default_cc_ptr); +#define V_default_cc_ptr VNET(default_cc_ptr) + /* Define the new net.inet.tcp.cc sysctl tree. */ SYSCTL_DECL(_net_inet_tcp_cc); /* CC housekeeping functions. */ -void cc_init(void); int cc_register_algo(struct cc_algo *add_cc); int cc_deregister_algo(struct cc_algo *remove_cc); @@ -147,7 +150,7 @@ struct cc_algo { #define CC_DATA(tp) ((tp)->ccv->cc_data) /* Macro to obtain the system default CC algo's struct ptr. */ -#define CC_DEFAULT() STAILQ_FIRST(&cc_list) +#define CC_DEFAULT() V_default_cc_ptr extern struct rwlock cc_list_lock; #define CC_LIST_LOCK_INIT() rw_init(&cc_list_lock, "cc_list") @@ -156,6 +159,6 @@ extern struct rwlock cc_list_lock; #define CC_LIST_RUNLOCK() rw_runlock(&cc_list_lock) #define CC_LIST_WLOCK() rw_wlock(&cc_list_lock) #define CC_LIST_WUNLOCK() rw_wunlock(&cc_list_lock) -#define CC_LIST_WLOCK_ASSERT() rw_assert(&cc_list_lock, RA_WLOCKED) +#define CC_LIST_LOCK_ASSERT() rw_assert(&cc_list_lock, RA_LOCKED) #endif /* _NETINET_CC_H_ */ diff --git a/sys/netinet/cc/cc.c b/sys/netinet/cc/cc.c index 4643ca4..f075327 100644 --- a/sys/netinet/cc/cc.c +++ b/sys/netinet/cc/cc.c @@ -81,24 +81,7 @@ struct cc_head cc_list = STAILQ_HEAD_INITIALIZER(cc_list); /* Protects the cc_list TAILQ. */ struct rwlock cc_list_lock; -/* - * Set the default CC algorithm to new_default. The default is identified - * by being the first element in the cc_list TAILQ. - */ -static void -cc_set_default(struct cc_algo *new_default) -{ - CC_LIST_WLOCK_ASSERT(); - - /* - * Make the requested system default CC algorithm the first element in - * the list if it isn't already. - */ - if (new_default != CC_DEFAULT()) { - STAILQ_REMOVE(&cc_list, new_default, cc_algo, entries); - STAILQ_INSERT_HEAD(&cc_list, new_default, entries); - } -} +VNET_DEFINE(struct cc_algo *, default_cc_ptr) = &newreno_cc_algo; /* * Sysctl handler to show and change the default CC algorithm. @@ -106,14 +89,13 @@ cc_set_default(struct cc_algo *new_default) static int cc_default_algo(SYSCTL_HANDLER_ARGS) { + char default_cc[TCP_CA_NAME_MAX]; struct cc_algo *funcs; int err, found; err = found = 0; if (req->newptr == NULL) { - char default_cc[TCP_CA_NAME_MAX]; - /* Just print the current default. */ CC_LIST_RLOCK(); strlcpy(default_cc, CC_DEFAULT()->name, TCP_CA_NAME_MAX); @@ -121,15 +103,15 @@ cc_default_algo(SYSCTL_HANDLER_ARGS) err = sysctl_handle_string(oidp, default_cc, 1, req); } else { /* Find algo with specified name and set it to default. */ - CC_LIST_WLOCK(); + CC_LIST_RLOCK(); STAILQ_FOREACH(funcs, &cc_list, entries) { if (strncmp((char *)req->newptr, funcs->name, TCP_CA_NAME_MAX) == 0) { found = 1; - cc_set_default(funcs); + V_default_cc_ptr = funcs; } } - CC_LIST_WUNLOCK(); + CC_LIST_RUNLOCK(); if (!found) err = ESRCH; @@ -174,10 +156,32 @@ cc_list_available(SYSCTL_HANDLER_ARGS) } /* + * Reset the default CC algo to NewReno for any netstack which is using the algo + * that is about to go away as its default. + */ +static void +cc_checkreset_default(struct cc_algo *remove_cc) +{ + VNET_ITERATOR_DECL(vnet_iter); + + CC_LIST_LOCK_ASSERT(); + + VNET_LIST_RLOCK_NOSLEEP(); + VNET_FOREACH(vnet_iter) { + CURVNET_SET(vnet_iter); + if (strncmp(CC_DEFAULT()->name, remove_cc->name, + TCP_CA_NAME_MAX) == 0) + V_default_cc_ptr = &newreno_cc_algo; + CURVNET_RESTORE(); + } + VNET_LIST_RUNLOCK_NOSLEEP(); +} + +/* * Initialise CC subsystem on system boot. */ -void -cc_init() +static void +cc_init(void) { CC_LIST_LOCK_INIT(); STAILQ_INIT(&cc_list); @@ -190,8 +194,6 @@ int cc_deregister_algo(struct cc_algo *remove_cc) { struct cc_algo *funcs, *tmpfuncs; - struct tcpcb *tp; - struct inpcb *inp; int err; err = ENOENT; @@ -204,58 +206,22 @@ cc_deregister_algo(struct cc_algo *remove_cc) CC_LIST_WLOCK(); STAILQ_FOREACH_SAFE(funcs, &cc_list, entries, tmpfuncs) { if (funcs == remove_cc) { - /* - * If we're removing the current system default, - * reset the default to newreno. - */ - if (strncmp(CC_DEFAULT()->name, remove_cc->name, - TCP_CA_NAME_MAX) == 0) - cc_set_default(&newreno_cc_algo); - + cc_checkreset_default(remove_cc); STAILQ_REMOVE(&cc_list, funcs, cc_algo, entries); err = 0; break; } } CC_LIST_WUNLOCK(); - - if (!err) { + + if (!err) /* - * Check all active control blocks and change any that are - * using this algorithm back to newreno. If the algorithm that - * was in use requires cleanup code to be run, call it. - * - * New connections already part way through being initialised - * with the CC algo we're removing will not race with this code - * because the INP_INFO_WLOCK is held during initialisation. - * We therefore don't enter the loop below until the connection - * list has stabilised. + * XXXLAS: + * - We may need to handle non-zero return values in future. + * - If we add CC framework support for protocols other than + * TCP, we may want a more generic way to handle this step. */ - INP_INFO_RLOCK(&V_tcbinfo); - LIST_FOREACH(inp, &V_tcb, inp_list) { - INP_WLOCK(inp); - /* Important to skip tcptw structs. */ - if (!(inp->inp_flags & INP_TIMEWAIT) && - (tp = intotcpcb(inp)) != NULL) { - /* - * By holding INP_WLOCK here, we are - * assured that the connection is not - * currently executing inside the CC - * module's functions i.e. it is safe to - * make the switch back to newreno. - */ - if (CC_ALGO(tp) == remove_cc) { - tmpfuncs = CC_ALGO(tp); - /* Newreno does not require any init. */ - CC_ALGO(tp) = &newreno_cc_algo; - if (tmpfuncs->cb_destroy != NULL) - tmpfuncs->cb_destroy(tp->ccv); - } - } - INP_WUNLOCK(inp); - } - INP_INFO_RUNLOCK(&V_tcbinfo); - } + tcp_ccalgounload(remove_cc); return (err); } @@ -328,11 +294,13 @@ cc_modevent(module_t mod, int event_type, void *data) return (err); } +SYSINIT(cc, SI_SUB_PROTO_IFATTACHDOMAIN, SI_ORDER_FIRST, cc_init, NULL); + /* Declare sysctl tree and populate it. */ SYSCTL_NODE(_net_inet_tcp, OID_AUTO, cc, CTLFLAG_RW, NULL, "congestion control related settings"); -SYSCTL_PROC(_net_inet_tcp_cc, OID_AUTO, algorithm, CTLTYPE_STRING|CTLFLAG_RW, +SYSCTL_VNET_PROC(_net_inet_tcp_cc, OID_AUTO, algorithm, CTLTYPE_STRING|CTLFLAG_RW, NULL, 0, cc_default_algo, "A", "default congestion control algorithm"); SYSCTL_PROC(_net_inet_tcp_cc, OID_AUTO, available, CTLTYPE_STRING|CTLFLAG_RD, diff --git a/sys/netinet/tcp_subr.c b/sys/netinet/tcp_subr.c index e6e7ca4..eebc023 100644 --- a/sys/netinet/tcp_subr.c +++ b/sys/netinet/tcp_subr.c @@ -278,8 +278,6 @@ tcp_init(void) { int hashsize; - cc_init(); - hashsize = TCBHASHSIZE; TUNABLE_INT_FETCH("net.inet.tcp.tcbhashsize", &hashsize); if (!powerof2(hashsize)) { @@ -710,6 +708,69 @@ tcp_newtcpcb(struct inpcb *inp) } /* + * Switch the congestion control algorithm back to NewReno for any active + * control blocks using an algorithm which is about to go away. + * This ensures the CC framework can allow the unload to proceed without leaving + * any dangling pointers which would trigger a panic. + * Returning non-zero would inform the CC framework that something went wrong + * and it would be unsafe to allow the unload to proceed. However, there is no + * way for this to occur with this implementation so we always return zero. + */ +int +tcp_ccalgounload(struct cc_algo *unload_algo) +{ + struct cc_algo *tmpalgo; + struct inpcb *inp; + struct tcpcb *tp; + VNET_ITERATOR_DECL(vnet_iter); + + /* + * Check all active control blocks across all network stacks and change + * any that are using "unload_algo" back to NewReno. If "unload_algo" + * requires cleanup code to be run, call it. + */ + VNET_LIST_RLOCK(); + VNET_FOREACH(vnet_iter) { + CURVNET_SET(vnet_iter); + INP_INFO_RLOCK(&V_tcbinfo); + /* + * New connections already part way through being initialised + * with the CC algo we're removing will not race with this code + * because the INP_INFO_WLOCK is held during initialisation. We + * therefore don't enter the loop below until the connection + * list has stabilised. + */ + LIST_FOREACH(inp, &V_tcb, inp_list) { + INP_WLOCK(inp); + /* Important to skip tcptw structs. */ + if (!(inp->inp_flags & INP_TIMEWAIT) && + (tp = intotcpcb(inp)) != NULL) { + /* + * By holding INP_WLOCK here, we are assured + * that the connection is not currently + * executing inside the CC module's functions + * i.e. it is safe to make the switch back to + * NewReno. + */ + if (CC_ALGO(tp) == unload_algo) { + tmpalgo = CC_ALGO(tp); + /* NewReno does not require any init. */ + CC_ALGO(tp) = &newreno_cc_algo; + if (tmpalgo->cb_destroy != NULL) + tmpalgo->cb_destroy(tp->ccv); + } + } + INP_WUNLOCK(inp); + } + INP_INFO_RUNLOCK(&V_tcbinfo); + CURVNET_RESTORE(); + } + VNET_LIST_RUNLOCK(); + + return (0); +} + +/* * Drop a TCP connection, reporting * the specified error. If connection is synchronized, * then send a RST to peer. diff --git a/sys/netinet/tcp_var.h b/sys/netinet/tcp_var.h index 442c736..7b38667 100644 --- a/sys/netinet/tcp_var.h +++ b/sys/netinet/tcp_var.h @@ -605,6 +605,7 @@ VNET_DECLARE(int, tcp_ecn_maxretries); #define V_tcp_ecn_maxretries VNET(tcp_ecn_maxretries) int tcp_addoptions(struct tcpopt *, u_char *); +int tcp_ccalgounload(struct cc_algo *unload_algo); struct tcpcb * tcp_close(struct tcpcb *); void tcp_discardcb(struct tcpcb *); |