summaryrefslogtreecommitdiffstats
path: root/sys/netinet/tcp_subr.c
diff options
context:
space:
mode:
author ngie <ngie@FreeBSD.org> 2015-12-17 06:55:25 +0000
committer ngie <ngie@FreeBSD.org> 2015-12-17 06:55:25 +0000
commit 490921132f201193a73d81699cb455aa2ae87357 (patch)
tree 447ebf673b9e1d362dbcf1b55fd34fa0de86d693 /sys/netinet/tcp_subr.c
parent 3fed53d02350ae9cbd7b2786b72b83d2e292b8d1 (diff)
parent a7e4d91c2357d6f2c732cccc35fd4ddda5f2d58e (diff)
download FreeBSD-src-490921132f201193a73d81699cb455aa2ae87357.zip
FreeBSD-src-490921132f201193a73d81699cb455aa2ae87357.tar.gz
MFhead @ r292396
Diffstat (limited to 'sys/netinet/tcp_subr.c')
-rw-r--r--sys/netinet/tcp_subr.c312
1 files changed, 308 insertions, 4 deletions
diff --git a/sys/netinet/tcp_subr.c b/sys/netinet/tcp_subr.c
index 29af766..00869a6 100644
--- a/sys/netinet/tcp_subr.c
+++ b/sys/netinet/tcp_subr.c
@@ -47,6 +47,7 @@ __FBSDID("$FreeBSD$");
#include <sys/sysctl.h>
#include <sys/jail.h>
#include <sys/malloc.h>
+#include <sys/refcount.h>
#include <sys/mbuf.h>
#ifdef INET6
#include <sys/domain.h>
@@ -125,6 +126,8 @@ VNET_DEFINE(int, tcp_mssdflt) = TCP_MSS;
VNET_DEFINE(int, tcp_v6mssdflt) = TCP6_MSS;
#endif
+struct rwlock tcp_function_lock; /* held around accesses to t_functions and tcp_func_set_ptr; set up in tcp_init() */
+
static int
sysctl_net_inet_tcp_mss_check(SYSCTL_HANDLER_ARGS)
{
@@ -236,6 +239,179 @@ static char * tcp_log_addr(struct in_conninfo *inc, struct tcphdr *th,
void *ip4hdr, const void *ip6hdr);
static void tcp_timer_discard(struct tcpcb *, uint32_t);
+
+static struct tcp_function_block tcp_def_funcblk = { /* built-in function block; registered by tcp_init() */
+ "default", /* block name; deregister_tcp_functions() refuses to remove "default" */
+ tcp_output, /* presumably tfb_tcp_output (positional) -- confirm against struct layout */
+ tcp_do_segment, /* presumably tfb_tcp_do_segment -- confirm against struct layout */
+ tcp_default_ctloutput, /* presumably tfb_tcp_ctloutput -- confirm against struct layout */
+ NULL, /* the seven NULLs leave the remaining hooks unset; presumably */
+ NULL, /* fb_init/fb_fini followed by the five timer hooks */
+ NULL, /* (stop_all, timers_left, activate, active, stop) -- */
+ NULL, /* TODO confirm the order against struct tcp_function_block */
+ NULL,
+ NULL,
+ NULL,
+ 0, /* presumably tfb_refcnt; register_tcp_functions() re-initialises it to 0 */
+ 0 /* presumably tfb_flags; register_tcp_functions() clears it */
+};
+
+struct tcp_funchead t_functions; /* tail queue of registered function blocks; initialised in tcp_init() */
+static struct tcp_function_block *tcp_func_set_ptr = &tcp_def_funcblk; /* current default block; tcp_newtcpcb() copies it into tp->t_fb */
+
+/*
+ * Look up a registered function block by name.
+ *
+ * Walks t_functions and returns the first block whose tfb_tcp_block_name
+ * equals fs->function_set_name, or NULL when nothing matches.  No
+ * reference is taken on the result.  Every caller in this file holds
+ * tcp_function_lock across the call.
+ */
+static struct tcp_function_block *
+find_tcp_functions_locked(struct tcp_function_set *fs)
+{
+	struct tcp_function *entry;
+
+	TAILQ_FOREACH(entry, &t_functions, tf_next) {
+		if (strcmp(entry->tf_fb->tfb_tcp_block_name,
+		    fs->function_set_name) != 0)
+			continue;
+		return (entry->tf_fb);
+	}
+	return (NULL);
+}
+
+/*
+ * Check whether a function block is on the registered list.
+ *
+ * Returns blk when some t_functions entry points at it, NULL otherwise.
+ * When a match is found and s is non-NULL, *s receives the list entry
+ * itself; *s is left untouched when there is no match.  Every caller in
+ * this file holds tcp_function_lock across the call.
+ */
+static struct tcp_function_block *
+find_tcp_fb_locked(struct tcp_function_block *blk, struct tcp_function **s)
+{
+	struct tcp_function *entry;
+
+	TAILQ_FOREACH(entry, &t_functions, tf_next) {
+		if (entry->tf_fb != blk)
+			continue;
+		if (s != NULL)
+			*s = entry;
+		return (blk);
+	}
+	return (NULL);
+}
+
+/*
+ * Find a registered function block by name and take a reference on it.
+ *
+ * The lookup and the refcount_acquire() on tfb_refcnt both happen under
+ * the read side of tcp_function_lock.  Returns the block with its
+ * reference count bumped, or NULL (no reference taken) if the name is
+ * not registered.
+ */
+struct tcp_function_block *
+find_and_ref_tcp_functions(struct tcp_function_set *fs)
+{
+	struct tcp_function_block *found;
+
+	rw_rlock(&tcp_function_lock);
+	found = find_tcp_functions_locked(fs);
+	if (found != NULL)
+		refcount_acquire(&found->tfb_refcnt);
+	rw_runlock(&tcp_function_lock);
+
+	return (found);
+}
+
+/*
+ * Confirm that blk is currently registered and take a reference on it.
+ *
+ * The list check and the refcount_acquire() on tfb_refcnt both happen
+ * under the read side of tcp_function_lock.  Returns blk with its
+ * reference count bumped, or NULL (no reference taken) if blk is not on
+ * the t_functions list.
+ */
+struct tcp_function_block *
+find_and_ref_tcp_fb(struct tcp_function_block *blk)
+{
+	struct tcp_function_block *found;
+
+	rw_rlock(&tcp_function_lock);
+	found = find_tcp_fb_locked(blk, NULL);
+	if (found != NULL)
+		refcount_acquire(&found->tfb_refcnt);
+	rw_runlock(&tcp_function_lock);
+
+	return (found);
+}
+
+
+/*
+ * Sysctl handler that reports and changes the default TCP function block.
+ *
+ * Read: copies the name of the block tcp_func_set_ptr points at (if it
+ * is still on the registered list) into a local buffer and hands it to
+ * sysctl_handle_string().
+ *
+ * Write: looks the supplied name up under the write lock and makes that
+ * block the new default.  Fails with ENOENT when the name is unknown or
+ * the block is flagged TCP_FUNC_BEING_REMOVED.
+ *
+ * Returns 0 on success or the error from sysctl_handle_string()/ENOENT.
+ */
+static int
+sysctl_net_inet_default_tcp_functions(SYSCTL_HANDLER_ARGS)
+{
+	struct tcp_function_set fs;
+	struct tcp_function_block *blk;
+	int error;
+
+	/* Zeroed so an empty string is reported if no default is found. */
+	memset(&fs, 0, sizeof(fs));
+	rw_rlock(&tcp_function_lock);
+	blk = find_tcp_fb_locked(tcp_func_set_ptr, NULL);
+	if (blk != NULL) {
+		/* Bounded copy: never write past the local name buffer. */
+		(void)strlcpy(fs.function_set_name, blk->tfb_tcp_block_name,
+		    sizeof(fs.function_set_name));
+	}
+	rw_runlock(&tcp_function_lock);
+
+	/* The lock is dropped before calling into the sysctl string helper. */
+	error = sysctl_handle_string(oidp, fs.function_set_name,
+	    sizeof(fs.function_set_name), req);
+
+	/* Check for error or no change */
+	if (error != 0 || req->newptr == NULL)
+		return (error);
+
+	rw_wlock(&tcp_function_lock);
+	blk = find_tcp_functions_locked(&fs);
+	if (blk == NULL || (blk->tfb_flags & TCP_FUNC_BEING_REMOVED) != 0)
+		error = ENOENT;
+	else
+		tcp_func_set_ptr = blk;
+	rw_wunlock(&tcp_function_lock);
+	return (error);
+}
+
+SYSCTL_PROC(_net_inet_tcp, OID_AUTO, functions_default,
+ CTLTYPE_STRING | CTLFLAG_RW,
+ NULL, 0, sysctl_net_inet_default_tcp_functions, "A",
+ "Set/get the default TCP functions"); /* read/write string OID served by sysctl_net_inet_default_tcp_functions() */
+
+/*
+ * Sysctl handler that lists every registered TCP function block.
+ *
+ * Produces a header line followed by one line per t_functions entry
+ * giving the block name, a '*' marker on the entry that is the current
+ * default, and the block's tfb_refcnt.  The list is counted under the
+ * read lock, the lock is dropped to allocate the text buffer with
+ * M_WAITOK, and the list is then walked a second time under the read
+ * lock to format it.  If an entry no longer fits in the buffer sized
+ * from the first pass, the handler fails with EOVERFLOW.
+ *
+ * Returns 0 on success, EOVERFLOW as described, or the error from
+ * sysctl_handle_string().
+ */
+static int
+sysctl_net_inet_list_available(SYSCTL_HANDLER_ARGS)
+{
+	struct tcp_function *entry;
+	char *text, *pos;
+	size_t remain, used;
+	int error, nblocks, len;
+
+	/* First pass: count the entries so the buffer can be sized. */
+	nblocks = 0;
+	rw_rlock(&tcp_function_lock);
+	TAILQ_FOREACH(entry, &t_functions, tf_next)
+		nblocks++;
+	rw_runlock(&tcp_function_lock);
+
+	remain = (nblocks+2) * (TCP_FUNCTION_NAME_LEN_MAX + 12) + 1;
+	text = malloc(remain, M_TEMP, M_WAITOK);
+	pos = text;
+
+	/* Column header. */
+	len = snprintf(pos, remain, "\n%-32s%c %s\n", "Stack", 'D', "PCB count");
+	pos += len;
+	remain -= len;
+	used = len;
+
+	/* Second pass: one formatted line per registered block. */
+	error = 0;
+	rw_rlock(&tcp_function_lock);
+	TAILQ_FOREACH(entry, &t_functions, tf_next) {
+		len = snprintf(pos, remain, "%-32s%c %u\n",
+		    entry->tf_fb->tfb_tcp_block_name,
+		    (entry->tf_fb == tcp_func_set_ptr) ? '*' : ' ',
+		    entry->tf_fb->tfb_refcnt);
+		if (len >= remain) {
+			/* The line did not fit in what is left of the buffer. */
+			error = EOVERFLOW;
+			break;
+		}
+		pos += len;
+		remain -= len;
+		used += len;
+	}
+	rw_runlock(&tcp_function_lock);
+
+	if (error == 0)
+		error = sysctl_handle_string(oidp, text, used + 1, req);
+	free(text, M_TEMP);
+	return (error);
+}
+
+SYSCTL_PROC(_net_inet_tcp, OID_AUTO, functions_available,
+ CTLTYPE_STRING|CTLFLAG_RD,
+ NULL, 0, sysctl_net_inet_list_available, "A",
+ "list available TCP Function sets"); /* read-only string OID served by sysctl_net_inet_list_available() */
+
/*
* Target size of TCP PCB hash tables. Must be a power of two.
*
@@ -263,6 +439,8 @@ static VNET_DEFINE(uma_zone_t, tcpcb_zone);
#define V_tcpcb_zone VNET(tcpcb_zone)
MALLOC_DEFINE(M_TCPLOG, "tcplog", "TCP address and flags print buffers");
+MALLOC_DEFINE(M_TCPFUNCTIONS, "tcpfunc", "TCP function set memory"); /* malloc type used for struct tcp_function list entries */
+
static struct mtx isn_mtx;
#define ISN_LOCK_INIT() mtx_init(&isn_mtx, "isn_mtx", NULL, MTX_DEF)
@@ -311,6 +489,96 @@ maketcp_hashsize(int size)
return (hashsize);
}
+/*
+ * Register a TCP function block so it can be selected by name.
+ *
+ * blk must supply tfb_tcp_output, tfb_tcp_do_segment, tfb_tcp_ctloutput
+ * and a non-empty name.  The five timer hooks are optional, but if any
+ * one of them is set they must all be set.  wait is passed straight to
+ * malloc() as its flags argument (tcp_init() passes M_WAITOK).
+ *
+ * On success the block's tfb_refcnt and tfb_flags are reset to 0 and a
+ * new list entry pointing at blk is appended to t_functions; the block
+ * itself is not copied.
+ *
+ * Returns 0 on success, EINVAL for a malformed block (including a name
+ * that does not fit in a tcp_function_set name buffer), ENOMEM if the
+ * list entry cannot be allocated, or EALREADY if a block with the same
+ * name is already registered.
+ */
+int
+register_tcp_functions(struct tcp_function_block *blk, int wait)
+{
+	struct tcp_function_block *lblk;
+	struct tcp_function *n;
+	struct tcp_function_set fs;
+
+	if ((blk->tfb_tcp_output == NULL) ||
+	    (blk->tfb_tcp_do_segment == NULL) ||
+	    (blk->tfb_tcp_ctloutput == NULL) ||
+	    (strlen(blk->tfb_tcp_block_name) == 0)) {
+		/*
+		 * These functions are required and you
+		 * need a name.
+		 */
+		return (EINVAL);
+	}
+	if (blk->tfb_tcp_timer_stop_all ||
+	    blk->tfb_tcp_timers_left ||
+	    blk->tfb_tcp_timer_activate ||
+	    blk->tfb_tcp_timer_active ||
+	    blk->tfb_tcp_timer_stop) {
+		/*
+		 * If you define one timer function you
+		 * must have them all.
+		 */
+		if ((blk->tfb_tcp_timer_stop_all == NULL) ||
+		    (blk->tfb_tcp_timers_left == NULL) ||
+		    (blk->tfb_tcp_timer_activate == NULL) ||
+		    (blk->tfb_tcp_timer_active == NULL) ||
+		    (blk->tfb_tcp_timer_stop == NULL)) {
+			return (EINVAL);
+		}
+	}
+	/*
+	 * Build the lookup key with a bounded copy.  A name that would be
+	 * truncated is rejected outright: writing it with strcpy() would
+	 * run off the end of the on-stack buffer, and a truncated key
+	 * would make the duplicate check below compare the wrong string.
+	 * Done before the allocation so there is nothing to undo.
+	 */
+	if (strlcpy(fs.function_set_name, blk->tfb_tcp_block_name,
+	    sizeof(fs.function_set_name)) >= sizeof(fs.function_set_name)) {
+		return (EINVAL);
+	}
+	n = malloc(sizeof(*n), M_TCPFUNCTIONS, wait);
+	if (n == NULL) {
+		return (ENOMEM);
+	}
+	n->tf_fb = blk;
+	rw_wlock(&tcp_function_lock);
+	lblk = find_tcp_functions_locked(&fs);
+	if (lblk != NULL) {
+		/* Duplicate name space not allowed */
+		rw_wunlock(&tcp_function_lock);
+		free(n, M_TCPFUNCTIONS);
+		return (EALREADY);
+	}
+	refcount_init(&blk->tfb_refcnt, 0);
+	blk->tfb_flags = 0;
+	TAILQ_INSERT_TAIL(&t_functions, n, tf_next);
+	rw_wunlock(&tcp_function_lock);
+	return (0);
+}
+
+/*
+ * Remove a TCP function block from the registered list.
+ *
+ * Returns:
+ *   EPERM  - blk is named "default"; that block can never be removed.
+ *   EBUSY  - blk is the current default (tcp_func_set_ptr), or its
+ *            tfb_refcnt is non-zero.  In the second case the block is
+ *            additionally flagged TCP_FUNC_BEING_REMOVED.
+ *   ENOENT - blk is not on the t_functions list.
+ *   0      - the list entry was unlinked and freed.
+ *
+ * Only the list entry is freed here; blk itself is not.
+ */
+int
+deregister_tcp_functions(struct tcp_function_block *blk)
+{
+	struct tcp_function *entry;
+	int rv;
+
+	/* You can't un-register the default */
+	if (strcmp(blk->tfb_tcp_block_name, "default") == 0)
+		return (EPERM);
+
+	rw_wlock(&tcp_function_lock);
+	if (blk == tcp_func_set_ptr) {
+		/* You can't free the current default */
+		rv = EBUSY;
+	} else if (blk->tfb_refcnt) {
+		/* Still tcb attached, mark it. */
+		blk->tfb_flags |= TCP_FUNC_BEING_REMOVED;
+		rv = EBUSY;
+	} else if (find_tcp_fb_locked(blk, &entry) != NULL) {
+		/* Found: unlink the entry and release it. */
+		TAILQ_REMOVE(&t_functions, entry, tf_next);
+		entry->tf_fb = NULL;
+		free(entry, M_TCPFUNCTIONS);
+		rv = 0;
+	} else {
+		rv = ENOENT;
+	}
+	rw_wunlock(&tcp_function_lock);
+	return (rv);
+}
+
void
tcp_init(void)
{
@@ -325,7 +593,10 @@ tcp_init(void)
if (hhook_head_register(HHOOK_TYPE_TCP, HHOOK_TCP_EST_OUT,
&V_tcp_hhh[HHOOK_TCP_EST_OUT], HHOOK_NOWAIT|HHOOK_HEADISINVNET) != 0)
printf("%s: WARNING: unable to register helper hook\n", __func__);
-
+ /* Setup the tcp function block list */
+ TAILQ_INIT(&t_functions);
+ rw_init_flags(&tcp_function_lock, "tcp_func_lock" , 0);
+ register_tcp_functions(&tcp_def_funcblk, M_WAITOK);
hashsize = TCBHASHSIZE;
TUNABLE_INT_FETCH(tcbhash_tuneable, &hashsize);
if (hashsize == 0) {
@@ -768,7 +1039,13 @@ tcp_newtcpcb(struct inpcb *inp)
tp->ccv = &tm->ccv;
tp->ccv->type = IPPROTO_TCP;
tp->ccv->ccvc.tcp = tp;
-
+ rw_rlock(&tcp_function_lock);
+ tp->t_fb = tcp_func_set_ptr;
+ refcount_acquire(&tp->t_fb->tfb_refcnt);
+ rw_runlock(&tcp_function_lock);
+ if (tp->t_fb->tfb_tcp_fb_init) {
+ (*tp->t_fb->tfb_tcp_fb_init)(tp);
+ }
/*
* Use the current system default CC algorithm.
*/
@@ -779,12 +1056,18 @@ tcp_newtcpcb(struct inpcb *inp)
if (CC_ALGO(tp)->cb_init != NULL)
if (CC_ALGO(tp)->cb_init(tp->ccv) > 0) {
+ if (tp->t_fb->tfb_tcp_fb_fini)
+ (*tp->t_fb->tfb_tcp_fb_fini)(tp);
+ refcount_release(&tp->t_fb->tfb_refcnt);
uma_zfree(V_tcpcb_zone, tm);
return (NULL);
}
tp->osd = &tm->osd;
if (khelp_init_osd(HELPER_CLASS_TCP, tp->osd)) {
+ if (tp->t_fb->tfb_tcp_fb_fini)
+ (*tp->t_fb->tfb_tcp_fb_fini)(tp);
+ refcount_release(&tp->t_fb->tfb_refcnt);
uma_zfree(V_tcpcb_zone, tm);
return (NULL);
}
@@ -925,7 +1208,7 @@ tcp_drop(struct tcpcb *tp, int errno)
if (TCPS_HAVERCVDSYN(tp->t_state)) {
tcp_state_change(tp, TCPS_CLOSED);
- (void) tcp_output(tp);
+ (void) tp->t_fb->tfb_tcp_output(tp);
TCPSTAT_INC(tcps_drops);
} else
TCPSTAT_INC(tcps_conndrops);
@@ -960,6 +1243,10 @@ tcp_discardcb(struct tcpcb *tp)
tcp_timer_stop(tp, TT_KEEP);
tcp_timer_stop(tp, TT_2MSL);
tcp_timer_stop(tp, TT_DELACK);
+ if (tp->t_fb->tfb_tcp_timer_stop_all) {
+ /* Call the stop-all function of the methods */
+ tp->t_fb->tfb_tcp_timer_stop_all(tp);
+ }
/*
* If we got enough samples through the srtt filter,
@@ -1044,6 +1331,14 @@ tcp_discardcb(struct tcpcb *tp)
inp->inp_ppcb = NULL;
if ((tp->t_timers->tt_flags & TT_MASK) == 0) {
/* We own the last reference on tcpcb, let's free it. */
+ if ((tp->t_fb->tfb_tcp_timers_left) &&
+ (tp->t_fb->tfb_tcp_timers_left(tp))) {
+ /* Some fb timers left running! */
+ return;
+ }
+ if (tp->t_fb->tfb_tcp_fb_fini)
+ (*tp->t_fb->tfb_tcp_fb_fini)(tp);
+ refcount_release(&tp->t_fb->tfb_refcnt);
tp->t_inpcb = NULL;
uma_zfree(V_tcpcb_zone, tp);
released = in_pcbrele_wlocked(inp);
@@ -1105,6 +1400,14 @@ tcp_timer_discard(struct tcpcb *tp, uint32_t timer_type)
tp->t_timers->tt_flags &= ~timer_type;
if ((tp->t_timers->tt_flags & TT_MASK) == 0) {
/* We own the last reference on this tcpcb, let's free it. */
+ if ((tp->t_fb->tfb_tcp_timers_left) &&
+ (tp->t_fb->tfb_tcp_timers_left(tp))) {
+ /* Some fb timers left running! */
+ goto leave;
+ }
+ if (tp->t_fb->tfb_tcp_fb_fini)
+ (*tp->t_fb->tfb_tcp_fb_fini)(tp);
+ refcount_release(&tp->t_fb->tfb_refcnt);
tp->t_inpcb = NULL;
uma_zfree(V_tcpcb_zone, tp);
if (in_pcbrele_wlocked(inp)) {
@@ -1113,6 +1416,7 @@ tcp_timer_discard(struct tcpcb *tp, uint32_t timer_type)
return;
}
}
+leave:
INP_WUNLOCK(inp);
INP_INFO_RUNLOCK(&V_tcbinfo);
CURVNET_RESTORE();
@@ -1865,7 +2169,7 @@ tcp_mtudisc(struct inpcb *inp, int mtuoffer)
tp->snd_recover = tp->snd_max;
if (tp->t_flags & TF_SACK_PERMIT)
EXIT_FASTRECOVERY(tp->t_flags);
- tcp_output(tp);
+ tp->t_fb->tfb_tcp_output(tp);
}
#ifdef INET
OpenPOWER on IntegriCloud