summaryrefslogtreecommitdiffstats
path: root/sys/netinet/toecore.c
diff options
context:
space:
mode:
authornp <np@FreeBSD.org>2012-06-19 07:34:13 +0000
committernp <np@FreeBSD.org>2012-06-19 07:34:13 +0000
commit67d5f1a727273d8e141e96c429114dff9fb06ec3 (patch)
tree9255a545bbd49a0458ed8850371b4fe6ed2cd01f /sys/netinet/toecore.c
parent27063437e23a5e5e7debf9144ee974d21b6a6774 (diff)
downloadFreeBSD-src-67d5f1a727273d8e141e96c429114dff9fb06ec3.zip
FreeBSD-src-67d5f1a727273d8e141e96c429114dff9fb06ec3.tar.gz
- Updated TOE support in the kernel.
- Stateful TCP offload drivers for Terminator 3 and 4 (T3 and T4) ASICs. These are available as t3_tom and t4_tom modules that augment cxgb(4) and cxgbe(4) respectively. The cxgb/cxgbe drivers continue to work as usual with or without these extra features. - iWARP driver for Terminator 3 ASIC (kernel verbs). T4 iWARP in the works and will follow soon. Build-tested with make universe. 30s overview ============ What interfaces support TCP offload? Look for TOE4 and/or TOE6 in the capabilities of an interface: # ifconfig -m | grep TOE Enable/disable TCP offload on an interface (just like any other ifnet capability): # ifconfig cxgbe0 toe # ifconfig cxgbe0 -toe Which connections are offloaded? Look for toe4 and/or toe6 in the output of netstat and sockstat: # netstat -np tcp | grep toe # sockstat -46c | grep toe Reviewed by: bz, gnn Sponsored by: Chelsio communications. MFC after: ~3 months (after 9.1, and after ensuring MFC is feasible)
Diffstat (limited to 'sys/netinet/toecore.c')
-rw-r--r--sys/netinet/toecore.c575
1 files changed, 575 insertions, 0 deletions
diff --git a/sys/netinet/toecore.c b/sys/netinet/toecore.c
new file mode 100644
index 0000000..4b4efb7
--- /dev/null
+++ b/sys/netinet/toecore.c
@@ -0,0 +1,575 @@
+/*-
+ * Copyright (c) 2012 Chelsio Communications, Inc.
+ * All rights reserved.
+ * Written by: Navdeep Parhar <np@FreeBSD.org>
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include "opt_inet.h"
+#include "opt_inet6.h"
+
+#include <sys/param.h>
+#include <sys/kernel.h>
+#include <sys/systm.h>
+#include <sys/mbuf.h>
+#include <sys/module.h>
+#include <sys/types.h>
+#include <sys/sockopt.h>
+#include <sys/sysctl.h>
+#include <sys/socket.h>
+
+#include <net/ethernet.h>
+#include <net/if.h>
+#include <net/if_types.h>
+#include <net/if_vlan_var.h>
+#include <net/if_llatbl.h>
+#include <net/route.h>
+
+#include <netinet/if_ether.h>
+#include <netinet/in.h>
+#include <netinet/in_pcb.h>
+#include <netinet/in_var.h>
+#include <netinet6/nd6.h>
+#define TCPSTATES
+#include <netinet/tcp.h>
+#include <netinet/tcp_fsm.h>
+#include <netinet/tcp_timer.h>
+#include <netinet/tcp_var.h>
+#include <netinet/tcp_syncache.h>
+#include <netinet/tcp_offload.h>
+#include <netinet/toecore.h>
+
+static struct mtx toedev_lock;
+static TAILQ_HEAD(, toedev) toedev_list;
+static eventhandler_tag listen_start_eh;
+static eventhandler_tag listen_stop_eh;
+static eventhandler_tag lle_event_eh;
+static eventhandler_tag route_redirect_eh;
+
+static int
+toedev_connect(struct toedev *tod __unused, struct socket *so __unused,
+ struct rtentry *rt __unused, struct sockaddr *nam __unused)
+{
+
+ return (ENOTSUP);
+}
+
+static int
+toedev_listen_start(struct toedev *tod __unused, struct tcpcb *tp __unused)
+{
+
+ return (ENOTSUP);
+}
+
+static int
+toedev_listen_stop(struct toedev *tod __unused, struct tcpcb *tp __unused)
+{
+
+ return (ENOTSUP);
+}
+
+static void
+toedev_input(struct toedev *tod __unused, struct tcpcb *tp __unused,
+ struct mbuf *m)
+{
+
+ m_freem(m);
+ return;
+}
+
+static void
+toedev_rcvd(struct toedev *tod __unused, struct tcpcb *tp __unused)
+{
+
+ return;
+}
+
+static int
+toedev_output(struct toedev *tod __unused, struct tcpcb *tp __unused)
+{
+
+ return (ENOTSUP);
+}
+
+static void
+toedev_pcb_detach(struct toedev *tod __unused, struct tcpcb *tp __unused)
+{
+
+ return;
+}
+
+static void
+toedev_l2_update(struct toedev *tod __unused, struct ifnet *ifp __unused,
+ struct sockaddr *sa __unused, uint8_t *lladdr __unused,
+ uint16_t vtag __unused)
+{
+
+ return;
+}
+
+static void
+toedev_route_redirect(struct toedev *tod __unused, struct ifnet *ifp __unused,
+ struct rtentry *rt0 __unused, struct rtentry *rt1 __unused)
+{
+
+ return;
+}
+
+static void
+toedev_syncache_added(struct toedev *tod __unused, void *ctx __unused)
+{
+
+ return;
+}
+
+static void
+toedev_syncache_removed(struct toedev *tod __unused, void *ctx __unused)
+{
+
+ return;
+}
+
+static int
+toedev_syncache_respond(struct toedev *tod __unused, void *ctx __unused,
+ struct mbuf *m)
+{
+
+ m_freem(m);
+ return (0);
+}
+
+static void
+toedev_offload_socket(struct toedev *tod __unused, void *ctx __unused,
+ struct socket *so __unused)
+{
+
+ return;
+}
+
+static void
+toedev_ctloutput(struct toedev *tod __unused, struct tcpcb *tp __unused,
+ int sopt_dir __unused, int sopt_name __unused)
+{
+
+ return;
+}
+
+/*
+ * Inform one or more TOE devices about a listening socket.
+ */
+static void
+toe_listen_start(struct inpcb *inp, void *arg)
+{
+ struct toedev *t, *tod;
+ struct tcpcb *tp;
+
+ INP_WLOCK_ASSERT(inp);
+ KASSERT(inp->inp_pcbinfo == &V_tcbinfo,
+ ("%s: inp is not a TCP inp", __func__));
+
+ if (inp->inp_flags & (INP_DROPPED | INP_TIMEWAIT))
+ return;
+
+ tp = intotcpcb(inp);
+ if (tp->t_state != TCPS_LISTEN)
+ return;
+
+ t = arg;
+ mtx_lock(&toedev_lock);
+ TAILQ_FOREACH(tod, &toedev_list, link) {
+ if (t == NULL || t == tod)
+ tod->tod_listen_start(tod, tp);
+ }
+ mtx_unlock(&toedev_lock);
+}
+
+static void
+toe_listen_start_event(void *arg __unused, struct tcpcb *tp)
+{
+ struct inpcb *inp = tp->t_inpcb;
+
+ INP_WLOCK_ASSERT(inp);
+ KASSERT(tp->t_state == TCPS_LISTEN,
+ ("%s: t_state %s", __func__, tcpstates[tp->t_state]));
+
+ toe_listen_start(inp, NULL);
+}
+
+static void
+toe_listen_stop_event(void *arg __unused, struct tcpcb *tp)
+{
+ struct toedev *tod;
+#ifdef INVARIANTS
+ struct inpcb *inp = tp->t_inpcb;
+#endif
+
+ INP_WLOCK_ASSERT(inp);
+ KASSERT(tp->t_state == TCPS_LISTEN,
+ ("%s: t_state %s", __func__, tcpstates[tp->t_state]));
+
+ mtx_lock(&toedev_lock);
+ TAILQ_FOREACH(tod, &toedev_list, link)
+ tod->tod_listen_stop(tod, tp);
+ mtx_unlock(&toedev_lock);
+}
+
+/*
+ * Fill up a freshly allocated toedev struct with reasonable defaults.
+ */
+void
+init_toedev(struct toedev *tod)
+{
+
+ tod->tod_softc = NULL;
+
+ /*
+ * Provide no-op defaults so that the kernel can call any toedev
+ * function without having to check whether the TOE driver supplied one
+ * or not.
+ */
+ tod->tod_connect = toedev_connect;
+ tod->tod_listen_start = toedev_listen_start;
+ tod->tod_listen_stop = toedev_listen_stop;
+ tod->tod_input = toedev_input;
+ tod->tod_rcvd = toedev_rcvd;
+ tod->tod_output = toedev_output;
+ tod->tod_send_rst = toedev_output;
+ tod->tod_send_fin = toedev_output;
+ tod->tod_pcb_detach = toedev_pcb_detach;
+ tod->tod_l2_update = toedev_l2_update;
+ tod->tod_route_redirect = toedev_route_redirect;
+ tod->tod_syncache_added = toedev_syncache_added;
+ tod->tod_syncache_removed = toedev_syncache_removed;
+ tod->tod_syncache_respond = toedev_syncache_respond;
+ tod->tod_offload_socket = toedev_offload_socket;
+ tod->tod_ctloutput = toedev_ctloutput;
+}
+
+/*
+ * Register an active TOE device with the system. This allows it to receive
+ * notifications from the kernel.
+ */
+int
+register_toedev(struct toedev *tod)
+{
+ struct toedev *t;
+
+ mtx_lock(&toedev_lock);
+ TAILQ_FOREACH(t, &toedev_list, link) {
+ if (t == tod) {
+ mtx_unlock(&toedev_lock);
+ return (EEXIST);
+ }
+ }
+
+ TAILQ_INSERT_TAIL(&toedev_list, tod, link);
+ registered_toedevs++;
+ mtx_unlock(&toedev_lock);
+
+ inp_apply_all(toe_listen_start, tod);
+
+ return (0);
+}
+
+/*
+ * Remove the TOE device from the global list of active TOE devices. It is the
+ * caller's responsibility to ensure that the TOE device is quiesced prior to
+ * this call.
+ */
+int
+unregister_toedev(struct toedev *tod)
+{
+ struct toedev *t, *t2;
+ int rc = ENODEV;
+
+ mtx_lock(&toedev_lock);
+ TAILQ_FOREACH_SAFE(t, &toedev_list, link, t2) {
+ if (t == tod) {
+ TAILQ_REMOVE(&toedev_list, tod, link);
+ registered_toedevs--;
+ rc = 0;
+ break;
+ }
+ }
+ KASSERT(registered_toedevs >= 0,
+ ("%s: registered_toedevs (%d) < 0", __func__, registered_toedevs));
+ mtx_unlock(&toedev_lock);
+ return (rc);
+}
+
+void
+toe_syncache_add(struct in_conninfo *inc, struct tcpopt *to, struct tcphdr *th,
+ struct inpcb *inp, void *tod, void *todctx)
+{
+ struct socket *lso = inp->inp_socket;
+
+ INP_INFO_WLOCK_ASSERT(&V_tcbinfo);
+ INP_WLOCK_ASSERT(inp);
+
+ syncache_add(inc, to, th, inp, &lso, NULL, tod, todctx);
+}
+
+int
+toe_syncache_expand(struct in_conninfo *inc, struct tcpopt *to,
+ struct tcphdr *th, struct socket **lsop)
+{
+
+ INP_INFO_WLOCK_ASSERT(&V_tcbinfo);
+
+ return (syncache_expand(inc, to, th, lsop, NULL));
+}
+
+/*
+ * General purpose check to see if a 4-tuple is in use by the kernel. If a TCP
+ * header (presumably for an incoming SYN) is also provided, an existing 4-tuple
+ * in TIME_WAIT may be assassinated freeing it up for re-use.
+ *
+ * Note that the TCP header must have been run through tcp_fields_to_host() or
+ * equivalent.
+ */
+int
+toe_4tuple_check(struct in_conninfo *inc, struct tcphdr *th, struct ifnet *ifp)
+{
+ struct inpcb *inp;
+
+ if (inc->inc_flags & INC_ISIPV6)
+ return (ENOSYS); /* XXX: implement */
+
+ inp = in_pcblookup(&V_tcbinfo, inc->inc_faddr, inc->inc_fport,
+ inc->inc_laddr, inc->inc_lport, INPLOOKUP_WLOCKPCB, ifp);
+ if (inp != NULL) {
+ INP_WLOCK_ASSERT(inp);
+
+ if ((inp->inp_flags & INP_TIMEWAIT) && th != NULL) {
+
+ INP_INFO_WLOCK_ASSERT(&V_tcbinfo); /* for twcheck */
+ if (!tcp_twcheck(inp, NULL, th, NULL, 0))
+ return (EADDRINUSE);
+ } else {
+ INP_WUNLOCK(inp);
+ return (EADDRINUSE);
+ }
+ }
+
+ return (0);
+}
+
+static void
+toe_lle_event(void *arg __unused, struct llentry *lle, int evt)
+{
+ struct toedev *tod;
+ struct ifnet *ifp;
+ struct sockaddr *sa;
+ uint8_t *lladdr;
+ uint16_t vtag;
+
+ LLE_WLOCK_ASSERT(lle);
+
+ ifp = lle->lle_tbl->llt_ifp;
+ sa = L3_ADDR(lle);
+
+ KASSERT(sa->sa_family == AF_INET || sa->sa_family == AF_INET6,
+ ("%s: lle_event %d for lle %p but sa %p !INET && !INET6",
+ __func__, evt, lle, sa));
+
+ /*
+ * Not interested if the interface's TOE capability is not enabled.
+ */
+ if ((sa->sa_family == AF_INET && !(ifp->if_capenable & IFCAP_TOE4)) ||
+ (sa->sa_family == AF_INET6 && !(ifp->if_capenable & IFCAP_TOE6)))
+ return;
+
+ tod = TOEDEV(ifp);
+ if (tod == NULL)
+ return;
+
+ vtag = 0xfff;
+ if (evt != LLENTRY_RESOLVED) {
+
+ /*
+ * LLENTRY_TIMEDOUT, LLENTRY_DELETED, LLENTRY_EXPIRED all mean
+ * this entry is going to be deleted.
+ */
+
+ lladdr = NULL;
+ } else {
+
+ KASSERT(lle->la_flags & LLE_VALID,
+ ("%s: %p resolved but not valid?", __func__, lle));
+
+ lladdr = (uint8_t *)&lle->ll_addr;
+#ifdef VLAN_TAG
+ VLAN_TAG(ifp, &vtag);
+#endif
+ }
+
+ tod->tod_l2_update(tod, ifp, sa, lladdr, vtag);
+}
+
+/*
+ * XXX: implement.
+ */
+static void
+toe_route_redirect_event(void *arg __unused, struct rtentry *rt0,
+ struct rtentry *rt1, struct sockaddr *sa)
+{
+
+ return;
+}
+
+/*
+ * Returns 0 or EWOULDBLOCK on success (any other value is an error). 0 means
+ * lladdr and vtag are valid on return, EWOULDBLOCK means the TOE driver's
+ * tod_l2_update will be called later, when the entry is resolved or times out.
+ */
+int
+toe_l2_resolve(struct toedev *tod, struct ifnet *ifp, struct sockaddr *sa,
+ uint8_t *lladdr, uint16_t *vtag)
+{
+ struct llentry *lle;
+ int rc;
+
+ switch (sa->sa_family) {
+#ifdef INET
+ case AF_INET:
+ rc = arpresolve(ifp, NULL, NULL, sa, lladdr, &lle);
+ break;
+#endif
+#ifdef INET6
+ case AF_INET6:
+ rc = nd6_storelladdr(ifp, NULL, sa, lladdr, &lle);
+ break;
+#endif
+ default:
+ return (EPROTONOSUPPORT);
+ }
+
+ if (rc == 0) {
+#ifdef VLAN_TAG
+ if (VLAN_TAG(ifp, vtag) != 0)
+#endif
+ *vtag = 0xfff;
+ }
+
+ return (rc);
+}
+
+void
+toe_connect_failed(struct toedev *tod, struct tcpcb *tp, int err)
+{
+ struct inpcb *inp = tp->t_inpcb;
+
+ INP_WLOCK_ASSERT(inp);
+ KASSERT(tp->t_flags & TF_TOE,
+ ("%s: tp %p not offloaded.", __func__, tp));
+
+ if (!(inp->inp_flags & INP_DROPPED)) {
+ if (err == EAGAIN) {
+
+ /*
+ * Temporary failure during offload, take this PCB back.
+ * Detach from the TOE driver and do the rest of what
+ * TCP's pru_connect would have done if the connection
+ * wasn't offloaded.
+ */
+
+ tod->tod_pcb_detach(tod, tp);
+ KASSERT(!(tp->t_flags & TF_TOE),
+ ("%s: tp %p still offloaded.", __func__, tp));
+ tcp_timer_activate(tp, TT_KEEP, TP_KEEPINIT(tp));
+ (void) tcp_output(tp);
+ } else {
+
+ INP_INFO_WLOCK_ASSERT(&V_tcbinfo);
+ tp = tcp_drop(tp, err);
+ if (tp == NULL)
+ INP_WLOCK(inp); /* re-acquire */
+ }
+ }
+ INP_WLOCK_ASSERT(inp);
+}
+
+static int
+toecore_load(void)
+{
+
+ mtx_init(&toedev_lock, "toedev lock", NULL, MTX_DEF);
+ TAILQ_INIT(&toedev_list);
+
+ listen_start_eh = EVENTHANDLER_REGISTER(tcp_offload_listen_start,
+ toe_listen_start_event, NULL, EVENTHANDLER_PRI_ANY);
+ listen_stop_eh = EVENTHANDLER_REGISTER(tcp_offload_listen_stop,
+ toe_listen_stop_event, NULL, EVENTHANDLER_PRI_ANY);
+ lle_event_eh = EVENTHANDLER_REGISTER(lle_event, toe_lle_event, NULL,
+ EVENTHANDLER_PRI_ANY);
+ route_redirect_eh = EVENTHANDLER_REGISTER(route_redirect_event,
+ toe_route_redirect_event, NULL, EVENTHANDLER_PRI_ANY);
+
+ return (0);
+}
+
+static int
+toecore_unload(void)
+{
+
+ mtx_lock(&toedev_lock);
+ if (!TAILQ_EMPTY(&toedev_list)) {
+ mtx_unlock(&toedev_lock);
+ return (EBUSY);
+ }
+
+ EVENTHANDLER_DEREGISTER(tcp_offload_listen_start, listen_start_eh);
+ EVENTHANDLER_DEREGISTER(tcp_offload_listen_stop, listen_stop_eh);
+ EVENTHANDLER_DEREGISTER(lle_event, lle_event_eh);
+ EVENTHANDLER_DEREGISTER(route_redirect_event, route_redirect_eh);
+
+ mtx_unlock(&toedev_lock);
+ mtx_destroy(&toedev_lock);
+
+ return (0);
+}
+
+static int
+toecore_mod_handler(module_t mod, int cmd, void *arg)
+{
+
+ if (cmd == MOD_LOAD)
+ return (toecore_load());
+
+ if (cmd == MOD_UNLOAD)
+ return (toecore_unload());
+
+ return (EOPNOTSUPP);
+}
+
+static moduledata_t mod_data= {
+ "toecore",
+ toecore_mod_handler,
+ 0
+};
+
+MODULE_VERSION(toecore, 1);
+DECLARE_MODULE(toecore, mod_data, SI_SUB_EXEC, SI_ORDER_ANY);
OpenPOWER on IntegriCloud