summary | refs | log | tree | commit | diff | stats
path: root/sys/netinet
diff options
context:
space:
mode:
author: np <np@FreeBSD.org> 2012-06-19 07:34:13 +0000
committer: np <np@FreeBSD.org> 2012-06-19 07:34:13 +0000
commit: 67d5f1a727273d8e141e96c429114dff9fb06ec3 (patch)
tree: 9255a545bbd49a0458ed8850371b4fe6ed2cd01f /sys/netinet
parent: 27063437e23a5e5e7debf9144ee974d21b6a6774 (diff)
download: FreeBSD-src-67d5f1a727273d8e141e96c429114dff9fb06ec3.zip
          FreeBSD-src-67d5f1a727273d8e141e96c429114dff9fb06ec3.tar.gz
- Updated TOE support in the kernel.

- Stateful TCP offload drivers for Terminator 3 and 4 (T3 and T4) ASICs.
  These are available as t3_tom and t4_tom modules that augment cxgb(4)
  and cxgbe(4) respectively. The cxgb/cxgbe drivers continue to work as
  usual with or without these extra features.

- iWARP driver for Terminator 3 ASIC (kernel verbs). T4 iWARP in the
  works and will follow soon.

Build-tested with make universe.

30s overview
============
What interfaces support TCP offload? Look for TOE4 and/or TOE6 in the
capabilities of an interface:
# ifconfig -m | grep TOE

Enable/disable TCP offload on an interface (just like any other ifnet
capability):
# ifconfig cxgbe0 toe
# ifconfig cxgbe0 -toe

Which connections are offloaded? Look for toe4 and/or toe6 in the
output of netstat and sockstat:
# netstat -np tcp | grep toe
# sockstat -46c | grep toe

Reviewed by: bz, gnn
Sponsored by: Chelsio communications.
MFC after: ~3 months (after 9.1, and after ensuring MFC is feasible)

Diffstat (limited to 'sys/netinet')
-rw-r--r--  sys/netinet/if_ether.c       13
-rw-r--r--  sys/netinet/if_ether.h       10
-rw-r--r--  sys/netinet/in.c              2
-rw-r--r--  sys/netinet/tcp_input.c      13
-rw-r--r--  sys/netinet/tcp_offload.c   209
-rw-r--r--  sys/netinet/tcp_offload.h   364
-rw-r--r--  sys/netinet/tcp_output.c      8
-rw-r--r--  sys/netinet/tcp_subr.c       19
-rw-r--r--  sys/netinet/tcp_syncache.c  135
-rw-r--r--  sys/netinet/tcp_syncache.h   19
-rw-r--r--  sys/netinet/tcp_timer.c       5
-rw-r--r--  sys/netinet/tcp_usrreq.c     75
-rw-r--r--  sys/netinet/tcp_var.h         2
-rw-r--r--  sys/netinet/toecore.c       575
-rw-r--r--  sys/netinet/toecore.h       130
-rw-r--r--  sys/netinet/toedev.h        162
16 files changed, 1035 insertions, 706 deletions
diff --git a/sys/netinet/if_ether.c b/sys/netinet/if_ether.c
index bdb4efc..d6a7fd1 100644
--- a/sys/netinet/if_ether.c
+++ b/sys/netinet/if_ether.c
@@ -180,6 +180,17 @@ arptimer(void *arg)
callout_active(&lle->la_timer)) {
callout_stop(&lle->la_timer);
LLE_REMREF(lle);
+
+ if (lle->la_flags != LLE_DELETED) {
+ int evt;
+
+ if (lle->la_flags & LLE_VALID)
+ evt = LLENTRY_EXPIRED;
+ else
+ evt = LLENTRY_TIMEDOUT;
+ EVENTHANDLER_INVOKE(lle_event, lle, evt);
+ }
+
pkts_dropped = llentry_free(lle);
ARPSTAT_ADD(dropped, pkts_dropped);
ARPSTAT_INC(timeouts);
@@ -726,7 +737,7 @@ match:
(void)memcpy(&la->ll_addr, ar_sha(ah), ifp->if_addrlen);
la->la_flags |= LLE_VALID;
- EVENTHANDLER_INVOKE(arp_update_event, la);
+ EVENTHANDLER_INVOKE(lle_event, la, LLENTRY_RESOLVED);
if (!(la->la_flags & LLE_STATIC)) {
int canceled;
diff --git a/sys/netinet/if_ether.h b/sys/netinet/if_ether.h
index 8d44d35..e37a964 100644
--- a/sys/netinet/if_ether.h
+++ b/sys/netinet/if_ether.h
@@ -122,8 +122,14 @@ void arp_ifinit2(struct ifnet *, struct ifaddr *, u_char *);
void arp_ifscrub(struct ifnet *, uint32_t);
#include <sys/eventhandler.h>
-typedef void (*llevent_arp_update_fn)(void *, struct llentry *);
-EVENTHANDLER_DECLARE(arp_update_event, llevent_arp_update_fn);
+enum {
+ LLENTRY_RESOLVED,
+ LLENTRY_TIMEDOUT,
+ LLENTRY_DELETED,
+ LLENTRY_EXPIRED,
+};
+typedef void (*lle_event_fn)(void *, struct llentry *, int);
+EVENTHANDLER_DECLARE(lle_event, lle_event_fn);
#endif
diff --git a/sys/netinet/in.c b/sys/netinet/in.c
index c1cbcb1..1beddd6 100644
--- a/sys/netinet/in.c
+++ b/sys/netinet/in.c
@@ -1469,7 +1469,7 @@ in_lltable_lookup(struct lltable *llt, u_int flags, const struct sockaddr *l3add
if (!(lle->la_flags & LLE_IFADDR) || (flags & LLE_IFADDR)) {
LLE_WLOCK(lle);
lle->la_flags = LLE_DELETED;
- EVENTHANDLER_INVOKE(arp_update_event, lle);
+ EVENTHANDLER_INVOKE(lle_event, lle, LLENTRY_DELETED);
LLE_WUNLOCK(lle);
#ifdef DIAGNOSTIC
log(LOG_INFO, "ifaddr cache = %p is deleted\n", lle);
diff --git a/sys/netinet/tcp_input.c b/sys/netinet/tcp_input.c
index 4d3234f..6d8ebee 100644
--- a/sys/netinet/tcp_input.c
+++ b/sys/netinet/tcp_input.c
@@ -105,6 +105,9 @@ __FBSDID("$FreeBSD$");
#ifdef TCPDEBUG
#include <netinet/tcp_debug.h>
#endif /* TCPDEBUG */
+#ifdef TCP_OFFLOAD
+#include <netinet/tcp_offload.h>
+#endif
#ifdef IPSEC
#include <netipsec/ipsec.h>
@@ -958,6 +961,14 @@ relocked:
goto dropwithreset;
}
+#ifdef TCP_OFFLOAD
+ if (tp->t_flags & TF_TOE) {
+ tcp_offload_input(tp, m);
+ m = NULL; /* consumed by the TOE driver */
+ goto dropunlock;
+ }
+#endif
+
/*
* We've identified a valid inpcb, but it could be that we need an
* inpcbinfo write lock but don't hold it. In this case, attempt to
@@ -1320,7 +1331,7 @@ relocked:
(void *)tcp_saveipgen, &tcp_savetcp, 0);
#endif
tcp_dooptions(&to, optp, optlen, TO_SYN);
- syncache_add(&inc, &to, th, inp, &so, m);
+ syncache_add(&inc, &to, th, inp, &so, m, NULL, NULL);
/*
* Entry added to syncache and mbuf consumed.
* Everything already unlocked by syncache_add().
diff --git a/sys/netinet/tcp_offload.c b/sys/netinet/tcp_offload.c
index 899a37c..b772418 100644
--- a/sys/netinet/tcp_offload.c
+++ b/sys/netinet/tcp_offload.c
@@ -1,145 +1,176 @@
/*-
- * Copyright (c) 2007, Chelsio Inc.
+ * Copyright (c) 2012 Chelsio Communications, Inc.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
*
- * 1. Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- *
- * 2. Neither the name of the Chelsio Corporation nor the names of its
- * contributors may be used to endorse or promote products derived from
- * this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
*/
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");
+#include "opt_inet.h"
+
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/types.h>
-#include <sys/malloc.h>
-#include <sys/kernel.h>
-#include <sys/sysctl.h>
#include <sys/mbuf.h>
#include <sys/socket.h>
#include <sys/socketvar.h>
-
+#include <sys/sockopt.h>
#include <net/if.h>
-#include <net/if_types.h>
-#include <net/if_var.h>
#include <net/route.h>
-#include <net/vnet.h>
-
#include <netinet/in.h>
-#include <netinet/in_systm.h>
#include <netinet/in_pcb.h>
#include <netinet/tcp.h>
#include <netinet/tcp_var.h>
#include <netinet/tcp_offload.h>
-#include <netinet/toedev.h>
+#define TCPOUTFLAGS
+#include <netinet/tcp_fsm.h>
+#include <netinet/toecore.h>
-uint32_t toedev_registration_count;
+int registered_toedevs;
+/*
+ * Provide an opportunity for a TOE driver to offload.
+ */
int
tcp_offload_connect(struct socket *so, struct sockaddr *nam)
{
struct ifnet *ifp;
- struct toedev *tdev;
+ struct toedev *tod;
struct rtentry *rt;
- int error;
-
- if (toedev_registration_count == 0)
- return (EINVAL);
-
- /*
- * Look up the route used for the connection to
- * determine if it uses an interface capable of
- * offloading the connection.
- */
- rt = rtalloc1(nam, 0 /*report*/, 0 /*ignflags*/);
- if (rt)
+ int error = EOPNOTSUPP;
+
+ INP_WLOCK_ASSERT(sotoinpcb(so));
+ KASSERT(nam->sa_family == AF_INET || nam->sa_family == AF_INET6,
+ ("%s: called with sa_family %d", __func__, nam->sa_family));
+
+ if (registered_toedevs == 0)
+ return (error);
+
+ rt = rtalloc1(nam, 0, 0);
+ if (rt)
RT_UNLOCK(rt);
- else
+ else
return (EHOSTUNREACH);
ifp = rt->rt_ifp;
- if ((ifp->if_capenable & IFCAP_TOE) == 0) {
- error = EINVAL;
- goto fail;
- }
-
- tdev = TOEDEV(ifp);
- if (tdev == NULL) {
- error = EPERM;
- goto fail;
- }
-
- if (tdev->tod_can_offload(tdev, so) == 0) {
- error = EPERM;
- goto fail;
- }
-
- return (tdev->tod_connect(tdev, so, rt, nam));
-fail:
+
+ if (nam->sa_family == AF_INET && !(ifp->if_capenable & IFCAP_TOE4))
+ goto done;
+ if (nam->sa_family == AF_INET6 && !(ifp->if_capenable & IFCAP_TOE6))
+ goto done;
+
+ tod = TOEDEV(ifp);
+ if (tod != NULL)
+ error = tod->tod_connect(tod, so, rt, nam);
+done:
RTFREE(rt);
return (error);
}
+void
+tcp_offload_listen_start(struct tcpcb *tp)
+{
-/*
- * This file contains code as a short-term staging area before it is moved in
- * to sys/netinet/tcp_offload.c
- */
+ INP_WLOCK_ASSERT(tp->t_inpcb);
+
+ EVENTHANDLER_INVOKE(tcp_offload_listen_start, tp);
+}
void
-tcp_offload_twstart(struct tcpcb *tp)
+tcp_offload_listen_stop(struct tcpcb *tp)
{
- INP_INFO_WLOCK(&V_tcbinfo);
- INP_WLOCK(tp->t_inpcb);
- tcp_twstart(tp);
- INP_INFO_WUNLOCK(&V_tcbinfo);
+ INP_WLOCK_ASSERT(tp->t_inpcb);
+
+ EVENTHANDLER_INVOKE(tcp_offload_listen_stop, tp);
}
-struct tcpcb *
-tcp_offload_close(struct tcpcb *tp)
+void
+tcp_offload_input(struct tcpcb *tp, struct mbuf *m)
{
+ struct toedev *tod = tp->tod;
- INP_INFO_WLOCK(&V_tcbinfo);
- INP_WLOCK(tp->t_inpcb);
- tp = tcp_close(tp);
- INP_INFO_WUNLOCK(&V_tcbinfo);
- if (tp)
- INP_WUNLOCK(tp->t_inpcb);
+ KASSERT(tod != NULL, ("%s: tp->tod is NULL, tp %p", __func__, tp));
+ INP_WLOCK_ASSERT(tp->t_inpcb);
- return (tp);
+ tod->tod_input(tod, tp, m);
}
-struct tcpcb *
-tcp_offload_drop(struct tcpcb *tp, int error)
+int
+tcp_offload_output(struct tcpcb *tp)
{
+ struct toedev *tod = tp->tod;
+ int error, flags;
- INP_INFO_WLOCK(&V_tcbinfo);
- INP_WLOCK(tp->t_inpcb);
- tp = tcp_drop(tp, error);
- INP_INFO_WUNLOCK(&V_tcbinfo);
- if (tp)
- INP_WUNLOCK(tp->t_inpcb);
+ KASSERT(tod != NULL, ("%s: tp->tod is NULL, tp %p", __func__, tp));
+ INP_WLOCK_ASSERT(tp->t_inpcb);
- return (tp);
+ flags = tcp_outflags[tp->t_state];
+
+ if (flags & TH_RST) {
+ /* XXX: avoid repeated calls like we do for FIN */
+ error = tod->tod_send_rst(tod, tp);
+ } else if ((flags & TH_FIN || tp->t_flags & TF_NEEDFIN) &&
+ (tp->t_flags & TF_SENTFIN) == 0) {
+ error = tod->tod_send_fin(tod, tp);
+ if (error == 0)
+ tp->t_flags |= TF_SENTFIN;
+ } else
+ error = tod->tod_output(tod, tp);
+
+ return (error);
+}
+
+void
+tcp_offload_rcvd(struct tcpcb *tp)
+{
+ struct toedev *tod = tp->tod;
+
+ KASSERT(tod != NULL, ("%s: tp->tod is NULL, tp %p", __func__, tp));
+ INP_WLOCK_ASSERT(tp->t_inpcb);
+
+ tod->tod_rcvd(tod, tp);
+}
+
+void
+tcp_offload_ctloutput(struct tcpcb *tp, int sopt_dir, int sopt_name)
+{
+ struct toedev *tod = tp->tod;
+
+ KASSERT(tod != NULL, ("%s: tp->tod is NULL, tp %p", __func__, tp));
+ INP_WLOCK_ASSERT(tp->t_inpcb);
+
+ tod->tod_ctloutput(tod, tp, sopt_dir, sopt_name);
}
+void
+tcp_offload_detach(struct tcpcb *tp)
+{
+ struct toedev *tod = tp->tod;
+
+ KASSERT(tod != NULL, ("%s: tp->tod is NULL, tp %p", __func__, tp));
+ INP_WLOCK_ASSERT(tp->t_inpcb);
+
+ tod->tod_pcb_detach(tod, tp);
+}
diff --git a/sys/netinet/tcp_offload.h b/sys/netinet/tcp_offload.h
index 313185f..a052366 100644
--- a/sys/netinet/tcp_offload.h
+++ b/sys/netinet/tcp_offload.h
@@ -1,30 +1,30 @@
/*-
- * Copyright (c) 2007, Chelsio Inc.
+ * Copyright (c) 2012 Chelsio Communications, Inc.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * 1. Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- *
- * 2. Neither the name of the Chelsio Corporation nor the names of its
- * contributors may be used to endorse or promote products derived from
- * this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
*
* $FreeBSD$
+ *
*/
#ifndef _NETINET_TCP_OFFLOAD_H_
@@ -34,321 +34,15 @@
#error "no user-serviceable parts inside"
#endif
-/*
- * A driver publishes that it provides offload services
- * by setting IFCAP_TOE in the ifnet. The offload connect
- * will bypass any further work if the interface that a
- * connection would use does not support TCP offload.
- *
- * The TOE API assumes that the tcp offload engine can offload the
- * the entire connection from set up to teardown, with some provision
- * being made to allowing the software stack to handle time wait. If
- * the device does not meet these criteria, it is the driver's responsibility
- * to overload the functions that it needs to in tcp_usrreqs and make
- * its own calls to tcp_output if it needs to do so.
- *
- * There is currently no provision for the device advertising the congestion
- * control algorithms it supports as there is currently no API for querying
- * an operating system for the protocols that it has loaded. This is a desirable
- * future extension.
- *
- *
- *
- * It is assumed that individuals deploying TOE will want connections
- * to be offloaded without software changes so all connections on an
- * interface providing TOE are offloaded unless the SO_NO_OFFLOAD
- * flag is set on the socket.
- *
- *
- * The toe_usrreqs structure constitutes the TOE driver's
- * interface to the TCP stack for functionality that doesn't
- * interact directly with userspace. If one wants to provide
- * (optional) functionality to do zero-copy to/from
- * userspace one still needs to override soreceive/sosend
- * with functions that fault in and pin the user buffers.
- *
- * + tu_send
- * - tells the driver that new data may have been added to the
- * socket's send buffer - the driver should not fail if the
- * buffer is in fact unchanged
- * - the driver is responsible for providing credits (bytes in the send window)
- * back to the socket by calling sbdrop() as segments are acknowledged.
- * - The driver expects the inpcb lock to be held - the driver is expected
- * not to drop the lock. Hence the driver is not allowed to acquire the
- * pcbinfo lock during this call.
- *
- * + tu_rcvd
- * - returns credits to the driver and triggers window updates
- * to the peer (a credit as used here is a byte in the peer's receive window)
- * - the driver is expected to determine how many bytes have been
- * consumed and credit that back to the card so that it can grow
- * the window again by maintaining its own state between invocations.
- * - In principle this could be used to shrink the window as well as
- * grow the window, although it is not used for that now.
- * - this function needs to correctly handle being called any number of
- * times without any bytes being consumed from the receive buffer.
- * - The driver expects the inpcb lock to be held - the driver is expected
- * not to drop the lock. Hence the driver is not allowed to acquire the
- * pcbinfo lock during this call.
- *
- * + tu_disconnect
- * - tells the driver to send FIN to peer
- * - driver is expected to send the remaining data and then do a clean half close
- * - disconnect implies at least half-close so only send, reset, and detach
- * are legal
- * - the driver is expected to handle transition through the shutdown
- * state machine and allow the stack to support SO_LINGER.
- * - The driver expects the inpcb lock to be held - the driver is expected
- * not to drop the lock. Hence the driver is not allowed to acquire the
- * pcbinfo lock during this call.
- *
- * + tu_reset
- * - closes the connection and sends a RST to peer
- * - driver is expectd to trigger an RST and detach the toepcb
- * - no further calls are legal after reset
- * - The driver expects the inpcb lock to be held - the driver is expected
- * not to drop the lock. Hence the driver is not allowed to acquire the
- * pcbinfo lock during this call.
- *
- * The following fields in the tcpcb are expected to be referenced by the driver:
- * + iss
- * + rcv_nxt
- * + rcv_wnd
- * + snd_isn
- * + snd_max
- * + snd_nxt
- * + snd_una
- * + t_flags
- * + t_inpcb
- * + t_maxseg
- * + t_toe
- *
- * The following fields in the inpcb are expected to be referenced by the driver:
- * + inp_lport
- * + inp_fport
- * + inp_laddr
- * + inp_fport
- * + inp_socket
- * + inp_ip_tos
- *
- * The following fields in the socket are expected to be referenced by the
- * driver:
- * + so_comp
- * + so_error
- * + so_linger
- * + so_options
- * + so_rcv
- * + so_snd
- * + so_state
- * + so_timeo
- *
- * These functions all return 0 on success and can return the following errors
- * as appropriate:
- * + EPERM:
- * + ENOBUFS: memory allocation failed
- * + EMSGSIZE: MTU changed during the call
- * + EHOSTDOWN:
- * + EHOSTUNREACH:
- * + ENETDOWN:
- * * ENETUNREACH: the peer is no longer reachable
- *
- * + tu_detach
- * - tells driver that the socket is going away so disconnect
- * the toepcb and free appropriate resources
- * - allows the driver to cleanly handle the case of connection state
- * outliving the socket
- * - no further calls are legal after detach
- * - the driver is expected to provide its own synchronization between
- * detach and receiving new data.
- *
- * + tu_syncache_event
- * - even if it is not actually needed, the driver is expected to
- * call syncache_add for the initial SYN and then syncache_expand
- * for the SYN,ACK
- * - tells driver that a connection either has not been added or has
- * been dropped from the syncache
- * - the driver is expected to maintain state that lives outside the
- * software stack so the syncache needs to be able to notify the
- * toe driver that the software stack is not going to create a connection
- * for a received SYN
- * - The driver is responsible for any synchronization required between
- * the syncache dropping an entry and the driver processing the SYN,ACK.
- *
- */
-struct toe_usrreqs {
- int (*tu_send)(struct tcpcb *tp);
- int (*tu_rcvd)(struct tcpcb *tp);
- int (*tu_disconnect)(struct tcpcb *tp);
- int (*tu_reset)(struct tcpcb *tp);
- void (*tu_detach)(struct tcpcb *tp);
- void (*tu_syncache_event)(int event, void *toep);
-};
-
-/*
- * Proxy for struct tcpopt between TOE drivers and TCP functions.
- */
-struct toeopt {
- u_int64_t to_flags; /* see tcpopt in tcp_var.h */
- u_int16_t to_mss; /* maximum segment size */
- u_int8_t to_wscale; /* window scaling */
+extern int registered_toedevs;
- u_int8_t _pad1; /* explicit pad for 64bit alignment */
- u_int32_t _pad2; /* explicit pad for 64bit alignment */
- u_int64_t _pad3[4]; /* TBD */
-};
+int tcp_offload_connect(struct socket *, struct sockaddr *);
+void tcp_offload_listen_start(struct tcpcb *);
+void tcp_offload_listen_stop(struct tcpcb *);
+void tcp_offload_input(struct tcpcb *, struct mbuf *);
+int tcp_offload_output(struct tcpcb *);
+void tcp_offload_rcvd(struct tcpcb *);
+void tcp_offload_ctloutput(struct tcpcb *, int, int);
+void tcp_offload_detach(struct tcpcb *);
-#define TOE_SC_ENTRY_PRESENT 1 /* 4-tuple already present */
-#define TOE_SC_DROP 2 /* connection was timed out */
-
-/*
- * Because listen is a one-to-many relationship (a socket can be listening
- * on all interfaces on a machine some of which may be using different TCP
- * offload devices), listen uses a publish/subscribe mechanism. The TCP
- * offload driver registers a listen notification function with the stack.
- * When a listen socket is created all TCP offload devices are notified
- * so that they can do the appropriate set up to offload connections on the
- * port to which the socket is bound. When the listen socket is closed,
- * the offload devices are notified so that they will stop listening on that
- * port and free any associated resources as well as sending RSTs on any
- * connections in the SYN_RCVD state.
- *
- */
-
-typedef void (*tcp_offload_listen_start_fn)(void *, struct tcpcb *);
-typedef void (*tcp_offload_listen_stop_fn)(void *, struct tcpcb *);
-
-EVENTHANDLER_DECLARE(tcp_offload_listen_start, tcp_offload_listen_start_fn);
-EVENTHANDLER_DECLARE(tcp_offload_listen_stop, tcp_offload_listen_stop_fn);
-
-/*
- * Check if the socket can be offloaded by the following steps:
- * - determine the egress interface
- * - check the interface for TOE capability and TOE is enabled
- * - check if the device has resources to offload the connection
- */
-int tcp_offload_connect(struct socket *so, struct sockaddr *nam);
-
-/*
- * The tcp_output_* routines are wrappers around the toe_usrreqs calls
- * which trigger packet transmission. In the non-offloaded case they
- * translate to tcp_output. The tcp_offload_* routines notify TOE
- * of specific events. I the non-offloaded case they are no-ops.
- *
- * Listen is a special case because it is a 1 to many relationship
- * and there can be more than one offload driver in the system.
- */
-
-/*
- * Connection is offloaded
- */
-#define tp_offload(tp) ((tp)->t_flags & TF_TOE)
-
-/*
- * hackish way of allowing this file to also be included by TOE
- * which needs to be kept ignorant of socket implementation details
- */
-#ifdef _SYS_SOCKETVAR_H_
-/*
- * The socket has not been marked as "do not offload"
- */
-#define SO_OFFLOADABLE(so) ((so->so_options & SO_NO_OFFLOAD) == 0)
-
-static __inline int
-tcp_output_connect(struct socket *so, struct sockaddr *nam)
-{
- struct tcpcb *tp = sototcpcb(so);
- int error;
-
- /*
- * If offload has been disabled for this socket or the
- * connection cannot be offloaded just call tcp_output
- * to start the TCP state machine.
- */
-#ifndef TCP_OFFLOAD_DISABLE
- if (!SO_OFFLOADABLE(so) || (error = tcp_offload_connect(so, nam)) != 0)
-#endif
- error = tcp_output(tp);
- return (error);
-}
-
-static __inline int
-tcp_output_send(struct tcpcb *tp)
-{
-
-#ifndef TCP_OFFLOAD_DISABLE
- if (tp_offload(tp))
- return (tp->t_tu->tu_send(tp));
-#endif
- return (tcp_output(tp));
-}
-
-static __inline int
-tcp_output_rcvd(struct tcpcb *tp)
-{
-
-#ifndef TCP_OFFLOAD_DISABLE
- if (tp_offload(tp))
- return (tp->t_tu->tu_rcvd(tp));
#endif
- return (tcp_output(tp));
-}
-
-static __inline int
-tcp_output_disconnect(struct tcpcb *tp)
-{
-
-#ifndef TCP_OFFLOAD_DISABLE
- if (tp_offload(tp))
- return (tp->t_tu->tu_disconnect(tp));
-#endif
- return (tcp_output(tp));
-}
-
-static __inline int
-tcp_output_reset(struct tcpcb *tp)
-{
-
-#ifndef TCP_OFFLOAD_DISABLE
- if (tp_offload(tp))
- return (tp->t_tu->tu_reset(tp));
-#endif
- return (tcp_output(tp));
-}
-
-static __inline void
-tcp_offload_detach(struct tcpcb *tp)
-{
-
-#ifndef TCP_OFFLOAD_DISABLE
- if (tp_offload(tp))
- tp->t_tu->tu_detach(tp);
-#endif
-}
-
-static __inline void
-tcp_offload_listen_open(struct tcpcb *tp)
-{
-
-#ifndef TCP_OFFLOAD_DISABLE
- if (SO_OFFLOADABLE(tp->t_inpcb->inp_socket))
- EVENTHANDLER_INVOKE(tcp_offload_listen_start, tp);
-#endif
-}
-
-static __inline void
-tcp_offload_listen_close(struct tcpcb *tp)
-{
-
-#ifndef TCP_OFFLOAD_DISABLE
- EVENTHANDLER_INVOKE(tcp_offload_listen_stop, tp);
-#endif
-}
-#undef SO_OFFLOADABLE
-#endif /* _SYS_SOCKETVAR_H_ */
-#undef tp_offload
-
-void tcp_offload_twstart(struct tcpcb *tp);
-struct tcpcb *tcp_offload_close(struct tcpcb *tp);
-struct tcpcb *tcp_offload_drop(struct tcpcb *tp, int error);
-
-#endif /* _NETINET_TCP_OFFLOAD_H_ */
diff --git a/sys/netinet/tcp_output.c b/sys/netinet/tcp_output.c
index 8e0f369..1881c54 100644
--- a/sys/netinet/tcp_output.c
+++ b/sys/netinet/tcp_output.c
@@ -75,6 +75,9 @@ __FBSDID("$FreeBSD$");
#ifdef TCPDEBUG
#include <netinet/tcp_debug.h>
#endif
+#ifdef TCP_OFFLOAD
+#include <netinet/tcp_offload.h>
+#endif
#ifdef IPSEC
#include <netipsec/ipsec.h>
@@ -191,6 +194,11 @@ tcp_output(struct tcpcb *tp)
INP_WLOCK_ASSERT(tp->t_inpcb);
+#ifdef TCP_OFFLOAD
+ if (tp->t_flags & TF_TOE)
+ return (tcp_offload_output(tp));
+#endif
+
/*
* Determine length of data that should be transmitted,
* and flags that will be used.
diff --git a/sys/netinet/tcp_subr.c b/sys/netinet/tcp_subr.c
index 05da82e..9d35e0a 100644
--- a/sys/netinet/tcp_subr.c
+++ b/sys/netinet/tcp_subr.c
@@ -85,7 +85,6 @@ __FBSDID("$FreeBSD$");
#include <netinet/tcp_timer.h>
#include <netinet/tcp_var.h>
#include <netinet/tcp_syncache.h>
-#include <netinet/tcp_offload.h>
#ifdef INET6
#include <netinet6/tcp6_var.h>
#endif
@@ -96,6 +95,9 @@ __FBSDID("$FreeBSD$");
#ifdef INET6
#include <netinet6/ip6protosw.h>
#endif
+#ifdef TCP_OFFLOAD
+#include <netinet/tcp_offload.h>
+#endif
#ifdef IPSEC
#include <netipsec/ipsec.h>
@@ -824,7 +826,7 @@ tcp_drop(struct tcpcb *tp, int errno)
if (TCPS_HAVERCVDSYN(tp->t_state)) {
tp->t_state = TCPS_CLOSED;
- (void) tcp_output_reset(tp);
+ (void) tcp_output(tp);
TCPSTAT_INC(tcps_drops);
} else
TCPSTAT_INC(tcps_conndrops);
@@ -924,8 +926,12 @@ tcp_discardcb(struct tcpcb *tp)
/* free the reassembly queue, if any */
tcp_reass_flush(tp);
+
+#ifdef TCP_OFFLOAD
/* Disconnect offload device, if any. */
- tcp_offload_detach(tp);
+ if (tp->t_flags & TF_TOE)
+ tcp_offload_detach(tp);
+#endif
tcp_free_sackholes(tp);
@@ -954,9 +960,10 @@ tcp_close(struct tcpcb *tp)
INP_INFO_WLOCK_ASSERT(&V_tcbinfo);
INP_WLOCK_ASSERT(inp);
- /* Notify any offload devices of listener close */
+#ifdef TCP_OFFLOAD
if (tp->t_state == TCPS_LISTEN)
- tcp_offload_listen_close(tp);
+ tcp_offload_listen_stop(tp);
+#endif
in_pcbdrop(inp);
TCPSTAT_INC(tcps_closed);
KASSERT(inp->inp_socket != NULL, ("tcp_close: inp_socket NULL"));
@@ -1695,7 +1702,7 @@ tcp_mtudisc(struct inpcb *inp, int mtuoffer)
tp->snd_recover = tp->snd_max;
if (tp->t_flags & TF_SACK_PERMIT)
EXIT_FASTRECOVERY(tp->t_flags);
- tcp_output_send(tp);
+ tcp_output(tp);
return (inp);
}
diff --git a/sys/netinet/tcp_syncache.c b/sys/netinet/tcp_syncache.c
index 21a72f4..0f7637d 100644
--- a/sys/netinet/tcp_syncache.c
+++ b/sys/netinet/tcp_syncache.c
@@ -81,10 +81,12 @@ __FBSDID("$FreeBSD$");
#include <netinet/tcp_timer.h>
#include <netinet/tcp_var.h>
#include <netinet/tcp_syncache.h>
-#include <netinet/tcp_offload.h>
#ifdef INET6
#include <netinet6/tcp6_var.h>
#endif
+#ifdef TCP_OFFLOAD
+#include <netinet/toecore.h>
+#endif
#ifdef IPSEC
#include <netipsec/ipsec.h>
@@ -110,10 +112,8 @@ SYSCTL_VNET_INT(_net_inet_tcp, OID_AUTO, syncookies_only, CTLFLAG_RW,
&VNET_NAME(tcp_syncookiesonly), 0,
"Use only TCP SYN cookies");
-#ifdef TCP_OFFLOAD_DISABLE
-#define TOEPCB_ISSET(sc) (0)
-#else
-#define TOEPCB_ISSET(sc) ((sc)->sc_toepcb != NULL)
+#ifdef TCP_OFFLOAD
+#define ADDED_BY_TOE(sc) ((sc)->sc_tod != NULL)
#endif
static void syncache_drop(struct syncache *, struct syncache_head *);
@@ -332,6 +332,14 @@ syncache_insert(struct syncache *sc, struct syncache_head *sch)
TAILQ_INSERT_HEAD(&sch->sch_bucket, sc, sc_hash);
sch->sch_length++;
+#ifdef TCP_OFFLOAD
+ if (ADDED_BY_TOE(sc)) {
+ struct toedev *tod = sc->sc_tod;
+
+ tod->tod_syncache_added(tod, sc->sc_todctx);
+ }
+#endif
+
/* Reinitialize the bucket row's timer. */
if (sch->sch_length == 1)
sch->sch_nextc = ticks + INT_MAX;
@@ -356,10 +364,14 @@ syncache_drop(struct syncache *sc, struct syncache_head *sch)
TAILQ_REMOVE(&sch->sch_bucket, sc, sc_hash);
sch->sch_length--;
-#ifndef TCP_OFFLOAD_DISABLE
- if (sc->sc_tu)
- sc->sc_tu->tu_syncache_event(TOE_SC_DROP, sc->sc_toepcb);
-#endif
+#ifdef TCP_OFFLOAD
+ if (ADDED_BY_TOE(sc)) {
+ struct toedev *tod = sc->sc_tod;
+
+ tod->tod_syncache_removed(tod, sc->sc_todctx);
+ }
+#endif
+
syncache_free(sc);
V_tcp_syncache.cache_count--;
}
@@ -846,6 +858,18 @@ syncache_socket(struct syncache *sc, struct socket *lso, struct mbuf *m)
if (sc->sc_rxmits > 1)
tp->snd_cwnd = tp->t_maxseg;
+#ifdef TCP_OFFLOAD
+ /*
+ * Allow a TOE driver to install its hooks. Note that we hold the
+ * pcbinfo lock too and that prevents tcp_usr_accept from accepting a
+ * new connection before the TOE driver has done its thing.
+ */
+ if (ADDED_BY_TOE(sc)) {
+ struct toedev *tod = sc->sc_tod;
+
+ tod->tod_offload_socket(tod, sc->sc_todctx, so);
+ }
+#endif
/*
* Copy and activate timers.
*/
@@ -926,6 +950,13 @@ syncache_expand(struct in_conninfo *inc, struct tcpopt *to, struct tcphdr *th,
/* Pull out the entry to unlock the bucket row. */
TAILQ_REMOVE(&sch->sch_bucket, sc, sc_hash);
sch->sch_length--;
+#ifdef TCP_OFFLOAD
+ if (ADDED_BY_TOE(sc)) {
+ struct toedev *tod = sc->sc_tod;
+
+ tod->tod_syncache_removed(tod, sc->sc_todctx);
+ }
+#endif
V_tcp_syncache.cache_count--;
SCH_UNLOCK(sch);
}
@@ -934,7 +965,7 @@ syncache_expand(struct in_conninfo *inc, struct tcpopt *to, struct tcphdr *th,
* Segment validation:
* ACK must match our initial sequence number + 1 (the SYN|ACK).
*/
- if (th->th_ack != sc->sc_iss + 1 && !TOEPCB_ISSET(sc)) {
+ if (th->th_ack != sc->sc_iss + 1) {
if ((s = tcp_log_addrs(inc, th, NULL, NULL)))
log(LOG_DEBUG, "%s; %s: ACK %u != ISS+1 %u, segment "
"rejected\n", s, __func__, th->th_ack, sc->sc_iss);
@@ -945,9 +976,8 @@ syncache_expand(struct in_conninfo *inc, struct tcpopt *to, struct tcphdr *th,
* The SEQ must fall in the window starting at the received
* initial receive sequence number + 1 (the SYN).
*/
- if ((SEQ_LEQ(th->th_seq, sc->sc_irs) ||
- SEQ_GT(th->th_seq, sc->sc_irs + sc->sc_wnd)) &&
- !TOEPCB_ISSET(sc)) {
+ if (SEQ_LEQ(th->th_seq, sc->sc_irs) ||
+ SEQ_GT(th->th_seq, sc->sc_irs + sc->sc_wnd)) {
if ((s = tcp_log_addrs(inc, th, NULL, NULL)))
log(LOG_DEBUG, "%s; %s: SEQ %u != IRS+1 %u, segment "
"rejected\n", s, __func__, th->th_seq, sc->sc_irs);
@@ -964,8 +994,7 @@ syncache_expand(struct in_conninfo *inc, struct tcpopt *to, struct tcphdr *th,
* If timestamps were negotiated the reflected timestamp
* must be equal to what we actually sent in the SYN|ACK.
*/
- if ((to->to_flags & TOF_TS) && to->to_tsecr != sc->sc_ts &&
- !TOEPCB_ISSET(sc)) {
+ if ((to->to_flags & TOF_TS) && to->to_tsecr != sc->sc_ts) {
if ((s = tcp_log_addrs(inc, th, NULL, NULL)))
log(LOG_DEBUG, "%s; %s: TSECR %u != TS %u, "
"segment rejected\n",
@@ -993,25 +1022,6 @@ failed:
return (0);
}
-int
-tcp_offload_syncache_expand(struct in_conninfo *inc, struct toeopt *toeo,
- struct tcphdr *th, struct socket **lsop, struct mbuf *m)
-{
- struct tcpopt to;
- int rc;
-
- bzero(&to, sizeof(struct tcpopt));
- to.to_mss = toeo->to_mss;
- to.to_wscale = toeo->to_wscale;
- to.to_flags = toeo->to_flags;
-
- INP_INFO_WLOCK(&V_tcbinfo);
- rc = syncache_expand(inc, &to, th, lsop, m);
- INP_INFO_WUNLOCK(&V_tcbinfo);
-
- return (rc);
-}
-
/*
* Given a LISTEN socket and an inbound SYN request, add
* this to the syn cache, and send back a segment:
@@ -1025,10 +1035,10 @@ tcp_offload_syncache_expand(struct in_conninfo *inc, struct toeopt *toeo,
* consume all available buffer space if it were ACKed. By not ACKing
* the data, we avoid this DoS scenario.
*/
-static void
-_syncache_add(struct in_conninfo *inc, struct tcpopt *to, struct tcphdr *th,
- struct inpcb *inp, struct socket **lsop, struct mbuf *m,
- struct toe_usrreqs *tu, void *toepcb)
+void
+syncache_add(struct in_conninfo *inc, struct tcpopt *to, struct tcphdr *th,
+ struct inpcb *inp, struct socket **lsop, struct mbuf *m, void *tod,
+ void *todctx)
{
struct tcpcb *tp;
struct socket *so;
@@ -1114,11 +1124,6 @@ _syncache_add(struct in_conninfo *inc, struct tcpopt *to, struct tcphdr *th,
sc = syncache_lookup(inc, &sch); /* returns locked entry */
SCH_LOCK_ASSERT(sch);
if (sc != NULL) {
-#ifndef TCP_OFFLOAD_DISABLE
- if (sc->sc_tu)
- sc->sc_tu->tu_syncache_event(TOE_SC_ENTRY_PRESENT,
- sc->sc_toepcb);
-#endif
TCPSTAT_INC(tcps_sc_dupsyn);
if (ipopts) {
/*
@@ -1151,7 +1156,7 @@ _syncache_add(struct in_conninfo *inc, struct tcpopt *to, struct tcphdr *th,
s, __func__);
free(s, M_TCPLOG);
}
- if (!TOEPCB_ISSET(sc) && syncache_respond(sc) == 0) {
+ if (syncache_respond(sc) == 0) {
sc->sc_rxmits = 0;
syncache_timeout(sc, sch, 1);
TCPSTAT_INC(tcps_sndacks);
@@ -1202,9 +1207,9 @@ _syncache_add(struct in_conninfo *inc, struct tcpopt *to, struct tcphdr *th,
sc->sc_ip_tos = ip_tos;
sc->sc_ip_ttl = ip_ttl;
}
-#ifndef TCP_OFFLOAD_DISABLE
- sc->sc_tu = tu;
- sc->sc_toepcb = toepcb;
+#ifdef TCP_OFFLOAD
+ sc->sc_tod = tod;
+ sc->sc_todctx = todctx;
#endif
sc->sc_irs = th->th_seq;
sc->sc_iss = arc4random();
@@ -1299,7 +1304,7 @@ _syncache_add(struct in_conninfo *inc, struct tcpopt *to, struct tcphdr *th,
/*
* Do a standard 3-way handshake.
*/
- if (TOEPCB_ISSET(sc) || syncache_respond(sc) == 0) {
+ if (syncache_respond(sc) == 0) {
if (V_tcp_syncookies && V_tcp_syncookiesonly && sc != &scs)
syncache_free(sc);
else if (sc != &scs)
@@ -1491,37 +1496,21 @@ syncache_respond(struct syncache *sc)
m->m_pkthdr.csum_flags = CSUM_TCP;
th->th_sum = in_pseudo(ip->ip_src.s_addr, ip->ip_dst.s_addr,
htons(tlen + optlen - hlen + IPPROTO_TCP));
+#ifdef TCP_OFFLOAD
+ if (ADDED_BY_TOE(sc)) {
+ struct toedev *tod = sc->sc_tod;
+
+ error = tod->tod_syncache_respond(tod, sc->sc_todctx, m);
+
+ return (error);
+ }
+#endif
error = ip_output(m, sc->sc_ipopts, NULL, 0, NULL, NULL);
}
#endif
return (error);
}
-void
-syncache_add(struct in_conninfo *inc, struct tcpopt *to, struct tcphdr *th,
- struct inpcb *inp, struct socket **lsop, struct mbuf *m)
-{
- _syncache_add(inc, to, th, inp, lsop, m, NULL, NULL);
-}
-
-void
-tcp_offload_syncache_add(struct in_conninfo *inc, struct toeopt *toeo,
- struct tcphdr *th, struct inpcb *inp, struct socket **lsop,
- struct toe_usrreqs *tu, void *toepcb)
-{
- struct tcpopt to;
-
- bzero(&to, sizeof(struct tcpopt));
- to.to_mss = toeo->to_mss;
- to.to_wscale = toeo->to_wscale;
- to.to_flags = toeo->to_flags;
-
- INP_INFO_WLOCK(&V_tcbinfo);
- INP_WLOCK(inp);
-
- _syncache_add(inc, &to, th, inp, lsop, NULL, tu, toepcb);
-}
-
/*
* The purpose of SYN cookies is to avoid keeping track of all SYN's we
* receive and to be able to handle SYN floods from bogus source addresses
diff --git a/sys/netinet/tcp_syncache.h b/sys/netinet/tcp_syncache.h
index 5783b6d..d18ee07 100644
--- a/sys/netinet/tcp_syncache.h
+++ b/sys/netinet/tcp_syncache.h
@@ -34,8 +34,6 @@
#define _NETINET_TCP_SYNCACHE_H_
#ifdef _KERNEL
-struct toeopt;
-
void syncache_init(void);
#ifdef VIMAGE
void syncache_destroy(void);
@@ -43,14 +41,9 @@ void syncache_destroy(void);
void syncache_unreach(struct in_conninfo *, struct tcphdr *);
int syncache_expand(struct in_conninfo *, struct tcpopt *,
struct tcphdr *, struct socket **, struct mbuf *);
-int tcp_offload_syncache_expand(struct in_conninfo *inc, struct toeopt *toeo,
- struct tcphdr *th, struct socket **lsop, struct mbuf *m);
void syncache_add(struct in_conninfo *, struct tcpopt *,
- struct tcphdr *, struct inpcb *, struct socket **, struct mbuf *);
-void tcp_offload_syncache_add(struct in_conninfo *, struct toeopt *,
- struct tcphdr *, struct inpcb *, struct socket **,
- struct toe_usrreqs *tu, void *toepcb);
-
+ struct tcphdr *, struct inpcb *, struct socket **, struct mbuf *,
+ void *, void *);
void syncache_chkrst(struct in_conninfo *, struct tcphdr *);
void syncache_badack(struct in_conninfo *);
int syncache_pcbcount(void);
@@ -75,10 +68,10 @@ struct syncache {
u_int8_t sc_requested_s_scale:4,
sc_requested_r_scale:4;
u_int16_t sc_flags;
-#ifndef TCP_OFFLOAD_DISABLE
- struct toe_usrreqs *sc_tu; /* TOE operations */
- void *sc_toepcb; /* TOE protocol block */
-#endif
+#if defined(TCP_OFFLOAD) || !defined(TCP_OFFLOAD_DISABLE)
+ struct toedev *sc_tod; /* entry added by this TOE */
+ void *sc_todctx; /* TOE driver context */
+#endif
struct label *sc_label; /* MAC label reference */
struct ucred *sc_cred; /* cred cache for jail checks */
diff --git a/sys/netinet/tcp_timer.c b/sys/netinet/tcp_timer.c
index 9c3c749..b3ddacc 100644
--- a/sys/netinet/tcp_timer.c
+++ b/sys/netinet/tcp_timer.c
@@ -602,6 +602,11 @@ tcp_timer_activate(struct tcpcb *tp, int timer_type, u_int delta)
struct inpcb *inp = tp->t_inpcb;
int cpu = INP_CPU(inp);
+#ifdef TCP_OFFLOAD
+ if (tp->t_flags & TF_TOE)
+ return;
+#endif
+
switch (timer_type) {
case TT_DELACK:
t_callout = &tp->t_timers->tt_delack;
diff --git a/sys/netinet/tcp_usrreq.c b/sys/netinet/tcp_usrreq.c
index a9045f3..b69961e 100644
--- a/sys/netinet/tcp_usrreq.c
+++ b/sys/netinet/tcp_usrreq.c
@@ -87,7 +87,9 @@ __FBSDID("$FreeBSD$");
#ifdef TCPDEBUG
#include <netinet/tcp_debug.h>
#endif
+#ifdef TCP_OFFLOAD
#include <netinet/tcp_offload.h>
+#endif
/*
* TCP protocol interface to socket abstraction.
@@ -367,7 +369,9 @@ tcp_usr_listen(struct socket *so, int backlog, struct thread *td)
if (error == 0) {
tp->t_state = TCPS_LISTEN;
solisten_proto(so, backlog);
- tcp_offload_listen_open(tp);
+#ifdef TCP_OFFLOAD
+ tcp_offload_listen_start(tp);
+#endif
}
SOCK_UNLOCK(so);
@@ -409,6 +413,9 @@ tcp6_usr_listen(struct socket *so, int backlog, struct thread *td)
if (error == 0) {
tp->t_state = TCPS_LISTEN;
solisten_proto(so, backlog);
+#ifdef TCP_OFFLOAD
+ tcp_offload_listen_start(tp);
+#endif
}
SOCK_UNLOCK(so);
@@ -459,7 +466,13 @@ tcp_usr_connect(struct socket *so, struct sockaddr *nam, struct thread *td)
TCPDEBUG1();
if ((error = tcp_connect(tp, nam, td)) != 0)
goto out;
- error = tcp_output_connect(so, nam);
+#ifdef TCP_OFFLOAD
+ if (registered_toedevs > 0 &&
+ (error = tcp_offload_connect(so, nam)) == 0)
+ goto out;
+#endif
+ tcp_timer_activate(tp, TT_KEEP, TP_KEEPINIT(tp));
+ error = tcp_output(tp);
out:
TCPDEBUG2(PRU_CONNECT);
INP_WUNLOCK(inp);
@@ -519,7 +532,12 @@ tcp6_usr_connect(struct socket *so, struct sockaddr *nam, struct thread *td)
goto out;
if ((error = tcp_connect(tp, (struct sockaddr *)&sin, td)) != 0)
goto out;
- error = tcp_output_connect(so, nam);
+#ifdef TCP_OFFLOAD
+ if (registered_toedevs > 0 &&
+ (error = tcp_offload_connect(so, nam)) == 0)
+ goto out;
+#endif
+ error = tcp_output(tp);
goto out;
}
#endif
@@ -530,7 +548,13 @@ tcp6_usr_connect(struct socket *so, struct sockaddr *nam, struct thread *td)
goto out;
if ((error = tcp6_connect(tp, nam, td)) != 0)
goto out;
- error = tcp_output_connect(so, nam);
+#ifdef TCP_OFFLOAD
+ if (registered_toedevs > 0 &&
+ (error = tcp_offload_connect(so, nam)) == 0)
+ goto out;
+#endif
+ tcp_timer_activate(tp, TT_KEEP, TP_KEEPINIT(tp));
+ error = tcp_output(tp);
out:
TCPDEBUG2(PRU_CONNECT);
@@ -709,7 +733,7 @@ tcp_usr_shutdown(struct socket *so)
socantsendmore(so);
tcp_usrclosed(tp);
if (!(inp->inp_flags & INP_DROPPED))
- error = tcp_output_disconnect(tp);
+ error = tcp_output(tp);
out:
TCPDEBUG2(PRU_SHUTDOWN);
@@ -739,7 +763,11 @@ tcp_usr_rcvd(struct socket *so, int flags)
}
tp = intotcpcb(inp);
TCPDEBUG1();
- tcp_output_rcvd(tp);
+#ifdef TCP_OFFLOAD
+ if (tp->t_flags & TF_TOE)
+ tcp_offload_rcvd(tp);
+#endif
+ tcp_output(tp);
out:
TCPDEBUG2(PRU_RCVD);
@@ -835,7 +863,7 @@ tcp_usr_send(struct socket *so, int flags, struct mbuf *m,
if (!(inp->inp_flags & INP_DROPPED)) {
if (flags & PRUS_MORETOCOME)
tp->t_flags |= TF_MORETOCOME;
- error = tcp_output_send(tp);
+ error = tcp_output(tp);
if (flags & PRUS_MORETOCOME)
tp->t_flags &= ~TF_MORETOCOME;
}
@@ -884,7 +912,7 @@ tcp_usr_send(struct socket *so, int flags, struct mbuf *m,
}
tp->snd_up = tp->snd_una + so->so_snd.sb_cc;
tp->t_flags |= TF_FORCEDATA;
- error = tcp_output_send(tp);
+ error = tcp_output(tp);
tp->t_flags &= ~TF_FORCEDATA;
}
out:
@@ -1119,7 +1147,6 @@ tcp_connect(struct tcpcb *tp, struct sockaddr *nam, struct thread *td)
soisconnecting(so);
TCPSTAT_INC(tcps_connattempt);
tp->t_state = TCPS_SYN_SENT;
- tcp_timer_activate(tp, TT_KEEP, TP_KEEPINIT(tp));
tp->iss = tcp_new_isn(tp);
tcp_sendseqinit(tp);
@@ -1192,7 +1219,6 @@ tcp6_connect(struct tcpcb *tp, struct sockaddr *nam, struct thread *td)
soisconnecting(so);
TCPSTAT_INC(tcps_connattempt);
tp->t_state = TCPS_SYN_SENT;
- tcp_timer_activate(tp, TT_KEEP, TP_KEEPINIT(tp));
tp->iss = tcp_new_isn(tp);
tcp_sendseqinit(tp);
@@ -1323,9 +1349,9 @@ tcp_ctloutput(struct socket *so, struct sockopt *sopt)
tp->t_flags |= TF_SIGNATURE;
else
tp->t_flags &= ~TF_SIGNATURE;
- INP_WUNLOCK(inp);
- break;
+ goto unlock_and_done;
#endif /* TCP_SIGNATURE */
+
case TCP_NODELAY:
case TCP_NOOPT:
INP_WUNLOCK(inp);
@@ -1351,6 +1377,13 @@ tcp_ctloutput(struct socket *so, struct sockopt *sopt)
tp->t_flags |= opt;
else
tp->t_flags &= ~opt;
+unlock_and_done:
+#ifdef TCP_OFFLOAD
+ if (tp->t_flags & TF_TOE) {
+ tcp_offload_ctloutput(tp, sopt->sopt_dir,
+ sopt->sopt_name);
+ }
+#endif
INP_WUNLOCK(inp);
break;
@@ -1369,8 +1402,7 @@ tcp_ctloutput(struct socket *so, struct sockopt *sopt)
if (TCPS_HAVEESTABLISHED(tp->t_state))
error = tcp_output(tp);
}
- INP_WUNLOCK(inp);
- break;
+ goto unlock_and_done;
case TCP_MAXSEG:
INP_WUNLOCK(inp);
@@ -1385,8 +1417,7 @@ tcp_ctloutput(struct socket *so, struct sockopt *sopt)
tp->t_maxseg = optval;
else
error = EINVAL;
- INP_WUNLOCK(inp);
- break;
+ goto unlock_and_done;
case TCP_INFO:
INP_WUNLOCK(inp);
@@ -1438,8 +1469,7 @@ tcp_ctloutput(struct socket *so, struct sockopt *sopt)
}
}
CC_LIST_RUNLOCK();
- INP_WUNLOCK(inp);
- break;
+ goto unlock_and_done;
case TCP_KEEPIDLE:
case TCP_KEEPINTVL:
@@ -1491,8 +1521,7 @@ tcp_ctloutput(struct socket *so, struct sockopt *sopt)
TP_KEEPINIT(tp));
break;
}
- INP_WUNLOCK(inp);
- break;
+ goto unlock_and_done;
default:
INP_WUNLOCK(inp);
@@ -1635,7 +1664,7 @@ tcp_disconnect(struct tcpcb *tp)
sbflush(&so->so_rcv);
tcp_usrclosed(tp);
if (!(inp->inp_flags & INP_DROPPED))
- tcp_output_disconnect(tp);
+ tcp_output(tp);
}
}
@@ -1658,7 +1687,9 @@ tcp_usrclosed(struct tcpcb *tp)
switch (tp->t_state) {
case TCPS_LISTEN:
- tcp_offload_listen_close(tp);
+#ifdef TCP_OFFLOAD
+ tcp_offload_listen_stop(tp);
+#endif
/* FALLTHROUGH */
case TCPS_CLOSED:
tp->t_state = TCPS_CLOSED;
diff --git a/sys/netinet/tcp_var.h b/sys/netinet/tcp_var.h
index 3b8bdf7..90ecca1 100644
--- a/sys/netinet/tcp_var.h
+++ b/sys/netinet/tcp_var.h
@@ -194,7 +194,7 @@ struct tcpcb {
int t_rttlow; /* smallest observerved RTT */
u_int32_t rfbuf_ts; /* recv buffer autoscaling timestamp */
int rfbuf_cnt; /* recv buffer autoscaling byte count */
- struct toe_usrreqs *t_tu; /* offload operations vector */
+ struct toedev *tod; /* toedev handling this connection */
int t_sndrexmitpack; /* retransmit packets sent */
int t_rcvoopack; /* out-of-order packets received */
void *t_toe; /* TOE pcb pointer */
diff --git a/sys/netinet/toecore.c b/sys/netinet/toecore.c
new file mode 100644
index 0000000..4b4efb7
--- /dev/null
+++ b/sys/netinet/toecore.c
@@ -0,0 +1,575 @@
+/*-
+ * Copyright (c) 2012 Chelsio Communications, Inc.
+ * All rights reserved.
+ * Written by: Navdeep Parhar <np@FreeBSD.org>
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include "opt_inet.h"
+#include "opt_inet6.h"
+
+#include <sys/param.h>
+#include <sys/kernel.h>
+#include <sys/systm.h>
+#include <sys/mbuf.h>
+#include <sys/module.h>
+#include <sys/types.h>
+#include <sys/sockopt.h>
+#include <sys/sysctl.h>
+#include <sys/socket.h>
+
+#include <net/ethernet.h>
+#include <net/if.h>
+#include <net/if_types.h>
+#include <net/if_vlan_var.h>
+#include <net/if_llatbl.h>
+#include <net/route.h>
+
+#include <netinet/if_ether.h>
+#include <netinet/in.h>
+#include <netinet/in_pcb.h>
+#include <netinet/in_var.h>
+#include <netinet6/nd6.h>
+#define TCPSTATES
+#include <netinet/tcp.h>
+#include <netinet/tcp_fsm.h>
+#include <netinet/tcp_timer.h>
+#include <netinet/tcp_var.h>
+#include <netinet/tcp_syncache.h>
+#include <netinet/tcp_offload.h>
+#include <netinet/toecore.h>
+
+static struct mtx toedev_lock;
+static TAILQ_HEAD(, toedev) toedev_list;
+static eventhandler_tag listen_start_eh;
+static eventhandler_tag listen_stop_eh;
+static eventhandler_tag lle_event_eh;
+static eventhandler_tag route_redirect_eh;
+
+/*
+ * Default tod_connect hook (installed by init_toedev).  Active open is not
+ * available from this placeholder, so it always fails with ENOTSUP.
+ */
+static int
+toedev_connect(struct toedev *tod __unused, struct socket *so __unused,
+    struct rtentry *rt __unused, struct sockaddr *nam __unused)
+{
+
+	return (ENOTSUP);
+}
+
+/*
+ * Default tod_listen_start hook (installed by init_toedev): does nothing and
+ * reports ENOTSUP.
+ */
+static int
+toedev_listen_start(struct toedev *tod __unused, struct tcpcb *tp __unused)
+{
+
+	return (ENOTSUP);
+}
+
+/*
+ * Default tod_listen_stop hook (installed by init_toedev): does nothing and
+ * reports ENOTSUP.
+ */
+static int
+toedev_listen_stop(struct toedev *tod __unused, struct tcpcb *tp __unused)
+{
+
+	return (ENOTSUP);
+}
+
+/*
+ * Default tod_input hook (installed by init_toedev).  The frame is simply
+ * discarded: the mbuf chain is freed here, so the caller must not touch it
+ * afterwards.
+ */
+static void
+toedev_input(struct toedev *tod __unused, struct tcpcb *tp __unused,
+    struct mbuf *m)
+{
+
+	m_freem(m);
+}
+
+/*
+ * Default tod_rcvd hook (installed by init_toedev): intentionally a no-op.
+ */
+static void
+toedev_rcvd(struct toedev *tod __unused, struct tcpcb *tp __unused)
+{
+	/* Nothing to do. */
+}
+
+/*
+ * Default transmit-side hook.  init_toedev installs this same placeholder for
+ * tod_output, tod_send_rst and tod_send_fin; it always reports ENOTSUP.
+ */
+static int
+toedev_output(struct toedev *tod __unused, struct tcpcb *tp __unused)
+{
+
+	return (ENOTSUP);
+}
+
+/*
+ * Default tod_pcb_detach hook (installed by init_toedev): intentionally a
+ * no-op.
+ */
+static void
+toedev_pcb_detach(struct toedev *tod __unused, struct tcpcb *tp __unused)
+{
+	/* Nothing to do. */
+}
+
+/*
+ * Default tod_l2_update hook (installed by init_toedev): the L2 information
+ * is ignored.
+ */
+static void
+toedev_l2_update(struct toedev *tod __unused, struct ifnet *ifp __unused,
+    struct sockaddr *sa __unused, uint8_t *lladdr __unused,
+    uint16_t vtag __unused)
+{
+	/* Nothing to do. */
+}
+
+/*
+ * Default tod_route_redirect hook (installed by init_toedev): the redirect is
+ * ignored.
+ */
+static void
+toedev_route_redirect(struct toedev *tod __unused, struct ifnet *ifp __unused,
+    struct rtentry *rt0 __unused, struct rtentry *rt1 __unused)
+{
+	/* Nothing to do. */
+}
+
+/*
+ * Default tod_syncache_added hook (installed by init_toedev): intentionally a
+ * no-op.
+ */
+static void
+toedev_syncache_added(struct toedev *tod __unused, void *ctx __unused)
+{
+	/* Nothing to do. */
+}
+
+/*
+ * Default tod_syncache_removed hook (installed by init_toedev): intentionally
+ * a no-op.
+ */
+static void
+toedev_syncache_removed(struct toedev *tod __unused, void *ctx __unused)
+{
+	/* Nothing to do. */
+}
+
+/*
+ * Default tod_syncache_respond hook (installed by init_toedev).  Nothing is
+ * transmitted: the mbuf chain is freed and 0 is returned, so the caller must
+ * not touch 'm' afterwards.
+ */
+static int
+toedev_syncache_respond(struct toedev *tod __unused, void *ctx __unused,
+    struct mbuf *m)
+{
+
+	m_freem(m);
+	return (0);
+}
+
+/*
+ * Default tod_offload_socket hook (installed by init_toedev): intentionally a
+ * no-op.
+ */
+static void
+toedev_offload_socket(struct toedev *tod __unused, void *ctx __unused,
+    struct socket *so __unused)
+{
+	/* Nothing to do. */
+}
+
+/*
+ * Default tod_ctloutput hook (installed by init_toedev): the socket option
+ * change is ignored.
+ */
+static void
+toedev_ctloutput(struct toedev *tod __unused, struct tcpcb *tp __unused,
+    int sopt_dir __unused, int sopt_name __unused)
+{
+	/* Nothing to do. */
+}
+
+/*
+ * Tell TOE devices about a listening socket.
+ *
+ * 'arg' selects the audience: NULL means every registered toedev gets its
+ * tod_listen_start hook called, otherwise only the toedev that 'arg' points
+ * to does.  PCBs that are dropped, in TIME_WAIT, or not in the LISTEN state
+ * are skipped.  The caller must hold the inp write lock.
+ */
+static void
+toe_listen_start(struct inpcb *inp, void *arg)
+{
+	struct toedev *only = arg;
+	struct toedev *cur;
+	struct tcpcb *tp;
+
+	INP_WLOCK_ASSERT(inp);
+	KASSERT(inp->inp_pcbinfo == &V_tcbinfo,
+	    ("%s: inp is not a TCP inp", __func__));
+
+	if (inp->inp_flags & (INP_DROPPED | INP_TIMEWAIT))
+		return;
+
+	tp = intotcpcb(inp);
+	if (tp->t_state != TCPS_LISTEN)
+		return;
+
+	mtx_lock(&toedev_lock);
+	TAILQ_FOREACH(cur, &toedev_list, link) {
+		/* A specific toedev was requested and this is not it. */
+		if (only != NULL && only != cur)
+			continue;
+		cur->tod_listen_start(cur, tp);
+	}
+	mtx_unlock(&toedev_lock);
+}
+
+/*
+ * Handler for the tcp_offload_listen_start event (registered in
+ * toecore_load).  Offers the listening tcpcb to every registered TOE device.
+ * The tcpcb must be in LISTEN and its inp write-locked.
+ */
+static void
+toe_listen_start_event(void *arg __unused, struct tcpcb *tp)
+{
+
+	INP_WLOCK_ASSERT(tp->t_inpcb);
+	KASSERT(tp->t_state == TCPS_LISTEN,
+	    ("%s: t_state %s", __func__, tcpstates[tp->t_state]));
+
+	/* A NULL selector asks toe_listen_start to notify all toedevs. */
+	toe_listen_start(tp->t_inpcb, NULL);
+}
+
+/*
+ * Handler for the tcp_offload_listen_stop event (registered in
+ * toecore_load).  Calls tod_listen_stop on every registered TOE device for
+ * this listening tcpcb.  The tcpcb must be in LISTEN and its inp
+ * write-locked.
+ */
+static void
+toe_listen_stop_event(void *arg __unused, struct tcpcb *tp)
+{
+	struct toedev *cur;
+
+	/*
+	 * The inp is only needed by the assertion, so name it in place rather
+	 * than through a local that exists only under INVARIANTS.
+	 */
+	INP_WLOCK_ASSERT(tp->t_inpcb);
+	KASSERT(tp->t_state == TCPS_LISTEN,
+	    ("%s: t_state %s", __func__, tcpstates[tp->t_state]));
+
+	mtx_lock(&toedev_lock);
+	TAILQ_FOREACH(cur, &toedev_list, link)
+		cur->tod_listen_stop(cur, tp);
+	mtx_unlock(&toedev_lock);
+}
+
+/*
+ * Give a freshly allocated toedev sane defaults: no driver softc, and a
+ * placeholder for every hook.  Because no hook is ever left NULL the kernel
+ * can invoke any of them without first checking whether the TOE driver
+ * supplied its own; drivers overwrite the ones they implement.
+ */
+void
+init_toedev(struct toedev *tod)
+{
+
+	tod->tod_softc = NULL;
+
+	/* Active and passive open. */
+	tod->tod_connect = toedev_connect;
+	tod->tod_listen_start = toedev_listen_start;
+	tod->tod_listen_stop = toedev_listen_stop;
+
+	/* Receive side. */
+	tod->tod_input = toedev_input;
+	tod->tod_rcvd = toedev_rcvd;
+
+	/* Transmit side; RST and FIN share the generic output placeholder. */
+	tod->tod_output = toedev_output;
+	tod->tod_send_rst = toedev_output;
+	tod->tod_send_fin = toedev_output;
+
+	/* PCB teardown. */
+	tod->tod_pcb_detach = toedev_pcb_detach;
+
+	/* L2 and routing notifications. */
+	tod->tod_l2_update = toedev_l2_update;
+	tod->tod_route_redirect = toedev_route_redirect;
+
+	/* Syncache interaction. */
+	tod->tod_syncache_added = toedev_syncache_added;
+	tod->tod_syncache_removed = toedev_syncache_removed;
+	tod->tod_syncache_respond = toedev_syncache_respond;
+	tod->tod_offload_socket = toedev_offload_socket;
+
+	/* Socket options. */
+	tod->tod_ctloutput = toedev_ctloutput;
+}
+
+/*
+ * Register an active TOE device with the system so that it starts receiving
+ * notifications from the kernel.
+ *
+ * Returns EEXIST if 'tod' is already on the list of registered devices.
+ * Otherwise the device is appended, registered_toedevs is bumped, every
+ * existing inp is run through toe_listen_start for this device, and 0 is
+ * returned.
+ */
+int
+register_toedev(struct toedev *tod)
+{
+	struct toedev *cur;
+	int rc = 0;
+
+	mtx_lock(&toedev_lock);
+	TAILQ_FOREACH(cur, &toedev_list, link) {
+		if (cur == tod) {
+			rc = EEXIST;
+			break;
+		}
+	}
+	if (rc == 0) {
+		TAILQ_INSERT_TAIL(&toedev_list, tod, link);
+		registered_toedevs++;
+	}
+	mtx_unlock(&toedev_lock);
+
+	if (rc != 0)
+		return (rc);
+
+	/* Done after dropping toedev_lock; the callback takes it itself. */
+	inp_apply_all(toe_listen_start, tod);
+
+	return (0);
+}
+
+/*
+ * Take a TOE device off the global list of active TOE devices.  The caller is
+ * responsible for quiescing the device before calling this.
+ *
+ * Returns 0 if the device was found and removed, ENODEV if it was not
+ * registered.
+ */
+int
+unregister_toedev(struct toedev *tod)
+{
+	struct toedev *cur;
+	int rc = ENODEV;
+
+	mtx_lock(&toedev_lock);
+	TAILQ_FOREACH(cur, &toedev_list, link) {
+		if (cur != tod)
+			continue;
+
+		/* The walk stops right after the removal. */
+		TAILQ_REMOVE(&toedev_list, tod, link);
+		registered_toedevs--;
+		rc = 0;
+		break;
+	}
+	KASSERT(registered_toedevs >= 0,
+	    ("%s: registered_toedevs (%d) < 0", __func__, registered_toedevs));
+	mtx_unlock(&toedev_lock);
+
+	return (rc);
+}
+
+/*
+ * Add a syncache entry on behalf of a TOE driver.  Thin wrapper around
+ * syncache_add() that passes the listening socket taken from 'inp', no mbuf,
+ * and the driver's 'tod'/'todctx' pair.  The caller must hold the tcbinfo
+ * write lock and the inp write lock.
+ */
+void
+toe_syncache_add(struct in_conninfo *inc, struct tcpopt *to, struct tcphdr *th,
+    struct inpcb *inp, void *tod, void *todctx)
+{
+	struct socket *lso;
+
+	INP_INFO_WLOCK_ASSERT(&V_tcbinfo);
+	INP_WLOCK_ASSERT(inp);
+
+	lso = inp->inp_socket;
+	syncache_add(inc, to, th, inp, &lso, NULL, tod, todctx);
+}
+
+/*
+ * Expand a syncache entry on behalf of a TOE driver.  Thin wrapper around
+ * syncache_expand() that passes no mbuf and hands back its return value
+ * unchanged.  The caller must hold the tcbinfo write lock.
+ */
+int
+toe_syncache_expand(struct in_conninfo *inc, struct tcpopt *to,
+    struct tcphdr *th, struct socket **lsop)
+{
+	int rc;
+
+	INP_INFO_WLOCK_ASSERT(&V_tcbinfo);
+
+	rc = syncache_expand(inc, to, th, lsop, NULL);
+	return (rc);
+}
+
+/*
+ * General purpose check to see whether a 4-tuple is in use by the kernel.
+ *
+ * If a TCP header (presumably for an incoming SYN) is supplied as well, a
+ * matching PCB that is in TIME_WAIT is handed to tcp_twcheck(), which may
+ * assassinate it and so free the 4-tuple up for re-use.  The header must
+ * already have been run through tcp_fields_to_host() or equivalent.
+ *
+ * Returns 0 if the 4-tuple is free, EADDRINUSE if it is taken, and ENOSYS for
+ * IPv6 (not implemented yet).
+ */
+int
+toe_4tuple_check(struct in_conninfo *inc, struct tcphdr *th, struct ifnet *ifp)
+{
+	struct inpcb *inp;
+
+	if (inc->inc_flags & INC_ISIPV6)
+		return (ENOSYS);	/* XXX: implement */
+
+	inp = in_pcblookup(&V_tcbinfo, inc->inc_faddr, inc->inc_fport,
+	    inc->inc_laddr, inc->inc_lport, INPLOOKUP_WLOCKPCB, ifp);
+	if (inp == NULL)
+		return (0);
+
+	INP_WLOCK_ASSERT(inp);
+
+	/* A live PCB, or no header to judge a TIME_WAIT one by: in use. */
+	if (!(inp->inp_flags & INP_TIMEWAIT) || th == NULL) {
+		INP_WUNLOCK(inp);
+		return (EADDRINUSE);
+	}
+
+	/*
+	 * NOTE(review): the inp is not unlocked on this path, so tcp_twcheck
+	 * presumably releases it whatever it returns -- confirm.
+	 */
+	INP_INFO_WLOCK_ASSERT(&V_tcbinfo);	/* for twcheck */
+	if (!tcp_twcheck(inp, NULL, th, NULL, 0))
+		return (EADDRINUSE);
+
+	return (0);
+}
+
+/*
+ * Handler for lle_event (registered in toecore_load).  Relays a change in a
+ * link-layer table entry to the TOE device of the entry's interface through
+ * tod_l2_update.
+ *
+ * A resolved entry is reported with a pointer to its link-layer address; any
+ * other event is reported with a NULL lladdr.  Nothing is reported when the
+ * interface's TOE capability for the entry's address family is disabled, or
+ * when the interface has no toedev.  The lle must be write-locked.
+ */
+static void
+toe_lle_event(void *arg __unused, struct llentry *lle, int evt)
+{
+	struct toedev *tod;
+	struct ifnet *ifp;
+	struct sockaddr *sa;
+	uint8_t *lladdr = NULL;
+	uint16_t vtag = 0xfff;
+
+	LLE_WLOCK_ASSERT(lle);
+
+	ifp = lle->lle_tbl->llt_ifp;
+	sa = L3_ADDR(lle);
+
+	KASSERT(sa->sa_family == AF_INET || sa->sa_family == AF_INET6,
+	    ("%s: lle_event %d for lle %p but sa %p !INET && !INET6",
+	    __func__, evt, lle, sa));
+
+	/*
+	 * Not interested if the interface's TOE capability is not enabled.
+	 */
+	if (sa->sa_family == AF_INET && !(ifp->if_capenable & IFCAP_TOE4))
+		return;
+	if (sa->sa_family == AF_INET6 && !(ifp->if_capenable & IFCAP_TOE6))
+		return;
+
+	tod = TOEDEV(ifp);
+	if (tod == NULL)
+		return;
+
+	/*
+	 * Everything other than LLENTRY_RESOLVED (LLENTRY_TIMEDOUT,
+	 * LLENTRY_DELETED, LLENTRY_EXPIRED) means this entry is going to be
+	 * deleted; those keep the NULL lladdr and default vtag set above.
+	 */
+	if (evt == LLENTRY_RESOLVED) {
+		KASSERT(lle->la_flags & LLE_VALID,
+		    ("%s: %p resolved but not valid?", __func__, lle));
+
+		lladdr = (uint8_t *)&lle->ll_addr;
+#ifdef VLAN_TAG
+		VLAN_TAG(ifp, &vtag);
+#endif
+	}
+
+	tod->tod_l2_update(tod, ifp, sa, lladdr, vtag);
+}
+
+/*
+ * Handler for route_redirect_event (registered in toecore_load).
+ *
+ * XXX: implement.  The event is currently ignored.
+ */
+static void
+toe_route_redirect_event(void *arg __unused, struct rtentry *rt0,
+    struct rtentry *rt1, struct sockaddr *sa)
+{
+	/* Nothing to do yet. */
+}
+
+/*
+ * Look up L2 information for the address 'sa' on interface 'ifp'.
+ *
+ * Returns 0 or EWOULDBLOCK on success; any other value is an error.  0 means
+ * lladdr and vtag are valid on return.  EWOULDBLOCK means the TOE driver's
+ * tod_l2_update will be called later, when the entry is resolved or times
+ * out.  Address families other than those compiled in (INET/INET6) yield
+ * EPROTONOSUPPORT.
+ */
+int
+toe_l2_resolve(struct toedev *tod, struct ifnet *ifp, struct sockaddr *sa,
+    uint8_t *lladdr, uint16_t *vtag)
+{
+	struct llentry *lle;
+	int rc;
+
+	switch (sa->sa_family) {
+#ifdef INET
+	case AF_INET:
+		rc = arpresolve(ifp, NULL, NULL, sa, lladdr, &lle);
+		break;
+#endif
+#ifdef INET6
+	case AF_INET6:
+		rc = nd6_storelladdr(ifp, NULL, sa, lladdr, &lle);
+		break;
+#endif
+	default:
+		return (EPROTONOSUPPORT);
+	}
+
+	/* Only a resolved address comes with a VLAN tag. */
+	if (rc != 0)
+		return (rc);
+
+	/*
+	 * Fall back to 0xfff when VLAN_TAG is unavailable or reports failure.
+	 */
+#ifdef VLAN_TAG
+	if (VLAN_TAG(ifp, vtag) != 0)
+#endif
+		*vtag = 0xfff;
+
+	return (0);
+}
+
+/*
+ * Called to report that an offloaded active open on 'tp' failed with 'err'.
+ * The caller must hold the inp write lock, and it is held again on return.
+ *
+ * Nothing is done if the inp has already been dropped.  EAGAIN is treated as
+ * a temporary offload failure: the PCB is detached from the TOE driver and
+ * the connection attempt continues in the regular stack.  Any other error
+ * drops the connection.
+ */
+void
+toe_connect_failed(struct toedev *tod, struct tcpcb *tp, int err)
+{
+	struct inpcb *inp = tp->t_inpcb;
+
+	INP_WLOCK_ASSERT(inp);
+	KASSERT(tp->t_flags & TF_TOE,
+	    ("%s: tp %p not offloaded.", __func__, tp));
+
+	if (inp->inp_flags & INP_DROPPED) {
+		/* Already gone; nothing left to undo. */
+	} else if (err == EAGAIN) {
+		/*
+		 * Temporary failure during offload, take this PCB back.
+		 * Detach from the TOE driver and do the rest of what TCP's
+		 * pru_connect would have done if the connection wasn't
+		 * offloaded.
+		 */
+		tod->tod_pcb_detach(tod, tp);
+		KASSERT(!(tp->t_flags & TF_TOE),
+		    ("%s: tp %p still offloaded.", __func__, tp));
+		tcp_timer_activate(tp, TT_KEEP, TP_KEEPINIT(tp));
+		(void) tcp_output(tp);
+	} else {
+		INP_INFO_WLOCK_ASSERT(&V_tcbinfo);
+		tp = tcp_drop(tp, err);
+		if (tp == NULL)
+			INP_WLOCK(inp);	/* re-acquire */
+	}
+
+	INP_WLOCK_ASSERT(inp);
+}
+
+/*
+ * MOD_LOAD handler: set up the (initially empty) list of registered TOE
+ * devices and its lock, then hook the events toecore reacts to.  The tags
+ * are saved so toecore_unload can deregister them.  Always returns 0.
+ */
+static int
+toecore_load(void)
+{
+
+	TAILQ_INIT(&toedev_list);
+	mtx_init(&toedev_lock, "toedev lock", NULL, MTX_DEF);
+
+	/* Listening sockets coming and going. */
+	listen_start_eh = EVENTHANDLER_REGISTER(tcp_offload_listen_start,
+	    toe_listen_start_event, NULL, EVENTHANDLER_PRI_ANY);
+	listen_stop_eh = EVENTHANDLER_REGISTER(tcp_offload_listen_stop,
+	    toe_listen_stop_event, NULL, EVENTHANDLER_PRI_ANY);
+
+	/* Link-layer entry and routing changes. */
+	lle_event_eh = EVENTHANDLER_REGISTER(lle_event, toe_lle_event, NULL,
+	    EVENTHANDLER_PRI_ANY);
+	route_redirect_eh = EVENTHANDLER_REGISTER(route_redirect_event,
+	    toe_route_redirect_event, NULL, EVENTHANDLER_PRI_ANY);
+
+	return (0);
+}
+
+/*
+ * MOD_UNLOAD handler.  Refuses with EBUSY while any TOE device is still
+ * registered; otherwise removes the event handlers installed by
+ * toecore_load, destroys the list lock and returns 0.
+ */
+static int
+toecore_unload(void)
+{
+	int busy;
+
+	mtx_lock(&toedev_lock);
+	busy = !TAILQ_EMPTY(&toedev_list);
+	if (busy) {
+		mtx_unlock(&toedev_lock);
+		return (EBUSY);
+	}
+
+	/* Same events, same order, as registered in toecore_load. */
+	EVENTHANDLER_DEREGISTER(tcp_offload_listen_start, listen_start_eh);
+	EVENTHANDLER_DEREGISTER(tcp_offload_listen_stop, listen_stop_eh);
+	EVENTHANDLER_DEREGISTER(lle_event, lle_event_eh);
+	EVENTHANDLER_DEREGISTER(route_redirect_event, route_redirect_eh);
+
+	mtx_unlock(&toedev_lock);
+	mtx_destroy(&toedev_lock);
+
+	return (0);
+}
+
+/*
+ * Module event handler for toecore.  Load and unload are dispatched to their
+ * helpers; every other command is rejected with EOPNOTSUPP.
+ */
+static int
+toecore_mod_handler(module_t mod, int cmd, void *arg)
+{
+
+	switch (cmd) {
+	case MOD_LOAD:
+		return (toecore_load());
+	case MOD_UNLOAD:
+		return (toecore_unload());
+	default:
+		return (EOPNOTSUPP);
+	}
+}
+
+/* Module glue: name, event handler, and (unused) extra data. */
+static moduledata_t mod_data = {
+	"toecore",
+	toecore_mod_handler,
+	0
+};
+
+MODULE_VERSION(toecore, 1);
+DECLARE_MODULE(toecore, mod_data, SI_SUB_EXEC, SI_ORDER_ANY);
diff --git a/sys/netinet/toecore.h b/sys/netinet/toecore.h
new file mode 100644
index 0000000..a381825
--- /dev/null
+++ b/sys/netinet/toecore.h
@@ -0,0 +1,130 @@
+/*-
+ * Copyright (c) 2012 Chelsio Communications, Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+#ifndef _NETINET_TOE_H_
+#define _NETINET_TOE_H_
+
+#ifndef _KERNEL
+#error "no user-serviceable parts inside"
+#endif
+
+struct tcpopt;
+struct tcphdr;
+struct in_conninfo;
+
+/*
+ * A TOE device as seen by the kernel: list linkage, the driver's private
+ * data, and the table of hooks the kernel calls into.  init_toedev() fills
+ * every hook with a placeholder, so none of them is ever NULL; a driver
+ * overrides the ones it implements.
+ */
+struct toedev {
+	TAILQ_ENTRY(toedev) link;	/* linkage on toedev_list */
+	void *tod_softc;		/* private data of the TOE driver */
+
+	/*
+	 * Active open.  A failure is reported back by the driver through
+	 * toe_connect_failed.
+	 */
+	int (*tod_connect)(struct toedev *, struct socket *, struct rtentry *,
+	    struct sockaddr *);
+
+	/* Passive open: start/stop handling a listening tcpcb. */
+	int (*tod_listen_start)(struct toedev *, struct tcpcb *);
+	int (*tod_listen_stop)(struct toedev *, struct tcpcb *);
+
+	/*
+	 * Used by the kernel to hand the TOE driver any frame it receives for
+	 * an offloaded connection.  This is an unusual event.
+	 */
+	void (*tod_input)(struct toedev *, struct tcpcb *, struct mbuf *);
+
+	/*
+	 * Called by the kernel during pru_rcvd for an offloaded TCP
+	 * connection; gives the TOE driver a chance to manage its rx window
+	 * and credits.
+	 */
+	void (*tod_rcvd)(struct toedev *, struct tcpcb *);
+
+	/*
+	 * Transmit routine.  The kernel calls this so that the TOE driver can
+	 * work out whether there is data to send, and send it.
+	 */
+	int (*tod_output)(struct toedev *, struct tcpcb *);
+
+	/* Immediate teardown: send a RST to the peer. */
+	int (*tod_send_rst)(struct toedev *, struct tcpcb *);
+
+	/* Orderly disconnect: send a FIN to the peer. */
+	int (*tod_send_fin)(struct toedev *, struct tcpcb *);
+
+	/* The kernel is done with this TCP PCB. */
+	void (*tod_pcb_detach)(struct toedev *, struct tcpcb *);
+
+	/*
+	 * Called by the kernel once it has information about an L2 entry that
+	 * the TOE driver asked about earlier (via toe_l2_resolve).
+	 */
+	void (*tod_l2_update)(struct toedev *, struct ifnet *,
+	    struct sockaddr *, uint8_t *, uint16_t);
+
+	/* XXX.  Route has been redirected. */
+	void (*tod_route_redirect)(struct toedev *, struct ifnet *,
+	    struct rtentry *, struct rtentry *);
+
+	/*
+	 * Syncache interaction.  The void * argument is the driver context
+	 * that was supplied when the syncache entry was added.
+	 */
+	void (*tod_syncache_added)(struct toedev *, void *);
+	void (*tod_syncache_removed)(struct toedev *, void *);
+	int (*tod_syncache_respond)(struct toedev *, void *, struct mbuf *);
+	void (*tod_offload_socket)(struct toedev *, void *, struct socket *);
+
+	/* TCP socket option; the two ints are sopt_dir and sopt_name. */
+	void (*tod_ctloutput)(struct toedev *, struct tcpcb *, int, int);
+};
+
+#include <sys/eventhandler.h>
+typedef void (*tcp_offload_listen_start_fn)(void *, struct tcpcb *);
+typedef void (*tcp_offload_listen_stop_fn)(void *, struct tcpcb *);
+EVENTHANDLER_DECLARE(tcp_offload_listen_start, tcp_offload_listen_start_fn);
+EVENTHANDLER_DECLARE(tcp_offload_listen_stop, tcp_offload_listen_stop_fn);
+
+void init_toedev(struct toedev *);
+int register_toedev(struct toedev *);
+int unregister_toedev(struct toedev *);
+
+/*
+ * General interface for looking up L2 information for an IP address. If an
+ * answer is not available right away then the TOE driver's tod_l2_update will
+ * be called later.
+ */
+int toe_l2_resolve(struct toedev *, struct ifnet *, struct sockaddr *,
+ uint8_t *, uint16_t *);
+
+void toe_connect_failed(struct toedev *, struct tcpcb *, int);
+
+void toe_syncache_add(struct in_conninfo *, struct tcpopt *, struct tcphdr *,
+ struct inpcb *, void *, void *);
+int toe_syncache_expand(struct in_conninfo *, struct tcpopt *, struct tcphdr *,
+ struct socket **);
+
+int toe_4tuple_check(struct in_conninfo *, struct tcphdr *, struct ifnet *);
+#endif
diff --git a/sys/netinet/toedev.h b/sys/netinet/toedev.h
deleted file mode 100644
index 7edaca1..0000000
--- a/sys/netinet/toedev.h
+++ /dev/null
@@ -1,162 +0,0 @@
-/*-
- * Copyright (c) 2007, Chelsio Inc.
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * 1. Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- *
- * 2. Neither the name of the Chelsio Corporation nor the names of its
- * contributors may be used to endorse or promote products derived from
- * this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- *
- * $FreeBSD$
- */
-
-#ifndef _NETINET_TOEDEV_H_
-#define _NETINET_TOEDEV_H_
-
-#ifndef _KERNEL
-#error "no user-serviceable parts inside"
-#endif
-
-extern uint32_t toedev_registration_count;
-
-/* Parameter values for offload_get_phys_egress(). */
-enum {
- TOE_OPEN,
- TOE_FAILOVER,
-};
-
-/* Parameter values for toe_failover(). */
-enum {
- TOE_ACTIVE_SLAVE,
- TOE_LINK_DOWN,
- TOE_LINK_UP,
- TOE_RELEASE,
- TOE_RELEASE_ALL,
-};
-
-#define TOENAMSIZ 16
-
-/* Get the toedev associated with a ifnet. */
-#define TOEDEV(ifp) ((ifp)->if_llsoftc)
-
-struct offload_id {
- unsigned int id;
- unsigned long data;
-};
-
-struct ifnet;
-struct rt_entry;
-struct tom_info;
-struct sysctl_oid;
-struct socket;
-struct mbuf;
-
-struct toedev {
- TAILQ_ENTRY(toedev) entry;
- char tod_name[TOENAMSIZ]; /* TOE device name */
- unsigned int tod_ttid; /* TOE type id */
- unsigned long tod_flags; /* device flags */
- unsigned int tod_mtu; /* max TX offloaded data */
- unsigned int tod_nconn; /* max # of offloaded
- * connections
- */
- struct ifnet *tod_lldev; /* first interface */
- const struct tom_info *tod_offload_mod; /* TCP offload module */
-
- /*
- * This TOE device is capable of offloading the connection for socket so
- */
- int (*tod_can_offload)(struct toedev *dev, struct socket *so);
-
- /*
- * Establish a connection to nam using the TOE device dev
- */
- int (*tod_connect)(struct toedev *dev, struct socket *so,
- struct rtentry *rt, struct sockaddr *nam);
- /*
- * Send an mbuf down to the toe device
- */
- int (*tod_send)(struct toedev *dev, struct mbuf *m);
- /*
- * Receive an array of mbufs from the TOE device dev
- */
- int (*tod_recv)(struct toedev *dev, struct mbuf **m, int n);
- /*
- * Device specific ioctl interface
- */
- int (*tod_ctl)(struct toedev *dev, unsigned int req, void *data);
- /*
- * Update L2 entry in toedev
- */
- void (*tod_arp_update)(struct toedev *dev, struct rtentry *neigh);
- /*
- * Failover from one toe device to another
- */
- void (*tod_failover)(struct toedev *dev, struct ifnet *bond_ifp,
- struct ifnet *ndev, int event);
- void *tod_priv; /* driver private data */
- void *tod_l2opt; /* optional layer 2 data */
- void *tod_l3opt; /* optional layer 3 data */
- void *tod_l4opt; /* optional layer 4 data */
- void *tod_ulp; /* upper lever protocol */
-};
-
-struct tom_info {
- TAILQ_ENTRY(tom_info) entry;
- int (*ti_attach)(struct toedev *dev,
- const struct offload_id *entry);
- int (*ti_detach)(struct toedev *dev);
- const char *ti_name;
- const struct offload_id *ti_id_table;
-};
-
-static __inline void
-init_offload_dev(struct toedev *dev)
-{
-}
-
-int register_tom(struct tom_info *t);
-int unregister_tom(struct tom_info *t);
-int register_toedev(struct toedev *dev, const char *name);
-int unregister_toedev(struct toedev *dev);
-int activate_offload(struct toedev *dev);
-int toe_send(struct toedev *dev, struct mbuf *m);
-void toe_arp_update(struct rtentry *rt);
-struct ifnet *offload_get_phys_egress(struct ifnet *ifp,
- struct socket *so, int context);
-int toe_receive_mbuf(struct toedev *dev, struct mbuf **m, int n);
-
-static __inline void
-toe_neigh_update(struct ifnet *ifp)
-{
-}
-
-static __inline void
-toe_failover(struct ifnet *bond_ifp, struct ifnet *fail_ifp, int event)
-{
-}
-
-static __inline int
-toe_enslave(struct ifnet *bond_ifp, struct ifnet *slave_ifp)
-{
- return (0);
-}
-
-#endif /* _NETINET_TOEDEV_H_ */
OpenPOWER on IntegriCloud