diff options
Diffstat (limited to 'sys/netinet/tcp_offload.h')
-rw-r--r-- | sys/netinet/tcp_offload.h | 364 |
1 files changed, 29 insertions, 335 deletions
diff --git a/sys/netinet/tcp_offload.h b/sys/netinet/tcp_offload.h index 313185f..a052366 100644 --- a/sys/netinet/tcp_offload.h +++ b/sys/netinet/tcp_offload.h @@ -1,30 +1,30 @@ /*- - * Copyright (c) 2007, Chelsio Inc. + * Copyright (c) 2012 Chelsio Communications, Inc. * All rights reserved. * * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * 1. Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * - * 2. Neither the name of the Chelsio Corporation nor the names of its - * contributors may be used to endorse or promote products derived from - * this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. * * $FreeBSD$ + * */ #ifndef _NETINET_TCP_OFFLOAD_H_ @@ -34,321 +34,15 @@ #error "no user-serviceable parts inside" #endif -/* - * A driver publishes that it provides offload services - * by setting IFCAP_TOE in the ifnet. The offload connect - * will bypass any further work if the interface that a - * connection would use does not support TCP offload. - * - * The TOE API assumes that the tcp offload engine can offload the - * the entire connection from set up to teardown, with some provision - * being made to allowing the software stack to handle time wait. If - * the device does not meet these criteria, it is the driver's responsibility - * to overload the functions that it needs to in tcp_usrreqs and make - * its own calls to tcp_output if it needs to do so. - * - * There is currently no provision for the device advertising the congestion - * control algorithms it supports as there is currently no API for querying - * an operating system for the protocols that it has loaded. This is a desirable - * future extension. - * - * - * - * It is assumed that individuals deploying TOE will want connections - * to be offloaded without software changes so all connections on an - * interface providing TOE are offloaded unless the SO_NO_OFFLOAD - * flag is set on the socket. - * - * - * The toe_usrreqs structure constitutes the TOE driver's - * interface to the TCP stack for functionality that doesn't - * interact directly with userspace. If one wants to provide - * (optional) functionality to do zero-copy to/from - * userspace one still needs to override soreceive/sosend - * with functions that fault in and pin the user buffers. - * - * + tu_send - * - tells the driver that new data may have been added to the - * socket's send buffer - the driver should not fail if the - * buffer is in fact unchanged - * - the driver is responsible for providing credits (bytes in the send window) - * back to the socket by calling sbdrop() as segments are acknowledged. - * - The driver expects the inpcb lock to be held - the driver is expected - * not to drop the lock. Hence the driver is not allowed to acquire the - * pcbinfo lock during this call. - * - * + tu_rcvd - * - returns credits to the driver and triggers window updates - * to the peer (a credit as used here is a byte in the peer's receive window) - * - the driver is expected to determine how many bytes have been - * consumed and credit that back to the card so that it can grow - * the window again by maintaining its own state between invocations. - * - In principle this could be used to shrink the window as well as - * grow the window, although it is not used for that now. - * - this function needs to correctly handle being called any number of - * times without any bytes being consumed from the receive buffer. - * - The driver expects the inpcb lock to be held - the driver is expected - * not to drop the lock. Hence the driver is not allowed to acquire the - * pcbinfo lock during this call. - * - * + tu_disconnect - * - tells the driver to send FIN to peer - * - driver is expected to send the remaining data and then do a clean half close - * - disconnect implies at least half-close so only send, reset, and detach - * are legal - * - the driver is expected to handle transition through the shutdown - * state machine and allow the stack to support SO_LINGER. - * - The driver expects the inpcb lock to be held - the driver is expected - * not to drop the lock. Hence the driver is not allowed to acquire the - * pcbinfo lock during this call. - * - * + tu_reset - * - closes the connection and sends a RST to peer - * - driver is expectd to trigger an RST and detach the toepcb - * - no further calls are legal after reset - * - The driver expects the inpcb lock to be held - the driver is expected - * not to drop the lock. Hence the driver is not allowed to acquire the - * pcbinfo lock during this call. - * - * The following fields in the tcpcb are expected to be referenced by the driver: - * + iss - * + rcv_nxt - * + rcv_wnd - * + snd_isn - * + snd_max - * + snd_nxt - * + snd_una - * + t_flags - * + t_inpcb - * + t_maxseg - * + t_toe - * - * The following fields in the inpcb are expected to be referenced by the driver: - * + inp_lport - * + inp_fport - * + inp_laddr - * + inp_fport - * + inp_socket - * + inp_ip_tos - * - * The following fields in the socket are expected to be referenced by the - * driver: - * + so_comp - * + so_error - * + so_linger - * + so_options - * + so_rcv - * + so_snd - * + so_state - * + so_timeo - * - * These functions all return 0 on success and can return the following errors - * as appropriate: - * + EPERM: - * + ENOBUFS: memory allocation failed - * + EMSGSIZE: MTU changed during the call - * + EHOSTDOWN: - * + EHOSTUNREACH: - * + ENETDOWN: - * * ENETUNREACH: the peer is no longer reachable - * - * + tu_detach - * - tells driver that the socket is going away so disconnect - * the toepcb and free appropriate resources - * - allows the driver to cleanly handle the case of connection state - * outliving the socket - * - no further calls are legal after detach - * - the driver is expected to provide its own synchronization between - * detach and receiving new data. - * - * + tu_syncache_event - * - even if it is not actually needed, the driver is expected to - * call syncache_add for the initial SYN and then syncache_expand - * for the SYN,ACK - * - tells driver that a connection either has not been added or has - * been dropped from the syncache - * - the driver is expected to maintain state that lives outside the - * software stack so the syncache needs to be able to notify the - * toe driver that the software stack is not going to create a connection - * for a received SYN - * - The driver is responsible for any synchronization required between - * the syncache dropping an entry and the driver processing the SYN,ACK. - * - */ -struct toe_usrreqs { - int (*tu_send)(struct tcpcb *tp); - int (*tu_rcvd)(struct tcpcb *tp); - int (*tu_disconnect)(struct tcpcb *tp); - int (*tu_reset)(struct tcpcb *tp); - void (*tu_detach)(struct tcpcb *tp); - void (*tu_syncache_event)(int event, void *toep); -}; - -/* - * Proxy for struct tcpopt between TOE drivers and TCP functions. - */ -struct toeopt { - u_int64_t to_flags; /* see tcpopt in tcp_var.h */ - u_int16_t to_mss; /* maximum segment size */ - u_int8_t to_wscale; /* window scaling */ +extern int registered_toedevs; - u_int8_t _pad1; /* explicit pad for 64bit alignment */ - u_int32_t _pad2; /* explicit pad for 64bit alignment */ - u_int64_t _pad3[4]; /* TBD */ -}; +int tcp_offload_connect(struct socket *, struct sockaddr *); +void tcp_offload_listen_start(struct tcpcb *); +void tcp_offload_listen_stop(struct tcpcb *); +void tcp_offload_input(struct tcpcb *, struct mbuf *); +int tcp_offload_output(struct tcpcb *); +void tcp_offload_rcvd(struct tcpcb *); +void tcp_offload_ctloutput(struct tcpcb *, int, int); +void tcp_offload_detach(struct tcpcb *); -#define TOE_SC_ENTRY_PRESENT 1 /* 4-tuple already present */ -#define TOE_SC_DROP 2 /* connection was timed out */ - -/* - * Because listen is a one-to-many relationship (a socket can be listening - * on all interfaces on a machine some of which may be using different TCP - * offload devices), listen uses a publish/subscribe mechanism. The TCP - * offload driver registers a listen notification function with the stack. - * When a listen socket is created all TCP offload devices are notified - * so that they can do the appropriate set up to offload connections on the - * port to which the socket is bound. When the listen socket is closed, - * the offload devices are notified so that they will stop listening on that - * port and free any associated resources as well as sending RSTs on any - * connections in the SYN_RCVD state. - * - */ - -typedef void (*tcp_offload_listen_start_fn)(void *, struct tcpcb *); -typedef void (*tcp_offload_listen_stop_fn)(void *, struct tcpcb *); - -EVENTHANDLER_DECLARE(tcp_offload_listen_start, tcp_offload_listen_start_fn); -EVENTHANDLER_DECLARE(tcp_offload_listen_stop, tcp_offload_listen_stop_fn); - -/* - * Check if the socket can be offloaded by the following steps: - * - determine the egress interface - * - check the interface for TOE capability and TOE is enabled - * - check if the device has resources to offload the connection - */ -int tcp_offload_connect(struct socket *so, struct sockaddr *nam); - -/* - * The tcp_output_* routines are wrappers around the toe_usrreqs calls - * which trigger packet transmission. In the non-offloaded case they - * translate to tcp_output. The tcp_offload_* routines notify TOE - * of specific events. I the non-offloaded case they are no-ops. - * - * Listen is a special case because it is a 1 to many relationship - * and there can be more than one offload driver in the system. - */ - -/* - * Connection is offloaded - */ -#define tp_offload(tp) ((tp)->t_flags & TF_TOE) - -/* - * hackish way of allowing this file to also be included by TOE - * which needs to be kept ignorant of socket implementation details - */ -#ifdef _SYS_SOCKETVAR_H_ -/* - * The socket has not been marked as "do not offload" - */ -#define SO_OFFLOADABLE(so) ((so->so_options & SO_NO_OFFLOAD) == 0) - -static __inline int -tcp_output_connect(struct socket *so, struct sockaddr *nam) -{ - struct tcpcb *tp = sototcpcb(so); - int error; - - /* - * If offload has been disabled for this socket or the - * connection cannot be offloaded just call tcp_output - * to start the TCP state machine. - */ -#ifndef TCP_OFFLOAD_DISABLE - if (!SO_OFFLOADABLE(so) || (error = tcp_offload_connect(so, nam)) != 0) -#endif - error = tcp_output(tp); - return (error); -} - -static __inline int -tcp_output_send(struct tcpcb *tp) -{ - -#ifndef TCP_OFFLOAD_DISABLE - if (tp_offload(tp)) - return (tp->t_tu->tu_send(tp)); -#endif - return (tcp_output(tp)); -} - -static __inline int -tcp_output_rcvd(struct tcpcb *tp) -{ - -#ifndef TCP_OFFLOAD_DISABLE - if (tp_offload(tp)) - return (tp->t_tu->tu_rcvd(tp)); #endif - return (tcp_output(tp)); -} - -static __inline int -tcp_output_disconnect(struct tcpcb *tp) -{ - -#ifndef TCP_OFFLOAD_DISABLE - if (tp_offload(tp)) - return (tp->t_tu->tu_disconnect(tp)); -#endif - return (tcp_output(tp)); -} - -static __inline int -tcp_output_reset(struct tcpcb *tp) -{ - -#ifndef TCP_OFFLOAD_DISABLE - if (tp_offload(tp)) - return (tp->t_tu->tu_reset(tp)); -#endif - return (tcp_output(tp)); -} - -static __inline void -tcp_offload_detach(struct tcpcb *tp) -{ - -#ifndef TCP_OFFLOAD_DISABLE - if (tp_offload(tp)) - tp->t_tu->tu_detach(tp); -#endif -} - -static __inline void -tcp_offload_listen_open(struct tcpcb *tp) -{ - -#ifndef TCP_OFFLOAD_DISABLE - if (SO_OFFLOADABLE(tp->t_inpcb->inp_socket)) - EVENTHANDLER_INVOKE(tcp_offload_listen_start, tp); -#endif -} - -static __inline void -tcp_offload_listen_close(struct tcpcb *tp) -{ - -#ifndef TCP_OFFLOAD_DISABLE - EVENTHANDLER_INVOKE(tcp_offload_listen_stop, tp); -#endif -} -#undef SO_OFFLOADABLE -#endif /* _SYS_SOCKETVAR_H_ */ -#undef tp_offload - -void tcp_offload_twstart(struct tcpcb *tp); -struct tcpcb *tcp_offload_close(struct tcpcb *tp); -struct tcpcb *tcp_offload_drop(struct tcpcb *tp, int error); - -#endif /* _NETINET_TCP_OFFLOAD_H_ */ |