diff options
Diffstat (limited to 'sys/dev/cxgb/ulp/iw_cxgb/iw_cxgb_cm.c')
-rw-r--r-- | sys/dev/cxgb/ulp/iw_cxgb/iw_cxgb_cm.c | 1779 |
1 files changed, 1779 insertions, 0 deletions
diff --git a/sys/dev/cxgb/ulp/iw_cxgb/iw_cxgb_cm.c b/sys/dev/cxgb/ulp/iw_cxgb/iw_cxgb_cm.c new file mode 100644 index 0000000..cec4611 --- /dev/null +++ b/sys/dev/cxgb/ulp/iw_cxgb/iw_cxgb_cm.c @@ -0,0 +1,1779 @@ +/************************************************************************** + +Copyright (c) 2007, Chelsio Inc. +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + + 1. Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimer. + + 2. Neither the name of the Chelsio Corporation nor the names of its + contributors may be used to endorse or promote products derived from + this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +POSSIBILITY OF SUCH DAMAGE. + +***************************************************************************/ +#include <sys/cdefs.h> +__FBSDID("$FreeBSD$"); + +#include <sys/param.h> +#include <sys/systm.h> +#include <sys/kernel.h> +#include <sys/bus.h> +#include <sys/module.h> +#include <sys/pciio.h> +#include <sys/conf.h> +#include <machine/bus.h> +#include <machine/resource.h> +#include <sys/bus_dma.h> +#include <sys/rman.h> +#include <sys/ioccom.h> +#include <sys/mbuf.h> +#include <sys/rwlock.h> +#include <sys/linker.h> +#include <sys/firmware.h> +#include <sys/socket.h> +#include <sys/socketvar.h> +#include <sys/sockio.h> +#include <sys/smp.h> +#include <sys/sysctl.h> +#include <sys/syslog.h> +#include <sys/queue.h> +#include <sys/taskqueue.h> +#include <sys/proc.h> +#include <sys/uio.h> + +#include <net/route.h> +#include <netinet/in_systm.h> +#include <netinet/in.h> +#include <netinet/in_pcb.h> +#include <netinet/ip.h> +#include <netinet/ip_var.h> +#include <netinet/tcp_var.h> +#include <netinet/tcp.h> +#include <netinet/tcpip.h> + +#include <contrib/rdma/ib_verbs.h> + + +#ifdef CONFIG_DEFINED +#include <cxgb_include.h> +#include <ulp/tom/cxgb_tom.h> +#include <ulp/tom/cxgb_t3_ddp.h> +#include <ulp/tom/cxgb_defs.h> +#include <ulp/tom/cxgb_toepcb.h> +#include <ulp/iw_cxgb/iw_cxgb_wr.h> +#include <ulp/iw_cxgb/iw_cxgb_hal.h> +#include <ulp/iw_cxgb/iw_cxgb_provider.h> +#include <ulp/iw_cxgb/iw_cxgb_cm.h> +#include <ulp/iw_cxgb/iw_cxgb.h> +#else +#include <dev/cxgb/cxgb_include.h> +#include <dev/cxgb/ulp/tom/cxgb_tom.h> +#include <dev/ulp/tom/cxgb_t3_ddp.h> +#include <dev/cxgb/ulp/tom/cxgb_defs.h> +#include <dev/cxgb/ulp/tom/cxgb_toepcb.h> +#include <dev/cxgb/ulp/iw_cxgb/iw_cxgb_wr.h> +#include <dev/cxgb/ulp/iw_cxgb/iw_cxgb_hal.h> +#include <dev/cxgb/ulp/iw_cxgb/iw_cxgb_provider.h> +#include <dev/cxgb/ulp/iw_cxgb/iw_cxgb_cm.h> +#include <dev/cxgb/ulp/iw_cxgb/iw_cxgb.h> +#endif + +#ifdef KTR +static char *states[] = { + "idle", + "listen", + "connecting", + "mpa_wait_req", + "mpa_req_sent", + "mpa_req_rcvd", + "mpa_rep_sent", + "fpdu_mode", + "aborting", + "closing", + "moribund", + "dead", + NULL, +}; +#endif + +SYSCTL_NODE(_hw, OID_AUTO, cxgb, CTLFLAG_RD, 0, "iw_cxgb driver parameters"); + +static int ep_timeout_secs = 10; +TUNABLE_INT("hw.iw_cxgb.ep_timeout_secs", &ep_timeout_secs); +SYSCTL_UINT(_hw_cxgb, OID_AUTO, ep_timeout_secs, CTLFLAG_RDTUN, &ep_timeout_secs, 0, + "CM Endpoint operation timeout in seconds (default=10)"); + +static int mpa_rev = 1; +TUNABLE_INT("hw.iw_cxgb.mpa_rev", &mpa_rev); +SYSCTL_UINT(_hw_cxgb, OID_AUTO, mpa_rev, CTLFLAG_RDTUN, &mpa_rev, 0, + "MPA Revision, 0 supports amso1100, 1 is spec compliant. (default=1)"); + +static int markers_enabled = 0; +TUNABLE_INT("hw.iw_cxgb.markers_enabled", &markers_enabled); +SYSCTL_UINT(_hw_cxgb, OID_AUTO, markers_enabled, CTLFLAG_RDTUN, &markers_enabled, 0, + "Enable MPA MARKERS (default(0)=disabled)"); + +static int crc_enabled = 1; +TUNABLE_INT("hw.iw_cxgb.crc_enabled", &crc_enabled); +SYSCTL_UINT(_hw_cxgb, OID_AUTO, crc_enabled, CTLFLAG_RDTUN, &crc_enabled, 0, + "Enable MPA CRC (default(1)=enabled)"); + +static int rcv_win = 256 * 1024; +TUNABLE_INT("hw.iw_cxgb.rcv_win", &rcv_win); +SYSCTL_UINT(_hw_cxgb, OID_AUTO, rcv_win, CTLFLAG_RDTUN, &rcv_win, 0, + "TCP receive window in bytes (default=256KB)"); + +static int snd_win = 32 * 1024; +TUNABLE_INT("hw.iw_cxgb.snd_win", &snd_win); +SYSCTL_UINT(_hw_cxgb, OID_AUTO, snd_win, CTLFLAG_RDTUN, &snd_win, 0, + "TCP send window in bytes (default=32KB)"); + +static unsigned int nocong = 0; +TUNABLE_INT("hw.iw_cxgb.nocong", &nocong); +SYSCTL_UINT(_hw_cxgb, OID_AUTO, nocong, CTLFLAG_RDTUN, &nocong, 0, + "Turn off congestion control (default=0)"); + +static unsigned int cong_flavor = 1; +TUNABLE_INT("hw.iw_cxgb.cong_flavor", &cong_flavor); +SYSCTL_UINT(_hw_cxgb, OID_AUTO, cong_flavor, CTLFLAG_RDTUN, &cong_flavor, 0, + "TCP Congestion control flavor (default=1)"); + +static void ep_timeout(void *arg); +static void connect_reply_upcall(struct iwch_ep *ep, int status); +static void iwch_so_upcall(struct socket *so, void *arg, int waitflag); + +/* + * Cruft to offload socket upcalls onto thread. + */ +static struct mtx req_lock; +static TAILQ_HEAD(iwch_ep_list, iwch_ep_common) req_list; +static struct task iw_cxgb_task; +static struct taskqueue *iw_cxgb_taskq; +static void process_req(void *ctx, int pending); + +static void +start_ep_timer(struct iwch_ep *ep) +{ + CTR2(KTR_IW_CXGB, "%s ep %p", __FUNCTION__, ep); + if (callout_pending(&ep->timer)) { + CTR2(KTR_IW_CXGB, "%s stopped / restarted timer ep %p", __FUNCTION__, ep); + callout_deactivate(&ep->timer); + callout_drain(&ep->timer); + } else { + /* + * XXX this looks racy + */ + get_ep(&ep->com); + callout_init(&ep->timer, TRUE); + } + callout_reset(&ep->timer, ep_timeout_secs * hz, ep_timeout, ep); +} + +static void +stop_ep_timer(struct iwch_ep *ep) +{ + CTR2(KTR_IW_CXGB, "%s ep %p", __FUNCTION__, ep); + callout_drain(&ep->timer); + put_ep(&ep->com); +} + +static int set_tcpinfo(struct iwch_ep *ep) +{ + struct tcp_info ti; + struct sockopt sopt; + int err; + + sopt.sopt_dir = SOPT_GET; + sopt.sopt_level = IPPROTO_TCP; + sopt.sopt_name = TCP_INFO; + sopt.sopt_val = (caddr_t)&ti; + sopt.sopt_valsize = sizeof ti; + sopt.sopt_td = NULL; + + err = sogetopt(ep->com.so, &sopt); + if (err) { + printf("%s can't get tcpinfo\n", __FUNCTION__); + return -err; + } + if (!(ti.tcpi_options & TCPI_OPT_TOE)) { + printf("%s connection NOT OFFLOADED!\n", __FUNCTION__); + return -EINVAL; + } + + ep->snd_seq = ti.tcpi_snd_nxt; + ep->rcv_seq = ti.tcpi_rcv_nxt; + ep->emss = ti.__tcpi_snd_mss - sizeof(struct tcpiphdr); + ep->hwtid = TOEPCB(ep->com.so)->tp_tid; /* XXX */ + if (ti.tcpi_options & TCPI_OPT_TIMESTAMPS) + ep->emss -= 12; + if (ep->emss < 128) + ep->emss = 128; + return 0; +} + +static enum iwch_ep_state +state_read(struct iwch_ep_common *epc) +{ + enum iwch_ep_state state; + + mtx_lock(&epc->lock); + state = epc->state; + mtx_unlock(&epc->lock); + return state; +} + +static void +__state_set(struct iwch_ep_common *epc, enum iwch_ep_state new) +{ + epc->state = new; +} + +static void +state_set(struct iwch_ep_common *epc, enum iwch_ep_state new) +{ + + mtx_lock(&epc->lock); + CTR3(KTR_IW_CXGB, "%s - %s -> %s", __FUNCTION__, states[epc->state], states[new]); + __state_set(epc, new); + mtx_unlock(&epc->lock); + return; +} + +static void * +alloc_ep(int size, int flags) +{ + struct iwch_ep_common *epc; + + epc = malloc(size, M_DEVBUF, flags); + if (epc) { + memset(epc, 0, size); + refcount_init(&epc->refcount, 1); + mtx_init(&epc->lock, "iwch_epc lock", NULL, MTX_DEF|MTX_DUPOK); + cv_init(&epc->waitq, "iwch_epc cv"); + } + CTR2(KTR_IW_CXGB, "%s alloc ep %p", __FUNCTION__, epc); + return epc; +} + +void __free_ep(struct iwch_ep_common *epc) +{ + CTR3(KTR_IW_CXGB, "%s ep %p state %s", __FUNCTION__, epc, states[state_read(epc)]); + KASSERT(!epc->so, ("%s warning ep->so %p \n", __FUNCTION__, epc->so)); + KASSERT(!epc->entry.tqe_prev, ("%s epc %p still on req list!\n", __FUNCTION__, epc)); + free(epc, M_DEVBUF); +} + +int +iwch_quiesce_tid(struct iwch_ep *ep) +{ +#ifdef notyet + struct cpl_set_tcb_field *req; + struct mbuf *m = get_mbuf(NULL, sizeof(*req), M_NOWAIT); + + if (m == NULL) + return (-ENOMEM); + req = (struct cpl_set_tcb_field *) mbuf_put(m, sizeof(*req)); + req->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD)); + req->wr.wr_lo = htonl(V_WR_TID(ep->hwtid)); + OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_SET_TCB_FIELD, ep->hwtid)); + req->reply = 0; + req->cpu_idx = 0; + req->word = htons(W_TCB_RX_QUIESCE); + req->mask = cpu_to_be64(1ULL << S_TCB_RX_QUIESCE); + req->val = cpu_to_be64(1 << S_TCB_RX_QUIESCE); + + m_set_priority(m, CPL_PRIORITY_DATA); + cxgb_ofld_send(ep->com.tdev, m); +#endif + return 0; +} + +int +iwch_resume_tid(struct iwch_ep *ep) +{ +#ifdef notyet + struct cpl_set_tcb_field *req; + struct mbuf *m = get_mbuf(NULL, sizeof(*req), M_NOWAIT); + + if (m == NULL) + return (-ENOMEM); + req = (struct cpl_set_tcb_field *) mbuf_put(m, sizeof(*req)); + req->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD)); + req->wr.wr_lo = htonl(V_WR_TID(ep->hwtid)); + OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_SET_TCB_FIELD, ep->hwtid)); + req->reply = 0; + req->cpu_idx = 0; + req->word = htons(W_TCB_RX_QUIESCE); + req->mask = cpu_to_be64(1ULL << S_TCB_RX_QUIESCE); + req->val = 0; + + m_set_priority(m, CPL_PRIORITY_DATA); + cxgb_ofld_send(ep->com.tdev, m); +#endif + return 0; +} + +static struct rtentry * +find_route(__be32 local_ip, __be32 peer_ip, __be16 local_port, + __be16 peer_port, u8 tos) +{ + struct route iproute; + struct sockaddr_in *dst = (struct sockaddr_in *)&iproute.ro_dst; + + bzero(&iproute, sizeof iproute); + dst->sin_family = AF_INET; + dst->sin_len = sizeof *dst; + dst->sin_addr.s_addr = peer_ip; + + rtalloc(&iproute); + return iproute.ro_rt; +} + +static void +close_socket(struct iwch_ep_common *epc) +{ + CTR4(KTR_IW_CXGB, "%s ep %p so %p state %s", __FUNCTION__, epc, epc->so, states[epc->state]); + SOCK_LOCK(epc->so); + epc->so->so_upcall = NULL; + epc->so->so_upcallarg = NULL; + epc->so->so_rcv.sb_flags &= ~SB_UPCALL; + SOCK_UNLOCK(epc->so); + soshutdown(epc->so, SHUT_WR|SHUT_RD); + epc->so = NULL; +} + +static void +shutdown_socket(struct iwch_ep_common *epc) +{ + CTR4(KTR_IW_CXGB, "%s ep %p so %p state %s", __FUNCTION__, epc, epc->so, states[epc->state]); + soshutdown(epc->so, SHUT_WR); +} + +static void +abort_socket(struct iwch_ep *ep) +{ + struct sockopt sopt; + int err; + struct linger l; + + CTR4(KTR_IW_CXGB, "%s ep %p so %p state %s", __FUNCTION__, ep, ep->com.so, states[ep->com.state]); + l.l_onoff = 1; + l.l_linger = 0; + + /* linger_time of 0 forces RST to be sent */ + sopt.sopt_dir = SOPT_SET; + sopt.sopt_level = SOL_SOCKET; + sopt.sopt_name = SO_LINGER; + sopt.sopt_val = (caddr_t)&l; + sopt.sopt_valsize = sizeof l; + sopt.sopt_td = NULL; + err = sosetopt(ep->com.so, &sopt); + if (err) + printf("%s can't set linger to 0, no RST! err %d\n", __FUNCTION__, err); +} + +static void +send_mpa_req(struct iwch_ep *ep) +{ + int mpalen; + struct mpa_message *mpa; + struct mbuf *m; + int err; + + CTR3(KTR_IW_CXGB, "%s ep %p pd_len %d", __FUNCTION__, ep, ep->plen); + + mpalen = sizeof(*mpa) + ep->plen; + m = m_gethdr(mpalen, M_NOWAIT); + if (m == NULL) { + connect_reply_upcall(ep, -ENOMEM); + return; + } + mpa = mtod(m, struct mpa_message *); + m->m_len = mpalen; + m->m_pkthdr.len = mpalen; + memset(mpa, 0, sizeof(*mpa)); + memcpy(mpa->key, MPA_KEY_REQ, sizeof(mpa->key)); + mpa->flags = (crc_enabled ? MPA_CRC : 0) | + (markers_enabled ? MPA_MARKERS : 0); + mpa->private_data_size = htons(ep->plen); + mpa->revision = mpa_rev; + if (ep->plen) + memcpy(mpa->private_data, ep->mpa_pkt + sizeof(*mpa), ep->plen); + + err = sosend(ep->com.so, NULL, NULL, m, NULL, MSG_DONTWAIT, ep->com.thread); + if (err) { + m_freem(m); + connect_reply_upcall(ep, -ENOMEM); + return; + } + + start_ep_timer(ep); + state_set(&ep->com, MPA_REQ_SENT); + return; +} + +static int +send_mpa_reject(struct iwch_ep *ep, const void *pdata, u8 plen) +{ + int mpalen; + struct mpa_message *mpa; + struct mbuf *m; + int err; + + CTR3(KTR_IW_CXGB, "%s ep %p plen %d", __FUNCTION__, ep, plen); + + mpalen = sizeof(*mpa) + plen; + + m = m_gethdr(mpalen, M_NOWAIT); + if (m == NULL) { + printf("%s - cannot alloc mbuf!\n", __FUNCTION__); + return (-ENOMEM); + } + mpa = mtod(m, struct mpa_message *); + m->m_len = mpalen; + m->m_pkthdr.len = mpalen; + memset(mpa, 0, sizeof(*mpa)); + memcpy(mpa->key, MPA_KEY_REP, sizeof(mpa->key)); + mpa->flags = MPA_REJECT; + mpa->revision = mpa_rev; + mpa->private_data_size = htons(plen); + if (plen) + memcpy(mpa->private_data, pdata, plen); + err = sosend(ep->com.so, NULL, NULL, m, NULL, MSG_DONTWAIT, ep->com.thread); + PANIC_IF(err); + return 0; +} + +static int +send_mpa_reply(struct iwch_ep *ep, const void *pdata, u8 plen) +{ + int mpalen; + struct mpa_message *mpa; + struct mbuf *m; + + CTR4(KTR_IW_CXGB, "%s ep %p so %p plen %d", __FUNCTION__, ep, ep->com.so, plen); + + mpalen = sizeof(*mpa) + plen; + + m = m_gethdr(mpalen, M_NOWAIT); + if (m == NULL) { + printf("%s - cannot alloc mbuf!\n", __FUNCTION__); + return (-ENOMEM); + } + mpa = mtod(m, struct mpa_message *); + m->m_len = mpalen; + m->m_pkthdr.len = mpalen; + memset(mpa, 0, sizeof(*mpa)); + memcpy(mpa->key, MPA_KEY_REP, sizeof(mpa->key)); + mpa->flags = (ep->mpa_attr.crc_enabled ? MPA_CRC : 0) | + (markers_enabled ? MPA_MARKERS : 0); + mpa->revision = mpa_rev; + mpa->private_data_size = htons(plen); + if (plen) + memcpy(mpa->private_data, pdata, plen); + + state_set(&ep->com, MPA_REP_SENT); + return sosend(ep->com.so, NULL, NULL, m, NULL, MSG_DONTWAIT, + ep->com.thread); +} + +static void +close_complete_upcall(struct iwch_ep *ep) +{ + struct iw_cm_event event; + + CTR4(KTR_IW_CXGB, "%s ep %p so %p state %s", __FUNCTION__, ep, ep->com.so, states[ep->com.state]); + memset(&event, 0, sizeof(event)); + event.event = IW_CM_EVENT_CLOSE; + if (ep->com.cm_id) { + CTR3(KTR_IW_CXGB, "close complete delivered ep %p cm_id %p tid %d", + ep, ep->com.cm_id, ep->hwtid); + ep->com.cm_id->event_handler(ep->com.cm_id, &event); + ep->com.cm_id->rem_ref(ep->com.cm_id); + ep->com.cm_id = NULL; + ep->com.qp = NULL; + } +} + +static void +abort_connection(struct iwch_ep *ep) +{ + CTR4(KTR_IW_CXGB, "%s ep %p so %p state %s", __FUNCTION__, ep, ep->com.so, states[ep->com.state]); + state_set(&ep->com, ABORTING); + abort_socket(ep); + close_socket(&ep->com); + close_complete_upcall(ep); + state_set(&ep->com, DEAD); + put_ep(&ep->com); +} + +static void +peer_close_upcall(struct iwch_ep *ep) +{ + struct iw_cm_event event; + + CTR4(KTR_IW_CXGB, "%s ep %p so %p state %s", __FUNCTION__, ep, ep->com.so, states[ep->com.state]); + memset(&event, 0, sizeof(event)); + event.event = IW_CM_EVENT_DISCONNECT; + if (ep->com.cm_id) { + CTR3(KTR_IW_CXGB, "peer close delivered ep %p cm_id %p tid %d", + ep, ep->com.cm_id, ep->hwtid); + ep->com.cm_id->event_handler(ep->com.cm_id, &event); + } +} + +static void +peer_abort_upcall(struct iwch_ep *ep) +{ + struct iw_cm_event event; + + CTR4(KTR_IW_CXGB, "%s ep %p so %p state %s", __FUNCTION__, ep, ep->com.so, states[ep->com.state]); + memset(&event, 0, sizeof(event)); + event.event = IW_CM_EVENT_CLOSE; + event.status = ECONNRESET; + if (ep->com.cm_id) { + CTR3(KTR_IW_CXGB, "abort delivered ep %p cm_id %p tid %d", ep, + ep->com.cm_id, ep->hwtid); + ep->com.cm_id->event_handler(ep->com.cm_id, &event); + ep->com.cm_id->rem_ref(ep->com.cm_id); + ep->com.cm_id = NULL; + ep->com.qp = NULL; + } +} + +static void +connect_reply_upcall(struct iwch_ep *ep, int status) +{ + struct iw_cm_event event; + + CTR5(KTR_IW_CXGB, "%s ep %p so %p state %s status %d", __FUNCTION__, ep, ep->com.so, states[ep->com.state], status); + memset(&event, 0, sizeof(event)); + event.event = IW_CM_EVENT_CONNECT_REPLY; + event.status = status; + event.local_addr = ep->com.local_addr; + event.remote_addr = ep->com.remote_addr; + + if ((status == 0) || (status == ECONNREFUSED)) { + event.private_data_len = ep->plen; + event.private_data = ep->mpa_pkt + sizeof(struct mpa_message); + } + if (ep->com.cm_id) { + CTR4(KTR_IW_CXGB, "%s ep %p tid %d status %d", __FUNCTION__, ep, + ep->hwtid, status); + ep->com.cm_id->event_handler(ep->com.cm_id, &event); + } + if (status < 0) { + ep->com.cm_id->rem_ref(ep->com.cm_id); + ep->com.cm_id = NULL; + ep->com.qp = NULL; + } +} + +static void +connect_request_upcall(struct iwch_ep *ep) +{ + struct iw_cm_event event; + + CTR4(KTR_IW_CXGB, "%s ep %p so %p state %s", __FUNCTION__, ep, ep->com.so, states[ep->com.state]); + memset(&event, 0, sizeof(event)); + event.event = IW_CM_EVENT_CONNECT_REQUEST; + event.local_addr = ep->com.local_addr; + event.remote_addr = ep->com.remote_addr; + event.private_data_len = ep->plen; + event.private_data = ep->mpa_pkt + sizeof(struct mpa_message); + event.provider_data = ep; + event.so = ep->com.so; + if (state_read(&ep->parent_ep->com) != DEAD) + ep->parent_ep->com.cm_id->event_handler( + ep->parent_ep->com.cm_id, + &event); + put_ep(&ep->parent_ep->com); + ep->parent_ep = NULL; +} + +static void +established_upcall(struct iwch_ep *ep) +{ + struct iw_cm_event event; + + CTR4(KTR_IW_CXGB, "%s ep %p so %p state %s", __FUNCTION__, ep, ep->com.so, states[ep->com.state]); + memset(&event, 0, sizeof(event)); + event.event = IW_CM_EVENT_ESTABLISHED; + if (ep->com.cm_id) { + CTR3(KTR_IW_CXGB, "%s ep %p tid %d", __FUNCTION__, ep, ep->hwtid); + ep->com.cm_id->event_handler(ep->com.cm_id, &event); + } +} + +static void +process_mpa_reply(struct iwch_ep *ep) +{ + struct mpa_message *mpa; + u16 plen; + struct iwch_qp_attributes attrs; + enum iwch_qp_attr_mask mask; + int err; + struct mbuf *top, *m; + int flags = MSG_DONTWAIT; + struct uio uio; + int len; + + CTR4(KTR_IW_CXGB, "%s ep %p so %p state %s", __FUNCTION__, ep, ep->com.so, states[ep->com.state]); + + /* + * Stop mpa timer. If it expired, then the state has + * changed and we bail since ep_timeout already aborted + * the connection. + */ + stop_ep_timer(ep); + if (state_read(&ep->com) != MPA_REQ_SENT) + return; + + uio.uio_resid = len = 1000000; + uio.uio_td = ep->com.thread; + err = soreceive(ep->com.so, NULL, &uio, &top, NULL, &flags); + if (err) { + if (err == EWOULDBLOCK) { + start_ep_timer(ep); + return; + } + err = -err; + goto err; + } + + if (ep->com.so->so_rcv.sb_mb) { + printf("%s data after soreceive called! so %p sb_mb %p top %p\n", + __FUNCTION__, ep->com.so, ep->com.so->so_rcv.sb_mb, top); + } + + m = top; + do { + /* + * If we get more than the supported amount of private data + * then we must fail this connection. + */ + if (ep->mpa_pkt_len + m->m_len > sizeof(ep->mpa_pkt)) { + err = (-EINVAL); + goto err; + } + + /* + * copy the new data into our accumulation buffer. + */ + m_copydata(m, 0, m->m_len, &(ep->mpa_pkt[ep->mpa_pkt_len])); + ep->mpa_pkt_len += m->m_len; + if (!m->m_next) + m = m->m_nextpkt; + else + m = m->m_next; + } while (m); + + m_freem(top); + + /* + * if we don't even have the mpa message, then bail. + */ + if (ep->mpa_pkt_len < sizeof(*mpa)) + return; + mpa = (struct mpa_message *)ep->mpa_pkt; + + /* Validate MPA header. */ + if (mpa->revision != mpa_rev) { + CTR2(KTR_IW_CXGB, "%s bad mpa rev %d", __FUNCTION__, mpa->revision); + err = EPROTO; + goto err; + } + if (memcmp(mpa->key, MPA_KEY_REP, sizeof(mpa->key))) { + CTR2(KTR_IW_CXGB, "%s bad mpa key |%16s|", __FUNCTION__, mpa->key); + err = EPROTO; + goto err; + } + + plen = ntohs(mpa->private_data_size); + + /* + * Fail if there's too much private data. + */ + if (plen > MPA_MAX_PRIVATE_DATA) { + CTR2(KTR_IW_CXGB, "%s plen too big %d", __FUNCTION__, plen); + err = EPROTO; + goto err; + } + + /* + * If plen does not account for pkt size + */ + if (ep->mpa_pkt_len > (sizeof(*mpa) + plen)) { + CTR2(KTR_IW_CXGB, "%s pkt too big %d", __FUNCTION__, ep->mpa_pkt_len); + err = EPROTO; + goto err; + } + + ep->plen = (u8) plen; + + /* + * If we don't have all the pdata yet, then bail. + * We'll continue process when more data arrives. + */ + if (ep->mpa_pkt_len < (sizeof(*mpa) + plen)) + return; + + if (mpa->flags & MPA_REJECT) { + err = ECONNREFUSED; + goto err; + } + + /* + * If we get here we have accumulated the entire mpa + * start reply message including private data. And + * the MPA header is valid. + */ + CTR1(KTR_IW_CXGB, "%s mpa rpl looks good!", __FUNCTION__); + state_set(&ep->com, FPDU_MODE); + ep->mpa_attr.crc_enabled = (mpa->flags & MPA_CRC) | crc_enabled ? 1 : 0; + ep->mpa_attr.recv_marker_enabled = markers_enabled; + ep->mpa_attr.xmit_marker_enabled = mpa->flags & MPA_MARKERS ? 1 : 0; + ep->mpa_attr.version = mpa_rev; + if (set_tcpinfo(ep)) { + printf("%s set_tcpinfo error\n", __FUNCTION__); + goto err; + } + CTR5(KTR_IW_CXGB, "%s - crc_enabled=%d, recv_marker_enabled=%d, " + "xmit_marker_enabled=%d, version=%d", __FUNCTION__, + ep->mpa_attr.crc_enabled, ep->mpa_attr.recv_marker_enabled, + ep->mpa_attr.xmit_marker_enabled, ep->mpa_attr.version); + + attrs.mpa_attr = ep->mpa_attr; + attrs.max_ird = ep->ird; + attrs.max_ord = ep->ord; + attrs.llp_stream_handle = ep; + attrs.next_state = IWCH_QP_STATE_RTS; + + mask = IWCH_QP_ATTR_NEXT_STATE | + IWCH_QP_ATTR_LLP_STREAM_HANDLE | IWCH_QP_ATTR_MPA_ATTR | + IWCH_QP_ATTR_MAX_IRD | IWCH_QP_ATTR_MAX_ORD; + + /* bind QP and TID with INIT_WR */ + err = iwch_modify_qp(ep->com.qp->rhp, + ep->com.qp, mask, &attrs, 1); + if (!err) + goto out; +err: + abort_connection(ep); +out: + connect_reply_upcall(ep, err); + return; +} + +static void +process_mpa_request(struct iwch_ep *ep) +{ + struct mpa_message *mpa; + u16 plen; + int flags = MSG_DONTWAIT; + struct mbuf *top, *m; + int err; + struct uio uio; + int len; + + CTR4(KTR_IW_CXGB, "%s ep %p so %p state %s", __FUNCTION__, ep, ep->com.so, states[ep->com.state]); + + /* + * Stop mpa timer. If it expired, then the state has + * changed and we bail since ep_timeout already aborted + * the connection. + */ + stop_ep_timer(ep); + if (state_read(&ep->com) != MPA_REQ_WAIT) + return; + + uio.uio_resid = len = 1000000; + uio.uio_td = ep->com.thread; + err = soreceive(ep->com.so, NULL, &uio, &top, NULL, &flags); + if (err) { + if (err == EWOULDBLOCK) { + start_ep_timer(ep); + return; + } + err = -err; + goto err; + } + + m = top; + do { + + /* + * If we get more than the supported amount of private data + * then we must fail this connection. + */ + if (ep->mpa_pkt_len + m->m_len > sizeof(ep->mpa_pkt)) { + CTR2(KTR_IW_CXGB, "%s mpa message too big %d", __FUNCTION__, + ep->mpa_pkt_len + m->m_len); + goto err; + } + + + /* + * Copy the new data into our accumulation buffer. + */ + m_copydata(m, 0, m->m_len, &(ep->mpa_pkt[ep->mpa_pkt_len])); + ep->mpa_pkt_len += m->m_len; + + if (!m->m_next) + m = m->m_nextpkt; + else + m = m->m_next; + } while (m); + + m_freem(top); + + /* + * If we don't even have the mpa message, then bail. + * We'll continue process when more data arrives. + */ + if (ep->mpa_pkt_len < sizeof(*mpa)) { + start_ep_timer(ep); + CTR2(KTR_IW_CXGB, "%s not enough header %d...waiting...", __FUNCTION__, + ep->mpa_pkt_len); + return; + } + mpa = (struct mpa_message *) ep->mpa_pkt; + + /* + * Validate MPA Header. + */ + if (mpa->revision != mpa_rev) { + CTR2(KTR_IW_CXGB, "%s bad mpa rev %d", __FUNCTION__, mpa->revision); + goto err; + } + + if (memcmp(mpa->key, MPA_KEY_REQ, sizeof(mpa->key))) { + CTR2(KTR_IW_CXGB, "%s bad mpa key |%16s|", __FUNCTION__, mpa->key); + goto err; + } + + plen = ntohs(mpa->private_data_size); + + /* + * Fail if there's too much private data. + */ + if (plen > MPA_MAX_PRIVATE_DATA) { + CTR2(KTR_IW_CXGB, "%s plen too big %d", __FUNCTION__, plen); + goto err; + } + + /* + * If plen does not account for pkt size + */ + if (ep->mpa_pkt_len > (sizeof(*mpa) + plen)) { + CTR2(KTR_IW_CXGB, "%s more data after private data %d", __FUNCTION__, + ep->mpa_pkt_len); + goto err; + } + ep->plen = (u8) plen; + + /* + * If we don't have all the pdata yet, then bail. + */ + if (ep->mpa_pkt_len < (sizeof(*mpa) + plen)) { + start_ep_timer(ep); + CTR2(KTR_IW_CXGB, "%s more mpa msg to come %d", __FUNCTION__, + ep->mpa_pkt_len); + return; + } + + /* + * If we get here we have accumulated the entire mpa + * start reply message including private data. + */ + ep->mpa_attr.crc_enabled = (mpa->flags & MPA_CRC) | crc_enabled ? 1 : 0; + ep->mpa_attr.recv_marker_enabled = markers_enabled; + ep->mpa_attr.xmit_marker_enabled = mpa->flags & MPA_MARKERS ? 1 : 0; + ep->mpa_attr.version = mpa_rev; + if (set_tcpinfo(ep)) { + printf("%s set_tcpinfo error\n", __FUNCTION__); + goto err; + } + CTR5(KTR_IW_CXGB, "%s - crc_enabled=%d, recv_marker_enabled=%d, " + "xmit_marker_enabled=%d, version=%d", __FUNCTION__, + ep->mpa_attr.crc_enabled, ep->mpa_attr.recv_marker_enabled, + ep->mpa_attr.xmit_marker_enabled, ep->mpa_attr.version); + + state_set(&ep->com, MPA_REQ_RCVD); + + /* drive upcall */ + connect_request_upcall(ep); + return; +err: + abort_connection(ep); + return; +} + +static void +process_peer_close(struct iwch_ep *ep) +{ + struct iwch_qp_attributes attrs; + int disconnect = 1; + int release = 0; + + CTR4(KTR_IW_CXGB, "%s ep %p so %p state %s", __FUNCTION__, ep, ep->com.so, states[ep->com.state]); + + mtx_lock(&ep->com.lock); + switch (ep->com.state) { + case MPA_REQ_WAIT: + __state_set(&ep->com, CLOSING); + break; + case MPA_REQ_SENT: + __state_set(&ep->com, CLOSING); + connect_reply_upcall(ep, -ECONNRESET); + break; + case MPA_REQ_RCVD: + + /* + * We're gonna mark this puppy DEAD, but keep + * the reference on it until the ULP accepts or + * rejects the CR. + */ + __state_set(&ep->com, CLOSING); + get_ep(&ep->com); + break; + case MPA_REP_SENT: + __state_set(&ep->com, CLOSING); + break; + case FPDU_MODE: + start_ep_timer(ep); + __state_set(&ep->com, CLOSING); + attrs.next_state = IWCH_QP_STATE_CLOSING; + iwch_modify_qp(ep->com.qp->rhp, ep->com.qp, + IWCH_QP_ATTR_NEXT_STATE, &attrs, 1); + peer_close_upcall(ep); + break; + case ABORTING: + disconnect = 0; + break; + case CLOSING: + __state_set(&ep->com, MORIBUND); + disconnect = 0; + break; + case MORIBUND: + stop_ep_timer(ep); + if (ep->com.cm_id && ep->com.qp) { + attrs.next_state = IWCH_QP_STATE_IDLE; + iwch_modify_qp(ep->com.qp->rhp, ep->com.qp, + IWCH_QP_ATTR_NEXT_STATE, &attrs, 1); + } + close_socket(&ep->com); + close_complete_upcall(ep); + __state_set(&ep->com, DEAD); + release = 1; + disconnect = 0; + break; + case DEAD: + disconnect = 0; + break; + default: + PANIC_IF(1); + } + mtx_unlock(&ep->com.lock); + if (disconnect) + iwch_ep_disconnect(ep, 0, M_NOWAIT); + if (release) + put_ep(&ep->com); + return; +} + +static void +process_conn_error(struct iwch_ep *ep) +{ + struct iwch_qp_attributes attrs; + int ret; + int state; + + state = state_read(&ep->com); + CTR5(KTR_IW_CXGB, "%s ep %p so %p so->so_error %u state %s", __FUNCTION__, ep, ep->com.so, ep->com.so->so_error, states[ep->com.state]); + switch (state) { + case MPA_REQ_WAIT: + stop_ep_timer(ep); + break; + case MPA_REQ_SENT: + stop_ep_timer(ep); + connect_reply_upcall(ep, -ECONNRESET); + break; + case MPA_REP_SENT: + ep->com.rpl_err = ECONNRESET; + CTR1(KTR_IW_CXGB, "waking up ep %p", ep); + break; + case MPA_REQ_RCVD: + + /* + * We're gonna mark this puppy DEAD, but keep + * the reference on it until the ULP accepts or + * rejects the CR. + */ + get_ep(&ep->com); + break; + case MORIBUND: + case CLOSING: + stop_ep_timer(ep); + /*FALLTHROUGH*/ + case FPDU_MODE: + if (ep->com.cm_id && ep->com.qp) { + attrs.next_state = IWCH_QP_STATE_ERROR; + ret = iwch_modify_qp(ep->com.qp->rhp, + ep->com.qp, IWCH_QP_ATTR_NEXT_STATE, + &attrs, 1); + if (ret) + log(LOG_ERR, + "%s - qp <- error failed!\n", + __FUNCTION__); + } + peer_abort_upcall(ep); + break; + case ABORTING: + break; + case DEAD: + CTR2(KTR_IW_CXGB, "%s so_error %d IN DEAD STATE!!!!", __FUNCTION__, + ep->com.so->so_error); + return; + default: + PANIC_IF(1); + break; + } + + if (state != ABORTING) { + close_socket(&ep->com); + state_set(&ep->com, DEAD); + put_ep(&ep->com); + } + return; +} + +static void +process_close_complete(struct iwch_ep *ep) +{ + struct iwch_qp_attributes attrs; + int release = 0; + + CTR4(KTR_IW_CXGB, "%s ep %p so %p state %s", __FUNCTION__, ep, ep->com.so, states[ep->com.state]); + PANIC_IF(!ep); + + /* The cm_id may be null if we failed to connect */ + mtx_lock(&ep->com.lock); + switch (ep->com.state) { + case CLOSING: + __state_set(&ep->com, MORIBUND); + break; + case MORIBUND: + stop_ep_timer(ep); + if ((ep->com.cm_id) && (ep->com.qp)) { + attrs.next_state = IWCH_QP_STATE_IDLE; + iwch_modify_qp(ep->com.qp->rhp, + ep->com.qp, + IWCH_QP_ATTR_NEXT_STATE, + &attrs, 1); + } + close_socket(&ep->com); + close_complete_upcall(ep); + __state_set(&ep->com, DEAD); + release = 1; + break; + case ABORTING: + break; + case DEAD: + default: + PANIC_IF(1); + break; + } + mtx_unlock(&ep->com.lock); + if (release) + put_ep(&ep->com); + return; +} + +/* + * T3A does 3 things when a TERM is received: + * 1) send up a CPL_RDMA_TERMINATE message with the TERM packet + * 2) generate an async event on the QP with the TERMINATE opcode + * 3) post a TERMINATE opcde cqe into the associated CQ. + * + * For (1), we save the message in the qp for later consumer consumption. + * For (2), we move the QP into TERMINATE, post a QP event and disconnect. + * For (3), we toss the CQE in cxio_poll_cq(). + * + * terminate() handles case (1)... + */ +static int +terminate(struct t3cdev *tdev, struct mbuf *m, void *ctx) +{ + struct toepcb *toep = (struct toepcb *)ctx; + struct socket *so = toeptoso(toep); + struct iwch_ep *ep = so->so_upcallarg; + + CTR2(KTR_IW_CXGB, "%s ep %p", __FUNCTION__, ep); + m_adj(m, sizeof(struct cpl_rdma_terminate)); + CTR2(KTR_IW_CXGB, "%s saving %d bytes of term msg", __FUNCTION__, m->m_len); + m_copydata(m, 0, m->m_len, ep->com.qp->attr.terminate_buffer); + ep->com.qp->attr.terminate_msg_len = m->m_len; + ep->com.qp->attr.is_terminate_local = 0; + return CPL_RET_BUF_DONE; +} + +static int +ec_status(struct t3cdev *tdev, struct mbuf *m, void *ctx) +{ + struct toepcb *toep = (struct toepcb *)ctx; + struct socket *so = toeptoso(toep); + struct cpl_rdma_ec_status *rep = cplhdr(m); + struct iwch_ep *ep; + struct iwch_qp_attributes attrs; + int release = 0; + + ep = so->so_upcallarg; + CTR5(KTR_IW_CXGB, "%s ep %p so %p state %s ec_status %d", __FUNCTION__, ep, ep->com.so, states[ep->com.state], rep->status); + if (!so || !ep) { + panic("bogosity ep %p state %d, so %p state %x\n", ep, ep ? ep->com.state : -1, so, so ? so->so_state : -1); + } + mtx_lock(&ep->com.lock); + switch (ep->com.state) { + case CLOSING: + if (!rep->status) + __state_set(&ep->com, MORIBUND); + else + __state_set(&ep->com, ABORTING); + break; + case MORIBUND: + stop_ep_timer(ep); + if (!rep->status) { + if ((ep->com.cm_id) && (ep->com.qp)) { + attrs.next_state = IWCH_QP_STATE_IDLE; + iwch_modify_qp(ep->com.qp->rhp, + ep->com.qp, + IWCH_QP_ATTR_NEXT_STATE, + &attrs, 1); + } + close_socket(&ep->com); + close_complete_upcall(ep); + __state_set(&ep->com, DEAD); + release = 1; + } + break; + case DEAD: + break; + default: + panic("unknown state: %d\n", ep->com.state); + } + mtx_unlock(&ep->com.lock); + if (rep->status) { + log(LOG_ERR, "%s BAD CLOSE - Aborting tid %u\n", + __FUNCTION__, ep->hwtid); + attrs.next_state = IWCH_QP_STATE_ERROR; + iwch_modify_qp(ep->com.qp->rhp, + ep->com.qp, IWCH_QP_ATTR_NEXT_STATE, + &attrs, 1); + } + if (release) + put_ep(&ep->com); + return CPL_RET_BUF_DONE; +} + +static void +ep_timeout(void *arg) +{ + struct iwch_ep *ep = (struct iwch_ep *)arg; + struct iwch_qp_attributes attrs; + int err = 0; + + mtx_lock(&ep->com.lock); + CTR4(KTR_IW_CXGB, "%s ep %p so %p state %s", __FUNCTION__, ep, ep->com.so, states[ep->com.state]); + switch (ep->com.state) { + case MPA_REQ_SENT: + connect_reply_upcall(ep, -ETIMEDOUT); + break; + case MPA_REQ_WAIT: + break; + case CLOSING: + case MORIBUND: + if (ep->com.cm_id && ep->com.qp) + err = 1; + break; + default: + panic("unknown state: %d\n", ep->com.state); + } + __state_set(&ep->com, ABORTING); + mtx_unlock(&ep->com.lock); + if (err){ + attrs.next_state = IWCH_QP_STATE_ERROR; + iwch_modify_qp(ep->com.qp->rhp, + ep->com.qp, IWCH_QP_ATTR_NEXT_STATE, + &attrs, 1); + } + abort_connection(ep); + put_ep(&ep->com); +} + +int +iwch_reject_cr(struct iw_cm_id *cm_id, const void *pdata, u8 pdata_len) +{ + int err; + struct iwch_ep *ep = to_ep(cm_id); + CTR4(KTR_IW_CXGB, "%s ep %p so %p state %s", __FUNCTION__, ep, ep->com.so, states[ep->com.state]); + + if (state_read(&ep->com) == DEAD) { + put_ep(&ep->com); + return (-ECONNRESET); + } + PANIC_IF(state_read(&ep->com) != MPA_REQ_RCVD); + if (mpa_rev == 0) { + abort_connection(ep); + } else { + err = send_mpa_reject(ep, pdata, pdata_len); + err = soshutdown(ep->com.so, 3); + } + return 0; +} + +int +iwch_accept_cr(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param) +{ + int err; + struct iwch_qp_attributes attrs; + enum iwch_qp_attr_mask mask; + struct iwch_ep *ep = to_ep(cm_id); + struct iwch_dev *h = to_iwch_dev(cm_id->device); + struct iwch_qp *qp = get_qhp(h, conn_param->qpn); + + CTR4(KTR_IW_CXGB, "%s ep %p so %p state %s", __FUNCTION__, ep, ep->com.so, states[ep->com.state]); + if (state_read(&ep->com) == DEAD) + return (-ECONNRESET); + + PANIC_IF(state_read(&ep->com) != MPA_REQ_RCVD); + PANIC_IF(!qp); + + if ((conn_param->ord > qp->rhp->attr.max_rdma_read_qp_depth) || + (conn_param->ird > qp->rhp->attr.max_rdma_reads_per_qp)) { + abort_connection(ep); + return (-EINVAL); + } + + cm_id->add_ref(cm_id); + ep->com.cm_id = cm_id; + ep->com.qp = qp; + + ep->com.rpl_err = 0; + ep->com.rpl_done = 0; + ep->ird = conn_param->ird; + ep->ord = conn_param->ord; + CTR3(KTR_IW_CXGB, "%s ird %d ord %d", __FUNCTION__, ep->ird, ep->ord); + get_ep(&ep->com); + + /* bind QP to EP and move to RTS */ + attrs.mpa_attr = ep->mpa_attr; + attrs.max_ird = ep->ord; + attrs.max_ord = ep->ord; + attrs.llp_stream_handle = ep; + attrs.next_state = IWCH_QP_STATE_RTS; + + /* bind QP and TID with INIT_WR */ + mask = IWCH_QP_ATTR_NEXT_STATE | + IWCH_QP_ATTR_LLP_STREAM_HANDLE | + IWCH_QP_ATTR_MPA_ATTR | + IWCH_QP_ATTR_MAX_IRD | + IWCH_QP_ATTR_MAX_ORD; + + err = iwch_modify_qp(ep->com.qp->rhp, + ep->com.qp, mask, &attrs, 1); + + if (err) + goto err; + + err = send_mpa_reply(ep, conn_param->private_data, + conn_param->private_data_len); + if (err) + goto err; + state_set(&ep->com, FPDU_MODE); + established_upcall(ep); + put_ep(&ep->com); + return 0; +err: + ep->com.cm_id = NULL; + ep->com.qp = NULL; + cm_id->rem_ref(cm_id); + put_ep(&ep->com); + return err; +} + +static int init_sock(struct iwch_ep_common *epc) +{ + int err; + struct sockopt sopt; + int on=1; + + epc->so->so_upcall = iwch_so_upcall; + epc->so->so_upcallarg = epc; + epc->so->so_rcv.sb_flags |= SB_UPCALL; + epc->so->so_state |= SS_NBIO; + sopt.sopt_dir = SOPT_SET; + sopt.sopt_level = SOL_SOCKET; + sopt.sopt_name = SO_NO_DDP; + sopt.sopt_val = (caddr_t)&on; + sopt.sopt_valsize = sizeof on; + sopt.sopt_td = NULL; + err = sosetopt(epc->so, &sopt); + if (err) + printf("%s can't set SO_NO_DDP err %d\n", __FUNCTION__, err); + sopt.sopt_dir = SOPT_SET; + sopt.sopt_level = IPPROTO_TCP; + sopt.sopt_name = TCP_NODELAY; + sopt.sopt_val = (caddr_t)&on; + sopt.sopt_valsize = sizeof on; + sopt.sopt_td = NULL; + err = sosetopt(epc->so, &sopt); + if (err) + printf("%s can't set TCP_NODELAY err %d\n", __FUNCTION__, err); + + return 0; +} + +static int +is_loopback_dst(struct iw_cm_id *cm_id) +{ + uint16_t port = cm_id->remote_addr.sin_port; + struct ifaddr *ifa; + + cm_id->remote_addr.sin_port = 0; + ifa = ifa_ifwithaddr((struct sockaddr *)&cm_id->remote_addr); + cm_id->remote_addr.sin_port = port; + return (ifa != NULL); +} + +int +iwch_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param) +{ + int err = 0; + struct iwch_dev *h = to_iwch_dev(cm_id->device); + struct iwch_ep *ep; + struct rtentry *rt; + struct toedev *tdev; + + if (is_loopback_dst(cm_id)) { + err = -ENOSYS; + goto out; + } + + ep = alloc_ep(sizeof(*ep), M_NOWAIT); + if (!ep) { + printf("%s - cannot alloc ep.\n", __FUNCTION__); + err = (-ENOMEM); + goto out; + } + callout_init(&ep->timer, TRUE); + ep->plen = conn_param->private_data_len; + if (ep->plen) + memcpy(ep->mpa_pkt + sizeof(struct mpa_message), + conn_param->private_data, ep->plen); + ep->ird = conn_param->ird; + ep->ord = conn_param->ord; + + cm_id->add_ref(cm_id); + ep->com.cm_id = cm_id; + ep->com.qp = get_qhp(h, conn_param->qpn); + ep->com.thread = curthread; + PANIC_IF(!ep->com.qp); + CTR4(KTR_IW_CXGB, "%s qpn 0x%x qp %p cm_id %p", __FUNCTION__, conn_param->qpn, + ep->com.qp, cm_id); + + ep->com.so = cm_id->so; + err = init_sock(&ep->com); + if (err) + goto fail2; + + /* find a route */ + rt = find_route(cm_id->local_addr.sin_addr.s_addr, + cm_id->remote_addr.sin_addr.s_addr, + cm_id->local_addr.sin_port, + cm_id->remote_addr.sin_port, IPTOS_LOWDELAY); + if (!rt) { + printf("%s - cannot find route.\n", __FUNCTION__); + err = EHOSTUNREACH; + goto fail2; + } + + if (!(rt->rt_ifp->if_flags & IFCAP_TOE)) { + printf("%s - interface not TOE capable.\n", __FUNCTION__); + goto fail3; + } + tdev = TOEDEV(rt->rt_ifp); + if (tdev == NULL) { + printf("%s - No toedev for interface.\n", __FUNCTION__); + goto fail3; + } + if (!tdev->tod_can_offload(tdev, ep->com.so)) { + printf("%s - interface cannot offload!.\n", __FUNCTION__); + goto fail3; + } + RTFREE(rt); + + state_set(&ep->com, CONNECTING); + ep->com.local_addr = cm_id->local_addr; + ep->com.remote_addr = cm_id->remote_addr; + err = soconnect(ep->com.so, (struct sockaddr *)&ep->com.remote_addr, + ep->com.thread); + if (!err) + goto out; +fail3: + RTFREE(ep->dst); +fail2: + put_ep(&ep->com); +out: + return err; +} + +int +iwch_create_listen(struct iw_cm_id *cm_id, int backlog) +{ + int err = 0; + struct iwch_listen_ep *ep; + + ep = alloc_ep(sizeof(*ep), M_NOWAIT); + if (!ep) { + printf("%s - cannot alloc ep.\n", __FUNCTION__); + err = ENOMEM; + goto out; + } + CTR2(KTR_IW_CXGB, "%s ep %p", __FUNCTION__, ep); + cm_id->add_ref(cm_id); + ep->com.cm_id = cm_id; + ep->backlog = backlog; + ep->com.local_addr = cm_id->local_addr; + ep->com.thread = curthread; + state_set(&ep->com, LISTEN); + + ep->com.so = cm_id->so; + err = init_sock(&ep->com); + if (err) + goto fail; + + err = solisten(ep->com.so, ep->backlog, ep->com.thread); + if (!err) { + cm_id->provider_data = ep; + goto out; + } + close_socket(&ep->com); +fail: + cm_id->rem_ref(cm_id); + put_ep(&ep->com); +out: + return err; +} + +int +iwch_destroy_listen(struct iw_cm_id *cm_id) +{ + struct iwch_listen_ep *ep = to_listen_ep(cm_id); + + CTR2(KTR_IW_CXGB, "%s ep %p", __FUNCTION__, ep); + + state_set(&ep->com, DEAD); + close_socket(&ep->com); + cm_id->rem_ref(cm_id); + put_ep(&ep->com); + return 0; +} + +int +iwch_ep_disconnect(struct iwch_ep *ep, int abrupt, int flags) +{ + int close = 0; + + mtx_lock(&ep->com.lock); + + PANIC_IF(!ep); + PANIC_IF(!ep->com.so); + + CTR5(KTR_IW_CXGB, "%s ep %p so %p state %s, abrupt %d", __FUNCTION__, ep, + ep->com.so, states[ep->com.state], abrupt); + + if (ep->com.state == DEAD) { + CTR2(KTR_IW_CXGB, "%s already dead ep %p", __FUNCTION__, ep); + goto out; + } + + if (abrupt) { + if (ep->com.state != ABORTING) { + ep->com.state = ABORTING; + close = 1; + } + goto out; + } + + switch (ep->com.state) { + case MPA_REQ_WAIT: + case MPA_REQ_SENT: + case MPA_REQ_RCVD: + case MPA_REP_SENT: + case FPDU_MODE: + start_ep_timer(ep); + ep->com.state = CLOSING; + close = 1; + break; + case CLOSING: + ep->com.state = MORIBUND; + close = 1; + break; + case MORIBUND: + case ABORTING: + break; + default: + panic("unknown state: %d\n", ep->com.state); + break; + } +out: + mtx_unlock(&ep->com.lock); + if (close) { + if (abrupt) + abort_connection(ep); + else + shutdown_socket(&ep->com); + } + return 0; +} + +static void +process_data(struct iwch_ep *ep) +{ + struct sockaddr_in *local, *remote; + + CTR4(KTR_IW_CXGB, "%s ep %p so %p state %s", __FUNCTION__, ep, ep->com.so, states[ep->com.state]); + + switch (state_read(&ep->com)) { + case MPA_REQ_SENT: + process_mpa_reply(ep); + break; + case MPA_REQ_WAIT: + + /* + * XXX + * Set local and remote addrs here because when we + * dequeue the newly accepted socket, they aren't set + * yet in the pcb! + */ + in_getsockaddr(ep->com.so, (struct sockaddr **)&local); + in_getpeeraddr(ep->com.so, (struct sockaddr **)&remote); + CTR3(KTR_IW_CXGB, "%s local %s remote %s", __FUNCTION__, + inet_ntoa(local->sin_addr), + inet_ntoa(remote->sin_addr)); + ep->com.local_addr = *local; + ep->com.remote_addr = *remote; + free(local, M_SONAME); + free(remote, M_SONAME); + process_mpa_request(ep); + break; + default: + if (ep->com.so->so_rcv.sb_cc) + printf("%s Unexpected streaming data." + " ep %p state %d so %p so_state %x so_rcv.sb_cc %u so_rcv.sb_mb %p\n", + __FUNCTION__, ep, state_read(&ep->com), ep->com.so, ep->com.so->so_state, + ep->com.so->so_rcv.sb_cc, ep->com.so->so_rcv.sb_mb); + break; + } + return; +} + +static void +process_connected(struct iwch_ep *ep) +{ + CTR4(KTR_IW_CXGB, "%s ep %p so %p state %s", __FUNCTION__, ep, ep->com.so, states[ep->com.state]); + if ((ep->com.so->so_state & SS_ISCONNECTED) && !ep->com.so->so_error) { + send_mpa_req(ep); + } else { + connect_reply_upcall(ep, -ep->com.so->so_error); + close_socket(&ep->com); + state_set(&ep->com, DEAD); + put_ep(&ep->com); + } +} + +static struct socket * +dequeue_socket(struct socket *head, struct sockaddr_in **remote, struct iwch_ep *child_ep) +{ + struct socket *so; + + ACCEPT_LOCK(); + so = TAILQ_FIRST(&head->so_comp); + if (!so) { + ACCEPT_UNLOCK(); + return NULL; + } + TAILQ_REMOVE(&head->so_comp, so, so_list); + head->so_qlen--; + SOCK_LOCK(so); + so->so_qstate &= ~SQ_COMP; + so->so_head = NULL; + soref(so); + so->so_rcv.sb_flags |= SB_UPCALL; + so->so_state |= SS_NBIO; + so->so_upcall = iwch_so_upcall; + so->so_upcallarg = child_ep; + PANIC_IF(!(so->so_state & SS_ISCONNECTED)); + PANIC_IF(so->so_error); + SOCK_UNLOCK(so); + ACCEPT_UNLOCK(); + soaccept(so, (struct sockaddr **)remote); + return so; +} + +static void +process_newconn(struct iwch_ep *parent_ep) +{ + struct socket *child_so; + struct iwch_ep *child_ep; + struct sockaddr_in *remote; + + CTR3(KTR_IW_CXGB, "%s parent ep %p so %p", __FUNCTION__, parent_ep, parent_ep->com.so); + child_ep = alloc_ep(sizeof(*child_ep), M_NOWAIT); + if (!child_ep) { + log(LOG_ERR, "%s - failed to allocate ep entry!\n", + __FUNCTION__); + return; + } + child_so = dequeue_socket(parent_ep->com.so, &remote, child_ep); + if (!child_so) { + log(LOG_ERR, "%s - failed to dequeue child socket!\n", + __FUNCTION__); + __free_ep(&child_ep->com); + return; + } + CTR3(KTR_IW_CXGB, "%s remote addr %s port %d", __FUNCTION__, + inet_ntoa(remote->sin_addr), ntohs(remote->sin_port)); + child_ep->com.so = child_so; + child_ep->com.cm_id = NULL; + child_ep->com.thread = parent_ep->com.thread; + child_ep->parent_ep = parent_ep; + free(remote, M_SONAME); + get_ep(&parent_ep->com); + child_ep->parent_ep = parent_ep; + callout_init(&child_ep->timer, TRUE); + state_set(&child_ep->com, MPA_REQ_WAIT); + start_ep_timer(child_ep); + + /* maybe the request has already been queued up on the socket... */ + process_mpa_request(child_ep); +} + +static void +iwch_so_upcall(struct socket *so, void *arg, int waitflag) +{ + struct iwch_ep *ep = arg; + + CTR6(KTR_IW_CXGB, "%s so %p so state %x ep %p ep state(%d)=%s", __FUNCTION__, so, so->so_state, ep, ep->com.state, states[ep->com.state]); + mtx_lock(&req_lock); + if (ep && ep->com.so && !ep->com.entry.tqe_prev) { + get_ep(&ep->com); + TAILQ_INSERT_TAIL(&req_list, &ep->com, entry); + taskqueue_enqueue(iw_cxgb_taskq, &iw_cxgb_task); + } + mtx_unlock(&req_lock); +} + +static void +process_socket_event(struct iwch_ep *ep) +{ + int state = state_read(&ep->com); + struct socket *so = ep->com.so; + + CTR6(KTR_IW_CXGB, "%s so %p so state %x ep %p ep state(%d)=%s", __FUNCTION__, so, so->so_state, ep, ep->com.state, states[ep->com.state]); + if (state == CONNECTING) { + process_connected(ep); + return; + } + + if (state == LISTEN) { + process_newconn(ep); + return; + } + + /* connection error */ + if (so->so_error) { + process_conn_error(ep); + return; + } + + /* peer close */ + if ((so->so_rcv.sb_state & SBS_CANTRCVMORE) && state < CLOSING) { + process_peer_close(ep); + return; + } + + /* close complete */ + if (so->so_state & (SS_ISDISCONNECTED)) { + process_close_complete(ep); + return; + } + + /* rx data */ + process_data(ep); + return; +} + +static void +process_req(void *ctx, int pending) +{ + struct iwch_ep_common *epc; + + CTR1(KTR_IW_CXGB, "%s enter", __FUNCTION__); + mtx_lock(&req_lock); + while (!TAILQ_EMPTY(&req_list)) { + epc = TAILQ_FIRST(&req_list); + TAILQ_REMOVE(&req_list, epc, entry); + epc->entry.tqe_prev = NULL; + mtx_unlock(&req_lock); + if (epc->so) + process_socket_event((struct iwch_ep *)epc); + put_ep(epc); + mtx_lock(&req_lock); + } + mtx_unlock(&req_lock); +} + +int +iwch_cm_init(void) +{ + TAILQ_INIT(&req_list); + mtx_init(&req_lock, "iw_cxgb req_list lock", NULL, MTX_DEF); + iw_cxgb_taskq = taskqueue_create("iw_cxgb_taskq", M_NOWAIT, + taskqueue_thread_enqueue, &iw_cxgb_taskq); + if (iw_cxgb_taskq == NULL) { + printf("failed to allocate iw_cxgb taskqueue\n"); + return (ENOMEM); + } + taskqueue_start_threads(&iw_cxgb_taskq, 1, PI_NET, "iw_cxgb taskq"); + TASK_INIT(&iw_cxgb_task, 0, process_req, NULL); + t3tom_register_cpl_handler(CPL_RDMA_TERMINATE, terminate); + t3tom_register_cpl_handler(CPL_RDMA_EC_STATUS, ec_status); + return 0; +} + +void +iwch_cm_term(void) +{ + t3tom_register_cpl_handler(CPL_RDMA_TERMINATE, NULL); + t3tom_register_cpl_handler(CPL_RDMA_EC_STATUS, NULL); + taskqueue_drain(iw_cxgb_taskq, &iw_cxgb_task); + taskqueue_free(iw_cxgb_taskq); +} + |