diff options
author | np <np@FreeBSD.org> | 2016-01-22 23:33:34 +0000 |
---|---|---|
committer | np <np@FreeBSD.org> | 2016-01-22 23:33:34 +0000 |
commit | 057d736604eed731d9aa730e87220c4caa227ae3 (patch) | |
tree | 2bc6c60b6ab83ffb2aa3364ef3759bd23b93757e | |
parent | 9a3b34d7631b1ef522add470e0131f1a802f4968 (diff) | |
download | FreeBSD-src-057d736604eed731d9aa730e87220c4caa227ae3.zip FreeBSD-src-057d736604eed731d9aa730e87220c4caa227ae3.tar.gz |
Fix for iWARP servers that listen on INADDR_ANY.
The iWARP Connection Manager (CM) on FreeBSD creates a TCP socket to
represent an iWARP endpoint when the connection is over TCP. For
servers the current approach is to invoke the create_listen callback for
each iWARP RNIC registered with the CM. This doesn't work too well for
INADDR_ANY because a listen on any TCP socket already notifies all
hardware TOEs/RNICs of the new listener. This patch fixes the server
side of things for FreeBSD. We've tried to keep all these modifications
in the iWARP/TCP specific parts of the OFED infrastructure as much as
possible.
Submitted by: Krishnamraju Eraparaju @ Chelsio (with design inputs from Steve Wise)
Sponsored by: Chelsio Communications
Differential Revision: https://reviews.freebsd.org/D4801
-rw-r--r-- | sys/dev/cxgb/ulp/iw_cxgb/iw_cxgb.h | 1 | ||||
-rw-r--r-- | sys/dev/cxgb/ulp/iw_cxgb/iw_cxgb_cm.c | 83 | ||||
-rw-r--r-- | sys/dev/cxgb/ulp/iw_cxgb/iw_cxgb_cm.h | 4 | ||||
-rw-r--r-- | sys/dev/cxgb/ulp/iw_cxgb/iw_cxgb_provider.c | 5 | ||||
-rw-r--r-- | sys/dev/cxgbe/iw_cxgbe/cm.c | 101 | ||||
-rw-r--r-- | sys/dev/cxgbe/iw_cxgbe/iw_cxgbe.h | 8 | ||||
-rw-r--r-- | sys/dev/cxgbe/iw_cxgbe/provider.c | 7 | ||||
-rw-r--r-- | sys/ofed/drivers/infiniband/core/cma.c | 77 | ||||
-rw-r--r-- | sys/ofed/drivers/infiniband/core/iwcm.c | 292 | ||||
-rw-r--r-- | sys/ofed/include/rdma/iw_cm.h | 8 | ||||
-rw-r--r-- | sys/ofed/include/rdma/rdma_cm.h | 5 |
11 files changed, 446 insertions, 145 deletions
diff --git a/sys/dev/cxgb/ulp/iw_cxgb/iw_cxgb.h b/sys/dev/cxgb/ulp/iw_cxgb/iw_cxgb.h index 9fd3e0c..81f305f 100644 --- a/sys/dev/cxgb/ulp/iw_cxgb/iw_cxgb.h +++ b/sys/dev/cxgb/ulp/iw_cxgb/iw_cxgb.h @@ -174,4 +174,5 @@ static inline void remove_handle(struct iwch_dev *rhp, struct idr *idr, u32 id) } void iwch_ev_dispatch(struct iwch_dev *, struct mbuf *); +void process_newconn(struct iw_cm_id *parent_cm_id, struct socket *child_so); #endif diff --git a/sys/dev/cxgb/ulp/iw_cxgb/iw_cxgb_cm.c b/sys/dev/cxgb/ulp/iw_cxgb/iw_cxgb_cm.c index b98caae..9bcc1b0 100644 --- a/sys/dev/cxgb/ulp/iw_cxgb/iw_cxgb_cm.c +++ b/sys/dev/cxgb/ulp/iw_cxgb/iw_cxgb_cm.c @@ -260,7 +260,6 @@ alloc_ep(int size, int flags) void __free_ep(struct iwch_ep_common *epc) { CTR3(KTR_IW_CXGB, "%s ep %p state %s", __FUNCTION__, epc, states[state_read(epc)]); - KASSERT(!epc->so, ("%s warning ep->so %p \n", __FUNCTION__, epc->so)); KASSERT(!epc->entry.tqe_prev, ("%s epc %p still on req list!\n", __FUNCTION__, epc)); free(epc, M_DEVBUF); } @@ -1361,7 +1360,7 @@ out: } int -iwch_create_listen(struct iw_cm_id *cm_id, int backlog) +iwch_create_listen_ep(struct iw_cm_id *cm_id, int backlog) { int err = 0; struct iwch_listen_ep *ep; @@ -1381,35 +1380,22 @@ iwch_create_listen(struct iw_cm_id *cm_id, int backlog) state_set(&ep->com, LISTEN); ep->com.so = cm_id->so; - err = init_sock(&ep->com); - if (err) - goto fail; - - err = solisten(ep->com.so, ep->backlog, ep->com.thread); - if (!err) { - cm_id->provider_data = ep; - goto out; - } - close_socket(&ep->com, 0); -fail: - cm_id->rem_ref(cm_id); - put_ep(&ep->com); + cm_id->provider_data = ep; out: return err; } -int -iwch_destroy_listen(struct iw_cm_id *cm_id) +void +iwch_destroy_listen_ep(struct iw_cm_id *cm_id) { struct iwch_listen_ep *ep = to_listen_ep(cm_id); CTR2(KTR_IW_CXGB, "%s ep %p", __FUNCTION__, ep); state_set(&ep->com, DEAD); - close_socket(&ep->com, 0); cm_id->rem_ref(cm_id); put_ep(&ep->com); - return 0; + return; } int @@ -1526,54 +1512,32 @@ 
process_connected(struct iwch_ep *ep) } } -static struct socket * -dequeue_socket(struct socket *head, struct sockaddr_in **remote, struct iwch_ep *child_ep) -{ - struct socket *so; - - ACCEPT_LOCK(); - so = TAILQ_FIRST(&head->so_comp); - if (!so) { - ACCEPT_UNLOCK(); - return NULL; - } - TAILQ_REMOVE(&head->so_comp, so, so_list); - head->so_qlen--; - SOCK_LOCK(so); - so->so_qstate &= ~SQ_COMP; - so->so_head = NULL; - soref(so); - soupcall_set(so, SO_RCV, iwch_so_upcall, child_ep); - so->so_state |= SS_NBIO; - PANIC_IF(!(so->so_state & SS_ISCONNECTED)); - PANIC_IF(so->so_error); - SOCK_UNLOCK(so); - ACCEPT_UNLOCK(); - soaccept(so, (struct sockaddr **)remote); - return so; -} - -static void -process_newconn(struct iwch_ep *parent_ep) +void +process_newconn(struct iw_cm_id *parent_cm_id, struct socket *child_so) { - struct socket *child_so; struct iwch_ep *child_ep; + struct sockaddr_in *local; struct sockaddr_in *remote; + struct iwch_ep *parent_ep = parent_cm_id->provider_data; CTR3(KTR_IW_CXGB, "%s parent ep %p so %p", __FUNCTION__, parent_ep, parent_ep->com.so); + if (!child_so) { + log(LOG_ERR, "%s - invalid child socket!\n", __func__); + return; + } child_ep = alloc_ep(sizeof(*child_ep), M_NOWAIT); if (!child_ep) { log(LOG_ERR, "%s - failed to allocate ep entry!\n", __FUNCTION__); return; } - child_so = dequeue_socket(parent_ep->com.so, &remote, child_ep); - if (!child_so) { - log(LOG_ERR, "%s - failed to dequeue child socket!\n", - __FUNCTION__); - __free_ep(&child_ep->com); - return; - } + SOCKBUF_LOCK(&child_so->so_rcv); + soupcall_set(child_so, SO_RCV, iwch_so_upcall, child_ep); + SOCKBUF_UNLOCK(&child_so->so_rcv); + + in_getsockaddr(child_so, (struct sockaddr **)&local); + in_getpeeraddr(child_so, (struct sockaddr **)&remote); + CTR3(KTR_IW_CXGB, "%s remote addr %s port %d", __FUNCTION__, inet_ntoa(remote->sin_addr), ntohs(remote->sin_port)); child_ep->com.tdev = parent_ep->com.tdev; @@ -1590,9 +1554,9 @@ process_newconn(struct iwch_ep *parent_ep) 
child_ep->com.thread = parent_ep->com.thread; child_ep->parent_ep = parent_ep; + free(local, M_SONAME); free(remote, M_SONAME); get_ep(&parent_ep->com); - child_ep->parent_ep = parent_ep; callout_init(&child_ep->timer, 1); state_set(&child_ep->com, MPA_REQ_WAIT); start_ep_timer(child_ep); @@ -1630,7 +1594,10 @@ process_socket_event(struct iwch_ep *ep) } if (state == LISTEN) { - process_newconn(ep); + /* socket listening events are handled at IWCM */ + CTR3(KTR_IW_CXGB, "%s Invalid ep state:%u, ep:%p", __func__, + ep->com.state, ep); + BUG(); return; } diff --git a/sys/dev/cxgb/ulp/iw_cxgb/iw_cxgb_cm.h b/sys/dev/cxgb/ulp/iw_cxgb/iw_cxgb_cm.h index ef76729..241106b 100644 --- a/sys/dev/cxgb/ulp/iw_cxgb/iw_cxgb_cm.h +++ b/sys/dev/cxgb/ulp/iw_cxgb/iw_cxgb_cm.h @@ -231,8 +231,8 @@ iwch_wakeup(struct cv *cv, struct mtx *lock, int *rpl_done) /* CM prototypes */ int iwch_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param); -int iwch_create_listen(struct iw_cm_id *cm_id, int backlog); -int iwch_destroy_listen(struct iw_cm_id *cm_id); +int iwch_create_listen_ep(struct iw_cm_id *cm_id, int backlog); +void iwch_destroy_listen_ep(struct iw_cm_id *cm_id); int iwch_reject_cr(struct iw_cm_id *cm_id, const void *pdata, u8 pdata_len); int iwch_accept_cr(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param); int iwch_ep_disconnect(struct iwch_ep *ep, int abrupt, int flags); diff --git a/sys/dev/cxgb/ulp/iw_cxgb/iw_cxgb_provider.c b/sys/dev/cxgb/ulp/iw_cxgb/iw_cxgb_provider.c index f9d36b3..448b993 100644 --- a/sys/dev/cxgb/ulp/iw_cxgb/iw_cxgb_provider.c +++ b/sys/dev/cxgb/ulp/iw_cxgb/iw_cxgb_provider.c @@ -1140,8 +1140,9 @@ int iwch_register_device(struct iwch_dev *dev) dev->ibdev.iwcm->connect = iwch_connect; dev->ibdev.iwcm->accept = iwch_accept_cr; dev->ibdev.iwcm->reject = iwch_reject_cr; - dev->ibdev.iwcm->create_listen = iwch_create_listen; - dev->ibdev.iwcm->destroy_listen = iwch_destroy_listen; + dev->ibdev.iwcm->create_listen_ep = 
iwch_create_listen_ep; + dev->ibdev.iwcm->destroy_listen_ep = iwch_destroy_listen_ep; + dev->ibdev.iwcm->newconn = process_newconn; dev->ibdev.iwcm->add_ref = iwch_qp_add_ref; dev->ibdev.iwcm->rem_ref = iwch_qp_rem_ref; dev->ibdev.iwcm->get_qp = iwch_get_qp; diff --git a/sys/dev/cxgbe/iw_cxgbe/cm.c b/sys/dev/cxgbe/iw_cxgbe/cm.c index c3c7f4b..c884f5a 100644 --- a/sys/dev/cxgbe/iw_cxgbe/cm.c +++ b/sys/dev/cxgbe/iw_cxgbe/cm.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2009-2013 Chelsio, Inc. All rights reserved. + * Copyright (c) 2009-2013, 2016 Chelsio, Inc. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU @@ -111,8 +111,6 @@ static void ep_timeout(unsigned long arg); static void init_sock(struct c4iw_ep_common *epc); static void process_data(struct c4iw_ep *ep); static void process_connected(struct c4iw_ep *ep); -static struct socket * dequeue_socket(struct socket *head, struct sockaddr_in **remote, struct c4iw_ep *child_ep); -static void process_newconn(struct c4iw_ep *parent_ep); static int c4iw_so_upcall(struct socket *so, void *arg, int waitflag); static void process_socket_event(struct c4iw_ep *ep); static void release_ep_resources(struct c4iw_ep *ep); @@ -623,40 +621,21 @@ process_connected(struct c4iw_ep *ep) } } -static struct socket * -dequeue_socket(struct socket *head, struct sockaddr_in **remote, - struct c4iw_ep *child_ep) -{ - struct socket *so; - - ACCEPT_LOCK(); - so = TAILQ_FIRST(&head->so_comp); - if (!so) { - ACCEPT_UNLOCK(); - return (NULL); - } - TAILQ_REMOVE(&head->so_comp, so, so_list); - head->so_qlen--; - SOCK_LOCK(so); - so->so_qstate &= ~SQ_COMP; - so->so_head = NULL; - soref(so); - soupcall_set(so, SO_RCV, c4iw_so_upcall, child_ep); - so->so_state |= SS_NBIO; - SOCK_UNLOCK(so); - ACCEPT_UNLOCK(); - soaccept(so, (struct sockaddr **)remote); - - return (so); -} - -static void -process_newconn(struct c4iw_ep *parent_ep) +void 
+process_newconn(struct iw_cm_id *parent_cm_id, struct socket *child_so) { - struct socket *child_so; struct c4iw_ep *child_ep; + struct sockaddr_in *local; struct sockaddr_in *remote; + struct c4iw_ep *parent_ep = parent_cm_id->provider_data; + if (!child_so) { + CTR4(KTR_IW_CXGBE, + "%s: parent so %p, parent ep %p, child so %p, invalid so", + __func__, parent_ep->com.so, parent_ep, child_so); + log(LOG_ERR, "%s: invalid child socket\n", __func__); + return; + } child_ep = alloc_ep(sizeof(*child_ep), M_NOWAIT); if (!child_ep) { CTR3(KTR_IW_CXGBE, "%s: parent so %p, parent ep %p, ENOMEM", @@ -664,23 +643,18 @@ process_newconn(struct c4iw_ep *parent_ep) log(LOG_ERR, "%s: failed to allocate ep entry\n", __func__); return; } - - child_so = dequeue_socket(parent_ep->com.so, &remote, child_ep); - if (!child_so) { - CTR4(KTR_IW_CXGBE, - "%s: parent so %p, parent ep %p, child ep %p, dequeue err", - __func__, parent_ep->com.so, parent_ep, child_ep); - log(LOG_ERR, "%s: failed to dequeue child socket\n", __func__); - __free_ep(&child_ep->com); - return; - - } + SOCKBUF_LOCK(&child_so->so_rcv); + soupcall_set(child_so, SO_RCV, c4iw_so_upcall, child_ep); + SOCKBUF_UNLOCK(&child_so->so_rcv); CTR5(KTR_IW_CXGBE, "%s: parent so %p, parent ep %p, child so %p, child ep %p", __func__, parent_ep->com.so, parent_ep, child_so, child_ep); - child_ep->com.local_addr = parent_ep->com.local_addr; + in_getsockaddr(child_so, (struct sockaddr **)&local); + in_getpeeraddr(child_so, (struct sockaddr **)&remote); + + child_ep->com.local_addr = *local; child_ep->com.remote_addr = *remote; child_ep->com.dev = parent_ep->com.dev; child_ep->com.so = child_so; @@ -688,15 +662,17 @@ process_newconn(struct c4iw_ep *parent_ep) child_ep->com.thread = parent_ep->com.thread; child_ep->parent_ep = parent_ep; + free(local, M_SONAME); free(remote, M_SONAME); + c4iw_get_ep(&parent_ep->com); - child_ep->parent_ep = parent_ep; init_timer(&child_ep->timer); state_set(&child_ep->com, MPA_REQ_WAIT); 
START_EP_TIMER(child_ep); /* maybe the request has already been queued up on the socket... */ process_mpa_request(child_ep); + return; } static int @@ -738,7 +714,10 @@ process_socket_event(struct c4iw_ep *ep) } if (state == LISTEN) { - process_newconn(ep); + /* socket listening events are handled at IWCM */ + CTR3(KTR_IW_CXGBE, "%s Invalid ep state:%u, ep:%p", __func__, + ep->com.state, ep); + BUG(); return; } @@ -919,7 +898,6 @@ void _c4iw_free_ep(struct kref *kref) ep = container_of(kref, struct c4iw_ep, com.kref); epc = &ep->com; - KASSERT(!epc->so, ("%s ep->so %p", __func__, epc->so)); KASSERT(!epc->entry.tqe_prev, ("%s epc %p still on req list", __func__, epc)); kfree(ep); @@ -2126,10 +2104,10 @@ out: } /* - * iwcm->create_listen. Returns -errno on failure. + * iwcm->create_listen_ep. Returns -errno on failure. */ int -c4iw_create_listen(struct iw_cm_id *cm_id, int backlog) +c4iw_create_listen_ep(struct iw_cm_id *cm_id, int backlog) { int rc; struct c4iw_dev *dev = to_c4iw_dev(cm_id->device); @@ -2154,17 +2132,6 @@ c4iw_create_listen(struct iw_cm_id *cm_id, int backlog) ep->com.thread = curthread; state_set(&ep->com, LISTEN); ep->com.so = so; - init_sock(&ep->com); - - rc = solisten(so, ep->backlog, ep->com.thread); - if (rc != 0) { - log(LOG_ERR, "%s: failed to start listener: %d\n", __func__, - rc); - close_socket(&ep->com, 0); - cm_id->rem_ref(cm_id); - c4iw_put_ep(&ep->com); - goto failed; - } cm_id->provider_data = ep; return (0); @@ -2174,21 +2141,19 @@ failed: return (-rc); } -int -c4iw_destroy_listen(struct iw_cm_id *cm_id) +void +c4iw_destroy_listen_ep(struct iw_cm_id *cm_id) { - int rc; struct c4iw_listen_ep *ep = to_listen_ep(cm_id); - CTR4(KTR_IW_CXGBE, "%s: cm_id %p, so %p, inp %p", __func__, cm_id, - cm_id->so, cm_id->so->so_pcb); + CTR4(KTR_IW_CXGBE, "%s: cm_id %p, so %p, state %s", __func__, cm_id, + cm_id->so, states[ep->com.state]); state_set(&ep->com, DEAD); - rc = close_socket(&ep->com, 0); cm_id->rem_ref(cm_id); c4iw_put_ep(&ep->com); - 
return (rc); + return; } int c4iw_ep_disconnect(struct c4iw_ep *ep, int abrupt, gfp_t gfp) diff --git a/sys/dev/cxgbe/iw_cxgbe/iw_cxgbe.h b/sys/dev/cxgbe/iw_cxgbe/iw_cxgbe.h index e6d70f4..f6c8a59 100644 --- a/sys/dev/cxgbe/iw_cxgbe/iw_cxgbe.h +++ b/sys/dev/cxgbe/iw_cxgbe/iw_cxgbe.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2009-2013 Chelsio, Inc. All rights reserved. + * Copyright (c) 2009-2013, 2016 Chelsio, Inc. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU @@ -850,8 +850,8 @@ int c4iw_post_receive(struct ib_qp *ibqp, struct ib_recv_wr *wr, int c4iw_bind_mw(struct ib_qp *qp, struct ib_mw *mw, struct ib_mw_bind *mw_bind); int c4iw_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param); -int c4iw_create_listen(struct iw_cm_id *cm_id, int backlog); -int c4iw_destroy_listen(struct iw_cm_id *cm_id); +int c4iw_create_listen_ep(struct iw_cm_id *cm_id, int backlog); +void c4iw_destroy_listen_ep(struct iw_cm_id *cm_id); int c4iw_accept_cr(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param); int c4iw_reject_cr(struct iw_cm_id *cm_id, const void *pdata, u8 pdata_len); void c4iw_qp_add_ref(struct ib_qp *qp); @@ -914,6 +914,8 @@ u32 c4iw_get_qpid(struct c4iw_rdev *rdev, struct c4iw_dev_ucontext *uctx); void c4iw_put_qpid(struct c4iw_rdev *rdev, u32 qid, struct c4iw_dev_ucontext *uctx); void c4iw_ev_dispatch(struct c4iw_dev *dev, struct t4_cqe *err_cqe); +void process_newconn(struct iw_cm_id *parent_cm_id, + struct socket *child_so); extern struct cxgb4_client t4c_client; extern c4iw_handler_func c4iw_handlers[NUM_CPL_CMDS]; diff --git a/sys/dev/cxgbe/iw_cxgbe/provider.c b/sys/dev/cxgbe/iw_cxgbe/provider.c index d7ce079..a21fb9c 100644 --- a/sys/dev/cxgbe/iw_cxgbe/provider.c +++ b/sys/dev/cxgbe/iw_cxgbe/provider.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2009-2013 Chelsio, Inc. All rights reserved. + * Copyright (c) 2009-2013, 2016 Chelsio, Inc. 
All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU @@ -474,8 +474,9 @@ c4iw_register_device(struct c4iw_dev *dev) iwcm->connect = c4iw_connect; iwcm->accept = c4iw_accept_cr; iwcm->reject = c4iw_reject_cr; - iwcm->create_listen = c4iw_create_listen; - iwcm->destroy_listen = c4iw_destroy_listen; + iwcm->create_listen_ep = c4iw_create_listen_ep; + iwcm->destroy_listen_ep = c4iw_destroy_listen_ep; + iwcm->newconn = process_newconn; iwcm->add_ref = c4iw_qp_add_ref; iwcm->rem_ref = c4iw_qp_rem_ref; iwcm->get_qp = c4iw_get_qp; diff --git a/sys/ofed/drivers/infiniband/core/cma.c b/sys/ofed/drivers/infiniband/core/cma.c index 40c4d82..1cafced 100644 --- a/sys/ofed/drivers/infiniband/core/cma.c +++ b/sys/ofed/drivers/infiniband/core/cma.c @@ -3,6 +3,7 @@ * Copyright (c) 2002-2005, Network Appliance, Inc. All rights reserved. * Copyright (c) 1999-2005, Mellanox Technologies, Inc. All rights reserved. * Copyright (c) 2005-2006 Intel Corporation. All rights reserved. + * Copyright (c) 2016 Chelsio Communications. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU @@ -407,6 +408,75 @@ static int find_gid_port(struct ib_device *device, union ib_gid *gid, u8 port_nu return -EAGAIN; } +int +rdma_find_cmid_laddr(struct sockaddr_in *local_addr, unsigned short dev_type, + void **cm_id) +{ + int ret; + u8 port; + int found_dev = 0, found_cmid = 0; + struct rdma_id_private *id_priv; + struct rdma_id_private *dev_id_priv; + struct cma_device *cma_dev; + struct rdma_dev_addr dev_addr; + union ib_gid gid; + enum rdma_link_layer dev_ll = dev_type == ARPHRD_INFINIBAND ? 
+ IB_LINK_LAYER_INFINIBAND : IB_LINK_LAYER_ETHERNET; + + memset(&dev_addr, 0, sizeof(dev_addr)); + + ret = rdma_translate_ip((struct sockaddr *)local_addr, + &dev_addr, NULL); + if (ret) + goto err; + + /* find rdma device based on MAC address/gid */ + mutex_lock(&lock); + + memcpy(&gid, dev_addr.src_dev_addr + + rdma_addr_gid_offset(&dev_addr), sizeof(gid)); + + list_for_each_entry(cma_dev, &dev_list, list) + for (port = 1; port <= cma_dev->device->phys_port_cnt; ++port) + if ((rdma_port_get_link_layer(cma_dev->device, port) == + dev_ll) && + (rdma_node_get_transport(cma_dev->device->node_type) == + RDMA_TRANSPORT_IWARP)) { + ret = find_gid_port(cma_dev->device, + &gid, port); + if (!ret) { + found_dev = 1; + goto out; + } else if (ret == 1) { + mutex_unlock(&lock); + goto err; + } + } +out: + mutex_unlock(&lock); + + if (!found_dev) + goto err; + + /* Traverse through the list of listening cm_id's to find the + * desired cm_id based on rdma device & port number. + */ + list_for_each_entry(id_priv, &listen_any_list, list) + list_for_each_entry(dev_id_priv, &id_priv->listen_list, + listen_list) + if (dev_id_priv->cma_dev == cma_dev) + if (dev_id_priv->cm_id.iw->local_addr.sin_port + == local_addr->sin_port) { + *cm_id = (void *)dev_id_priv->cm_id.iw; + found_cmid = 1; + } + return found_cmid ? 
0 : -ENODEV; + +err: + return -ENODEV; +} +EXPORT_SYMBOL(rdma_find_cmid_laddr); + static int cma_acquire_dev(struct rdma_id_private *id_priv) { struct rdma_dev_addr *dev_addr = &id_priv->id.route.addr.dev_addr; @@ -780,6 +850,12 @@ static inline int cma_any_addr(struct sockaddr *addr) { return cma_zero_addr(addr) || cma_loopback_addr(addr); } +int +rdma_cma_any_addr(struct sockaddr *addr) +{ + return cma_any_addr(addr); +} +EXPORT_SYMBOL(rdma_cma_any_addr); static int cma_addr_cmp(struct sockaddr *src, struct sockaddr *dst) { @@ -1707,6 +1783,7 @@ static void cma_listen_on_dev(struct rdma_id_private *id_priv, dev_id_priv = container_of(id, struct rdma_id_private, id); dev_id_priv->state = RDMA_CM_ADDR_BOUND; + dev_id_priv->sock = id_priv->sock; memcpy(&id->route.addr.src_addr, &id_priv->id.route.addr.src_addr, ip_addr_size((struct sockaddr *) &id_priv->id.route.addr.src_addr)); diff --git a/sys/ofed/drivers/infiniband/core/iwcm.c b/sys/ofed/drivers/infiniband/core/iwcm.c index 14d23cc..a90f907 100644 --- a/sys/ofed/drivers/infiniband/core/iwcm.c +++ b/sys/ofed/drivers/infiniband/core/iwcm.c @@ -5,6 +5,7 @@ * Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved. * Copyright (c) 2005 Open Grid Computing, Inc. All rights reserved. * Copyright (c) 2005 Network Appliance, Inc. All rights reserved. + * Copyright (c) 2016 Chelsio Communications. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU @@ -35,6 +36,8 @@ * SOFTWARE. 
* */ +#include "opt_inet.h" + #include <linux/dma-mapping.h> #include <linux/err.h> #include <linux/idr.h> @@ -47,7 +50,10 @@ #include <linux/slab.h> #include <linux/module.h> #include <linux/string.h> +#include <netinet/tcp.h> +#include <sys/mutex.h> +#include <rdma/rdma_cm.h> #include <rdma/iw_cm.h> #include <rdma/ib_addr.h> @@ -65,6 +71,85 @@ struct iwcm_work { struct iw_cm_event event; struct list_head free_list; }; +struct iwcm_listen_work { + struct work_struct work; + struct iw_cm_id *cm_id; +}; + +static LIST_HEAD(listen_port_list); + +static DEFINE_MUTEX(listen_port_mutex); +static DEFINE_MUTEX(dequeue_mutex); + +struct listen_port_info { + struct list_head list; + uint16_t port_num; + uint32_t refcnt; +}; + +static int32_t +add_port_to_listenlist(uint16_t port) +{ + struct listen_port_info *port_info; + int err = 0; + + mutex_lock(&listen_port_mutex); + + list_for_each_entry(port_info, &listen_port_list, list) + if (port_info->port_num == port) + goto found_port; + + port_info = kmalloc(sizeof(*port_info), GFP_KERNEL); + if (!port_info) { + err = -ENOMEM; + mutex_unlock(&listen_port_mutex); + goto out; + } + + port_info->port_num = port; + port_info->refcnt = 0; + + list_add(&port_info->list, &listen_port_list); + +found_port: + ++(port_info->refcnt); + mutex_unlock(&listen_port_mutex); + return port_info->refcnt; +out: + return err; +} + +static int32_t +rem_port_from_listenlist(uint16_t port) +{ + struct listen_port_info *port_info; + int ret, found_port = 0; + + mutex_lock(&listen_port_mutex); + + list_for_each_entry(port_info, &listen_port_list, list) + if (port_info->port_num == port) { + found_port = 1; + break; + } + + if (found_port) { + --(port_info->refcnt); + ret = port_info->refcnt; + if (port_info->refcnt == 0) { + /* Remove this entry from the list as there are no + * more listeners for this port_num. 
+ */ + list_del(&port_info->list); + kfree(port_info); + } + } else { + ret = -EINVAL; + } + mutex_unlock(&listen_port_mutex); + return ret; + +} /* * The following services provide a mechanism for pre-allocating iwcm_work @@ -320,6 +405,167 @@ int iw_cm_disconnect(struct iw_cm_id *cm_id, int abrupt) } EXPORT_SYMBOL(iw_cm_disconnect); +static struct socket * +dequeue_socket(struct socket *head) +{ + struct socket *so; + struct sockaddr_in *remote; + + ACCEPT_LOCK(); + so = TAILQ_FIRST(&head->so_comp); + if (!so) { + ACCEPT_UNLOCK(); + return NULL; + } + + SOCK_LOCK(so); + /* + * Before changing the flags on the socket, we have to bump the + * reference count. Otherwise, if the protocol calls sofree(), + * the socket will be released due to a zero refcount. + */ + soref(so); + TAILQ_REMOVE(&head->so_comp, so, so_list); + head->so_qlen--; + so->so_qstate &= ~SQ_COMP; + so->so_head = NULL; + so->so_state |= SS_NBIO; + SOCK_UNLOCK(so); + ACCEPT_UNLOCK(); + soaccept(so, (struct sockaddr **)&remote); + + free(remote, M_SONAME); + return so; +} +static void +iw_so_event_handler(struct work_struct *_work) +{ +#ifdef INET + struct iwcm_listen_work *work = container_of(_work, + struct iwcm_listen_work, work); + struct iw_cm_id *listen_cm_id = work->cm_id; + struct iwcm_id_private *cm_id_priv; + struct iw_cm_id *real_cm_id; + struct sockaddr_in *local; + struct socket *so; + + cm_id_priv = container_of(listen_cm_id, struct iwcm_id_private, id); + + if (cm_id_priv->state != IW_CM_STATE_LISTEN) { + kfree(work); + return; + } + mutex_lock(&dequeue_mutex); + + /* Dequeue & process all new 'so' connection requests for this cmid */ + while ((so = dequeue_socket(work->cm_id->so)) != NULL) { + if (rdma_cma_any_addr((struct sockaddr *) + &listen_cm_id->local_addr)) { + in_getsockaddr(so, (struct sockaddr **)&local); + if (rdma_find_cmid_laddr(local, ARPHRD_ETHER, + (void **) &real_cm_id)) { + free(local, M_SONAME); + goto err; + } + free(local, M_SONAME); + + 
real_cm_id->device->iwcm->newconn(real_cm_id, so); + } else { + listen_cm_id->device->iwcm->newconn(listen_cm_id, so); + } + } +err: + mutex_unlock(&dequeue_mutex); + kfree(work); +#endif + return; +} +static int +iw_so_upcall(struct socket *parent_so, void *arg, int waitflag) +{ + struct iwcm_listen_work *work; + struct socket *so; + struct iw_cm_id *cm_id = arg; + + mutex_lock(&dequeue_mutex); + /* check whether iw_so_event_handler() already dequeued this 'so' */ + so = TAILQ_FIRST(&parent_so->so_comp); + if (!so) + return SU_OK; + work = kzalloc(sizeof(*work), M_NOWAIT); + if (!work) + return -ENOMEM; + work->cm_id = cm_id; + + INIT_WORK(&work->work, iw_so_event_handler); + queue_work(iwcm_wq, &work->work); + + mutex_unlock(&dequeue_mutex); + return SU_OK; +} + +static void +iw_init_sock(struct iw_cm_id *cm_id) +{ + struct sockopt sopt; + struct socket *so = cm_id->so; + int on = 1; + + SOCK_LOCK(so); + soupcall_set(so, SO_RCV, iw_so_upcall, cm_id); + so->so_state |= SS_NBIO; + SOCK_UNLOCK(so); + sopt.sopt_dir = SOPT_SET; + sopt.sopt_level = IPPROTO_TCP; + sopt.sopt_name = TCP_NODELAY; + sopt.sopt_val = (caddr_t)&on; + sopt.sopt_valsize = sizeof(on); + sopt.sopt_td = NULL; + sosetopt(so, &sopt); +} + +static int +iw_close_socket(struct iw_cm_id *cm_id, int close) +{ + struct socket *so = cm_id->so; + int rc; + + + SOCK_LOCK(so); + soupcall_clear(so, SO_RCV); + SOCK_UNLOCK(so); + + if (close) + rc = soclose(so); + else + rc = soshutdown(so, SHUT_WR | SHUT_RD); + + cm_id->so = NULL; + + return rc; +} + +static int +iw_create_listen(struct iw_cm_id *cm_id, int backlog) +{ + int rc; + + iw_init_sock(cm_id); + rc = solisten(cm_id->so, backlog, curthread); + if (rc != 0) + iw_close_socket(cm_id, 0); + return rc; +} + +static int +iw_destroy_listen(struct iw_cm_id *cm_id) +{ + int rc; + rc = iw_close_socket(cm_id, 0); + return rc; +} + + /* * CM_ID <-- DESTROYING * @@ -330,7 +576,7 @@ static void destroy_cm_id(struct iw_cm_id *cm_id) { struct iwcm_id_private 
*cm_id_priv; unsigned long flags; - int ret; + int ret = 0, refcnt; cm_id_priv = container_of(cm_id, struct iwcm_id_private, id); /* @@ -345,8 +591,18 @@ static void destroy_cm_id(struct iw_cm_id *cm_id) case IW_CM_STATE_LISTEN: cm_id_priv->state = IW_CM_STATE_DESTROYING; spin_unlock_irqrestore(&cm_id_priv->lock, flags); - /* destroy the listening endpoint */ - ret = cm_id->device->iwcm->destroy_listen(cm_id); + if (rdma_cma_any_addr((struct sockaddr *)&cm_id->local_addr)) { + refcnt = + rem_port_from_listenlist(cm_id->local_addr.sin_port); + + if (refcnt == 0) + ret = iw_destroy_listen(cm_id); + + cm_id->device->iwcm->destroy_listen_ep(cm_id); + } else { + ret = iw_destroy_listen(cm_id); + cm_id->device->iwcm->destroy_listen_ep(cm_id); + } spin_lock_irqsave(&cm_id_priv->lock, flags); break; case IW_CM_STATE_ESTABLISHED: @@ -418,7 +674,7 @@ int iw_cm_listen(struct iw_cm_id *cm_id, int backlog) { struct iwcm_id_private *cm_id_priv; unsigned long flags; - int ret; + int ret, refcnt; cm_id_priv = container_of(cm_id, struct iwcm_id_private, id); @@ -431,9 +687,33 @@ int iw_cm_listen(struct iw_cm_id *cm_id, int backlog) case IW_CM_STATE_IDLE: cm_id_priv->state = IW_CM_STATE_LISTEN; spin_unlock_irqrestore(&cm_id_priv->lock, flags); - ret = cm_id->device->iwcm->create_listen(cm_id, backlog); - if (ret) + + if (rdma_cma_any_addr((struct sockaddr *)&cm_id->local_addr)) { + refcnt = + add_port_to_listenlist(cm_id->local_addr.sin_port); + + if (refcnt == 1) { + ret = iw_create_listen(cm_id, backlog); + } else if (refcnt <= 0) { + ret = -EINVAL; + } else { + /* if refcnt > 1, a socket listener created + * already. And we need not create socket + * listener on other rdma devices/listen cm_id's + * due to TOE. That is when a socket listener is + * created with INADDR_ANY all registered TOE + * devices will get a call to start + * hardware listeners. 
+ */ + } + } else { + ret = iw_create_listen(cm_id, backlog); + } + if (!ret) + cm_id->device->iwcm->create_listen_ep(cm_id, backlog); + else cm_id_priv->state = IW_CM_STATE_IDLE; + spin_lock_irqsave(&cm_id_priv->lock, flags); break; default: diff --git a/sys/ofed/include/rdma/iw_cm.h b/sys/ofed/include/rdma/iw_cm.h index 271c2f8..a246e61 100644 --- a/sys/ofed/include/rdma/iw_cm.h +++ b/sys/ofed/include/rdma/iw_cm.h @@ -1,6 +1,7 @@ /* * Copyright (c) 2005 Network Appliance, Inc. All rights reserved. * Copyright (c) 2005 Open Grid Computing, Inc. All rights reserved. + * Copyright (c) 2016 Chelsio Communications. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU @@ -120,10 +121,13 @@ struct iw_cm_verbs { int (*reject)(struct iw_cm_id *cm_id, const void *pdata, u8 pdata_len); - int (*create_listen)(struct iw_cm_id *cm_id, + int (*create_listen_ep)(struct iw_cm_id *cm_id, int backlog); - int (*destroy_listen)(struct iw_cm_id *cm_id); + void (*destroy_listen_ep)(struct iw_cm_id *cm_id); + + void (*newconn)(struct iw_cm_id *parent_cm_id, + struct socket *so); }; /** diff --git a/sys/ofed/include/rdma/rdma_cm.h b/sys/ofed/include/rdma/rdma_cm.h index d699261..33be957 100644 --- a/sys/ofed/include/rdma/rdma_cm.h +++ b/sys/ofed/include/rdma/rdma_cm.h @@ -1,6 +1,7 @@ /* * Copyright (c) 2005 Voltaire Inc. All rights reserved. * Copyright (c) 2005 Intel Corporation. All rights reserved. + * Copyright (c) 2016 Chelsio Communications. All rights reserved. * * This software is available to you under a choice of one of two * licenses. 
You may choose to be licensed under the terms of the GNU @@ -400,5 +401,7 @@ int rdma_set_afonly(struct rdma_cm_id *id, int afonly); * @timeout: QP timeout */ void rdma_set_timeout(struct rdma_cm_id *id, int timeout); - +int rdma_cma_any_addr(struct sockaddr *addr); +int rdma_find_cmid_laddr(struct sockaddr_in *local_addr, + unsigned short dev_type, void **cm_id); #endif /* RDMA_CM_H */ |