summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author: np <np@FreeBSD.org> 2016-01-22 23:33:34 +0000
committer: np <np@FreeBSD.org> 2016-01-22 23:33:34 +0000
commit: 057d736604eed731d9aa730e87220c4caa227ae3 (patch)
tree: 2bc6c60b6ab83ffb2aa3364ef3759bd23b93757e
parent: 9a3b34d7631b1ef522add470e0131f1a802f4968 (diff)
download: FreeBSD-src-057d736604eed731d9aa730e87220c4caa227ae3.zip
download: FreeBSD-src-057d736604eed731d9aa730e87220c4caa227ae3.tar.gz
Fix for iWARP servers that listen on INADDR_ANY.
The iWARP Connection Manager (CM) on FreeBSD creates a TCP socket to represent an iWARP endpoint when the connection is over TCP. For servers, the current approach is to invoke the create_listen callback for each iWARP RNIC registered with the CM. This doesn't work well for INADDR_ANY because a listen on any TCP socket already notifies all hardware TOEs/RNICs of the new listener. This patch fixes the server side of things for FreeBSD. We've tried to keep all these modifications in the iWARP/TCP-specific parts of the OFED infrastructure as much as possible.

Submitted by: Krishnamraju Eraparaju @ Chelsio (with design inputs from Steve Wise)
Sponsored by: Chelsio Communications
Differential Revision: https://reviews.freebsd.org/D4801
-rw-r--r-- sys/dev/cxgb/ulp/iw_cxgb/iw_cxgb.h | 1
-rw-r--r-- sys/dev/cxgb/ulp/iw_cxgb/iw_cxgb_cm.c | 83
-rw-r--r-- sys/dev/cxgb/ulp/iw_cxgb/iw_cxgb_cm.h | 4
-rw-r--r-- sys/dev/cxgb/ulp/iw_cxgb/iw_cxgb_provider.c | 5
-rw-r--r-- sys/dev/cxgbe/iw_cxgbe/cm.c | 101
-rw-r--r-- sys/dev/cxgbe/iw_cxgbe/iw_cxgbe.h | 8
-rw-r--r-- sys/dev/cxgbe/iw_cxgbe/provider.c | 7
-rw-r--r-- sys/ofed/drivers/infiniband/core/cma.c | 77
-rw-r--r-- sys/ofed/drivers/infiniband/core/iwcm.c | 292
-rw-r--r-- sys/ofed/include/rdma/iw_cm.h | 8
-rw-r--r-- sys/ofed/include/rdma/rdma_cm.h | 5
11 files changed, 446 insertions, 145 deletions
diff --git a/sys/dev/cxgb/ulp/iw_cxgb/iw_cxgb.h b/sys/dev/cxgb/ulp/iw_cxgb/iw_cxgb.h
index 9fd3e0c..81f305f 100644
--- a/sys/dev/cxgb/ulp/iw_cxgb/iw_cxgb.h
+++ b/sys/dev/cxgb/ulp/iw_cxgb/iw_cxgb.h
@@ -174,4 +174,5 @@ static inline void remove_handle(struct iwch_dev *rhp, struct idr *idr, u32 id)
}
void iwch_ev_dispatch(struct iwch_dev *, struct mbuf *);
+void process_newconn(struct iw_cm_id *parent_cm_id, struct socket *child_so);
#endif
diff --git a/sys/dev/cxgb/ulp/iw_cxgb/iw_cxgb_cm.c b/sys/dev/cxgb/ulp/iw_cxgb/iw_cxgb_cm.c
index b98caae..9bcc1b0 100644
--- a/sys/dev/cxgb/ulp/iw_cxgb/iw_cxgb_cm.c
+++ b/sys/dev/cxgb/ulp/iw_cxgb/iw_cxgb_cm.c
@@ -260,7 +260,6 @@ alloc_ep(int size, int flags)
void __free_ep(struct iwch_ep_common *epc)
{
CTR3(KTR_IW_CXGB, "%s ep %p state %s", __FUNCTION__, epc, states[state_read(epc)]);
- KASSERT(!epc->so, ("%s warning ep->so %p \n", __FUNCTION__, epc->so));
KASSERT(!epc->entry.tqe_prev, ("%s epc %p still on req list!\n", __FUNCTION__, epc));
free(epc, M_DEVBUF);
}
@@ -1361,7 +1360,7 @@ out:
}
int
-iwch_create_listen(struct iw_cm_id *cm_id, int backlog)
+iwch_create_listen_ep(struct iw_cm_id *cm_id, int backlog)
{
int err = 0;
struct iwch_listen_ep *ep;
@@ -1381,35 +1380,22 @@ iwch_create_listen(struct iw_cm_id *cm_id, int backlog)
state_set(&ep->com, LISTEN);
ep->com.so = cm_id->so;
- err = init_sock(&ep->com);
- if (err)
- goto fail;
-
- err = solisten(ep->com.so, ep->backlog, ep->com.thread);
- if (!err) {
- cm_id->provider_data = ep;
- goto out;
- }
- close_socket(&ep->com, 0);
-fail:
- cm_id->rem_ref(cm_id);
- put_ep(&ep->com);
+ cm_id->provider_data = ep;
out:
return err;
}
-int
-iwch_destroy_listen(struct iw_cm_id *cm_id)
+void
+iwch_destroy_listen_ep(struct iw_cm_id *cm_id)
{
struct iwch_listen_ep *ep = to_listen_ep(cm_id);
CTR2(KTR_IW_CXGB, "%s ep %p", __FUNCTION__, ep);
state_set(&ep->com, DEAD);
- close_socket(&ep->com, 0);
cm_id->rem_ref(cm_id);
put_ep(&ep->com);
- return 0;
+ return;
}
int
@@ -1526,54 +1512,32 @@ process_connected(struct iwch_ep *ep)
}
}
-static struct socket *
-dequeue_socket(struct socket *head, struct sockaddr_in **remote, struct iwch_ep *child_ep)
-{
- struct socket *so;
-
- ACCEPT_LOCK();
- so = TAILQ_FIRST(&head->so_comp);
- if (!so) {
- ACCEPT_UNLOCK();
- return NULL;
- }
- TAILQ_REMOVE(&head->so_comp, so, so_list);
- head->so_qlen--;
- SOCK_LOCK(so);
- so->so_qstate &= ~SQ_COMP;
- so->so_head = NULL;
- soref(so);
- soupcall_set(so, SO_RCV, iwch_so_upcall, child_ep);
- so->so_state |= SS_NBIO;
- PANIC_IF(!(so->so_state & SS_ISCONNECTED));
- PANIC_IF(so->so_error);
- SOCK_UNLOCK(so);
- ACCEPT_UNLOCK();
- soaccept(so, (struct sockaddr **)remote);
- return so;
-}
-
-static void
-process_newconn(struct iwch_ep *parent_ep)
+void
+process_newconn(struct iw_cm_id *parent_cm_id, struct socket *child_so)
{
- struct socket *child_so;
struct iwch_ep *child_ep;
+ struct sockaddr_in *local;
struct sockaddr_in *remote;
+ struct iwch_ep *parent_ep = parent_cm_id->provider_data;
CTR3(KTR_IW_CXGB, "%s parent ep %p so %p", __FUNCTION__, parent_ep, parent_ep->com.so);
+ if (!child_so) {
+ log(LOG_ERR, "%s - invalid child socket!\n", __func__);
+ return;
+ }
child_ep = alloc_ep(sizeof(*child_ep), M_NOWAIT);
if (!child_ep) {
log(LOG_ERR, "%s - failed to allocate ep entry!\n",
__FUNCTION__);
return;
}
- child_so = dequeue_socket(parent_ep->com.so, &remote, child_ep);
- if (!child_so) {
- log(LOG_ERR, "%s - failed to dequeue child socket!\n",
- __FUNCTION__);
- __free_ep(&child_ep->com);
- return;
- }
+ SOCKBUF_LOCK(&child_so->so_rcv);
+ soupcall_set(child_so, SO_RCV, iwch_so_upcall, child_ep);
+ SOCKBUF_UNLOCK(&child_so->so_rcv);
+
+ in_getsockaddr(child_so, (struct sockaddr **)&local);
+ in_getpeeraddr(child_so, (struct sockaddr **)&remote);
+
CTR3(KTR_IW_CXGB, "%s remote addr %s port %d", __FUNCTION__,
inet_ntoa(remote->sin_addr), ntohs(remote->sin_port));
child_ep->com.tdev = parent_ep->com.tdev;
@@ -1590,9 +1554,9 @@ process_newconn(struct iwch_ep *parent_ep)
child_ep->com.thread = parent_ep->com.thread;
child_ep->parent_ep = parent_ep;
+ free(local, M_SONAME);
free(remote, M_SONAME);
get_ep(&parent_ep->com);
- child_ep->parent_ep = parent_ep;
callout_init(&child_ep->timer, 1);
state_set(&child_ep->com, MPA_REQ_WAIT);
start_ep_timer(child_ep);
@@ -1630,7 +1594,10 @@ process_socket_event(struct iwch_ep *ep)
}
if (state == LISTEN) {
- process_newconn(ep);
+ /* socket listening events are handled at IWCM */
+ CTR3(KTR_IW_CXGB, "%s Invalid ep state:%u, ep:%p", __func__,
+ ep->com.state, ep);
+ BUG();
return;
}
diff --git a/sys/dev/cxgb/ulp/iw_cxgb/iw_cxgb_cm.h b/sys/dev/cxgb/ulp/iw_cxgb/iw_cxgb_cm.h
index ef76729..241106b 100644
--- a/sys/dev/cxgb/ulp/iw_cxgb/iw_cxgb_cm.h
+++ b/sys/dev/cxgb/ulp/iw_cxgb/iw_cxgb_cm.h
@@ -231,8 +231,8 @@ iwch_wakeup(struct cv *cv, struct mtx *lock, int *rpl_done)
/* CM prototypes */
int iwch_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param);
-int iwch_create_listen(struct iw_cm_id *cm_id, int backlog);
-int iwch_destroy_listen(struct iw_cm_id *cm_id);
+int iwch_create_listen_ep(struct iw_cm_id *cm_id, int backlog);
+void iwch_destroy_listen_ep(struct iw_cm_id *cm_id);
int iwch_reject_cr(struct iw_cm_id *cm_id, const void *pdata, u8 pdata_len);
int iwch_accept_cr(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param);
int iwch_ep_disconnect(struct iwch_ep *ep, int abrupt, int flags);
diff --git a/sys/dev/cxgb/ulp/iw_cxgb/iw_cxgb_provider.c b/sys/dev/cxgb/ulp/iw_cxgb/iw_cxgb_provider.c
index f9d36b3..448b993 100644
--- a/sys/dev/cxgb/ulp/iw_cxgb/iw_cxgb_provider.c
+++ b/sys/dev/cxgb/ulp/iw_cxgb/iw_cxgb_provider.c
@@ -1140,8 +1140,9 @@ int iwch_register_device(struct iwch_dev *dev)
dev->ibdev.iwcm->connect = iwch_connect;
dev->ibdev.iwcm->accept = iwch_accept_cr;
dev->ibdev.iwcm->reject = iwch_reject_cr;
- dev->ibdev.iwcm->create_listen = iwch_create_listen;
- dev->ibdev.iwcm->destroy_listen = iwch_destroy_listen;
+ dev->ibdev.iwcm->create_listen_ep = iwch_create_listen_ep;
+ dev->ibdev.iwcm->destroy_listen_ep = iwch_destroy_listen_ep;
+ dev->ibdev.iwcm->newconn = process_newconn;
dev->ibdev.iwcm->add_ref = iwch_qp_add_ref;
dev->ibdev.iwcm->rem_ref = iwch_qp_rem_ref;
dev->ibdev.iwcm->get_qp = iwch_get_qp;
diff --git a/sys/dev/cxgbe/iw_cxgbe/cm.c b/sys/dev/cxgbe/iw_cxgbe/cm.c
index c3c7f4b..c884f5a 100644
--- a/sys/dev/cxgbe/iw_cxgbe/cm.c
+++ b/sys/dev/cxgbe/iw_cxgbe/cm.c
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2009-2013 Chelsio, Inc. All rights reserved.
+ * Copyright (c) 2009-2013, 2016 Chelsio, Inc. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
@@ -111,8 +111,6 @@ static void ep_timeout(unsigned long arg);
static void init_sock(struct c4iw_ep_common *epc);
static void process_data(struct c4iw_ep *ep);
static void process_connected(struct c4iw_ep *ep);
-static struct socket * dequeue_socket(struct socket *head, struct sockaddr_in **remote, struct c4iw_ep *child_ep);
-static void process_newconn(struct c4iw_ep *parent_ep);
static int c4iw_so_upcall(struct socket *so, void *arg, int waitflag);
static void process_socket_event(struct c4iw_ep *ep);
static void release_ep_resources(struct c4iw_ep *ep);
@@ -623,40 +621,21 @@ process_connected(struct c4iw_ep *ep)
}
}
-static struct socket *
-dequeue_socket(struct socket *head, struct sockaddr_in **remote,
- struct c4iw_ep *child_ep)
-{
- struct socket *so;
-
- ACCEPT_LOCK();
- so = TAILQ_FIRST(&head->so_comp);
- if (!so) {
- ACCEPT_UNLOCK();
- return (NULL);
- }
- TAILQ_REMOVE(&head->so_comp, so, so_list);
- head->so_qlen--;
- SOCK_LOCK(so);
- so->so_qstate &= ~SQ_COMP;
- so->so_head = NULL;
- soref(so);
- soupcall_set(so, SO_RCV, c4iw_so_upcall, child_ep);
- so->so_state |= SS_NBIO;
- SOCK_UNLOCK(so);
- ACCEPT_UNLOCK();
- soaccept(so, (struct sockaddr **)remote);
-
- return (so);
-}
-
-static void
-process_newconn(struct c4iw_ep *parent_ep)
+void
+process_newconn(struct iw_cm_id *parent_cm_id, struct socket *child_so)
{
- struct socket *child_so;
struct c4iw_ep *child_ep;
+ struct sockaddr_in *local;
struct sockaddr_in *remote;
+ struct c4iw_ep *parent_ep = parent_cm_id->provider_data;
+ if (!child_so) {
+ CTR4(KTR_IW_CXGBE,
+ "%s: parent so %p, parent ep %p, child so %p, invalid so",
+ __func__, parent_ep->com.so, parent_ep, child_so);
+ log(LOG_ERR, "%s: invalid child socket\n", __func__);
+ return;
+ }
child_ep = alloc_ep(sizeof(*child_ep), M_NOWAIT);
if (!child_ep) {
CTR3(KTR_IW_CXGBE, "%s: parent so %p, parent ep %p, ENOMEM",
@@ -664,23 +643,18 @@ process_newconn(struct c4iw_ep *parent_ep)
log(LOG_ERR, "%s: failed to allocate ep entry\n", __func__);
return;
}
-
- child_so = dequeue_socket(parent_ep->com.so, &remote, child_ep);
- if (!child_so) {
- CTR4(KTR_IW_CXGBE,
- "%s: parent so %p, parent ep %p, child ep %p, dequeue err",
- __func__, parent_ep->com.so, parent_ep, child_ep);
- log(LOG_ERR, "%s: failed to dequeue child socket\n", __func__);
- __free_ep(&child_ep->com);
- return;
-
- }
+ SOCKBUF_LOCK(&child_so->so_rcv);
+ soupcall_set(child_so, SO_RCV, c4iw_so_upcall, child_ep);
+ SOCKBUF_UNLOCK(&child_so->so_rcv);
CTR5(KTR_IW_CXGBE,
"%s: parent so %p, parent ep %p, child so %p, child ep %p",
__func__, parent_ep->com.so, parent_ep, child_so, child_ep);
- child_ep->com.local_addr = parent_ep->com.local_addr;
+ in_getsockaddr(child_so, (struct sockaddr **)&local);
+ in_getpeeraddr(child_so, (struct sockaddr **)&remote);
+
+ child_ep->com.local_addr = *local;
child_ep->com.remote_addr = *remote;
child_ep->com.dev = parent_ep->com.dev;
child_ep->com.so = child_so;
@@ -688,15 +662,17 @@ process_newconn(struct c4iw_ep *parent_ep)
child_ep->com.thread = parent_ep->com.thread;
child_ep->parent_ep = parent_ep;
+ free(local, M_SONAME);
free(remote, M_SONAME);
+
c4iw_get_ep(&parent_ep->com);
- child_ep->parent_ep = parent_ep;
init_timer(&child_ep->timer);
state_set(&child_ep->com, MPA_REQ_WAIT);
START_EP_TIMER(child_ep);
/* maybe the request has already been queued up on the socket... */
process_mpa_request(child_ep);
+ return;
}
static int
@@ -738,7 +714,10 @@ process_socket_event(struct c4iw_ep *ep)
}
if (state == LISTEN) {
- process_newconn(ep);
+ /* socket listening events are handled at IWCM */
+ CTR3(KTR_IW_CXGBE, "%s Invalid ep state:%u, ep:%p", __func__,
+ ep->com.state, ep);
+ BUG();
return;
}
@@ -919,7 +898,6 @@ void _c4iw_free_ep(struct kref *kref)
ep = container_of(kref, struct c4iw_ep, com.kref);
epc = &ep->com;
- KASSERT(!epc->so, ("%s ep->so %p", __func__, epc->so));
KASSERT(!epc->entry.tqe_prev, ("%s epc %p still on req list",
__func__, epc));
kfree(ep);
@@ -2126,10 +2104,10 @@ out:
}
/*
- * iwcm->create_listen. Returns -errno on failure.
+ * iwcm->create_listen_ep. Returns -errno on failure.
*/
int
-c4iw_create_listen(struct iw_cm_id *cm_id, int backlog)
+c4iw_create_listen_ep(struct iw_cm_id *cm_id, int backlog)
{
int rc;
struct c4iw_dev *dev = to_c4iw_dev(cm_id->device);
@@ -2154,17 +2132,6 @@ c4iw_create_listen(struct iw_cm_id *cm_id, int backlog)
ep->com.thread = curthread;
state_set(&ep->com, LISTEN);
ep->com.so = so;
- init_sock(&ep->com);
-
- rc = solisten(so, ep->backlog, ep->com.thread);
- if (rc != 0) {
- log(LOG_ERR, "%s: failed to start listener: %d\n", __func__,
- rc);
- close_socket(&ep->com, 0);
- cm_id->rem_ref(cm_id);
- c4iw_put_ep(&ep->com);
- goto failed;
- }
cm_id->provider_data = ep;
return (0);
@@ -2174,21 +2141,19 @@ failed:
return (-rc);
}
-int
-c4iw_destroy_listen(struct iw_cm_id *cm_id)
+void
+c4iw_destroy_listen_ep(struct iw_cm_id *cm_id)
{
- int rc;
struct c4iw_listen_ep *ep = to_listen_ep(cm_id);
- CTR4(KTR_IW_CXGBE, "%s: cm_id %p, so %p, inp %p", __func__, cm_id,
- cm_id->so, cm_id->so->so_pcb);
+ CTR4(KTR_IW_CXGBE, "%s: cm_id %p, so %p, state %s", __func__, cm_id,
+ cm_id->so, states[ep->com.state]);
state_set(&ep->com, DEAD);
- rc = close_socket(&ep->com, 0);
cm_id->rem_ref(cm_id);
c4iw_put_ep(&ep->com);
- return (rc);
+ return;
}
int c4iw_ep_disconnect(struct c4iw_ep *ep, int abrupt, gfp_t gfp)
diff --git a/sys/dev/cxgbe/iw_cxgbe/iw_cxgbe.h b/sys/dev/cxgbe/iw_cxgbe/iw_cxgbe.h
index e6d70f4..f6c8a59 100644
--- a/sys/dev/cxgbe/iw_cxgbe/iw_cxgbe.h
+++ b/sys/dev/cxgbe/iw_cxgbe/iw_cxgbe.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2009-2013 Chelsio, Inc. All rights reserved.
+ * Copyright (c) 2009-2013, 2016 Chelsio, Inc. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
@@ -850,8 +850,8 @@ int c4iw_post_receive(struct ib_qp *ibqp, struct ib_recv_wr *wr,
int c4iw_bind_mw(struct ib_qp *qp, struct ib_mw *mw,
struct ib_mw_bind *mw_bind);
int c4iw_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param);
-int c4iw_create_listen(struct iw_cm_id *cm_id, int backlog);
-int c4iw_destroy_listen(struct iw_cm_id *cm_id);
+int c4iw_create_listen_ep(struct iw_cm_id *cm_id, int backlog);
+void c4iw_destroy_listen_ep(struct iw_cm_id *cm_id);
int c4iw_accept_cr(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param);
int c4iw_reject_cr(struct iw_cm_id *cm_id, const void *pdata, u8 pdata_len);
void c4iw_qp_add_ref(struct ib_qp *qp);
@@ -914,6 +914,8 @@ u32 c4iw_get_qpid(struct c4iw_rdev *rdev, struct c4iw_dev_ucontext *uctx);
void c4iw_put_qpid(struct c4iw_rdev *rdev, u32 qid,
struct c4iw_dev_ucontext *uctx);
void c4iw_ev_dispatch(struct c4iw_dev *dev, struct t4_cqe *err_cqe);
+void process_newconn(struct iw_cm_id *parent_cm_id,
+ struct socket *child_so);
extern struct cxgb4_client t4c_client;
extern c4iw_handler_func c4iw_handlers[NUM_CPL_CMDS];
diff --git a/sys/dev/cxgbe/iw_cxgbe/provider.c b/sys/dev/cxgbe/iw_cxgbe/provider.c
index d7ce079..a21fb9c 100644
--- a/sys/dev/cxgbe/iw_cxgbe/provider.c
+++ b/sys/dev/cxgbe/iw_cxgbe/provider.c
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2009-2013 Chelsio, Inc. All rights reserved.
+ * Copyright (c) 2009-2013, 2016 Chelsio, Inc. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
@@ -474,8 +474,9 @@ c4iw_register_device(struct c4iw_dev *dev)
iwcm->connect = c4iw_connect;
iwcm->accept = c4iw_accept_cr;
iwcm->reject = c4iw_reject_cr;
- iwcm->create_listen = c4iw_create_listen;
- iwcm->destroy_listen = c4iw_destroy_listen;
+ iwcm->create_listen_ep = c4iw_create_listen_ep;
+ iwcm->destroy_listen_ep = c4iw_destroy_listen_ep;
+ iwcm->newconn = process_newconn;
iwcm->add_ref = c4iw_qp_add_ref;
iwcm->rem_ref = c4iw_qp_rem_ref;
iwcm->get_qp = c4iw_get_qp;
diff --git a/sys/ofed/drivers/infiniband/core/cma.c b/sys/ofed/drivers/infiniband/core/cma.c
index 40c4d82..1cafced 100644
--- a/sys/ofed/drivers/infiniband/core/cma.c
+++ b/sys/ofed/drivers/infiniband/core/cma.c
@@ -3,6 +3,7 @@
* Copyright (c) 2002-2005, Network Appliance, Inc. All rights reserved.
* Copyright (c) 1999-2005, Mellanox Technologies, Inc. All rights reserved.
* Copyright (c) 2005-2006 Intel Corporation. All rights reserved.
+ * Copyright (c) 2016 Chelsio Communications. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
@@ -407,6 +408,75 @@ static int find_gid_port(struct ib_device *device, union ib_gid *gid, u8 port_nu
return -EAGAIN;
}
+/*
+ * Map the local address of an accepted connection to the per-device iWARP
+ * listening cm_id that should receive it.
+ *
+ * The address is first translated to a device address/gid, which is used to
+ * pick the iWARP cma_device (of link layer derived from 'dev_type') owning
+ * that gid.  The wildcard listeners on listen_any_list are then searched for
+ * a per-device listener on that cma_device bound to the same TCP port.
+ *
+ * local_addr:	local address (and port) of the accepted connection.
+ * dev_type:	ARPHRD_* type; ARPHRD_INFINIBAND selects the InfiniBand link
+ *		layer, anything else selects Ethernet.
+ * cm_id:	out parameter, set to the matching struct iw_cm_id on success
+ *		and left untouched otherwise.
+ *
+ * Returns 0 when a listener was found and -ENODEV in every other case.
+ *
+ * The global 'lock' is held across both the device search and the listener
+ * search: the listener search dereferences the cma_device selected by the
+ * first step and walks listen lists, so dropping the lock in between would
+ * let either be modified underneath the traversal.
+ */
+int
+rdma_find_cmid_laddr(struct sockaddr_in *local_addr, unsigned short dev_type,
+    void **cm_id)
+{
+	struct rdma_id_private *id_priv;
+	struct rdma_id_private *dev_id_priv;
+	struct cma_device *cma_dev;
+	struct rdma_dev_addr dev_addr;
+	union ib_gid gid;
+	enum rdma_link_layer dev_ll = dev_type == ARPHRD_INFINIBAND ?
+	    IB_LINK_LAYER_INFINIBAND : IB_LINK_LAYER_ETHERNET;
+	int found_dev = 0, found_cmid = 0;
+	int ret;
+	u8 port;
+
+	memset(&dev_addr, 0, sizeof(dev_addr));
+
+	ret = rdma_translate_ip((struct sockaddr *)local_addr,
+	    &dev_addr, NULL);
+	if (ret)
+		return -ENODEV;
+
+	/* The gid is derived from local data only; no lock needed yet. */
+	memcpy(&gid, dev_addr.src_dev_addr +
+	    rdma_addr_gid_offset(&dev_addr), sizeof(gid));
+
+	mutex_lock(&lock);
+
+	/* find rdma device based on MAC address/gid */
+	list_for_each_entry(cma_dev, &dev_list, list) {
+		for (port = 1; port <= cma_dev->device->phys_port_cnt;
+		    ++port) {
+			if (rdma_port_get_link_layer(cma_dev->device, port) !=
+			    dev_ll ||
+			    rdma_node_get_transport(
+			    cma_dev->device->node_type) !=
+			    RDMA_TRANSPORT_IWARP)
+				continue;
+
+			ret = find_gid_port(cma_dev->device, &gid, port);
+			if (!ret) {
+				found_dev = 1;
+				goto search_listeners;
+			}
+			/* A result of 1 is treated as fatal: stop searching. */
+			if (ret == 1)
+				goto unlock;
+		}
+	}
+
+search_listeners:
+	if (!found_dev)
+		goto unlock;
+
+	/*
+	 * Traverse through the list of listening cm_id's to find the
+	 * desired cm_id based on rdma device & port number.  The scan is
+	 * not cut short, so the last matching listener is the one reported.
+	 */
+	list_for_each_entry(id_priv, &listen_any_list, list) {
+		list_for_each_entry(dev_id_priv, &id_priv->listen_list,
+		    listen_list) {
+			if (dev_id_priv->cma_dev != cma_dev)
+				continue;
+			if (dev_id_priv->cm_id.iw->local_addr.sin_port ==
+			    local_addr->sin_port) {
+				*cm_id = (void *)dev_id_priv->cm_id.iw;
+				found_cmid = 1;
+			}
+		}
+	}
+
+unlock:
+	mutex_unlock(&lock);
+	return found_cmid ? 0 : -ENODEV;
+}
+EXPORT_SYMBOL(rdma_find_cmid_laddr);
+
static int cma_acquire_dev(struct rdma_id_private *id_priv)
{
struct rdma_dev_addr *dev_addr = &id_priv->id.route.addr.dev_addr;
@@ -780,6 +850,12 @@ static inline int cma_any_addr(struct sockaddr *addr)
{
return cma_zero_addr(addr) || cma_loopback_addr(addr);
}
+/*
+ * Exported form of the file-local cma_any_addr() helper, so that code outside
+ * this file can ask the same question about 'addr'.  The helper's result is
+ * passed through unchanged.
+ */
+int
+rdma_cma_any_addr(struct sockaddr *addr)
+{
+	int is_any = cma_any_addr(addr);
+
+	return is_any;
+}
+EXPORT_SYMBOL(rdma_cma_any_addr);
static int cma_addr_cmp(struct sockaddr *src, struct sockaddr *dst)
{
@@ -1707,6 +1783,7 @@ static void cma_listen_on_dev(struct rdma_id_private *id_priv,
dev_id_priv = container_of(id, struct rdma_id_private, id);
dev_id_priv->state = RDMA_CM_ADDR_BOUND;
+ dev_id_priv->sock = id_priv->sock;
memcpy(&id->route.addr.src_addr, &id_priv->id.route.addr.src_addr,
ip_addr_size((struct sockaddr *) &id_priv->id.route.addr.src_addr));
diff --git a/sys/ofed/drivers/infiniband/core/iwcm.c b/sys/ofed/drivers/infiniband/core/iwcm.c
index 14d23cc..a90f907 100644
--- a/sys/ofed/drivers/infiniband/core/iwcm.c
+++ b/sys/ofed/drivers/infiniband/core/iwcm.c
@@ -5,6 +5,7 @@
* Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved.
* Copyright (c) 2005 Open Grid Computing, Inc. All rights reserved.
* Copyright (c) 2005 Network Appliance, Inc. All rights reserved.
+ * Copyright (c) 2016 Chelsio Communications. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
@@ -35,6 +36,8 @@
* SOFTWARE.
*
*/
+#include "opt_inet.h"
+
#include <linux/dma-mapping.h>
#include <linux/err.h>
#include <linux/idr.h>
@@ -47,7 +50,10 @@
#include <linux/slab.h>
#include <linux/module.h>
#include <linux/string.h>
+#include <netinet/tcp.h>
+#include <sys/mutex.h>
+#include <rdma/rdma_cm.h>
#include <rdma/iw_cm.h>
#include <rdma/ib_addr.h>
@@ -65,6 +71,85 @@ struct iwcm_work {
struct iw_cm_event event;
struct list_head free_list;
};
+/*
+ * Deferred-work item queued by iw_so_upcall() and consumed (and freed) by
+ * iw_so_event_handler().
+ */
+struct iwcm_listen_work {
+ struct work_struct work; /* handed to queue_work() on iwcm_wq */
+ struct iw_cm_id *cm_id; /* cm_id that was registered as the upcall argument */
+};
+
+/* List of struct listen_port_info entries, one per port with listeners. */
+static LIST_HEAD(listen_port_list);
+
+/* Taken by add_port_to_listenlist()/rem_port_from_listenlist() around every access to listen_port_list. */
+static DEFINE_MUTEX(listen_port_mutex);
+/* Taken by iw_so_upcall() and iw_so_event_handler() around their use of the listening socket's completed-connection queue. */
+static DEFINE_MUTEX(dequeue_mutex);
+
+/* Per-port bookkeeping entry kept on listen_port_list. */
+struct listen_port_info {
+ struct list_head list; /* linkage on listen_port_list */
+ uint16_t port_num; /* port value as passed by callers (they pass local_addr.sin_port) */
+ uint32_t refcnt; /* number of listeners currently registered for port_num */
+};
+
+/*
+ * Register one more listener for 'port'.
+ *
+ * If the port already has an entry on listen_port_list its reference count
+ * is bumped; otherwise a new entry is allocated and starts with a count of
+ * one.
+ *
+ * Returns the reference count after the increment (so 1 means "first
+ * listener for this port"), or -ENOMEM if a new entry could not be
+ * allocated.
+ */
+static int32_t
+add_port_to_listenlist(uint16_t port)
+{
+	struct listen_port_info *port_info;
+	int32_t refcnt;
+
+	mutex_lock(&listen_port_mutex);
+
+	list_for_each_entry(port_info, &listen_port_list, list)
+		if (port_info->port_num == port)
+			goto found_port;
+
+	port_info = kmalloc(sizeof(*port_info), GFP_KERNEL);
+	if (!port_info) {
+		mutex_unlock(&listen_port_mutex);
+		return -ENOMEM;
+	}
+
+	port_info->port_num = port;
+	port_info->refcnt = 0;
+
+	list_add(&port_info->list, &listen_port_list);
+
+found_port:
+	/*
+	 * Snapshot the new count while the mutex is still held.  Once it is
+	 * dropped, a concurrent rem_port_from_listenlist() may free the entry,
+	 * so port_info must not be touched afterwards.
+	 */
+	refcnt = ++(port_info->refcnt);
+	mutex_unlock(&listen_port_mutex);
+	return refcnt;
+}
+
+/*
+ * Drop one listener reference for 'port'.
+ *
+ * Returns the reference count left after the decrement, or -EINVAL when the
+ * port has no entry on listen_port_list.  When the count reaches zero the
+ * entry is unlinked and freed.
+ */
+static int32_t
+rem_port_from_listenlist(uint16_t port)
+{
+	struct listen_port_info *cur;
+	struct listen_port_info *match = NULL;
+	int remaining = -EINVAL;
+
+	mutex_lock(&listen_port_mutex);
+
+	list_for_each_entry(cur, &listen_port_list, list) {
+		if (cur->port_num == port) {
+			match = cur;
+			break;
+		}
+	}
+
+	if (match != NULL) {
+		remaining = --(match->refcnt);
+		if (remaining == 0) {
+			/* Last listener for this port is gone: drop the entry. */
+			list_del(&match->list);
+			kfree(match);
+		}
+	}
+
+	mutex_unlock(&listen_port_mutex);
+	return remaining;
+}
/*
* The following services provide a mechanism for pre-allocating iwcm_work
@@ -320,6 +405,167 @@ int iw_cm_disconnect(struct iw_cm_id *cm_id, int abrupt)
}
EXPORT_SYMBOL(iw_cm_disconnect);
+/*
+ * Take the first socket off the completed-connection queue of the listening
+ * socket 'head' and accept it.
+ *
+ * Returns the dequeued socket, marked non-blocking and carrying the
+ * reference taken here, or NULL when the queue is empty.  The peer address
+ * produced by soaccept() is not needed by the caller and is freed here.
+ */
+static struct socket *
+dequeue_socket(struct socket *head)
+{
+	struct socket *so;
+	/*
+	 * Must start out NULL: the pointer is freed unconditionally below and
+	 * soaccept()'s return value is not checked, so if soaccept() fails
+	 * without storing an address an uninitialized pointer would be freed.
+	 */
+	struct sockaddr_in *remote = NULL;
+
+	ACCEPT_LOCK();
+	so = TAILQ_FIRST(&head->so_comp);
+	if (!so) {
+		ACCEPT_UNLOCK();
+		return NULL;
+	}
+
+	SOCK_LOCK(so);
+	/*
+	 * Before changing the flags on the socket, we have to bump the
+	 * reference count. Otherwise, if the protocol calls sofree(),
+	 * the socket will be released due to a zero refcount.
+	 */
+	soref(so);
+	TAILQ_REMOVE(&head->so_comp, so, so_list);
+	head->so_qlen--;
+	so->so_qstate &= ~SQ_COMP;
+	so->so_head = NULL;
+	so->so_state |= SS_NBIO;
+	SOCK_UNLOCK(so);
+	ACCEPT_UNLOCK();
+	soaccept(so, (struct sockaddr **)&remote);
+
+	free(remote, M_SONAME);
+	return so;
+}
+/*
+ * Work handler queued by iw_so_upcall(): drains the completed-connection
+ * queue of a listening cm_id's socket and hands every new connection to the
+ * provider's newconn() callback.
+ *
+ * For a listener bound to a wildcard address (per rdma_cma_any_addr()) the
+ * connection's local address is used to look up the per-device listening
+ * cm_id that should own it; otherwise the listening cm_id itself is used.
+ *
+ * The work item is always freed here, whether or not INET is configured.
+ */
+static void
+iw_so_event_handler(struct work_struct *_work)
+{
+	struct iwcm_listen_work *work = container_of(_work,
+	    struct iwcm_listen_work, work);
+#ifdef INET
+	struct iw_cm_id *listen_cm_id = work->cm_id;
+	struct iwcm_id_private *cm_id_priv;
+	struct iw_cm_id *real_cm_id;
+	struct sockaddr_in *local;
+	struct socket *so;
+	int rc;
+
+	cm_id_priv = container_of(listen_cm_id, struct iwcm_id_private, id);
+
+	/* Nothing to do if the cm_id is no longer listening. */
+	if (cm_id_priv->state != IW_CM_STATE_LISTEN)
+		goto out;
+
+	mutex_lock(&dequeue_mutex);
+
+	/* Dequeue & process all new 'so' connection requests for this cmid */
+	while ((so = dequeue_socket(listen_cm_id->so)) != NULL) {
+		if (!rdma_cma_any_addr((struct sockaddr *)
+		    &listen_cm_id->local_addr)) {
+			listen_cm_id->device->iwcm->newconn(listen_cm_id, so);
+			continue;
+		}
+
+		in_getsockaddr(so, (struct sockaddr **)&local);
+		rc = rdma_find_cmid_laddr(local, ARPHRD_ETHER,
+		    (void **)&real_cm_id);
+		free(local, M_SONAME);
+		if (rc != 0) {
+			/*
+			 * No listener owns this connection.  The socket has
+			 * already been taken off the accept queue with a
+			 * reference held, so it must be closed here or it
+			 * would be leaked; then carry on with the remaining
+			 * queued connections.
+			 */
+			soclose(so);
+			continue;
+		}
+
+		real_cm_id->device->iwcm->newconn(real_cm_id, so);
+	}
+
+	mutex_unlock(&dequeue_mutex);
+out:
+#endif
+	kfree(work);
+}
+/*
+ * Receive upcall installed on a listening socket by iw_init_sock().
+ *
+ * If the socket has at least one completed connection queued, a work item is
+ * allocated and queued on iwcm_wq so that iw_so_event_handler() accepts the
+ * connection(s) later.
+ *
+ * parent_so:	the listening socket.
+ * arg:		the struct iw_cm_id registered together with the upcall.
+ * waitflag:	unused.
+ *
+ * Returns SU_OK, or -ENOMEM if the work item could not be allocated.
+ * dequeue_mutex is released on every return path.
+ */
+static int
+iw_so_upcall(struct socket *parent_so, void *arg, int waitflag)
+{
+	struct iwcm_listen_work *work;
+	struct iw_cm_id *cm_id = arg;
+	int rc = SU_OK;
+
+	mutex_lock(&dequeue_mutex);
+
+	/* check whether iw_so_event_handler() already dequeued this 'so' */
+	if (TAILQ_FIRST(&parent_so->so_comp) == NULL)
+		goto out;
+
+	work = kzalloc(sizeof(*work), M_NOWAIT);
+	if (!work) {
+		rc = -ENOMEM;
+		goto out;
+	}
+	work->cm_id = cm_id;
+
+	INIT_WORK(&work->work, iw_so_event_handler);
+	queue_work(iwcm_wq, &work->work);
+
+out:
+	mutex_unlock(&dequeue_mutex);
+	return rc;
+}
+
+/*
+ * Prepare cm_id's socket for use as an iWARP listener: install
+ * iw_so_upcall() as its receive upcall (with cm_id as the argument), mark it
+ * non-blocking, and request TCP_NODELAY.  The result of the socket option
+ * call is not checked.
+ */
+static void
+iw_init_sock(struct iw_cm_id *cm_id)
+{
+	struct socket *lso = cm_id->so;
+	struct sockopt opt;
+	int nodelay = 1;
+
+	/* Describe the TCP_NODELAY request up front. */
+	opt.sopt_dir = SOPT_SET;
+	opt.sopt_level = IPPROTO_TCP;
+	opt.sopt_name = TCP_NODELAY;
+	opt.sopt_val = (caddr_t)&nodelay;
+	opt.sopt_valsize = sizeof(nodelay);
+	opt.sopt_td = NULL;
+
+	SOCK_LOCK(lso);
+	soupcall_set(lso, SO_RCV, iw_so_upcall, cm_id);
+	lso->so_state |= SS_NBIO;
+	SOCK_UNLOCK(lso);
+
+	sosetopt(lso, &opt);
+}
+
+/*
+ * Detach cm_id from its socket: clear the receive upcall, then either close
+ * the socket ('close' non-zero) or shut it down ('close' zero), and finally
+ * forget the socket pointer in cm_id.
+ *
+ * Returns whatever soclose()/soshutdown() returned.
+ *
+ * NOTE(review): SHUT_WR | SHUT_RD is a bitwise OR of two 'how' selector
+ * values rather than of flag bits; if the intent is to shut down both
+ * directions, SHUT_RDWR is probably what is wanted -- confirm before
+ * changing, since this would alter listener teardown.
+ */
+static int
+iw_close_socket(struct iw_cm_id *cm_id, int close)
+{
+	struct socket *lso = cm_id->so;
+	int error;
+
+	SOCK_LOCK(lso);
+	soupcall_clear(lso, SO_RCV);
+	SOCK_UNLOCK(lso);
+
+	error = close ? soclose(lso) : soshutdown(lso, SHUT_WR | SHUT_RD);
+
+	cm_id->so = NULL;
+	return error;
+}
+
+/*
+ * Turn cm_id's socket into a listening socket with the given backlog.
+ *
+ * Returns 0 on success.  If solisten() fails, the socket is shut down via
+ * iw_close_socket() (which also clears cm_id->so) and solisten()'s error is
+ * returned.
+ */
+static int
+iw_create_listen(struct iw_cm_id *cm_id, int backlog)
+{
+	int error;
+
+	iw_init_sock(cm_id);
+
+	error = solisten(cm_id->so, backlog, curthread);
+	if (error == 0)
+		return 0;
+
+	iw_close_socket(cm_id, 0);
+	return error;
+}
+
+/*
+ * Tear down the socket listener behind cm_id by shutting the socket down
+ * (not closing it) through iw_close_socket(); returns that call's result.
+ */
+static int
+iw_destroy_listen(struct iw_cm_id *cm_id)
+{
+	return iw_close_socket(cm_id, 0);
+}
+
+
/*
* CM_ID <-- DESTROYING
*
@@ -330,7 +576,7 @@ static void destroy_cm_id(struct iw_cm_id *cm_id)
{
struct iwcm_id_private *cm_id_priv;
unsigned long flags;
- int ret;
+ int ret = 0, refcnt;
cm_id_priv = container_of(cm_id, struct iwcm_id_private, id);
/*
@@ -345,8 +591,18 @@ static void destroy_cm_id(struct iw_cm_id *cm_id)
case IW_CM_STATE_LISTEN:
cm_id_priv->state = IW_CM_STATE_DESTROYING;
spin_unlock_irqrestore(&cm_id_priv->lock, flags);
- /* destroy the listening endpoint */
- ret = cm_id->device->iwcm->destroy_listen(cm_id);
+ if (rdma_cma_any_addr((struct sockaddr *)&cm_id->local_addr)) {
+ refcnt =
+ rem_port_from_listenlist(cm_id->local_addr.sin_port);
+
+ if (refcnt == 0)
+ ret = iw_destroy_listen(cm_id);
+
+ cm_id->device->iwcm->destroy_listen_ep(cm_id);
+ } else {
+ ret = iw_destroy_listen(cm_id);
+ cm_id->device->iwcm->destroy_listen_ep(cm_id);
+ }
spin_lock_irqsave(&cm_id_priv->lock, flags);
break;
case IW_CM_STATE_ESTABLISHED:
@@ -418,7 +674,7 @@ int iw_cm_listen(struct iw_cm_id *cm_id, int backlog)
{
struct iwcm_id_private *cm_id_priv;
unsigned long flags;
- int ret;
+ int ret, refcnt;
cm_id_priv = container_of(cm_id, struct iwcm_id_private, id);
@@ -431,9 +687,33 @@ int iw_cm_listen(struct iw_cm_id *cm_id, int backlog)
case IW_CM_STATE_IDLE:
cm_id_priv->state = IW_CM_STATE_LISTEN;
spin_unlock_irqrestore(&cm_id_priv->lock, flags);
- ret = cm_id->device->iwcm->create_listen(cm_id, backlog);
- if (ret)
+
+ if (rdma_cma_any_addr((struct sockaddr *)&cm_id->local_addr)) {
+ refcnt =
+ add_port_to_listenlist(cm_id->local_addr.sin_port);
+
+ if (refcnt == 1) {
+ ret = iw_create_listen(cm_id, backlog);
+ } else if (refcnt <= 0) {
+ ret = -EINVAL;
+ } else {
+ /* if refcnt > 1, a socket listener created
+ * already. And we need not create socket
+ * listener on other rdma devices/listen cm_id's
+ * due to TOE. That is when a socket listener is
+ * created with INADDR_ANY all registered TOE
+ * devices will get a call to start
+ * hardware listeners.
+ */
+ }
+ } else {
+ ret = iw_create_listen(cm_id, backlog);
+ }
+ if (!ret)
+ cm_id->device->iwcm->create_listen_ep(cm_id, backlog);
+ else
cm_id_priv->state = IW_CM_STATE_IDLE;
+
spin_lock_irqsave(&cm_id_priv->lock, flags);
break;
default:
diff --git a/sys/ofed/include/rdma/iw_cm.h b/sys/ofed/include/rdma/iw_cm.h
index 271c2f8..a246e61 100644
--- a/sys/ofed/include/rdma/iw_cm.h
+++ b/sys/ofed/include/rdma/iw_cm.h
@@ -1,6 +1,7 @@
/*
* Copyright (c) 2005 Network Appliance, Inc. All rights reserved.
* Copyright (c) 2005 Open Grid Computing, Inc. All rights reserved.
+ * Copyright (c) 2016 Chelsio Communications. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
@@ -120,10 +121,13 @@ struct iw_cm_verbs {
int (*reject)(struct iw_cm_id *cm_id,
const void *pdata, u8 pdata_len);
- int (*create_listen)(struct iw_cm_id *cm_id,
+ int (*create_listen_ep)(struct iw_cm_id *cm_id,
int backlog);
- int (*destroy_listen)(struct iw_cm_id *cm_id);
+ void (*destroy_listen_ep)(struct iw_cm_id *cm_id);
+
+ void (*newconn)(struct iw_cm_id *parent_cm_id,
+ struct socket *so);
};
/**
diff --git a/sys/ofed/include/rdma/rdma_cm.h b/sys/ofed/include/rdma/rdma_cm.h
index d699261..33be957 100644
--- a/sys/ofed/include/rdma/rdma_cm.h
+++ b/sys/ofed/include/rdma/rdma_cm.h
@@ -1,6 +1,7 @@
/*
* Copyright (c) 2005 Voltaire Inc. All rights reserved.
* Copyright (c) 2005 Intel Corporation. All rights reserved.
+ * Copyright (c) 2016 Chelsio Communications. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
@@ -400,5 +401,7 @@ int rdma_set_afonly(struct rdma_cm_id *id, int afonly);
* @timeout: QP timeout
*/
void rdma_set_timeout(struct rdma_cm_id *id, int timeout);
-
+int rdma_cma_any_addr(struct sockaddr *addr);
+int rdma_find_cmid_laddr(struct sockaddr_in *local_addr,
+ unsigned short dev_type, void **cm_id);
#endif /* RDMA_CM_H */
OpenPOWER on IntegriCloud