summaryrefslogtreecommitdiffstats
path: root/sys
diff options
context:
space:
mode:
authornp <np@FreeBSD.org>2013-10-21 01:10:37 +0000
committernp <np@FreeBSD.org>2013-10-21 01:10:37 +0000
commit01cd6364a8ed9e44d43face023115015f242e98b (patch)
tree8a4312b3d53bc3be34c713abe57bde9ae5131957 /sys
parent514a2c2eaae634f1b2dfb3644838d92fa2916acb (diff)
downloadFreeBSD-src-01cd6364a8ed9e44d43face023115015f242e98b.zip
FreeBSD-src-01cd6364a8ed9e44d43face023115015f242e98b.tar.gz
MFC r256694, r256713, r256714.
r256694: iw_cxgbe: iWARP driver for Chelsio T4/T5 chips. This is a straight port of the iw_cxgb4 found in OFED distributions. r256713: iw_cxgbe should have a dependency on t4nex. r256714: Fix typo in previous commit. Approved by: re (hrs)
Diffstat (limited to 'sys')
-rw-r--r--sys/dev/cxgbe/iw_cxgbe/cm.c2458
-rw-r--r--sys/dev/cxgbe/iw_cxgbe/cq.c926
-rw-r--r--sys/dev/cxgbe/iw_cxgbe/device.c369
-rw-r--r--sys/dev/cxgbe/iw_cxgbe/ev.c206
-rw-r--r--sys/dev/cxgbe/iw_cxgbe/id_table.c118
-rw-r--r--sys/dev/cxgbe/iw_cxgbe/iw_cxgbe.h1046
-rw-r--r--sys/dev/cxgbe/iw_cxgbe/mem.c828
-rw-r--r--sys/dev/cxgbe/iw_cxgbe/provider.c498
-rw-r--r--sys/dev/cxgbe/iw_cxgbe/qp.c1707
-rw-r--r--sys/dev/cxgbe/iw_cxgbe/resource.c342
-rw-r--r--sys/dev/cxgbe/iw_cxgbe/t4.h597
-rw-r--r--sys/dev/cxgbe/iw_cxgbe/user.h70
-rw-r--r--sys/modules/cxgbe/Makefile9
-rw-r--r--sys/modules/cxgbe/iw_cxgbe/Makefile27
14 files changed, 9200 insertions, 1 deletions
diff --git a/sys/dev/cxgbe/iw_cxgbe/cm.c b/sys/dev/cxgbe/iw_cxgbe/cm.c
new file mode 100644
index 0000000..03652a3
--- /dev/null
+++ b/sys/dev/cxgbe/iw_cxgbe/cm.c
@@ -0,0 +1,2458 @@
+/*
+ * Copyright (c) 2009-2013 Chelsio, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include "opt_inet.h"
+
+#ifdef TCP_OFFLOAD
+#include <sys/types.h>
+#include <sys/malloc.h>
+#include <sys/socket.h>
+#include <sys/socketvar.h>
+#include <sys/sockio.h>
+#include <sys/taskqueue.h>
+#include <netinet/in.h>
+#include <net/neighbour.h>
+#include <net/route.h>
+
+#include <netinet/in_systm.h>
+#include <netinet/in_pcb.h>
+#include <netinet/ip.h>
+#include <netinet/ip_var.h>
+#include <netinet/tcp_var.h>
+#include <netinet/tcp.h>
+#include <netinet/tcpip.h>
+
+#include <netinet/toecore.h>
+
+struct sge_iq;
+struct rss_header;
+#include <linux/types.h>
+#include "offload.h"
+#include "tom/t4_tom.h"
+
+/* Fetch the struct toepcb stored in t_toe of the tcpcb that so_sototcpcb() returns for 'so'. */
+#define TOEPCB(so) ((struct toepcb *)(so_sototcpcb((so))->t_toe))
+
+#include "iw_cxgbe.h"
+#include <linux/module.h>
+#include <linux/workqueue.h>
+#include <linux/notifier.h>
+#include <linux/inetdevice.h>
+#include <linux/if_vlan.h>
+#include <net/netevent.h>
+
+/* Taken around every req_list access in process_req() and c4iw_so_upcall(). */
+static spinlock_t req_lock;
+/* Endpoints queued by c4iw_so_upcall() and drained by process_req(). */
+static TAILQ_HEAD(c4iw_ep_list, c4iw_ep_common) req_list;
+/* Work item that c4iw_so_upcall() queues on c4iw_taskq. */
+static struct work_struct c4iw_task;
+static struct workqueue_struct *c4iw_taskq;
+/* NOTE(review): timeout_list/timeout_lock are not referenced in the part of the file reviewed here. */
+static LIST_HEAD(timeout_list);
+static spinlock_t timeout_lock;
+
+/*
+ * Forward declarations for the helpers defined later in this file.
+ * Everything here is file-local (static) except __free_ep().
+ */
+static void process_req(struct work_struct *ctx);
+static void start_ep_timer(struct c4iw_ep *ep);
+static void stop_ep_timer(struct c4iw_ep *ep);
+static int set_tcpinfo(struct c4iw_ep *ep);
+static enum c4iw_ep_state state_read(struct c4iw_ep_common *epc);
+static void __state_set(struct c4iw_ep_common *epc, enum c4iw_ep_state tostate);
+static void state_set(struct c4iw_ep_common *epc, enum c4iw_ep_state tostate);
+static void *alloc_ep(int size, gfp_t flags);
+void __free_ep(struct c4iw_ep_common *epc);
+static struct rtentry * find_route(__be32 local_ip, __be32 peer_ip, __be16 local_port,
+ __be16 peer_port, u8 tos);
+static int close_socket(struct c4iw_ep_common *epc, int close);
+static int shutdown_socket(struct c4iw_ep_common *epc);
+static void abort_socket(struct c4iw_ep *ep);
+static void send_mpa_req(struct c4iw_ep *ep);
+static int send_mpa_reject(struct c4iw_ep *ep, const void *pdata, u8 plen);
+static int send_mpa_reply(struct c4iw_ep *ep, const void *pdata, u8 plen);
+static void close_complete_upcall(struct c4iw_ep *ep);
+static int abort_connection(struct c4iw_ep *ep);
+static void peer_close_upcall(struct c4iw_ep *ep);
+static void peer_abort_upcall(struct c4iw_ep *ep);
+static void connect_reply_upcall(struct c4iw_ep *ep, int status);
+static void connect_request_upcall(struct c4iw_ep *ep);
+static void established_upcall(struct c4iw_ep *ep);
+static void process_mpa_reply(struct c4iw_ep *ep);
+static void process_mpa_request(struct c4iw_ep *ep);
+static void process_peer_close(struct c4iw_ep *ep);
+static void process_conn_error(struct c4iw_ep *ep);
+static void process_close_complete(struct c4iw_ep *ep);
+static void ep_timeout(unsigned long arg);
+static void init_sock(struct c4iw_ep_common *epc);
+static void process_data(struct c4iw_ep *ep);
+static void process_connected(struct c4iw_ep *ep);
+static struct socket * dequeue_socket(struct socket *head, struct sockaddr_in **remote, struct c4iw_ep *child_ep);
+static void process_newconn(struct c4iw_ep *parent_ep);
+static int c4iw_so_upcall(struct socket *so, void *arg, int waitflag);
+static void process_socket_event(struct c4iw_ep *ep);
+static void release_ep_resources(struct c4iw_ep *ep);
+
+/*
+ * Tracing wrapper around start_ep_timer(): records the calling function and
+ * line in the KTR trace before arming the endpoint timer.
+ */
+#define START_EP_TIMER(ep) \
+ do { \
+ CTR3(KTR_IW_CXGBE, "start_ep_timer (%s:%d) ep %p", \
+ __func__, __LINE__, (ep)); \
+ start_ep_timer(ep); \
+ } while (0)
+
+/*
+ * Tracing wrapper around stop_ep_timer(): records the calling function and
+ * line in the KTR trace before stopping the endpoint timer.
+ */
+#define STOP_EP_TIMER(ep) \
+ do { \
+ CTR3(KTR_IW_CXGBE, "stop_ep_timer (%s:%d) ep %p", \
+ __func__, __LINE__, (ep)); \
+ stop_ep_timer(ep); \
+ } while (0)
+
+#ifdef KTR
+/*
+ * Printable endpoint state names used by the KTR traces in this file, which
+ * index the table with an endpoint's state value (states[epc->state]).
+ * NOTE(review): the order is presumed to mirror enum c4iw_ep_state in
+ * iw_cxgbe.h -- confirm whenever that enum changes.
+ */
+static char *states[] = {
+ "idle",
+ "listen",
+ "connecting",
+ "mpa_wait_req",
+ "mpa_req_sent",
+ "mpa_req_rcvd",
+ "mpa_rep_sent",
+ "fpdu_mode",
+ "aborting",
+ "closing",
+ "moribund",
+ "dead",
+ NULL,
+};
+#endif
+
+/*
+ * Work handler: drain req_list, handling one queued endpoint at a time.
+ *
+ * req_lock is held only while the list itself is manipulated; it is dropped
+ * around process_socket_event() and c4iw_put_ep() and retaken before the
+ * list is examined again.  The c4iw_put_ep() here pairs with the
+ * c4iw_get_ep() done by c4iw_so_upcall() when the endpoint was queued.
+ */
+static void
+process_req(struct work_struct *ctx)
+{
+	struct c4iw_ep_common *pending;
+
+	spin_lock(&req_lock);
+	while ((pending = TAILQ_FIRST(&req_list)) != NULL) {
+		TAILQ_REMOVE(&req_list, pending, entry);
+		/* c4iw_so_upcall() treats a NULL tqe_prev as "not queued". */
+		pending->entry.tqe_prev = NULL;
+		spin_unlock(&req_lock);
+
+		if (pending->so)
+			process_socket_event((struct c4iw_ep *)pending);
+		c4iw_put_ep(pending);
+
+		spin_lock(&req_lock);
+	}
+	spin_unlock(&req_lock);
+}
+
+/*
+ * Copy the offload tid, send/receive sequence numbers and effective MSS of
+ * the endpoint's TCP connection into the endpoint.
+ *
+ * Returns 0 on success, or EINVAL (after logging) when the connection does
+ * not have TF_TOE set.  The inpcb write lock is held for the whole lookup.
+ *
+ * XXX: doesn't belong here in the iWARP driver.
+ * XXX: assumes that the connection was offloaded by cxgbe/t4_tom if TF_TOE is
+ * set. Is this a valid assumption for active open?
+ */
+static int
+set_tcpinfo(struct c4iw_ep *ep)
+{
+	struct socket *so = ep->com.so;
+	struct inpcb *inp = sotoinpcb(so);
+	struct tcpcb *tp;
+	struct toepcb *toep;
+	int rc;
+
+	INP_WLOCK(inp);
+	tp = intotcpcb(inp);
+	if (tp->t_flags & TF_TOE) {
+		toep = TOEPCB(so);
+
+		ep->hwtid = toep->tid;
+		ep->snd_seq = tp->snd_nxt;
+		ep->rcv_seq = tp->rcv_nxt;
+		/* Never report an MSS below 128. */
+		ep->emss = max(tp->t_maxseg, 128);
+		rc = 0;
+	} else {
+		rc = EINVAL;
+		log(LOG_ERR, "%s: connection not offloaded (so %p, ep %p)\n",
+		    __func__, so, ep);
+	}
+	INP_WUNLOCK(inp);
+
+	return (rc);
+}
+
+/*
+ * Look up the route to peer_ip with rtalloc() and return the resulting
+ * rtentry (whatever rtalloc() left in ro_rt).
+ *
+ * Only peer_ip is used for the lookup; local_ip and the two ports are merely
+ * traced, and tos is unused.
+ */
+static struct rtentry *
+find_route(__be32 local_ip, __be32 peer_ip, __be16 local_port,
+    __be16 peer_port, u8 tos)
+{
+	struct route ro;
+	struct sockaddr_in *sin;
+
+	CTR5(KTR_IW_CXGBE, "%s:frtB %x, %x, %d, %d", __func__, local_ip,
+	    peer_ip, ntohs(local_port), ntohs(peer_port));
+
+	bzero(&ro, sizeof ro);
+	sin = (struct sockaddr_in *)&ro.ro_dst;
+	sin->sin_len = sizeof *sin;
+	sin->sin_family = AF_INET;
+	sin->sin_addr.s_addr = peer_ip;
+
+	rtalloc(&ro);
+	CTR2(KTR_IW_CXGBE, "%s:frtE %p", __func__, (uint64_t)ro.ro_rt);
+
+	return ro.ro_rt;
+}
+
+/*
+ * Detach the endpoint from its socket.
+ *
+ * The SO_RCV upcall is cleared under the socket lock, then the socket is
+ * either closed with soclose() (close != 0) or shut down with soshutdown()
+ * (close == 0).  epc->so is set to NULL in both cases.
+ *
+ * Returns the value returned by soclose()/soshutdown().
+ */
+static int
+close_socket(struct c4iw_ep_common *epc, int close)
+{
+	struct socket *so = epc->so;
+	int rc;
+
+	/* Arguments follow the "so ..., ep ..." order of the format string. */
+	CTR4(KTR_IW_CXGBE, "%s: so %p, ep %p, state %s", __func__, so, epc,
+	    states[epc->state]);
+
+	SOCK_LOCK(so);
+	soupcall_clear(so, SO_RCV);
+	SOCK_UNLOCK(so);
+
+	if (close)
+		rc = soclose(so);
+	else {
+		/*
+		 * NOTE(review): if SHUT_RD is 0 and SHUT_WR is 1 (as in
+		 * FreeBSD's <sys/socket.h>), this expression equals SHUT_WR,
+		 * not SHUT_RDWR.  Confirm which one is intended before
+		 * changing it.
+		 */
+		rc = soshutdown(so, SHUT_WR | SHUT_RD);
+	}
+	epc->so = NULL;
+
+	return (rc);
+}
+
+/*
+ * Shut down the write side of the endpoint's socket.
+ * Returns the value returned by soshutdown().
+ */
+static int
+shutdown_socket(struct c4iw_ep_common *epc)
+{
+	int rc;
+
+	CTR4(KTR_IW_CXGBE, "%s: so %p, ep %p, state %s", __func__, epc->so, epc,
+	    states[epc->state]);
+
+	rc = soshutdown(epc->so, SHUT_WR);
+	return (rc);
+}
+
+/*
+ * Set SO_LINGER with a linger time of 0 on the endpoint's socket.
+ * A failure to set the option is only logged.
+ */
+static void
+abort_socket(struct c4iw_ep *ep)
+{
+	struct linger lng;
+	struct sockopt opt;
+	int error;
+
+	CTR4(KTR_IW_CXGBE, "%s ep %p so %p state %s", __func__, ep, ep->com.so,
+	    states[ep->com.state]);
+
+	/* linger_time of 0 forces RST to be sent */
+	lng.l_linger = 0;
+	lng.l_onoff = 1;
+
+	opt.sopt_td = NULL;
+	opt.sopt_dir = SOPT_SET;
+	opt.sopt_level = SOL_SOCKET;
+	opt.sopt_name = SO_LINGER;
+	opt.sopt_val = (caddr_t)&lng;
+	opt.sopt_valsize = sizeof lng;
+
+	error = sosetopt(ep->com.so, &opt);
+	if (error != 0) {
+		log(LOG_ERR, "%s: can't set linger to 0, no RST! err %d\n",
+		    __func__, error);
+	}
+}
+
+/*
+ * Handle a peer close on the endpoint's connection (called from
+ * process_socket_event()).
+ *
+ * The endpoint state is advanced while ep->com.mutex is held.  Two flags
+ * computed under the mutex are acted upon after it is dropped:
+ * disconnect - call c4iw_ep_disconnect(ep, 0, M_NOWAIT)
+ * release - drop an endpoint reference with c4iw_put_ep()
+ * An unknown state panics.
+ */
+static void
+process_peer_close(struct c4iw_ep *ep)
+{
+ struct c4iw_qp_attributes attrs;
+ int disconnect = 1;
+ int release = 0;
+
+ /* The state printed here is read before the mutex is taken. */
+ CTR4(KTR_IW_CXGBE, "%s:ppcB ep %p so %p state %s", __func__, ep,
+ ep->com.so, states[ep->com.state]);
+
+ mutex_lock(&ep->com.mutex);
+ switch (ep->com.state) {
+
+ case MPA_REQ_WAIT:
+ CTR2(KTR_IW_CXGBE, "%s:ppc1 %p MPA_REQ_WAIT CLOSING",
+ __func__, ep);
+ __state_set(&ep->com, CLOSING);
+ break;
+
+ case MPA_REQ_SENT:
+ /* The trace label says CLOSING, but the state set below is DEAD. */
+ CTR2(KTR_IW_CXGBE, "%s:ppc2 %p MPA_REQ_SENT CLOSING",
+ __func__, ep);
+ __state_set(&ep->com, DEAD);
+ connect_reply_upcall(ep, -ECONNABORTED);
+
+ disconnect = 0;
+ STOP_EP_TIMER(ep);
+ close_socket(&ep->com, 0);
+ /* Drop the cm_id reference and detach cm_id/qp from the endpoint. */
+ ep->com.cm_id->rem_ref(ep->com.cm_id);
+ ep->com.cm_id = NULL;
+ ep->com.qp = NULL;
+ release = 1;
+ break;
+
+ case MPA_REQ_RCVD:
+
+ /*
+ * We're gonna mark this puppy DEAD, but keep
+ * the reference on it until the ULP accepts or
+ * rejects the CR.
+ */
+ /*
+ * NOTE(review): the comment above says DEAD, but the state set
+ * here is CLOSING; an extra endpoint reference is taken.
+ */
+ CTR2(KTR_IW_CXGBE, "%s:ppc3 %p MPA_REQ_RCVD CLOSING",
+ __func__, ep);
+ __state_set(&ep->com, CLOSING);
+ c4iw_get_ep(&ep->com);
+ break;
+
+ case MPA_REP_SENT:
+ CTR2(KTR_IW_CXGBE, "%s:ppc4 %p MPA_REP_SENT CLOSING",
+ __func__, ep);
+ __state_set(&ep->com, CLOSING);
+ break;
+
+ case FPDU_MODE:
+ /* Arm the timer, move the QP to CLOSING and notify via peer_close_upcall(). */
+ CTR2(KTR_IW_CXGBE, "%s:ppc5 %p FPDU_MODE CLOSING",
+ __func__, ep);
+ START_EP_TIMER(ep);
+ __state_set(&ep->com, CLOSING);
+ attrs.next_state = C4IW_QP_STATE_CLOSING;
+ c4iw_modify_qp(ep->com.dev, ep->com.qp,
+ C4IW_QP_ATTR_NEXT_STATE, &attrs, 1);
+ peer_close_upcall(ep);
+ break;
+
+ case ABORTING:
+ CTR2(KTR_IW_CXGBE, "%s:ppc6 %p ABORTING (disconn)",
+ __func__, ep);
+ disconnect = 0;
+ break;
+
+ case CLOSING:
+ CTR2(KTR_IW_CXGBE, "%s:ppc7 %p CLOSING MORIBUND",
+ __func__, ep);
+ __state_set(&ep->com, MORIBUND);
+ disconnect = 0;
+ break;
+
+ case MORIBUND:
+ /* Final step: stop the timer, idle the QP if present, close the socket. */
+ CTR2(KTR_IW_CXGBE, "%s:ppc8 %p MORIBUND DEAD", __func__,
+ ep);
+ STOP_EP_TIMER(ep);
+ if (ep->com.cm_id && ep->com.qp) {
+ attrs.next_state = C4IW_QP_STATE_IDLE;
+ c4iw_modify_qp(ep->com.qp->rhp, ep->com.qp,
+ C4IW_QP_ATTR_NEXT_STATE, &attrs, 1);
+ }
+ close_socket(&ep->com, 0);
+ close_complete_upcall(ep);
+ __state_set(&ep->com, DEAD);
+ release = 1;
+ disconnect = 0;
+ break;
+
+ case DEAD:
+ CTR2(KTR_IW_CXGBE, "%s:ppc9 %p DEAD (disconn)",
+ __func__, ep);
+ disconnect = 0;
+ break;
+
+ default:
+ panic("%s: ep %p state %d", __func__, ep,
+ ep->com.state);
+ break;
+ }
+
+ mutex_unlock(&ep->com.mutex);
+
+ /* Deferred actions run only after the mutex has been released. */
+ if (disconnect) {
+
+ CTR2(KTR_IW_CXGBE, "%s:ppca %p", __func__, ep);
+ c4iw_ep_disconnect(ep, 0, M_NOWAIT);
+ }
+ if (release) {
+
+ CTR2(KTR_IW_CXGBE, "%s:ppcb %p", __func__, ep);
+ c4iw_put_ep(&ep->com);
+ }
+ CTR2(KTR_IW_CXGBE, "%s:ppcE %p", __func__, ep);
+ return;
+}
+
+/*
+ * Handle a socket error on the endpoint's connection (called from
+ * process_socket_event() when so_error is set).
+ *
+ * A snapshot of the state is taken with state_read() and per-state cleanup
+ * is done.  Unless the endpoint is ABORTING (or already DEAD, which returns
+ * early), the socket is then shut down via close_socket(), the endpoint is
+ * marked DEAD and one reference is dropped.  An unknown state panics.
+ */
+static void
+process_conn_error(struct c4iw_ep *ep)
+{
+ struct c4iw_qp_attributes attrs;
+ int ret;
+ int state;
+
+ state = state_read(&ep->com);
+ /* The trace re-reads ep->com.state directly rather than using 'state'. */
+ CTR5(KTR_IW_CXGBE, "%s:pceB ep %p so %p so->so_error %u state %s",
+ __func__, ep, ep->com.so, ep->com.so->so_error,
+ states[ep->com.state]);
+
+ switch (state) {
+
+ case MPA_REQ_WAIT:
+ STOP_EP_TIMER(ep);
+ break;
+
+ case MPA_REQ_SENT:
+ STOP_EP_TIMER(ep);
+ connect_reply_upcall(ep, -ECONNRESET);
+ break;
+
+ case MPA_REP_SENT:
+ /* Only records the error; no wakeup call is made in this function. */
+ ep->com.rpl_err = ECONNRESET;
+ CTR1(KTR_IW_CXGBE, "waking up ep %p", ep);
+ break;
+
+ case MPA_REQ_RCVD:
+
+ /*
+ * We're gonna mark this puppy DEAD, but keep
+ * the reference on it until the ULP accepts or
+ * rejects the CR.
+ */
+ c4iw_get_ep(&ep->com);
+ break;
+
+ case MORIBUND:
+ case CLOSING:
+ STOP_EP_TIMER(ep);
+ /*FALLTHROUGH*/
+ case FPDU_MODE:
+
+ /* Move the QP (if any) to ERROR, then report the abort. */
+ if (ep->com.cm_id && ep->com.qp) {
+
+ attrs.next_state = C4IW_QP_STATE_ERROR;
+ ret = c4iw_modify_qp(ep->com.qp->rhp,
+ ep->com.qp, C4IW_QP_ATTR_NEXT_STATE,
+ &attrs, 1);
+ if (ret)
+ log(LOG_ERR,
+ "%s - qp <- error failed!\n",
+ __func__);
+ }
+ peer_abort_upcall(ep);
+ break;
+
+ case ABORTING:
+ break;
+
+ case DEAD:
+ CTR2(KTR_IW_CXGBE, "%s so_error %d IN DEAD STATE!!!!",
+ __func__, ep->com.so->so_error);
+ return;
+
+ default:
+ panic("%s: ep %p state %d", __func__, ep, state);
+ break;
+ }
+
+ /* Common teardown for every state handled above except ABORTING. */
+ if (state != ABORTING) {
+
+ CTR2(KTR_IW_CXGBE, "%s:pce1 %p", __func__, ep);
+ close_socket(&ep->com, 0);
+ state_set(&ep->com, DEAD);
+ c4iw_put_ep(&ep->com);
+ }
+ CTR2(KTR_IW_CXGBE, "%s:pceE %p", __func__, ep);
+ return;
+}
+
+/*
+ * Handle completion of a close on the endpoint's connection (called from
+ * process_socket_event() when the socket is SS_ISDISCONNECTED).
+ *
+ * Under ep->com.mutex: CLOSING becomes MORIBUND; MORIBUND is torn down and
+ * becomes DEAD; ABORTING is left alone; any other state (including DEAD)
+ * panics.  A reference is dropped after the mutex is released when the
+ * endpoint was torn down.
+ */
+static void
+process_close_complete(struct c4iw_ep *ep)
+{
+ struct c4iw_qp_attributes attrs;
+ int release = 0;
+
+ CTR4(KTR_IW_CXGBE, "%s:pccB ep %p so %p state %s", __func__, ep,
+ ep->com.so, states[ep->com.state]);
+
+ /* The cm_id may be null if we failed to connect */
+ mutex_lock(&ep->com.mutex);
+
+ switch (ep->com.state) {
+
+ case CLOSING:
+ CTR2(KTR_IW_CXGBE, "%s:pcc1 %p CLOSING MORIBUND",
+ __func__, ep);
+ __state_set(&ep->com, MORIBUND);
+ break;
+
+ case MORIBUND:
+ /* Note: the "pcc1" trace label is shared with the CLOSING case above. */
+ CTR2(KTR_IW_CXGBE, "%s:pcc1 %p MORIBUND DEAD", __func__,
+ ep);
+ STOP_EP_TIMER(ep);
+
+ if ((ep->com.cm_id) && (ep->com.qp)) {
+
+ CTR2(KTR_IW_CXGBE, "%s:pcc2 %p QP_STATE_IDLE",
+ __func__, ep);
+ attrs.next_state = C4IW_QP_STATE_IDLE;
+ c4iw_modify_qp(ep->com.dev,
+ ep->com.qp,
+ C4IW_QP_ATTR_NEXT_STATE,
+ &attrs, 1);
+ }
+
+ /*
+ * Endpoints with a parent_ep have their socket closed (soclose);
+ * endpoints without one only have it shut down.
+ */
+ if (ep->parent_ep) {
+
+ CTR2(KTR_IW_CXGBE, "%s:pcc3 %p", __func__, ep);
+ close_socket(&ep->com, 1);
+ }
+ else {
+
+ CTR2(KTR_IW_CXGBE, "%s:pcc4 %p", __func__, ep);
+ close_socket(&ep->com, 0);
+ }
+ close_complete_upcall(ep);
+ __state_set(&ep->com, DEAD);
+ release = 1;
+ break;
+
+ case ABORTING:
+ CTR2(KTR_IW_CXGBE, "%s:pcc5 %p ABORTING", __func__, ep);
+ break;
+
+ case DEAD:
+ default:
+ /* Any state not handled above is treated as fatal. */
+ CTR2(KTR_IW_CXGBE, "%s:pcc6 %p DEAD", __func__, ep);
+ panic("%s:pcc6 %p DEAD", __func__, ep);
+ break;
+ }
+ mutex_unlock(&ep->com.mutex);
+
+ if (release) {
+
+ CTR2(KTR_IW_CXGBE, "%s:pcc7 %p", __func__, ep);
+ c4iw_put_ep(&ep->com);
+ }
+ CTR2(KTR_IW_CXGBE, "%s:pccE %p", __func__, ep);
+ return;
+}
+
+/*
+ * Prepare the endpoint's socket for use by this driver: install
+ * c4iw_so_upcall() as the SO_RCV upcall (with the endpoint as argument),
+ * mark the socket non-blocking, and turn on TCP_NODELAY.  A failure to set
+ * TCP_NODELAY is only logged.
+ */
+static void
+init_sock(struct c4iw_ep_common *epc)
+{
+	struct socket *so = epc->so;
+	struct sockopt opt;
+	int nodelay = 1;
+	int error;
+
+	SOCK_LOCK(so);
+	soupcall_set(so, SO_RCV, c4iw_so_upcall, epc);
+	so->so_state |= SS_NBIO;
+	SOCK_UNLOCK(so);
+
+	opt.sopt_td = NULL;
+	opt.sopt_dir = SOPT_SET;
+	opt.sopt_level = IPPROTO_TCP;
+	opt.sopt_name = TCP_NODELAY;
+	opt.sopt_val = (caddr_t)&nodelay;
+	opt.sopt_valsize = sizeof nodelay;
+
+	error = sosetopt(so, &opt);
+	if (error != 0) {
+		log(LOG_ERR, "%s: can't set TCP_NODELAY on so %p (%d)\n",
+		    __func__, so, error);
+	}
+}
+
+/*
+ * Handle received data on the endpoint's socket according to its state:
+ * MPA_REQ_SENT - the data is handed to process_mpa_reply()
+ * MPA_REQ_WAIT - local/peer addresses are recorded in the endpoint, then
+ * the data is handed to process_mpa_request()
+ * otherwise - any data sitting in the receive buffer is logged as
+ * unexpected
+ */
+static void
+process_data(struct c4iw_ep *ep)
+{
+	struct sockaddr_in *laddr, *raddr;
+	enum c4iw_ep_state cur;
+
+	CTR5(KTR_IW_CXGBE, "%s: so %p, ep %p, state %s, sb_cc %d", __func__,
+	    ep->com.so, ep, states[ep->com.state], ep->com.so->so_rcv.sb_cc);
+
+	cur = state_read(&ep->com);
+
+	if (cur == MPA_REQ_SENT) {
+		process_mpa_reply(ep);
+		return;
+	}
+
+	if (cur == MPA_REQ_WAIT) {
+		/* Both lookups hand back M_SONAME allocations freed below. */
+		in_getsockaddr(ep->com.so, (struct sockaddr **)&laddr);
+		in_getpeeraddr(ep->com.so, (struct sockaddr **)&raddr);
+		ep->com.local_addr = *laddr;
+		ep->com.remote_addr = *raddr;
+		free(laddr, M_SONAME);
+		free(raddr, M_SONAME);
+		process_mpa_request(ep);
+		return;
+	}
+
+	if (ep->com.so->so_rcv.sb_cc) {
+		log(LOG_ERR, "%s: Unexpected streaming data. "
+		    "ep %p, state %d, so %p, so_state 0x%x, sb_cc %u\n",
+		    __func__, ep, state_read(&ep->com), ep->com.so,
+		    ep->com.so->so_state, ep->com.so->so_rcv.sb_cc);
+	}
+}
+
+/*
+ * Handle the outcome of a connect on a CONNECTING endpoint.
+ *
+ * If the socket is connected and has no pending error, the MPA request is
+ * sent.  Otherwise the failure is reported through connect_reply_upcall()
+ * with the negated so_error, the socket is shut down, the endpoint is marked
+ * DEAD and one reference is dropped.
+ */
+static void
+process_connected(struct c4iw_ep *ep)
+{
+	struct socket *so = ep->com.so;
+
+	if (!(so->so_state & SS_ISCONNECTED) || so->so_error) {
+		connect_reply_upcall(ep, -so->so_error);
+		close_socket(&ep->com, 0);
+		state_set(&ep->com, DEAD);
+		c4iw_put_ep(&ep->com);
+		return;
+	}
+
+	send_mpa_req(ep);
+}
+
+/*
+ * Take the first socket off head's completed-connection queue (so_comp).
+ *
+ * Returns NULL when the queue is empty.  Otherwise the socket is unlinked
+ * from the listener, gets a reference (soref), has c4iw_so_upcall()
+ * installed as its SO_RCV upcall with child_ep as the argument, is marked
+ * non-blocking, and is returned after soaccept() has been called to fill in
+ * *remote.  The queue manipulation happens under ACCEPT_LOCK, the per-socket
+ * changes under SOCK_LOCK.
+ *
+ * NOTE(review): the return value of soaccept() is ignored, so the caller
+ * cannot tell whether *remote was actually set.
+ */
+static struct socket *
+dequeue_socket(struct socket *head, struct sockaddr_in **remote,
+ struct c4iw_ep *child_ep)
+{
+ struct socket *so;
+
+ ACCEPT_LOCK();
+ so = TAILQ_FIRST(&head->so_comp);
+ if (!so) {
+ ACCEPT_UNLOCK();
+ return (NULL);
+ }
+ TAILQ_REMOVE(&head->so_comp, so, so_list);
+ head->so_qlen--;
+ SOCK_LOCK(so);
+ so->so_qstate &= ~SQ_COMP;
+ so->so_head = NULL;
+ soref(so);
+ soupcall_set(so, SO_RCV, c4iw_so_upcall, child_ep);
+ so->so_state |= SS_NBIO;
+ SOCK_UNLOCK(so);
+ ACCEPT_UNLOCK();
+ soaccept(so, (struct sockaddr **)remote);
+
+ return (so);
+}
+
+/*
+ * Handle a new incoming connection on a listening endpoint.
+ *
+ * A child endpoint is allocated and the first completed connection is
+ * dequeued from the parent's socket.  On success the child inherits the
+ * parent's local address, device and thread, records the peer address,
+ * takes a reference on the parent, enters MPA_REQ_WAIT with its timer
+ * running, and process_mpa_request() is called right away in case the
+ * request is already queued on the socket.
+ *
+ * On allocation or dequeue failure the error is logged and nothing else
+ * happens (the freshly allocated child is freed in the latter case).
+ */
+static void
+process_newconn(struct c4iw_ep *parent_ep)
+{
+	struct socket *child_so;
+	struct c4iw_ep *child_ep;
+	struct sockaddr_in *remote;
+
+	child_ep = alloc_ep(sizeof(*child_ep), M_NOWAIT);
+	if (!child_ep) {
+		CTR3(KTR_IW_CXGBE, "%s: parent so %p, parent ep %p, ENOMEM",
+		    __func__, parent_ep->com.so, parent_ep);
+		log(LOG_ERR, "%s: failed to allocate ep entry\n", __func__);
+		return;
+	}
+
+	child_so = dequeue_socket(parent_ep->com.so, &remote, child_ep);
+	if (!child_so) {
+		CTR4(KTR_IW_CXGBE,
+		    "%s: parent so %p, parent ep %p, child ep %p, dequeue err",
+		    __func__, parent_ep->com.so, parent_ep, child_ep);
+		log(LOG_ERR, "%s: failed to dequeue child socket\n", __func__);
+		__free_ep(&child_ep->com);
+		return;
+	}
+
+	CTR5(KTR_IW_CXGBE,
+	    "%s: parent so %p, parent ep %p, child so %p, child ep %p",
+	    __func__, parent_ep->com.so, parent_ep, child_so, child_ep);
+
+	child_ep->com.local_addr = parent_ep->com.local_addr;
+	child_ep->com.remote_addr = *remote;
+	child_ep->com.dev = parent_ep->com.dev;
+	child_ep->com.so = child_so;
+	child_ep->com.cm_id = NULL;
+	child_ep->com.thread = parent_ep->com.thread;
+
+	/* The address returned through dequeue_socket() is ours to free. */
+	free(remote, M_SONAME);
+
+	/* The child keeps a reference on its parent for as long as it points at it. */
+	c4iw_get_ep(&parent_ep->com);
+	child_ep->parent_ep = parent_ep;
+
+	init_timer(&child_ep->timer);
+	state_set(&child_ep->com, MPA_REQ_WAIT);
+	START_EP_TIMER(child_ep);
+
+	/* maybe the request has already been queued up on the socket... */
+	process_mpa_request(child_ep);
+}
+
+/*
+ * Socket upcall installed on the sockets this driver manages.
+ *
+ * If the endpoint passed as 'arg' still has a socket and is not already on
+ * req_list (entry.tqe_prev is NULL), a reference is taken on it, it is
+ * appended to req_list and the work item that drains the list is queued.
+ * All of this happens under req_lock.
+ *
+ * Always returns SU_OK.
+ */
+static int
+c4iw_so_upcall(struct socket *so, void *arg, int waitflag)
+{
+	struct c4iw_ep *ep = arg;
+
+	/*
+	 * Check for a missing endpoint before anything dereferences it; the
+	 * trace below reads ep->com fields.
+	 */
+	if (ep == NULL)
+		return (SU_OK);
+
+	spin_lock(&req_lock);
+
+	CTR6(KTR_IW_CXGBE,
+	    "%s: so %p, so_state 0x%x, ep %p, ep_state %s, tqe_prev %p",
+	    __func__, so, so->so_state, ep, states[ep->com.state],
+	    ep->com.entry.tqe_prev);
+
+	if (ep->com.so && !ep->com.entry.tqe_prev) {
+		KASSERT(ep->com.so == so, ("%s: XXX review.", __func__));
+		/* Reference is dropped by process_req() after handling. */
+		c4iw_get_ep(&ep->com);
+		TAILQ_INSERT_TAIL(&req_list, &ep->com, entry);
+		queue_work(c4iw_taskq, &c4iw_task);
+	}
+
+	spin_unlock(&req_lock);
+	return (SU_OK);
+}
+
+/*
+ * Dispatch one socket event for an endpoint.  The checks are made in a fixed
+ * order and the first one that matches decides the handler:
+ * 1. endpoint is CONNECTING -> process_connected()
+ * 2. endpoint is LISTEN -> process_newconn()
+ * 3. socket has so_error set -> process_conn_error()
+ * 4. receive side can't receive more and
+ * the endpoint state is below CLOSING -> process_peer_close()
+ * 5. socket is SS_ISDISCONNECTED -> process_close_complete()
+ * 6. anything else -> process_data()
+ */
+static void
+process_socket_event(struct c4iw_ep *ep)
+{
+	int state = state_read(&ep->com);
+	struct socket *so = ep->com.so;
+
+	CTR6(KTR_IW_CXGBE, "process_socket_event: so %p, so_state 0x%x, "
+	    "so_err %d, sb_state 0x%x, ep %p, ep_state %s", so, so->so_state,
+	    so->so_error, so->so_rcv.sb_state, ep, states[state]);
+
+	if (state == CONNECTING)
+		process_connected(ep);
+	else if (state == LISTEN)
+		process_newconn(ep);
+	else if (so->so_error)
+		process_conn_error(ep);		/* connection error */
+	else if ((so->so_rcv.sb_state & SBS_CANTRCVMORE) && state < CLOSING)
+		process_peer_close(ep);		/* peer close */
+	else if (so->so_state & SS_ISDISCONNECTED)
+		process_close_complete(ep);	/* close complete */
+	else
+		process_data(ep);		/* rx data */
+}
+
+/*
+ * hw.iw_cxgbe.* driver parameters.  Each variable is registered both as a
+ * loader tunable and as a read/write sysctl; the description strings state
+ * the compiled-in default.
+ */
+SYSCTL_NODE(_hw, OID_AUTO, iw_cxgbe, CTLFLAG_RD, 0, "iw_cxgbe driver parameters");
+
+int db_delay_usecs = 1;
+TUNABLE_INT("hw.iw_cxgbe.db_delay_usecs", &db_delay_usecs);
+SYSCTL_INT(_hw_iw_cxgbe, OID_AUTO, db_delay_usecs, CTLFLAG_RW, &db_delay_usecs, 0,
+    "Usecs to delay awaiting db fifo to drain");
+
+static int dack_mode = 1;
+TUNABLE_INT("hw.iw_cxgbe.dack_mode", &dack_mode);
+SYSCTL_INT(_hw_iw_cxgbe, OID_AUTO, dack_mode, CTLFLAG_RW, &dack_mode, 0,
+    "Delayed ack mode (default = 1)");
+
+int c4iw_max_read_depth = 8;
+TUNABLE_INT("hw.iw_cxgbe.c4iw_max_read_depth", &c4iw_max_read_depth);
+SYSCTL_INT(_hw_iw_cxgbe, OID_AUTO, c4iw_max_read_depth, CTLFLAG_RW, &c4iw_max_read_depth, 0,
+    "Per-connection max ORD/IRD (default = 8)");
+
+static int enable_tcp_timestamps;
+TUNABLE_INT("hw.iw_cxgbe.enable_tcp_timestamps", &enable_tcp_timestamps);
+SYSCTL_INT(_hw_iw_cxgbe, OID_AUTO, enable_tcp_timestamps, CTLFLAG_RW, &enable_tcp_timestamps, 0,
+    "Enable tcp timestamps (default = 0)");
+
+static int enable_tcp_sack;
+TUNABLE_INT("hw.iw_cxgbe.enable_tcp_sack", &enable_tcp_sack);
+SYSCTL_INT(_hw_iw_cxgbe, OID_AUTO, enable_tcp_sack, CTLFLAG_RW, &enable_tcp_sack, 0,
+    "Enable tcp SACK (default = 0)");
+
+static int enable_tcp_window_scaling = 1;
+TUNABLE_INT("hw.iw_cxgbe.enable_tcp_window_scaling", &enable_tcp_window_scaling);
+SYSCTL_INT(_hw_iw_cxgbe, OID_AUTO, enable_tcp_window_scaling, CTLFLAG_RW, &enable_tcp_window_scaling, 0,
+    "Enable tcp window scaling (default = 1)");
+
+/*
+ * The description now states the value the variable is actually initialized
+ * to.  NOTE(review): if debug logging is meant to be off by default, change
+ * the initializer (and this string) to 0 instead.
+ */
+int c4iw_debug = 1;
+TUNABLE_INT("hw.iw_cxgbe.c4iw_debug", &c4iw_debug);
+SYSCTL_INT(_hw_iw_cxgbe, OID_AUTO, c4iw_debug, CTLFLAG_RW, &c4iw_debug, 0,
+    "Enable debug logging (default = 1)");
+
+static int peer2peer;
+TUNABLE_INT("hw.iw_cxgbe.peer2peer", &peer2peer);
+SYSCTL_INT(_hw_iw_cxgbe, OID_AUTO, peer2peer, CTLFLAG_RW, &peer2peer, 0,
+    "Support peer2peer ULPs (default = 0)");
+
+static int p2p_type = FW_RI_INIT_P2PTYPE_READ_REQ;
+TUNABLE_INT("hw.iw_cxgbe.p2p_type", &p2p_type);
+SYSCTL_INT(_hw_iw_cxgbe, OID_AUTO, p2p_type, CTLFLAG_RW, &p2p_type, 0,
+    "RDMAP opcode to use for the RTR message: 1 = RDMA_READ 0 = RDMA_WRITE (default 1)");
+
+static int ep_timeout_secs = 60;
+TUNABLE_INT("hw.iw_cxgbe.ep_timeout_secs", &ep_timeout_secs);
+SYSCTL_INT(_hw_iw_cxgbe, OID_AUTO, ep_timeout_secs, CTLFLAG_RW, &ep_timeout_secs, 0,
+    "CM Endpoint operation timeout in seconds (default = 60)");
+
+/* MPA is specified by RFC 5044; the descriptions below name it accordingly. */
+static int mpa_rev = 1;
+TUNABLE_INT("hw.iw_cxgbe.mpa_rev", &mpa_rev);
+#ifdef IW_CM_MPAV2
+SYSCTL_INT(_hw_iw_cxgbe, OID_AUTO, mpa_rev, CTLFLAG_RW, &mpa_rev, 0,
+    "MPA Revision, 0 supports amso1100, 1 is RFC5044 spec compliant, 2 is IETF MPA Peer Connect Draft compliant (default = 1)");
+#else
+SYSCTL_INT(_hw_iw_cxgbe, OID_AUTO, mpa_rev, CTLFLAG_RW, &mpa_rev, 0,
+    "MPA Revision, 0 supports amso1100, 1 is RFC5044 spec compliant (default = 1)");
+#endif
+
+static int markers_enabled;
+TUNABLE_INT("hw.iw_cxgbe.markers_enabled", &markers_enabled);
+SYSCTL_INT(_hw_iw_cxgbe, OID_AUTO, markers_enabled, CTLFLAG_RW, &markers_enabled, 0,
+    "Enable MPA MARKERS (default(0) = disabled)");
+
+static int crc_enabled = 1;
+TUNABLE_INT("hw.iw_cxgbe.crc_enabled", &crc_enabled);
+SYSCTL_INT(_hw_iw_cxgbe, OID_AUTO, crc_enabled, CTLFLAG_RW, &crc_enabled, 0,
+    "Enable MPA CRC (default(1) = enabled)");
+
+static int rcv_win = 256 * 1024;
+TUNABLE_INT("hw.iw_cxgbe.rcv_win", &rcv_win);
+SYSCTL_INT(_hw_iw_cxgbe, OID_AUTO, rcv_win, CTLFLAG_RW, &rcv_win, 0,
+    "TCP receive window in bytes (default = 256KB)");
+
+static int snd_win = 128 * 1024;
+TUNABLE_INT("hw.iw_cxgbe.snd_win", &snd_win);
+SYSCTL_INT(_hw_iw_cxgbe, OID_AUTO, snd_win, CTLFLAG_RW, &snd_win, 0,
+    "TCP send window in bytes (default = 128KB)");
+
+/*
+ * NOTE(review): the original description stopped after "automatic"; the
+ * completed wording follows the equivalent iw_cxgb4 parameter -- confirm.
+ */
+int db_fc_threshold = 2000;
+TUNABLE_INT("hw.iw_cxgbe.db_fc_threshold", &db_fc_threshold);
+SYSCTL_INT(_hw_iw_cxgbe, OID_AUTO, db_fc_threshold, CTLFLAG_RW, &db_fc_threshold, 0,
+    "QP count/threshold that triggers automatic db flow control mode (default = 2000)");
+
+/*
+ * Arm the endpoint timer to fire ep_timeout() after ep_timeout_secs seconds.
+ *
+ * If the timer is already pending this only complains (trace + printk) and
+ * leaves it alone.  Otherwise the TIMEOUT flag is cleared and a reference on
+ * the endpoint is taken before the timer is added.
+ */
+static void
+start_ep_timer(struct c4iw_ep *ep)
+{
+
+	if (!timer_pending(&ep->timer)) {
+		clear_bit(TIMEOUT, &ep->com.flags);
+		c4iw_get_ep(&ep->com);
+		ep->timer.function = ep_timeout;
+		ep->timer.data = (unsigned long)ep;
+		ep->timer.expires = jiffies + ep_timeout_secs * HZ;
+		add_timer(&ep->timer);
+		return;
+	}
+
+	CTR2(KTR_IW_CXGBE, "%s: ep %p, already started", __func__, ep);
+	printk(KERN_ERR "%s timer already started! ep %p\n", __func__,
+	    ep);
+}
+
+/*
+ * Stop the endpoint timer.  The TIMEOUT flag is set atomically; only the
+ * caller that finds it previously clear drops an endpoint reference.
+ */
+static void
+stop_ep_timer(struct c4iw_ep *ep)
+{
+
+	del_timer_sync(&ep->timer);
+	if (test_and_set_bit(TIMEOUT, &ep->com.flags))
+		return;
+	c4iw_put_ep(&ep->com);
+}
+
+/* Return a snapshot of the endpoint state, read while holding epc->mutex. */
+static enum c4iw_ep_state
+state_read(struct c4iw_ep_common *epc)
+{
+	enum c4iw_ep_state snapshot;
+
+	mutex_lock(&epc->mutex);
+	snapshot = epc->state;
+	mutex_unlock(&epc->mutex);
+
+	return (snapshot);
+}
+
+/*
+ * Store a new endpoint state without taking epc->mutex.  The callers in this
+ * file invoke it with the mutex already held; state_set() is the variant
+ * that takes the mutex itself.
+ */
+static void
+__state_set(struct c4iw_ep_common *epc, enum c4iw_ep_state new)
+{
+	epc->state = new;
+}
+
+/* Store a new endpoint state while holding epc->mutex. */
+static void
+state_set(struct c4iw_ep_common *epc, enum c4iw_ep_state new)
+{
+	mutex_lock(&epc->mutex);
+	epc->state = new;
+	mutex_unlock(&epc->mutex);
+}
+
+/*
+ * Allocate a zeroed endpoint of 'size' bytes and initialize the common part:
+ * reference count, mutex and work-request wait object.
+ *
+ * Returns the new endpoint, or NULL if the allocation fails.
+ */
+static void *
+alloc_ep(int size, gfp_t gfp)
+{
+	struct c4iw_ep_common *epc = kzalloc(size, gfp);
+
+	if (epc != NULL) {
+		kref_init(&epc->kref);
+		mutex_init(&epc->mutex);
+		c4iw_init_wr_wait(&epc->wr_wait);
+	}
+
+	return (epc);
+}
+
+/*
+ * Free an endpoint directly, bypassing the reference count.
+ *
+ * The endpoint must no longer own a socket and must not be queued on
+ * req_list (both asserted).  alloc_ep() obtains the memory with kzalloc(),
+ * so it is released with kfree() -- the same call _c4iw_free_ep() uses --
+ * rather than with free() under a different malloc type.
+ */
+void
+__free_ep(struct c4iw_ep_common *epc)
+{
+	CTR2(KTR_IW_CXGBE, "%s:feB %p", __func__, epc);
+	KASSERT(!epc->so, ("%s warning ep->so %p \n", __func__, epc->so));
+	KASSERT(!epc->entry.tqe_prev, ("%s epc %p still on req list!\n", __func__, epc));
+	kfree(epc);
+	CTR2(KTR_IW_CXGBE, "%s:feE %p", __func__, epc);
+}
+
+/*
+ * Free the endpoint that embeds 'kref'.  The endpoint must no longer own a
+ * socket and must not be queued on req_list (both asserted).
+ */
+void
+_c4iw_free_ep(struct kref *kref)
+{
+	struct c4iw_ep *ep = container_of(kref, struct c4iw_ep, com.kref);
+	struct c4iw_ep_common *epc = &ep->com;
+
+	KASSERT(!epc->so, ("%s ep->so %p", __func__, epc->so));
+	KASSERT(!epc->entry.tqe_prev, ("%s epc %p still on req list",
+	    __func__, epc));
+
+	kfree(ep);
+}
+
+/*
+ * Flag the endpoint with RELEASE_RESOURCES and drop one reference on it.
+ */
+static void
+release_ep_resources(struct c4iw_ep *ep)
+{
+
+	CTR2(KTR_IW_CXGBE, "%s:rerB %p", __func__, ep);
+
+	set_bit(RELEASE_RESOURCES, &ep->com.flags);
+	c4iw_put_ep(&ep->com);
+
+	CTR2(KTR_IW_CXGBE, "%s:rerE %p", __func__, ep);
+}
+
+/*
+ * Build the MPA request for an endpoint and send it on ep->com.so.
+ *
+ * The revision used is the mpa_rev tunable, or 1 when the endpoint asked to
+ * retry with MPA v1.  For revision 2 a struct mpa_v2_conn_params (ird/ord
+ * plus peer2peer/RTR flags) precedes the caller's private data, which is
+ * taken from ep->mpa_pkt.
+ *
+ * On mbuf allocation failure or send failure the connect is reported as
+ * failed with -ENOMEM through connect_reply_upcall().  On success the
+ * endpoint timer is started and the state becomes MPA_REQ_SENT.
+ */
+static void
+send_mpa_req(struct c4iw_ep *ep)
+{
+	int mpalen;
+	struct mpa_message *mpa;
+	struct mpa_v2_conn_params mpa_v2_params;
+	struct mbuf *m;
+	char mpa_rev_to_use = mpa_rev;
+	int err;
+
+	if (ep->retry_with_mpa_v1)
+		mpa_rev_to_use = 1;
+	mpalen = sizeof(*mpa) + ep->plen;
+	if (mpa_rev_to_use == 2)
+		mpalen += sizeof(struct mpa_v2_conn_params);
+
+	/* The whole message has to fit in the data area of one header mbuf. */
+	if (mpalen > MHLEN)
+		CXGBE_UNIMPLEMENTED(__func__);
+
+	m = m_gethdr(M_NOWAIT, MT_DATA);
+	if (m == NULL) {
+		connect_reply_upcall(ep, -ENOMEM);
+		return;
+	}
+
+	mpa = mtod(m, struct mpa_message *);
+	m->m_len = mpalen;
+	m->m_pkthdr.len = mpalen;
+	/* Start from a zeroed header, as the reject/reply builders do. */
+	memset(mpa, 0, sizeof(*mpa));
+	memcpy(mpa->key, MPA_KEY_REQ, sizeof(mpa->key));
+	mpa->flags = (crc_enabled ? MPA_CRC : 0) |
+	    (markers_enabled ? MPA_MARKERS : 0) |
+	    (mpa_rev_to_use == 2 ? MPA_ENHANCED_RDMA_CONN : 0);
+	mpa->private_data_size = htons(ep->plen);
+	mpa->revision = mpa_rev_to_use;
+
+	if (mpa_rev_to_use == 1) {
+		ep->tried_with_mpa_v1 = 1;
+		ep->retry_with_mpa_v1 = 0;
+	}
+
+	if (mpa_rev_to_use == 2) {
+		/*
+		 * Add the lengths in host order and convert once.  Adding two
+		 * htons() results gives a wrong value on little-endian hosts
+		 * as soon as the sum carries out of the low byte.
+		 */
+		mpa->private_data_size = htons((u16)(ep->plen +
+		    sizeof(struct mpa_v2_conn_params)));
+		mpa_v2_params.ird = htons((u16)ep->ird);
+		mpa_v2_params.ord = htons((u16)ep->ord);
+
+		if (peer2peer) {
+			mpa_v2_params.ird |= htons(MPA_V2_PEER2PEER_MODEL);
+
+			if (p2p_type == FW_RI_INIT_P2PTYPE_RDMA_WRITE) {
+				mpa_v2_params.ord |=
+				    htons(MPA_V2_RDMA_WRITE_RTR);
+			} else if (p2p_type == FW_RI_INIT_P2PTYPE_READ_REQ) {
+				mpa_v2_params.ord |=
+				    htons(MPA_V2_RDMA_READ_RTR);
+			}
+		}
+		memcpy(mpa->private_data, &mpa_v2_params,
+		    sizeof(struct mpa_v2_conn_params));
+
+		if (ep->plen) {
+
+			memcpy(mpa->private_data +
+			    sizeof(struct mpa_v2_conn_params),
+			    ep->mpa_pkt + sizeof(*mpa), ep->plen);
+		}
+	} else {
+
+		if (ep->plen)
+			memcpy(mpa->private_data,
+			    ep->mpa_pkt + sizeof(*mpa), ep->plen);
+		CTR2(KTR_IW_CXGBE, "%s:smr7 %p", __func__, ep);
+	}
+
+	err = sosend(ep->com.so, NULL, NULL, m, NULL, MSG_DONTWAIT, ep->com.thread);
+	if (err) {
+		/*
+		 * NOTE(review): every send failure is reported as -ENOMEM;
+		 * the actual error is in 'err'.
+		 */
+		connect_reply_upcall(ep, -ENOMEM);
+		return;
+	}
+
+	START_EP_TIMER(ep);
+	state_set(&ep->com, MPA_REQ_SENT);
+	ep->mpa_attr.initiator = 1;
+}
+
+/*
+ * Build an MPA reply with the MPA_REJECT flag set and send it on ep->com.so.
+ *
+ * pdata/plen describe the private data to carry.  When the peer negotiated
+ * MPA v2 with enhanced RDMA connection setup, a struct mpa_v2_conn_params
+ * precedes the private data.
+ *
+ * Returns -ENOMEM if no mbuf is available, otherwise the value returned by
+ * sosend(); ep->snd_seq is advanced by the message length only when the send
+ * succeeded.
+ */
+static int
+send_mpa_reject(struct c4iw_ep *ep, const void *pdata, u8 plen)
+{
+	int mpalen;
+	struct mpa_message *mpa;
+	struct mpa_v2_conn_params mpa_v2_params;
+	struct mbuf *m;
+	int err;
+
+	CTR4(KTR_IW_CXGBE, "%s:smrejB %p %u %d", __func__, ep, ep->hwtid,
+	    ep->plen);
+
+	mpalen = sizeof(*mpa) + plen;
+
+	if (ep->mpa_attr.version == 2 && ep->mpa_attr.enhanced_rdma_conn) {
+
+		mpalen += sizeof(struct mpa_v2_conn_params);
+		CTR4(KTR_IW_CXGBE, "%s:smrej1 %p %u %d", __func__, ep,
+		    ep->mpa_attr.version, mpalen);
+	}
+
+	/* The whole message has to fit in the data area of one header mbuf. */
+	if (mpalen > MHLEN)
+		CXGBE_UNIMPLEMENTED(__func__);
+
+	m = m_gethdr(M_NOWAIT, MT_DATA);
+	if (m == NULL) {
+
+		printf("%s - cannot alloc mbuf!\n", __func__);
+		CTR2(KTR_IW_CXGBE, "%s:smrej2 %p", __func__, ep);
+		return (-ENOMEM);
+	}
+
+	mpa = mtod(m, struct mpa_message *);
+	m->m_len = mpalen;
+	m->m_pkthdr.len = mpalen;
+	memset(mpa, 0, sizeof(*mpa));
+	memcpy(mpa->key, MPA_KEY_REP, sizeof(mpa->key));
+	mpa->flags = MPA_REJECT;
+	/*
+	 * NOTE(review): this uses the mpa_rev tunable while send_mpa_reply()
+	 * uses ep->mpa_attr.version -- confirm which one a reject should carry.
+	 */
+	mpa->revision = mpa_rev;
+	mpa->private_data_size = htons(plen);
+
+	if (ep->mpa_attr.version == 2 && ep->mpa_attr.enhanced_rdma_conn) {
+
+		mpa->flags |= MPA_ENHANCED_RDMA_CONN;
+		/*
+		 * Add the lengths in host order and convert once.  Adding two
+		 * htons() results gives a wrong value on little-endian hosts
+		 * as soon as the sum carries out of the low byte.
+		 */
+		mpa->private_data_size = htons((u16)(plen +
+		    sizeof(struct mpa_v2_conn_params)));
+		mpa_v2_params.ird = htons(((u16)ep->ird) |
+		    (peer2peer ? MPA_V2_PEER2PEER_MODEL :
+		    0));
+		mpa_v2_params.ord = htons(((u16)ep->ord) | (peer2peer ?
+		    (p2p_type ==
+		    FW_RI_INIT_P2PTYPE_RDMA_WRITE ?
+		    MPA_V2_RDMA_WRITE_RTR : p2p_type ==
+		    FW_RI_INIT_P2PTYPE_READ_REQ ?
+		    MPA_V2_RDMA_READ_RTR : 0) : 0));
+		memcpy(mpa->private_data, &mpa_v2_params,
+		    sizeof(struct mpa_v2_conn_params));
+
+		/*
+		 * The amount copied is the caller's plen, so that is what
+		 * decides whether there is anything to copy (not ep->plen).
+		 */
+		if (plen)
+			memcpy(mpa->private_data +
+			    sizeof(struct mpa_v2_conn_params), pdata, plen);
+		CTR5(KTR_IW_CXGBE, "%s:smrej3 %p %d %d %d", __func__, ep,
+		    mpa_v2_params.ird, mpa_v2_params.ord, ep->plen);
+	} else {
+		if (plen)
+			memcpy(mpa->private_data, pdata, plen);
+	}
+
+	err = sosend(ep->com.so, NULL, NULL, m, NULL, MSG_DONTWAIT, ep->com.thread);
+	if (!err)
+		ep->snd_seq += mpalen;
+	CTR4(KTR_IW_CXGBE, "%s:smrejE %p %u %d", __func__, ep, ep->hwtid, err);
+	return err;
+}
+
+/*
+ * Build an MPA reply (accept) and send it on ep->com.so.
+ *
+ * pdata/plen describe the private data to carry.  When the peer negotiated
+ * MPA v2 with enhanced RDMA connection setup, a struct mpa_v2_conn_params
+ * (ird/ord plus peer2peer/RTR flags) precedes the private data.
+ *
+ * The endpoint state is set to MPA_REP_SENT and ep->snd_seq is advanced by
+ * the message length before the send is attempted.
+ *
+ * Returns -ENOMEM if no mbuf is available, otherwise the value returned by
+ * sosend().
+ */
+static int
+send_mpa_reply(struct c4iw_ep *ep, const void *pdata, u8 plen)
+{
+	int mpalen;
+	struct mpa_message *mpa;
+	struct mbuf *m;
+	struct mpa_v2_conn_params mpa_v2_params;
+	int err;
+
+	CTR2(KTR_IW_CXGBE, "%s:smrepB %p", __func__, ep);
+
+	mpalen = sizeof(*mpa) + plen;
+
+	if (ep->mpa_attr.version == 2 && ep->mpa_attr.enhanced_rdma_conn) {
+
+		CTR3(KTR_IW_CXGBE, "%s:smrep1 %p %d", __func__, ep,
+		    ep->mpa_attr.version);
+		mpalen += sizeof(struct mpa_v2_conn_params);
+	}
+
+	/* The whole message has to fit in the data area of one header mbuf. */
+	if (mpalen > MHLEN)
+		CXGBE_UNIMPLEMENTED(__func__);
+
+	m = m_gethdr(M_NOWAIT, MT_DATA);
+	if (m == NULL) {
+
+		CTR2(KTR_IW_CXGBE, "%s:smrep2 %p", __func__, ep);
+		printf("%s - cannot alloc mbuf!\n", __func__);
+		return (-ENOMEM);
+	}
+
+	mpa = mtod(m, struct mpa_message *);
+	m->m_len = mpalen;
+	m->m_pkthdr.len = mpalen;
+	memset(mpa, 0, sizeof(*mpa));
+	memcpy(mpa->key, MPA_KEY_REP, sizeof(mpa->key));
+	mpa->flags = (ep->mpa_attr.crc_enabled ? MPA_CRC : 0) |
+	    (markers_enabled ? MPA_MARKERS : 0);
+	mpa->revision = ep->mpa_attr.version;
+	mpa->private_data_size = htons(plen);
+
+	if (ep->mpa_attr.version == 2 && ep->mpa_attr.enhanced_rdma_conn) {
+
+		mpa->flags |= MPA_ENHANCED_RDMA_CONN;
+		/*
+		 * Add the lengths in host order and convert once.  Adding two
+		 * htons() results gives a wrong value on little-endian hosts
+		 * as soon as the sum carries out of the low byte.
+		 */
+		mpa->private_data_size = htons((u16)(plen +
+		    sizeof(struct mpa_v2_conn_params)));
+		mpa_v2_params.ird = htons((u16)ep->ird);
+		mpa_v2_params.ord = htons((u16)ep->ord);
+		CTR5(KTR_IW_CXGBE, "%s:smrep3 %p %d %d %d", __func__, ep,
+		    ep->mpa_attr.version, mpa_v2_params.ird, mpa_v2_params.ord);
+
+		if (peer2peer && (ep->mpa_attr.p2p_type !=
+		    FW_RI_INIT_P2PTYPE_DISABLED)) {
+
+			mpa_v2_params.ird |= htons(MPA_V2_PEER2PEER_MODEL);
+
+			if (p2p_type == FW_RI_INIT_P2PTYPE_RDMA_WRITE) {
+
+				mpa_v2_params.ord |=
+				    htons(MPA_V2_RDMA_WRITE_RTR);
+				CTR5(KTR_IW_CXGBE, "%s:smrep4 %p %d %d %d",
+				    __func__, ep, p2p_type, mpa_v2_params.ird,
+				    mpa_v2_params.ord);
+			}
+			else if (p2p_type == FW_RI_INIT_P2PTYPE_READ_REQ) {
+
+				mpa_v2_params.ord |=
+				    htons(MPA_V2_RDMA_READ_RTR);
+				CTR5(KTR_IW_CXGBE, "%s:smrep5 %p %d %d %d",
+				    __func__, ep, p2p_type, mpa_v2_params.ird,
+				    mpa_v2_params.ord);
+			}
+		}
+
+		memcpy(mpa->private_data, &mpa_v2_params,
+		    sizeof(struct mpa_v2_conn_params));
+
+		/*
+		 * The amount copied is the caller's plen, so that is what
+		 * decides whether there is anything to copy (not ep->plen).
+		 */
+		if (plen)
+			memcpy(mpa->private_data +
+			    sizeof(struct mpa_v2_conn_params), pdata, plen);
+	} else {
+		if (plen)
+			memcpy(mpa->private_data, pdata, plen);
+	}
+
+	state_set(&ep->com, MPA_REP_SENT);
+	ep->snd_seq += mpalen;
+	err = sosend(ep->com.so, NULL, NULL, m, NULL, MSG_DONTWAIT,
+	    ep->com.thread);
+	CTR3(KTR_IW_CXGBE, "%s:smrepE %p %d", __func__, ep, err);
+	return err;
+}
+
+
+
+/*
+ * Deliver IW_CM_EVENT_CLOSE to the ULP and detach the endpoint from its
+ * cm_id: the cm_id reference is dropped and both ep->com.cm_id and
+ * ep->com.qp are cleared.  Does nothing beyond tracing when the endpoint
+ * has no cm_id.
+ */
+static void close_complete_upcall(struct c4iw_ep *ep)
+{
+	struct iw_cm_event event;
+
+	CTR2(KTR_IW_CXGBE, "%s:ccuB %p", __func__, ep);
+	memset(&event, 0, sizeof(event));
+	event.event = IW_CM_EVENT_CLOSE;
+
+	if (ep->com.cm_id) {
+
+		/* The format used to read "%1", which is not a conversion. */
+		CTR2(KTR_IW_CXGBE, "%s:ccu1 %p", __func__, ep);
+		ep->com.cm_id->event_handler(ep->com.cm_id, &event);
+		ep->com.cm_id->rem_ref(ep->com.cm_id);
+		ep->com.cm_id = NULL;
+		ep->com.qp = NULL;
+		set_bit(CLOSE_UPCALL, &ep->com.history);
+	}
+	CTR2(KTR_IW_CXGBE, "%s:ccuE %p", __func__, ep);
+}
+
+/*
+ * Tear the connection down abortively: issue the close upcall, mark the
+ * endpoint ABORTING, abort the socket and close it.  Returns the result of
+ * close_socket().
+ */
+static int abort_connection(struct c4iw_ep *ep)
+{
+	int rc;
+
+	CTR2(KTR_IW_CXGBE, "%s:abB %p", __func__, ep);
+
+	/* The ULP is notified before the socket is touched. */
+	close_complete_upcall(ep);
+	state_set(&ep->com, ABORTING);
+	abort_socket(ep);
+	rc = close_socket(&ep->com, 0);
+
+	set_bit(ABORT_CONN, &ep->com.history);
+	CTR2(KTR_IW_CXGBE, "%s:abE %p", __func__, ep);
+	return (rc);
+}
+
+/*
+ * Deliver IW_CM_EVENT_DISCONNECT to the ULP.  The cm_id stays attached to
+ * the endpoint; only the DISCONN_UPCALL history bit is recorded.
+ */
+static void peer_close_upcall(struct c4iw_ep *ep)
+{
+	struct iw_cm_event ev;
+
+	CTR2(KTR_IW_CXGBE, "%s:pcuB %p", __func__, ep);
+
+	memset(&ev, 0, sizeof(ev));
+	ev.event = IW_CM_EVENT_DISCONNECT;
+
+	if (ep->com.cm_id != NULL) {
+		CTR2(KTR_IW_CXGBE, "%s:pcu1 %p", __func__, ep);
+		ep->com.cm_id->event_handler(ep->com.cm_id, &ev);
+		set_bit(DISCONN_UPCALL, &ep->com.history);
+	}
+
+	CTR2(KTR_IW_CXGBE, "%s:pcuE %p", __func__, ep);
+}
+
+/*
+ * Deliver IW_CM_EVENT_CLOSE with status -ECONNRESET to the ULP, then drop
+ * the cm_id reference and clear ep->com.cm_id and ep->com.qp.
+ */
+static void peer_abort_upcall(struct c4iw_ep *ep)
+{
+	struct iw_cm_event ev;
+
+	CTR2(KTR_IW_CXGBE, "%s:pauB %p", __func__, ep);
+
+	memset(&ev, 0, sizeof(ev));
+	ev.status = -ECONNRESET;
+	ev.event = IW_CM_EVENT_CLOSE;
+
+	if (ep->com.cm_id != NULL) {
+		CTR2(KTR_IW_CXGBE, "%s:pau1 %p", __func__, ep);
+		ep->com.cm_id->event_handler(ep->com.cm_id, &ev);
+
+		/* Detach from the cm_id once the event has been delivered. */
+		ep->com.cm_id->rem_ref(ep->com.cm_id);
+		ep->com.cm_id = NULL;
+		ep->com.qp = NULL;
+		set_bit(ABORT_UPCALL, &ep->com.history);
+	}
+
+	CTR2(KTR_IW_CXGBE, "%s:pauE %p", __func__, ep);
+}
+
+/*
+ * Deliver IW_CM_EVENT_CONNECT_REPLY to the ULP.
+ *
+ * status is the connect result: 0 on success, negative errno on failure.
+ * -ECONNABORTED is reported to the ULP as -ECONNRESET and leaves the cm_id
+ * attached.  For a status of 0 or -ECONNREFUSED the peer's private data
+ * (located in ep->mpa_pkt) is attached to the event.  Any other negative
+ * status detaches the endpoint from its cm_id and QP after the upcall.
+ */
+static void connect_reply_upcall(struct c4iw_ep *ep, int status)
+{
+	struct iw_cm_event event;
+
+	/* The format needs a conversion for the third argument. */
+	CTR3(KTR_IW_CXGBE, "%s:cruB %p %d", __func__, ep, status);
+	memset(&event, 0, sizeof(event));
+	event.event = IW_CM_EVENT_CONNECT_REPLY;
+	event.status = (status ==-ECONNABORTED)?-ECONNRESET: status;
+	event.local_addr = ep->com.local_addr;
+	event.remote_addr = ep->com.remote_addr;
+
+	if ((status == 0) || (status == -ECONNREFUSED)) {
+
+		if (!ep->tried_with_mpa_v1) {
+
+			CTR2(KTR_IW_CXGBE, "%s:cru1 %p", __func__, ep);
+			/* this means MPA_v2 is used */
+			event.private_data_len = ep->plen -
+			    sizeof(struct mpa_v2_conn_params);
+			event.private_data = ep->mpa_pkt +
+			    sizeof(struct mpa_message) +
+			    sizeof(struct mpa_v2_conn_params);
+		} else {
+
+			CTR2(KTR_IW_CXGBE, "%s:cru2 %p", __func__, ep);
+			/* this means MPA_v1 is used */
+			event.private_data_len = ep->plen;
+			event.private_data = ep->mpa_pkt +
+			    sizeof(struct mpa_message);
+		}
+	}
+
+	if (ep->com.cm_id) {
+
+		CTR2(KTR_IW_CXGBE, "%s:cru3 %p", __func__, ep);
+		set_bit(CONN_RPL_UPCALL, &ep->com.history);
+		ep->com.cm_id->event_handler(ep->com.cm_id, &event);
+	}
+
+	if(status == -ECONNABORTED) {
+
+		CTR3(KTR_IW_CXGBE, "%s:cruE %p %d", __func__, ep, status);
+		return;
+	}
+
+	if (status < 0) {
+
+		CTR3(KTR_IW_CXGBE, "%s:cru4 %p %d", __func__, ep, status);
+		/*
+		 * The cm_id may already be gone (close_complete_upcall() and
+		 * peer_abort_upcall() clear it), so only drop the reference
+		 * if it is still attached.
+		 */
+		if (ep->com.cm_id != NULL) {
+			ep->com.cm_id->rem_ref(ep->com.cm_id);
+			ep->com.cm_id = NULL;
+		}
+		ep->com.qp = NULL;
+	}
+
+	CTR2(KTR_IW_CXGBE, "%s:cruE %p", __func__, ep);
+}
+
+/*
+ * Deliver IW_CM_EVENT_CONNECT_REQUEST for ep through the cm_id of its
+ * parent endpoint.  The event carries ep as provider_data, the socket, both
+ * addresses and the private data received in the MPA request.  A reference
+ * is taken on ep before the upcall and one is dropped on the parent after.
+ */
+static void connect_request_upcall(struct c4iw_ep *ep)
+{
+	struct iw_cm_event ev;
+
+	CTR3(KTR_IW_CXGBE, "%s: ep %p, mpa_v1 %d", __func__, ep,
+	    ep->tried_with_mpa_v1);
+
+	memset(&ev, 0, sizeof(ev));
+	ev.event = IW_CM_EVENT_CONNECT_REQUEST;
+	ev.provider_data = ep;
+	ev.so = ep->com.so;
+	ev.local_addr = ep->com.local_addr;
+	ev.remote_addr = ep->com.remote_addr;
+
+	if (ep->tried_with_mpa_v1) {
+		/* MPA v1: no ird/ord on the wire, report the maximum. */
+#ifdef IW_CM_MPAV2
+		ev.ord = c4iw_max_read_depth;
+		ev.ird = c4iw_max_read_depth;
+#endif
+		ev.private_data = ep->mpa_pkt + sizeof(struct mpa_message);
+		ev.private_data_len = ep->plen;
+	} else {
+		/* MPA v2: private data follows the v2 connection params. */
+#ifdef IW_CM_MPAV2
+		ev.ord = ep->ord;
+		ev.ird = ep->ird;
+#endif
+		ev.private_data = ep->mpa_pkt + sizeof(struct mpa_message) +
+		    sizeof(struct mpa_v2_conn_params);
+		ev.private_data_len = ep->plen -
+		    sizeof(struct mpa_v2_conn_params);
+	}
+
+	c4iw_get_ep(&ep->com);
+	ep->parent_ep->com.cm_id->event_handler(ep->parent_ep->com.cm_id,
+	    &ev);
+	set_bit(CONNREQ_UPCALL, &ep->com.history);
+	c4iw_put_ep(&ep->parent_ep->com);
+}
+
+/*
+ * Deliver IW_CM_EVENT_ESTABLISHED to the ULP.  When IW_CM_MPAV2 is defined
+ * the event also carries the endpoint's ird and ord.
+ */
+static void established_upcall(struct c4iw_ep *ep)
+{
+	struct iw_cm_event ev;
+
+	CTR2(KTR_IW_CXGBE, "%s:euB %p", __func__, ep);
+
+	memset(&ev, 0, sizeof(ev));
+	ev.event = IW_CM_EVENT_ESTABLISHED;
+#ifdef IW_CM_MPAV2
+	ev.ord = ep->ord;
+	ev.ird = ep->ird;
+#endif
+
+	if (ep->com.cm_id != NULL) {
+		CTR2(KTR_IW_CXGBE, "%s:eu1 %p", __func__, ep);
+		ep->com.cm_id->event_handler(ep->com.cm_id, &ev);
+		set_bit(ESTAB_UPCALL, &ep->com.history);
+	}
+
+	CTR2(KTR_IW_CXGBE, "%s:euE %p", __func__, ep);
+}
+
+
+
+/*
+ * Handle the peer's MPA start reply on an endpoint in MPA_REQ_SENT.
+ *
+ * Data is pulled from the socket and accumulated in ep->mpa_pkt.  Once the
+ * complete reply (header plus private data) is present it is validated, the
+ * negotiated MPA attributes are recorded, the QP is moved to RTS and the
+ * ULP receives the CONNECT_REPLY upcall.  An RTR mismatch or insufficient
+ * IRD moves the QP to TERMINATE and reports -ENOMEM.  On any other failure
+ * the ULP is notified with the error and the connection is aborted.
+ */
+static void process_mpa_reply(struct c4iw_ep *ep)
+{
+	struct mpa_message *mpa;
+	struct mpa_v2_conn_params *mpa_v2_params;
+	u16 plen;
+	u16 resp_ird, resp_ord;
+	u8 rtr_mismatch = 0, insuff_ird = 0;
+	struct c4iw_qp_attributes attrs;
+	enum c4iw_qp_attr_mask mask;
+	int err;
+	struct mbuf *top, *m;
+	int flags = MSG_DONTWAIT;
+	struct uio uio;
+
+	CTR2(KTR_IW_CXGBE, "%s:pmrB %p", __func__, ep);
+
+	/*
+	 * Stop mpa timer.  If it expired, then the state has
+	 * changed and we bail since ep_timeout already aborted
+	 * the connection.
+	 */
+	STOP_EP_TIMER(ep);
+	if (state_read(&ep->com) != MPA_REQ_SENT)
+		return;
+
+	uio.uio_resid = 1000000;
+	uio.uio_td = ep->com.thread;
+	err = soreceive(ep->com.so, NULL, &uio, &top, NULL, &flags);
+
+	if (err) {
+
+		if (err == EWOULDBLOCK) {
+
+			CTR2(KTR_IW_CXGBE, "%s:pmr1 %p", __func__, ep);
+			START_EP_TIMER(ep);
+			return;
+		}
+		err = -err;
+		CTR2(KTR_IW_CXGBE, "%s:pmr2 %p", __func__, ep);
+		goto err;
+	}
+
+	if (ep->com.so->so_rcv.sb_mb) {
+
+		CTR2(KTR_IW_CXGBE, "%s:pmr3 %p", __func__, ep);
+		printf("%s data after soreceive called! so %p sb_mb %p top %p\n",
+		    __func__, ep->com.so, ep->com.so->so_rcv.sb_mb, top);
+	}
+
+	m = top;
+
+	do {
+
+		CTR2(KTR_IW_CXGBE, "%s:pmr4 %p", __func__, ep);
+		/*
+		 * If we get more than the supported amount of private data
+		 * then we must fail this connection.
+		 */
+		if (ep->mpa_pkt_len + m->m_len > sizeof(ep->mpa_pkt)) {
+
+			CTR3(KTR_IW_CXGBE, "%s:pmr5 %p %d", __func__, ep,
+			    ep->mpa_pkt_len + m->m_len);
+			/* Release the received chain before bailing out. */
+			m_freem(top);
+			err = (-EINVAL);
+			goto err;
+		}
+
+		/*
+		 * copy the new data into our accumulation buffer.
+		 */
+		m_copydata(m, 0, m->m_len, &(ep->mpa_pkt[ep->mpa_pkt_len]));
+		ep->mpa_pkt_len += m->m_len;
+		if (!m->m_next)
+			m = m->m_nextpkt;
+		else
+			m = m->m_next;
+	} while (m);
+
+	m_freem(top);
+	/*
+	 * if we don't even have the mpa message, then bail.
+	 * NOTE(review): the timer was stopped above and is not restarted on
+	 * this partial-data return (nor on the one below) - confirm that is
+	 * intended.
+	 */
+	if (ep->mpa_pkt_len < sizeof(*mpa))
+		return;
+	mpa = (struct mpa_message *) ep->mpa_pkt;
+
+	/* Validate MPA header. */
+	if (mpa->revision > mpa_rev) {
+
+		CTR4(KTR_IW_CXGBE, "%s:pmr6 %p %d %d", __func__, ep,
+		    mpa->revision, mpa_rev);
+		printk(KERN_ERR MOD "%s MPA version mismatch. Local = %d, "
+		    " Received = %d\n", __func__, mpa_rev, mpa->revision);
+		err = -EPROTO;
+		goto err;
+	}
+
+	if (memcmp(mpa->key, MPA_KEY_REP, sizeof(mpa->key))) {
+
+		CTR2(KTR_IW_CXGBE, "%s:pmr7 %p", __func__, ep);
+		err = -EPROTO;
+		goto err;
+	}
+
+	plen = ntohs(mpa->private_data_size);
+
+	/*
+	 * Fail if there's too much private data.
+	 */
+	if (plen > MPA_MAX_PRIVATE_DATA) {
+
+		CTR2(KTR_IW_CXGBE, "%s:pmr8 %p", __func__, ep);
+		err = -EPROTO;
+		goto err;
+	}
+
+	/*
+	 * If plen does not account for pkt size
+	 */
+	if (ep->mpa_pkt_len > (sizeof(*mpa) + plen)) {
+
+		CTR2(KTR_IW_CXGBE, "%s:pmr9 %p", __func__, ep);
+		err = -EPROTO;
+		goto err;
+	}
+
+	ep->plen = (u8) plen;
+
+	/*
+	 * If we don't have all the pdata yet, then bail.
+	 * We'll continue process when more data arrives.
+	 */
+	if (ep->mpa_pkt_len < (sizeof(*mpa) + plen)) {
+
+		CTR2(KTR_IW_CXGBE, "%s:pmra %p", __func__, ep);
+		return;
+	}
+
+	if (mpa->flags & MPA_REJECT) {
+
+		CTR2(KTR_IW_CXGBE, "%s:pmrb %p", __func__, ep);
+		err = -ECONNREFUSED;
+		goto err;
+	}
+
+	/*
+	 * If we get here we have accumulated the entire mpa
+	 * start reply message including private data.  And
+	 * the MPA header is valid.
+	 */
+	state_set(&ep->com, FPDU_MODE);
+	ep->mpa_attr.crc_enabled = (mpa->flags & MPA_CRC) | crc_enabled ? 1 : 0;
+	ep->mpa_attr.recv_marker_enabled = markers_enabled;
+	ep->mpa_attr.xmit_marker_enabled = mpa->flags & MPA_MARKERS ? 1 : 0;
+	ep->mpa_attr.version = mpa->revision;
+	ep->mpa_attr.p2p_type = FW_RI_INIT_P2PTYPE_DISABLED;
+
+	if (mpa->revision == 2) {
+
+		CTR2(KTR_IW_CXGBE, "%s:pmrc %p", __func__, ep);
+		ep->mpa_attr.enhanced_rdma_conn =
+		    mpa->flags & MPA_ENHANCED_RDMA_CONN ? 1 : 0;
+
+		if (ep->mpa_attr.enhanced_rdma_conn) {
+
+			CTR2(KTR_IW_CXGBE, "%s:pmrd %p", __func__, ep);
+			mpa_v2_params = (struct mpa_v2_conn_params *)
+			    (ep->mpa_pkt + sizeof(*mpa));
+			resp_ird = ntohs(mpa_v2_params->ird) &
+			    MPA_V2_IRD_ORD_MASK;
+			resp_ord = ntohs(mpa_v2_params->ord) &
+			    MPA_V2_IRD_ORD_MASK;
+
+			/*
+			 * This is a double-check.  Ideally, below checks are
+			 * not required since ird/ord stuff has been taken
+			 * care of in c4iw_accept_cr
+			 */
+			if ((ep->ird < resp_ord) || (ep->ord > resp_ird)) {
+
+				CTR2(KTR_IW_CXGBE, "%s:pmre %p", __func__, ep);
+				err = -ENOMEM;
+				ep->ird = resp_ord;
+				ep->ord = resp_ird;
+				insuff_ird = 1;
+			}
+
+			if (ntohs(mpa_v2_params->ird) &
+			    MPA_V2_PEER2PEER_MODEL) {
+
+				CTR2(KTR_IW_CXGBE, "%s:pmrf %p", __func__, ep);
+				if (ntohs(mpa_v2_params->ord) &
+				    MPA_V2_RDMA_WRITE_RTR) {
+
+					CTR2(KTR_IW_CXGBE, "%s:pmrg %p", __func__, ep);
+					ep->mpa_attr.p2p_type =
+					    FW_RI_INIT_P2PTYPE_RDMA_WRITE;
+				}
+				else if (ntohs(mpa_v2_params->ord) &
+				    MPA_V2_RDMA_READ_RTR) {
+
+					CTR2(KTR_IW_CXGBE, "%s:pmrh %p", __func__, ep);
+					ep->mpa_attr.p2p_type =
+					    FW_RI_INIT_P2PTYPE_READ_REQ;
+				}
+			}
+		}
+	} else {
+
+		CTR2(KTR_IW_CXGBE, "%s:pmri %p", __func__, ep);
+
+		if (mpa->revision == 1) {
+
+			CTR2(KTR_IW_CXGBE, "%s:pmrj %p", __func__, ep);
+
+			if (peer2peer) {
+
+				CTR2(KTR_IW_CXGBE, "%s:pmrk %p", __func__, ep);
+				ep->mpa_attr.p2p_type = p2p_type;
+			}
+		}
+	}
+
+	if (set_tcpinfo(ep)) {
+
+		CTR2(KTR_IW_CXGBE, "%s:pmrl %p", __func__, ep);
+		printf("%s set_tcpinfo error\n", __func__);
+		/* Without this the failure was reported as status 0. */
+		err = -ECONNRESET;
+		goto err;
+	}
+
+	CTR6(KTR_IW_CXGBE, "%s - crc_enabled = %d, recv_marker_enabled = %d, "
+	    "xmit_marker_enabled = %d, version = %d p2p_type = %d", __func__,
+	    ep->mpa_attr.crc_enabled, ep->mpa_attr.recv_marker_enabled,
+	    ep->mpa_attr.xmit_marker_enabled, ep->mpa_attr.version,
+	    ep->mpa_attr.p2p_type);
+
+	/*
+	 * If responder's RTR does not match with that of initiator, assign
+	 * FW_RI_INIT_P2PTYPE_DISABLED in mpa attributes so that RTR is not
+	 * generated when moving QP to RTS state.
+	 * A TERM message will be sent after QP has moved to RTS state
+	 */
+	if ((ep->mpa_attr.version == 2) && peer2peer &&
+	    (ep->mpa_attr.p2p_type != p2p_type)) {
+
+		CTR2(KTR_IW_CXGBE, "%s:pmrm %p", __func__, ep);
+		ep->mpa_attr.p2p_type = FW_RI_INIT_P2PTYPE_DISABLED;
+		rtr_mismatch = 1;
+	}
+
+
+	//ep->ofld_txq = TOEPCB(ep->com.so)->ofld_txq;
+	attrs.mpa_attr = ep->mpa_attr;
+	attrs.max_ird = ep->ird;
+	attrs.max_ord = ep->ord;
+	attrs.llp_stream_handle = ep;
+	attrs.next_state = C4IW_QP_STATE_RTS;
+
+	mask = C4IW_QP_ATTR_NEXT_STATE |
+	    C4IW_QP_ATTR_LLP_STREAM_HANDLE | C4IW_QP_ATTR_MPA_ATTR |
+	    C4IW_QP_ATTR_MAX_IRD | C4IW_QP_ATTR_MAX_ORD;
+
+	/* bind QP and TID with INIT_WR */
+	err = c4iw_modify_qp(ep->com.qp->rhp, ep->com.qp, mask, &attrs, 1);
+
+	if (err) {
+
+		CTR2(KTR_IW_CXGBE, "%s:pmrn %p", __func__, ep);
+		goto err;
+	}
+
+	/*
+	 * If responder's RTR requirement did not match with what initiator
+	 * supports, generate TERM message
+	 */
+	if (rtr_mismatch) {
+
+		CTR2(KTR_IW_CXGBE, "%s:pmro %p", __func__, ep);
+		printk(KERN_ERR "%s: RTR mismatch, sending TERM\n", __func__);
+		attrs.layer_etype = LAYER_MPA | DDP_LLP;
+		attrs.ecode = MPA_NOMATCH_RTR;
+		attrs.next_state = C4IW_QP_STATE_TERMINATE;
+		err = c4iw_modify_qp(ep->com.qp->rhp, ep->com.qp,
+		    C4IW_QP_ATTR_NEXT_STATE, &attrs, 0);
+		err = -ENOMEM;
+		goto out;
+	}
+
+	/*
+	 * Generate TERM if initiator IRD is not sufficient for responder
+	 * provided ORD.  Currently, we do the same behaviour even when
+	 * responder provided IRD is also not sufficient as regards to
+	 * initiator ORD.
+	 */
+	if (insuff_ird) {
+
+		CTR2(KTR_IW_CXGBE, "%s:pmrp %p", __func__, ep);
+		printk(KERN_ERR "%s: Insufficient IRD, sending TERM\n",
+		    __func__);
+		attrs.layer_etype = LAYER_MPA | DDP_LLP;
+		attrs.ecode = MPA_INSUFF_IRD;
+		attrs.next_state = C4IW_QP_STATE_TERMINATE;
+		err = c4iw_modify_qp(ep->com.qp->rhp, ep->com.qp,
+		    C4IW_QP_ATTR_NEXT_STATE, &attrs, 0);
+		err = -ENOMEM;
+		goto out;
+	}
+	goto out;
+err:
+	/*
+	 * Report the failure before aborting.  abort_connection() issues the
+	 * close upcall, which clears ep->com.cm_id; calling
+	 * connect_reply_upcall() after that could not deliver the event and
+	 * would dereference the cleared cm_id for a negative status.
+	 */
+	state_set(&ep->com, ABORTING);
+	connect_reply_upcall(ep, err);
+	abort_connection(ep);
+	CTR2(KTR_IW_CXGBE, "%s:pmrE %p", __func__, ep);
+	return;
+out:
+	connect_reply_upcall(ep, err);
+	CTR2(KTR_IW_CXGBE, "%s:pmrE %p", __func__, ep);
+	return;
+}
+
+/*
+ * Handle an incoming MPA start request on an endpoint in MPA_REQ_WAIT.
+ *
+ * Received bytes are appended to ep->mpa_pkt.  The function returns without
+ * side effects until the full request (header plus private data) is
+ * present.  A malformed request, a receive error or a set_tcpinfo() failure
+ * stops the timer and aborts the connection.  On success the negotiated
+ * MPA attributes are stored in ep->mpa_attr, the endpoint moves to
+ * MPA_REQ_RCVD and the connect-request upcall is made through the parent
+ * endpoint (or the connection is aborted if the parent is DEAD).
+ */
+static void
+process_mpa_request(struct c4iw_ep *ep)
+{
+ struct mpa_message *mpa;
+ u16 plen;
+ int flags = MSG_DONTWAIT;
+ int rc;
+ struct iovec iov;
+ struct uio uio;
+ enum c4iw_ep_state state = state_read(&ep->com);
+
+ CTR3(KTR_IW_CXGBE, "%s: ep %p, state %s", __func__, ep, states[state]);
+
+ if (state != MPA_REQ_WAIT)
+ return;
+
+ /* Receive straight into the unused tail of the accumulation buffer. */
+ iov.iov_base = &ep->mpa_pkt[ep->mpa_pkt_len];
+ iov.iov_len = sizeof(ep->mpa_pkt) - ep->mpa_pkt_len;
+ uio.uio_iov = &iov;
+ uio.uio_iovcnt = 1;
+ uio.uio_offset = 0;
+ uio.uio_resid = sizeof(ep->mpa_pkt) - ep->mpa_pkt_len;
+ uio.uio_segflg = UIO_SYSSPACE;
+ uio.uio_rw = UIO_READ;
+ uio.uio_td = NULL; /* uio.uio_td = ep->com.thread; */
+
+ rc = soreceive(ep->com.so, NULL, &uio, NULL, NULL, &flags);
+ if (rc == EAGAIN)
+ return;
+ else if (rc) {
+ /* Shared failure exit; the validation checks below jump here too. */
+abort:
+ STOP_EP_TIMER(ep);
+ abort_connection(ep);
+ return;
+ }
+ KASSERT(uio.uio_offset > 0, ("%s: sorecieve on so %p read no data",
+ __func__, ep->com.so));
+ /* uio_offset started at 0, so it is used here as the byte count read. */
+ ep->mpa_pkt_len += uio.uio_offset;
+
+ /*
+ * If we get more than the supported amount of private data then we must
+ * fail this connection. XXX: check so_rcv->sb_cc, or peek with another
+ * soreceive, or increase the size of mpa_pkt by 1 and abort if the last
+ * byte is filled by the soreceive above.
+ */
+
+ /* Don't even have the MPA message. Wait for more data to arrive. */
+ if (ep->mpa_pkt_len < sizeof(*mpa))
+ return;
+ mpa = (struct mpa_message *) ep->mpa_pkt;
+
+ /*
+ * Validate MPA Header.
+ */
+ if (mpa->revision > mpa_rev) {
+ log(LOG_ERR, "%s: MPA version mismatch. Local = %d,"
+ " Received = %d\n", __func__, mpa_rev, mpa->revision);
+ goto abort;
+ }
+
+ if (memcmp(mpa->key, MPA_KEY_REQ, sizeof(mpa->key)))
+ goto abort;
+
+ /*
+ * Fail if there's too much private data.
+ */
+ plen = ntohs(mpa->private_data_size);
+ if (plen > MPA_MAX_PRIVATE_DATA)
+ goto abort;
+
+ /*
+ * If plen does not account for pkt size
+ */
+ if (ep->mpa_pkt_len > (sizeof(*mpa) + plen))
+ goto abort;
+
+ ep->plen = (u8) plen;
+
+ /*
+ * If we don't have all the pdata yet, then bail.
+ */
+ if (ep->mpa_pkt_len < (sizeof(*mpa) + plen))
+ return;
+
+ /*
+ * If we get here we have accumulated the entire mpa
+ * start request message including private data.
+ */
+ ep->mpa_attr.initiator = 0;
+ ep->mpa_attr.crc_enabled = (mpa->flags & MPA_CRC) | crc_enabled ? 1 : 0;
+ ep->mpa_attr.recv_marker_enabled = markers_enabled;
+ ep->mpa_attr.xmit_marker_enabled = mpa->flags & MPA_MARKERS ? 1 : 0;
+ ep->mpa_attr.version = mpa->revision;
+ if (mpa->revision == 1)
+ ep->tried_with_mpa_v1 = 1;
+ ep->mpa_attr.p2p_type = FW_RI_INIT_P2PTYPE_DISABLED;
+
+ if (mpa->revision == 2) {
+ ep->mpa_attr.enhanced_rdma_conn =
+ mpa->flags & MPA_ENHANCED_RDMA_CONN ? 1 : 0;
+ if (ep->mpa_attr.enhanced_rdma_conn) {
+ struct mpa_v2_conn_params *mpa_v2_params;
+ u16 ird, ord;
+
+ /*
+ * NOTE(review): plen is not checked to be at least
+ * sizeof(*mpa_v2_params) before the parameters are read
+ * from the buffer - confirm this cannot be reached with a
+ * shorter private data area.
+ */
+ mpa_v2_params = (void *)&ep->mpa_pkt[sizeof(*mpa)];
+ ird = ntohs(mpa_v2_params->ird);
+ ord = ntohs(mpa_v2_params->ord);
+
+ /* The upper bits of ird/ord carry the peer-to-peer flags. */
+ ep->ird = ird & MPA_V2_IRD_ORD_MASK;
+ ep->ord = ord & MPA_V2_IRD_ORD_MASK;
+ if (ird & MPA_V2_PEER2PEER_MODEL && peer2peer) {
+ if (ord & MPA_V2_RDMA_WRITE_RTR) {
+ ep->mpa_attr.p2p_type =
+ FW_RI_INIT_P2PTYPE_RDMA_WRITE;
+ } else if (ord & MPA_V2_RDMA_READ_RTR) {
+ ep->mpa_attr.p2p_type =
+ FW_RI_INIT_P2PTYPE_READ_REQ;
+ }
+ }
+ }
+ } else if (mpa->revision == 1 && peer2peer)
+ ep->mpa_attr.p2p_type = p2p_type;
+
+ if (set_tcpinfo(ep))
+ goto abort;
+
+ CTR5(KTR_IW_CXGBE, "%s: crc_enabled = %d, recv_marker_enabled = %d, "
+ "xmit_marker_enabled = %d, version = %d", __func__,
+ ep->mpa_attr.crc_enabled, ep->mpa_attr.recv_marker_enabled,
+ ep->mpa_attr.xmit_marker_enabled, ep->mpa_attr.version);
+
+ state_set(&ep->com, MPA_REQ_RCVD);
+ STOP_EP_TIMER(ep);
+
+ /* drive upcall */
+ /* The parent's mutex is held while its state is checked and acted on. */
+ mutex_lock(&ep->parent_ep->com.mutex);
+ if (ep->parent_ep->com.state != DEAD)
+ connect_request_upcall(ep);
+ else
+ abort_connection(ep);
+ mutex_unlock(&ep->parent_ep->com.mutex);
+}
+
+/*
+ * Reject the connection request held by cm_id.
+ *
+ * pdata/pdata_len are the private data handed to send_mpa_reject().  When
+ * mpa_rev is 0 no reject message is sent and the connection is aborted
+ * instead; otherwise the reject is sent and the socket is shut down.  The
+ * reject and shutdown are best effort: their results do not change the
+ * return value.
+ *
+ * Drops one reference on the endpoint.  Returns -ECONNRESET if the endpoint
+ * is already DEAD, 0 otherwise.
+ */
+int c4iw_reject_cr(struct iw_cm_id *cm_id, const void *pdata, u8 pdata_len)
+{
+	struct c4iw_ep *ep = to_ep(cm_id);
+	CTR2(KTR_IW_CXGBE, "%s:crcB %p", __func__, ep);
+
+	if (state_read(&ep->com) == DEAD) {
+
+		CTR2(KTR_IW_CXGBE, "%s:crc1 %p", __func__, ep);
+		c4iw_put_ep(&ep->com);
+		return -ECONNRESET;
+	}
+	set_bit(ULP_REJECT, &ep->com.history);
+	BUG_ON(state_read(&ep->com) != MPA_REQ_RCVD);
+
+	if (mpa_rev == 0) {
+
+		CTR2(KTR_IW_CXGBE, "%s:crc2 %p", __func__, ep);
+		abort_connection(ep);
+	}
+	else {
+
+		CTR2(KTR_IW_CXGBE, "%s:crc3 %p", __func__, ep);
+		(void)send_mpa_reject(ep, pdata, pdata_len);
+		/*
+		 * The literal 3 used here before is not one of the SHUT_*
+		 * values; name the intended "both directions" mode.
+		 */
+		(void)soshutdown(ep->com.so, SHUT_RDWR);
+	}
+	c4iw_put_ep(&ep->com);
+	CTR2(KTR_IW_CXGBE, "%s:crc4 %p", __func__, ep);
+	return 0;
+}
+
+/*
+ * Accept the connection request held by cm_id.
+ *
+ * conn_param supplies the QP number, the requested ird/ord and the private
+ * data for the MPA reply.  The QP is bound to the endpoint and moved to
+ * RTS, the MPA reply is sent, the endpoint enters FPDU_MODE and the
+ * ESTABLISHED upcall is made.
+ *
+ * Drops one reference on the endpoint on every path.  Returns 0 on success
+ * or a negative errno: -ECONNRESET if the endpoint is DEAD, -EINVAL if
+ * ird/ord exceed c4iw_max_read_depth, -ENOMEM on an ird/ord mismatch with
+ * the peer's MPA v2 parameters, or the error from c4iw_modify_qp() or
+ * send_mpa_reply().
+ */
+int c4iw_accept_cr(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param)
+{
+ int err;
+ struct c4iw_qp_attributes attrs;
+ enum c4iw_qp_attr_mask mask;
+ struct c4iw_ep *ep = to_ep(cm_id);
+ struct c4iw_dev *h = to_c4iw_dev(cm_id->device);
+ struct c4iw_qp *qp = get_qhp(h, conn_param->qpn);
+
+ CTR2(KTR_IW_CXGBE, "%s:cacB %p", __func__, ep);
+
+ if (state_read(&ep->com) == DEAD) {
+
+ CTR2(KTR_IW_CXGBE, "%s:cac1 %p", __func__, ep);
+ err = -ECONNRESET;
+ goto err;
+ }
+
+ BUG_ON(state_read(&ep->com) != MPA_REQ_RCVD);
+ BUG_ON(!qp);
+
+ set_bit(ULP_ACCEPT, &ep->com.history);
+
+ if ((conn_param->ord > c4iw_max_read_depth) ||
+ (conn_param->ird > c4iw_max_read_depth)) {
+
+ CTR2(KTR_IW_CXGBE, "%s:cac2 %p", __func__, ep);
+ abort_connection(ep);
+ err = -EINVAL;
+ goto err;
+ }
+
+ /* With MPA v2 the peer announced its ird/ord; check ours against them. */
+ if (ep->mpa_attr.version == 2 && ep->mpa_attr.enhanced_rdma_conn) {
+
+ CTR2(KTR_IW_CXGBE, "%s:cac3 %p", __func__, ep);
+
+ if (conn_param->ord > ep->ird) {
+
+ /* Our ORD exceeds what the peer offered: reject, then abort. */
+ CTR2(KTR_IW_CXGBE, "%s:cac4 %p", __func__, ep);
+ ep->ird = conn_param->ird;
+ ep->ord = conn_param->ord;
+ send_mpa_reject(ep, conn_param->private_data,
+ conn_param->private_data_len);
+ abort_connection(ep);
+ err = -ENOMEM;
+ goto err;
+ }
+
+ if (conn_param->ird > ep->ord) {
+
+ CTR2(KTR_IW_CXGBE, "%s:cac5 %p", __func__, ep);
+
+ if (!ep->ord) {
+
+ /* Peer announced ORD 0: fall back to an IRD of 1. */
+ CTR2(KTR_IW_CXGBE, "%s:cac6 %p", __func__, ep);
+ conn_param->ird = 1;
+ }
+ else {
+ CTR2(KTR_IW_CXGBE, "%s:cac7 %p", __func__, ep);
+ abort_connection(ep);
+ err = -ENOMEM;
+ goto err;
+ }
+ }
+
+ }
+ ep->ird = conn_param->ird;
+ ep->ord = conn_param->ord;
+
+ if (ep->mpa_attr.version != 2) {
+
+ CTR2(KTR_IW_CXGBE, "%s:cac8 %p", __func__, ep);
+
+ if (peer2peer && ep->ird == 0) {
+
+ CTR2(KTR_IW_CXGBE, "%s:cac9 %p", __func__, ep);
+ ep->ird = 1;
+ }
+ }
+
+
+ /* From here on the endpoint holds a cm_id reference; err1 undoes it. */
+ cm_id->add_ref(cm_id);
+ ep->com.cm_id = cm_id;
+ ep->com.qp = qp;
+ //ep->ofld_txq = TOEPCB(ep->com.so)->ofld_txq;
+
+ /* bind QP to EP and move to RTS */
+ attrs.mpa_attr = ep->mpa_attr;
+ attrs.max_ird = ep->ird;
+ attrs.max_ord = ep->ord;
+ attrs.llp_stream_handle = ep;
+ attrs.next_state = C4IW_QP_STATE_RTS;
+
+ /* bind QP and TID with INIT_WR */
+ mask = C4IW_QP_ATTR_NEXT_STATE |
+ C4IW_QP_ATTR_LLP_STREAM_HANDLE |
+ C4IW_QP_ATTR_MPA_ATTR |
+ C4IW_QP_ATTR_MAX_IRD |
+ C4IW_QP_ATTR_MAX_ORD;
+
+ err = c4iw_modify_qp(ep->com.qp->rhp, ep->com.qp, mask, &attrs, 1);
+
+ if (err) {
+
+ CTR2(KTR_IW_CXGBE, "%s:caca %p", __func__, ep);
+ goto err1;
+ }
+ err = send_mpa_reply(ep, conn_param->private_data,
+ conn_param->private_data_len);
+
+ if (err) {
+
+ /* NOTE(review): this trace reuses the "caca" tag of the check above. */
+ CTR2(KTR_IW_CXGBE, "%s:caca %p", __func__, ep);
+ goto err1;
+ }
+
+ state_set(&ep->com, FPDU_MODE);
+ established_upcall(ep);
+ c4iw_put_ep(&ep->com);
+ CTR2(KTR_IW_CXGBE, "%s:cacE %p", __func__, ep);
+ return 0;
+err1:
+ /* Detach the endpoint from the cm_id and QP taken above. */
+ ep->com.cm_id = NULL;
+ ep->com.qp = NULL;
+ cm_id->rem_ref(cm_id);
+err:
+ c4iw_put_ep(&ep->com);
+ CTR2(KTR_IW_CXGBE, "%s:cacE err %p", __func__, ep);
+ return err;
+}
+
+
+
+/*
+ * Start an active connection for cm_id.
+ *
+ * conn_param supplies the QP number, ird/ord and the private data to carry
+ * in the MPA request.  A new endpoint is allocated and bound to the cm_id,
+ * its socket and the QP; the route to the peer must go through an
+ * interface with TOE enabled that has a toedev.  The endpoint is then put
+ * in CONNECTING and soconnect() is issued.
+ *
+ * Returns 0 once the connect has been started, or a negative errno:
+ * -EINVAL (ird/ord too large or unknown QP), -ENOMEM (no endpoint),
+ * -EHOSTUNREACH (no route), -ENOPROTOOPT (interface cannot offload) or the
+ * negated soconnect() error.
+ */
+int c4iw_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param)
+{
+	int err = 0;
+	struct c4iw_dev *dev = to_c4iw_dev(cm_id->device);
+	struct c4iw_ep *ep = NULL;
+	struct rtentry *rt;
+	struct toedev *tdev;
+
+	CTR2(KTR_IW_CXGBE, "%s:ccB %p", __func__, cm_id);
+
+	if ((conn_param->ord > c4iw_max_read_depth) ||
+	    (conn_param->ird > c4iw_max_read_depth)) {
+
+		CTR2(KTR_IW_CXGBE, "%s:cc1 %p", __func__, cm_id);
+		err = -EINVAL;
+		goto out;
+	}
+	ep = alloc_ep(sizeof(*ep), M_NOWAIT);
+
+	if (!ep) {
+
+		CTR2(KTR_IW_CXGBE, "%s:cc2 %p", __func__, cm_id);
+		printk(KERN_ERR MOD "%s - cannot alloc ep.\n", __func__);
+		err = -ENOMEM;
+		goto out;
+	}
+	init_timer(&ep->timer);
+	ep->plen = conn_param->private_data_len;
+
+	if (ep->plen) {
+
+		/* Stash the private data right after the MPA header slot. */
+		CTR2(KTR_IW_CXGBE, "%s:cc3 %p", __func__, ep);
+		memcpy(ep->mpa_pkt + sizeof(struct mpa_message),
+		    conn_param->private_data, ep->plen);
+	}
+	ep->ird = conn_param->ird;
+	ep->ord = conn_param->ord;
+
+	if (peer2peer && ep->ord == 0) {
+
+		CTR2(KTR_IW_CXGBE, "%s:cc4 %p", __func__, ep);
+		ep->ord = 1;
+	}
+
+	cm_id->add_ref(cm_id);
+	ep->com.dev = dev;
+	ep->com.cm_id = cm_id;
+	ep->com.qp = get_qhp(dev, conn_param->qpn);
+
+	if (!ep->com.qp) {
+
+		CTR2(KTR_IW_CXGBE, "%s:cc5 %p", __func__, ep);
+		err = -EINVAL;
+		goto fail2;
+	}
+	ep->com.thread = curthread;
+	ep->com.so = cm_id->so;
+
+	init_sock(&ep->com);
+
+	/* find a route */
+	rt = find_route(
+	    cm_id->local_addr.sin_addr.s_addr,
+	    cm_id->remote_addr.sin_addr.s_addr,
+	    cm_id->local_addr.sin_port,
+	    cm_id->remote_addr.sin_port, 0);
+
+	if (!rt) {
+
+		CTR2(KTR_IW_CXGBE, "%s:cc7 %p", __func__, ep);
+		printk(KERN_ERR MOD "%s - cannot find route.\n", __func__);
+		err = -EHOSTUNREACH;
+		/* There is no route to release on this path. */
+		goto fail2;
+	}
+
+
+	/* IFCAP_* bits live in if_capenable, not in if_flags. */
+	if (!(rt->rt_ifp->if_capenable & IFCAP_TOE)) {
+
+		CTR2(KTR_IW_CXGBE, "%s:cc8 %p", __func__, ep);
+		printf("%s - interface not TOE capable.\n", __func__);
+		err = -ENOPROTOOPT;
+		goto fail3;
+	}
+	tdev = TOEDEV(rt->rt_ifp);
+
+	if (tdev == NULL) {
+
+		CTR2(KTR_IW_CXGBE, "%s:cc9 %p", __func__, ep);
+		printf("%s - No toedev for interface.\n", __func__);
+		err = -ENOPROTOOPT;
+		goto fail3;
+	}
+	RTFREE(rt);
+
+	state_set(&ep->com, CONNECTING);
+	ep->tos = 0;
+	ep->com.local_addr = cm_id->local_addr;
+	ep->com.remote_addr = cm_id->remote_addr;
+	/* Negate so every failure from this function is a negative errno. */
+	err = -soconnect(ep->com.so, (struct sockaddr *)&ep->com.remote_addr,
+	    ep->com.thread);
+
+	if (!err) {
+
+		CTR2(KTR_IW_CXGBE, "%s:cca %p", __func__, ep);
+		goto out;
+	}
+	/* The route was already released above; skip fail3. */
+	goto fail2;
+
+fail3:
+	RTFREE(rt);
+fail2:
+	CTR2(KTR_IW_CXGBE, "%s:ccb %p", __func__, ep);
+	cm_id->rem_ref(cm_id);
+	c4iw_put_ep(&ep->com);
+out:
+	CTR2(KTR_IW_CXGBE, "%s:ccE %p", __func__, ep);
+	return err;
+}
+
+/*
+ * iwcm->create_listen.  Allocates a listening endpoint for cm_id, binds it
+ * to the cm_id's socket and starts listening with the given backlog.  On
+ * success the endpoint is stored in cm_id->provider_data.
+ *
+ * Returns 0 on success, -errno on failure.
+ */
+int
+c4iw_create_listen(struct iw_cm_id *cm_id, int backlog)
+{
+	struct socket *lso = cm_id->so;
+	struct c4iw_dev *dev = to_c4iw_dev(cm_id->device);
+	struct c4iw_listen_ep *lep;
+	int rc;
+
+	lep = alloc_ep(sizeof(*lep), GFP_KERNEL);
+	CTR5(KTR_IW_CXGBE, "%s: cm_id %p, lso %p, ep %p, inp %p", __func__,
+	    cm_id, lso, lep, lso->so_pcb);
+	if (lep == NULL) {
+		log(LOG_ERR, "%s: failed to alloc memory for endpoint\n",
+		    __func__);
+		rc = ENOMEM;
+		goto failed;
+	}
+
+	/* The endpoint keeps a reference on the cm_id while it exists. */
+	cm_id->add_ref(cm_id);
+	lep->com.cm_id = cm_id;
+	lep->com.dev = dev;
+	lep->backlog = backlog;
+	lep->com.local_addr = cm_id->local_addr;
+	lep->com.thread = curthread;
+	state_set(&lep->com, LISTEN);
+	lep->com.so = lso;
+	init_sock(&lep->com);
+
+	rc = solisten(lso, lep->backlog, lep->com.thread);
+	if (rc == 0) {
+		cm_id->provider_data = lep;
+		return (0);
+	}
+
+	/* Listen failed: undo everything set up above. */
+	log(LOG_ERR, "%s: failed to start listener: %d\n", __func__,
+	    rc);
+	close_socket(&lep->com, 0);
+	cm_id->rem_ref(cm_id);
+	c4iw_put_ep(&lep->com);
+
+failed:
+	CTR3(KTR_IW_CXGBE, "%s: cm_id %p, FAILED (%d)", __func__, cm_id, rc);
+	return (-rc);
+}
+
+/*
+ * Tear down the listening endpoint attached to cm_id: mark it DEAD, close
+ * its socket, and drop the cm_id and endpoint references.  Returns the
+ * result of close_socket().
+ */
+int
+c4iw_destroy_listen(struct iw_cm_id *cm_id)
+{
+	struct c4iw_listen_ep *lep = to_listen_ep(cm_id);
+	int error;
+
+	CTR4(KTR_IW_CXGBE, "%s: cm_id %p, so %p, inp %p", __func__, cm_id,
+	    cm_id->so, cm_id->so->so_pcb);
+
+	state_set(&lep->com, DEAD);
+	error = close_socket(&lep->com, 0);
+
+	cm_id->rem_ref(cm_id);
+	c4iw_put_ep(&lep->com);
+
+	return (error);
+}
+
+/*
+ * Disconnect an endpoint.
+ *
+ * abrupt selects an abortive teardown (abort_connection()) over a graceful
+ * one (shutdown_socket()).  gfp is not used by this function.
+ *
+ * The state transition is decided under ep->com.mutex; the socket
+ * operation itself runs after the mutex is released.  If the device
+ * reports a fatal error, or the socket operation fails, the endpoint's
+ * resources are released.  Returns the result of the socket operation, or
+ * 0 when nothing had to be done.
+ */
+int c4iw_ep_disconnect(struct c4iw_ep *ep, int abrupt, gfp_t gfp)
+{
+ int ret = 0;
+ int close = 0;
+ int fatal = 0;
+ struct c4iw_rdev *rdev;
+
+ mutex_lock(&ep->com.mutex);
+
+ CTR2(KTR_IW_CXGBE, "%s:cedB %p", __func__, ep);
+
+ rdev = &ep->com.dev->rdev;
+
+ if (c4iw_fatal_error(rdev)) {
+
+ /* Device is unusable: notify the ULP and mark the endpoint dead. */
+ CTR2(KTR_IW_CXGBE, "%s:ced1 %p", __func__, ep);
+ fatal = 1;
+ close_complete_upcall(ep);
+ ep->com.state = DEAD;
+ }
+ CTR3(KTR_IW_CXGBE, "%s:ced2 %p %s", __func__, ep,
+ states[ep->com.state]);
+
+ switch (ep->com.state) {
+
+ case MPA_REQ_WAIT:
+ case MPA_REQ_SENT:
+ case MPA_REQ_RCVD:
+ case MPA_REP_SENT:
+ case FPDU_MODE:
+ /* First disconnect on a live connection. */
+ close = 1;
+ if (abrupt)
+ ep->com.state = ABORTING;
+ else {
+ ep->com.state = CLOSING;
+ START_EP_TIMER(ep);
+ }
+ set_bit(CLOSE_SENT, &ep->com.flags);
+ break;
+
+ case CLOSING:
+
+ /* Act only if no close has been sent for this endpoint yet. */
+ if (!test_and_set_bit(CLOSE_SENT, &ep->com.flags)) {
+
+ close = 1;
+ if (abrupt) {
+ STOP_EP_TIMER(ep);
+ ep->com.state = ABORTING;
+ } else
+ ep->com.state = MORIBUND;
+ }
+ break;
+
+ case MORIBUND:
+ case ABORTING:
+ case DEAD:
+ CTR3(KTR_IW_CXGBE,
+ "%s ignoring disconnect ep %p state %u", __func__,
+ ep, ep->com.state);
+ break;
+
+ default:
+ BUG();
+ break;
+ }
+
+ mutex_unlock(&ep->com.mutex);
+
+ if (close) {
+
+ CTR2(KTR_IW_CXGBE, "%s:ced3 %p", __func__, ep);
+
+ if (abrupt) {
+
+ CTR2(KTR_IW_CXGBE, "%s:ced4 %p", __func__, ep);
+ set_bit(EP_DISC_ABORT, &ep->com.history);
+ ret = abort_connection(ep);
+ } else {
+
+ CTR2(KTR_IW_CXGBE, "%s:ced5 %p", __func__, ep);
+ set_bit(EP_DISC_CLOSE, &ep->com.history);
+
+ /* Endpoints without a parent go straight to MORIBUND. */
+ if (!ep->parent_ep)
+ __state_set(&ep->com, MORIBUND);
+ ret = shutdown_socket(&ep->com);
+ }
+
+ if (ret) {
+
+ fatal = 1;
+ }
+ }
+
+ if (fatal) {
+
+ release_ep_resources(ep);
+ CTR2(KTR_IW_CXGBE, "%s:ced6 %p", __func__, ep);
+ }
+ CTR2(KTR_IW_CXGBE, "%s:cedE %p", __func__, ep);
+ return ret;
+}
+
+#ifdef C4IW_EP_REDIRECT
+/*
+ * Switch the endpoint passed as ctx from route "old" to route "new" with
+ * L2 entry l2t.  Returns 0 without changes if the endpoint is not using
+ * "old", 1 after the switch.
+ */
+int c4iw_ep_redirect(void *ctx, struct dst_entry *old, struct dst_entry *new,
+    struct l2t_entry *l2t)
+{
+	struct c4iw_ep *ep = ctx;
+
+	if (ep->dst == old) {
+		PDBG("%s ep %p redirect to dst %p l2t %p\n", __func__, ep, new,
+		    l2t);
+		/* Take the new route before letting go of the old one. */
+		dst_hold(new);
+		cxgb4_l2t_release(ep->l2t);
+		ep->l2t = l2t;
+		dst_release(old);
+		ep->dst = new;
+		return 1;
+	}
+
+	return 0;
+}
+#endif
+
+
+
+/*
+ * Endpoint timer callback; arg is the struct c4iw_ep pointer.  The endpoint
+ * is put on timeout_list (once, guarded by the TIMEOUT flag) and the task
+ * queue is kicked to process it.
+ */
+static void ep_timeout(unsigned long arg)
+{
+	struct c4iw_ep *ep = (struct c4iw_ep *)arg;
+	int newly_queued;
+
+	CTR2(KTR_IW_CXGBE, "%s:etB %p", __func__, ep);
+
+	spin_lock(&timeout_lock);
+	newly_queued = !test_and_set_bit(TIMEOUT, &ep->com.flags);
+	if (newly_queued)
+		list_add_tail(&ep->entry, &timeout_list);
+	spin_unlock(&timeout_lock);
+
+	/* Only the caller that queued the endpoint schedules the work. */
+	if (newly_queued) {
+		CTR2(KTR_IW_CXGBE, "%s:et1 %p", __func__, ep);
+		queue_work(c4iw_taskq, &c4iw_task);
+	}
+
+	CTR2(KTR_IW_CXGBE, "%s:etE %p", __func__, ep);
+}
+
+/*
+ * Firmware work-request reply handler.  Bits 8..15 of the first reply word
+ * (after conversion from big endian) are the completion status; the second
+ * word is used as a struct c4iw_wr_wait pointer.  A non-NULL waiter is
+ * woken with the negated status.  Always returns 0.
+ */
+static int fw6_wr_rpl(struct adapter *sc, const __be64 *rpl)
+{
+	struct c4iw_wr_wait *waiter;
+	uint64_t word0;
+	int status;
+
+	word0 = be64toh(*rpl);
+	status = (int)((word0 >> 8) & 0xff);
+	waiter = (struct c4iw_wr_wait *)rpl[1];
+
+	CTR3(KTR_IW_CXGBE, "%s wr_waitp %p ret %u", __func__, waiter, status);
+	if (waiter != NULL)
+		c4iw_wake_up(waiter, -status);
+
+	return (0);
+}
+
+/*
+ * Firmware CQE message handler: takes a local copy of the CQE found at the
+ * start of the reply and passes it to c4iw_ev_dispatch().  Always returns 0.
+ */
+static int fw6_cqe_handler(struct adapter *sc, const __be64 *rpl)
+{
+	struct t4_cqe cqe_copy;
+
+	cqe_copy = *(const struct t4_cqe *)rpl;
+
+	CTR2(KTR_IW_CXGBE, "%s rpl %p", __func__, rpl);
+	c4iw_ev_dispatch(sc->iwarp_softc, &cqe_copy);
+
+	return (0);
+}
+
+/*
+ * CPL_RDMA_TERMINATE handler.  Looks up the endpoint for the tid carried in
+ * the message (tid -> toepcb -> inpcb -> socket -> receive upcall argument)
+ * and, if it has a QP, moves that QP to the TERMINATE state.  The mbuf
+ * argument is not used.  Always returns 0.
+ */
+static int terminate(struct sge_iq *iq, const struct rss_header *rss, struct mbuf *m)
+{
+
+	struct adapter *sc = iq->adapter;
+
+	const struct cpl_rdma_terminate *rpl = (const void *)(rss + 1);
+	unsigned int tid = GET_TID(rpl);
+	struct c4iw_qp_attributes attrs;
+	struct toepcb *toep = lookup_tid(sc, tid);
+	struct socket *so = inp_inpcbtosocket(toep->inp);
+	/* The inpcb may no longer have a socket; treat that as "no ep". */
+	struct c4iw_ep *ep = so != NULL ? so->so_rcv.sb_upcallarg : NULL;
+
+	/* The format has three conversions, so pass the tid as well. */
+	CTR3(KTR_IW_CXGBE, "%s:tB %p %d", __func__, ep, tid);
+
+	if (ep && ep->com.qp) {
+
+		printk(KERN_WARNING MOD "TERM received tid %u qpid %u\n", tid,
+		    ep->com.qp->wq.sq.qid);
+		attrs.next_state = C4IW_QP_STATE_TERMINATE;
+		c4iw_modify_qp(ep->com.dev, ep->com.qp, C4IW_QP_ATTR_NEXT_STATE, &attrs,
+		    1);
+	} else
+		printk(KERN_WARNING MOD "TERM received tid %u no ep/qp\n", tid);
+	CTR3(KTR_IW_CXGBE, "%s:tE %p %d", __func__, ep, tid);
+
+	return 0;
+}
+
+/*
+ * Install this driver's handlers on adapter sc: the CPL_RDMA_TERMINATE
+ * message handler, the firmware WR-reply and CQE message handlers, and the
+ * async-notification handler.
+ */
+void
+c4iw_cm_init_cpl(struct adapter *sc)
+{
+
+	/* CPL message from the chip. */
+	t4_register_cpl_handler(sc, CPL_RDMA_TERMINATE, terminate);
+
+	/* Firmware messages. */
+	t4_register_fw_msg_handler(sc, FW6_TYPE_WR_RPL, fw6_wr_rpl);
+	t4_register_fw_msg_handler(sc, FW6_TYPE_CQE, fw6_cqe_handler);
+
+	/* Async notifications. */
+	t4_register_an_handler(sc, c4iw_ev_handler);
+}
+
+/*
+ * Remove the handlers installed by c4iw_cm_init_cpl() from adapter sc by
+ * re-registering each slot with NULL.
+ */
+void
+c4iw_cm_term_cpl(struct adapter *sc)
+{
+
+	t4_register_cpl_handler(sc, CPL_RDMA_TERMINATE, NULL);
+	t4_register_fw_msg_handler(sc, FW6_TYPE_WR_RPL, NULL);
+	t4_register_fw_msg_handler(sc, FW6_TYPE_CQE, NULL);
+	/*
+	 * c4iw_cm_init_cpl() also installs c4iw_ev_handler; clear it too so
+	 * the adapter is not left pointing at this driver's code.
+	 * NOTE(review): assumes t4_register_an_handler() accepts NULL like
+	 * the other registration calls do - confirm against t4_main.c.
+	 */
+	t4_register_an_handler(sc, NULL);
+}
+
+/*
+ * One-time connection-manager setup: initialize the request and timeout
+ * lists with their locks, set up the deferred task and create the
+ * "iw_cxgbe" work queue.  Returns 0, or -ENOMEM if the work queue cannot
+ * be created.
+ */
+int __init c4iw_cm_init(void)
+{
+
+	TAILQ_INIT(&req_list);
+	spin_lock_init(&req_lock);
+
+	INIT_LIST_HEAD(&timeout_list);
+	spin_lock_init(&timeout_lock);
+
+	INIT_WORK(&c4iw_task, process_req);
+
+	c4iw_taskq = create_singlethread_workqueue("iw_cxgbe");
+	if (c4iw_taskq)
+		return 0;
+
+	return -ENOMEM;
+}
+
+/*
+ * Connection-manager teardown: warn if requests or timed-out endpoints are
+ * still queued, then flush and destroy the work queue.
+ */
+void __exit c4iw_cm_term(void)
+{
+	/* Both lists are expected to be empty at this point. */
+	WARN_ON(!TAILQ_EMPTY(&req_list));
+	WARN_ON(!list_empty(&timeout_list));
+
+	flush_workqueue(c4iw_taskq);
+	destroy_workqueue(c4iw_taskq);
+}
+#endif
diff --git a/sys/dev/cxgbe/iw_cxgbe/cq.c b/sys/dev/cxgbe/iw_cxgbe/cq.c
new file mode 100644
index 0000000..ec72a6c
--- /dev/null
+++ b/sys/dev/cxgbe/iw_cxgbe/cq.c
@@ -0,0 +1,926 @@
+/*
+ * Copyright (c) 2009-2013 Chelsio, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include "opt_inet.h"
+
+#ifdef TCP_OFFLOAD
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/kernel.h>
+#include <sys/ktr.h>
+#include <sys/bus.h>
+#include <sys/lock.h>
+#include <sys/mutex.h>
+#include <sys/rwlock.h>
+#include <sys/socket.h>
+#include <sys/sbuf.h>
+
+#include "iw_cxgbe.h"
+#include "user.h"
+
+/*
+ * Tear down a CQ: send a FW_RI_RES_WR carrying a FW_RI_RES_OP_RESET for the
+ * CQ's id on the management queue, wait for the reply, then free the software
+ * queue, the contiguous queue memory and the cqid.
+ *
+ * Always returns 0.  If no work request can be allocated the function returns
+ * right away and nothing is released.
+ */
+static int destroy_cq(struct c4iw_rdev *rdev, struct t4_cq *cq,
+    struct c4iw_dev_ucontext *uctx)
+{
+	struct adapter *sc = rdev->adap;
+	struct c4iw_wr_wait reply;
+	struct fw_ri_res_wr *req;
+	struct fw_ri_res *r;
+	struct wrqe *wr;
+	int len = sizeof(*req) + sizeof(*r);
+
+	wr = alloc_wrqe(len, &sc->sge.mgmtq);
+	if (wr == NULL)
+		return (0);
+
+	req = wrtod(wr);
+	memset(req, 0, len);
+	req->op_nres = cpu_to_be32(V_FW_WR_OP(FW_RI_RES_WR) |
+	    V_FW_RI_RES_WR_NRES(1) | F_FW_WR_COMPL);
+	req->len16_pkd = cpu_to_be32(DIV_ROUND_UP(len, 16));
+	/* The cookie carries the address of the on-stack wait object. */
+	req->cookie = (unsigned long)&reply;
+
+	r = req->res;
+	r->u.cq.restype = FW_RI_RES_TYPE_CQ;
+	r->u.cq.op = FW_RI_RES_OP_RESET;
+	r->u.cq.iqid = cpu_to_be32(cq->cqid);
+
+	c4iw_init_wr_wait(&reply);
+	t4_wrq_tx(sc, wr);
+	c4iw_wait_for_reply(rdev, &reply, 0, 0, __func__);
+
+	kfree(cq->sw_queue);
+	contigfree(cq->queue, cq->memsize, M_DEVBUF);
+	c4iw_put_cqid(rdev, cq->cqid, uctx);
+
+	return 0;
+}
+
+/*
+ * Allocate everything a completion queue needs and ask the firmware to
+ * create it.
+ *
+ * rdev: device the CQ belongs to.
+ * cq:   CQ to set up; cq->size and cq->memsize must already be filled in.
+ * uctx: context the cqid is taken from.  Anything other than &rdev->uctx is
+ *       treated as a user CQ: no software queue is allocated and cq->ugts is
+ *       computed.
+ *
+ * Returns 0 on success, -ENOMEM if a cqid, memory or a work request cannot
+ * be obtained, or the error reported by c4iw_wait_for_reply().  On failure
+ * everything acquired here has been released again.
+ */
+static int
+create_cq(struct c4iw_rdev *rdev, struct t4_cq *cq,
+    struct c4iw_dev_ucontext *uctx)
+{
+	struct adapter *sc = rdev->adap;
+	struct fw_ri_res_wr *res_wr;
+	struct fw_ri_res *res;
+	int wr_len;
+	int user = (uctx != &rdev->uctx);
+	struct c4iw_wr_wait wr_wait;
+	int ret;
+	struct wrqe *wr;
+
+	cq->cqid = c4iw_get_cqid(rdev, uctx);
+	if (!cq->cqid) {
+		ret = -ENOMEM;
+		goto err1;
+	}
+
+	if (!user) {
+		cq->sw_queue = kzalloc(cq->memsize, GFP_KERNEL);
+		if (!cq->sw_queue) {
+			ret = -ENOMEM;
+			goto err2;
+		}
+	}
+
+	cq->queue = contigmalloc(cq->memsize, M_DEVBUF, M_NOWAIT, 0ul, ~0ul,
+	    PAGE_SIZE, 0);
+	if (cq->queue)
+		cq->dma_addr = vtophys(cq->queue);
+	else {
+		ret = -ENOMEM;
+		goto err3;
+	}
+
+	pci_unmap_addr_set(cq, mapping, cq->dma_addr);
+	memset(cq->queue, 0, cq->memsize);
+
+	/* build fw_ri_res_wr */
+	wr_len = sizeof *res_wr + sizeof *res;
+
+	wr = alloc_wrqe(wr_len, &sc->sge.mgmtq);
+	if (wr == NULL) {
+		/*
+		 * Without a work request the firmware is never told about
+		 * this CQ; report the failure and unwind instead of
+		 * returning success.
+		 */
+		ret = -ENOMEM;
+		goto err4;
+	}
+	res_wr = wrtod(wr);
+
+	memset(res_wr, 0, wr_len);
+	res_wr->op_nres = cpu_to_be32(
+	    V_FW_WR_OP(FW_RI_RES_WR) |
+	    V_FW_RI_RES_WR_NRES(1) |
+	    F_FW_WR_COMPL);
+	res_wr->len16_pkd = cpu_to_be32(DIV_ROUND_UP(wr_len, 16));
+	res_wr->cookie = (unsigned long) &wr_wait;
+	res = res_wr->res;
+	res->u.cq.restype = FW_RI_RES_TYPE_CQ;
+	res->u.cq.op = FW_RI_RES_OP_WRITE;
+	res->u.cq.iqid = cpu_to_be32(cq->cqid);
+	//Fixme: Always use first queue id for IQANDSTINDEX. Linux does the same.
+	res->u.cq.iqandst_to_iqandstindex = cpu_to_be32(
+	    V_FW_RI_RES_WR_IQANUS(0) |
+	    V_FW_RI_RES_WR_IQANUD(1) |
+	    F_FW_RI_RES_WR_IQANDST |
+	    V_FW_RI_RES_WR_IQANDSTINDEX(sc->sge.ofld_rxq[0].iq.abs_id));
+	res->u.cq.iqdroprss_to_iqesize = cpu_to_be16(
+	    F_FW_RI_RES_WR_IQDROPRSS |
+	    V_FW_RI_RES_WR_IQPCIECH(2) |
+	    V_FW_RI_RES_WR_IQINTCNTTHRESH(0) |
+	    F_FW_RI_RES_WR_IQO |
+	    V_FW_RI_RES_WR_IQESIZE(1));
+	res->u.cq.iqsize = cpu_to_be16(cq->size);
+	res->u.cq.iqaddr = cpu_to_be64(cq->dma_addr);
+
+	c4iw_init_wr_wait(&wr_wait);
+
+	t4_wrq_tx(sc, wr);
+
+	CTR2(KTR_IW_CXGBE, "%s wait_event wr_wait %p", __func__, &wr_wait);
+	ret = c4iw_wait_for_reply(rdev, &wr_wait, 0, 0, __func__);
+	if (ret)
+		goto err4;
+
+	cq->gen = 1;
+	cq->gts = (void *)((unsigned long)rman_get_virtual(sc->regs_res) +
+	    MYPF_REG(SGE_PF_GTS));
+	cq->rdev = rdev;
+
+	if (user) {
+		/* Page-aligned address covering this cqid in the udbs BAR. */
+		cq->ugts = (u64)((char*)rman_get_virtual(sc->udbs_res) +
+		    (cq->cqid << rdev->cqshift));
+		cq->ugts &= PAGE_MASK;
+		CTR5(KTR_IW_CXGBE,
+		    "%s: UGTS %p cqid %x cqshift %d page_mask %x", __func__,
+		    cq->ugts, cq->cqid, rdev->cqshift, PAGE_MASK);
+	}
+	return 0;
+err4:
+	contigfree(cq->queue, cq->memsize, M_DEVBUF);
+err3:
+	kfree(cq->sw_queue);
+err2:
+	c4iw_put_cqid(rdev, cq->cqid, uctx);
+err1:
+	return ret;
+}
+
+/*
+ * Append one software-built receive completion for wq to cq's software
+ * queue.  The entry has status T4_ERR_SWFLUSH, opcode FW_RI_SEND, type 0, the
+ * SWCQE bit set, the QP's sq.qid and the CQ's current generation bit.
+ */
+static void insert_recv_cqe(struct t4_wq *wq, struct t4_cq *cq)
+{
+	struct t4_cqe flush;
+
+	CTR5(KTR_IW_CXGBE, "%s wq %p cq %p sw_cidx %u sw_pidx %u", __func__, wq,
+	    cq, cq->sw_cidx, cq->sw_pidx);
+
+	memset(&flush, 0, sizeof(flush));
+	flush.header = cpu_to_be32(V_CQE_QPID(wq->sq.qid) |
+	    V_CQE_SWCQE(1) |
+	    V_CQE_TYPE(0) |
+	    V_CQE_OPCODE(FW_RI_SEND) |
+	    V_CQE_STATUS(T4_ERR_SWFLUSH));
+	flush.bits_type_ts = cpu_to_be64(V_CQE_GENBIT((u64)cq->gen));
+
+	cq->sw_queue[cq->sw_pidx] = flush;
+	t4_swcq_produce(cq);
+}
+
+/*
+ * Insert a flush completion into cq for every receive work request of wq
+ * still in use, except for the first 'count' of them.
+ *
+ * Returns the number of completions inserted.
+ */
+int c4iw_flush_rq(struct t4_wq *wq, struct t4_cq *cq, int count)
+{
+	int remaining = wq->rq.in_use - count;
+	int flushed = 0;
+
+	BUG_ON(remaining < 0);
+	CTR5(KTR_IW_CXGBE, "%s wq %p cq %p rq.in_use %u skip count %u",
+	    __func__, wq, cq, wq->rq.in_use, count);
+
+	while (remaining-- != 0) {
+		insert_recv_cqe(wq, cq);
+		++flushed;
+	}
+
+	return flushed;
+}
+
+/*
+ * Append one software-built send completion for the SQ entry 'swcqe' to cq's
+ * software queue.  The entry has status T4_ERR_SWFLUSH, the opcode and index
+ * recorded in swcqe, type 1, the SWCQE bit set, the QP's sq.qid and the CQ's
+ * current generation bit.
+ */
+static void insert_sq_cqe(struct t4_wq *wq, struct t4_cq *cq,
+    struct t4_swsqe *swcqe)
+{
+	struct t4_cqe flush;
+
+	CTR5(KTR_IW_CXGBE, "%s wq %p cq %p sw_cidx %u sw_pidx %u", __func__, wq,
+	    cq, cq->sw_cidx, cq->sw_pidx);
+
+	memset(&flush, 0, sizeof(flush));
+	flush.header = cpu_to_be32(V_CQE_QPID(wq->sq.qid) |
+	    V_CQE_SWCQE(1) |
+	    V_CQE_TYPE(1) |
+	    V_CQE_OPCODE(swcqe->opcode) |
+	    V_CQE_STATUS(T4_ERR_SWFLUSH));
+	CQE_WRID_SQ_IDX(&flush) = swcqe->idx;
+	flush.bits_type_ts = cpu_to_be64(V_CQE_GENBIT((u64)cq->gen));
+
+	cq->sw_queue[cq->sw_pidx] = flush;
+	t4_swcq_produce(cq);
+}
+
+/*
+ * Insert a flush completion into cq for every send work request of wq still
+ * in use, except for the first 'count' of them (counted from sq.cidx).  Each
+ * flushed entry also has its 'signaled' flag cleared.
+ *
+ * Returns the number of completions inserted.
+ */
+int c4iw_flush_sq(struct t4_wq *wq, struct t4_cq *cq, int count)
+{
+	int flushed = 0;
+	int first = wq->sq.cidx + count;
+	struct t4_swsqe *swsqe;
+	int in_use = wq->sq.in_use - count;
+
+	BUG_ON(in_use < 0);
+
+	/*
+	 * sw_sq is a ring of sq.size entries, so the starting slot has to
+	 * wrap like every later step does.  Without this the walk could
+	 * begin past the end of the array, where the equality-based wrap
+	 * check below would never trigger.
+	 */
+	if (first >= wq->sq.size)
+		first -= wq->sq.size;
+	swsqe = &wq->sq.sw_sq[first];
+
+	while (in_use--) {
+		swsqe->signaled = 0;
+		insert_sq_cqe(wq, cq, swsqe);
+		swsqe++;
+		if (swsqe == (wq->sq.sw_sq + wq->sq.size))
+			swsqe = wq->sq.sw_sq;
+		flushed++;
+	}
+	return flushed;
+}
+
+/*
+ * Drain the hardware CQ into the software CQ: every entry t4_next_hw_cqe()
+ * hands out is copied to the software queue with the SWCQE bit set in its
+ * header, then consumed from the hardware queue.
+ */
+void c4iw_flush_hw_cq(struct t4_cq *cq)
+{
+	struct t4_cqe *hw = NULL;
+
+	CTR3(KTR_IW_CXGBE, "%s cq %p cqid 0x%x", __func__, cq, cq->cqid);
+
+	while (t4_next_hw_cqe(cq, &hw) == 0) {
+		struct t4_cqe *sw;
+
+		CTR3(KTR_IW_CXGBE, "%s flushing hwcq cidx 0x%x swcq pidx 0x%x",
+		    __func__, cq->cidx, cq->sw_pidx);
+
+		sw = &cq->sw_queue[cq->sw_pidx];
+		*sw = *hw;
+		sw->header |= cpu_to_be32(V_CQE_SWCQE(1));
+
+		t4_swcq_produce(cq);
+		t4_hwcq_consume(cq);
+	}
+}
+
+/*
+ * Decide whether a CQE completes a work request.
+ *
+ * Returns 0 for a TERMINATE, for an RDMA WRITE of RQ type, for a READ
+ * response of SQ type, and for a send-type opcode of RQ type while the RQ is
+ * empty; returns 1 otherwise.
+ */
+static int cqe_completes_wr(struct t4_cqe *cqe, struct t4_wq *wq)
+{
+	switch (CQE_OPCODE(cqe)) {
+	case FW_RI_TERMINATE:
+		return 0;
+	case FW_RI_RDMA_WRITE:
+		if (RQ_TYPE(cqe))
+			return 0;
+		break;
+	case FW_RI_READ_RESP:
+		if (SQ_TYPE(cqe))
+			return 0;
+		break;
+	default:
+		break;
+	}
+
+	return !(CQE_SEND_OPCODE(cqe) && RQ_TYPE(cqe) && t4_rq_empty(wq));
+}
+
+/*
+ * Count the entries in cq's software queue (sw_cidx up to sw_pidx) that
+ * carry wq's sq.qid and are either of SQ type or a READ response while the
+ * QP has an oldest outstanding read.  The result is stored in *count.
+ */
+void c4iw_count_scqes(struct t4_cq *cq, struct t4_wq *wq, int *count)
+{
+	u32 idx;
+
+	*count = 0;
+	for (idx = cq->sw_cidx; idx != cq->sw_pidx;
+	    idx = (idx + 1 == cq->size) ? 0 : idx + 1) {
+		struct t4_cqe *entry = &cq->sw_queue[idx];
+
+		if (!(SQ_TYPE(entry) ||
+		    ((CQE_OPCODE(entry) == FW_RI_READ_RESP) &&
+		    wq->sq.oldest_read)))
+			continue;
+		if (CQE_QPID(entry) == wq->sq.qid)
+			(*count)++;
+	}
+	CTR3(KTR_IW_CXGBE, "%s cq %p count %d", __func__, cq, *count);
+}
+
+/*
+ * Count the entries in cq's software queue (sw_cidx up to sw_pidx) that are
+ * of RQ type, are not READ responses, carry wq's sq.qid and complete a work
+ * request according to cqe_completes_wr().  The result is stored in *count.
+ */
+void c4iw_count_rcqes(struct t4_cq *cq, struct t4_wq *wq, int *count)
+{
+	u32 idx;
+
+	*count = 0;
+	CTR2(KTR_IW_CXGBE, "%s count zero %d", __func__, *count);
+
+	for (idx = cq->sw_cidx; idx != cq->sw_pidx;
+	    idx = (idx + 1 == cq->size) ? 0 : idx + 1) {
+		struct t4_cqe *entry = &cq->sw_queue[idx];
+
+		if (!RQ_TYPE(entry))
+			continue;
+		if (CQE_OPCODE(entry) == FW_RI_READ_RESP)
+			continue;
+		if (CQE_QPID(entry) != wq->sq.qid)
+			continue;
+		if (cqe_completes_wr(entry, wq))
+			(*count)++;
+	}
+	CTR3(KTR_IW_CXGBE, "%s cq %p count %d", __func__, cq, *count);
+}
+/*
+ * Walk the software SQ from sq.cidx across the in-use entries.  Unsignaled
+ * entries are stepped over and counted.  The walk stops at the first signaled
+ * entry: if that entry is already marked complete, its saved CQE is moved
+ * into the software CQ (with the SWCQE bit set), its 'signaled' flag is
+ * cleared and sq.in_use is reduced by the number of unsignaled entries that
+ * were stepped over.  A signaled entry that is not complete ends the walk
+ * without any change.
+ */
+static void flush_completed_wrs(struct t4_wq *wq, struct t4_cq *cq)
+{
+ struct t4_swsqe *swsqe;
+ u16 ptr = wq->sq.cidx;
+ int count = wq->sq.in_use;
+ int unsignaled = 0;
+
+ swsqe = &wq->sq.sw_sq[ptr];
+ while (count--)
+ if (!swsqe->signaled) {
+ if (++ptr == wq->sq.size)
+ ptr = 0;
+ swsqe = &wq->sq.sw_sq[ptr];
+ unsignaled++;
+ } else if (swsqe->complete) {
+
+ /*
+ * Insert this completed cqe into the swcq.
+ */
+ CTR3(KTR_IW_CXGBE,
+ "%s moving cqe into swcq sq idx %u cq idx %u",
+ __func__, ptr, cq->sw_pidx);
+ swsqe->cqe.header |= htonl(V_CQE_SWCQE(1));
+ cq->sw_queue[cq->sw_pidx] = swsqe->cqe;
+ t4_swcq_produce(cq);
+ swsqe->signaled = 0;
+ wq->sq.in_use -= unsignaled;
+ break;
+ } else
+ break;
+}
+
+/*
+ * Build in *read_cqe the SQ-type FW_RI_READ_REQ completion that corresponds
+ * to the read response in *hw_cqe.  The SQ index and length come from the
+ * QP's oldest outstanding read; the QPID, the SWCQE bit and bits_type_ts are
+ * taken over from hw_cqe.
+ */
+static void create_read_req_cqe(struct t4_wq *wq, struct t4_cqe *hw_cqe,
+    struct t4_cqe *read_cqe)
+{
+	struct t4_swsqe *rd = wq->sq.oldest_read;
+
+	read_cqe->u.scqe.cidx = rd->idx;
+	read_cqe->len = cpu_to_be32(rd->read_len);
+	read_cqe->header = htonl(V_CQE_TYPE(1) |
+	    V_CQE_OPCODE(FW_RI_READ_REQ) |
+	    V_CQE_SWCQE(SW_CQE(hw_cqe)) |
+	    V_CQE_QPID(CQE_QPID(hw_cqe)));
+	read_cqe->bits_type_ts = hw_cqe->bits_type_ts;
+}
+
+/*
+ * Move wq->sq.oldest_read forward to the next FW_RI_READ_REQ entry in the
+ * software SQ, searching from the slot after the current one up to sq.pidx.
+ * If there is no such entry, sq.oldest_read is set to NULL.
+ */
+static void advance_oldest_read(struct t4_wq *wq)
+{
+	u32 idx = wq->sq.oldest_read - wq->sq.sw_sq + 1;
+
+	if (idx == wq->sq.size)
+		idx = 0;
+
+	for (; idx != wq->sq.pidx;
+	    idx = (idx + 1 == wq->sq.size) ? 0 : idx + 1) {
+		wq->sq.oldest_read = &wq->sq.sw_sq[idx];
+		if (wq->sq.oldest_read->opcode == FW_RI_READ_REQ)
+			return;
+	}
+
+	wq->sq.oldest_read = NULL;
+}
+
+/*
+ * poll_cq
+ *
+ * Caller must:
+ * check the validity of the first CQE,
+ * supply the wq associated with the qpid (NULL if the qpid maps to no QP).
+ *
+ * credit: cq credit to return to sge (this function always stores 0).
+ * cqe_flushed: 1 iff the CQE is flushed.
+ * cqe: copy of the polled CQE.
+ * cookie: wr_id of the work request completed by the CQE.
+ *
+ * return value:
+ * 0 CQE returned ok.
+ * -EAGAIN CQE skipped, try again.
+ * -EOVERFLOW CQ overflow detected.
+ * Any other error is whatever t4_next_cqe() returned.
+ */
+static int poll_cq(struct t4_wq *wq, struct t4_cq *cq, struct t4_cqe *cqe,
+ u8 *cqe_flushed, u64 *cookie, u32 *credit)
+{
+ int ret = 0;
+ struct t4_cqe *hw_cqe, read_cqe;
+
+ *cqe_flushed = 0;
+ *credit = 0;
+ ret = t4_next_cqe(cq, &hw_cqe);
+ if (ret)
+ return ret;
+
+ CTR6(KTR_IW_CXGBE,
+ "%s CQE OVF %u qpid 0x%0x genbit %u type %u status 0x%0x", __func__,
+ CQE_OVFBIT(hw_cqe), CQE_QPID(hw_cqe), CQE_GENBIT(hw_cqe),
+ CQE_TYPE(hw_cqe), CQE_STATUS(hw_cqe));
+ CTR5(KTR_IW_CXGBE,
+ "%s opcode 0x%0x len 0x%0x wrid_hi_stag 0x%x wrid_low_msn 0x%x",
+ __func__, CQE_OPCODE(hw_cqe), CQE_LEN(hw_cqe), CQE_WRID_HI(hw_cqe),
+ CQE_WRID_LOW(hw_cqe));
+
+ /*
+ * skip cqe's not affiliated with a QP.
+ */
+ if (wq == NULL) {
+ ret = -EAGAIN;
+ goto skip_cqe;
+ }
+
+ /*
+ * Gotta tweak READ completions:
+ * 1) the cqe doesn't contain the sq_wptr from the wr.
+ * 2) opcode not reflected from the wr.
+ * 3) read_len not reflected from the wr.
+ * 4) cq_type is RQ_TYPE not SQ_TYPE.
+ */
+ if (RQ_TYPE(hw_cqe) && (CQE_OPCODE(hw_cqe) == FW_RI_READ_RESP)) {
+
+ /*
+ * If this is an unsolicited read response, then the read
+ * was generated by the kernel driver as part of peer-2-peer
+ * connection setup. So ignore the completion.
+ */
+ if (!wq->sq.oldest_read) {
+ if (CQE_STATUS(hw_cqe))
+ t4_set_wq_in_error(wq);
+ ret = -EAGAIN;
+ goto skip_cqe;
+ }
+
+ /*
+ * Don't write to the HWCQ, so create a new read req CQE
+ * in local memory.
+ */
+ create_read_req_cqe(wq, hw_cqe, &read_cqe);
+ hw_cqe = &read_cqe;
+ advance_oldest_read(wq);
+ }
+
+ if (CQE_STATUS(hw_cqe) || t4_wq_in_error(wq)) {
+ *cqe_flushed = t4_wq_in_error(wq);
+ t4_set_wq_in_error(wq);
+ goto proc_cqe;
+ }
+
+ if (CQE_OPCODE(hw_cqe) == FW_RI_TERMINATE) {
+ ret = -EAGAIN;
+ goto skip_cqe;
+ }
+
+ /*
+ * RECV completion.
+ */
+ if (RQ_TYPE(hw_cqe)) {
+
+ /*
+ * HW only validates 4 bits of MSN. So we must validate that
+ * the MSN in the SEND is the next expected MSN. If its not,
+ * then we complete this with T4_ERR_MSN and mark the wq in
+ * error.
+ */
+
+ if (t4_rq_empty(wq)) {
+ t4_set_wq_in_error(wq);
+ ret = -EAGAIN;
+ goto skip_cqe;
+ }
+ if (unlikely((CQE_WRID_MSN(hw_cqe) != (wq->rq.msn)))) {
+ t4_set_wq_in_error(wq);
+ hw_cqe->header |= htonl(V_CQE_STATUS(T4_ERR_MSN));
+ goto proc_cqe;
+ }
+ goto proc_cqe;
+ }
+
+ /*
+ * If we get here its a send completion.
+ *
+ * Handle out of order completion. These get stuffed
+ * in the SW SQ. Then the SW SQ is walked to move any
+ * now in-order completions into the SW CQ. This handles
+ * 2 cases:
+ * 1) reaping unsignaled WRs when the first subsequent
+ * signaled WR is completed.
+ * 2) out of order read completions.
+ */
+ if (!SW_CQE(hw_cqe) && (CQE_WRID_SQ_IDX(hw_cqe) != wq->sq.cidx)) {
+ struct t4_swsqe *swsqe;
+
+ CTR2(KTR_IW_CXGBE,
+ "%s out of order completion going in sw_sq at idx %u",
+ __func__, CQE_WRID_SQ_IDX(hw_cqe));
+ swsqe = &wq->sq.sw_sq[CQE_WRID_SQ_IDX(hw_cqe)];
+ swsqe->cqe = *hw_cqe;
+ swsqe->complete = 1;
+ ret = -EAGAIN;
+ goto flush_wq;
+ }
+ /* Hand the CQE to the caller and retire the WR it completes. */
+proc_cqe:
+ *cqe = *hw_cqe;
+
+ /*
+ * Reap the associated WR(s) that are freed up with this
+ * completion.
+ */
+ if (SQ_TYPE(hw_cqe)) {
+ wq->sq.cidx = CQE_WRID_SQ_IDX(hw_cqe);
+ CTR2(KTR_IW_CXGBE, "%s completing sq idx %u",
+ __func__, wq->sq.cidx);
+ *cookie = wq->sq.sw_sq[wq->sq.cidx].wr_id;
+ t4_sq_consume(wq);
+ } else {
+ CTR2(KTR_IW_CXGBE, "%s completing rq idx %u",
+ __func__, wq->rq.cidx);
+ *cookie = wq->rq.sw_rq[wq->rq.cidx].wr_id;
+ BUG_ON(t4_rq_empty(wq));
+ t4_rq_consume(wq);
+ }
+
+flush_wq:
+ /*
+ * Flush any completed cqes that are now in-order.
+ */
+ flush_completed_wrs(wq, cq);
+ /* Every path ends by consuming the CQE from the queue it came from. */
+skip_cqe:
+ if (SW_CQE(hw_cqe)) {
+ CTR4(KTR_IW_CXGBE, "%s cq %p cqid 0x%x skip sw cqe cidx %u",
+ __func__, cq, cq->cqid, cq->sw_cidx);
+ t4_swcq_consume(cq);
+ } else {
+ CTR4(KTR_IW_CXGBE, "%s cq %p cqid 0x%x skip hw cqe cidx %u",
+ __func__, cq, cq->cqid, cq->cidx);
+ t4_hwcq_consume(cq);
+ }
+ return ret;
+}
+
+/*
+ * Get one cq entry from c4iw and map it to openib.
+ *
+ * The QP named by the CQE is looked up with get_qhp(); when one is found its
+ * lock is held across poll_cq() and the translation into *wc.
+ *
+ * Returns:
+ * 0 cqe returned
+ * -ENODATA EMPTY;
+ * -EAGAIN caller must try again
+ * any other -errno fatal error
+ */
+static int c4iw_poll_cq_one(struct c4iw_cq *chp, struct ib_wc *wc)
+{
+ struct c4iw_qp *qhp = NULL;
+ struct t4_cqe cqe = {0, 0}, *rd_cqe;
+ struct t4_wq *wq;
+ u32 credit = 0;
+ u8 cqe_flushed;
+ u64 cookie = 0;
+ int ret;
+
+ ret = t4_next_cqe(&chp->cq, &rd_cqe);
+
+ if (ret)
+ return ret;
+
+ qhp = get_qhp(chp->rhp, CQE_QPID(rd_cqe));
+ if (!qhp)
+ wq = NULL;
+ else {
+ spin_lock(&qhp->lock);
+ wq = &(qhp->wq);
+ }
+ ret = poll_cq(wq, &(chp->cq), &cqe, &cqe_flushed, &cookie, &credit);
+ if (ret)
+ goto out;
+ /* poll_cq() returns -EAGAIN for a NULL wq, so qhp is valid from here. */
+ wc->wr_id = cookie;
+ wc->qp = &qhp->ibqp;
+ wc->vendor_err = CQE_STATUS(&cqe);
+ wc->wc_flags = 0;
+
+ CTR5(KTR_IW_CXGBE, "%s qpid 0x%x type %d opcode %d status 0x%x",
+ __func__, CQE_QPID(&cqe), CQE_TYPE(&cqe), CQE_OPCODE(&cqe),
+ CQE_STATUS(&cqe));
+ CTR5(KTR_IW_CXGBE, "%s len %u wrid hi 0x%x lo 0x%x cookie 0x%llx",
+ __func__, CQE_LEN(&cqe), CQE_WRID_HI(&cqe), CQE_WRID_LOW(&cqe),
+ (unsigned long long)cookie);
+ /* Type 0 CQEs are reported as receives; the rest map by opcode. */
+ if (CQE_TYPE(&cqe) == 0) {
+ if (!CQE_STATUS(&cqe))
+ wc->byte_len = CQE_LEN(&cqe);
+ else
+ wc->byte_len = 0;
+ wc->opcode = IB_WC_RECV;
+ if (CQE_OPCODE(&cqe) == FW_RI_SEND_WITH_INV ||
+ CQE_OPCODE(&cqe) == FW_RI_SEND_WITH_SE_INV) {
+ wc->ex.invalidate_rkey = CQE_WRID_STAG(&cqe);
+ wc->wc_flags |= IB_WC_WITH_INVALIDATE;
+ }
+ } else {
+ switch (CQE_OPCODE(&cqe)) {
+ case FW_RI_RDMA_WRITE:
+ wc->opcode = IB_WC_RDMA_WRITE;
+ break;
+ case FW_RI_READ_REQ:
+ wc->opcode = IB_WC_RDMA_READ;
+ wc->byte_len = CQE_LEN(&cqe);
+ break;
+ case FW_RI_SEND_WITH_INV:
+ case FW_RI_SEND_WITH_SE_INV:
+ wc->opcode = IB_WC_SEND;
+ wc->wc_flags |= IB_WC_WITH_INVALIDATE;
+ break;
+ case FW_RI_SEND:
+ case FW_RI_SEND_WITH_SE:
+ wc->opcode = IB_WC_SEND;
+ break;
+ case FW_RI_BIND_MW:
+ wc->opcode = IB_WC_BIND_MW;
+ break;
+
+ case FW_RI_LOCAL_INV:
+ wc->opcode = IB_WC_LOCAL_INV;
+ break;
+ case FW_RI_FAST_REGISTER:
+ wc->opcode = IB_WC_FAST_REG_MR;
+ break;
+ default:
+ printf("Unexpected opcode %d "
+ "in the CQE received for QPID = 0x%0x\n",
+ CQE_OPCODE(&cqe), CQE_QPID(&cqe));
+ ret = -EINVAL;
+ goto out;
+ }
+ }
+ /* A flushed CQE overrides whatever status the hardware reported. */
+ if (cqe_flushed)
+ wc->status = IB_WC_WR_FLUSH_ERR;
+ else {
+
+ switch (CQE_STATUS(&cqe)) {
+ case T4_ERR_SUCCESS:
+ wc->status = IB_WC_SUCCESS;
+ break;
+ case T4_ERR_STAG:
+ wc->status = IB_WC_LOC_ACCESS_ERR;
+ break;
+ case T4_ERR_PDID:
+ wc->status = IB_WC_LOC_PROT_ERR;
+ break;
+ case T4_ERR_QPID:
+ case T4_ERR_ACCESS:
+ wc->status = IB_WC_LOC_ACCESS_ERR;
+ break;
+ case T4_ERR_WRAP:
+ wc->status = IB_WC_GENERAL_ERR;
+ break;
+ case T4_ERR_BOUND:
+ wc->status = IB_WC_LOC_LEN_ERR;
+ break;
+ case T4_ERR_INVALIDATE_SHARED_MR:
+ case T4_ERR_INVALIDATE_MR_WITH_MW_BOUND:
+ wc->status = IB_WC_MW_BIND_ERR;
+ break;
+ case T4_ERR_CRC:
+ case T4_ERR_MARKER:
+ case T4_ERR_PDU_LEN_ERR:
+ case T4_ERR_OUT_OF_RQE:
+ case T4_ERR_DDP_VERSION:
+ case T4_ERR_RDMA_VERSION:
+ case T4_ERR_DDP_QUEUE_NUM:
+ case T4_ERR_MSN:
+ case T4_ERR_TBIT:
+ case T4_ERR_MO:
+ case T4_ERR_MSN_RANGE:
+ case T4_ERR_IRD_OVERFLOW:
+ case T4_ERR_OPCODE:
+ case T4_ERR_INTERNAL_ERR:
+ wc->status = IB_WC_FATAL_ERR;
+ break;
+ case T4_ERR_SWFLUSH:
+ wc->status = IB_WC_WR_FLUSH_ERR;
+ break;
+ default:
+ printf("Unexpected cqe_status 0x%x for QPID = 0x%0x\n",
+ CQE_STATUS(&cqe), CQE_QPID(&cqe));
+ ret = -EINVAL;
+ }
+ }
+out:
+ if (wq)
+ spin_unlock(&qhp->lock);
+ return ret;
+}
+
+/*
+ * Poll up to num_entries completions from ibcq into the wc array while
+ * holding the CQ lock.  Entries that report -EAGAIN are retried.
+ *
+ * Returns the number of completions stored when polling ended normally or
+ * because the CQ ran empty (-ENODATA); any other error is returned as is.
+ */
+int c4iw_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc)
+{
+	struct c4iw_cq *chp = to_c4iw_cq(ibcq);
+	unsigned long irqflags;
+	int npolled = 0;
+	int err = 0;
+
+	spin_lock_irqsave(&chp->lock, irqflags);
+	while (npolled < num_entries) {
+		do {
+			err = c4iw_poll_cq_one(chp, wc + npolled);
+		} while (err == -EAGAIN);
+
+		if (err != 0)
+			break;
+		++npolled;
+	}
+	spin_unlock_irqrestore(&chp->lock, irqflags);
+
+	if (err == 0 || err == -ENODATA)
+		return npolled;
+	return err;
+}
+
+/*
+ * Destroy a CQ: remove it from the device's cqidr, drop the reference taken
+ * at creation and wait until the count reaches zero, then release the
+ * hardware CQ and free the c4iw_cq.  The cqid goes back to the owning user
+ * context when the CQ has a uobject, otherwise to the rdev's own context.
+ *
+ * Always returns 0.
+ */
+int c4iw_destroy_cq(struct ib_cq *ib_cq)
+{
+	struct c4iw_cq *chp;
+	struct c4iw_ucontext *ucontext = NULL;
+	struct c4iw_dev_ucontext *uctx;
+
+	CTR2(KTR_IW_CXGBE, "%s ib_cq %p", __func__, ib_cq);
+	chp = to_c4iw_cq(ib_cq);
+
+	remove_handle(chp->rhp, &chp->rhp->cqidr, chp->cq.cqid);
+	atomic_dec(&chp->refcnt);
+	wait_event(chp->wait, !atomic_read(&chp->refcnt));
+
+	if (ib_cq->uobject)
+		ucontext = to_c4iw_ucontext(ib_cq->uobject->context);
+
+	if (ucontext)
+		uctx = &ucontext->uctx;
+	else
+		uctx = &chp->cq.rdev->uctx;
+
+	destroy_cq(&chp->rhp->rdev, &chp->cq, uctx);
+	kfree(chp);
+
+	return 0;
+}
+
+/*
+ * Create a completion queue.
+ *
+ * ibdev:      device to create the CQ on.
+ * entries:    number of CQEs requested by the caller.
+ * vector:     not used by this function.
+ * ib_context: user context for a user CQ, NULL for a kernel CQ.
+ * udata:      where the create response is copied for a user CQ.
+ *
+ * The requested size is grown by two entries (status page, full/empty
+ * distinction), rounded up to a multiple of 16, doubled, and raised to at
+ * least 64.  For a user CQ the memory size is additionally rounded to whole
+ * pages and capped at T4_MAX_IQ_SIZE entries, and two mmap entries (queue
+ * memory and GTS page) are registered with the user context.
+ *
+ * Returns the new ib_cq, or an ERR_PTR() carrying a negative errno.
+ */
+struct ib_cq *
+c4iw_create_cq(struct ib_device *ibdev, int entries, int vector,
+    struct ib_ucontext *ib_context, struct ib_udata *udata)
+{
+	struct c4iw_dev *rhp;
+	struct c4iw_cq *chp;
+	struct c4iw_create_cq_resp uresp;
+	struct c4iw_ucontext *ucontext = NULL;
+	int ret;
+	size_t memsize, hwentries;
+	struct c4iw_mm_entry *mm, *mm2;
+
+	CTR3(KTR_IW_CXGBE, "%s ib_dev %p entries %d", __func__, ibdev, entries);
+
+	rhp = to_c4iw_dev(ibdev);
+
+	chp = kzalloc(sizeof(*chp), GFP_KERNEL);
+	if (!chp)
+		return ERR_PTR(-ENOMEM);
+
+	if (ib_context)
+		ucontext = to_c4iw_ucontext(ib_context);
+
+	/* account for the status page. */
+	entries++;
+
+	/* IQ needs one extra entry to differentiate full vs empty. */
+	entries++;
+
+	/*
+	 * entries must be multiple of 16 for HW.
+	 */
+	entries = roundup(entries, 16);
+
+	/*
+	 * Make actual HW queue 2x to avoid cidx_inc overflows.
+	 */
+	hwentries = entries * 2;
+
+	/*
+	 * Make HW queue at least 64 entries so GTS updates aren't too
+	 * frequent.
+	 */
+	if (hwentries < 64)
+		hwentries = 64;
+
+	memsize = hwentries * sizeof *chp->cq.queue;
+
+	/*
+	 * memsize must be a multiple of the page size if its a user cq.
+	 */
+	if (ucontext) {
+		memsize = roundup(memsize, PAGE_SIZE);
+		hwentries = memsize / sizeof *chp->cq.queue;
+		while (hwentries > T4_MAX_IQ_SIZE) {
+			memsize -= PAGE_SIZE;
+			hwentries = memsize / sizeof *chp->cq.queue;
+		}
+	}
+	chp->cq.size = hwentries;
+	chp->cq.memsize = memsize;
+
+	ret = create_cq(&rhp->rdev, &chp->cq,
+	    ucontext ? &ucontext->uctx : &rhp->rdev.uctx);
+	if (ret)
+		goto err1;
+
+	chp->rhp = rhp;
+	chp->cq.size--;				/* status page */
+	chp->ibcq.cqe = entries - 2;
+	spin_lock_init(&chp->lock);
+	spin_lock_init(&chp->comp_handler_lock);
+	atomic_set(&chp->refcnt, 1);
+	init_waitqueue_head(&chp->wait);
+	ret = insert_handle(rhp, &rhp->cqidr, chp, chp->cq.cqid);
+	if (ret)
+		goto err2;
+
+	if (ucontext) {
+		/*
+		 * ret is 0 at this point, so each allocation failure must
+		 * set it; otherwise the caller would get ERR_PTR(0), i.e.
+		 * NULL, for a CQ that has already been torn down again.
+		 */
+		mm = kmalloc(sizeof *mm, GFP_KERNEL);
+		if (!mm) {
+			ret = -ENOMEM;
+			goto err3;
+		}
+		mm2 = kmalloc(sizeof *mm2, GFP_KERNEL);
+		if (!mm2) {
+			ret = -ENOMEM;
+			goto err4;
+		}
+
+		/* Never hand uninitialised stack bytes to userspace. */
+		memset(&uresp, 0, sizeof(uresp));
+		uresp.qid_mask = rhp->rdev.cqmask;
+		uresp.cqid = chp->cq.cqid;
+		uresp.size = chp->cq.size;
+		uresp.memsize = chp->cq.memsize;
+		/* Reserve two page-sized mmap keys: queue memory, GTS page. */
+		spin_lock(&ucontext->mmap_lock);
+		uresp.key = ucontext->key;
+		ucontext->key += PAGE_SIZE;
+		uresp.gts_key = ucontext->key;
+		ucontext->key += PAGE_SIZE;
+		spin_unlock(&ucontext->mmap_lock);
+		ret = ib_copy_to_udata(udata, &uresp, sizeof uresp);
+		if (ret)
+			goto err5;
+
+		mm->key = uresp.key;
+		mm->addr = vtophys(chp->cq.queue);
+		mm->len = chp->cq.memsize;
+		insert_mmap(ucontext, mm);
+
+		mm2->key = uresp.gts_key;
+		mm2->addr = chp->cq.ugts;
+		mm2->len = PAGE_SIZE;
+		insert_mmap(ucontext, mm2);
+	}
+	CTR6(KTR_IW_CXGBE,
+	    "%s cqid 0x%0x chp %p size %u memsize %zu, dma_addr 0x%0llx",
+	    __func__, chp->cq.cqid, chp, chp->cq.size, chp->cq.memsize,
+	    (unsigned long long) chp->cq.dma_addr);
+	return &chp->ibcq;
+err5:
+	kfree(mm2);
+err4:
+	kfree(mm);
+err3:
+	remove_handle(rhp, &rhp->cqidr, chp->cq.cqid);
+err2:
+	destroy_cq(&chp->rhp->rdev, &chp->cq,
+	    ucontext ? &ucontext->uctx : &rhp->rdev.uctx);
+err1:
+	kfree(chp);
+	return ERR_PTR(ret);
+}
+/* Resizing a CQ is not implemented; every call fails with -ENOSYS. */
+int c4iw_resize_cq(struct ib_cq *cq, int cqe, struct ib_udata *udata)
+{
+ return -ENOSYS;
+}
+
+/*
+ * Arm the CQ for notification under the CQ lock.  t4_arm_cq() is told
+ * whether only solicited completions were requested.
+ *
+ * Returns the t4_arm_cq() result if the caller asked for
+ * IB_CQ_REPORT_MISSED_EVENTS, otherwise 0.
+ */
+int c4iw_arm_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags flags)
+{
+	struct c4iw_cq *chp = to_c4iw_cq(ibcq);
+	unsigned long irqflags;
+	int ret;
+
+	spin_lock_irqsave(&chp->lock, irqflags);
+	ret = t4_arm_cq(&chp->cq,
+	    (flags & IB_CQ_SOLICITED_MASK) == IB_CQ_SOLICITED);
+	spin_unlock_irqrestore(&chp->lock, irqflags);
+
+	if (ret == 0 || (flags & IB_CQ_REPORT_MISSED_EVENTS))
+		return ret;
+	return 0;
+}
+#endif
diff --git a/sys/dev/cxgbe/iw_cxgbe/device.c b/sys/dev/cxgbe/iw_cxgbe/device.c
new file mode 100644
index 0000000..adb283d
--- /dev/null
+++ b/sys/dev/cxgbe/iw_cxgbe/device.c
@@ -0,0 +1,369 @@
+/*
+ * Copyright (c) 2009-2013 Chelsio, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include "opt_inet.h"
+
+#include <sys/ktr.h>
+
+#include <linux/module.h>
+#include <linux/moduleparam.h>
+
+#include <rdma/ib_verbs.h>
+#include <linux/idr.h>
+
+#ifdef TCP_OFFLOAD
+#include "iw_cxgbe.h"
+
+int spg_creds = 2; /* Status page size in credits; default 2 = 128B. Reset to 2 or 1 in c4iw_rdev_open() from A_SGE_CONTROL. */
+
+/*
+ * Release everything still cached on a device user context.
+ *
+ * Every entry on the qpids list is freed; an entry whose qid has none of the
+ * rdev->qpmask bits set also returns that qid to the qid table and lowers the
+ * qid usage counter by qpmask + 1.  Every entry on the cqids list is then
+ * freed as well.  Runs with uctx->lock held throughout.
+ */
+void
+c4iw_release_dev_ucontext(struct c4iw_rdev *rdev,
+    struct c4iw_dev_ucontext *uctx)
+{
+	struct list_head *pos, *nxt;
+	struct c4iw_qid_list *entry;
+
+	mutex_lock(&uctx->lock);
+	list_for_each_safe(pos, nxt, &uctx->qpids) {
+		entry = list_entry(pos, struct c4iw_qid_list, entry);
+		list_del_init(&entry->entry);
+		if (!(entry->qid & rdev->qpmask)) {
+			c4iw_put_resource(&rdev->resource.qid_table,
+			    entry->qid);
+			mutex_lock(&rdev->stats.lock);
+			rdev->stats.qid.cur -= rdev->qpmask + 1;
+			mutex_unlock(&rdev->stats.lock);
+		}
+		kfree(entry);
+	}
+
+	/*
+	 * The second pass has to walk the cqids list.  Walking qpids again
+	 * (already emptied above) would leak every cached cqid entry.
+	 */
+	list_for_each_safe(pos, nxt, &uctx->cqids) {
+		entry = list_entry(pos, struct c4iw_qid_list, entry);
+		list_del_init(&entry->entry);
+		kfree(entry);
+	}
+	mutex_unlock(&uctx->lock);
+}
+
+/*
+ * Prepare a device user context for use: initialise its lock and its empty
+ * cqid and qpid lists.  rdev is not used.
+ */
+void
+c4iw_init_dev_ucontext(struct c4iw_rdev *rdev, struct c4iw_dev_ucontext *uctx)
+{
+	mutex_init(&uctx->lock);
+	INIT_LIST_HEAD(&uctx->cqids);
+	INIT_LIST_HEAD(&uctx->qpids);
+}
+
+/*
+ * Bring up the RDMA side of an adapter: initialise the rdev's own user
+ * context, record the status page size, derive the per-page doorbell shifts
+ * and masks, publish the resource totals in the stats, and create the
+ * resource tables plus the PBL and RQT pools.
+ *
+ * Returns 0 on success, -EINVAL if the doorbell density exceeds one page or
+ * there are no STAGs, or the error from the failing initialisation step.
+ * Anything created before the failure is destroyed again.
+ */
+static int
+c4iw_rdev_open(struct c4iw_rdev *rdev)
+{
+	struct adapter *sc = rdev->adap;
+	int rc;
+
+	c4iw_init_dev_ucontext(rdev, &rdev->uctx);
+
+	/* Save the status page size set by if_cxgbe */
+	spg_creds = (t4_read_reg(sc, A_SGE_CONTROL) & F_EGRSTATUSPAGESIZE) ?
+	    2 : 1;
+
+	/* XXX: we can probably make this work */
+	if (sc->sge.eq_s_qpp > PAGE_SHIFT || sc->sge.iq_s_qpp > PAGE_SHIFT) {
+		/* Report both densities; the iq value is iq_s_qpp. */
+		device_printf(sc->dev,
+		    "doorbell density too high (eq %d, iq %d, pg %d).\n",
+		    sc->sge.eq_s_qpp, sc->sge.iq_s_qpp, PAGE_SHIFT);
+		rc = -EINVAL;
+		goto err1;
+	}
+
+	rdev->qpshift = PAGE_SHIFT - sc->sge.eq_s_qpp;
+	rdev->qpmask = (1 << sc->sge.eq_s_qpp) - 1;
+	rdev->cqshift = PAGE_SHIFT - sc->sge.iq_s_qpp;
+	rdev->cqmask = (1 << sc->sge.iq_s_qpp) - 1;
+
+	if (c4iw_num_stags(rdev) == 0) {
+		rc = -EINVAL;
+		goto err1;
+	}
+
+	rdev->stats.pd.total = T4_MAX_NUM_PD;
+	rdev->stats.stag.total = sc->vres.stag.size;
+	rdev->stats.pbl.total = sc->vres.pbl.size;
+	rdev->stats.rqt.total = sc->vres.rq.size;
+	rdev->stats.qid.total = sc->vres.qp.size;
+
+	rc = c4iw_init_resource(rdev, c4iw_num_stags(rdev), T4_MAX_NUM_PD);
+	if (rc) {
+		device_printf(sc->dev, "error %d initializing resources\n", rc);
+		goto err1;
+	}
+	rc = c4iw_pblpool_create(rdev);
+	if (rc) {
+		device_printf(sc->dev, "error %d initializing pbl pool\n", rc);
+		goto err2;
+	}
+	rc = c4iw_rqtpool_create(rdev);
+	if (rc) {
+		device_printf(sc->dev, "error %d initializing rqt pool\n", rc);
+		goto err3;
+	}
+
+	return (0);
+err3:
+	c4iw_pblpool_destroy(rdev);
+err2:
+	c4iw_destroy_resource(&rdev->resource);
+err1:
+	return (rc);
+}
+/*
+ * Release what c4iw_rdev_open() created: the PBL pool, the RQT pool and the
+ * resource tables.
+ */
+static void c4iw_rdev_close(struct c4iw_rdev *rdev)
+{
+ c4iw_pblpool_destroy(rdev);
+ c4iw_rqtpool_destroy(rdev);
+ c4iw_destroy_resource(&rdev->resource);
+}
+
+static void
+c4iw_dealloc(struct c4iw_dev *iwsc)
+{
+ /*
+ * Counterpart of c4iw_alloc(): close the rdev, destroy the three idr
+ * tables and hand the device structure back to the ib core.
+ */
+ c4iw_rdev_close(&iwsc->rdev);
+ idr_destroy(&iwsc->cqidr);
+ idr_destroy(&iwsc->qpidr);
+ idr_destroy(&iwsc->mmidr);
+ ib_dealloc_device(&iwsc->ibdev);
+}
+
+/*
+ * Allocate the iWARP softc for adapter sc and open its rdev.  On success the
+ * idr tables and locks are initialised and the device is returned; otherwise
+ * an ERR_PTR() with a negative errno is returned and nothing stays allocated.
+ */
+static struct c4iw_dev *
+c4iw_alloc(struct adapter *sc)
+{
+	struct c4iw_dev *dev;
+	int error;
+
+	dev = (struct c4iw_dev *)ib_alloc_device(sizeof(*dev));
+	if (dev == NULL) {
+		device_printf(sc->dev, "Cannot allocate ib device.\n");
+		return (ERR_PTR(-ENOMEM));
+	}
+
+	dev->rdev.adap = sc;
+	error = c4iw_rdev_open(&dev->rdev);
+	if (error == 0) {
+		idr_init(&dev->cqidr);
+		idr_init(&dev->qpidr);
+		idr_init(&dev->mmidr);
+		spin_lock_init(&dev->lock);
+		mutex_init(&dev->rdev.stats.lock);
+
+		return (dev);
+	}
+
+	device_printf(sc->dev, "Unable to open CXIO rdev (%d)\n", error);
+	ib_dealloc_device(&dev->ibdev);
+
+	return (ERR_PTR(error));
+}
+
+static int c4iw_mod_load(void);
+static int c4iw_mod_unload(void);
+static int c4iw_activate(struct adapter *);
+static int c4iw_deactivate(struct adapter *);
+/* ULD descriptor passed to t4_register_uld() and t4_unregister_uld(). */
+static struct uld_info c4iw_uld_info = {
+ .uld_id = ULD_IWARP,
+ .activate = c4iw_activate,
+ .deactivate = c4iw_deactivate,
+};
+/*
+ * ULD activate method (see c4iw_uld_info): bring up iWARP on adapter sc.
+ * Asserts that it runs inside a synchronized operation.
+ *
+ * Returns 0 on success or a positive errno.  The negative errors produced by
+ * c4iw_alloc() and c4iw_register_device() are negated before being returned.
+ */
+static int
+c4iw_activate(struct adapter *sc)
+{
+ struct c4iw_dev *iwsc;
+ int rc;
+
+ ASSERT_SYNCHRONIZED_OP(sc);
+ /* Bit MAX_NPORTS of offload_map is set by c4iw_activate_all() on success. */
+ if (isset(&sc->offload_map, MAX_NPORTS)) {
+ KASSERT(0, ("%s: RDMA already eanbled on sc %p", __func__, sc));
+ return (0);
+ }
+
+ if (sc->rdmacaps == 0) {
+ device_printf(sc->dev,
+ "RDMA not supported or RDMA cap is not enabled.\n");
+ return (ENOSYS);
+ }
+
+ iwsc = c4iw_alloc(sc);
+ if (IS_ERR(iwsc)) {
+ rc = -PTR_ERR(iwsc);
+ device_printf(sc->dev, "initialization failed: %d\n", rc);
+ return (rc);
+ }
+ /* The softc is published and the handlers installed before registration. */
+ sc->iwarp_softc = iwsc;
+ c4iw_cm_init_cpl(sc);
+
+ rc = -c4iw_register_device(iwsc);
+ if (rc) {
+ device_printf(sc->dev, "RDMA registration failed: %d\n", rc);
+ c4iw_dealloc(iwsc);
+ sc->iwarp_softc = NULL;
+ }
+
+ return (rc);
+}
+/*
+ * ULD deactivate method (see c4iw_uld_info): unregister the ib device, free
+ * the iWARP softc and clear sc->iwarp_softc.  Asserts that it runs inside a
+ * synchronized operation.  Always returns 0.
+ */
+static int
+c4iw_deactivate(struct adapter *sc)
+{
+ struct c4iw_dev *iwsc = sc->iwarp_softc;
+
+ ASSERT_SYNCHRONIZED_OP(sc);
+
+ c4iw_unregister_device(iwsc);
+ c4iw_dealloc(iwsc);
+ sc->iwarp_softc = NULL;
+
+ return (0);
+}
+
+/*
+ * t4_iterate() callback: activate the iWARP ULD on one adapter unless it is
+ * already marked active, and record success in bit MAX_NPORTS of the
+ * adapter's offload_map.  Does nothing if the synchronized operation cannot
+ * be started.
+ */
+static void
+c4iw_activate_all(struct adapter *sc, void *arg __unused)
+{
+	if (begin_synchronized_op(sc, NULL, SLEEP_OK | INTR_OK, "t4iwact") != 0)
+		return;
+
+	if (!isset(&sc->offload_map, MAX_NPORTS)) {
+		if (t4_activate_uld(sc, ULD_IWARP) == 0)
+			setbit(&sc->offload_map, MAX_NPORTS);
+	}
+
+	end_synchronized_op(sc, 0);
+}
+
+/*
+ * t4_iterate() callback: deactivate the iWARP ULD on one adapter if it is
+ * marked active, and clear bit MAX_NPORTS of the adapter's offload_map on
+ * success.  Does nothing if the synchronized operation cannot be started.
+ */
+static void
+c4iw_deactivate_all(struct adapter *sc, void *arg __unused)
+{
+	if (begin_synchronized_op(sc, NULL, SLEEP_OK | INTR_OK, "t4iwdea") != 0)
+		return;
+
+	if (isset(&sc->offload_map, MAX_NPORTS)) {
+		if (t4_deactivate_uld(sc, ULD_IWARP) == 0)
+			clrbit(&sc->offload_map, MAX_NPORTS);
+	}
+
+	end_synchronized_op(sc, 0);
+}
+
+/*
+ * Module load: set up the connection manager, register the iWARP ULD and
+ * try to activate it on every adapter.
+ *
+ * Returns 0 on success or a positive errno; a failed ULD registration tears
+ * the connection manager down again.
+ */
+static int
+c4iw_mod_load(void)
+{
+	int error;
+
+	/* c4iw_cm_init() reports errors as negative values. */
+	error = -c4iw_cm_init();
+	if (error != 0)
+		return (error);
+
+	error = t4_register_uld(&c4iw_uld_info);
+	if (error == 0)
+		t4_iterate(c4iw_activate_all, NULL);
+	else
+		c4iw_cm_term();
+
+	return (error);
+}
+
+/*
+ * Module unload: deactivate the iWARP ULD on every adapter, unregister the
+ * ULD and shut down the connection manager.
+ *
+ * Returns 0 on success, or EBUSY if the ULD cannot be unregistered.
+ */
+static int
+c4iw_mod_unload(void)
+{
+
+	t4_iterate(c4iw_deactivate_all, NULL);
+
+	/*
+	 * Unregister before tearing down the connection manager.  If the ULD
+	 * is still busy the module stays loaded, and it must keep its
+	 * workqueue in that case.
+	 */
+	if (t4_unregister_uld(&c4iw_uld_info) == EBUSY)
+		return (EBUSY);
+
+	c4iw_cm_term();
+
+	return (0);
+}
+
+#endif
+#undef MODULE_VERSION
+#include <sys/module.h>
+
+/*
+ * Module event handler.
+ *
+ * t4_tom won't load on kernels without TCP_OFFLOAD and this module's dependency
+ * on t4_tom ensures that it won't either. So we don't directly check for
+ * TCP_OFFLOAD here.
+ *
+ * Returns 0 on success, the error from the load/unload routine, EINVAL for
+ * any other event, or EOPNOTSUPP when built without TCP_OFFLOAD.
+ */
+static int
+c4iw_modevent(module_t mod, int cmd, void *arg)
+{
+	int rc = 0;
+
+#ifdef TCP_OFFLOAD
+	switch (cmd) {
+	case MOD_LOAD:
+		rc = c4iw_mod_load();
+		if (rc == 0)
+			printf("iw_cxgbe: Chelsio T4/T5 RDMA driver loaded.\n");
+		break;
+
+	case MOD_UNLOAD:
+		rc = c4iw_mod_unload();
+		break;
+
+	default:
+		rc = EINVAL;
+	}
+#else
+	/* Name this module, not t4_tom, in the diagnostic. */
+	printf("iw_cxgbe: compiled without TCP_OFFLOAD support.\n");
+	rc = EOPNOTSUPP;
+#endif
+	return (rc);
+}
+/* Module description: name, event handler, and extra argument (unused, 0). */
+static moduledata_t c4iw_mod_data = {
+ "iw_cxgbe",
+ c4iw_modevent,
+ 0
+};
+
+MODULE_VERSION(iw_cxgbe, 1);
+MODULE_DEPEND(iw_cxgbe, t4nex, 1, 1, 1);
+MODULE_DEPEND(iw_cxgbe, t4_tom, 1, 1, 1);
+MODULE_DEPEND(iw_cxgbe, ibcore, 1, 1, 1);
+DECLARE_MODULE(iw_cxgbe, c4iw_mod_data, SI_SUB_EXEC, SI_ORDER_ANY);
diff --git a/sys/dev/cxgbe/iw_cxgbe/ev.c b/sys/dev/cxgbe/iw_cxgbe/ev.c
new file mode 100644
index 0000000..b0a9e13
--- /dev/null
+++ b/sys/dev/cxgbe/iw_cxgbe/ev.c
@@ -0,0 +1,206 @@
+/*
+ * Copyright (c) 2009-2013 Chelsio, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include "opt_inet.h"
+
+#ifdef TCP_OFFLOAD
+#include <linux/slab.h>
+
+#include "iw_cxgbe.h"
+
+/*
+ * Deliver an asynchronous event (AE) for a QP to its consumers.
+ *
+ * dev     - owning device (not referenced in this function)
+ * chp     - CQ the failing CQE belongs to
+ * qhp     - QP the AE was raised for
+ * err_cqe - the CQE describing the error
+ * ib_event - IB event type to report
+ *
+ * An AE for a QP that is already in ERROR or TERMINATE state is only traced.
+ * Otherwise the AE is logged, a QP in RTS is moved to TERMINATE, the QP's
+ * event handler (if any) is called, and finally the CQ's completion handler
+ * is invoked.
+ */
+static void post_qp_event(struct c4iw_dev *dev, struct c4iw_cq *chp,
+ struct c4iw_qp *qhp,
+ struct t4_cqe *err_cqe,
+ enum ib_event_type ib_event)
+{
+ struct ib_event event;
+ struct c4iw_qp_attributes attrs;
+ unsigned long flag;
+
+ /* The QP has already left RTS: trace the AE and drop it. */
+ if ((qhp->attr.state == C4IW_QP_STATE_ERROR) ||
+ (qhp->attr.state == C4IW_QP_STATE_TERMINATE)) {
+ CTR4(KTR_IW_CXGBE, "%s AE received after RTS - "
+ "qp state %d qpid 0x%x status 0x%x", __func__,
+ qhp->attr.state, qhp->wq.sq.qid, CQE_STATUS(err_cqe));
+ return;
+ }
+
+ printf("AE qpid 0x%x opcode %d status 0x%x "
+ "type %d wrid.hi 0x%x wrid.lo 0x%x\n",
+ CQE_QPID(err_cqe), CQE_OPCODE(err_cqe),
+ CQE_STATUS(err_cqe), CQE_TYPE(err_cqe),
+ CQE_WRID_HI(err_cqe), CQE_WRID_LOW(err_cqe));
+
+ /* Only next_state is filled in; the mask selects just that field. */
+ if (qhp->attr.state == C4IW_QP_STATE_RTS) {
+ attrs.next_state = C4IW_QP_STATE_TERMINATE;
+ c4iw_modify_qp(qhp->rhp, qhp, C4IW_QP_ATTR_NEXT_STATE,
+ &attrs, 0);
+ }
+
+ /* A CQ error reports the CQ as the element, anything else the QP. */
+ event.event = ib_event;
+ event.device = chp->ibcq.device;
+ if (ib_event == IB_EVENT_CQ_ERR)
+ event.element.cq = &chp->ibcq;
+ else
+ event.element.qp = &qhp->ibqp;
+ if (qhp->ibqp.event_handler)
+ (*qhp->ibqp.event_handler)(&event, qhp->ibqp.qp_context);
+
+ /* The completion upcall is made with comp_handler_lock held. */
+ spin_lock_irqsave(&chp->comp_handler_lock, flag);
+ (*chp->ibcq.comp_handler)(&chp->ibcq, chp->ibcq.cq_context);
+ spin_unlock_irqrestore(&chp->comp_handler_lock, flag);
+}
+
+/*
+ * Translate an error CQE into an IB asynchronous event and post it.
+ *
+ * The QP named by the CQE and the CQ it maps to (send or receive CQ,
+ * depending on the CQE type) are looked up under dev->lock.  If either is
+ * missing the CQE is logged as a "BAD AE" and dropped.  Otherwise references
+ * are taken on both objects before the lock is released, the CQE status is
+ * mapped to an IB event type, post_qp_event() is called, and the references
+ * are dropped again.
+ */
+void c4iw_ev_dispatch(struct c4iw_dev *dev, struct t4_cqe *err_cqe)
+{
+ struct c4iw_cq *chp;
+ struct c4iw_qp *qhp;
+ u32 cqid;
+
+ spin_lock_irq(&dev->lock);
+ qhp = get_qhp(dev, CQE_QPID(err_cqe));
+ if (!qhp) {
+ printf("BAD AE qpid 0x%x opcode %d "
+ "status 0x%x type %d wrid.hi 0x%x wrid.lo 0x%x\n",
+ CQE_QPID(err_cqe),
+ CQE_OPCODE(err_cqe), CQE_STATUS(err_cqe),
+ CQE_TYPE(err_cqe), CQE_WRID_HI(err_cqe),
+ CQE_WRID_LOW(err_cqe));
+ spin_unlock_irq(&dev->lock);
+ goto out;
+ }
+
+ /* Send-queue CQEs belong to the QP's send CQ, all others to its recv CQ. */
+ if (SQ_TYPE(err_cqe))
+ cqid = qhp->attr.scq;
+ else
+ cqid = qhp->attr.rcq;
+ chp = get_chp(dev, cqid);
+ if (!chp) {
+ printf("BAD AE cqid 0x%x qpid 0x%x opcode %d "
+ "status 0x%x type %d wrid.hi 0x%x wrid.lo 0x%x\n",
+ cqid, CQE_QPID(err_cqe),
+ CQE_OPCODE(err_cqe), CQE_STATUS(err_cqe),
+ CQE_TYPE(err_cqe), CQE_WRID_HI(err_cqe),
+ CQE_WRID_LOW(err_cqe));
+ spin_unlock_irq(&dev->lock);
+ goto out;
+ }
+
+ /* Take both references while dev->lock is still held. */
+ c4iw_qp_add_ref(&qhp->ibqp);
+ atomic_inc(&chp->refcnt);
+ spin_unlock_irq(&dev->lock);
+
+ /* Bad incoming write */
+ if (RQ_TYPE(err_cqe) &&
+ (CQE_OPCODE(err_cqe) == FW_RI_RDMA_WRITE)) {
+ post_qp_event(dev, chp, qhp, err_cqe, IB_EVENT_QP_REQ_ERR);
+ goto done;
+ }
+
+ switch (CQE_STATUS(err_cqe)) {
+
+ /* Completion Events */
+ case T4_ERR_SUCCESS:
+ printf(KERN_ERR MOD "AE with status 0!\n");
+ break;
+
+ /* Reported as QP access errors */
+ case T4_ERR_STAG:
+ case T4_ERR_PDID:
+ case T4_ERR_QPID:
+ case T4_ERR_ACCESS:
+ case T4_ERR_WRAP:
+ case T4_ERR_BOUND:
+ case T4_ERR_INVALIDATE_SHARED_MR:
+ case T4_ERR_INVALIDATE_MR_WITH_MW_BOUND:
+ post_qp_event(dev, chp, qhp, err_cqe, IB_EVENT_QP_ACCESS_ERR);
+ break;
+
+ /* Device Fatal Errors */
+ case T4_ERR_ECC:
+ case T4_ERR_ECC_PSTAG:
+ case T4_ERR_INTERNAL_ERR:
+ post_qp_event(dev, chp, qhp, err_cqe, IB_EVENT_DEVICE_FATAL);
+ break;
+
+ /* QP Fatal Errors */
+ case T4_ERR_OUT_OF_RQE:
+ case T4_ERR_PBL_ADDR_BOUND:
+ case T4_ERR_CRC:
+ case T4_ERR_MARKER:
+ case T4_ERR_PDU_LEN_ERR:
+ case T4_ERR_DDP_VERSION:
+ case T4_ERR_RDMA_VERSION:
+ case T4_ERR_OPCODE:
+ case T4_ERR_DDP_QUEUE_NUM:
+ case T4_ERR_MSN:
+ case T4_ERR_TBIT:
+ case T4_ERR_MO:
+ case T4_ERR_MSN_GAP:
+ case T4_ERR_MSN_RANGE:
+ case T4_ERR_RQE_ADDR_BOUND:
+ case T4_ERR_IRD_OVERFLOW:
+ post_qp_event(dev, chp, qhp, err_cqe, IB_EVENT_QP_FATAL);
+ break;
+
+ /* Unrecognised status codes are logged and treated as QP fatal. */
+ default:
+ printf("Unknown T4 status 0x%x QPID 0x%x\n",
+ CQE_STATUS(err_cqe), qhp->wq.sq.qid);
+ post_qp_event(dev, chp, qhp, err_cqe, IB_EVENT_QP_FATAL);
+ break;
+ }
+done:
+ /* Drop the CQ reference; wake chp->wait if it was the last one. */
+ if (atomic_dec_and_test(&chp->refcnt))
+ wake_up(&chp->wait);
+ c4iw_qp_rem_ref(&qhp->ibqp);
+out:
+ return;
+}
+
+/*
+ * Invoke the completion handler of the CQ identified by an ingress queue
+ * response.
+ *
+ * The CQ id is rc->pldbuflen_qid converted from big-endian.  When no CQ is
+ * registered under that id the event is only traced.  Always returns 0.
+ */
+int c4iw_ev_handler(struct sge_iq *iq, const struct rsp_ctrl *rc)
+{
+	struct c4iw_dev *rhp = iq->adapter->iwarp_softc;
+	u32 cqid = be32_to_cpu(rc->pldbuflen_qid);
+	struct c4iw_cq *cq = get_chp(rhp, cqid);
+	unsigned long irqflags;
+
+	if (!cq) {
+		CTR2(KTR_IW_CXGBE, "%s unknown cqid 0x%x", __func__, cqid);
+		return 0;
+	}
+
+	/* The completion upcall is made with comp_handler_lock held. */
+	spin_lock_irqsave(&cq->comp_handler_lock, irqflags);
+	(*cq->ibcq.comp_handler)(&cq->ibcq, cq->ibcq.cq_context);
+	spin_unlock_irqrestore(&cq->comp_handler_lock, irqflags);
+	return 0;
+}
+#endif
diff --git a/sys/dev/cxgbe/iw_cxgbe/id_table.c b/sys/dev/cxgbe/iw_cxgbe/id_table.c
new file mode 100644
index 0000000..6fab5dc
--- /dev/null
+++ b/sys/dev/cxgbe/iw_cxgbe/id_table.c
@@ -0,0 +1,118 @@
+/*
+ * Copyright (c) 2011-2013 Chelsio Communications. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include "opt_inet.h"
+
+#ifdef TCP_OFFLOAD
+#include <sys/libkern.h>
+#include "iw_cxgbe.h"
+
+#define RANDOM_SKIP 16
+
+/*
+ * Trivial bitmap-based id allocator.
+ *
+ * The search for a free bit starts at the alloc->last hint and falls back to
+ * a scan from bit 0.  On success the bit is marked used and the id returned
+ * is the bit index plus alloc->start; when no bit is free the result is
+ * (u32)-1.
+ *
+ * With C4IW_ID_TABLE_F_RANDOM set the hint is advanced by a pseudo-random
+ * step smaller than RANDOM_SKIP, so that the ids handed out are not trivially
+ * predictable and a recently used id is not reused right away.  Otherwise the
+ * hint moves to the bit after the one just allocated.  Either way the hint
+ * wraps to 0 once it reaches alloc->max.
+ */
+u32 c4iw_id_alloc(struct c4iw_id_table *alloc)
+{
+	unsigned long irqflags;
+	u32 bit, id;
+
+	spin_lock_irqsave(&alloc->lock, irqflags);
+
+	bit = find_next_zero_bit(alloc->table, alloc->max, alloc->last);
+	if (bit >= alloc->max)
+		bit = find_first_zero_bit(alloc->table, alloc->max);
+
+	if (bit >= alloc->max) {
+		/* No free bit anywhere in the table. */
+		id = -1;
+	} else {
+		if (alloc->flags & C4IW_ID_TABLE_F_RANDOM)
+			alloc->last += arc4random() % RANDOM_SKIP;
+		else
+			alloc->last = bit + 1;
+		if (alloc->last >= alloc->max)
+			alloc->last = 0;
+		set_bit(bit, alloc->table);
+		id = bit + alloc->start;
+	}
+
+	spin_unlock_irqrestore(&alloc->lock, irqflags);
+	return id;
+}
+
+/*
+ * Release an id back to the table.
+ *
+ * obj is the external id; subtracting alloc->start yields its bit index.  A
+ * bit index that is negative when viewed as an int trips BUG_ON().
+ */
+void c4iw_id_free(struct c4iw_id_table *alloc, u32 obj)
+{
+	unsigned long irqflags;
+	u32 bit = obj - alloc->start;
+
+	BUG_ON((int)bit < 0);
+
+	spin_lock_irqsave(&alloc->lock, irqflags);
+	clear_bit(bit, alloc->table);
+	spin_unlock_irqrestore(&alloc->lock, irqflags);
+}
+
+/*
+ * Initialise an id table.
+ *
+ * alloc    - table to set up
+ * start    - value added to a bit index to form the id handed out
+ * num      - number of ids (bits) in the table
+ * reserved - number of leading bits to mark as already in use; ignored when
+ *            C4IW_ID_TABLE_F_EMPTY is set in flags
+ * flags    - C4IW_ID_TABLE_F_* bits
+ *
+ * With C4IW_ID_TABLE_F_RANDOM the search hint starts at a pseudo-random
+ * offset below RANDOM_SKIP, otherwise at 0.
+ *
+ * Returns 0 on success or -ENOMEM if the bitmap cannot be allocated.
+ *
+ * NOTE(review): reserved is not checked against num here; callers are
+ * presumably expected to pass reserved <= num -- confirm.
+ */
+int c4iw_id_table_alloc(struct c4iw_id_table *alloc, u32 start, u32 num,
+			u32 reserved, u32 flags)
+{
+	u32 i;	/* unsigned to match reserved; avoids a signed/unsigned compare */
+
+	alloc->start = start;
+	alloc->flags = flags;
+	if (flags & C4IW_ID_TABLE_F_RANDOM)
+		alloc->last = arc4random() % RANDOM_SKIP;
+	else
+		alloc->last = 0;
+	alloc->max = num;
+	spin_lock_init(&alloc->lock);
+	alloc->table = kmalloc(BITS_TO_LONGS(num) * sizeof(long),
+				GFP_KERNEL);
+	if (!alloc->table)
+		return -ENOMEM;
+
+	bitmap_zero(alloc->table, num);
+	if (!(alloc->flags & C4IW_ID_TABLE_F_EMPTY))
+		for (i = 0; i < reserved; ++i)
+			set_bit(i, alloc->table);
+
+	return 0;
+}
+
+/*
+ * Free the bitmap that c4iw_id_table_alloc() allocated for this table.  The
+ * alloc->table pointer itself is left unchanged.
+ */
+void c4iw_id_table_free(struct c4iw_id_table *alloc)
+{
+ kfree(alloc->table);
+}
+#endif
diff --git a/sys/dev/cxgbe/iw_cxgbe/iw_cxgbe.h b/sys/dev/cxgbe/iw_cxgbe/iw_cxgbe.h
new file mode 100644
index 0000000..0201a79
--- /dev/null
+++ b/sys/dev/cxgbe/iw_cxgbe/iw_cxgbe.h
@@ -0,0 +1,1046 @@
+/*
+ * Copyright (c) 2009-2013 Chelsio, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ * $FreeBSD$
+ */
+#ifndef __IW_CXGB4_H__
+#define __IW_CXGB4_H__
+
+#include <linux/list.h>
+#include <linux/spinlock.h>
+#include <linux/idr.h>
+#include <linux/completion.h>
+#include <linux/netdevice.h>
+#include <linux/sched.h>
+#include <linux/pci.h>
+#include <linux/dma-mapping.h>
+#include <linux/inet.h>
+#include <linux/wait.h>
+#include <linux/kref.h>
+#include <linux/timer.h>
+#include <linux/io.h>
+
+#include <asm/byteorder.h>
+
+#include <netinet/in.h>
+#include <netinet/toecore.h>
+
+#include <rdma/ib_verbs.h>
+#include <rdma/iw_cm.h>
+
+#undef prefetch
+
+#include "common/common.h"
+#include "common/t4_msg.h"
+#include "common/t4_regs.h"
+#include "common/t4_tcb.h"
+#include "t4_l2t.h"
+
+#define DRV_NAME "iw_cxgbe"
+#define MOD DRV_NAME ":"
+#define KTR_IW_CXGBE KTR_SPARE3
+
+extern int c4iw_debug;
+#define PDBG(fmt, args...) \
+do { \
+ if (c4iw_debug) \
+ printf(MOD fmt, ## args); \
+} while (0)
+
+#include "t4.h"
+
+static inline void *cplhdr(struct mbuf *m)
+{
+ return mtod(m, void*);
+}
+
+#define PBL_OFF(rdev_p, a) ((a) - (rdev_p)->adap->vres.pbl.start)
+#define RQT_OFF(rdev_p, a) ((a) - (rdev_p)->adap->vres.rq.start)
+
+#define C4IW_ID_TABLE_F_RANDOM 1 /* Pseudo-randomize the id's returned */
+#define C4IW_ID_TABLE_F_EMPTY 2 /* Table is initially empty */
+
+/*
+ * State of one bitmap-based id allocator; see c4iw_id_alloc() and friends.
+ */
+struct c4iw_id_table {
+ u32 flags; /* C4IW_ID_TABLE_F_* bits */
+ u32 start; /* logical minimal id */
+ u32 last; /* hint for find */
+ u32 max; /* number of bits in table */
+ spinlock_t lock; /* held by c4iw_id_alloc() and c4iw_id_free() */
+ unsigned long *table; /* the bitmap; a set bit means the id is in use */
+};
+
+struct c4iw_resource {
+ struct c4iw_id_table tpt_table;
+ struct c4iw_id_table qid_table;
+ struct c4iw_id_table pdid_table;
+};
+
+struct c4iw_qid_list {
+ struct list_head entry;
+ u32 qid;
+};
+
+struct c4iw_dev_ucontext {
+ struct list_head qpids;
+ struct list_head cqids;
+ struct mutex lock;
+};
+
+enum c4iw_rdev_flags {
+ T4_FATAL_ERROR = (1<<0),
+};
+
+struct c4iw_stat {
+ u64 total;
+ u64 cur;
+ u64 max;
+ u64 fail;
+};
+
+struct c4iw_stats {
+ struct mutex lock;
+ struct c4iw_stat qid;
+ struct c4iw_stat pd;
+ struct c4iw_stat stag;
+ struct c4iw_stat pbl;
+ struct c4iw_stat rqt;
+ u64 db_full;
+ u64 db_empty;
+ u64 db_drop;
+ u64 db_state_transitions;
+};
+
+struct c4iw_rdev {
+ struct adapter *adap;
+ struct c4iw_resource resource;
+ unsigned long qpshift;
+ u32 qpmask;
+ unsigned long cqshift;
+ u32 cqmask;
+ struct c4iw_dev_ucontext uctx;
+ struct gen_pool *pbl_pool;
+ struct gen_pool *rqt_pool;
+ u32 flags;
+ struct c4iw_stats stats;
+};
+
+/*
+ * Return non-zero if T4_FATAL_ERROR is set in rdev->flags, 0 otherwise.
+ */
+static inline int c4iw_fatal_error(struct c4iw_rdev *rdev)
+{
+ return rdev->flags & T4_FATAL_ERROR;
+}
+
+/*
+ * Return the number of STAGs usable on this device: the smaller of
+ * T4_MAX_NUM_STAG and the adapter's stag region size divided by 32.
+ *
+ * NOTE(review): the >> 5 presumably reflects 32 bytes per STAG entry --
+ * confirm against the hardware documentation.
+ */
+static inline int c4iw_num_stags(struct c4iw_rdev *rdev)
+{
+ return min((int)T4_MAX_NUM_STAG, (int)(rdev->adap->vres.stag.size >> 5));
+}
+
+#define C4IW_WR_TO (10*HZ)
+
+/*
+ * Rendezvous between a thread waiting in c4iw_wait_for_reply() and the code
+ * that calls c4iw_wake_up() for it.
+ */
+struct c4iw_wr_wait {
+ int ret; /* result handed to the waiter */
+ atomic_t completion; /* 0 while pending, set to 1 by c4iw_wake_up() */
+};
+
+/*
+ * Reset a wait object: clear the result and mark it as not yet completed.
+ */
+static inline void c4iw_init_wr_wait(struct c4iw_wr_wait *wr_waitp)
+{
+ wr_waitp->ret = 0;
+ atomic_set(&wr_waitp->completion, 0);
+}
+
+/*
+ * Complete a wait object with result ret and wake any thread sleeping on it.
+ * The wait object's address is the sleep channel, matching the tsleep() in
+ * c4iw_wait_for_reply().
+ */
+static inline void c4iw_wake_up(struct c4iw_wr_wait *wr_waitp, int ret)
+{
+ /* Store the result before flagging completion. */
+ wr_waitp->ret = ret;
+ atomic_set(&wr_waitp->completion, 1);
+ wakeup(wr_waitp);
+}
+
+/*
+ * Sleep until the wait object is completed by c4iw_wake_up().
+ *
+ * rdev     - device the request was issued on
+ * wr_waitp - wait object to sleep on
+ * hwtid    - tid, used only in diagnostics
+ * qpid     - QP id, used only in diagnostics
+ * func     - caller's name, used only in diagnostics
+ *
+ * Each sleep is bounded by a timeout that starts at C4IW_WR_TO and is
+ * quadrupled after every expiry.  When a sleep times out without the request
+ * having completed, a "not responding" message is printed; if the device has
+ * additionally recorded a fatal error the wait is abandoned with -EIO.
+ *
+ * Returns wr_waitp->ret: the value passed to c4iw_wake_up(), or -EIO as
+ * described above.
+ */
+static inline int
+c4iw_wait_for_reply(struct c4iw_rdev *rdev, struct c4iw_wr_wait *wr_waitp,
+		u32 hwtid, u32 qpid, const char *func)
+{
+	struct adapter *sc = rdev->adap;
+	unsigned to = C4IW_WR_TO;
+	int rc;
+
+	while (!atomic_read(&wr_waitp->completion)) {
+		/*
+		 * tsleep() returns EWOULDBLOCK when the timeout expires.  That
+		 * return value, not the presence of a pending signal, is what
+		 * identifies a timeout.
+		 */
+		rc = tsleep(wr_waitp, 0, "c4iw_wait", to);
+		if (rc != EWOULDBLOCK)
+			continue;
+
+		/* The reply may have arrived just as the timeout fired. */
+		if (atomic_read(&wr_waitp->completion))
+			break;
+
+		printf("%s - Device %s not responding - "
+		    "tid %u qpid %u\n", func,
+		    device_get_nameunit(sc->dev), hwtid, qpid);
+		if (c4iw_fatal_error(rdev)) {
+			wr_waitp->ret = -EIO;
+			break;
+		}
+		to = to << 2;
+	}
+	if (wr_waitp->ret)
+		CTR4(KTR_IW_CXGBE, "%s: FW reply %d tid %u qpid %u",
+		    device_get_nameunit(sc->dev), wr_waitp->ret, hwtid, qpid);
+	return (wr_waitp->ret);
+}
+
+enum db_state {
+ NORMAL = 0,
+ FLOW_CONTROL = 1,
+ RECOVERY = 2
+};
+
+struct c4iw_dev {
+ struct ib_device ibdev;
+ struct c4iw_rdev rdev;
+ u32 device_cap_flags;
+ struct idr cqidr;
+ struct idr qpidr;
+ struct idr mmidr;
+ spinlock_t lock;
+ struct dentry *debugfs_root;
+ enum db_state db_state;
+ int qpcnt;
+};
+
+static inline struct c4iw_dev *to_c4iw_dev(struct ib_device *ibdev)
+{
+ return container_of(ibdev, struct c4iw_dev, ibdev);
+}
+
+static inline struct c4iw_dev *rdev_to_c4iw_dev(struct c4iw_rdev *rdev)
+{
+ return container_of(rdev, struct c4iw_dev, rdev);
+}
+
+static inline struct c4iw_cq *get_chp(struct c4iw_dev *rhp, u32 cqid)
+{
+ return idr_find(&rhp->cqidr, cqid);
+}
+
+static inline struct c4iw_qp *get_qhp(struct c4iw_dev *rhp, u32 qpid)
+{
+ return idr_find(&rhp->qpidr, qpid);
+}
+
+static inline struct c4iw_mr *get_mhp(struct c4iw_dev *rhp, u32 mmid)
+{
+ return idr_find(&rhp->mmidr, mmid);
+}
+
+/*
+ * Store handle in idr under exactly the id given.
+ *
+ * rhp    - device whose lock protects the idr
+ * idr    - idr to insert into
+ * handle - pointer to store
+ * id     - id the handle must be stored under
+ * lock   - non-zero: take rhp->lock around the insertion and preallocate with
+ *          GFP_KERNEL; zero: take no lock here and preallocate with
+ *          GFP_ATOMIC
+ *
+ * Returns 0 on success, -ENOMEM if idr_pre_get() fails, or the error from
+ * idr_get_new_above().  The insertion is retried for as long as it reports
+ * -EAGAIN.
+ */
+static inline int _insert_handle(struct c4iw_dev *rhp, struct idr *idr,
+ void *handle, u32 id, int lock)
+{
+ int ret;
+ int newid;
+
+ do {
+ if (!idr_pre_get(idr, lock ? GFP_KERNEL : GFP_ATOMIC))
+ return -ENOMEM;
+ if (lock)
+ spin_lock_irq(&rhp->lock);
+ ret = idr_get_new_above(idr, handle, id, &newid);
+ /* Getting any id other than the requested one is a fatal bug. */
+ BUG_ON(!ret && newid != id);
+ if (lock)
+ spin_unlock_irq(&rhp->lock);
+ } while (ret == -EAGAIN);
+
+ return ret;
+}
+
+static inline int insert_handle(struct c4iw_dev *rhp, struct idr *idr,
+ void *handle, u32 id)
+{
+ return _insert_handle(rhp, idr, handle, id, 1);
+}
+
+static inline int insert_handle_nolock(struct c4iw_dev *rhp, struct idr *idr,
+ void *handle, u32 id)
+{
+ return _insert_handle(rhp, idr, handle, id, 0);
+}
+
+/*
+ * Remove id from idr.  With a non-zero lock argument the removal is done
+ * under rhp->lock; with zero no lock is taken here.
+ */
+static inline void _remove_handle(struct c4iw_dev *rhp, struct idr *idr,
+				  u32 id, int lock)
+{
+	if (!lock) {
+		idr_remove(idr, id);
+		return;
+	}
+
+	spin_lock_irq(&rhp->lock);
+	idr_remove(idr, id);
+	spin_unlock_irq(&rhp->lock);
+}
+
+static inline void remove_handle(struct c4iw_dev *rhp, struct idr *idr, u32 id)
+{
+ _remove_handle(rhp, idr, id, 1);
+}
+
+static inline void remove_handle_nolock(struct c4iw_dev *rhp,
+ struct idr *idr, u32 id)
+{
+ _remove_handle(rhp, idr, id, 0);
+}
+
+struct c4iw_pd {
+ struct ib_pd ibpd;
+ u32 pdid;
+ struct c4iw_dev *rhp;
+};
+
+static inline struct c4iw_pd *to_c4iw_pd(struct ib_pd *ibpd)
+{
+ return container_of(ibpd, struct c4iw_pd, ibpd);
+}
+
+struct tpt_attributes {
+ u64 len;
+ u64 va_fbo;
+ enum fw_ri_mem_perms perms;
+ u32 stag;
+ u32 pdid;
+ u32 qpid;
+ u32 pbl_addr;
+ u32 pbl_size;
+ u32 state:1;
+ u32 type:2;
+ u32 rsvd:1;
+ u32 remote_invaliate_disable:1;
+ u32 zbva:1;
+ u32 mw_bind_enable:1;
+ u32 page_size:5;
+};
+
+struct c4iw_mr {
+ struct ib_mr ibmr;
+ struct ib_umem *umem;
+ struct c4iw_dev *rhp;
+ u64 kva;
+ struct tpt_attributes attr;
+};
+
+static inline struct c4iw_mr *to_c4iw_mr(struct ib_mr *ibmr)
+{
+ return container_of(ibmr, struct c4iw_mr, ibmr);
+}
+
+struct c4iw_mw {
+ struct ib_mw ibmw;
+ struct c4iw_dev *rhp;
+ u64 kva;
+ struct tpt_attributes attr;
+};
+
+static inline struct c4iw_mw *to_c4iw_mw(struct ib_mw *ibmw)
+{
+ return container_of(ibmw, struct c4iw_mw, ibmw);
+}
+
+struct c4iw_fr_page_list {
+ struct ib_fast_reg_page_list ibpl;
+ DECLARE_PCI_UNMAP_ADDR(mapping);
+ dma_addr_t dma_addr;
+ struct c4iw_dev *dev;
+ int size;
+};
+
+static inline struct c4iw_fr_page_list *to_c4iw_fr_page_list(
+ struct ib_fast_reg_page_list *ibpl)
+{
+ return container_of(ibpl, struct c4iw_fr_page_list, ibpl);
+}
+
+struct c4iw_cq {
+ struct ib_cq ibcq;
+ struct c4iw_dev *rhp;
+ struct t4_cq cq;
+ spinlock_t lock;
+ spinlock_t comp_handler_lock;
+ atomic_t refcnt;
+ wait_queue_head_t wait;
+};
+
+static inline struct c4iw_cq *to_c4iw_cq(struct ib_cq *ibcq)
+{
+ return container_of(ibcq, struct c4iw_cq, ibcq);
+}
+
+struct c4iw_mpa_attributes {
+ u8 initiator;
+ u8 recv_marker_enabled;
+ u8 xmit_marker_enabled;
+ u8 crc_enabled;
+ u8 enhanced_rdma_conn;
+ u8 version;
+ u8 p2p_type;
+};
+
+struct c4iw_qp_attributes {
+ u32 scq;
+ u32 rcq;
+ u32 sq_num_entries;
+ u32 rq_num_entries;
+ u32 sq_max_sges;
+ u32 sq_max_sges_rdma_write;
+ u32 rq_max_sges;
+ u32 state;
+ u8 enable_rdma_read;
+ u8 enable_rdma_write;
+ u8 enable_bind;
+ u8 enable_mmid0_fastreg;
+ u32 max_ord;
+ u32 max_ird;
+ u32 pd;
+ u32 next_state;
+ char terminate_buffer[52];
+ u32 terminate_msg_len;
+ u8 is_terminate_local;
+ struct c4iw_mpa_attributes mpa_attr;
+ struct c4iw_ep *llp_stream_handle;
+ u8 layer_etype;
+ u8 ecode;
+ u16 sq_db_inc;
+ u16 rq_db_inc;
+};
+
+struct c4iw_qp {
+ struct ib_qp ibqp;
+ struct c4iw_dev *rhp;
+ struct c4iw_ep *ep;
+ struct c4iw_qp_attributes attr;
+ struct t4_wq wq;
+ spinlock_t lock;
+ struct mutex mutex;
+ atomic_t refcnt;
+ wait_queue_head_t wait;
+ struct timer_list timer;
+};
+
+static inline struct c4iw_qp *to_c4iw_qp(struct ib_qp *ibqp)
+{
+ return container_of(ibqp, struct c4iw_qp, ibqp);
+}
+
+struct c4iw_ucontext {
+ struct ib_ucontext ibucontext;
+ struct c4iw_dev_ucontext uctx;
+ u32 key;
+ spinlock_t mmap_lock;
+ struct list_head mmaps;
+};
+
+static inline struct c4iw_ucontext *to_c4iw_ucontext(struct ib_ucontext *c)
+{
+ return container_of(c, struct c4iw_ucontext, ibucontext);
+}
+
+struct c4iw_mm_entry {
+ struct list_head entry;
+ u64 addr;
+ u32 key;
+ unsigned len;
+};
+
+/*
+ * Find the first entry on ucontext->mmaps whose key and len both match,
+ * unlink it and return it.  Returns NULL when there is no such entry.
+ *
+ * The list is walked under ucontext->mmap_lock; the trace record for a match
+ * is emitted after the lock has been dropped.
+ */
+static inline struct c4iw_mm_entry *remove_mmap(struct c4iw_ucontext *ucontext,
+						u32 key, unsigned len)
+{
+	struct list_head *cur, *tmp;
+	struct c4iw_mm_entry *hit = NULL;
+
+	spin_lock(&ucontext->mmap_lock);
+	list_for_each_safe(cur, tmp, &ucontext->mmaps) {
+		struct c4iw_mm_entry *mm;
+
+		mm = list_entry(cur, struct c4iw_mm_entry, entry);
+		if (mm->key != key || mm->len != len)
+			continue;
+		list_del_init(&mm->entry);
+		hit = mm;
+		break;
+	}
+	spin_unlock(&ucontext->mmap_lock);
+
+	if (hit) {
+		CTR4(KTR_IW_CXGBE, "%s key 0x%x addr 0x%llx len %d",
+		    __func__, key, (unsigned long long) hit->addr,
+		    hit->len);
+	}
+	return hit;
+}
+
+/*
+ * Append mm to the tail of ucontext->mmaps under ucontext->mmap_lock, tracing
+ * its key, address and length.
+ */
+static inline void insert_mmap(struct c4iw_ucontext *ucontext,
+ struct c4iw_mm_entry *mm)
+{
+ spin_lock(&ucontext->mmap_lock);
+ CTR4(KTR_IW_CXGBE, "%s key 0x%x addr 0x%llx len %d", __func__, mm->key,
+ (unsigned long long) mm->addr, mm->len);
+ list_add_tail(&mm->entry, &ucontext->mmaps);
+ spin_unlock(&ucontext->mmap_lock);
+}
+
+enum c4iw_qp_attr_mask {
+ C4IW_QP_ATTR_NEXT_STATE = 1 << 0,
+ C4IW_QP_ATTR_SQ_DB = 1<<1,
+ C4IW_QP_ATTR_RQ_DB = 1<<2,
+ C4IW_QP_ATTR_ENABLE_RDMA_READ = 1 << 7,
+ C4IW_QP_ATTR_ENABLE_RDMA_WRITE = 1 << 8,
+ C4IW_QP_ATTR_ENABLE_RDMA_BIND = 1 << 9,
+ C4IW_QP_ATTR_MAX_ORD = 1 << 11,
+ C4IW_QP_ATTR_MAX_IRD = 1 << 12,
+ C4IW_QP_ATTR_LLP_STREAM_HANDLE = 1 << 22,
+ C4IW_QP_ATTR_STREAM_MSG_BUFFER = 1 << 23,
+ C4IW_QP_ATTR_MPA_ATTR = 1 << 24,
+ C4IW_QP_ATTR_QP_CONTEXT_ACTIVATE = 1 << 25,
+ C4IW_QP_ATTR_VALID_MODIFY = (C4IW_QP_ATTR_ENABLE_RDMA_READ |
+ C4IW_QP_ATTR_ENABLE_RDMA_WRITE |
+ C4IW_QP_ATTR_MAX_ORD |
+ C4IW_QP_ATTR_MAX_IRD |
+ C4IW_QP_ATTR_LLP_STREAM_HANDLE |
+ C4IW_QP_ATTR_STREAM_MSG_BUFFER |
+ C4IW_QP_ATTR_MPA_ATTR |
+ C4IW_QP_ATTR_QP_CONTEXT_ACTIVATE)
+};
+
+int c4iw_modify_qp(struct c4iw_dev *rhp,
+ struct c4iw_qp *qhp,
+ enum c4iw_qp_attr_mask mask,
+ struct c4iw_qp_attributes *attrs,
+ int internal);
+
+enum c4iw_qp_state {
+ C4IW_QP_STATE_IDLE,
+ C4IW_QP_STATE_RTS,
+ C4IW_QP_STATE_ERROR,
+ C4IW_QP_STATE_TERMINATE,
+ C4IW_QP_STATE_CLOSING,
+ C4IW_QP_STATE_TOT
+};
+
+/*
+ * Map an IB QP state to the driver's C4IW_QP_STATE_* value.
+ *
+ * RESET and INIT both map to IDLE, SQD to CLOSING and SQE to TERMINATE.  Any
+ * state without a mapping yields -1.
+ */
+static inline int c4iw_convert_state(enum ib_qp_state ib_state)
+{
+	int state = -1;
+
+	switch (ib_state) {
+	case IB_QPS_RESET:
+	case IB_QPS_INIT:
+		state = C4IW_QP_STATE_IDLE;
+		break;
+	case IB_QPS_RTS:
+		state = C4IW_QP_STATE_RTS;
+		break;
+	case IB_QPS_SQD:
+		state = C4IW_QP_STATE_CLOSING;
+		break;
+	case IB_QPS_SQE:
+		state = C4IW_QP_STATE_TERMINATE;
+		break;
+	case IB_QPS_ERR:
+		state = C4IW_QP_STATE_ERROR;
+		break;
+	default:
+		break;
+	}
+
+	return state;
+}
+
+/*
+ * Map a C4IW_QP_STATE_* value back to an IB QP state.  IDLE is reported as
+ * INIT; any value without a mapping is reported as IB_QPS_ERR.
+ */
+static inline int to_ib_qp_state(int c4iw_qp_state)
+{
+	int ib_state = IB_QPS_ERR;
+
+	switch (c4iw_qp_state) {
+	case C4IW_QP_STATE_IDLE:
+		ib_state = IB_QPS_INIT;
+		break;
+	case C4IW_QP_STATE_RTS:
+		ib_state = IB_QPS_RTS;
+		break;
+	case C4IW_QP_STATE_CLOSING:
+		ib_state = IB_QPS_SQD;
+		break;
+	case C4IW_QP_STATE_TERMINATE:
+		ib_state = IB_QPS_SQE;
+		break;
+	case C4IW_QP_STATE_ERROR:
+		ib_state = IB_QPS_ERR;
+		break;
+	}
+
+	return ib_state;
+}
+
+/*
+ * Translate IB access flags into FW_RI_MEM_ACCESS_* permission bits.  Local
+ * read is always granted; remote write, remote read and local write are
+ * granted when the corresponding IB_ACCESS_* flag is present in a.
+ */
+static inline u32 c4iw_ib_to_tpt_access(int a)
+{
+	u32 perms = FW_RI_MEM_ACCESS_LOCAL_READ;
+
+	if (a & IB_ACCESS_REMOTE_WRITE)
+		perms |= FW_RI_MEM_ACCESS_REM_WRITE;
+	if (a & IB_ACCESS_REMOTE_READ)
+		perms |= FW_RI_MEM_ACCESS_REM_READ;
+	if (a & IB_ACCESS_LOCAL_WRITE)
+		perms |= FW_RI_MEM_ACCESS_LOCAL_WRITE;
+
+	return perms;
+}
+
+/*
+ * Translate the remote-access part of IB access flags into
+ * FW_RI_MEM_ACCESS_* bits.  Only remote write and remote read are considered.
+ */
+static inline u32 c4iw_ib_to_tpt_bind_access(int acc)
+{
+	u32 perms = 0;
+
+	if (acc & IB_ACCESS_REMOTE_WRITE)
+		perms |= FW_RI_MEM_ACCESS_REM_WRITE;
+	if (acc & IB_ACCESS_REMOTE_READ)
+		perms |= FW_RI_MEM_ACCESS_REM_READ;
+
+	return perms;
+}
+
+enum c4iw_mmid_state {
+ C4IW_STAG_STATE_VALID,
+ C4IW_STAG_STATE_INVALID
+};
+
+#define C4IW_NODE_DESC "iw_cxgbe Chelsio Communications"
+
+#define MPA_KEY_REQ "MPA ID Req Frame"
+#define MPA_KEY_REP "MPA ID Rep Frame"
+
+#define MPA_MAX_PRIVATE_DATA 256
+#define MPA_ENHANCED_RDMA_CONN 0x10
+#define MPA_REJECT 0x20
+#define MPA_CRC 0x40
+#define MPA_MARKERS 0x80
+#define MPA_FLAGS_MASK 0xE0
+
+#define MPA_V2_PEER2PEER_MODEL 0x8000
+#define MPA_V2_ZERO_LEN_FPDU_RTR 0x4000
+#define MPA_V2_RDMA_WRITE_RTR 0x8000
+#define MPA_V2_RDMA_READ_RTR 0x4000
+#define MPA_V2_IRD_ORD_MASK 0x3FFF
+
+/*
+ * Drop a reference on an endpoint.  The reference count as seen before the
+ * drop is traced, a count below 1 triggers WARN_ON(), and kref_put() is given
+ * _c4iw_free_ep() as the release function.
+ *
+ * FIXME: read kref.count with atomic_read(), as the Linux driver does.
+ *
+ * NOTE(review): the expansion is a bare { } block rather than
+ * do { } while (0), so it must not be used as the unbraced body of an
+ * if/else; ep is also evaluated more than once.
+ */
+#define c4iw_put_ep(ep) { \
+ CTR4(KTR_IW_CXGBE, "put_ep (%s:%u) ep %p, refcnt %d", \
+ __func__, __LINE__, ep, (ep)->kref.count); \
+ WARN_ON((ep)->kref.count < 1); \
+ kref_put(&((ep)->kref), _c4iw_free_ep); \
+}
+
+/*
+ * Take a reference on an endpoint.  The reference count as seen before the
+ * increment is traced.
+ *
+ * FIXME: read kref.count with atomic_read(), as the Linux driver does.
+ *
+ * NOTE(review): the expansion is a bare { } block rather than
+ * do { } while (0), so it must not be used as the unbraced body of an
+ * if/else; ep is also evaluated more than once.
+ */
+#define c4iw_get_ep(ep) { \
+ CTR4(KTR_IW_CXGBE, "get_ep (%s:%u) ep %p, refcnt %d", \
+ __func__, __LINE__, ep, (ep)->kref.count); \
+ kref_get(&((ep)->kref)); \
+}
+
+void _c4iw_free_ep(struct kref *kref);
+
+struct mpa_message {
+ u8 key[16];
+ u8 flags;
+ u8 revision;
+ __be16 private_data_size;
+ u8 private_data[0];
+};
+
+struct mpa_v2_conn_params {
+ __be16 ird;
+ __be16 ord;
+};
+
+struct terminate_message {
+ u8 layer_etype;
+ u8 ecode;
+ __be16 hdrct_rsvd;
+ u8 len_hdrs[0];
+};
+
+#define TERM_MAX_LENGTH (sizeof(struct terminate_message) + 2 + 18 + 28)
+
+enum c4iw_layers_types {
+ LAYER_RDMAP = 0x00,
+ LAYER_DDP = 0x10,
+ LAYER_MPA = 0x20,
+ RDMAP_LOCAL_CATA = 0x00,
+ RDMAP_REMOTE_PROT = 0x01,
+ RDMAP_REMOTE_OP = 0x02,
+ DDP_LOCAL_CATA = 0x00,
+ DDP_TAGGED_ERR = 0x01,
+ DDP_UNTAGGED_ERR = 0x02,
+ DDP_LLP = 0x03
+};
+
+enum c4iw_rdma_ecodes {
+ RDMAP_INV_STAG = 0x00,
+ RDMAP_BASE_BOUNDS = 0x01,
+ RDMAP_ACC_VIOL = 0x02,
+ RDMAP_STAG_NOT_ASSOC = 0x03,
+ RDMAP_TO_WRAP = 0x04,
+ RDMAP_INV_VERS = 0x05,
+ RDMAP_INV_OPCODE = 0x06,
+ RDMAP_STREAM_CATA = 0x07,
+ RDMAP_GLOBAL_CATA = 0x08,
+ RDMAP_CANT_INV_STAG = 0x09,
+ RDMAP_UNSPECIFIED = 0xff
+};
+
+enum c4iw_ddp_ecodes {
+ DDPT_INV_STAG = 0x00,
+ DDPT_BASE_BOUNDS = 0x01,
+ DDPT_STAG_NOT_ASSOC = 0x02,
+ DDPT_TO_WRAP = 0x03,
+ DDPT_INV_VERS = 0x04,
+ DDPU_INV_QN = 0x01,
+ DDPU_INV_MSN_NOBUF = 0x02,
+ DDPU_INV_MSN_RANGE = 0x03,
+ DDPU_INV_MO = 0x04,
+ DDPU_MSG_TOOBIG = 0x05,
+ DDPU_INV_VERS = 0x06
+};
+
+enum c4iw_mpa_ecodes {
+ MPA_CRC_ERR = 0x02,
+ MPA_MARKER_ERR = 0x03,
+ MPA_LOCAL_CATA = 0x05,
+ MPA_INSUFF_IRD = 0x06,
+ MPA_NOMATCH_RTR = 0x07,
+};
+
+enum c4iw_ep_state {
+ IDLE = 0,
+ LISTEN,
+ CONNECTING,
+ MPA_REQ_WAIT,
+ MPA_REQ_SENT,
+ MPA_REQ_RCVD,
+ MPA_REP_SENT,
+ FPDU_MODE,
+ ABORTING,
+ CLOSING,
+ MORIBUND,
+ DEAD,
+};
+
+enum c4iw_ep_flags {
+ PEER_ABORT_IN_PROGRESS = 0,
+ ABORT_REQ_IN_PROGRESS = 1,
+ RELEASE_RESOURCES = 2,
+ CLOSE_SENT = 3,
+ TIMEOUT = 4
+};
+
+enum c4iw_ep_history {
+ ACT_OPEN_REQ = 0,
+ ACT_OFLD_CONN = 1,
+ ACT_OPEN_RPL = 2,
+ ACT_ESTAB = 3,
+ PASS_ACCEPT_REQ = 4,
+ PASS_ESTAB = 5,
+ ABORT_UPCALL = 6,
+ ESTAB_UPCALL = 7,
+ CLOSE_UPCALL = 8,
+ ULP_ACCEPT = 9,
+ ULP_REJECT = 10,
+ TIMEDOUT = 11,
+ PEER_ABORT = 12,
+ PEER_CLOSE = 13,
+ CONNREQ_UPCALL = 14,
+ ABORT_CONN = 15,
+ DISCONN_UPCALL = 16,
+ EP_DISC_CLOSE = 17,
+ EP_DISC_ABORT = 18,
+ CONN_RPL_UPCALL = 19,
+ ACT_RETRY_NOMEM = 20,
+ ACT_RETRY_INUSE = 21
+};
+
+struct c4iw_ep_common {
+ TAILQ_ENTRY(c4iw_ep_common) entry; /* Work queue attachment */
+ struct iw_cm_id *cm_id;
+ struct c4iw_qp *qp;
+ struct c4iw_dev *dev;
+ enum c4iw_ep_state state;
+ struct kref kref;
+ struct mutex mutex;
+ struct sockaddr_in local_addr;
+ struct sockaddr_in remote_addr;
+ struct c4iw_wr_wait wr_wait;
+ unsigned long flags;
+ unsigned long history;
+ int rpl_err;
+ int rpl_done;
+ struct thread *thread;
+ struct socket *so;
+};
+
+struct c4iw_listen_ep {
+ struct c4iw_ep_common com;
+ unsigned int stid;
+ int backlog;
+};
+
+struct c4iw_ep {
+ struct c4iw_ep_common com;
+ struct c4iw_ep *parent_ep;
+ struct timer_list timer;
+ struct list_head entry;
+ unsigned int atid;
+ u32 hwtid;
+ u32 snd_seq;
+ u32 rcv_seq;
+ struct l2t_entry *l2t;
+ struct dst_entry *dst;
+ struct c4iw_mpa_attributes mpa_attr;
+ u8 mpa_pkt[sizeof(struct mpa_message) + MPA_MAX_PRIVATE_DATA];
+ unsigned int mpa_pkt_len;
+ u32 ird;
+ u32 ord;
+ u32 smac_idx;
+ u32 tx_chan;
+ u32 mtu;
+ u16 mss;
+ u16 emss;
+ u16 plen;
+ u16 rss_qid;
+ u16 txq_idx;
+ u16 ctrlq_idx;
+ u8 tos;
+ u8 retry_with_mpa_v1;
+ u8 tried_with_mpa_v1;
+};
+
+static inline struct c4iw_ep *to_ep(struct iw_cm_id *cm_id)
+{
+ return cm_id->provider_data;
+}
+
+static inline struct c4iw_listen_ep *to_listen_ep(struct iw_cm_id *cm_id)
+{
+ return cm_id->provider_data;
+}
+
+/*
+ * Return the smallest shift in [0, 14] for which (65535 << shift) is at
+ * least win; 14 when no smaller shift is enough.
+ */
+static inline int compute_wscale(int win)
+{
+	int shift;
+
+	for (shift = 0; shift < 14; shift++) {
+		if ((65535 << shift) >= win)
+			break;
+	}
+	return shift;
+}
+
+u32 c4iw_id_alloc(struct c4iw_id_table *alloc);
+void c4iw_id_free(struct c4iw_id_table *alloc, u32 obj);
+int c4iw_id_table_alloc(struct c4iw_id_table *alloc, u32 start, u32 num,
+ u32 reserved, u32 flags);
+void c4iw_id_table_free(struct c4iw_id_table *alloc);
+
+typedef int (*c4iw_handler_func)(struct c4iw_dev *dev, struct mbuf *m);
+
+int c4iw_ep_redirect(void *ctx, struct dst_entry *old, struct dst_entry *new,
+ struct l2t_entry *l2t);
+u32 c4iw_get_resource(struct c4iw_id_table *id_table);
+void c4iw_put_resource(struct c4iw_id_table *id_table, u32 entry);
+int c4iw_init_resource(struct c4iw_rdev *rdev, u32 nr_tpt, u32 nr_pdid);
+int c4iw_init_ctrl_qp(struct c4iw_rdev *rdev);
+int c4iw_pblpool_create(struct c4iw_rdev *rdev);
+int c4iw_rqtpool_create(struct c4iw_rdev *rdev);
+void c4iw_pblpool_destroy(struct c4iw_rdev *rdev);
+void c4iw_rqtpool_destroy(struct c4iw_rdev *rdev);
+void c4iw_destroy_resource(struct c4iw_resource *rscp);
+int c4iw_destroy_ctrl_qp(struct c4iw_rdev *rdev);
+int c4iw_register_device(struct c4iw_dev *dev);
+void c4iw_unregister_device(struct c4iw_dev *dev);
+int __init c4iw_cm_init(void);
+void __exit c4iw_cm_term(void);
+void c4iw_release_dev_ucontext(struct c4iw_rdev *rdev,
+ struct c4iw_dev_ucontext *uctx);
+void c4iw_init_dev_ucontext(struct c4iw_rdev *rdev,
+ struct c4iw_dev_ucontext *uctx);
+int c4iw_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc);
+int c4iw_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
+ struct ib_send_wr **bad_wr);
+int c4iw_post_receive(struct ib_qp *ibqp, struct ib_recv_wr *wr,
+ struct ib_recv_wr **bad_wr);
+int c4iw_bind_mw(struct ib_qp *qp, struct ib_mw *mw,
+ struct ib_mw_bind *mw_bind);
+int c4iw_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param);
+int c4iw_create_listen(struct iw_cm_id *cm_id, int backlog);
+int c4iw_destroy_listen(struct iw_cm_id *cm_id);
+int c4iw_accept_cr(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param);
+int c4iw_reject_cr(struct iw_cm_id *cm_id, const void *pdata, u8 pdata_len);
+void c4iw_qp_add_ref(struct ib_qp *qp);
+void c4iw_qp_rem_ref(struct ib_qp *qp);
+void c4iw_free_fastreg_pbl(struct ib_fast_reg_page_list *page_list);
+struct ib_fast_reg_page_list *c4iw_alloc_fastreg_pbl(
+ struct ib_device *device,
+ int page_list_len);
+struct ib_mr *c4iw_alloc_fast_reg_mr(struct ib_pd *pd, int pbl_depth);
+int c4iw_dealloc_mw(struct ib_mw *mw);
+struct ib_mw *c4iw_alloc_mw(struct ib_pd *pd);
+struct ib_mr *c4iw_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, u64
+ virt, int acc, struct ib_udata *udata, int mr_id);
+struct ib_mr *c4iw_get_dma_mr(struct ib_pd *pd, int acc);
+struct ib_mr *c4iw_register_phys_mem(struct ib_pd *pd,
+ struct ib_phys_buf *buffer_list,
+ int num_phys_buf,
+ int acc,
+ u64 *iova_start);
+int c4iw_reregister_phys_mem(struct ib_mr *mr,
+ int mr_rereg_mask,
+ struct ib_pd *pd,
+ struct ib_phys_buf *buffer_list,
+ int num_phys_buf,
+ int acc, u64 *iova_start);
+int c4iw_dereg_mr(struct ib_mr *ib_mr);
+int c4iw_destroy_cq(struct ib_cq *ib_cq);
+struct ib_cq *c4iw_create_cq(struct ib_device *ibdev, int entries,
+ int vector,
+ struct ib_ucontext *ib_context,
+ struct ib_udata *udata);
+int c4iw_resize_cq(struct ib_cq *cq, int cqe, struct ib_udata *udata);
+int c4iw_arm_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags flags);
+int c4iw_destroy_qp(struct ib_qp *ib_qp);
+struct ib_qp *c4iw_create_qp(struct ib_pd *pd,
+ struct ib_qp_init_attr *attrs,
+ struct ib_udata *udata);
+int c4iw_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
+ int attr_mask, struct ib_udata *udata);
+int c4iw_ib_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
+ int attr_mask, struct ib_qp_init_attr *init_attr);
+struct ib_qp *c4iw_get_qp(struct ib_device *dev, int qpn);
+u32 c4iw_rqtpool_alloc(struct c4iw_rdev *rdev, int size);
+void c4iw_rqtpool_free(struct c4iw_rdev *rdev, u32 addr, int size);
+u32 c4iw_pblpool_alloc(struct c4iw_rdev *rdev, int size);
+void c4iw_pblpool_free(struct c4iw_rdev *rdev, u32 addr, int size);
+int c4iw_ofld_send(struct c4iw_rdev *rdev, struct mbuf *m);
+void c4iw_flush_hw_cq(struct t4_cq *cq);
+void c4iw_count_rcqes(struct t4_cq *cq, struct t4_wq *wq, int *count);
+void c4iw_count_scqes(struct t4_cq *cq, struct t4_wq *wq, int *count);
+int c4iw_ep_disconnect(struct c4iw_ep *ep, int abrupt, gfp_t gfp);
+int c4iw_flush_rq(struct t4_wq *wq, struct t4_cq *cq, int count);
+int c4iw_flush_sq(struct t4_wq *wq, struct t4_cq *cq, int count);
+int c4iw_ev_handler(struct sge_iq *, const struct rsp_ctrl *);
+u16 c4iw_rqes_posted(struct c4iw_qp *qhp);
+int c4iw_post_terminate(struct c4iw_qp *qhp, struct t4_cqe *err_cqe);
+u32 c4iw_get_cqid(struct c4iw_rdev *rdev, struct c4iw_dev_ucontext *uctx);
+void c4iw_put_cqid(struct c4iw_rdev *rdev, u32 qid,
+ struct c4iw_dev_ucontext *uctx);
+u32 c4iw_get_qpid(struct c4iw_rdev *rdev, struct c4iw_dev_ucontext *uctx);
+void c4iw_put_qpid(struct c4iw_rdev *rdev, u32 qid,
+ struct c4iw_dev_ucontext *uctx);
+void c4iw_ev_dispatch(struct c4iw_dev *dev, struct t4_cqe *err_cqe);
+
+extern struct cxgb4_client t4c_client;
+extern c4iw_handler_func c4iw_handlers[NUM_CPL_CMDS];
+extern int c4iw_max_read_depth;
+
+#include <sys/blist.h>
+/*
+ * Simple address-range allocator built on a blist.  Space is handed out in
+ * chunks of (1 << gen_chunk_shift) bytes.
+ */
+struct gen_pool {
+ blist_t gen_list; /* blist recording which chunks are free */
+ daddr_t gen_base; /* address that corresponds to chunk 0 */
+ int gen_chunk_shift; /* log2 of the chunk size in bytes */
+ struct mutex gen_lock; /* taken around blist_alloc()/blist_free() */
+};
+
+/*
+ * Create a pool that covers 'len' bytes starting at 'base' and is managed in
+ * chunks of (1 << chunk_shift) bytes.  Both allocations use M_NOWAIT; NULL is
+ * returned if either of them fails.
+ */
+static __inline struct gen_pool *
+gen_pool_create(daddr_t base, u_int chunk_shift, u_int len)
+{
+	struct gen_pool *pool;
+
+	pool = malloc(sizeof(struct gen_pool), M_DEVBUF, M_NOWAIT);
+	if (pool == NULL)
+		return (NULL);
+	memset(pool, 0, sizeof(struct gen_pool));
+
+	pool->gen_list = blist_create(len >> chunk_shift, M_NOWAIT);
+	if (pool->gen_list == NULL) {
+		free(pool, M_DEVBUF);
+		return (NULL);
+	}
+
+	/* Mark every chunk in the managed range as free. */
+	blist_free(pool->gen_list, 0, len >> chunk_shift);
+
+	pool->gen_base = base;
+	pool->gen_chunk_shift = chunk_shift;
+	mutex_init(&pool->gen_lock);
+
+	return (pool);
+}
+
+/*
+ * Allocate 'size' bytes from the pool, rounded up to a whole number of
+ * chunks.  Returns the address of the allocation, or 0 when the blist cannot
+ * satisfy the request.
+ */
+static __inline unsigned long
+gen_pool_alloc(struct gen_pool *gp, int size)
+{
+	const int nchunks =
+	    (size + (1 << gp->gen_chunk_shift) - 1) >> gp->gen_chunk_shift;
+	daddr_t first;
+
+	mutex_lock(&gp->gen_lock);
+	first = blist_alloc(gp->gen_list, nchunks);
+	mutex_unlock(&gp->gen_lock);
+
+	if (first != SWAPBLK_NONE)
+		return (gp->gen_base + ((1 << gp->gen_chunk_shift) * first));
+
+	return (0);
+}
+
+/*
+ * Return 'size' bytes starting at 'address' to the pool.  The size is rounded
+ * up to whole chunks, mirroring gen_pool_alloc().
+ */
+static __inline void
+gen_pool_free(struct gen_pool *gp, daddr_t address, int size)
+{
+	const int nchunks =
+	    (size + (1 << gp->gen_chunk_shift) - 1) >> gp->gen_chunk_shift;
+	const daddr_t first =
+	    (address - gp->gen_base) / (1 << gp->gen_chunk_shift);
+
+	mutex_lock(&gp->gen_lock);
+	blist_free(gp->gen_list, first, nchunks);
+	mutex_unlock(&gp->gen_lock);
+}
+
+/*
+ * Destroy the pool's blist and free the pool structure itself.
+ * NOTE(review): gen_lock is initialised in gen_pool_create() but is not torn
+ * down here - confirm whether that is required.
+ */
+static __inline void
+gen_pool_destroy(struct gen_pool *gp)
+{
+ blist_destroy(gp->gen_list);
+ free(gp, M_DEVBUF);
+}
+
+/* L1_CACHE_BYTES is 128 on i386/amd64 and 32 on every other architecture. */
+#if defined(__i386__) || defined(__amd64__)
+#define L1_CACHE_BYTES 128
+#else
+#define L1_CACHE_BYTES 32
+#endif
+
+/*
+ * Walk the layers of 'idp' in increasing id order.  For every non-NULL
+ * pointer reached at the bottom of the walk, call fn(id, ptr, data).  The
+ * walk stops at the first non-zero value returned by fn and that value is
+ * returned; otherwise 0 is returned.
+ */
+static inline
+int idr_for_each(struct idr *idp,
+ int (*fn)(int id, void *p, void *data), void *data)
+{
+ int n, id, max, error = 0;
+ struct idr_layer *p;
+ struct idr_layer *pa[MAX_LEVEL]; /* layers on the path to the current id */
+ struct idr_layer **paa = &pa[0];
+
+ n = idp->layers * IDR_BITS; /* id bits still to be resolved */
+ p = idp->top;
+ max = 1 << n;
+
+ id = 0;
+ while (id < max) {
+ /* Descend, consuming IDR_BITS of the id per layer, while a child exists. */
+ while (n > 0 && p) {
+ n -= IDR_BITS;
+ *paa++ = p;
+ p = p->ary[(id >> n) & IDR_MASK];
+ }
+
+ if (p) {
+ error = fn(id, (void *)p, data);
+ if (error)
+ break;
+ }
+
+ /* Skip past everything covered by the slot just examined. */
+ id += 1 << n;
+ /* Pop back up the saved path until the level covers the new id. */
+ while (n < fls(id)) {
+ n += IDR_BITS;
+ p = *--paa;
+ }
+ }
+
+ return error;
+}
+
+void c4iw_cm_init_cpl(struct adapter *);
+void c4iw_cm_term_cpl(struct adapter *);
+
+void your_reg_device(struct c4iw_dev *dev);
+
+#define SGE_CTRLQ_NUM 0
+
+extern int spg_creds;/* Status Page size in credit units(1 unit = 64) */
+#endif
diff --git a/sys/dev/cxgbe/iw_cxgbe/mem.c b/sys/dev/cxgbe/iw_cxgbe/mem.c
new file mode 100644
index 0000000..50c5ed0
--- /dev/null
+++ b/sys/dev/cxgbe/iw_cxgbe/mem.c
@@ -0,0 +1,828 @@
+/*
+ * Copyright (c) 2009-2013 Chelsio, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include "opt_inet.h"
+
+#ifdef TCP_OFFLOAD
+#include <linux/types.h>
+#include <linux/kref.h>
+#include <rdma/ib_umem.h>
+#include <asm/atomic.h>
+
+#include <common/t4_msg.h>
+#include "iw_cxgbe.h"
+
+#define T4_ULPTX_MIN_IO 32
+#define C4IW_MAX_INLINE_SIZE 96
+
+/*
+ * Write 'len' bytes into adapter memory at 'addr' using ULP_TX memory-write
+ * work requests sent on the management queue.
+ *
+ * rdev: device whose adapter is written.
+ * addr: destination, advanced by 3 for every C4IW_MAX_INLINE_SIZE (96) bytes,
+ *       i.e. expressed in T4_ULPTX_MIN_IO (32-byte) units.
+ * len:  number of bytes to write.
+ * data: source buffer, or NULL to write zeroes.
+ *
+ * The payload is split into work requests of at most C4IW_MAX_INLINE_SIZE
+ * bytes.  Only the final request asks for a completion, which is then waited
+ * for.
+ *
+ * Returns 0 on success, -ENOMEM if a work request cannot be allocated, or the
+ * value returned by c4iw_wait_for_reply().
+ */
+static int
+write_adapter_mem(struct c4iw_rdev *rdev, u32 addr, u32 len, void *data)
+{
+	struct adapter *sc = rdev->adap;
+	struct ulp_mem_io *ulpmc;
+	struct ulptx_idata *ulpsc;
+	u8 wr_len, *to_dp;
+	int copy_len, num_wqe, i, ret = 0;
+	struct c4iw_wr_wait wr_wait;
+	struct wrqe *wr;
+	u32 cmd;
+
+	cmd = cpu_to_be32(V_ULPTX_CMD(ULP_TX_MEM_WRITE));
+	if (is_t4(sc))
+		cmd |= cpu_to_be32(F_ULP_MEMIO_ORDER);
+	else
+		cmd |= cpu_to_be32(F_T5_ULP_MEMIO_IMM);
+
+	addr &= 0x7FFFFFF;
+	CTR3(KTR_IW_CXGBE, "%s addr 0x%x len %u", __func__, addr, len);
+	num_wqe = DIV_ROUND_UP(len, C4IW_MAX_INLINE_SIZE);
+	c4iw_init_wr_wait(&wr_wait);
+	for (i = 0; i < num_wqe; i++) {
+
+		copy_len = min(len, C4IW_MAX_INLINE_SIZE);
+		wr_len = roundup(sizeof *ulpmc + sizeof *ulpsc +
+		    roundup(copy_len, T4_ULPTX_MIN_IO), 16);
+
+		wr = alloc_wrqe(wr_len, &sc->sge.mgmtq);
+		if (wr == NULL) {
+			/*
+			 * Report the failure.  Returning 0 here would make
+			 * callers believe the adapter memory was written.
+			 */
+			return (-ENOMEM);
+		}
+		ulpmc = wrtod(wr);
+
+		memset(ulpmc, 0, wr_len);
+		INIT_ULPTX_WR(ulpmc, wr_len, 0, 0);
+
+		if (i == (num_wqe-1)) {
+			/* Last request: ask for a completion tied to wr_wait. */
+			ulpmc->wr.wr_hi = cpu_to_be32(V_FW_WR_OP(FW_ULPTX_WR) |
+			    F_FW_WR_COMPL);
+			ulpmc->wr.wr_lo = (__force __be64)(unsigned long) &wr_wait;
+		} else
+			ulpmc->wr.wr_hi = cpu_to_be32(V_FW_WR_OP(FW_ULPTX_WR));
+		ulpmc->wr.wr_mid = cpu_to_be32(
+		    V_FW_WR_LEN16(DIV_ROUND_UP(wr_len, 16)));
+
+		ulpmc->cmd = cmd;
+		ulpmc->dlen = cpu_to_be32(V_ULP_MEMIO_DATA_LEN(
+		    DIV_ROUND_UP(copy_len, T4_ULPTX_MIN_IO)));
+		ulpmc->len16 = cpu_to_be32(DIV_ROUND_UP(wr_len-sizeof(ulpmc->wr),
+		    16));
+		/* Each full request covers 96 bytes == 3 units of 32 bytes. */
+		ulpmc->lock_addr = cpu_to_be32(V_ULP_MEMIO_ADDR(addr + i * 3));
+
+		ulpsc = (struct ulptx_idata *)(ulpmc + 1);
+		ulpsc->cmd_more = cpu_to_be32(V_ULPTX_CMD(ULP_TX_SC_IMM));
+		ulpsc->len = cpu_to_be32(roundup(copy_len, T4_ULPTX_MIN_IO));
+
+		to_dp = (u8 *)(ulpsc + 1);
+		if (data)
+			memcpy(to_dp, (u8 *)data + i * C4IW_MAX_INLINE_SIZE,
+			    copy_len);
+		else
+			memset(to_dp, 0, copy_len);
+		/* Zero the padding up to the next T4_ULPTX_MIN_IO boundary. */
+		if (copy_len % T4_ULPTX_MIN_IO)
+			memset(to_dp + copy_len, 0, T4_ULPTX_MIN_IO -
+			    (copy_len % T4_ULPTX_MIN_IO));
+		t4_wrq_tx(sc, wr);
+		len -= C4IW_MAX_INLINE_SIZE;
+	}
+
+	ret = c4iw_wait_for_reply(rdev, &wr_wait, 0, 0, __func__);
+	return ret;
+}
+
+/*
+ * Build and write a TPT entry, or clear one when reset_tpt_entry is set.
+ * IN: stag key, pdid, perm, bind_enabled, zbva, to, len, page_size,
+ * pbl_size and pbl_addr
+ * OUT: stag index
+ *
+ * When *stag is T4_STAG_UNSET and the entry is not being reset, a new index
+ * is taken from the TPT resource table and *stag is filled in as
+ * (index << 8) | 8-bit key.  When resetting, the index is given back to the
+ * resource table after the write.
+ *
+ * Returns -EIO if the device is in a fatal error state, -ENOMEM if no index
+ * is available, otherwise the result of write_adapter_mem().
+ */
+static int write_tpt_entry(struct c4iw_rdev *rdev, u32 reset_tpt_entry,
+ u32 *stag, u8 stag_state, u32 pdid,
+ enum fw_ri_stag_type type, enum fw_ri_mem_perms perm,
+ int bind_enabled, u32 zbva, u64 to,
+ u64 len, u8 page_size, u32 pbl_size, u32 pbl_addr)
+{
+ int err;
+ struct fw_ri_tpte tpt;
+ u32 stag_idx;
+ static atomic_t key; /* source of the low 8 "key" bits of new stags */
+
+ if (c4iw_fatal_error(rdev))
+ return -EIO;
+
+ stag_state = stag_state > 0; /* normalise to 0 or 1 */
+ stag_idx = (*stag) >> 8; /* upper 24 bits of a stag are the index */
+
+ if ((!reset_tpt_entry) && (*stag == T4_STAG_UNSET)) {
+ stag_idx = c4iw_get_resource(&rdev->resource.tpt_table);
+ if (!stag_idx)
+ return -ENOMEM;
+ mutex_lock(&rdev->stats.lock);
+ rdev->stats.stag.cur += 32; /* presumably bytes per TPT entry - confirm */
+ if (rdev->stats.stag.cur > rdev->stats.stag.max)
+ rdev->stats.stag.max = rdev->stats.stag.cur;
+ mutex_unlock(&rdev->stats.lock);
+ *stag = (stag_idx << 8) | (atomic_inc_return(&key) & 0xff);
+ }
+ CTR5(KTR_IW_CXGBE,
+ "%s stag_state 0x%0x type 0x%0x pdid 0x%0x, stag_idx 0x%x",
+ __func__, stag_state, type, pdid, stag_idx);
+
+ /* write TPT entry */
+ if (reset_tpt_entry)
+ memset(&tpt, 0, sizeof(tpt));
+ else {
+ tpt.valid_to_pdid = cpu_to_be32(F_FW_RI_TPTE_VALID |
+ V_FW_RI_TPTE_STAGKEY((*stag & M_FW_RI_TPTE_STAGKEY)) |
+ V_FW_RI_TPTE_STAGSTATE(stag_state) |
+ V_FW_RI_TPTE_STAGTYPE(type) | V_FW_RI_TPTE_PDID(pdid));
+ tpt.locread_to_qpid = cpu_to_be32(V_FW_RI_TPTE_PERM(perm) |
+ (bind_enabled ? F_FW_RI_TPTE_MWBINDEN : 0) |
+ V_FW_RI_TPTE_ADDRTYPE((zbva ? FW_RI_ZERO_BASED_TO :
+ FW_RI_VA_BASED_TO))|
+ V_FW_RI_TPTE_PS(page_size));
+ /* The PBL address field is only filled in when a PBL exists. */
+ tpt.nosnoop_pbladdr = !pbl_size ? 0 : cpu_to_be32(
+ V_FW_RI_TPTE_PBLADDR(PBL_OFF(rdev, pbl_addr)>>3));
+ tpt.len_lo = cpu_to_be32((u32)(len & 0xffffffffUL));
+ tpt.va_hi = cpu_to_be32((u32)(to >> 32));
+ tpt.va_lo_fbo = cpu_to_be32((u32)(to & 0xffffffffUL));
+ tpt.dca_mwbcnt_pstag = cpu_to_be32(0);
+ tpt.len_hi = cpu_to_be32((u32)(len >> 32));
+ }
+ /* Destination: stag region start (>> 5) plus the entry index. */
+ err = write_adapter_mem(rdev, stag_idx +
+ (rdev->adap->vres.stag.start >> 5),
+ sizeof(tpt), &tpt);
+
+ /* The index is released on reset whether or not the write succeeded. */
+ if (reset_tpt_entry) {
+ c4iw_put_resource(&rdev->resource.tpt_table, stag_idx);
+ mutex_lock(&rdev->stats.lock);
+ rdev->stats.stag.cur -= 32;
+ mutex_unlock(&rdev->stats.lock);
+ }
+ return err;
+}
+
+/*
+ * Copy 'pbl_size' 8-byte page-list entries from 'pbl' into adapter memory.
+ * 'pbl_addr' is shifted right by 5 before being handed to
+ * write_adapter_mem(), whose result is returned unchanged.
+ */
+static int write_pbl(struct c4iw_rdev *rdev, __be64 *pbl,
+		     u32 pbl_addr, u32 pbl_size)
+{
+	CTR4(KTR_IW_CXGBE, "%s *pdb_addr 0x%x, pbl_base 0x%x, pbl_size %d",
+	    __func__, pbl_addr, rdev->adap->vres.pbl.start, pbl_size);
+
+	/* Entry count -> byte count: each entry is 8 bytes. */
+	return write_adapter_mem(rdev, pbl_addr >> 5, pbl_size << 3, pbl);
+}
+
+/*
+ * Clear the TPT entry for 'stag' by calling write_tpt_entry() with
+ * reset_tpt_entry set; that call also releases the stag index.
+ */
+static int dereg_mem(struct c4iw_rdev *rdev, u32 stag, u32 pbl_size,
+ u32 pbl_addr)
+{
+ return write_tpt_entry(rdev, 1, &stag, 0, 0, 0, 0, 0, 0, 0UL, 0, 0,
+ pbl_size, pbl_addr);
+}
+
+/*
+ * Allocate a new stag of type FW_RI_STAG_MW for protection domain 'pdid'.
+ * *stag is overwritten with the newly allocated stag.
+ */
+static int allocate_window(struct c4iw_rdev *rdev, u32 * stag, u32 pdid)
+{
+ *stag = T4_STAG_UNSET; /* makes write_tpt_entry() allocate a new index */
+ return write_tpt_entry(rdev, 0, stag, 0, pdid, FW_RI_STAG_MW, 0, 0, 0,
+ 0UL, 0, 0, 0, 0);
+}
+
+/*
+ * Clear the TPT entry of a memory window stag (reset_tpt_entry set, no PBL).
+ */
+static int deallocate_window(struct c4iw_rdev *rdev, u32 stag)
+{
+ return write_tpt_entry(rdev, 1, &stag, 0, 0, 0, 0, 0, 0, 0UL, 0, 0, 0,
+ 0);
+}
+
+/*
+ * Allocate a new stag of type FW_RI_STAG_NSMR for 'pdid' with stag_state 0,
+ * pointing at the given PBL.  *stag is overwritten with the new stag.
+ */
+static int allocate_stag(struct c4iw_rdev *rdev, u32 *stag, u32 pdid,
+ u32 pbl_size, u32 pbl_addr)
+{
+ *stag = T4_STAG_UNSET; /* makes write_tpt_entry() allocate a new index */
+ return write_tpt_entry(rdev, 0, stag, 0, pdid, FW_RI_STAG_NSMR, 0, 0, 0,
+ 0UL, 0, 0, pbl_size, pbl_addr);
+}
+
+/*
+ * Record 'stag' in the MR (as attr.stag, lkey and rkey), mark it valid, and
+ * insert the MR into the device's mmidr table under the stag index
+ * (stag >> 8).  Returns the result of insert_handle().
+ */
+static int finish_mem_reg(struct c4iw_mr *mhp, u32 stag)
+{
+	const u32 mmid = stag >> 8;
+
+	mhp->attr.state = 1;
+	mhp->attr.stag = stag;
+	mhp->ibmr.lkey = stag;
+	mhp->ibmr.rkey = stag;
+
+	CTR3(KTR_IW_CXGBE, "%s mmid 0x%x mhp %p", __func__, mmid, mhp);
+	return insert_handle(mhp->rhp, &mhp->rhp->mmidr, mhp, mmid);
+}
+
+/*
+ * Write a fresh TPT entry for the MR described by mhp->attr and publish the
+ * MR in the device's handle table.  If publishing fails, the TPT entry is
+ * cleared again.  Returns 0 or a negative errno.
+ */
+static int register_mem(struct c4iw_dev *rhp, struct c4iw_pd *php,
+		      struct c4iw_mr *mhp, int shift)
+{
+	u32 new_stag = T4_STAG_UNSET;
+	int rc;
+
+	rc = write_tpt_entry(&rhp->rdev, 0, &new_stag, 1, mhp->attr.pdid,
+	    FW_RI_STAG_NSMR, mhp->attr.perms,
+	    mhp->attr.mw_bind_enable, mhp->attr.zbva,
+	    mhp->attr.va_fbo, mhp->attr.len, shift - 12,
+	    mhp->attr.pbl_size, mhp->attr.pbl_addr);
+	if (rc != 0)
+		return rc;
+
+	rc = finish_mem_reg(mhp, new_stag);
+	if (rc == 0)
+		return 0;
+
+	/* Handle insertion failed: undo the TPT write. */
+	dereg_mem(&rhp->rdev, mhp->attr.stag, mhp->attr.pbl_size,
+	    mhp->attr.pbl_addr);
+	return rc;
+}
+
+/*
+ * Rewrite the TPT entry of an existing MR, reusing its stag, then publish it
+ * through finish_mem_reg().  Fails with -ENOMEM when 'npages' does not fit
+ * in the MR's existing PBL.  If publishing fails, the TPT entry is cleared.
+ */
+static int reregister_mem(struct c4iw_dev *rhp, struct c4iw_pd *php,
+			  struct c4iw_mr *mhp, int shift, int npages)
+{
+	u32 cur_stag;
+	int rc;
+
+	if (npages > mhp->attr.pbl_size)
+		return -ENOMEM;
+
+	cur_stag = mhp->attr.stag;
+	rc = write_tpt_entry(&rhp->rdev, 0, &cur_stag, 1, mhp->attr.pdid,
+	    FW_RI_STAG_NSMR, mhp->attr.perms,
+	    mhp->attr.mw_bind_enable, mhp->attr.zbva,
+	    mhp->attr.va_fbo, mhp->attr.len, shift - 12,
+	    mhp->attr.pbl_size, mhp->attr.pbl_addr);
+	if (rc != 0)
+		return rc;
+
+	rc = finish_mem_reg(mhp, cur_stag);
+	if (rc == 0)
+		return 0;
+
+	/* Handle insertion failed: undo the TPT write. */
+	dereg_mem(&rhp->rdev, mhp->attr.stag, mhp->attr.pbl_size,
+	    mhp->attr.pbl_addr);
+	return rc;
+}
+
+/*
+ * Reserve PBL space for 'npages' 8-byte entries and record its address and
+ * size in mhp->attr.  Returns 0, or -ENOMEM when the pool returns 0.
+ */
+static int alloc_pbl(struct c4iw_mr *mhp, int npages)
+{
+	mhp->attr.pbl_addr = c4iw_pblpool_alloc(&mhp->rhp->rdev, npages << 3);
+	if (mhp->attr.pbl_addr) {
+		mhp->attr.pbl_size = npages;
+		return 0;
+	}
+
+	return -ENOMEM;
+}
+
+/*
+ * Turn a list of physical buffers into a flat, big-endian page list.
+ *
+ * On success *total_size holds the summed buffer sizes, *shift the page shift
+ * chosen for the list, *npages the number of entries and *page_list a
+ * kmalloc()ed array that the caller must kfree().  buffer_list[0] is modified
+ * in place: its address is rounded down to the chosen page size and its size
+ * grown by the same amount.
+ *
+ * Returns -EINVAL for misaligned buffers or an empty result, -ENOMEM when the
+ * total exceeds 32 bits or the array cannot be allocated, and 0 on success.
+ */
+static int build_phys_page_list(struct ib_phys_buf *buffer_list,
+ int num_phys_buf, u64 *iova_start,
+ u64 *total_size, int *npages,
+ int *shift, __be64 **page_list)
+{
+ u64 mask;
+ int i, j, n;
+
+ mask = 0;
+ *total_size = 0;
+ for (i = 0; i < num_phys_buf; ++i) {
+ /* Every buffer but the first must start on a page boundary. */
+ if (i != 0 && buffer_list[i].addr & ~PAGE_MASK)
+ return -EINVAL;
+ /* Buffers strictly in the middle must be whole pages long. */
+ if (i != 0 && i != num_phys_buf - 1 &&
+ (buffer_list[i].size & ~PAGE_MASK))
+ return -EINVAL;
+ *total_size += buffer_list[i].size;
+ /* Accumulate start/end address bits; they bound the usable page shift. */
+ if (i > 0)
+ mask |= buffer_list[i].addr;
+ else
+ mask |= buffer_list[i].addr & PAGE_MASK;
+ if (i != num_phys_buf - 1)
+ mask |= buffer_list[i].addr + buffer_list[i].size;
+ else
+ mask |= (buffer_list[i].addr + buffer_list[i].size +
+ PAGE_SIZE - 1) & PAGE_MASK;
+ }
+
+ if (*total_size > 0xFFFFFFFFULL)
+ return -ENOMEM;
+
+ /* Find largest page shift we can use to cover buffers */
+ for (*shift = PAGE_SHIFT; *shift < 27; ++(*shift))
+ if ((1ULL << *shift) & mask)
+ break;
+
+ /* Align the first buffer down to the chosen page size. */
+ buffer_list[0].size += buffer_list[0].addr & ((1ULL << *shift) - 1);
+ buffer_list[0].addr &= ~0ull << *shift;
+
+ *npages = 0;
+ for (i = 0; i < num_phys_buf; ++i)
+ *npages += (buffer_list[i].size +
+ (1ULL << *shift) - 1) >> *shift;
+
+ if (!*npages)
+ return -EINVAL;
+
+ *page_list = kmalloc(sizeof(u64) * *npages, GFP_KERNEL);
+ if (!*page_list)
+ return -ENOMEM;
+
+ /* Emit one big-endian address per page of every buffer. */
+ n = 0;
+ for (i = 0; i < num_phys_buf; ++i)
+ for (j = 0;
+ j < (buffer_list[i].size + (1ULL << *shift) - 1) >> *shift;
+ ++j)
+ (*page_list)[n++] = cpu_to_be64(buffer_list[i].addr +
+ ((u64) j << *shift));
+
+ CTR6(KTR_IW_CXGBE,
+ "%s va 0x%llx mask 0x%llx shift %d len %lld pbl_size %d", __func__,
+ (unsigned long long)*iova_start, (unsigned long long)mask, *shift,
+ (unsigned long long)*total_size, *npages);
+
+ return 0;
+
+}
+
+/*
+ * Change the PD, access rights and/or translation of an existing physical
+ * MR, as selected by the IB_MR_REREG_* bits in 'mr_rereg_mask'.
+ *
+ * The new attributes are staged in a local copy ('mh') and only copied into
+ * the real MR after reregister_mem() succeeds.  Returns 0 or a negative
+ * errno; -EINVAL if the MR is in use (usecnt != 0) or the MR and its PD
+ * belong to different devices.
+ *
+ * NOTE(review): reregister_mem() is given &mh, a stack copy, and
+ * finish_mem_reg() inside it passes that pointer to insert_handle() -
+ * confirm this is intended.
+ * NOTE(review): page_list is built and freed here but never passed to
+ * write_pbl() in this function - confirm.
+ */
+int c4iw_reregister_phys_mem(struct ib_mr *mr, int mr_rereg_mask,
+ struct ib_pd *pd, struct ib_phys_buf *buffer_list,
+ int num_phys_buf, int acc, u64 *iova_start)
+{
+
+ struct c4iw_mr mh, *mhp;
+ struct c4iw_pd *php;
+ struct c4iw_dev *rhp;
+ __be64 *page_list = NULL;
+ int shift = 0;
+ u64 total_size;
+ int npages = 0;
+ int ret;
+
+ CTR3(KTR_IW_CXGBE, "%s ib_mr %p ib_pd %p", __func__, mr, pd);
+
+ /* There can be no memory windows */
+ if (atomic_read(&mr->usecnt))
+ return -EINVAL;
+
+ mhp = to_c4iw_mr(mr);
+ rhp = mhp->rhp;
+ php = to_c4iw_pd(mr->pd);
+
+ /* make sure we are on the same adapter */
+ if (rhp != php->rhp)
+ return -EINVAL;
+
+ /* Work on a copy so the real MR is untouched if anything fails. */
+ memcpy(&mh, mhp, sizeof *mhp);
+
+ if (mr_rereg_mask & IB_MR_REREG_PD)
+ php = to_c4iw_pd(pd);
+ if (mr_rereg_mask & IB_MR_REREG_ACCESS) {
+ mh.attr.perms = c4iw_ib_to_tpt_access(acc);
+ mh.attr.mw_bind_enable = (acc & IB_ACCESS_MW_BIND) ==
+ IB_ACCESS_MW_BIND;
+ }
+ if (mr_rereg_mask & IB_MR_REREG_TRANS) {
+ ret = build_phys_page_list(buffer_list, num_phys_buf,
+ iova_start,
+ &total_size, &npages,
+ &shift, &page_list);
+ if (ret)
+ return ret;
+ }
+
+ ret = reregister_mem(rhp, php, &mh, shift, npages);
+ kfree(page_list);
+ if (ret)
+ return ret;
+ /* Success: commit the requested changes to the real MR. */
+ if (mr_rereg_mask & IB_MR_REREG_PD)
+ mhp->attr.pdid = php->pdid;
+ if (mr_rereg_mask & IB_MR_REREG_ACCESS)
+ mhp->attr.perms = c4iw_ib_to_tpt_access(acc);
+ if (mr_rereg_mask & IB_MR_REREG_TRANS) {
+ mhp->attr.zbva = 0;
+ mhp->attr.va_fbo = *iova_start;
+ mhp->attr.page_size = shift - 12;
+ mhp->attr.len = (u32) total_size;
+ mhp->attr.pbl_size = npages;
+ }
+
+ return 0;
+}
+
+/*
+ * Register a memory region described by a list of physical buffers.
+ *
+ * pd:           protection domain that will own the MR.
+ * buffer_list:  physical buffers; entry 0 may be modified in place by
+ *               build_phys_page_list().
+ * num_phys_buf: number of entries in buffer_list.
+ * acc:          IB access flags, translated with c4iw_ib_to_tpt_access().
+ * iova_start:   requested I/O virtual address; its offset within a page
+ *               must match that of buffer_list[0].addr.
+ *
+ * Returns the new ib_mr, or an ERR_PTR() carrying a negative errno.
+ */
+struct ib_mr *c4iw_register_phys_mem(struct ib_pd *pd,
+				     struct ib_phys_buf *buffer_list,
+				     int num_phys_buf, int acc, u64 *iova_start)
+{
+	__be64 *page_list;
+	int shift;
+	u64 total_size;
+	int npages;
+	struct c4iw_dev *rhp;
+	struct c4iw_pd *php;
+	struct c4iw_mr *mhp;
+	int ret;
+
+	CTR2(KTR_IW_CXGBE, "%s ib_pd %p", __func__, pd);
+	php = to_c4iw_pd(pd);
+	rhp = php->rhp;
+
+	mhp = kzalloc(sizeof(*mhp), GFP_KERNEL);
+	if (!mhp)
+		return ERR_PTR(-ENOMEM);
+
+	mhp->rhp = rhp;
+
+	/* First check that we have enough alignment */
+	if ((*iova_start & ~PAGE_MASK) != (buffer_list[0].addr & ~PAGE_MASK)) {
+		ret = -EINVAL;
+		goto err;
+	}
+
+	if (num_phys_buf > 1 &&
+	    ((buffer_list[0].addr + buffer_list[0].size) & ~PAGE_MASK)) {
+		ret = -EINVAL;
+		goto err;
+	}
+
+	ret = build_phys_page_list(buffer_list, num_phys_buf, iova_start,
+	    &total_size, &npages, &shift,
+	    &page_list);
+	if (ret)
+		goto err;
+
+	ret = alloc_pbl(mhp, npages);
+	if (ret) {
+		kfree(page_list);
+		/*
+		 * No PBL was allocated, so there is nothing to hand back to
+		 * the pool: skip err_pbl, which would free address 0.
+		 */
+		goto err;
+	}
+
+	ret = write_pbl(&mhp->rhp->rdev, page_list, mhp->attr.pbl_addr,
+	    npages);
+	kfree(page_list);
+	if (ret)
+		goto err_pbl;
+
+	mhp->attr.pdid = php->pdid;
+	mhp->attr.zbva = 0;
+
+	mhp->attr.perms = c4iw_ib_to_tpt_access(acc);
+	mhp->attr.va_fbo = *iova_start;
+	mhp->attr.page_size = shift - 12;
+
+	mhp->attr.len = (u32) total_size;
+	mhp->attr.pbl_size = npages;
+	ret = register_mem(rhp, php, mhp, shift);
+	if (ret)
+		goto err_pbl;
+
+	return &mhp->ibmr;
+
+err_pbl:
+	c4iw_pblpool_free(&mhp->rhp->rdev, mhp->attr.pbl_addr,
+	    mhp->attr.pbl_size << 3);
+
+err:
+	kfree(mhp);
+	return ERR_PTR(ret);
+
+}
+
+/*
+ * Create an MR with no PBL, a zero base address and an all-ones length for
+ * protection domain 'pd', with access rights derived from 'acc'.
+ * Returns the new ib_mr, or an ERR_PTR() carrying a negative errno.
+ */
+struct ib_mr *c4iw_get_dma_mr(struct ib_pd *pd, int acc)
+{
+ struct c4iw_dev *rhp;
+ struct c4iw_pd *php;
+ struct c4iw_mr *mhp;
+ int ret;
+ u32 stag = T4_STAG_UNSET;
+
+ CTR2(KTR_IW_CXGBE, "%s ib_pd %p", __func__, pd);
+ php = to_c4iw_pd(pd);
+ rhp = php->rhp;
+
+ mhp = kzalloc(sizeof(*mhp), GFP_KERNEL);
+ if (!mhp)
+ return ERR_PTR(-ENOMEM);
+
+ mhp->rhp = rhp;
+ mhp->attr.pdid = php->pdid;
+ mhp->attr.perms = c4iw_ib_to_tpt_access(acc);
+ mhp->attr.mw_bind_enable = (acc&IB_ACCESS_MW_BIND) == IB_ACCESS_MW_BIND;
+ mhp->attr.zbva = 0;
+ mhp->attr.va_fbo = 0;
+ mhp->attr.page_size = 0;
+ mhp->attr.len = ~0UL;
+ mhp->attr.pbl_size = 0;
+
+ /* stag is T4_STAG_UNSET, so a new stag index is allocated here. */
+ ret = write_tpt_entry(&rhp->rdev, 0, &stag, 1, php->pdid,
+ FW_RI_STAG_NSMR, mhp->attr.perms,
+ mhp->attr.mw_bind_enable, 0, 0, ~0UL, 0, 0, 0);
+ if (ret)
+ goto err1;
+
+ ret = finish_mem_reg(mhp, stag);
+ if (ret)
+ goto err2;
+ return &mhp->ibmr;
+err2:
+ /* finish_mem_reg() stored the stag in mhp->attr before failing. */
+ dereg_mem(&rhp->rdev, mhp->attr.stag, mhp->attr.pbl_size,
+ mhp->attr.pbl_addr);
+err1:
+ kfree(mhp);
+ return ERR_PTR(ret);
+}
+
+/*
+ * Register a user memory region of 'length' bytes at 'start'.
+ *
+ * The memory is obtained with ib_umem_get(), its DMA addresses are written
+ * to a newly allocated PBL in batches of at most one page of entries, and a
+ * TPT entry is then created with 'virt' as the region's address.
+ *
+ * Returns the new ib_mr, or an ERR_PTR() carrying a negative errno; -EINVAL
+ * when length is all-ones or start + length wraps around.
+ */
+struct ib_mr *c4iw_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
+ u64 virt, int acc, struct ib_udata *udata, int mr_id)
+{
+ __be64 *pages;
+ int shift, n, len;
+ int i, j, k;
+ int err = 0;
+ struct ib_umem_chunk *chunk;
+ struct c4iw_dev *rhp;
+ struct c4iw_pd *php;
+ struct c4iw_mr *mhp;
+
+ CTR2(KTR_IW_CXGBE, "%s ib_pd %p", __func__, pd);
+
+ if (length == ~0ULL)
+ return ERR_PTR(-EINVAL);
+
+ /* Reject ranges that wrap around the address space. */
+ if ((length + start) < start)
+ return ERR_PTR(-EINVAL);
+
+ php = to_c4iw_pd(pd);
+ rhp = php->rhp;
+ mhp = kzalloc(sizeof(*mhp), GFP_KERNEL);
+ if (!mhp)
+ return ERR_PTR(-ENOMEM);
+
+ mhp->rhp = rhp;
+
+ mhp->umem = ib_umem_get(pd->uobject->context, start, length, acc, 0);
+ if (IS_ERR(mhp->umem)) {
+ err = PTR_ERR(mhp->umem);
+ kfree(mhp);
+ return ERR_PTR(err);
+ }
+
+ /* log2 of the umem page size. */
+ shift = ffs(mhp->umem->page_size) - 1;
+
+ /* Size the PBL from the total number of entries in all chunks. */
+ n = 0;
+ list_for_each_entry(chunk, &mhp->umem->chunk_list, list)
+ n += chunk->nents;
+
+ err = alloc_pbl(mhp, n);
+ if (err)
+ goto err;
+
+ /* One page of scratch space used to batch PBL entries. */
+ pages = (__be64 *) __get_free_page(GFP_KERNEL);
+ if (!pages) {
+ err = -ENOMEM;
+ goto err_pbl;
+ }
+
+ /* i: entries in the current batch; n: entries already written. */
+ i = n = 0;
+
+ list_for_each_entry(chunk, &mhp->umem->chunk_list, list)
+ for (j = 0; j < chunk->nmap; ++j) {
+ len = sg_dma_len(&chunk->page_list[j]) >> shift;
+ for (k = 0; k < len; ++k) {
+ pages[i++] = cpu_to_be64(sg_dma_address(
+ &chunk->page_list[j]) +
+ mhp->umem->page_size * k);
+ /* Scratch page full: flush this batch to the adapter. */
+ if (i == PAGE_SIZE / sizeof *pages) {
+ err = write_pbl(&mhp->rhp->rdev,
+ pages,
+ mhp->attr.pbl_addr + (n << 3), i);
+ if (err)
+ goto pbl_done;
+ n += i;
+ i = 0;
+ }
+ }
+ }
+
+ /* Flush the final, partially filled batch. */
+ if (i)
+ err = write_pbl(&mhp->rhp->rdev, pages,
+ mhp->attr.pbl_addr + (n << 3), i);
+
+pbl_done:
+ free_page((unsigned long) pages);
+ if (err)
+ goto err_pbl;
+
+ mhp->attr.pdid = php->pdid;
+ mhp->attr.zbva = 0;
+ mhp->attr.perms = c4iw_ib_to_tpt_access(acc);
+ mhp->attr.va_fbo = virt;
+ mhp->attr.page_size = shift - 12;
+ mhp->attr.len = length;
+
+ err = register_mem(rhp, php, mhp, shift);
+ if (err)
+ goto err_pbl;
+
+ return &mhp->ibmr;
+
+err_pbl:
+ c4iw_pblpool_free(&mhp->rhp->rdev, mhp->attr.pbl_addr,
+ mhp->attr.pbl_size << 3);
+
+err:
+ ib_umem_release(mhp->umem);
+ kfree(mhp);
+ return ERR_PTR(err);
+}
+
+/*
+ * Allocate a memory window in protection domain 'pd': obtain a new
+ * FW_RI_STAG_MW stag, fill in the c4iw_mw and insert it into the device's
+ * mmidr table.  Returns the new ib_mw, or an ERR_PTR() carrying a negative
+ * errno.
+ */
+struct ib_mw *c4iw_alloc_mw(struct ib_pd *pd)
+{
+	struct c4iw_pd *php = to_c4iw_pd(pd);
+	struct c4iw_dev *rhp = php->rhp;
+	struct c4iw_mw *mhp;
+	u32 stag = 0;
+	u32 mmid;
+	int ret;
+
+	mhp = kzalloc(sizeof(*mhp), GFP_KERNEL);
+	if (mhp == NULL)
+		return ERR_PTR(-ENOMEM);
+
+	ret = allocate_window(&rhp->rdev, &stag, php->pdid);
+	if (ret != 0) {
+		kfree(mhp);
+		return ERR_PTR(ret);
+	}
+
+	mmid = stag >> 8;
+	mhp->rhp = rhp;
+	mhp->attr.pdid = php->pdid;
+	mhp->attr.type = FW_RI_STAG_MW;
+	mhp->attr.stag = stag;
+	mhp->ibmw.rkey = stag;
+
+	if (insert_handle(rhp, &rhp->mmidr, mhp, mmid)) {
+		/* Could not publish the window: release its stag again. */
+		deallocate_window(&rhp->rdev, mhp->attr.stag);
+		kfree(mhp);
+		return ERR_PTR(-ENOMEM);
+	}
+
+	CTR4(KTR_IW_CXGBE, "%s mmid 0x%x mhp %p stag 0x%x", __func__, mmid, mhp,
+	    stag);
+	return &(mhp->ibmw);
+}
+
+/*
+ * Destroy a memory window: remove it from the mmidr table, clear its TPT
+ * entry and free the c4iw_mw.  Always returns 0.
+ */
+int c4iw_dealloc_mw(struct ib_mw *mw)
+{
+	struct c4iw_mw *mhp = to_c4iw_mw(mw);
+	struct c4iw_dev *rhp = mhp->rhp;
+	const u32 mmid = (mw->rkey) >> 8;
+
+	remove_handle(rhp, &rhp->mmidr, mmid);
+	deallocate_window(&rhp->rdev, mhp->attr.stag);
+	kfree(mhp);
+
+	/* Only the pointer values are traced; mhp is not dereferenced. */
+	CTR4(KTR_IW_CXGBE, "%s ib_mw %p mmid 0x%x ptr %p", __func__, mw, mmid,
+	    mhp);
+	return 0;
+}
+
+/*
+ * Allocate an MR with a PBL of 'pbl_depth' entries and a fresh
+ * FW_RI_STAG_NSMR stag, then insert it into the device's mmidr table.
+ * Returns the new ib_mr, or an ERR_PTR() carrying a negative errno.
+ */
+struct ib_mr *c4iw_alloc_fast_reg_mr(struct ib_pd *pd, int pbl_depth)
+{
+ struct c4iw_dev *rhp;
+ struct c4iw_pd *php;
+ struct c4iw_mr *mhp;
+ u32 mmid;
+ u32 stag = 0;
+ int ret = 0;
+
+ php = to_c4iw_pd(pd);
+ rhp = php->rhp;
+ mhp = kzalloc(sizeof(*mhp), GFP_KERNEL);
+ if (!mhp) {
+ ret = -ENOMEM;
+ goto err;
+ }
+
+ mhp->rhp = rhp;
+ ret = alloc_pbl(mhp, pbl_depth);
+ if (ret)
+ goto err1;
+ mhp->attr.pbl_size = pbl_depth; /* alloc_pbl() already stored this value */
+ ret = allocate_stag(&rhp->rdev, &stag, php->pdid,
+ mhp->attr.pbl_size, mhp->attr.pbl_addr);
+ if (ret)
+ goto err2;
+ mhp->attr.pdid = php->pdid;
+ mhp->attr.type = FW_RI_STAG_NSMR;
+ mhp->attr.stag = stag;
+ mhp->attr.state = 1;
+ mmid = (stag) >> 8;
+ mhp->ibmr.rkey = mhp->ibmr.lkey = stag;
+ if (insert_handle(rhp, &rhp->mmidr, mhp, mmid)) {
+ ret = -ENOMEM;
+ goto err3;
+ }
+
+ CTR4(KTR_IW_CXGBE, "%s mmid 0x%x mhp %p stag 0x%x", __func__, mmid, mhp,
+ stag);
+ return &(mhp->ibmr);
+ /* Error unwinding: each label undoes one more step, newest first. */
+err3:
+ dereg_mem(&rhp->rdev, stag, mhp->attr.pbl_size,
+ mhp->attr.pbl_addr);
+err2:
+ c4iw_pblpool_free(&mhp->rhp->rdev, mhp->attr.pbl_addr,
+ mhp->attr.pbl_size << 3);
+err1:
+ kfree(mhp);
+err:
+ return ERR_PTR(ret);
+}
+
+/*
+ * Allocate a fast-register page list with room for 'page_list_len' 64-bit
+ * entries.  The header and the entries share one physically contiguous,
+ * 4096-byte aligned allocation; the entries start right after the header.
+ * Returns the embedded ib_fast_reg_page_list, or ERR_PTR(-ENOMEM).
+ */
+struct ib_fast_reg_page_list *c4iw_alloc_fastreg_pbl(struct ib_device *device,
+						      int page_list_len)
+{
+	struct c4iw_dev *dev = to_c4iw_dev(device);
+	struct c4iw_fr_page_list *c4pl;
+	bus_addr_t dma_addr;
+	int size = sizeof *c4pl + page_list_len * sizeof(u64);
+
+	c4pl = contigmalloc(size, M_DEVBUF, M_NOWAIT, 0ul, ~0ul, 4096, 0);
+	if (c4pl == NULL)
+		return ERR_PTR(-ENOMEM);
+
+	dma_addr = vtophys(c4pl);
+	pci_unmap_addr_set(c4pl, mapping, dma_addr);
+
+	c4pl->dma_addr = dma_addr;
+	c4pl->dev = dev;
+	c4pl->size = size;
+	c4pl->ibpl.page_list = (u64 *)(c4pl + 1);
+	c4pl->ibpl.max_page_list_len = page_list_len;
+
+	return &c4pl->ibpl;
+}
+
+/*
+ * Free a page list from c4iw_alloc_fastreg_pbl(), using the size recorded in
+ * the c4iw_fr_page_list at allocation time.
+ */
+void c4iw_free_fastreg_pbl(struct ib_fast_reg_page_list *ibpl)
+{
+ struct c4iw_fr_page_list *c4pl = to_c4iw_fr_page_list(ibpl);
+ contigfree(c4pl, c4pl->size, M_DEVBUF);
+}
+
+/*
+ * Deregister a memory region: remove it from the mmidr table, clear its TPT
+ * entry, then release its PBL, kva buffer and umem (each only if present)
+ * and free the c4iw_mr.
+ * Returns -EINVAL if the MR is still in use (usecnt != 0), otherwise 0.
+ */
+int c4iw_dereg_mr(struct ib_mr *ib_mr)
+{
+ struct c4iw_dev *rhp;
+ struct c4iw_mr *mhp;
+ u32 mmid;
+
+ CTR2(KTR_IW_CXGBE, "%s ib_mr %p", __func__, ib_mr);
+ /* There can be no memory windows */
+ if (atomic_read(&ib_mr->usecnt))
+ return -EINVAL;
+
+ mhp = to_c4iw_mr(ib_mr);
+ rhp = mhp->rhp;
+ mmid = mhp->attr.stag >> 8;
+ remove_handle(rhp, &rhp->mmidr, mmid);
+ /* The return value of dereg_mem() is not checked. */
+ dereg_mem(&rhp->rdev, mhp->attr.stag, mhp->attr.pbl_size,
+ mhp->attr.pbl_addr);
+ if (mhp->attr.pbl_size)
+ c4iw_pblpool_free(&mhp->rhp->rdev, mhp->attr.pbl_addr,
+ mhp->attr.pbl_size << 3);
+ if (mhp->kva)
+ kfree((void *) (unsigned long) mhp->kva);
+ if (mhp->umem)
+ ib_umem_release(mhp->umem);
+ CTR3(KTR_IW_CXGBE, "%s mmid 0x%x ptr %p", __func__, mmid, mhp);
+ kfree(mhp);
+ return 0;
+}
+#endif
diff --git a/sys/dev/cxgbe/iw_cxgbe/provider.c b/sys/dev/cxgbe/iw_cxgbe/provider.c
new file mode 100644
index 0000000..2a25ad8
--- /dev/null
+++ b/sys/dev/cxgbe/iw_cxgbe/provider.c
@@ -0,0 +1,498 @@
+/*
+ * Copyright (c) 2009-2013 Chelsio, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include "opt_inet.h"
+
+#ifdef TCP_OFFLOAD
+#include <asm/pgtable.h>
+#include <linux/page.h>
+#include <rdma/ib_verbs.h>
+#include <rdma/ib_user_verbs.h>
+
+#include "iw_cxgbe.h"
+#include "user.h"
+
+/*
+ * Module parameter: when non-zero, c4iw_register_device() adds
+ * IB_DEVICE_MEM_MGT_EXTENSIONS to the advertised device capabilities.
+ */
+static int fastreg_support = 1;
+module_param(fastreg_support, int, 0644);
+MODULE_PARM_DESC(fastreg_support, "Advertise fastreg support (default = 1)");
+
+/*
+ * modify_port verb: not implemented by this driver; every call fails
+ * with -ENOSYS and none of the arguments are examined.
+ */
+static int
+c4iw_modify_port(struct ib_device *ibdev, u8 port, int port_modify_mask,
+    struct ib_port_modify *props)
+{
+
+	return (-ENOSYS);
+}
+
+/*
+ * create_ah verb: not implemented by this driver; always returns
+ * ERR_PTR(-ENOSYS).
+ */
+static struct ib_ah *
+c4iw_ah_create(struct ib_pd *pd, struct ib_ah_attr *ah_attr)
+{
+
+	return (ERR_PTR(-ENOSYS));
+}
+
+/* destroy_ah verb: not implemented by this driver; always -ENOSYS. */
+static int
+c4iw_ah_destroy(struct ib_ah *ah)
+{
+
+	return (-ENOSYS);
+}
+
+/* attach_mcast verb: not implemented by this driver; always -ENOSYS. */
+static int
+c4iw_multicast_attach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid)
+{
+
+	return (-ENOSYS);
+}
+
+/* detach_mcast verb: not implemented by this driver; always -ENOSYS. */
+static int
+c4iw_multicast_detach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid)
+{
+
+	return (-ENOSYS);
+}
+
+/*
+ * process_mad verb: not implemented by this driver; always -ENOSYS and
+ * neither MAD buffer is touched.
+ */
+static int
+c4iw_process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num,
+    struct ib_wc *in_wc, struct ib_grh *in_grh, struct ib_mad *in_mad,
+    struct ib_mad *out_mad)
+{
+
+	return (-ENOSYS);
+}
+
+/*
+ * Tear down a user context: free every entry still on its mmaps list,
+ * release the per-context device state and free the context itself.
+ * Always returns 0.
+ */
+static int
+c4iw_dealloc_ucontext(struct ib_ucontext *context)
+{
+	struct c4iw_ucontext *uctx = to_c4iw_ucontext(context);
+	struct c4iw_dev *dev = to_c4iw_dev(context->device);
+	struct c4iw_mm_entry *cur, *next;
+
+	CTR2(KTR_IW_CXGBE, "%s context %p", __func__, context);
+
+	/* The _safe iterator is required because each node is freed in turn. */
+	list_for_each_entry_safe(cur, next, &uctx->mmaps, entry) {
+		kfree(cur);
+	}
+
+	c4iw_release_dev_ucontext(&dev->rdev, &uctx->uctx);
+	kfree(uctx);
+	return (0);
+}
+
+/*
+ * Create a user context for ibdev.
+ *
+ * The context is zero-allocated, its per-device state initialised, and
+ * its mmap list and lock set up.  Returns the embedded ib_ucontext, or
+ * ERR_PTR(-ENOMEM) when the allocation fails.  udata is not used.
+ */
+static struct ib_ucontext *
+c4iw_alloc_ucontext(struct ib_device *ibdev, struct ib_udata *udata)
+{
+	struct c4iw_dev *dev = to_c4iw_dev(ibdev);
+	struct c4iw_ucontext *uctx;
+
+	CTR2(KTR_IW_CXGBE, "%s ibdev %p", __func__, ibdev);
+
+	uctx = kzalloc(sizeof(*uctx), GFP_KERNEL);
+	if (uctx == NULL)
+		return (ERR_PTR(-ENOMEM));
+
+	c4iw_init_dev_ucontext(&dev->rdev, &uctx->uctx);
+	INIT_LIST_HEAD(&uctx->mmaps);
+	spin_lock_init(&uctx->mmap_lock);
+
+	return (&uctx->ibucontext);
+}
+
+/* Return prot converted by pgprot_writecombine(). */
+static inline pgprot_t
+t4_pgprot_wc(pgprot_t prot)
+{
+	pgprot_t wc_prot = pgprot_writecombine(prot);
+
+	return (wc_prot);
+}
+
+/*
+ * mmap handler installed as ibdev->mmap.
+ *
+ * The mmap offset (vm_pgoff << PAGE_SHIFT) is used as a key which, together
+ * with the mapping length, is passed to remove_mmap() to fetch a
+ * c4iw_mm_entry from the user context.  The entry's addr then selects one
+ * of three mapping paths:
+ *  - an address inside the adapter register resource (regs_res) is
+ *    translated with vtophys() and mapped non-cached;
+ *  - an address inside the user doorbell resource (udbs_res) is translated
+ *    with vtophys() and mapped non-cached (or write-combined when built
+ *    with DOT5, on a T5 adapter with map_udb_as_wc set);
+ *  - any other address is passed to io_remap_pfn_range() as is, with the
+ *    page protection the caller supplied.
+ *
+ * Returns -EINVAL if vm_start is not page aligned or remove_mmap() finds
+ * no entry, otherwise whatever io_remap_pfn_range() returned.
+ */
+static int c4iw_mmap(struct ib_ucontext *context, struct vm_area_struct *vma)
+{
+ /*
+ * NOTE(review): len is an int and key a u32, so very large mappings or
+ * offsets would be truncated here -- confirm the expected ranges.
+ */
+ int len = vma->vm_end - vma->vm_start;
+ u32 key = vma->vm_pgoff << PAGE_SHIFT;
+ struct c4iw_rdev *rdev;
+ int ret = 0;
+ struct c4iw_mm_entry *mm;
+ struct c4iw_ucontext *ucontext;
+ u64 addr, paddr;
+
+ u64 va_regs_res = 0, va_udbs_res = 0;
+ u64 len_regs_res = 0, len_udbs_res = 0;
+
+ CTR3(KTR_IW_CXGBE, "%s:1 ctx %p vma %p", __func__, context, vma);
+
+ CTR4(KTR_IW_CXGBE, "%s:1a pgoff 0x%lx key 0x%x len %d", __func__,
+ vma->vm_pgoff, key, len);
+
+ /* Only page-aligned mappings are accepted. */
+ if (vma->vm_start & (PAGE_SIZE-1)) {
+ CTR3(KTR_IW_CXGBE, "%s:2 unaligned vm_start %u vma %p",
+ __func__, vma->vm_start, vma);
+ return -EINVAL;
+ }
+
+ rdev = &(to_c4iw_dev(context->device)->rdev);
+ ucontext = to_c4iw_ucontext(context);
+
+ mm = remove_mmap(ucontext, key, len);
+ if (!mm) {
+ CTR4(KTR_IW_CXGBE, "%s:3 ucontext %p key %u len %u", __func__,
+ ucontext, key, len);
+ return -EINVAL;
+ }
+ /* Only the address is needed from the entry; the entry is freed now. */
+ addr = mm->addr;
+ kfree(mm);
+
+ /* Kernel virtual ranges of the register and user doorbell resources. */
+ va_regs_res = (u64)rman_get_virtual(rdev->adap->regs_res);
+ len_regs_res = (u64)rman_get_size(rdev->adap->regs_res);
+ va_udbs_res = (u64)rman_get_virtual(rdev->adap->udbs_res);
+ len_udbs_res = (u64)rman_get_size(rdev->adap->udbs_res);
+
+ CTR6(KTR_IW_CXGBE,
+ "%s:4 addr %p, masync region %p:%p, udb region %p:%p", __func__,
+ addr, va_regs_res, va_regs_res+len_regs_res, va_udbs_res,
+ va_udbs_res+len_udbs_res);
+
+ if (addr >= va_regs_res && addr < va_regs_res + len_regs_res) {
+ CTR4(KTR_IW_CXGBE, "%s:5 MA_SYNC addr %p region %p, reglen %u",
+ __func__, addr, va_regs_res, len_regs_res);
+ /*
+ * MA_SYNC register...
+ */
+ paddr = vtophys(addr);
+ vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
+ ret = io_remap_pfn_range(vma, vma->vm_start,
+ paddr >> PAGE_SHIFT,
+ len, vma->vm_page_prot);
+ } else {
+
+ if (addr >= va_udbs_res && addr < va_udbs_res + len_udbs_res) {
+ /*
+ * Map user DB or OCQP memory...
+ */
+ paddr = vtophys(addr);
+ CTR4(KTR_IW_CXGBE,
+ "%s:6 USER DB-GTS addr %p region %p, reglen %u",
+ __func__, addr, va_udbs_res, len_udbs_res);
+ /*
+ * With DOT5 the "else" below pairs with the "if" inside the #ifdef;
+ * without it the non-cached assignment is unconditional.
+ */
+#ifdef DOT5
+ if (is_t5(rdev->lldi.adapter_type) && map_udb_as_wc)
+ vma->vm_page_prot = t4_pgprot_wc(vma->vm_page_prot);
+ else
+#endif
+ vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
+ ret = io_remap_pfn_range(vma, vma->vm_start,
+ paddr >> PAGE_SHIFT,
+ len, vma->vm_page_prot);
+ } else {
+ /*
+ * Map WQ or CQ contig dma memory...
+ */
+ /* addr is used directly as the physical address on this path. */
+ CTR4(KTR_IW_CXGBE,
+ "%s:7 WQ/CQ addr %p vm_start %u vma %p", __func__,
+ addr, vma->vm_start, vma);
+ ret = io_remap_pfn_range(vma, vma->vm_start,
+ addr >> PAGE_SHIFT,
+ len, vma->vm_page_prot);
+ }
+ }
+ CTR4(KTR_IW_CXGBE, "%s:8 ctx %p vma %p ret %u", __func__, context, vma,
+ ret);
+ return ret;
+}
+
+/*
+ * Release a protection domain: return its pdid to the pdid table, drop
+ * the current-PD statistic under the stats lock and free the structure.
+ * Always returns 0.
+ */
+static int
+c4iw_deallocate_pd(struct ib_pd *pd)
+{
+	struct c4iw_pd *cpd = to_c4iw_pd(pd);
+	struct c4iw_rdev *rdev = &cpd->rhp->rdev;
+
+	CTR3(KTR_IW_CXGBE, "%s: pd %p, pdid 0x%x", __func__, pd, cpd->pdid);
+
+	c4iw_put_resource(&rdev->resource.pdid_table, cpd->pdid);
+
+	mutex_lock(&rdev->stats.lock);
+	rdev->stats.pd.cur--;
+	mutex_unlock(&rdev->stats.lock);
+
+	kfree(cpd);
+	return (0);
+}
+
+/*
+ * Allocate a protection domain.
+ *
+ * A pdid is taken from the device's pdid table and a zeroed c4iw_pd is
+ * allocated to carry it.  When a user context is supplied the pdid is
+ * copied out through udata.  On success the current/max PD statistics
+ * are updated under the stats lock.
+ *
+ * Returns the embedded ib_pd, or ERR_PTR() of:
+ *   -EINVAL  no pdid available,
+ *   -ENOMEM  allocation of the c4iw_pd failed,
+ *   -EFAULT  copying the pdid to udata failed.
+ */
+static struct ib_pd *
+c4iw_allocate_pd(struct ib_device *ibdev, struct ib_ucontext *context,
+    struct ib_udata *udata)
+{
+	struct c4iw_pd *php;
+	u32 pdid;
+	struct c4iw_dev *rhp;
+
+	CTR4(KTR_IW_CXGBE, "%s: ibdev %p, context %p, data %p", __func__, ibdev,
+	    context, udata);
+	rhp = to_c4iw_dev(ibdev);
+	pdid = c4iw_get_resource(&rhp->rdev.resource.pdid_table);
+	if (!pdid)
+		return ERR_PTR(-EINVAL);
+	php = kzalloc(sizeof(*php), GFP_KERNEL);
+	if (!php) {
+		c4iw_put_resource(&rhp->rdev.resource.pdid_table, pdid);
+		return ERR_PTR(-ENOMEM);
+	}
+	php->pdid = pdid;
+	php->rhp = rhp;
+	if (context) {
+		if (ib_copy_to_udata(udata, &php->pdid, sizeof(u32))) {
+			/*
+			 * Unwind by hand rather than through
+			 * c4iw_deallocate_pd(): this PD has not been counted
+			 * in stats.pd.cur yet, so the decrement done there
+			 * would leave the statistic one too low.
+			 */
+			c4iw_put_resource(&rhp->rdev.resource.pdid_table,
+			    pdid);
+			kfree(php);
+			return ERR_PTR(-EFAULT);
+		}
+	}
+	mutex_lock(&rhp->rdev.stats.lock);
+	rhp->rdev.stats.pd.cur++;
+	if (rhp->rdev.stats.pd.cur > rhp->rdev.stats.pd.max)
+		rhp->rdev.stats.pd.max = rhp->rdev.stats.pd.cur;
+	mutex_unlock(&rhp->rdev.stats.lock);
+
+	CTR6(KTR_IW_CXGBE,
+	    "%s: ibdev %p, context %p, data %p, pddid 0x%x, pd %p", __func__,
+	    ibdev, context, udata, pdid, php);
+	return (&php->ibpd);
+}
+
+/*
+ * query_pkey verb: every port/index pair reports a pkey of 0.
+ * Always returns 0.
+ */
+static int
+c4iw_query_pkey(struct ib_device *ibdev, u8 port, u16 index, u16 *pkey)
+{
+	const u16 reported_pkey = 0;
+
+	CTR5(KTR_IW_CXGBE, "%s ibdev %p, port %d, index %d, pkey %p", __func__,
+	    ibdev, port, index, pkey);
+
+	*pkey = reported_pkey;
+	return (0);
+}
+
+/*
+ * query_gid verb.
+ *
+ * The GID is cleared first, so it is all zeroes even when the call
+ * fails.  For a valid 1-based port number the port's hw_addr is copied
+ * into the leading bytes of the GID.  index is only traced.
+ * Returns 0, or -EINVAL if port is 0 or beyond the adapter's port count.
+ */
+static int
+c4iw_query_gid(struct ib_device *ibdev, u8 port, int index, union ib_gid *gid)
+{
+	struct adapter *sc;
+	struct port_info *pi;
+
+	CTR5(KTR_IW_CXGBE, "%s ibdev %p, port %d, index %d, gid %p", __func__,
+	    ibdev, port, index, gid);
+
+	memset(gid->raw, 0, sizeof(gid->raw));
+
+	sc = to_c4iw_dev(ibdev)->rdev.adap;
+	if (port < 1 || port > sc->params.nports)
+		return (-EINVAL);
+
+	pi = sc->port[port - 1];
+	memcpy(gid->raw, pi->hw_addr, sizeof(pi->hw_addr));
+	return (0);
+}
+
+/*
+ * query_device verb: clear *props and fill it from the adapter
+ * parameters, the PCI ids and the driver's T4_* limits.
+ * Always returns 0.
+ */
+static int
+c4iw_query_device(struct ib_device *ibdev, struct ib_device_attr *props)
+{
+	struct c4iw_dev *cdev = to_c4iw_dev(ibdev);
+	struct adapter *sc = cdev->rdev.adap;
+
+	CTR3(KTR_IW_CXGBE, "%s ibdev %p, props %p", __func__, ibdev, props);
+
+	memset(props, 0, sizeof(*props));
+
+	/* Identity: image GUID from port 0's address, chip, firmware, PCI. */
+	memcpy(&props->sys_image_guid, sc->port[0]->hw_addr, 6);
+	props->hw_ver = sc->params.chipid;
+	props->fw_ver = sc->params.fw_vers;
+	props->vendor_id = pci_get_vendor(sc->dev);
+	props->vendor_part_id = pci_get_device(sc->dev);
+
+	/* Capabilities. */
+	props->device_cap_flags = cdev->device_cap_flags;
+	props->page_size_cap = T4_PAGESIZE_MASK;
+	props->local_ca_ack_delay = 0;
+
+	/* Queue pair limits. */
+	props->max_qp = T4_MAX_NUM_QP;
+	props->max_qp_wr = T4_MAX_QP_DEPTH;
+	props->max_sge = T4_MAX_RECV_SGE;
+	props->max_sge_rd = 1;
+	props->max_qp_rd_atom = c4iw_max_read_depth;
+	props->max_qp_init_rd_atom = c4iw_max_read_depth;
+
+	/* Completion queue limits. */
+	props->max_cq = T4_MAX_NUM_CQ;
+	props->max_cqe = T4_MAX_CQ_DEPTH;
+
+	/* Memory registration and protection domain limits. */
+	props->max_mr_size = T4_MAX_MR_SIZE;
+	props->max_mr = c4iw_num_stags(&cdev->rdev);
+	props->max_pd = T4_MAX_NUM_PD;
+	props->max_fast_reg_page_list_len = T4_MAX_FR_DEPTH;
+
+	return (0);
+}
+
+/*
+ * query_port verb.
+ *
+ * port is 1-based.  *props is cleared and then filled in: the active MTU
+ * is the largest IB MTU enum not exceeding the interface MTU (256 as the
+ * floor), and the port state follows the link state recorded in the
+ * port's link_cfg.
+ *
+ * Returns 0 on success, -EINVAL (-errno) if port is 0 or greater than
+ * the adapter's port count; *props is left untouched in that case.
+ */
+static int
+c4iw_query_port(struct ib_device *ibdev, u8 port, struct ib_port_attr *props)
+{
+	struct c4iw_dev *dev;
+	struct adapter *sc;
+	struct port_info *pi;
+	struct ifnet *ifp;
+
+	CTR4(KTR_IW_CXGBE, "%s ibdev %p, port %d, props %p", __func__, ibdev,
+	    port, props);
+
+	dev = to_c4iw_dev(ibdev);
+	sc = dev->rdev.adap;
+	/*
+	 * Port 0 must be rejected as well: it would index sc->port[-1]
+	 * below.  c4iw_query_gid() applies the same range check.
+	 */
+	if (port == 0 || port > sc->params.nports)
+		return (-EINVAL);
+	pi = sc->port[port - 1];
+	ifp = pi->ifp;
+
+	memset(props, 0, sizeof(struct ib_port_attr));
+	props->max_mtu = IB_MTU_4096;
+	if (ifp->if_mtu >= 4096)
+		props->active_mtu = IB_MTU_4096;
+	else if (ifp->if_mtu >= 2048)
+		props->active_mtu = IB_MTU_2048;
+	else if (ifp->if_mtu >= 1024)
+		props->active_mtu = IB_MTU_1024;
+	else if (ifp->if_mtu >= 512)
+		props->active_mtu = IB_MTU_512;
+	else
+		props->active_mtu = IB_MTU_256;
+	props->state = pi->link_cfg.link_ok ? IB_PORT_ACTIVE : IB_PORT_DOWN;
+	props->port_cap_flags =
+	    IB_PORT_CM_SUP |
+	    IB_PORT_SNMP_TUNNEL_SUP |
+	    IB_PORT_REINIT_SUP |
+	    IB_PORT_DEVICE_MGMT_SUP |
+	    IB_PORT_VENDOR_CLASS_SUP | IB_PORT_BOOT_MGMT_SUP;
+	props->gid_tbl_len = 1;
+	props->pkey_tbl_len = 1;
+	props->active_width = 2;
+	props->active_speed = 2;
+	props->max_msg_sz = -1;
+
+	return 0;
+}
+
+/*
+ * Fill in the ib_device embedded in dev and register it with the IB core.
+ *
+ * The device is named after the adapter's nameunit, its node GUID is
+ * derived from port 0's hardware address, and every verb entry point is
+ * wired to the corresponding c4iw_* routine.  An iw_cm_verbs table is
+ * allocated for the iWARP connection manager callbacks; it is owned by
+ * ibdev->iwcm and released by c4iw_unregister_device().
+ *
+ * Returns 0 on success, -ENOMEM if the iwcm table cannot be allocated,
+ * or the (-errno) result of ib_register_device().  On failure no iwcm
+ * table is left attached to the device.
+ */
+int
+c4iw_register_device(struct c4iw_dev *dev)
+{
+	struct adapter *sc = dev->rdev.adap;
+	struct ib_device *ibdev = &dev->ibdev;
+	struct iw_cm_verbs *iwcm;
+	int ret;
+
+	CTR3(KTR_IW_CXGBE, "%s c4iw_dev %p, adapter %p", __func__, dev, sc);
+	BUG_ON(!sc->port[0]);
+	strlcpy(ibdev->name, device_get_nameunit(sc->dev), sizeof(ibdev->name));
+	memset(&ibdev->node_guid, 0, sizeof(ibdev->node_guid));
+	memcpy(&ibdev->node_guid, sc->port[0]->hw_addr, 6);
+	ibdev->owner = THIS_MODULE;
+	dev->device_cap_flags = IB_DEVICE_LOCAL_DMA_LKEY | IB_DEVICE_MEM_WINDOW;
+	if (fastreg_support)
+		dev->device_cap_flags |= IB_DEVICE_MEM_MGT_EXTENSIONS;
+	ibdev->local_dma_lkey = 0;
+	ibdev->uverbs_cmd_mask =
+	    (1ull << IB_USER_VERBS_CMD_GET_CONTEXT) |
+	    (1ull << IB_USER_VERBS_CMD_QUERY_DEVICE) |
+	    (1ull << IB_USER_VERBS_CMD_QUERY_PORT) |
+	    (1ull << IB_USER_VERBS_CMD_ALLOC_PD) |
+	    (1ull << IB_USER_VERBS_CMD_DEALLOC_PD) |
+	    (1ull << IB_USER_VERBS_CMD_REG_MR) |
+	    (1ull << IB_USER_VERBS_CMD_DEREG_MR) |
+	    (1ull << IB_USER_VERBS_CMD_CREATE_COMP_CHANNEL) |
+	    (1ull << IB_USER_VERBS_CMD_CREATE_CQ) |
+	    (1ull << IB_USER_VERBS_CMD_DESTROY_CQ) |
+	    (1ull << IB_USER_VERBS_CMD_REQ_NOTIFY_CQ) |
+	    (1ull << IB_USER_VERBS_CMD_CREATE_QP) |
+	    (1ull << IB_USER_VERBS_CMD_MODIFY_QP) |
+	    (1ull << IB_USER_VERBS_CMD_QUERY_QP) |
+	    (1ull << IB_USER_VERBS_CMD_POLL_CQ) |
+	    (1ull << IB_USER_VERBS_CMD_DESTROY_QP) |
+	    (1ull << IB_USER_VERBS_CMD_POST_SEND) |
+	    (1ull << IB_USER_VERBS_CMD_POST_RECV);
+	ibdev->node_type = RDMA_NODE_RNIC;
+	strlcpy(ibdev->node_desc, C4IW_NODE_DESC, sizeof(ibdev->node_desc));
+	ibdev->phys_port_cnt = sc->params.nports;
+	ibdev->num_comp_vectors = 1;
+	ibdev->dma_device = sc->dev;
+	ibdev->query_device = c4iw_query_device;
+	ibdev->query_port = c4iw_query_port;
+	ibdev->modify_port = c4iw_modify_port;
+	ibdev->query_pkey = c4iw_query_pkey;
+	ibdev->query_gid = c4iw_query_gid;
+	ibdev->alloc_ucontext = c4iw_alloc_ucontext;
+	ibdev->dealloc_ucontext = c4iw_dealloc_ucontext;
+	ibdev->mmap = c4iw_mmap;
+	ibdev->alloc_pd = c4iw_allocate_pd;
+	ibdev->dealloc_pd = c4iw_deallocate_pd;
+	ibdev->create_ah = c4iw_ah_create;
+	ibdev->destroy_ah = c4iw_ah_destroy;
+	ibdev->create_qp = c4iw_create_qp;
+	ibdev->modify_qp = c4iw_ib_modify_qp;
+	ibdev->query_qp = c4iw_ib_query_qp;
+	ibdev->destroy_qp = c4iw_destroy_qp;
+	ibdev->create_cq = c4iw_create_cq;
+	ibdev->destroy_cq = c4iw_destroy_cq;
+	ibdev->resize_cq = c4iw_resize_cq;
+	ibdev->poll_cq = c4iw_poll_cq;
+	ibdev->get_dma_mr = c4iw_get_dma_mr;
+	ibdev->reg_phys_mr = c4iw_register_phys_mem;
+	ibdev->rereg_phys_mr = c4iw_reregister_phys_mem;
+	ibdev->reg_user_mr = c4iw_reg_user_mr;
+	ibdev->dereg_mr = c4iw_dereg_mr;
+	ibdev->alloc_mw = c4iw_alloc_mw;
+	ibdev->bind_mw = c4iw_bind_mw;
+	ibdev->dealloc_mw = c4iw_dealloc_mw;
+	ibdev->alloc_fast_reg_mr = c4iw_alloc_fast_reg_mr;
+	ibdev->alloc_fast_reg_page_list = c4iw_alloc_fastreg_pbl;
+	ibdev->free_fast_reg_page_list = c4iw_free_fastreg_pbl;
+	ibdev->attach_mcast = c4iw_multicast_attach;
+	ibdev->detach_mcast = c4iw_multicast_detach;
+	ibdev->process_mad = c4iw_process_mad;
+	ibdev->req_notify_cq = c4iw_arm_cq;
+	ibdev->post_send = c4iw_post_send;
+	ibdev->post_recv = c4iw_post_receive;
+	ibdev->uverbs_abi_ver = C4IW_UVERBS_ABI_VERSION;
+
+	iwcm = kmalloc(sizeof(*iwcm), GFP_KERNEL);
+	if (iwcm == NULL)
+		return (-ENOMEM);
+
+	iwcm->connect = c4iw_connect;
+	iwcm->accept = c4iw_accept_cr;
+	iwcm->reject = c4iw_reject_cr;
+	iwcm->create_listen = c4iw_create_listen;
+	iwcm->destroy_listen = c4iw_destroy_listen;
+	iwcm->add_ref = c4iw_qp_add_ref;
+	iwcm->rem_ref = c4iw_qp_rem_ref;
+	iwcm->get_qp = c4iw_get_qp;
+	ibdev->iwcm = iwcm;
+
+	ret = ib_register_device(&dev->ibdev, NULL);
+	if (ret) {
+		/*
+		 * Clear the pointer along with the free so that nothing
+		 * (e.g. a later c4iw_unregister_device()) can free or use
+		 * the table a second time.
+		 */
+		ibdev->iwcm = NULL;
+		kfree(iwcm);
+	}
+
+	return (ret);
+}
+
+/*
+ * Unregister the embedded ib_device from the IB core and free the
+ * iw_cm_verbs table attached to it.
+ */
+void
+c4iw_unregister_device(struct c4iw_dev *dev)
+{
+	struct ib_device *ibdev = &dev->ibdev;
+
+	CTR3(KTR_IW_CXGBE, "%s c4iw_dev %p, adapter %p", __func__, dev,
+	    dev->rdev.adap);
+
+	ib_unregister_device(ibdev);
+	kfree(ibdev->iwcm);
+}
+#endif
diff --git a/sys/dev/cxgbe/iw_cxgbe/qp.c b/sys/dev/cxgbe/iw_cxgbe/qp.c
new file mode 100644
index 0000000..f983d55
--- /dev/null
+++ b/sys/dev/cxgbe/iw_cxgbe/qp.c
@@ -0,0 +1,1707 @@
+/*
+ * Copyright (c) 2009-2013 Chelsio, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include "opt_inet.h"
+
+#ifdef TCP_OFFLOAD
+#include <sys/types.h>
+#include <sys/malloc.h>
+#include <sys/socket.h>
+#include <sys/socketvar.h>
+#include <sys/sockio.h>
+#include <sys/taskqueue.h>
+#include <netinet/in.h>
+#include <net/neighbour.h>
+#include <net/route.h>
+
+#include <netinet/in_systm.h>
+#include <netinet/in_pcb.h>
+#include <netinet/ip.h>
+#include <netinet/ip_var.h>
+#include <netinet/tcp_var.h>
+#include <netinet/tcp.h>
+#include <netinet/tcpip.h>
+
+#include <netinet/toecore.h>
+
+struct sge_iq;
+struct rss_header;
+#include <linux/types.h>
+#include "offload.h"
+#include "tom/t4_tom.h"
+
+#include "iw_cxgbe.h"
+#include "user.h"
+
+/*
+ * Integers defined in another translation unit; presumably doorbell
+ * delay / flow-control tunables, going by their names -- confirm at the
+ * definition.
+ */
+extern int db_delay_usecs;
+extern int db_fc_threshold;
+/* Forward declaration of a file-local helper defined later in this file. */
+static void creds(struct toepcb *toep, size_t wrsize);
+
+
+/* Store a new state in qhp->attr.state while holding the QP lock. */
+static void
+set_state(struct c4iw_qp *qhp, enum c4iw_qp_state state)
+{
+	unsigned long irq_flags;
+
+	spin_lock_irqsave(&qhp->lock, irq_flags);
+	qhp->attr.state = state;
+	spin_unlock_irqrestore(&qhp->lock, irq_flags);
+}
+
+/*
+ * Return the send queue ring (sq->memsize bytes at sq->queue) to the
+ * contiguous allocator.  rdev is not used.
+ */
+static void
+dealloc_host_sq(struct c4iw_rdev *rdev, struct t4_sq *sq)
+{
+	void *ring = sq->queue;
+
+	contigfree(ring, sq->memsize, M_DEVBUF);
+}
+
+/* Free a send queue ring; simply forwards to dealloc_host_sq(). */
+static void
+dealloc_sq(struct c4iw_rdev *rdev, struct t4_sq *sq)
+{
+	dealloc_host_sq(rdev, sq);
+}
+
+/*
+ * Allocate the send queue ring: sq->memsize bytes of physically
+ * contiguous, 4096-byte aligned memory (M_NOWAIT).  On success the
+ * ring's physical address is recorded in both sq->dma_addr and
+ * sq->phys_addr.  rdev is not used.
+ *
+ * Returns 0 on success or -ENOMEM if the allocation fails.
+ */
+static int
+alloc_host_sq(struct c4iw_rdev *rdev, struct t4_sq *sq)
+{
+
+	sq->queue = contigmalloc(sq->memsize, M_DEVBUF, M_NOWAIT, 0ul, ~0ul,
+	    4096, 0);
+	if (sq->queue == NULL)
+		return -ENOMEM;
+
+	sq->dma_addr = vtophys(sq->queue);
+	sq->phys_addr = vtophys(sq->queue);
+	pci_unmap_addr_set(sq, mapping, sq->dma_addr);
+
+	CTR4(KTR_IW_CXGBE, "%s sq %p dma_addr %p phys_addr %p", __func__,
+	    sq->queue, sq->dma_addr, sq->phys_addr);
+	return 0;
+}
+
+/*
+ * Release everything create_qp() set up for a work queue pair: both
+ * rings, the RQT range, the software shadow arrays and the two queue
+ * ids.  Always returns 0.
+ */
+static int
+destroy_qp(struct c4iw_rdev *rdev, struct t4_wq *wq,
+    struct c4iw_dev_ucontext *uctx)
+{
+	struct t4_sq *sq = &wq->sq;
+
+	/*
+	 * uP clears EQ contexts when the connection exits rdma mode,
+	 * so no need to post a RESET WR for these EQs.
+	 */
+
+	/* Ring memory. */
+	contigfree(wq->rq.queue, wq->rq.memsize, M_DEVBUF);
+	dealloc_sq(rdev, sq);
+
+	/* RQT range and software shadow queues. */
+	c4iw_rqtpool_free(rdev, wq->rq.rqt_hwaddr, wq->rq.rqt_size);
+	kfree(wq->rq.sw_rq);
+	kfree(sq->sw_sq);
+
+	/* Queue ids. */
+	c4iw_put_qpid(rdev, wq->rq.qid, uctx);
+	c4iw_put_qpid(rdev, sq->qid, uctx);
+
+	return 0;
+}
+
+/*
+ * Allocate the resources for a send/receive work queue pair and ask the
+ * firmware to write the two egress queue contexts.
+ *
+ * In order: a qid for each queue, the software shadow arrays (kernel QPs
+ * only, i.e. when uctx is the rdev's own context), a power-of-two RQT
+ * range, the SQ and RQ rings, and the doorbell/GTS register addresses.
+ * A FW_RI_RES_WR carrying one SQ and one RQ resource entry is then sent
+ * on the management queue and its completion awaited.
+ *
+ * rcq/scq supply the CQ ids the RQ and SQ are bound to.
+ *
+ * Returns 0 on success.  Every failure -- including a failed work request
+ * allocation or firmware reply -- unwinds whatever had been acquired and
+ * returns -ENOMEM.
+ */
+static int create_qp(struct c4iw_rdev *rdev, struct t4_wq *wq,
+    struct t4_cq *rcq, struct t4_cq *scq,
+    struct c4iw_dev_ucontext *uctx)
+{
+	struct adapter *sc = rdev->adap;
+	int user = (uctx != &rdev->uctx);
+	struct fw_ri_res_wr *res_wr;
+	struct fw_ri_res *res;
+	int wr_len;
+	struct c4iw_wr_wait wr_wait;
+	int ret;
+	int eqsize;
+	struct wrqe *wr;
+
+	wq->sq.qid = c4iw_get_qpid(rdev, uctx);
+	if (!wq->sq.qid)
+		return -ENOMEM;
+
+	wq->rq.qid = c4iw_get_qpid(rdev, uctx);
+	if (!wq->rq.qid)
+		goto err1;
+
+	if (!user) {
+		wq->sq.sw_sq = kzalloc(wq->sq.size * sizeof *wq->sq.sw_sq,
+		    GFP_KERNEL);
+		if (!wq->sq.sw_sq)
+			goto err2;
+
+		wq->rq.sw_rq = kzalloc(wq->rq.size * sizeof *wq->rq.sw_rq,
+		    GFP_KERNEL);
+		if (!wq->rq.sw_rq)
+			goto err3;
+	}
+
+	/* RQT must be a power of 2. */
+	wq->rq.rqt_size = roundup_pow_of_two(wq->rq.size);
+	wq->rq.rqt_hwaddr = c4iw_rqtpool_alloc(rdev, wq->rq.rqt_size);
+	if (!wq->rq.rqt_hwaddr)
+		goto err4;
+
+	if (alloc_host_sq(rdev, &wq->sq))
+		goto err5;
+
+	memset(wq->sq.queue, 0, wq->sq.memsize);
+	pci_unmap_addr_set(&wq->sq, mapping, wq->sq.dma_addr);
+
+	wq->rq.queue = contigmalloc(wq->rq.memsize,
+	    M_DEVBUF, M_NOWAIT, 0ul, ~0ul, 4096, 0);
+	if (wq->rq.queue)
+		wq->rq.dma_addr = vtophys(wq->rq.queue);
+	else
+		goto err6;
+	CTR5(KTR_IW_CXGBE,
+	    "%s sq base va 0x%p pa 0x%llx rq base va 0x%p pa 0x%llx", __func__,
+	    wq->sq.queue, (unsigned long long)vtophys(wq->sq.queue),
+	    wq->rq.queue, (unsigned long long)vtophys(wq->rq.queue));
+	memset(wq->rq.queue, 0, wq->rq.memsize);
+	pci_unmap_addr_set(&wq->rq, mapping, wq->rq.dma_addr);
+
+	wq->db = (void *)((unsigned long)rman_get_virtual(sc->regs_res) +
+	    MYPF_REG(SGE_PF_KDOORBELL));
+	wq->gts = (void *)((unsigned long)rman_get_virtual(rdev->adap->regs_res)
+	    + MYPF_REG(SGE_PF_GTS));
+	if (user) {
+		/* Page-aligned addresses of the per-queue user doorbells. */
+		wq->sq.udb = (u64)((char*)rman_get_virtual(rdev->adap->udbs_res) +
+		    (wq->sq.qid << rdev->qpshift));
+		wq->sq.udb &= PAGE_MASK;
+		wq->rq.udb = (u64)((char*)rman_get_virtual(rdev->adap->udbs_res) +
+		    (wq->rq.qid << rdev->qpshift));
+		wq->rq.udb &= PAGE_MASK;
+	}
+	wq->rdev = rdev;
+	wq->rq.msn = 1;
+
+	/* build fw_ri_res_wr */
+	wr_len = sizeof *res_wr + 2 * sizeof *res;
+
+	wr = alloc_wrqe(wr_len, &sc->sge.mgmtq);
+	if (wr == NULL) {
+		/*
+		 * Without the work request the hardware queues are never
+		 * created, so this must be reported as a failure and all
+		 * resources acquired above released.
+		 */
+		goto err7;
+	}
+	res_wr = wrtod(wr);
+
+	memset(res_wr, 0, wr_len);
+	res_wr->op_nres = cpu_to_be32(
+	    V_FW_WR_OP(FW_RI_RES_WR) |
+	    V_FW_RI_RES_WR_NRES(2) |
+	    F_FW_WR_COMPL);
+	res_wr->len16_pkd = cpu_to_be32(DIV_ROUND_UP(wr_len, 16));
+	/* The cookie carries the address of the on-stack wait object. */
+	res_wr->cookie = (unsigned long) &wr_wait;
+	res = res_wr->res;
+	res->u.sqrq.restype = FW_RI_RES_TYPE_SQ;
+	res->u.sqrq.op = FW_RI_RES_OP_WRITE;
+
+	/* eqsize is the number of 64B entries plus the status page size. */
+	eqsize = wq->sq.size * T4_SQ_NUM_SLOTS + spg_creds;
+
+	res->u.sqrq.fetchszm_to_iqid = cpu_to_be32(
+	    V_FW_RI_RES_WR_HOSTFCMODE(0) |	/* no host cidx updates */
+	    V_FW_RI_RES_WR_CPRIO(0) |		/* don't keep in chip cache */
+	    V_FW_RI_RES_WR_PCIECHN(0) |		/* set by uP at ri_init time */
+	    V_FW_RI_RES_WR_IQID(scq->cqid));
+	res->u.sqrq.dcaen_to_eqsize = cpu_to_be32(
+	    V_FW_RI_RES_WR_DCAEN(0) |
+	    V_FW_RI_RES_WR_DCACPU(0) |
+	    V_FW_RI_RES_WR_FBMIN(2) |
+	    V_FW_RI_RES_WR_FBMAX(2) |
+	    V_FW_RI_RES_WR_CIDXFTHRESHO(0) |
+	    V_FW_RI_RES_WR_CIDXFTHRESH(0) |
+	    V_FW_RI_RES_WR_EQSIZE(eqsize));
+	res->u.sqrq.eqid = cpu_to_be32(wq->sq.qid);
+	res->u.sqrq.eqaddr = cpu_to_be64(wq->sq.dma_addr);
+	res++;
+	res->u.sqrq.restype = FW_RI_RES_TYPE_RQ;
+	res->u.sqrq.op = FW_RI_RES_OP_WRITE;
+
+	/* eqsize is the number of 64B entries plus the status page size. */
+	eqsize = wq->rq.size * T4_RQ_NUM_SLOTS + spg_creds ;
+	res->u.sqrq.fetchszm_to_iqid = cpu_to_be32(
+	    V_FW_RI_RES_WR_HOSTFCMODE(0) |	/* no host cidx updates */
+	    V_FW_RI_RES_WR_CPRIO(0) |		/* don't keep in chip cache */
+	    V_FW_RI_RES_WR_PCIECHN(0) |		/* set by uP at ri_init time */
+	    V_FW_RI_RES_WR_IQID(rcq->cqid));
+	res->u.sqrq.dcaen_to_eqsize = cpu_to_be32(
+	    V_FW_RI_RES_WR_DCAEN(0) |
+	    V_FW_RI_RES_WR_DCACPU(0) |
+	    V_FW_RI_RES_WR_FBMIN(2) |
+	    V_FW_RI_RES_WR_FBMAX(2) |
+	    V_FW_RI_RES_WR_CIDXFTHRESHO(0) |
+	    V_FW_RI_RES_WR_CIDXFTHRESH(0) |
+	    V_FW_RI_RES_WR_EQSIZE(eqsize));
+	res->u.sqrq.eqid = cpu_to_be32(wq->rq.qid);
+	res->u.sqrq.eqaddr = cpu_to_be64(wq->rq.dma_addr);
+
+	c4iw_init_wr_wait(&wr_wait);
+
+	t4_wrq_tx(sc, wr);
+	ret = c4iw_wait_for_reply(rdev, &wr_wait, 0, wq->sq.qid, __func__);
+	if (ret)
+		goto err7;
+
+	CTR6(KTR_IW_CXGBE,
+	    "%s sqid 0x%x rqid 0x%x kdb 0x%p squdb 0x%llx rqudb 0x%llx",
+	    __func__, wq->sq.qid, wq->rq.qid, wq->db,
+	    (unsigned long long)wq->sq.udb, (unsigned long long)wq->rq.udb);
+
+	return 0;
+
+	/* Unwind in reverse order of acquisition. */
+err7:
+	contigfree(wq->rq.queue, wq->rq.memsize, M_DEVBUF);
+err6:
+	dealloc_sq(rdev, &wq->sq);
+err5:
+	c4iw_rqtpool_free(rdev, wq->rq.rqt_hwaddr, wq->rq.rqt_size);
+err4:
+	kfree(wq->rq.sw_rq);
+err3:
+	kfree(wq->sq.sw_sq);
+err2:
+	c4iw_put_qpid(rdev, wq->rq.qid, uctx);
+err1:
+	c4iw_put_qpid(rdev, wq->sq.qid, uctx);
+	return -ENOMEM;
+}
+
+/*
+ * Copy the payload described by wr's scatter/gather list inline into the
+ * work request, directly after the immediate-data header immdp.
+ *
+ * The destination is the send queue ring, so the copy wraps back to
+ * sq->queue when it reaches &sq->queue[sq->size].  The payload is then
+ * zero padded so that header plus data end on a 16-byte boundary, and the
+ * header is filled in.
+ *
+ * max is the largest total payload accepted; the total copied is stored
+ * in *plenp.  Returns 0, or -EMSGSIZE if the payload would exceed max.
+ */
+static int build_immd(struct t4_sq *sq, struct fw_ri_immd *immdp,
+    struct ib_send_wr *wr, int max, u32 *plenp)
+{
+	u8 *dstp, *srcp;
+	u32 plen = 0;
+	int i;
+	int rem, len;
+
+	dstp = (u8 *)immdp->data;
+	for (i = 0; i < wr->num_sge; i++) {
+		/*
+		 * Compare against the remaining budget rather than summing
+		 * first: "plen + length" is 32-bit arithmetic and a huge
+		 * length could wrap it below max.  plen never exceeds max
+		 * here, so the subtraction cannot underflow.
+		 */
+		if (wr->sg_list[i].length > (u32)max - plen)
+			return -EMSGSIZE;
+		srcp = (u8 *)(unsigned long)wr->sg_list[i].addr;
+		plen += wr->sg_list[i].length;
+		rem = wr->sg_list[i].length;
+		while (rem) {
+			/* Wrap to the start of the ring when the end is hit. */
+			if (dstp == (u8 *)&sq->queue[sq->size])
+				dstp = (u8 *)sq->queue;
+			if (rem <= (u8 *)&sq->queue[sq->size] - dstp)
+				len = rem;
+			else
+				len = (u8 *)&sq->queue[sq->size] - dstp;
+			memcpy(dstp, srcp, len);
+			dstp += len;
+			srcp += len;
+			rem -= len;
+		}
+	}
+	/*
+	 * Pad to a multiple of 16 bytes.
+	 * NOTE(review): the padding is written linearly from dstp without
+	 * the wrap check used by the copy loop -- confirm it cannot run past
+	 * the end of the ring.
+	 */
+	len = roundup(plen + sizeof *immdp, 16) - (plen + sizeof *immdp);
+	if (len)
+		memset(dstp, 0, len);
+	immdp->op = FW_RI_DATA_IMMD;
+	immdp->r1 = 0;
+	immdp->r2 = 0;
+	immdp->immdlen = cpu_to_be32(plen);
+	*plenp = plen;
+	return 0;
+}
+
+/*
+ * Write a scatter/gather list into a work request.
+ *
+ * Each entry of sg_list becomes two big-endian 64-bit words starting at
+ * isglp->sge: (lkey << 32 | length) followed by the address.  The write
+ * position wraps from queue_end back to queue_start.  A zero word is
+ * stored after the last entry and the ISGL header is filled in.
+ *
+ * If plenp is not NULL it receives the sum of the entry lengths.
+ * Returns 0, or -EMSGSIZE if that sum overflows 32 bits.
+ */
+static int
+build_isgl(__be64 *queue_start, __be64 *queue_end, struct fw_ri_isgl *isglp,
+    struct ib_sge *sg_list, int num_sge, u32 *plenp)
+{
+	__be64 *slot = (__be64 *)isglp->sge;
+	u32 total = 0;
+	int n;
+
+	for (n = 0; n < num_sge; n++) {
+		struct ib_sge *sge = &sg_list[n];
+
+		/* Unsigned wrap-around means the total no longer fits. */
+		if ((total + sge->length) < total)
+			return -EMSGSIZE;
+		total += sge->length;
+
+		*slot = cpu_to_be64(((u64)sge->lkey << 32) | sge->length);
+		slot++;
+		if (slot == queue_end)
+			slot = queue_start;
+
+		*slot = cpu_to_be64(sge->addr);
+		slot++;
+		if (slot == queue_end)
+			slot = queue_start;
+	}
+	*slot = (__force __be64)0;
+
+	isglp->op = FW_RI_DATA_ISGL;
+	isglp->r1 = 0;
+	isglp->nsge = cpu_to_be16(num_sge);
+	isglp->r2 = 0;
+
+	if (plenp != NULL)
+		*plenp = total;
+	return 0;
+}
+
+/*
+ * Build a SEND work request in wqe.
+ *
+ * The send opcode is picked from wr->opcode (plain or with-invalidate)
+ * and the IB_SEND_SOLICITED flag.  The payload is carried inline when
+ * IB_SEND_INLINE is set, as a scatter/gather list otherwise, and as an
+ * empty immediate header when there are no SGEs.  *len16 receives the
+ * request size in 16-byte units.
+ *
+ * Returns 0, -EINVAL for too many SGEs or an unsupported opcode, or the
+ * error from build_immd()/build_isgl().
+ */
+static int
+build_rdma_send(struct t4_sq *sq, union t4_wr *wqe, struct ib_send_wr *wr,
+    u8 *len16)
+{
+	int solicited = (wr->send_flags & IB_SEND_SOLICITED) != 0;
+	u32 payload_len;
+	int sendop;
+	int wr_bytes;
+	int err;
+
+	if (wr->num_sge > T4_MAX_SEND_SGE)
+		return -EINVAL;
+
+	switch (wr->opcode) {
+	case IB_WR_SEND:
+		sendop = solicited ? FW_RI_SEND_WITH_SE : FW_RI_SEND;
+		wqe->send.stag_inv = 0;
+		break;
+	case IB_WR_SEND_WITH_INV:
+		sendop = solicited ? FW_RI_SEND_WITH_SE_INV :
+		    FW_RI_SEND_WITH_INV;
+		wqe->send.stag_inv = cpu_to_be32(wr->ex.invalidate_rkey);
+		break;
+	default:
+		return -EINVAL;
+	}
+	wqe->send.sendop_pkd = cpu_to_be32(V_FW_RI_SEND_WR_SENDOP(sendop));
+
+	if (wr->num_sge == 0) {
+		/* No payload: an immediate header with zero length. */
+		wqe->send.u.immd_src[0].op = FW_RI_DATA_IMMD;
+		wqe->send.u.immd_src[0].r1 = 0;
+		wqe->send.u.immd_src[0].r2 = 0;
+		wqe->send.u.immd_src[0].immdlen = 0;
+		wr_bytes = sizeof wqe->send + sizeof(struct fw_ri_immd);
+		payload_len = 0;
+	} else if (wr->send_flags & IB_SEND_INLINE) {
+		err = build_immd(sq, wqe->send.u.immd_src, wr,
+		    T4_MAX_SEND_INLINE, &payload_len);
+		if (err)
+			return err;
+		wr_bytes = sizeof wqe->send + sizeof(struct fw_ri_immd) +
+		    payload_len;
+	} else {
+		err = build_isgl((__be64 *)sq->queue,
+		    (__be64 *)&sq->queue[sq->size], wqe->send.u.isgl_src,
+		    wr->sg_list, wr->num_sge, &payload_len);
+		if (err)
+			return err;
+		wr_bytes = sizeof wqe->send + sizeof(struct fw_ri_isgl) +
+		    wr->num_sge * sizeof(struct fw_ri_sge);
+	}
+
+	*len16 = DIV_ROUND_UP(wr_bytes, 16);
+	wqe->send.plen = cpu_to_be32(payload_len);
+	return 0;
+}
+
+/*
+ * Build an RDMA WRITE work request in wqe.
+ *
+ * The sink stag and address come from wr->wr.rdma.  The payload is
+ * carried inline when IB_SEND_INLINE is set, as a scatter/gather list
+ * otherwise, and as an empty immediate header when there are no SGEs.
+ * *len16 receives the request size in 16-byte units.
+ *
+ * Returns 0, -EINVAL for too many SGEs, or the error from
+ * build_immd()/build_isgl().
+ */
+static int
+build_rdma_write(struct t4_sq *sq, union t4_wr *wqe, struct ib_send_wr *wr,
+    u8 *len16)
+{
+	u32 payload_len;
+	int wr_bytes;
+	int err;
+
+	if (wr->num_sge > T4_MAX_SEND_SGE)
+		return -EINVAL;
+
+	wqe->write.r2 = 0;
+	wqe->write.stag_sink = cpu_to_be32(wr->wr.rdma.rkey);
+	wqe->write.to_sink = cpu_to_be64(wr->wr.rdma.remote_addr);
+
+	if (wr->num_sge == 0) {
+		/* No payload: an immediate header with zero length. */
+		wqe->write.u.immd_src[0].op = FW_RI_DATA_IMMD;
+		wqe->write.u.immd_src[0].r1 = 0;
+		wqe->write.u.immd_src[0].r2 = 0;
+		wqe->write.u.immd_src[0].immdlen = 0;
+		wr_bytes = sizeof wqe->write + sizeof(struct fw_ri_immd);
+		payload_len = 0;
+	} else if (wr->send_flags & IB_SEND_INLINE) {
+		err = build_immd(sq, wqe->write.u.immd_src, wr,
+		    T4_MAX_WRITE_INLINE, &payload_len);
+		if (err)
+			return err;
+		wr_bytes = sizeof wqe->write + sizeof(struct fw_ri_immd) +
+		    payload_len;
+	} else {
+		err = build_isgl((__be64 *)sq->queue,
+		    (__be64 *)&sq->queue[sq->size], wqe->write.u.isgl_src,
+		    wr->sg_list, wr->num_sge, &payload_len);
+		if (err)
+			return err;
+		wr_bytes = sizeof wqe->write + sizeof(struct fw_ri_isgl) +
+		    wr->num_sge * sizeof(struct fw_ri_sge);
+	}
+
+	*len16 = DIV_ROUND_UP(wr_bytes, 16);
+	wqe->write.plen = cpu_to_be32(payload_len);
+	return 0;
+}
+
+/*
+ * Build an RDMA READ work request in wqe.
+ *
+ * At most one SGE is accepted.  With one SGE the source is taken from
+ * wr->wr.rdma and the sink from sg_list[0], each 64-bit address split
+ * into high and low 32-bit halves.  With no SGE a zero-length read is
+ * built using stag 2 for both source and sink.
+ *
+ * *len16 receives the request size in 16-byte units.
+ * Returns 0, or -EINVAL if more than one SGE is supplied.
+ */
+static int
+build_rdma_read(union t4_wr *wqe, struct ib_send_wr *wr, u8 *len16)
+{
+
+	if (wr->num_sge > 1)
+		return -EINVAL;
+
+	if (wr->num_sge == 0) {
+		wqe->read.stag_src = cpu_to_be32(2);
+		wqe->read.to_src_hi = 0;
+		wqe->read.to_src_lo = 0;
+		wqe->read.stag_sink = cpu_to_be32(2);
+		wqe->read.plen = 0;
+		wqe->read.to_sink_hi = 0;
+		wqe->read.to_sink_lo = 0;
+	} else {
+		struct ib_sge *sink = &wr->sg_list[0];
+
+		wqe->read.stag_src = cpu_to_be32(wr->wr.rdma.rkey);
+		wqe->read.to_src_hi =
+		    cpu_to_be32((u32)(wr->wr.rdma.remote_addr >> 32));
+		wqe->read.to_src_lo =
+		    cpu_to_be32((u32)wr->wr.rdma.remote_addr);
+		wqe->read.stag_sink = cpu_to_be32(sink->lkey);
+		wqe->read.plen = cpu_to_be32(sink->length);
+		wqe->read.to_sink_hi = cpu_to_be32((u32)(sink->addr >> 32));
+		wqe->read.to_sink_lo = cpu_to_be32((u32)(sink->addr));
+	}
+
+	wqe->read.r2 = 0;
+	wqe->read.r5 = 0;
+	*len16 = DIV_ROUND_UP(sizeof wqe->read, 16);
+	return 0;
+}
+
+/*
+ * Build a receive work request: write wr's scatter/gather list into the
+ * request (wrapping within the receive queue ring) and store the request
+ * size, in 16-byte units, in *len16.
+ *
+ * Returns 0, or the error reported by build_isgl().
+ */
+static int
+build_rdma_recv(struct c4iw_qp *qhp, union t4_recv_wr *wqe,
+    struct ib_recv_wr *wr, u8 *len16)
+{
+	__be64 *ring = (__be64 *)qhp->wq.rq.queue;
+	__be64 *ring_end = (__be64 *)&qhp->wq.rq.queue[qhp->wq.rq.size];
+	int err;
+
+	err = build_isgl(ring, ring_end, &wqe->recv.isgl, wr->sg_list,
+	    wr->num_sge, NULL);
+	if (err != 0)
+		return err;
+
+	*len16 = DIV_ROUND_UP(sizeof wqe->recv +
+	    wr->num_sge * sizeof(struct fw_ri_sge), 16);
+	return 0;
+}
+
+/*
+ * Build a fast-register work request in wqe.
+ *
+ * The fixed part is filled from wr->wr.fast_reg.  The page list follows
+ * as immediate data: each page address is stored as a big-endian 64-bit
+ * word, and the list is zero padded up to a multiple of 32 bytes.  The
+ * write position wraps from the end of the send queue ring back to its
+ * start.  *len16 receives the request size in 16-byte units.
+ *
+ * Returns 0, or -EINVAL if the page list is longer than T4_MAX_FR_DEPTH.
+ */
+static int
+build_fastreg(struct t4_sq *sq, union t4_wr *wqe, struct ib_send_wr *wr,
+    u8 *len16)
+{
+	struct fw_ri_immd *immd;
+	__be64 *slot;
+	int pbl_bytes;
+	int left;
+	int n;
+
+	if (wr->wr.fast_reg.page_list_len > T4_MAX_FR_DEPTH)
+		return -EINVAL;
+
+	/* Page list size in bytes, rounded up to a multiple of 32. */
+	pbl_bytes = roundup(wr->wr.fast_reg.page_list_len * sizeof(u64), 32);
+
+	wqe->fr.qpbinde_to_dcacpu = 0;
+	wqe->fr.pgsz_shift = wr->wr.fast_reg.page_shift - 12;
+	wqe->fr.addr_type = FW_RI_VA_BASED_TO;
+	wqe->fr.mem_perms = c4iw_ib_to_tpt_access(wr->wr.fast_reg.access_flags);
+	wqe->fr.len_hi = 0;
+	wqe->fr.len_lo = cpu_to_be32(wr->wr.fast_reg.length);
+	wqe->fr.stag = cpu_to_be32(wr->wr.fast_reg.rkey);
+	wqe->fr.va_hi = cpu_to_be32(wr->wr.fast_reg.iova_start >> 32);
+	wqe->fr.va_lo_fbo = cpu_to_be32(wr->wr.fast_reg.iova_start &
+	    0xffffffff);
+	WARN_ON(pbl_bytes > T4_MAX_FR_IMMD);
+
+	/* The immediate-data header sits right after the fixed part. */
+	immd = (struct fw_ri_immd *)(&wqe->fr + 1);
+	immd->op = FW_RI_DATA_IMMD;
+	immd->r1 = 0;
+	immd->r2 = 0;
+	immd->immdlen = cpu_to_be32(pbl_bytes);
+
+	slot = (__be64 *)(immd + 1);
+	left = pbl_bytes;
+	for (n = 0; n < wr->wr.fast_reg.page_list_len; n++) {
+		*slot = cpu_to_be64((u64)wr->wr.fast_reg.page_list->page_list[n]);
+		left -= sizeof *slot;
+		slot++;
+		if (slot == (__be64 *)&sq->queue[sq->size])
+			slot = (__be64 *)sq->queue;
+	}
+	BUG_ON(left < 0);
+
+	/* Zero whatever remains of the rounded-up page list area. */
+	while (left) {
+		*slot = 0;
+		left -= sizeof *slot;
+		slot++;
+		if (slot == (__be64 *)&sq->queue[sq->size])
+			slot = (__be64 *)sq->queue;
+	}
+
+	*len16 = DIV_ROUND_UP(sizeof wqe->fr + sizeof *immd + pbl_bytes, 16);
+	return 0;
+}
+
+/*
+ * Build a local-invalidate work request for the rkey in
+ * wr->ex.invalidate_rkey and store its size, in 16-byte units, in
+ * *len16.  Always returns 0.
+ */
+static int
+build_inv_stag(union t4_wr *wqe, struct ib_send_wr *wr, u8 *len16)
+{
+
+	wqe->inv.r2 = 0;
+	wqe->inv.stag_inv = cpu_to_be32(wr->ex.invalidate_rkey);
+
+	*len16 = DIV_ROUND_UP(sizeof wqe->inv, 16);
+	return 0;
+}
+
+/* Take a reference on the c4iw_qp that contains qp. */
+void
+c4iw_qp_add_ref(struct ib_qp *qp)
+{
+	struct c4iw_qp *qhp;
+
+	CTR2(KTR_IW_CXGBE, "%s ib_qp %p", __func__, qp);
+	qhp = to_c4iw_qp(qp);
+	atomic_inc(&qhp->refcnt);
+}
+
+/*
+ * Drop a reference on the c4iw_qp that contains qp.  When the count
+ * reaches zero, wake up anyone sleeping on the QP's wait queue.
+ */
+void
+c4iw_qp_rem_ref(struct ib_qp *qp)
+{
+	struct c4iw_qp *qhp;
+
+	CTR2(KTR_IW_CXGBE, "%s ib_qp %p", __func__, qp);
+	qhp = to_c4iw_qp(qp);
+	if (atomic_dec_and_test(&qhp->refcnt)) {
+		wake_up(&qhp->wait);
+	}
+}
+
+ /*
+  * Post a chain of send work requests on the QP's send queue.
+  *
+  * Each WR is translated into a firmware work request in the next SQ slot
+  * and recorded in the software SQ.  Processing stops at the first WR that
+  * cannot be posted: *bad_wr is set to that WR and a negative errno is
+  * returned.  That now also holds for the two early failures (QP already in
+  * error: -EINVAL; no SQ slot free on entry: -ENOMEM), so a caller can rely
+  * on *bad_wr whenever the return value is nonzero.
+  *
+  * If doorbells are enabled for the WQ, the SQ doorbell is rung once with
+  * the number of queue entries consumed.  Returns 0 when the whole chain
+  * was posted.
+  */
+ int c4iw_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
+	struct ib_send_wr **bad_wr)
+ {
+	int err = 0;
+	u8 len16 = 0;
+	enum fw_wr_opcodes fw_opcode = 0;
+	enum fw_ri_wr_flags fw_flags;
+	struct c4iw_qp *qhp;
+	union t4_wr *wqe;
+	u32 num_wrs;
+	struct t4_swsqe *swsqe;
+	unsigned long flag;
+	u16 idx = 0;
+
+	qhp = to_c4iw_qp(ibqp);
+	spin_lock_irqsave(&qhp->lock, flag);
+	if (t4_wq_in_error(&qhp->wq)) {
+		spin_unlock_irqrestore(&qhp->lock, flag);
+		*bad_wr = wr;
+		return -EINVAL;
+	}
+	num_wrs = t4_sq_avail(&qhp->wq);
+	if (num_wrs == 0) {
+		spin_unlock_irqrestore(&qhp->lock, flag);
+		*bad_wr = wr;
+		return -ENOMEM;
+	}
+	while (wr) {
+		if (num_wrs == 0) {
+			err = -ENOMEM;
+			*bad_wr = wr;
+			break;
+		}
+		/* Next free hardware SQ slot. */
+		wqe = (union t4_wr *)((u8 *)qhp->wq.sq.queue +
+		    qhp->wq.sq.wq_pidx * T4_EQ_ENTRY_SIZE);
+
+		fw_flags = 0;
+		if (wr->send_flags & IB_SEND_SOLICITED)
+			fw_flags |= FW_RI_SOLICITED_EVENT_FLAG;
+		if (wr->send_flags & IB_SEND_SIGNALED)
+			fw_flags |= FW_RI_COMPLETION_FLAG;
+		swsqe = &qhp->wq.sq.sw_sq[qhp->wq.sq.pidx];
+		switch (wr->opcode) {
+		case IB_WR_SEND_WITH_INV:
+		case IB_WR_SEND:
+			if (wr->send_flags & IB_SEND_FENCE)
+				fw_flags |= FW_RI_READ_FENCE_FLAG;
+			fw_opcode = FW_RI_SEND_WR;
+			if (wr->opcode == IB_WR_SEND)
+				swsqe->opcode = FW_RI_SEND;
+			else
+				swsqe->opcode = FW_RI_SEND_WITH_INV;
+			err = build_rdma_send(&qhp->wq.sq, wqe, wr, &len16);
+			break;
+		case IB_WR_RDMA_WRITE:
+			fw_opcode = FW_RI_RDMA_WRITE_WR;
+			swsqe->opcode = FW_RI_RDMA_WRITE;
+			err = build_rdma_write(&qhp->wq.sq, wqe, wr, &len16);
+			break;
+		case IB_WR_RDMA_READ:
+		case IB_WR_RDMA_READ_WITH_INV:
+			fw_opcode = FW_RI_RDMA_READ_WR;
+			swsqe->opcode = FW_RI_READ_REQ;
+			/*
+			 * Note: for reads the flags computed above are
+			 * replaced, not extended.
+			 */
+			if (wr->opcode == IB_WR_RDMA_READ_WITH_INV)
+				fw_flags = FW_RI_RDMA_READ_INVALIDATE;
+			else
+				fw_flags = 0;
+			err = build_rdma_read(wqe, wr, &len16);
+			if (err)
+				break;
+			swsqe->read_len = wr->sg_list[0].length;
+			if (!qhp->wq.sq.oldest_read)
+				qhp->wq.sq.oldest_read = swsqe;
+			break;
+		case IB_WR_FAST_REG_MR:
+			fw_opcode = FW_RI_FR_NSMR_WR;
+			swsqe->opcode = FW_RI_FAST_REGISTER;
+			err = build_fastreg(&qhp->wq.sq, wqe, wr, &len16);
+			break;
+		case IB_WR_LOCAL_INV:
+			if (wr->send_flags & IB_SEND_FENCE)
+				fw_flags |= FW_RI_LOCAL_FENCE_FLAG;
+			fw_opcode = FW_RI_INV_LSTAG_WR;
+			swsqe->opcode = FW_RI_LOCAL_INV;
+			err = build_inv_stag(wqe, wr, &len16);
+			break;
+		default:
+			CTR2(KTR_IW_CXGBE, "%s post of type =%d TBD!", __func__,
+			    wr->opcode);
+			err = -EINVAL;
+			break;
+		}
+		if (err) {
+			*bad_wr = wr;
+			break;
+		}
+		swsqe->idx = qhp->wq.sq.pidx;
+		swsqe->complete = 0;
+		swsqe->signaled = (wr->send_flags & IB_SEND_SIGNALED);
+		swsqe->wr_id = wr->wr_id;
+
+		init_wr_hdr(wqe, qhp->wq.sq.pidx, fw_opcode, fw_flags, len16);
+
+		CTR5(KTR_IW_CXGBE,
+		    "%s cookie 0x%llx pidx 0x%x opcode 0x%x read_len %u",
+		    __func__, (unsigned long long)wr->wr_id, qhp->wq.sq.pidx,
+		    swsqe->opcode, swsqe->read_len);
+		wr = wr->next;
+		num_wrs--;
+		t4_sq_produce(&qhp->wq, len16);
+		/* len16 is in 16-byte units; convert to queue entries. */
+		idx += DIV_ROUND_UP(len16*16, T4_EQ_ENTRY_SIZE);
+	}
+	if (t4_wq_db_enabled(&qhp->wq))
+		t4_ring_sq_db(&qhp->wq, idx);
+	spin_unlock_irqrestore(&qhp->lock, flag);
+	return err;
+ }
+
+ /*
+  * Post a chain of receive work requests on the QP's receive queue.
+  *
+  * Each WR is built into the next RQ slot and its wr_id is remembered in the
+  * software RQ.  Processing stops at the first WR that cannot be posted:
+  * *bad_wr is set to that WR and a negative errno is returned.  That now
+  * also holds for the two early failures (QP already in error: -EINVAL; no
+  * RQ slot free on entry: -ENOMEM), so a caller can rely on *bad_wr whenever
+  * the return value is nonzero.
+  *
+  * If doorbells are enabled for the WQ, the RQ doorbell is rung once with
+  * the number of queue entries consumed.  Returns 0 when the whole chain
+  * was posted.
+  */
+ int c4iw_post_receive(struct ib_qp *ibqp, struct ib_recv_wr *wr,
+	struct ib_recv_wr **bad_wr)
+ {
+	int err = 0;
+	struct c4iw_qp *qhp;
+	union t4_recv_wr *wqe;
+	u32 num_wrs;
+	u8 len16 = 0;
+	unsigned long flag;
+	u16 idx = 0;
+
+	qhp = to_c4iw_qp(ibqp);
+	spin_lock_irqsave(&qhp->lock, flag);
+	if (t4_wq_in_error(&qhp->wq)) {
+		spin_unlock_irqrestore(&qhp->lock, flag);
+		*bad_wr = wr;
+		return -EINVAL;
+	}
+	num_wrs = t4_rq_avail(&qhp->wq);
+	if (num_wrs == 0) {
+		spin_unlock_irqrestore(&qhp->lock, flag);
+		*bad_wr = wr;
+		return -ENOMEM;
+	}
+	while (wr) {
+		if (wr->num_sge > T4_MAX_RECV_SGE) {
+			err = -EINVAL;
+			*bad_wr = wr;
+			break;
+		}
+		/* Next free hardware RQ slot. */
+		wqe = (union t4_recv_wr *)((u8 *)qhp->wq.rq.queue +
+		    qhp->wq.rq.wq_pidx * T4_EQ_ENTRY_SIZE);
+		if (num_wrs)
+			err = build_rdma_recv(qhp, wqe, wr, &len16);
+		else
+			err = -ENOMEM;
+		if (err) {
+			*bad_wr = wr;
+			break;
+		}
+
+		qhp->wq.rq.sw_rq[qhp->wq.rq.pidx].wr_id = wr->wr_id;
+
+		wqe->recv.opcode = FW_RI_RECV_WR;
+		wqe->recv.r1 = 0;
+		wqe->recv.wrid = qhp->wq.rq.pidx;
+		wqe->recv.r2[0] = 0;
+		wqe->recv.r2[1] = 0;
+		wqe->recv.r2[2] = 0;
+		wqe->recv.len16 = len16;
+		CTR3(KTR_IW_CXGBE, "%s cookie 0x%llx pidx %u", __func__,
+		    (unsigned long long) wr->wr_id, qhp->wq.rq.pidx);
+		t4_rq_produce(&qhp->wq, len16);
+		/* len16 is in 16-byte units; convert to queue entries. */
+		idx += DIV_ROUND_UP(len16*16, T4_EQ_ENTRY_SIZE);
+		wr = wr->next;
+		num_wrs--;
+	}
+	if (t4_wq_db_enabled(&qhp->wq))
+		t4_ring_rq_db(&qhp->wq, idx);
+	spin_unlock_irqrestore(&qhp->lock, flag);
+	return err;
+ }
+
+ /* Memory-window bind entry point; unconditionally returns -ENOSYS. */
+ int
+ c4iw_bind_mw(struct ib_qp *qp, struct ib_mw *mw, struct ib_mw_bind *mw_bind)
+ {
+	int rc = -ENOSYS;
+
+	return rc;
+ }
+
+ /*
+  * Choose the layer/error-type byte and the error code for a TERMINATE
+  * message from the status, opcode and queue type of the failing CQE.
+  * When no CQE is supplied, or the status is not one of those handled
+  * below, LAYER_RDMAP|DDP_LOCAL_CATA with error code 0 is reported.
+  */
+ static inline void build_term_codes(struct t4_cqe *err_cqe, u8 *layer_type,
+	u8 *ecode)
+ {
+	int status, opcode, rqtype;
+	int send_inv, tagged;
+	u8 lt, ec;
+
+	if (err_cqe == NULL) {
+		*layer_type = LAYER_RDMAP|DDP_LOCAL_CATA;
+		*ecode = 0;
+		return;
+	}
+
+	status = CQE_STATUS(err_cqe);
+	opcode = CQE_OPCODE(err_cqe);
+	rqtype = RQ_TYPE(err_cqe);
+	send_inv = (opcode == FW_RI_SEND_WITH_INV) ||
+	    (opcode == FW_RI_SEND_WITH_SE_INV);
+	tagged = (opcode == FW_RI_RDMA_WRITE) ||
+	    (rqtype && (opcode == FW_RI_READ_RESP));
+
+	switch (status) {
+	case T4_ERR_STAG:
+		if (send_inv) {
+			lt = LAYER_RDMAP|RDMAP_REMOTE_OP;
+			ec = RDMAP_CANT_INV_STAG;
+		} else {
+			lt = LAYER_RDMAP|RDMAP_REMOTE_PROT;
+			ec = RDMAP_INV_STAG;
+		}
+		break;
+	case T4_ERR_PDID:
+		lt = LAYER_RDMAP|RDMAP_REMOTE_PROT;
+		ec = send_inv ? RDMAP_CANT_INV_STAG : RDMAP_STAG_NOT_ASSOC;
+		break;
+	case T4_ERR_QPID:
+		lt = LAYER_RDMAP|RDMAP_REMOTE_PROT;
+		ec = RDMAP_STAG_NOT_ASSOC;
+		break;
+	case T4_ERR_ACCESS:
+		lt = LAYER_RDMAP|RDMAP_REMOTE_PROT;
+		ec = RDMAP_ACC_VIOL;
+		break;
+	case T4_ERR_WRAP:
+		lt = LAYER_RDMAP|RDMAP_REMOTE_PROT;
+		ec = RDMAP_TO_WRAP;
+		break;
+	case T4_ERR_BOUND:
+		if (tagged) {
+			lt = LAYER_DDP|DDP_TAGGED_ERR;
+			ec = DDPT_BASE_BOUNDS;
+		} else {
+			lt = LAYER_RDMAP|RDMAP_REMOTE_PROT;
+			ec = RDMAP_BASE_BOUNDS;
+		}
+		break;
+	case T4_ERR_INVALIDATE_SHARED_MR:
+	case T4_ERR_INVALIDATE_MR_WITH_MW_BOUND:
+		lt = LAYER_RDMAP|RDMAP_REMOTE_OP;
+		ec = RDMAP_CANT_INV_STAG;
+		break;
+	case T4_ERR_ECC:
+	case T4_ERR_ECC_PSTAG:
+	case T4_ERR_INTERNAL_ERR:
+		lt = LAYER_RDMAP|RDMAP_LOCAL_CATA;
+		ec = 0;
+		break;
+	case T4_ERR_OUT_OF_RQE:
+		lt = LAYER_DDP|DDP_UNTAGGED_ERR;
+		ec = DDPU_INV_MSN_NOBUF;
+		break;
+	case T4_ERR_PBL_ADDR_BOUND:
+		lt = LAYER_DDP|DDP_TAGGED_ERR;
+		ec = DDPT_BASE_BOUNDS;
+		break;
+	case T4_ERR_CRC:
+		lt = LAYER_MPA|DDP_LLP;
+		ec = MPA_CRC_ERR;
+		break;
+	case T4_ERR_MARKER:
+		lt = LAYER_MPA|DDP_LLP;
+		ec = MPA_MARKER_ERR;
+		break;
+	case T4_ERR_PDU_LEN_ERR:
+		lt = LAYER_DDP|DDP_UNTAGGED_ERR;
+		ec = DDPU_MSG_TOOBIG;
+		break;
+	case T4_ERR_DDP_VERSION:
+		if (tagged) {
+			lt = LAYER_DDP|DDP_TAGGED_ERR;
+			ec = DDPT_INV_VERS;
+		} else {
+			lt = LAYER_DDP|DDP_UNTAGGED_ERR;
+			ec = DDPU_INV_VERS;
+		}
+		break;
+	case T4_ERR_RDMA_VERSION:
+		lt = LAYER_RDMAP|RDMAP_REMOTE_OP;
+		ec = RDMAP_INV_VERS;
+		break;
+	case T4_ERR_OPCODE:
+		lt = LAYER_RDMAP|RDMAP_REMOTE_OP;
+		ec = RDMAP_INV_OPCODE;
+		break;
+	case T4_ERR_DDP_QUEUE_NUM:
+		lt = LAYER_DDP|DDP_UNTAGGED_ERR;
+		ec = DDPU_INV_QN;
+		break;
+	case T4_ERR_MSN:
+	case T4_ERR_MSN_GAP:
+	case T4_ERR_MSN_RANGE:
+	case T4_ERR_IRD_OVERFLOW:
+		lt = LAYER_DDP|DDP_UNTAGGED_ERR;
+		ec = DDPU_INV_MSN_RANGE;
+		break;
+	case T4_ERR_TBIT:
+		lt = LAYER_DDP|DDP_LOCAL_CATA;
+		ec = 0;
+		break;
+	case T4_ERR_MO:
+		lt = LAYER_DDP|DDP_UNTAGGED_ERR;
+		ec = DDPU_INV_MO;
+		break;
+	default:
+		lt = LAYER_RDMAP|DDP_LOCAL_CATA;
+		ec = 0;
+		break;
+	}
+
+	*layer_type = lt;
+	*ecode = ec;
+ }
+
+ /*
+  * Build a FW_RI_WR of type TERMINATE for the QP's connection and hand it to
+  * the adapter on the connection's offload tx queue.  The layer/etype and
+  * error code come from the QP attributes when they describe an MPA error,
+  * otherwise they are derived from err_cqe.  If no work request can be
+  * allocated the function returns without sending anything.  The gfp
+  * argument is not used.
+  */
+ static void post_terminate(struct c4iw_qp *qhp, struct t4_cqe *err_cqe,
+	gfp_t gfp)
+ {
+	struct socket *so = qhp->ep->com.so;
+	struct tcpcb *tp = intotcpcb(sotoinpcb(so));
+	struct toepcb *toep = tp->t_toe;
+	struct terminate_message *term;
+	struct fw_ri_wr *wqe;
+	struct wrqe *wr;
+
+	CTR4(KTR_IW_CXGBE, "%s qhp %p qid 0x%x tid %u", __func__, qhp,
+	    qhp->wq.sq.qid, qhp->ep->hwtid);
+
+	wr = alloc_wrqe(sizeof(*wqe), toep->ofld_txq);
+	if (wr == NULL)
+		return;
+	wqe = wrtod(wr);
+	memset(wqe, 0, sizeof(*wqe));
+
+	wqe->op_compl = cpu_to_be32(V_FW_WR_OP(FW_RI_WR));
+	wqe->flowid_len16 = cpu_to_be32(V_FW_WR_FLOWID(qhp->ep->hwtid) |
+	    V_FW_WR_LEN16(DIV_ROUND_UP(sizeof(*wqe), 16)));
+
+	wqe->u.terminate.type = FW_RI_TYPE_TERMINATE;
+	wqe->u.terminate.immdlen = cpu_to_be32(sizeof(*term));
+	term = (struct terminate_message *)wqe->u.terminate.termmsg;
+	if (qhp->attr.layer_etype != (LAYER_MPA|DDP_LLP)) {
+		build_term_codes(err_cqe, &term->layer_etype, &term->ecode);
+	} else {
+		term->layer_etype = qhp->attr.layer_etype;
+		term->ecode = qhp->attr.ecode;
+	}
+
+	/* Account for the tx credits before the request goes out. */
+	creds(toep, sizeof(*wqe));
+	t4_wrq_tx(qhp->rhp->rdev.adap, wr);
+ }
+
+ /*
+  * Flush the receive side and then the send side of a QP.  For each side the
+  * hardware CQ is flushed, the CQEs belonging to this QP are counted and the
+  * work queue is flushed; if that flush returns nonzero, the CQ's completion
+  * handler is invoked under the CQ's comp_handler_lock.
+  *
+  * Both the CQ lock and qhp->lock are acquired and released here, in that
+  * order.
+  */
+ static void __flush_qp(struct c4iw_qp *qhp, struct c4iw_cq *rchp,
+	struct c4iw_cq *schp)
+ {
+	unsigned long flag;
+	int nr_cqes;
+	int notify;
+
+	CTR4(KTR_IW_CXGBE, "%s qhp %p rchp %p schp %p", __func__, qhp, rchp,
+	    schp);
+
+	/* Receive side.  Locking hierarchy: cq lock first, then qp lock. */
+	spin_lock_irqsave(&rchp->lock, flag);
+	spin_lock(&qhp->lock);
+	c4iw_flush_hw_cq(&rchp->cq);
+	c4iw_count_rcqes(&rchp->cq, &qhp->wq, &nr_cqes);
+	notify = c4iw_flush_rq(&qhp->wq, &rchp->cq, nr_cqes);
+	spin_unlock(&qhp->lock);
+	spin_unlock_irqrestore(&rchp->lock, flag);
+	if (notify) {
+		spin_lock_irqsave(&rchp->comp_handler_lock, flag);
+		(*rchp->ibcq.comp_handler)(&rchp->ibcq, rchp->ibcq.cq_context);
+		spin_unlock_irqrestore(&rchp->comp_handler_lock, flag);
+	}
+
+	/* Send side.  Locking hierarchy: cq lock first, then qp lock. */
+	spin_lock_irqsave(&schp->lock, flag);
+	spin_lock(&qhp->lock);
+	c4iw_flush_hw_cq(&schp->cq);
+	c4iw_count_scqes(&schp->cq, &qhp->wq, &nr_cqes);
+	notify = c4iw_flush_sq(&qhp->wq, &schp->cq, nr_cqes);
+	spin_unlock(&qhp->lock);
+	spin_unlock_irqrestore(&schp->lock, flag);
+	if (notify) {
+		spin_lock_irqsave(&schp->comp_handler_lock, flag);
+		(*schp->ibcq.comp_handler)(&schp->ibcq, schp->ibcq.cq_context);
+		spin_unlock_irqrestore(&schp->comp_handler_lock, flag);
+	}
+ }
+
+ /*
+  * Flush a QP.  A QP without a uobject is flushed here via __flush_qp().
+  * For a QP with a uobject the WQ and its CQ(s) are only marked as being in
+  * error and the CQ completion handlers are invoked; the send CQ is handled
+  * separately only when it differs from the receive CQ.
+  */
+ static void flush_qp(struct c4iw_qp *qhp)
+ {
+	struct c4iw_cq *rchp = get_chp(qhp->rhp, qhp->attr.rcq);
+	struct c4iw_cq *schp = get_chp(qhp->rhp, qhp->attr.scq);
+	unsigned long flag;
+
+	if (!qhp->ibqp.uobject) {
+		__flush_qp(qhp, rchp, schp);
+		return;
+	}
+
+	t4_set_wq_in_error(&qhp->wq);
+
+	t4_set_cq_in_error(&rchp->cq);
+	spin_lock_irqsave(&rchp->comp_handler_lock, flag);
+	(*rchp->ibcq.comp_handler)(&rchp->ibcq, rchp->ibcq.cq_context);
+	spin_unlock_irqrestore(&rchp->comp_handler_lock, flag);
+
+	if (schp == rchp)
+		return;
+
+	t4_set_cq_in_error(&schp->cq);
+	spin_lock_irqsave(&schp->comp_handler_lock, flag);
+	(*schp->ibcq.comp_handler)(&schp->ibcq, schp->ibcq.cq_context);
+	spin_unlock_irqrestore(&schp->comp_handler_lock, flag);
+ }
+
+ /*
+  * Send a FW_RI_WR of type FINI for the QP's connection and wait for the
+  * reply.
+  *
+  * rhp and ep must be the device and endpoint the QP is attached to (this is
+  * asserted).  The request asks for a completion and carries the address of
+  * ep->com.wr_wait as its cookie; the function then sleeps in
+  * c4iw_wait_for_reply() on that wait object.
+  *
+  * Returns the result of c4iw_wait_for_reply(), or -ENOMEM if no work
+  * request could be allocated.  (A failed allocation used to be reported as
+  * success, which made the caller treat a FINI that was never sent as
+  * completed.)
+  */
+ static int
+ rdma_fini(struct c4iw_dev *rhp, struct c4iw_qp *qhp, struct c4iw_ep *ep)
+ {
+	struct c4iw_rdev *rdev = &rhp->rdev;
+	struct adapter *sc = rdev->adap;
+	struct fw_ri_wr *wqe;
+	int ret;
+	struct wrqe *wr;
+	struct socket *so = ep->com.so;
+	struct inpcb *inp = sotoinpcb(so);
+	struct tcpcb *tp = intotcpcb(inp);
+	struct toepcb *toep = tp->t_toe;
+
+	KASSERT(rhp == qhp->rhp && ep == qhp->ep, ("%s: EDOOFUS", __func__));
+
+	CTR4(KTR_IW_CXGBE, "%s qhp %p qid 0x%x tid %u", __func__, qhp,
+	    qhp->wq.sq.qid, ep->hwtid);
+
+	wr = alloc_wrqe(sizeof(*wqe), toep->ofld_txq);
+	if (wr == NULL)
+		return (-ENOMEM);
+	wqe = wrtod(wr);
+
+	memset(wqe, 0, sizeof *wqe);
+
+	wqe->op_compl = cpu_to_be32(V_FW_WR_OP(FW_RI_WR) | F_FW_WR_COMPL);
+	wqe->flowid_len16 = cpu_to_be32(V_FW_WR_FLOWID(ep->hwtid) |
+	    V_FW_WR_LEN16(DIV_ROUND_UP(sizeof *wqe, 16)));
+	wqe->cookie = (unsigned long) &ep->com.wr_wait;
+	wqe->u.fini.type = FW_RI_TYPE_FINI;
+
+	/* Arm the wait object before the request can possibly complete. */
+	c4iw_init_wr_wait(&ep->com.wr_wait);
+
+	creds(toep, sizeof(*wqe));
+	t4_wrq_tx(sc, wr);
+
+	ret = c4iw_wait_for_reply(rdev, &ep->com.wr_wait, ep->hwtid,
+	    qhp->wq.sq.qid, __func__);
+	return ret;
+ }
+
+ /*
+  * Fill init->u with the work request selected by p2p_type: an RDMA write
+  * carrying an immediate-data element, or an RDMA read request.  init->u is
+  * zeroed first and stays zeroed for any other p2p_type.
+  */
+ static void build_rtr_msg(u8 p2p_type, struct fw_ri_init *init)
+ {
+	CTR2(KTR_IW_CXGBE, "%s p2p_type = %d", __func__, p2p_type);
+	memset(&init->u, 0, sizeof(init->u));
+
+	if (p2p_type == FW_RI_INIT_P2PTYPE_RDMA_WRITE) {
+		init->u.write.opcode = FW_RI_RDMA_WRITE_WR;
+		init->u.write.stag_sink = cpu_to_be32(1);
+		init->u.write.to_sink = cpu_to_be64(1);
+		init->u.write.u.immd_src[0].op = FW_RI_DATA_IMMD;
+		init->u.write.len16 = DIV_ROUND_UP(sizeof(init->u.write) +
+		    sizeof(struct fw_ri_immd), 16);
+	} else if (p2p_type == FW_RI_INIT_P2PTYPE_READ_REQ) {
+		/*
+		 * The opcode is stored through the "write" view of the
+		 * union, exactly as the code has always done.
+		 * NOTE(review): presumably u.write.opcode and u.read.opcode
+		 * share an offset -- confirm against the fw_ri_init layout.
+		 */
+		init->u.write.opcode = FW_RI_RDMA_READ_WR;
+		init->u.read.stag_src = cpu_to_be32(1);
+		init->u.read.to_src_lo = cpu_to_be32(1);
+		init->u.read.stag_sink = cpu_to_be32(1);
+		init->u.read.to_sink_lo = cpu_to_be32(1);
+		init->u.read.len16 = DIV_ROUND_UP(sizeof(init->u.read), 16);
+	}
+ }
+
+ /*
+  * Charge the tx credits for a work request of wrsize bytes to the toepcb.
+  * With the inpcb write-locked, the tx descriptor at txsd_pidx is filled in
+  * (credits in 16-byte units, zero payload length), the credits are
+  * subtracted from the connection's balance and the producer index advances
+  * with wrap-around.
+  */
+ static void
+ creds(struct toepcb *toep, size_t wrsize)
+ {
+	struct ofld_tx_sdesc *txsd;
+
+	CTR3(KTR_IW_CXGBE, "%s:creB %p %u", __func__, toep , wrsize);
+
+	INP_WLOCK(toep->inp);
+	txsd = &toep->txsd[toep->txsd_pidx];
+	txsd->tx_credits = howmany(wrsize, 16);
+	txsd->plen = 0;
+	KASSERT(toep->tx_credits >= txsd->tx_credits && toep->txsd_avail > 0,
+	    ("%s: not enough credits (%d)", __func__, toep->tx_credits));
+	toep->tx_credits -= txsd->tx_credits;
+	toep->txsd_pidx++;
+	if (__predict_false(toep->txsd_pidx == toep->txsd_total))
+		toep->txsd_pidx = 0;
+	toep->txsd_avail--;
+	INP_WUNLOCK(toep->inp);
+
+	CTR5(KTR_IW_CXGBE, "%s:creE %p %u %u %u", __func__, toep ,
+	    txsd->tx_credits, toep->tx_credits, toep->txsd_pidx);
+ }
+
+ /*
+  * Send a FW_RI_WR of type INIT describing the QP (MPA attributes, QP
+  * capabilities, queue and CQ ids, ORD/IRD limits, sequence numbers and RQT
+  * location) on the QP's connection and wait for the reply.  For the MPA
+  * initiator the request also carries the work request built by
+  * build_rtr_msg().
+  *
+  * The rhp argument is not used; the device is reached through qhp->rhp.
+  *
+  * Once the wait returns, the toepcb's ulp_mode is set to ULP_MODE_RDMA
+  * whatever the result was.
+  *
+  * Returns the result of c4iw_wait_for_reply(), or -ENOMEM if no work
+  * request could be allocated.  (A failed allocation used to be reported as
+  * success, which left the QP marked RTS although nothing had been sent.)
+  */
+ static int rdma_init(struct c4iw_dev *rhp, struct c4iw_qp *qhp)
+ {
+	struct fw_ri_wr *wqe;
+	int ret;
+	struct wrqe *wr;
+	struct c4iw_ep *ep = qhp->ep;
+	struct c4iw_rdev *rdev = &qhp->rhp->rdev;
+	struct adapter *sc = rdev->adap;
+	struct socket *so = ep->com.so;
+	struct inpcb *inp = sotoinpcb(so);
+	struct tcpcb *tp = intotcpcb(inp);
+	struct toepcb *toep = tp->t_toe;
+
+	CTR4(KTR_IW_CXGBE, "%s qhp %p qid 0x%x tid %u", __func__, qhp,
+	    qhp->wq.sq.qid, ep->hwtid);
+
+	wr = alloc_wrqe(sizeof(*wqe), toep->ofld_txq);
+	if (wr == NULL)
+		return (-ENOMEM);
+	wqe = wrtod(wr);
+
+	memset(wqe, 0, sizeof *wqe);
+
+	wqe->op_compl = cpu_to_be32(
+	    V_FW_WR_OP(FW_RI_WR) |
+	    F_FW_WR_COMPL);
+	wqe->flowid_len16 = cpu_to_be32(V_FW_WR_FLOWID(ep->hwtid) |
+	    V_FW_WR_LEN16(DIV_ROUND_UP(sizeof *wqe, 16)));
+
+	wqe->cookie = (unsigned long) &ep->com.wr_wait;
+
+	wqe->u.init.type = FW_RI_TYPE_INIT;
+	wqe->u.init.mpareqbit_p2ptype =
+	    V_FW_RI_WR_MPAREQBIT(qhp->attr.mpa_attr.initiator) |
+	    V_FW_RI_WR_P2PTYPE(qhp->attr.mpa_attr.p2p_type);
+	wqe->u.init.mpa_attrs = FW_RI_MPA_IETF_ENABLE;
+	if (qhp->attr.mpa_attr.recv_marker_enabled)
+		wqe->u.init.mpa_attrs |= FW_RI_MPA_RX_MARKER_ENABLE;
+	if (qhp->attr.mpa_attr.xmit_marker_enabled)
+		wqe->u.init.mpa_attrs |= FW_RI_MPA_TX_MARKER_ENABLE;
+	if (qhp->attr.mpa_attr.crc_enabled)
+		wqe->u.init.mpa_attrs |= FW_RI_MPA_CRC_ENABLE;
+
+	wqe->u.init.qp_caps = FW_RI_QP_RDMA_READ_ENABLE |
+	    FW_RI_QP_RDMA_WRITE_ENABLE |
+	    FW_RI_QP_BIND_ENABLE;
+	/* Fast-register and STag 0 only for QPs without a uobject. */
+	if (!qhp->ibqp.uobject)
+		wqe->u.init.qp_caps |= FW_RI_QP_FAST_REGISTER_ENABLE |
+		    FW_RI_QP_STAG0_ENABLE;
+	wqe->u.init.nrqe = cpu_to_be16(t4_rqes_posted(&qhp->wq));
+	wqe->u.init.pdid = cpu_to_be32(qhp->attr.pd);
+	wqe->u.init.qpid = cpu_to_be32(qhp->wq.sq.qid);
+	wqe->u.init.sq_eqid = cpu_to_be32(qhp->wq.sq.qid);
+	wqe->u.init.rq_eqid = cpu_to_be32(qhp->wq.rq.qid);
+	wqe->u.init.scqid = cpu_to_be32(qhp->attr.scq);
+	wqe->u.init.rcqid = cpu_to_be32(qhp->attr.rcq);
+	wqe->u.init.ord_max = cpu_to_be32(qhp->attr.max_ord);
+	wqe->u.init.ird_max = cpu_to_be32(qhp->attr.max_ird);
+	wqe->u.init.iss = cpu_to_be32(ep->snd_seq);
+	wqe->u.init.irs = cpu_to_be32(ep->rcv_seq);
+	wqe->u.init.hwrqsize = cpu_to_be32(qhp->wq.rq.rqt_size);
+	/* The RQT address is passed relative to the start of the RQ region. */
+	wqe->u.init.hwrqaddr = cpu_to_be32(qhp->wq.rq.rqt_hwaddr -
+	    sc->vres.rq.start);
+	if (qhp->attr.mpa_attr.initiator)
+		build_rtr_msg(qhp->attr.mpa_attr.p2p_type, &wqe->u.init);
+
+	/* Arm the wait object before the request can possibly complete. */
+	c4iw_init_wr_wait(&ep->com.wr_wait);
+
+	creds(toep, sizeof(*wqe));
+	t4_wrq_tx(sc, wr);
+
+	ret = c4iw_wait_for_reply(rdev, &ep->com.wr_wait, ep->hwtid,
+	    qhp->wq.sq.qid, __func__);
+
+	toep->ulp_mode = ULP_MODE_RDMA;
+
+	return ret;
+ }
+ /*
+ * Apply attribute changes and/or a state transition to a QP.
+ *
+ * rhp - device, passed on to rdma_init()/rdma_fini()
+ * qhp - QP to modify; qhp->mutex is held while the state is examined
+ * and changed
+ * mask - C4IW_QP_ATTR_* bits selecting which members of attrs are used
+ * attrs - requested values (next_state, mpa_attr, llp_stream_handle,
+ * layer_etype, ecode, enable_*, max_ord, max_ird)
+ * internal - zero when called from c4iw_ib_modify_qp(). Only nonzero
+ * callers may move the QP out of CLOSING or TERMINATE, and a
+ * zero value additionally requests an endpoint disconnect for
+ * RTS->CLOSING and RTS->ERROR and a TERMINATE message for
+ * RTS->TERMINATE.
+ *
+ * Attribute changes (C4IW_QP_ATTR_VALID_MODIFY) are accepted only while the
+ * QP is IDLE (-EIO otherwise) and max_ord/max_ird may not exceed
+ * c4iw_max_read_depth (-EINVAL).
+ *
+ * The "err" path detaches the endpoint from the QP, forces the QP into
+ * ERROR and flushes it. Sending the TERMINATE, disconnecting the endpoint
+ * and dropping endpoint references all happen after qhp->mutex has been
+ * released.
+ *
+ * Returns 0 on success, or -EIO / -EINVAL / the error returned by
+ * rdma_init() or rdma_fini().
+ */
+ int c4iw_modify_qp(struct c4iw_dev *rhp, struct c4iw_qp *qhp,
+ enum c4iw_qp_attr_mask mask,
+ struct c4iw_qp_attributes *attrs,
+ int internal)
+ {
+ int ret = 0;
+ struct c4iw_qp_attributes newattr = qhp->attr;
+ int disconnect = 0;
+ int terminate = 0;
+ int abort = 0;
+ int free = 0;
+ struct c4iw_ep *ep = NULL;
+
+ CTR5(KTR_IW_CXGBE, "%s qhp %p sqid 0x%x rqid 0x%x ep %p", __func__, qhp,
+ qhp->wq.sq.qid, qhp->wq.rq.qid, qhp->ep);
+ CTR3(KTR_IW_CXGBE, "%s state %d -> %d", __func__, qhp->attr.state,
+ (mask & C4IW_QP_ATTR_NEXT_STATE) ? attrs->next_state : -1);
+
+ mutex_lock(&qhp->mutex);
+
+ /* Attribute changes are only accepted while the QP is IDLE. */
+ if (mask & C4IW_QP_ATTR_VALID_MODIFY) {
+ if (qhp->attr.state != C4IW_QP_STATE_IDLE) {
+ ret = -EIO;
+ goto out;
+ }
+ if (mask & C4IW_QP_ATTR_ENABLE_RDMA_READ)
+ newattr.enable_rdma_read = attrs->enable_rdma_read;
+ if (mask & C4IW_QP_ATTR_ENABLE_RDMA_WRITE)
+ newattr.enable_rdma_write = attrs->enable_rdma_write;
+ if (mask & C4IW_QP_ATTR_ENABLE_RDMA_BIND)
+ newattr.enable_bind = attrs->enable_bind;
+ if (mask & C4IW_QP_ATTR_MAX_ORD) {
+ if (attrs->max_ord > c4iw_max_read_depth) {
+ ret = -EINVAL;
+ goto out;
+ }
+ newattr.max_ord = attrs->max_ord;
+ }
+ if (mask & C4IW_QP_ATTR_MAX_IRD) {
+ if (attrs->max_ird > c4iw_max_read_depth) {
+ ret = -EINVAL;
+ goto out;
+ }
+ newattr.max_ird = attrs->max_ird;
+ }
+ qhp->attr = newattr; /* commit only after every check has passed */
+ }
+ /*
+ * Nothing more to do unless a state change was requested and the
+ * requested state differs from the current one.
+ */
+ if (!(mask & C4IW_QP_ATTR_NEXT_STATE))
+ goto out;
+ if (qhp->attr.state == attrs->next_state)
+ goto out;
+
+ switch (qhp->attr.state) {
+ case C4IW_QP_STATE_IDLE:
+ switch (attrs->next_state) {
+ case C4IW_QP_STATE_RTS:
+ if (!(mask & C4IW_QP_ATTR_LLP_STREAM_HANDLE)) {
+ ret = -EINVAL;
+ goto out;
+ }
+ if (!(mask & C4IW_QP_ATTR_MPA_ATTR)) {
+ ret = -EINVAL;
+ goto out;
+ }
+ qhp->attr.mpa_attr = attrs->mpa_attr;
+ qhp->attr.llp_stream_handle = attrs->llp_stream_handle;
+ qhp->ep = qhp->attr.llp_stream_handle;
+ set_state(qhp, C4IW_QP_STATE_RTS);
+
+ /*
+ * Ref the endpoint here and deref when we
+ * disassociate the endpoint from the QP. This
+ * happens in CLOSING->IDLE transition or *->ERROR
+ * transition.
+ */
+ c4iw_get_ep(&qhp->ep->com);
+ ret = rdma_init(rhp, qhp);
+ if (ret)
+ goto err;
+ break;
+ case C4IW_QP_STATE_ERROR:
+ set_state(qhp, C4IW_QP_STATE_ERROR);
+ flush_qp(qhp);
+ break;
+ default:
+ ret = -EINVAL;
+ goto out;
+ }
+ break;
+ case C4IW_QP_STATE_RTS:
+ switch (attrs->next_state) {
+ case C4IW_QP_STATE_CLOSING:
+ /* FIXME: use atomic_read() on the refcount, as the Linux driver does. */
+ BUG_ON(qhp->ep->com.kref.count < 2);
+ set_state(qhp, C4IW_QP_STATE_CLOSING);
+ ep = qhp->ep;
+ if (!internal) {
+ abort = 0;
+ disconnect = 1;
+ c4iw_get_ep(&qhp->ep->com); /* held for the disconnect after unlock */
+ }
+ if (qhp->ibqp.uobject)
+ t4_set_wq_in_error(&qhp->wq);
+ ret = rdma_fini(rhp, qhp, ep);
+ if (ret)
+ goto err;
+ break;
+ case C4IW_QP_STATE_TERMINATE:
+ set_state(qhp, C4IW_QP_STATE_TERMINATE);
+ qhp->attr.layer_etype = attrs->layer_etype;
+ qhp->attr.ecode = attrs->ecode;
+ if (qhp->ibqp.uobject)
+ t4_set_wq_in_error(&qhp->wq);
+ ep = qhp->ep;
+ if (!internal)
+ terminate = 1;
+ disconnect = 1; /* unconditional, unlike the CLOSING and ERROR cases */
+ c4iw_get_ep(&qhp->ep->com);
+ break;
+ case C4IW_QP_STATE_ERROR:
+ set_state(qhp, C4IW_QP_STATE_ERROR);
+ if (qhp->ibqp.uobject)
+ t4_set_wq_in_error(&qhp->wq);
+ if (!internal) {
+ abort = 1;
+ disconnect = 1;
+ ep = qhp->ep;
+ c4iw_get_ep(&qhp->ep->com);
+ }
+ goto err; /* not a failure (ret stays 0): reuses the teardown path */
+ break;
+ default:
+ ret = -EINVAL;
+ goto out;
+ }
+ break;
+ case C4IW_QP_STATE_CLOSING:
+ if (!internal) {
+ ret = -EINVAL;
+ goto out;
+ }
+ switch (attrs->next_state) {
+ case C4IW_QP_STATE_IDLE:
+ flush_qp(qhp);
+ set_state(qhp, C4IW_QP_STATE_IDLE);
+ qhp->attr.llp_stream_handle = NULL;
+ c4iw_put_ep(&qhp->ep->com);
+ qhp->ep = NULL;
+ wake_up(&qhp->wait);
+ break;
+ case C4IW_QP_STATE_ERROR:
+ goto err;
+ default:
+ ret = -EINVAL;
+ goto err;
+ }
+ break;
+ case C4IW_QP_STATE_ERROR:
+ if (attrs->next_state != C4IW_QP_STATE_IDLE) {
+ ret = -EINVAL;
+ goto out;
+ }
+ if (!t4_sq_empty(&qhp->wq) || !t4_rq_empty(&qhp->wq)) {
+ ret = -EINVAL;
+ goto out;
+ }
+ set_state(qhp, C4IW_QP_STATE_IDLE);
+ break;
+ case C4IW_QP_STATE_TERMINATE:
+ if (!internal) {
+ ret = -EINVAL;
+ goto out;
+ }
+ goto err; /* taken whatever next_state was requested */
+ break;
+ default:
+ printf("%s in a bad state %d\n",
+ __func__, qhp->attr.state);
+ ret = -EINVAL;
+ goto err;
+ break;
+ }
+ goto out; /* normal completion: skip the teardown below */
+ err:
+ CTR3(KTR_IW_CXGBE, "%s disassociating ep %p qpid 0x%x", __func__,
+ qhp->ep, qhp->wq.sq.qid);
+
+ /* disassociate the LLP connection */
+ qhp->attr.llp_stream_handle = NULL;
+ if (!ep)
+ ep = qhp->ep;
+ qhp->ep = NULL;
+ set_state(qhp, C4IW_QP_STATE_ERROR);
+ free = 1; /* the QP's endpoint reference is dropped after unlocking */
+ wake_up(&qhp->wait);
+ BUG_ON(!ep);
+ flush_qp(qhp);
+ out:
+ mutex_unlock(&qhp->mutex);
+
+ if (terminate)
+ post_terminate(qhp, NULL, internal ? GFP_ATOMIC : GFP_KERNEL);
+
+ /*
+ * If disconnect is 1, then we need to initiate a disconnect
+ * on the EP. This can be a normal close (RTS->CLOSING) or
+ * an abnormal close (RTS/CLOSING->ERROR).
+ */
+ if (disconnect) {
+ c4iw_ep_disconnect(ep, abort, internal ? GFP_ATOMIC :
+ GFP_KERNEL);
+ c4iw_put_ep(&ep->com);
+ }
+
+ /*
+ * If free is 1, then we've disassociated the EP from the QP
+ * and we need to dereference the EP.
+ */
+ if (free)
+ c4iw_put_ep(&ep->com);
+ CTR2(KTR_IW_CXGBE, "%s exit state %d", __func__, qhp->attr.state);
+ return ret;
+ }
+
+ /*
+  * idr_for_each() callback: enable doorbells on the work queue of the QP
+  * stored in the idr entry.  id and data are unused; always returns 0.
+  */
+ static int
+ enable_qp_db(int id, void *p, void *data)
+ {
+	struct c4iw_qp *qhp = p;
+
+	t4_enable_wq_db(&qhp->wq);
+
+	return 0;
+ }
+
+ /*
+  * Destroy a QP.
+  *
+  * The QP is first moved to ERROR (as an "internal" request if it is
+  * currently in TERMINATE) and the function sleeps until the QP has no
+  * endpoint attached.  The QP is then removed from the device's idr, the
+  * device's QP count is decremented and, if that brings the count back to
+  * db_fc_threshold while the device is in FLOW_CONTROL, doorbells are
+  * re-enabled on the remaining QPs.  Finally the function drops its own
+  * reference, waits for the count to reach zero, and releases the hardware
+  * queues and the QP structure.  Always returns 0.
+  */
+ int
+ c4iw_destroy_qp(struct ib_qp *ib_qp)
+ {
+	struct c4iw_qp_attributes attrs;
+	struct c4iw_ucontext *ucontext;
+	struct c4iw_qp *qhp;
+	struct c4iw_dev *rhp;
+	int internal;
+
+	CTR2(KTR_IW_CXGBE, "%s ib_qp %p", __func__, ib_qp);
+	qhp = to_c4iw_qp(ib_qp);
+	rhp = qhp->rhp;
+
+	attrs.next_state = C4IW_QP_STATE_ERROR;
+	internal = (qhp->attr.state == C4IW_QP_STATE_TERMINATE) ? 1 : 0;
+	c4iw_modify_qp(rhp, qhp, C4IW_QP_ATTR_NEXT_STATE, &attrs, internal);
+	wait_event(qhp->wait, !qhp->ep);
+
+	spin_lock_irq(&rhp->lock);
+	remove_handle_nolock(rhp, &rhp->qpidr, qhp->wq.sq.qid);
+	rhp->qpcnt--;
+	BUG_ON(rhp->qpcnt < 0);
+	if (rhp->qpcnt <= db_fc_threshold && rhp->db_state == FLOW_CONTROL) {
+		rhp->rdev.stats.db_state_transitions++;
+		rhp->db_state = NORMAL;
+		idr_for_each(&rhp->qpidr, enable_qp_db, NULL);
+	}
+	spin_unlock_irq(&rhp->lock);
+
+	/* Drop our reference and wait for the remaining ones to go away. */
+	atomic_dec(&qhp->refcnt);
+	wait_event(qhp->wait, !atomic_read(&qhp->refcnt));
+
+	if (ib_qp->uobject)
+		ucontext = to_c4iw_ucontext(ib_qp->uobject->context);
+	else
+		ucontext = NULL;
+	destroy_qp(&rhp->rdev, &qhp->wq,
+	    ucontext ? &ucontext->uctx : &rhp->rdev.uctx);
+
+	CTR3(KTR_IW_CXGBE, "%s ib_qp %p qpid 0x%0x", __func__, ib_qp,
+	    qhp->wq.sq.qid);
+	kfree(qhp);
+
+	return 0;
+ }
+
+ /*
+  * idr_for_each() callback: disable doorbells on the work queue of the QP
+  * stored in the idr entry.  id and data are unused; always returns 0.
+  */
+ static int
+ disable_qp_db(int id, void *p, void *data)
+ {
+	struct c4iw_qp *qhp = p;
+
+	t4_disable_wq_db(&qhp->wq);
+
+	return 0;
+ }
+
+ /*
+  * Create an RC queue pair on the device that owns pd.
+  *
+  * The requested send/receive depths are incremented by one and rounded up
+  * to a multiple of 16; the resulting capacities (size - 1) and the maximum
+  * inline size are written back into attrs->cap.  The QP is inserted into
+  * the device's QP idr under its sq qid, and the device's QP count is
+  * raised, which may switch the device into doorbell FLOW_CONTROL.
+  *
+  * When udata is supplied, four mmap keys (SQ, RQ and the two doorbell
+  * pages) are allocated from the ucontext, returned to the caller through
+  * udata, and registered with insert_mmap().
+  *
+  * Returns the new ib_qp, or ERR_PTR(-EINVAL / -E2BIG / -ENOMEM / other
+  * error) on failure.  On every failure after the QP count was raised the
+  * count is now restored (it used to stay incremented) and, mirroring
+  * c4iw_destroy_qp(), doorbells are re-enabled if that brings the count
+  * back to db_fc_threshold while the device is in FLOW_CONTROL.
+  */
+ struct ib_qp *
+ c4iw_create_qp(struct ib_pd *pd, struct ib_qp_init_attr *attrs,
+	struct ib_udata *udata)
+ {
+	struct c4iw_dev *rhp;
+	struct c4iw_qp *qhp;
+	struct c4iw_pd *php;
+	struct c4iw_cq *schp;
+	struct c4iw_cq *rchp;
+	struct c4iw_create_qp_resp uresp;
+	int sqsize, rqsize;
+	struct c4iw_ucontext *ucontext;
+	int ret;
+	struct c4iw_mm_entry *mm1, *mm2, *mm3, *mm4;
+
+	CTR2(KTR_IW_CXGBE, "%s ib_pd %p", __func__, pd);
+
+	if (attrs->qp_type != IB_QPT_RC)
+		return ERR_PTR(-EINVAL);
+
+	php = to_c4iw_pd(pd);
+	rhp = php->rhp;
+	schp = get_chp(rhp, ((struct c4iw_cq *)attrs->send_cq)->cq.cqid);
+	rchp = get_chp(rhp, ((struct c4iw_cq *)attrs->recv_cq)->cq.cqid);
+	if (!schp || !rchp)
+		return ERR_PTR(-EINVAL);
+
+	if (attrs->cap.max_inline_data > T4_MAX_SEND_INLINE)
+		return ERR_PTR(-EINVAL);
+
+	rqsize = roundup(attrs->cap.max_recv_wr + 1, 16);
+	if (rqsize > T4_MAX_RQ_SIZE)
+		return ERR_PTR(-E2BIG);
+
+	sqsize = roundup(attrs->cap.max_send_wr + 1, 16);
+	if (sqsize > T4_MAX_SQ_SIZE)
+		return ERR_PTR(-E2BIG);
+
+	ucontext = pd->uobject ? to_c4iw_ucontext(pd->uobject->context) : NULL;
+
+	qhp = kzalloc(sizeof(*qhp), GFP_KERNEL);
+	if (!qhp)
+		return ERR_PTR(-ENOMEM);
+	qhp->wq.sq.size = sqsize;
+	qhp->wq.sq.memsize = (sqsize + 1) * sizeof *qhp->wq.sq.queue;
+	qhp->wq.rq.size = rqsize;
+	qhp->wq.rq.memsize = (rqsize + 1) * sizeof *qhp->wq.rq.queue;
+
+	/* Queues of a QP with a ucontext are sized in whole pages. */
+	if (ucontext) {
+		qhp->wq.sq.memsize = roundup(qhp->wq.sq.memsize, PAGE_SIZE);
+		qhp->wq.rq.memsize = roundup(qhp->wq.rq.memsize, PAGE_SIZE);
+	}
+
+	CTR5(KTR_IW_CXGBE, "%s sqsize %u sqmemsize %zu rqsize %u rqmemsize %zu",
+	    __func__, sqsize, qhp->wq.sq.memsize, rqsize, qhp->wq.rq.memsize);
+
+	ret = create_qp(&rhp->rdev, &qhp->wq, &schp->cq, &rchp->cq,
+	    ucontext ? &ucontext->uctx : &rhp->rdev.uctx);
+	if (ret)
+		goto err1;
+
+	attrs->cap.max_recv_wr = rqsize - 1;
+	attrs->cap.max_send_wr = sqsize - 1;
+	attrs->cap.max_inline_data = T4_MAX_SEND_INLINE;
+
+	qhp->rhp = rhp;
+	qhp->attr.pd = php->pdid;
+	qhp->attr.scq = ((struct c4iw_cq *) attrs->send_cq)->cq.cqid;
+	qhp->attr.rcq = ((struct c4iw_cq *) attrs->recv_cq)->cq.cqid;
+	qhp->attr.sq_num_entries = attrs->cap.max_send_wr;
+	qhp->attr.rq_num_entries = attrs->cap.max_recv_wr;
+	qhp->attr.sq_max_sges = attrs->cap.max_send_sge;
+	qhp->attr.sq_max_sges_rdma_write = attrs->cap.max_send_sge;
+	qhp->attr.rq_max_sges = attrs->cap.max_recv_sge;
+	qhp->attr.state = C4IW_QP_STATE_IDLE;
+	qhp->attr.next_state = C4IW_QP_STATE_IDLE;
+	qhp->attr.enable_rdma_read = 1;
+	qhp->attr.enable_rdma_write = 1;
+	qhp->attr.enable_bind = 1;
+	qhp->attr.max_ord = 1;
+	qhp->attr.max_ird = 1;
+	spin_lock_init(&qhp->lock);
+	mutex_init(&qhp->mutex);
+	init_waitqueue_head(&qhp->wait);
+	atomic_set(&qhp->refcnt, 1);
+
+	spin_lock_irq(&rhp->lock);
+	if (rhp->db_state != NORMAL)
+		t4_disable_wq_db(&qhp->wq);
+	if (++rhp->qpcnt > db_fc_threshold && rhp->db_state == NORMAL) {
+		rhp->rdev.stats.db_state_transitions++;
+		rhp->db_state = FLOW_CONTROL;
+		idr_for_each(&rhp->qpidr, disable_qp_db, NULL);
+	}
+	ret = insert_handle_nolock(rhp, &rhp->qpidr, qhp, qhp->wq.sq.qid);
+	spin_unlock_irq(&rhp->lock);
+	if (ret)
+		goto err2;
+
+	if (udata) {
+		mm1 = kmalloc(sizeof *mm1, GFP_KERNEL);
+		if (!mm1) {
+			ret = -ENOMEM;
+			goto err3;
+		}
+		mm2 = kmalloc(sizeof *mm2, GFP_KERNEL);
+		if (!mm2) {
+			ret = -ENOMEM;
+			goto err4;
+		}
+		mm3 = kmalloc(sizeof *mm3, GFP_KERNEL);
+		if (!mm3) {
+			ret = -ENOMEM;
+			goto err5;
+		}
+		mm4 = kmalloc(sizeof *mm4, GFP_KERNEL);
+		if (!mm4) {
+			ret = -ENOMEM;
+			goto err6;
+		}
+		uresp.flags = 0;
+		uresp.qid_mask = rhp->rdev.qpmask;
+		uresp.sqid = qhp->wq.sq.qid;
+		uresp.sq_size = qhp->wq.sq.size;
+		uresp.sq_memsize = qhp->wq.sq.memsize;
+		uresp.rqid = qhp->wq.rq.qid;
+		uresp.rq_size = qhp->wq.rq.size;
+		uresp.rq_memsize = qhp->wq.rq.memsize;
+		/* Reserve four consecutive mmap keys, one page apart. */
+		spin_lock(&ucontext->mmap_lock);
+		uresp.sq_key = ucontext->key;
+		ucontext->key += PAGE_SIZE;
+		uresp.rq_key = ucontext->key;
+		ucontext->key += PAGE_SIZE;
+		uresp.sq_db_gts_key = ucontext->key;
+		ucontext->key += PAGE_SIZE;
+		uresp.rq_db_gts_key = ucontext->key;
+		ucontext->key += PAGE_SIZE;
+		spin_unlock(&ucontext->mmap_lock);
+		ret = ib_copy_to_udata(udata, &uresp, sizeof uresp);
+		if (ret)
+			goto err7;
+		mm1->key = uresp.sq_key;
+		mm1->addr = qhp->wq.sq.phys_addr;
+		mm1->len = PAGE_ALIGN(qhp->wq.sq.memsize);
+		CTR4(KTR_IW_CXGBE, "%s mm1 %x, %x, %d", __func__, mm1->key,
+		    mm1->addr, mm1->len);
+		insert_mmap(ucontext, mm1);
+		mm2->key = uresp.rq_key;
+		mm2->addr = vtophys(qhp->wq.rq.queue);
+		mm2->len = PAGE_ALIGN(qhp->wq.rq.memsize);
+		CTR4(KTR_IW_CXGBE, "%s mm2 %x, %x, %d", __func__, mm2->key,
+		    mm2->addr, mm2->len);
+		insert_mmap(ucontext, mm2);
+		mm3->key = uresp.sq_db_gts_key;
+		mm3->addr = qhp->wq.sq.udb;
+		mm3->len = PAGE_SIZE;
+		CTR4(KTR_IW_CXGBE, "%s mm3 %x, %x, %d", __func__, mm3->key,
+		    mm3->addr, mm3->len);
+		insert_mmap(ucontext, mm3);
+		mm4->key = uresp.rq_db_gts_key;
+		mm4->addr = qhp->wq.rq.udb;
+		mm4->len = PAGE_SIZE;
+		CTR4(KTR_IW_CXGBE, "%s mm4 %x, %x, %d", __func__, mm4->key,
+		    mm4->addr, mm4->len);
+		insert_mmap(ucontext, mm4);
+	}
+	qhp->ibqp.qp_num = qhp->wq.sq.qid;
+	init_timer(&(qhp->timer));
+	CTR5(KTR_IW_CXGBE,
+	    "%s qhp %p sq_num_entries %d, rq_num_entries %d qpid 0x%0x",
+	    __func__, qhp, qhp->attr.sq_num_entries, qhp->attr.rq_num_entries,
+	    qhp->wq.sq.qid);
+	return &qhp->ibqp;
+ err7:
+	kfree(mm4);
+ err6:
+	kfree(mm3);
+ err5:
+	kfree(mm2);
+ err4:
+	kfree(mm1);
+ err3:
+	remove_handle(rhp, &rhp->qpidr, qhp->wq.sq.qid);
+ err2:
+	/*
+	 * Take back the qpcnt increment made above and, as c4iw_destroy_qp()
+	 * does, leave doorbell flow control if the count is back at the
+	 * threshold.
+	 */
+	spin_lock_irq(&rhp->lock);
+	rhp->qpcnt--;
+	if (rhp->qpcnt <= db_fc_threshold && rhp->db_state == FLOW_CONTROL) {
+		rhp->rdev.stats.db_state_transitions++;
+		rhp->db_state = NORMAL;
+		idr_for_each(&rhp->qpidr, enable_qp_db, NULL);
+	}
+	spin_unlock_irq(&rhp->lock);
+	destroy_qp(&rhp->rdev, &qhp->wq,
+	    ucontext ? &ucontext->uctx : &rhp->rdev.uctx);
+ err1:
+	kfree(qhp);
+	return ERR_PTR(ret);
+ }
+
+ /*
+  * Verbs modify-QP entry point: translate the ib_qp_attr / attr_mask pair
+  * into driver attributes and a C4IW_QP_ATTR_* mask and pass them to
+  * c4iw_modify_qp() with internal == 0.  A request for the RTR state is
+  * dropped from the mask; if nothing is left to do, 0 is returned without
+  * touching the QP.  udata is not used.
+  */
+ int
+ c4iw_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
+	int attr_mask, struct ib_udata *udata)
+ {
+	struct c4iw_qp_attributes attrs;
+	enum c4iw_qp_attr_mask mask = 0;
+	struct c4iw_qp *qhp;
+	struct c4iw_dev *rhp;
+
+	CTR2(KTR_IW_CXGBE, "%s ib_qp %p", __func__, ibqp);
+
+	/* iwarp does not support the RTR state */
+	if ((attr_mask & IB_QP_STATE) && (attr->qp_state == IB_QPS_RTR))
+		attr_mask &= ~IB_QP_STATE;
+
+	/* Make sure we still have something left to do */
+	if (attr_mask == 0)
+		return 0;
+
+	memset(&attrs, 0, sizeof(attrs));
+	qhp = to_c4iw_qp(ibqp);
+	rhp = qhp->rhp;
+
+	attrs.next_state = c4iw_convert_state(attr->qp_state);
+	attrs.enable_rdma_read =
+	    (attr->qp_access_flags & IB_ACCESS_REMOTE_READ) ? 1 : 0;
+	attrs.enable_rdma_write =
+	    (attr->qp_access_flags & IB_ACCESS_REMOTE_WRITE) ? 1 : 0;
+	attrs.enable_bind =
+	    (attr->qp_access_flags & IB_ACCESS_MW_BIND) ? 1 : 0;
+
+	if (attr_mask & IB_QP_STATE)
+		mask |= C4IW_QP_ATTR_NEXT_STATE;
+	if (attr_mask & IB_QP_ACCESS_FLAGS)
+		mask |= (C4IW_QP_ATTR_ENABLE_RDMA_READ |
+		    C4IW_QP_ATTR_ENABLE_RDMA_WRITE |
+		    C4IW_QP_ATTR_ENABLE_RDMA_BIND);
+
+	/*
+	 * Use SQ_PSN and RQ_PSN to pass in IDX_INC values for
+	 * ringing the queue db when we're in DB_FULL mode.
+	 */
+	attrs.sq_db_inc = attr->sq_psn;
+	attrs.rq_db_inc = attr->rq_psn;
+	if (attr_mask & IB_QP_SQ_PSN)
+		mask |= C4IW_QP_ATTR_SQ_DB;
+	if (attr_mask & IB_QP_RQ_PSN)
+		mask |= C4IW_QP_ATTR_RQ_DB;
+
+	return c4iw_modify_qp(rhp, qhp, mask, &attrs, 0);
+ }
+
+ /*
+  * Look up QP number qpn on the device via get_qhp() and return the result
+  * cast to an ib_qp pointer.
+  */
+ struct ib_qp *
+ c4iw_get_qp(struct ib_device *dev, int qpn)
+ {
+	struct c4iw_dev *rhp;
+
+	CTR3(KTR_IW_CXGBE, "%s ib_dev %p qpn 0x%x", __func__, dev, qpn);
+	rhp = to_c4iw_dev(dev);
+
+	return (struct ib_qp *)get_qhp(rhp, qpn);
+ }
+
+ /*
+  * Verbs query-QP entry point.  Both output structures are zeroed and only
+  * attr->qp_state is filled in, from the driver's current QP state.
+  * attr_mask is ignored.  Always returns 0.
+  */
+ int
+ c4iw_ib_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
+	int attr_mask, struct ib_qp_init_attr *init_attr)
+ {
+	struct c4iw_qp *qhp;
+
+	qhp = to_c4iw_qp(ibqp);
+
+	memset(attr, 0, sizeof(*attr));
+	memset(init_attr, 0, sizeof(*init_attr));
+	attr->qp_state = to_ib_qp_state(qhp->attr.state);
+
+	return 0;
+ }
+#endif
diff --git a/sys/dev/cxgbe/iw_cxgbe/resource.c b/sys/dev/cxgbe/iw_cxgbe/resource.c
new file mode 100644
index 0000000..7d3694c
--- /dev/null
+++ b/sys/dev/cxgbe/iw_cxgbe/resource.c
@@ -0,0 +1,342 @@
+/*
+ * Copyright (c) 2009-2013 Chelsio, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+/* Crude resource management */
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include "opt_inet.h"
+
+#ifdef TCP_OFFLOAD
+#include <linux/spinlock.h>
+#include "iw_cxgbe.h"
+
+/*
+ * Create the qid id-table for the adapter's QP id range
+ * [vres.qp.start, vres.qp.start + vres.qp.size) and release into it only the
+ * ids whose qpmask bits are all clear.
+ *
+ * Returns 0 on success, -ENOMEM if the table could not be allocated.
+ */
+static int c4iw_init_qid_table(struct c4iw_rdev *rdev)
+{
+	u32 qid;
+	int rc;
+
+	rc = c4iw_id_table_alloc(&rdev->resource.qid_table,
+	    rdev->adap->vres.qp.start, rdev->adap->vres.qp.size,
+	    rdev->adap->vres.qp.size, 0);
+	if (rc != 0) {
+		printf("%s: return ENOMEM\n", __func__);
+		return -ENOMEM;
+	}
+
+	qid = rdev->adap->vres.qp.start;
+	while (qid < rdev->adap->vres.qp.start + rdev->adap->vres.qp.size) {
+		/* Ids with any qpmask bit set are never released here. */
+		if ((qid & rdev->qpmask) == 0)
+			c4iw_id_free(&rdev->resource.qid_table, qid);
+		qid++;
+	}
+
+	return 0;
+}
+
+/*
+ * Set up the three id tables of rdev->resource: TPT (nr_tpt entries, random
+ * allocation flag), QID (see c4iw_init_qid_table) and PDID (nr_pdid entries).
+ * nr_* must be power of 2.
+ *
+ * Returns 0 on success.  On any failure the tables created so far are freed
+ * again and -ENOMEM is returned.
+ */
+int c4iw_init_resource(struct c4iw_rdev *rdev, u32 nr_tpt, u32 nr_pdid)
+{
+	struct c4iw_resource *rscp = &rdev->resource;
+
+	if (c4iw_id_table_alloc(&rscp->tpt_table, 0, nr_tpt, 1,
+	    C4IW_ID_TABLE_F_RANDOM) != 0)
+		return -ENOMEM;
+
+	if (c4iw_init_qid_table(rdev) != 0) {
+		c4iw_id_table_free(&rscp->tpt_table);
+		return -ENOMEM;
+	}
+
+	if (c4iw_id_table_alloc(&rscp->pdid_table, 0, nr_pdid, 1, 0) != 0) {
+		/* Unwind in reverse order of creation. */
+		c4iw_id_table_free(&rscp->qid_table);
+		c4iw_id_table_free(&rscp->tpt_table);
+		return -ENOMEM;
+	}
+
+	return 0;
+}
+
+/*
+ * Allocate one id from id_table.
+ *
+ * c4iw_id_alloc() reports failure as (u32)-1; that is translated to 0 here,
+ * so callers see 0 when no resource is available.
+ */
+u32 c4iw_get_resource(struct c4iw_id_table *id_table)
+{
+	u32 id = c4iw_id_alloc(id_table);
+
+	return (id == (u32)(-1)) ? 0 : id;
+}
+
+/* Trace the id being given back, then release it into id_table. */
+void c4iw_put_resource(struct c4iw_id_table *id_table, u32 entry)
+{
+	CTR2(KTR_IW_CXGBE, "%s entry 0x%x", __func__, entry);
+
+	c4iw_id_free(id_table, entry);
+}
+
+/*
+ * Hand out a CQ id for this ucontext.
+ *
+ * If uctx->cqids has a cached id, the first one is removed and returned.
+ * Otherwise a new base id is taken from the device qid table, stats.qid.cur
+ * grows by qpmask + 1, the ids following the base (while (id & qpmask) != 0)
+ * are cached on uctx->cqids, and the base id together with the same
+ * followers is cached on uctx->qpids.
+ *
+ * Returns the id, or 0 when the qid table has nothing left.  If a list node
+ * allocation fails the id is still returned; the ids not yet queued are
+ * simply not cached.  stats.qid.max is refreshed before returning.
+ */
+u32 c4iw_get_cqid(struct c4iw_rdev *rdev, struct c4iw_dev_ucontext *uctx)
+{
+	struct c4iw_qid_list *node;
+	u32 qid;
+	int id;
+
+	mutex_lock(&uctx->lock);
+
+	if (list_empty(&uctx->cqids)) {
+		qid = c4iw_get_resource(&rdev->resource.qid_table);
+		if (qid == 0)
+			goto out;
+
+		mutex_lock(&rdev->stats.lock);
+		rdev->stats.qid.cur += rdev->qpmask + 1;
+		mutex_unlock(&rdev->stats.lock);
+
+		/* Cache the rest of the group for later CQ allocations. */
+		id = qid + 1;
+		while (id & rdev->qpmask) {
+			node = kmalloc(sizeof(*node), GFP_KERNEL);
+			if (node == NULL)
+				goto out;
+			node->qid = id;
+			list_add_tail(&node->entry, &uctx->cqids);
+			id++;
+		}
+
+		/*
+		 * now put the same ids on the qp list since they all
+		 * map to the same db/gts page.
+		 */
+		node = kmalloc(sizeof(*node), GFP_KERNEL);
+		if (node == NULL)
+			goto out;
+		node->qid = qid;
+		list_add_tail(&node->entry, &uctx->qpids);
+
+		id = qid + 1;
+		while (id & rdev->qpmask) {
+			node = kmalloc(sizeof(*node), GFP_KERNEL);
+			if (node == NULL)
+				goto out;
+			node->qid = id;
+			list_add_tail(&node->entry, &uctx->qpids);
+			id++;
+		}
+	} else {
+		node = list_entry(uctx->cqids.next, struct c4iw_qid_list,
+		    entry);
+		list_del(&node->entry);
+		qid = node->qid;
+		kfree(node);
+	}
+out:
+	mutex_unlock(&uctx->lock);
+	CTR2(KTR_IW_CXGBE, "%s: qid 0x%x", __func__, qid);
+
+	mutex_lock(&rdev->stats.lock);
+	if (rdev->stats.qid.max < rdev->stats.qid.cur)
+		rdev->stats.qid.max = rdev->stats.qid.cur;
+	mutex_unlock(&rdev->stats.lock);
+
+	return qid;
+}
+
+/*
+ * Give a CQ id back by appending it to the ucontext's cached cqids list
+ * (under uctx->lock).  If the list node cannot be allocated the function
+ * returns without recording the id.  rdev is not used.
+ */
+void c4iw_put_cqid(struct c4iw_rdev *rdev, u32 qid,
+    struct c4iw_dev_ucontext *uctx)
+{
+	struct c4iw_qid_list *node = kmalloc(sizeof(*node), GFP_KERNEL);
+
+	if (node == NULL)
+		return;
+
+	CTR2(KTR_IW_CXGBE, "%s qid 0x%x", __func__, qid);
+	node->qid = qid;
+
+	mutex_lock(&uctx->lock);
+	list_add_tail(&node->entry, &uctx->cqids);
+	mutex_unlock(&uctx->lock);
+}
+
+/*
+ * Hand out a QP id for this ucontext.
+ *
+ * If uctx->qpids has a cached id, the first one is removed and returned.
+ * Otherwise a new base id is taken from the device qid table, stats.qid.cur
+ * grows by qpmask + 1, the ids following the base (while (id & qpmask) != 0)
+ * are cached on uctx->qpids, and the base id together with the same
+ * followers is cached on uctx->cqids, mirroring c4iw_get_cqid().
+ *
+ * Returns the id, or 0 when the qid table has nothing left.  If a list node
+ * allocation fails the id is still returned; the ids not yet queued are
+ * simply not cached.  stats.qid.max is refreshed before returning.
+ */
+u32 c4iw_get_qpid(struct c4iw_rdev *rdev, struct c4iw_dev_ucontext *uctx)
+{
+	struct c4iw_qid_list *entry;
+	u32 qid;
+	int i;
+
+	mutex_lock(&uctx->lock);
+	if (!list_empty(&uctx->qpids)) {
+		entry = list_entry(uctx->qpids.next, struct c4iw_qid_list,
+		    entry);
+		list_del(&entry->entry);
+		qid = entry->qid;
+		kfree(entry);
+	} else {
+		qid = c4iw_get_resource(&rdev->resource.qid_table);
+		if (!qid)
+			goto out;
+		mutex_lock(&rdev->stats.lock);
+		rdev->stats.qid.cur += rdev->qpmask + 1;
+		mutex_unlock(&rdev->stats.lock);
+
+		/* Cache the rest of the group for later QP allocations. */
+		for (i = qid + 1; i & rdev->qpmask; i++) {
+			entry = kmalloc(sizeof(*entry), GFP_KERNEL);
+			if (!entry)
+				goto out;
+			entry->qid = i;
+			list_add_tail(&entry->entry, &uctx->qpids);
+		}
+
+		/*
+		 * now put the same ids on the cq list since they all
+		 * map to the same db/gts page.
+		 */
+		entry = kmalloc(sizeof(*entry), GFP_KERNEL);
+		if (!entry)
+			goto out;
+		entry->qid = qid;
+		list_add_tail(&entry->entry, &uctx->cqids);
+
+		/*
+		 * Start at qid + 1: the base id was queued just above.
+		 * Starting at qid itself would end the loop immediately when
+		 * (qid & qpmask) == 0, leaving the followers off the cq list,
+		 * and would queue the base id twice otherwise.
+		 */
+		for (i = qid + 1; i & rdev->qpmask; i++) {
+			entry = kmalloc(sizeof(*entry), GFP_KERNEL);
+			if (!entry)
+				goto out;
+			entry->qid = i;
+			list_add_tail(&entry->entry, &uctx->cqids);
+		}
+	}
+out:
+	mutex_unlock(&uctx->lock);
+	CTR2(KTR_IW_CXGBE, "%s qid 0x%x", __func__, qid);
+	mutex_lock(&rdev->stats.lock);
+	if (rdev->stats.qid.cur > rdev->stats.qid.max)
+		rdev->stats.qid.max = rdev->stats.qid.cur;
+	mutex_unlock(&rdev->stats.lock);
+	return qid;
+}
+
+/*
+ * Give a QP id back by appending it to the ucontext's cached qpids list
+ * (under uctx->lock).  If the list node cannot be allocated the function
+ * returns without recording the id.  rdev is not used.
+ */
+void c4iw_put_qpid(struct c4iw_rdev *rdev, u32 qid,
+    struct c4iw_dev_ucontext *uctx)
+{
+	struct c4iw_qid_list *node = kmalloc(sizeof(*node), GFP_KERNEL);
+
+	if (node == NULL)
+		return;
+
+	CTR2(KTR_IW_CXGBE, "%s qid 0x%x", __func__, qid);
+	node->qid = qid;
+
+	mutex_lock(&uctx->lock);
+	list_add_tail(&node->entry, &uctx->qpids);
+	mutex_unlock(&uctx->lock);
+}
+
+/*
+ * Free the three id tables (TPT, QID, PDID) that c4iw_init_resource()
+ * created inside rscp.
+ */
+void c4iw_destroy_resource(struct c4iw_resource *rscp)
+{
+	c4iw_id_table_free(&rscp->tpt_table);
+
+	c4iw_id_table_free(&rscp->qid_table);
+
+	c4iw_id_table_free(&rscp->pdid_table);
+}
+
+/* PBL Memory Manager. Uses Linux generic allocator. */
+
+#define MIN_PBL_SHIFT 8 /* 256B == min PBL size (32 entries) */
+
+/*
+ * Allocate size bytes from the PBL pool.
+ *
+ * On success stats.pbl.cur grows by size rounded up to the pool's minimum
+ * chunk (1 << MIN_PBL_SHIFT) and stats.pbl.max is raised if needed; on
+ * failure stats.pbl.fail is incremented.  Returns the pool address truncated
+ * to 32 bits, 0 on failure.
+ */
+u32 c4iw_pblpool_alloc(struct c4iw_rdev *rdev, int size)
+{
+	unsigned long addr;
+
+	addr = gen_pool_alloc(rdev->pbl_pool, size);
+	CTR3(KTR_IW_CXGBE, "%s addr 0x%x size %d", __func__, (u32)addr, size);
+
+	mutex_lock(&rdev->stats.lock);
+	if (addr == 0) {
+		rdev->stats.pbl.fail++;
+	} else {
+		rdev->stats.pbl.cur += roundup(size, 1 << MIN_PBL_SHIFT);
+		if (rdev->stats.pbl.max < rdev->stats.pbl.cur)
+			rdev->stats.pbl.max = rdev->stats.pbl.cur;
+	}
+	mutex_unlock(&rdev->stats.lock);
+
+	return (u32)addr;
+}
+
+/*
+ * Return a PBL allocation to the pool.  stats.pbl.cur is reduced by size
+ * rounded up to 1 << MIN_PBL_SHIFT before the memory is released.
+ */
+void c4iw_pblpool_free(struct c4iw_rdev *rdev, u32 addr, int size)
+{
+	CTR3(KTR_IW_CXGBE, "%s addr 0x%x size %d", __func__, addr, size);
+
+	mutex_lock(&rdev->stats.lock);
+	rdev->stats.pbl.cur -= roundup(size, 1 << MIN_PBL_SHIFT);
+	mutex_unlock(&rdev->stats.lock);
+
+	gen_pool_free(rdev->pbl_pool, (unsigned long)addr, size);
+}
+
+/*
+ * Create the PBL pool over the adapter's vres.pbl range with a minimum
+ * allocation order of MIN_PBL_SHIFT.  Returns 0, or -ENOMEM if the pool
+ * could not be created.
+ */
+int c4iw_pblpool_create(struct c4iw_rdev *rdev)
+{
+	rdev->pbl_pool = gen_pool_create(rdev->adap->vres.pbl.start,
+	    MIN_PBL_SHIFT, rdev->adap->vres.pbl.size);
+
+	return (rdev->pbl_pool == NULL) ? -ENOMEM : 0;
+}
+
+/* Tear down the pool created by c4iw_pblpool_create(). */
+void c4iw_pblpool_destroy(struct c4iw_rdev *rdev)
+{
+
+	gen_pool_destroy(rdev->pbl_pool);
+}
+
+/* RQT Memory Manager. Uses Linux generic allocator. */
+
+#define MIN_RQT_SHIFT 10 /* 1KB == min RQT size (16 entries) */
+
+/*
+ * Allocate from the RQT pool.  size is shifted left by 6 (multiplied by 64)
+ * before it is passed to the pool.
+ *
+ * A failure is logged with the adapter's device name and counted in
+ * stats.rqt.fail; on success stats.rqt.cur grows by the request rounded up
+ * to 1 << MIN_RQT_SHIFT and stats.rqt.max is raised if needed.  Returns the
+ * pool address truncated to 32 bits, 0 on failure.
+ */
+u32 c4iw_rqtpool_alloc(struct c4iw_rdev *rdev, int size)
+{
+	unsigned long addr;
+
+	addr = gen_pool_alloc(rdev->rqt_pool, size << 6);
+	CTR3(KTR_IW_CXGBE, "%s addr 0x%x size %d", __func__, (u32)addr,
+	    size << 6);
+
+	if (addr == 0)
+		printf("%s: Out of RQT memory\n",
+		    device_get_nameunit(rdev->adap->dev));
+
+	mutex_lock(&rdev->stats.lock);
+	if (addr == 0) {
+		rdev->stats.rqt.fail++;
+	} else {
+		rdev->stats.rqt.cur += roundup(size << 6, 1 << MIN_RQT_SHIFT);
+		if (rdev->stats.rqt.max < rdev->stats.rqt.cur)
+			rdev->stats.rqt.max = rdev->stats.rqt.cur;
+	}
+	mutex_unlock(&rdev->stats.lock);
+
+	return (u32)addr;
+}
+
+/*
+ * Return an RQT allocation to the pool.  As in c4iw_rqtpool_alloc(), size is
+ * shifted left by 6 before use; stats.rqt.cur is reduced by that amount
+ * rounded up to 1 << MIN_RQT_SHIFT before the memory is released.
+ */
+void c4iw_rqtpool_free(struct c4iw_rdev *rdev, u32 addr, int size)
+{
+	CTR3(KTR_IW_CXGBE, "%s addr 0x%x size %d", __func__, addr, size << 6);
+
+	mutex_lock(&rdev->stats.lock);
+	rdev->stats.rqt.cur -= roundup(size << 6, 1 << MIN_RQT_SHIFT);
+	mutex_unlock(&rdev->stats.lock);
+
+	gen_pool_free(rdev->rqt_pool, (unsigned long)addr, size << 6);
+}
+
+/*
+ * Create the RQT pool over the adapter's vres.rq range with a minimum
+ * allocation order of MIN_RQT_SHIFT.  Returns 0, or -ENOMEM if the pool
+ * could not be created.
+ */
+int c4iw_rqtpool_create(struct c4iw_rdev *rdev)
+{
+	rdev->rqt_pool = gen_pool_create(rdev->adap->vres.rq.start,
+	    MIN_RQT_SHIFT, rdev->adap->vres.rq.size);
+
+	return (rdev->rqt_pool == NULL) ? -ENOMEM : 0;
+}
+
+/* Tear down the pool created by c4iw_rqtpool_create(). */
+void c4iw_rqtpool_destroy(struct c4iw_rdev *rdev)
+{
+
+	gen_pool_destroy(rdev->rqt_pool);
+}
+#endif
diff --git a/sys/dev/cxgbe/iw_cxgbe/t4.h b/sys/dev/cxgbe/iw_cxgbe/t4.h
new file mode 100644
index 0000000..023c607
--- /dev/null
+++ b/sys/dev/cxgbe/iw_cxgbe/t4.h
@@ -0,0 +1,597 @@
+/*
+ * Copyright (c) 2009-2013 Chelsio, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ * $FreeBSD$
+ */
+#ifndef __T4_H__
+#define __T4_H__
+
+/*
+ * Fixme: Adding missing defines
+ */
+#define SGE_PF_KDOORBELL 0x0
+#define QID_MASK 0xffff8000U
+#define QID_SHIFT 15
+#define QID(x) ((x) << QID_SHIFT)
+#define DBPRIO 0x00004000U
+#define PIDX_MASK 0x00003fffU
+#define PIDX_SHIFT 0
+#define PIDX(x) ((x) << PIDX_SHIFT)
+
+#define SGE_PF_GTS 0x4
+#define INGRESSQID_MASK 0xffff0000U
+#define INGRESSQID_SHIFT 16
+#define INGRESSQID(x) ((x) << INGRESSQID_SHIFT)
+#define TIMERREG_MASK 0x0000e000U
+#define TIMERREG_SHIFT 13
+#define TIMERREG(x) ((x) << TIMERREG_SHIFT)
+#define SEINTARM_MASK 0x00001000U
+#define SEINTARM_SHIFT 12
+#define SEINTARM(x) ((x) << SEINTARM_SHIFT)
+#define CIDXINC_MASK 0x00000fffU
+#define CIDXINC_SHIFT 0
+#define CIDXINC(x) ((x) << CIDXINC_SHIFT)
+
+#define T4_MAX_NUM_QP (1<<16)
+#define T4_MAX_NUM_CQ (1<<15)
+#define T4_MAX_NUM_PD (1<<15)
+#define T4_EQ_STATUS_ENTRIES (L1_CACHE_BYTES > 64 ? 2 : 1)
+#define T4_MAX_EQ_SIZE (65520 - T4_EQ_STATUS_ENTRIES)
+#define T4_MAX_IQ_SIZE (65520 - 1)
+#define T4_MAX_RQ_SIZE (8192 - T4_EQ_STATUS_ENTRIES)
+#define T4_MAX_SQ_SIZE (T4_MAX_EQ_SIZE - 1)
+#define T4_MAX_QP_DEPTH (T4_MAX_RQ_SIZE - 1)
+#define T4_MAX_CQ_DEPTH (T4_MAX_IQ_SIZE - 1)
+#define T4_MAX_NUM_STAG (1<<15)
+#define T4_MAX_MR_SIZE (~0ULL - 1)
+#define T4_PAGESIZE_MASK 0xffff000 /* 4KB-128MB */
+#define T4_STAG_UNSET 0xffffffff
+#define T4_FW_MAJ 0
+#define T4_EQ_STATUS_ENTRIES (L1_CACHE_BYTES > 64 ? 2 : 1)
+#define A_PCIE_MA_SYNC 0x30b4
+
+struct t4_status_page { /* reached in this file as queue[size] of an SQ/RQ/CQ */
+ __be32 rsvd1; /* flit 0 - hw owns */
+ __be16 rsvd2;
+ __be16 qid;
+ __be16 cidx;
+ __be16 pidx;
+ u8 qp_err; /* flit 1 - sw owns; set to 1 by t4_set_wq_in_error()/t4_set_cq_in_error() */
+ u8 db_off; /* 1 after t4_disable_wq_db(), 0 after t4_enable_wq_db() */
+ u8 pad;
+ u16 host_wq_pidx; /* returned by t4_sq_host_wq_pidx()/t4_rq_host_wq_pidx() */
+ u16 host_cidx;
+ u16 host_pidx;
+};
+
+#define T4_EQ_ENTRY_SIZE 64
+
+#define T4_SQ_NUM_SLOTS 5
+#define T4_SQ_NUM_BYTES (T4_EQ_ENTRY_SIZE * T4_SQ_NUM_SLOTS)
+#define T4_MAX_SEND_SGE ((T4_SQ_NUM_BYTES - sizeof(struct fw_ri_send_wr) - \
+ sizeof(struct fw_ri_isgl)) / sizeof(struct fw_ri_sge))
+#define T4_MAX_SEND_INLINE ((T4_SQ_NUM_BYTES - sizeof(struct fw_ri_send_wr) - \
+ sizeof(struct fw_ri_immd)))
+#define T4_MAX_WRITE_INLINE ((T4_SQ_NUM_BYTES - \
+ sizeof(struct fw_ri_rdma_write_wr) - \
+ sizeof(struct fw_ri_immd)))
+#define T4_MAX_WRITE_SGE ((T4_SQ_NUM_BYTES - \
+ sizeof(struct fw_ri_rdma_write_wr) - \
+ sizeof(struct fw_ri_isgl)) / sizeof(struct fw_ri_sge))
+#define T4_MAX_FR_IMMD ((T4_SQ_NUM_BYTES - sizeof(struct fw_ri_fr_nsmr_wr) - \
+ sizeof(struct fw_ri_immd)) & ~31UL)
+#define T4_MAX_FR_DEPTH (T4_MAX_FR_IMMD / sizeof(u64))
+
+#define T4_RQ_NUM_SLOTS 2
+#define T4_RQ_NUM_BYTES (T4_EQ_ENTRY_SIZE * T4_RQ_NUM_SLOTS)
+#define T4_MAX_RECV_SGE 4
+
+union t4_wr { /* one send-queue slot, viewed as any of the WR layouts below */
+ struct fw_ri_res_wr res;
+ struct fw_ri_wr ri;
+ struct fw_ri_rdma_write_wr write;
+ struct fw_ri_send_wr send; /* init_wr_hdr() fills the header through this view */
+ struct fw_ri_rdma_read_wr read;
+ struct fw_ri_bind_mw_wr bind;
+ struct fw_ri_fr_nsmr_wr fr;
+ struct fw_ri_inv_lstag_wr inv;
+ struct t4_status_page status; /* view used for the slot at sq.queue[sq.size] */
+ __be64 flits[T4_EQ_ENTRY_SIZE / sizeof(__be64) * T4_SQ_NUM_SLOTS]; /* raw 64-bit words covering T4_SQ_NUM_SLOTS EQ entries */
+};
+
+union t4_recv_wr { /* one receive-queue slot */
+ struct fw_ri_recv_wr recv;
+ struct t4_status_page status; /* view used for the slot at rq.queue[rq.size] */
+ __be64 flits[T4_EQ_ENTRY_SIZE / sizeof(__be64) * T4_RQ_NUM_SLOTS]; /* raw 64-bit words covering T4_RQ_NUM_SLOTS EQ entries */
+};
+
+/*
+ * Fill in the common work-request header through the send view of wqe:
+ * opcode (narrowed to u8), flags, wrid and len16 are stored as given and
+ * r1[0..2] are cleared.
+ */
+static inline void init_wr_hdr(union t4_wr *wqe, u16 wrid,
+    enum fw_wr_opcodes opcode, u8 flags, u8 len16)
+{
+	int n;
+
+	wqe->send.opcode = (u8)opcode;
+	wqe->send.flags = flags;
+	wqe->send.wrid = wrid;
+	for (n = 0; n < 3; n++)
+		wqe->send.r1[n] = 0;
+	wqe->send.len16 = len16;
+}
+
+/* CQE/AE status codes */
+#define T4_ERR_SUCCESS 0x0
+#define T4_ERR_STAG 0x1 /* STAG invalid: either the */
+ /* STAG is off limits, being 0, */
+ /* or STAG_key mismatch */
+#define T4_ERR_PDID 0x2 /* PDID mismatch */
+#define T4_ERR_QPID 0x3 /* QPID mismatch */
+#define T4_ERR_ACCESS 0x4 /* Invalid access right */
+#define T4_ERR_WRAP 0x5 /* Wrap error */
+#define T4_ERR_BOUND 0x6 /* base and bounds violation */
+#define T4_ERR_INVALIDATE_SHARED_MR 0x7 /* attempt to invalidate a */
+ /* shared memory region */
+#define T4_ERR_INVALIDATE_MR_WITH_MW_BOUND 0x8 /* attempt to invalidate a */
+ /* memory region with bound MWs */
+#define T4_ERR_ECC 0x9 /* ECC error detected */
+#define T4_ERR_ECC_PSTAG 0xA /* ECC error detected when */
+ /* reading PSTAG for a MW */
+ /* Invalidate */
+#define T4_ERR_PBL_ADDR_BOUND 0xB /* pbl addr out of bounds: */
+ /* software error */
+#define T4_ERR_SWFLUSH 0xC /* SW FLUSHED */
+#define T4_ERR_CRC 0x10 /* CRC error */
+#define T4_ERR_MARKER 0x11 /* Marker error */
+#define T4_ERR_PDU_LEN_ERR 0x12 /* invalid PDU length */
+#define T4_ERR_OUT_OF_RQE 0x13 /* out of RQE */
+#define T4_ERR_DDP_VERSION 0x14 /* wrong DDP version */
+#define T4_ERR_RDMA_VERSION 0x15 /* wrong RDMA version */
+#define T4_ERR_OPCODE 0x16 /* invalid rdma opcode */
+#define T4_ERR_DDP_QUEUE_NUM 0x17 /* invalid ddp queue number */
+#define T4_ERR_MSN 0x18 /* MSN error */
+#define T4_ERR_TBIT 0x19 /* tag bit not set correctly */
+#define T4_ERR_MO 0x1A /* MO not 0 for TERMINATE */
+ /* or READ_REQ */
+#define T4_ERR_MSN_GAP 0x1B
+#define T4_ERR_MSN_RANGE 0x1C
+#define T4_ERR_IRD_OVERFLOW 0x1D
+#define T4_ERR_RQE_ADDR_BOUND 0x1E /* RQE addr out of bounds: */
+ /* software error */
+#define T4_ERR_INTERNAL_ERR 0x1F /* internal error (opcode */
+ /* mismatch) */
+/*
+ * CQE defs
+ */
+struct t4_cqe {
+ __be32 header; /* flit 0: decoded by CQE_QPID/SW_CQE/CQE_STATUS/CQE_TYPE/CQE_OPCODE */
+ __be32 len; /* read through CQE_LEN() */
+ union {
+ struct {
+ __be32 stag;
+ __be32 msn;
+ } rcqe; /* RQ completion view: CQE_WRID_STAG()/CQE_WRID_MSN() */
+ struct {
+ u32 nada1;
+ u16 nada2;
+ u16 cidx;
+ } scqe; /* SQ completion view: CQE_WRID_SQ_IDX() reads cidx */
+ struct {
+ __be32 wrid_hi;
+ __be32 wrid_low;
+ } gen; /* generic view: CQE_WRID_HI()/CQE_WRID_LOW() */
+ } u;
+ __be64 reserved;
+ __be64 bits_type_ts; /* flit 3: gen bit (63), overflow bit (62), iq type (61:60), timestamp (59:0) */
+};
+
+/* macros for flit 0 of the cqe */
+
+#define S_CQE_QPID 12
+#define M_CQE_QPID 0xFFFFF
+#define G_CQE_QPID(x) ((((x) >> S_CQE_QPID)) & M_CQE_QPID)
+#define V_CQE_QPID(x) ((x)<<S_CQE_QPID)
+
+#define S_CQE_SWCQE 11
+#define M_CQE_SWCQE 0x1
+#define G_CQE_SWCQE(x) ((((x) >> S_CQE_SWCQE)) & M_CQE_SWCQE)
+#define V_CQE_SWCQE(x) ((x)<<S_CQE_SWCQE)
+
+#define S_CQE_STATUS 5
+#define M_CQE_STATUS 0x1F
+#define G_CQE_STATUS(x) ((((x) >> S_CQE_STATUS)) & M_CQE_STATUS)
+#define V_CQE_STATUS(x) ((x)<<S_CQE_STATUS)
+
+#define S_CQE_TYPE 4
+#define M_CQE_TYPE 0x1
+#define G_CQE_TYPE(x) ((((x) >> S_CQE_TYPE)) & M_CQE_TYPE)
+#define V_CQE_TYPE(x) ((x)<<S_CQE_TYPE)
+
+#define S_CQE_OPCODE 0
+#define M_CQE_OPCODE 0xF
+#define G_CQE_OPCODE(x) ((((x) >> S_CQE_OPCODE)) & M_CQE_OPCODE)
+#define V_CQE_OPCODE(x) ((x)<<S_CQE_OPCODE)
+
+#define SW_CQE(x) (G_CQE_SWCQE(be32_to_cpu((x)->header)))
+#define CQE_QPID(x) (G_CQE_QPID(be32_to_cpu((x)->header)))
+#define CQE_TYPE(x) (G_CQE_TYPE(be32_to_cpu((x)->header)))
+#define SQ_TYPE(x) (CQE_TYPE((x)))
+#define RQ_TYPE(x) (!CQE_TYPE((x)))
+#define CQE_STATUS(x) (G_CQE_STATUS(be32_to_cpu((x)->header)))
+#define CQE_OPCODE(x) (G_CQE_OPCODE(be32_to_cpu((x)->header)))
+
+#define CQE_SEND_OPCODE(x)(\
+ (G_CQE_OPCODE(be32_to_cpu((x)->header)) == FW_RI_SEND) || \
+ (G_CQE_OPCODE(be32_to_cpu((x)->header)) == FW_RI_SEND_WITH_SE) || \
+ (G_CQE_OPCODE(be32_to_cpu((x)->header)) == FW_RI_SEND_WITH_INV) || \
+ (G_CQE_OPCODE(be32_to_cpu((x)->header)) == FW_RI_SEND_WITH_SE_INV))
+
+#define CQE_LEN(x) (be32_to_cpu((x)->len))
+
+/* used for RQ completion processing */
+#define CQE_WRID_STAG(x) (be32_to_cpu((x)->u.rcqe.stag))
+#define CQE_WRID_MSN(x) (be32_to_cpu((x)->u.rcqe.msn))
+
+/* used for SQ completion processing */
+#define CQE_WRID_SQ_IDX(x) ((x)->u.scqe.cidx)
+
+/* generic accessor macros */
+#define CQE_WRID_HI(x) ((x)->u.gen.wrid_hi)
+#define CQE_WRID_LOW(x) ((x)->u.gen.wrid_low)
+
+/* macros for flit 3 of the cqe */
+#define S_CQE_GENBIT 63
+#define M_CQE_GENBIT 0x1
+#define G_CQE_GENBIT(x) (((x) >> S_CQE_GENBIT) & M_CQE_GENBIT)
+#define V_CQE_GENBIT(x) ((x)<<S_CQE_GENBIT)
+
+#define S_CQE_OVFBIT 62
+#define M_CQE_OVFBIT 0x1
+#define G_CQE_OVFBIT(x) ((((x) >> S_CQE_OVFBIT)) & M_CQE_OVFBIT)
+
+#define S_CQE_IQTYPE 60
+#define M_CQE_IQTYPE 0x3
+#define G_CQE_IQTYPE(x) ((((x) >> S_CQE_IQTYPE)) & M_CQE_IQTYPE)
+
+#define M_CQE_TS 0x0fffffffffffffffULL
+#define G_CQE_TS(x) ((x) & M_CQE_TS)
+
+#define CQE_OVFBIT(x) ((unsigned)G_CQE_OVFBIT(be64_to_cpu((x)->bits_type_ts)))
+#define CQE_GENBIT(x) ((unsigned)G_CQE_GENBIT(be64_to_cpu((x)->bits_type_ts)))
+#define CQE_TS(x) (G_CQE_TS(be64_to_cpu((x)->bits_type_ts)))
+
+struct t4_swsqe {
+ u64 wr_id;
+ struct t4_cqe cqe;
+ int read_len;
+ int opcode;
+ int complete;
+ int signaled;
+ u16 idx;
+};
+
+struct t4_sq {
+ union t4_wr *queue; /* queue[size] is accessed as the status page */
+ bus_addr_t dma_addr;
+ DECLARE_PCI_UNMAP_ADDR(mapping);
+ unsigned long phys_addr;
+ struct t4_swsqe *sw_sq;
+ struct t4_swsqe *oldest_read;
+ u64 udb;
+ size_t memsize;
+ u32 qid; /* placed in the doorbell value by t4_ring_sq_db() */
+ u16 in_use; /* ++ in t4_sq_produce(), -- in t4_sq_consume() */
+ u16 size; /* pidx/cidx wrap to 0 on reaching this value */
+ u16 cidx; /* advanced by t4_sq_consume() */
+ u16 pidx; /* advanced by t4_sq_produce() */
+ u16 wq_pidx; /* advanced in T4_EQ_ENTRY_SIZE units; wraps at size * T4_SQ_NUM_SLOTS */
+ u16 flags;
+};
+
+struct t4_swrqe {
+ u64 wr_id;
+};
+
+struct t4_rq {
+ union t4_recv_wr *queue; /* queue[size] is accessed as the status page */
+ bus_addr_t dma_addr;
+ DECLARE_PCI_UNMAP_ADDR(mapping);
+ struct t4_swrqe *sw_rq;
+ u64 udb;
+ size_t memsize;
+ u32 qid; /* placed in the doorbell value by t4_ring_rq_db() */
+ u32 msn; /* incremented by t4_rq_consume() */
+ u32 rqt_hwaddr;
+ u16 rqt_size;
+ u16 in_use; /* ++ in t4_rq_produce(), -- in t4_rq_consume() */
+ u16 size; /* pidx/cidx wrap to 0 on reaching this value */
+ u16 cidx; /* advanced by t4_rq_consume() */
+ u16 pidx; /* advanced by t4_rq_produce() */
+ u16 wq_pidx; /* advanced in T4_EQ_ENTRY_SIZE units; wraps at size * T4_RQ_NUM_SLOTS */
+};
+
+struct t4_wq { /* a send queue / receive queue pair */
+ struct t4_sq sq;
+ struct t4_rq rq;
+ void __iomem *db; /* register written by t4_ring_sq_db()/t4_ring_rq_db() */
+ void __iomem *gts;
+ struct c4iw_rdev *rdev;
+};
+
+/* Number of receive WRs currently outstanding (rq.in_use). */
+static inline int t4_rqes_posted(struct t4_wq *wq)
+{
+	const struct t4_rq *rq = &wq->rq;
+
+	return rq->in_use;
+}
+
+/* Non-zero when no receive WR is outstanding. */
+static inline int t4_rq_empty(struct t4_wq *wq)
+{
+	return !wq->rq.in_use;
+}
+
+/* Non-zero when the RQ holds size - 1 WRs, the most it is allowed to hold. */
+static inline int t4_rq_full(struct t4_wq *wq)
+{
+	const struct t4_rq *rq = &wq->rq;
+
+	return rq->in_use + 1 == rq->size;
+}
+
+/* Number of receive WRs that can still be posted: size - 1 - in_use. */
+static inline u32 t4_rq_avail(struct t4_wq *wq)
+{
+	const struct t4_rq *rq = &wq->rq;
+
+	return rq->size - rq->in_use - 1;
+}
+
+/*
+ * Account for one newly posted receive WR of len16 16-byte units: bump
+ * in_use, advance pidx (wrapping at size) and advance wq_pidx by the number
+ * of T4_EQ_ENTRY_SIZE entries the WR occupies (wrapping at
+ * size * T4_RQ_NUM_SLOTS).
+ */
+static inline void t4_rq_produce(struct t4_wq *wq, u8 len16)
+{
+	struct t4_rq *rq = &wq->rq;
+
+	rq->in_use++;
+
+	rq->pidx++;
+	if (rq->pidx == rq->size)
+		rq->pidx = 0;
+
+	rq->wq_pidx += DIV_ROUND_UP(len16*16, T4_EQ_ENTRY_SIZE);
+	if (rq->wq_pidx >= rq->size * T4_RQ_NUM_SLOTS)
+		rq->wq_pidx %= rq->size * T4_RQ_NUM_SLOTS;
+}
+
+/*
+ * Account for one completed receive WR: drop in_use, bump msn and advance
+ * cidx, wrapping at size.
+ */
+static inline void t4_rq_consume(struct t4_wq *wq)
+{
+	struct t4_rq *rq = &wq->rq;
+
+	rq->in_use--;
+	rq->msn++;
+
+	rq->cidx++;
+	if (rq->cidx == rq->size)
+		rq->cidx = 0;
+}
+
+/* Read host_wq_pidx from the RQ status page (the slot at queue[size]). */
+static inline u16 t4_rq_host_wq_pidx(struct t4_wq *wq)
+{
+	const struct t4_status_page *sp = &wq->rq.queue[wq->rq.size].status;
+
+	return sp->host_wq_pidx;
+}
+
+/* RQ size expressed in EQ entries: size * T4_RQ_NUM_SLOTS. */
+static inline u16 t4_rq_wq_size(struct t4_wq *wq)
+{
+	const struct t4_rq *rq = &wq->rq;
+
+	return rq->size * T4_RQ_NUM_SLOTS;
+}
+
+/* Non-zero when no send WR is outstanding. */
+static inline int t4_sq_empty(struct t4_wq *wq)
+{
+	return !wq->sq.in_use;
+}
+
+/* Non-zero when the SQ holds size - 1 WRs, the most it is allowed to hold. */
+static inline int t4_sq_full(struct t4_wq *wq)
+{
+	const struct t4_sq *sq = &wq->sq;
+
+	return sq->in_use + 1 == sq->size;
+}
+
+/* Number of send WRs that can still be posted: size - 1 - in_use. */
+static inline u32 t4_sq_avail(struct t4_wq *wq)
+{
+	const struct t4_sq *sq = &wq->sq;
+
+	return sq->size - sq->in_use - 1;
+}
+
+/*
+ * Account for one newly posted send WR of len16 16-byte units: bump in_use,
+ * advance pidx (wrapping at size) and advance wq_pidx by the number of
+ * T4_EQ_ENTRY_SIZE entries the WR occupies (wrapping at
+ * size * T4_SQ_NUM_SLOTS).
+ */
+static inline void t4_sq_produce(struct t4_wq *wq, u8 len16)
+{
+	struct t4_sq *sq = &wq->sq;
+
+	sq->in_use++;
+
+	sq->pidx++;
+	if (sq->pidx == sq->size)
+		sq->pidx = 0;
+
+	sq->wq_pidx += DIV_ROUND_UP(len16*16, T4_EQ_ENTRY_SIZE);
+	if (sq->wq_pidx >= sq->size * T4_SQ_NUM_SLOTS)
+		sq->wq_pidx %= sq->size * T4_SQ_NUM_SLOTS;
+}
+
+/*
+ * Account for one completed send WR: drop in_use and advance cidx, wrapping
+ * at size.
+ */
+static inline void t4_sq_consume(struct t4_wq *wq)
+{
+	struct t4_sq *sq = &wq->sq;
+
+	sq->in_use--;
+
+	sq->cidx++;
+	if (sq->cidx == sq->size)
+		sq->cidx = 0;
+}
+
+/* Read host_wq_pidx from the SQ status page (the slot at queue[size]). */
+static inline u16 t4_sq_host_wq_pidx(struct t4_wq *wq)
+{
+	const struct t4_status_page *sp = &wq->sq.queue[wq->sq.size].status;
+
+	return sp->host_wq_pidx;
+}
+
+/* SQ size expressed in EQ entries: size * T4_SQ_NUM_SLOTS. */
+static inline u16 t4_sq_wq_size(struct t4_wq *wq)
+{
+	const struct t4_sq *sq = &wq->sq;
+
+	return sq->size * T4_SQ_NUM_SLOTS;
+}
+
+/*
+ * Ring the SQ doorbell: after a write barrier, write the SQ qid combined
+ * with a producer-index increment of inc to wq->db.
+ */
+static inline void t4_ring_sq_db(struct t4_wq *wq, u16 inc)
+{
+	u32 val;
+
+	wmb();
+	val = QID(wq->sq.qid) | PIDX(inc);
+	writel(val, wq->db);
+}
+
+/*
+ * Ring the RQ doorbell: after a write barrier, write the RQ qid combined
+ * with a producer-index increment of inc to wq->db.
+ */
+static inline void t4_ring_rq_db(struct t4_wq *wq, u16 inc)
+{
+	u32 val;
+
+	wmb();
+	val = QID(wq->rq.qid) | PIDX(inc);
+	writel(val, wq->db);
+}
+
+/* Return the qp_err flag kept in the RQ status page (rq.queue[rq.size]). */
+static inline int t4_wq_in_error(struct t4_wq *wq)
+{
+	const struct t4_status_page *sp = &wq->rq.queue[wq->rq.size].status;
+
+	return sp->qp_err;
+}
+
+/* Set the qp_err flag in the RQ status page (rq.queue[rq.size]). */
+static inline void t4_set_wq_in_error(struct t4_wq *wq)
+{
+	struct t4_status_page *sp = &wq->rq.queue[wq->rq.size].status;
+
+	sp->qp_err = 1;
+}
+
+/* Set db_off in the RQ status page, marking doorbells as disabled. */
+static inline void t4_disable_wq_db(struct t4_wq *wq)
+{
+	struct t4_status_page *sp = &wq->rq.queue[wq->rq.size].status;
+
+	sp->db_off = 1;
+}
+
+/* Clear db_off in the RQ status page, marking doorbells as enabled. */
+static inline void t4_enable_wq_db(struct t4_wq *wq)
+{
+	struct t4_status_page *sp = &wq->rq.queue[wq->rq.size].status;
+
+	sp->db_off = 0;
+}
+
+/* Non-zero while db_off in the RQ status page is clear. */
+static inline int t4_wq_db_enabled(struct t4_wq *wq)
+{
+	const struct t4_status_page *sp = &wq->rq.queue[wq->rq.size].status;
+
+	return sp->db_off == 0;
+}
+
+struct t4_cq {
+ struct t4_cqe *queue; /* hardware CQ; queue[size] is accessed as a status page */
+ bus_addr_t dma_addr;
+ DECLARE_PCI_UNMAP_ADDR(mapping);
+ struct t4_cqe *sw_queue; /* software CQ, indexed by sw_cidx */
+ void __iomem *gts; /* register written by t4_arm_cq()/t4_hwcq_consume() */
+ struct c4iw_rdev *rdev;
+ u64 ugts;
+ size_t memsize;
+ __be64 bits_type_ts; /* copy of the last consumed hw CQE's field; checked in t4_next_hw_cqe() */
+ u32 cqid; /* placed in every gts write via INGRESSQID() */
+ u16 size; /* including status page */
+ u16 cidx; /* hw consumer index; wraps at size and toggles gen */
+ u16 sw_pidx; /* advanced by t4_swcq_produce() */
+ u16 sw_cidx; /* advanced by t4_swcq_consume() */
+ u16 sw_in_use; /* entries currently in sw_queue */
+ u16 cidx_inc; /* consumed entries not yet reported through gts */
+ u8 gen; /* generation bit value that marks a valid hw CQE */
+ u8 error; /* set to 1 by t4_next_hw_cqe() on overflow */
+};
+
+/*
+ * Report all pending consumed entries through the GTS register and finish
+ * with a write carrying SEINTARM(se).
+ *
+ * While cidx_inc exceeds CIDXINC_MASK it is written out in
+ * CIDXINC_MASK-sized pieces (SEINTARM(0), TIMERREG(7)); the remainder goes
+ * out in the last write (TIMERREG(6)).  cidx_inc is 0 afterwards.
+ * Always returns 0.
+ */
+static inline int t4_arm_cq(struct t4_cq *cq, int se)
+{
+	u32 val;
+
+	for (; cq->cidx_inc > CIDXINC_MASK; cq->cidx_inc -= CIDXINC_MASK) {
+		val = SEINTARM(0) | CIDXINC(CIDXINC_MASK) | TIMERREG(7) |
+		    INGRESSQID(cq->cqid);
+		writel(val, cq->gts);
+	}
+
+	val = SEINTARM(se) | CIDXINC(cq->cidx_inc) | TIMERREG(6) |
+	    INGRESSQID(cq->cqid);
+	writel(val, cq->gts);
+	cq->cidx_inc = 0;
+
+	return 0;
+}
+
+/* Account for one entry added to the software CQ; sw_pidx wraps at size. */
+static inline void t4_swcq_produce(struct t4_cq *cq)
+{
+	cq->sw_in_use++;
+
+	cq->sw_pidx++;
+	if (cq->sw_pidx == cq->size)
+		cq->sw_pidx = 0;
+}
+
+/* Account for one entry taken off the software CQ; sw_cidx wraps at size. */
+static inline void t4_swcq_consume(struct t4_cq *cq)
+{
+	cq->sw_in_use--;
+
+	cq->sw_cidx++;
+	if (cq->sw_cidx == cq->size)
+		cq->sw_cidx = 0;
+}
+
+/*
+ * Consume the hardware CQE at cidx.
+ *
+ * Its bits_type_ts is saved for the overflow check in t4_next_hw_cqe().
+ * Once size / 16 entries have accumulated in cidx_inc they are reported
+ * through the GTS register (unarmed, TIMERREG(7)) and the counter is reset.
+ * cidx then advances; on wrap-around the expected generation bit flips.
+ */
+static inline void t4_hwcq_consume(struct t4_cq *cq)
+{
+	u32 val;
+
+	cq->bits_type_ts = cq->queue[cq->cidx].bits_type_ts;
+
+	cq->cidx_inc++;
+	if (cq->cidx_inc == (cq->size >> 4)) {
+		val = SEINTARM(0) | CIDXINC(cq->cidx_inc) | TIMERREG(7) |
+		    INGRESSQID(cq->cqid);
+		writel(val, cq->gts);
+		cq->cidx_inc = 0;
+	}
+
+	cq->cidx++;
+	if (cq->cidx == cq->size) {
+		cq->cidx = 0;
+		cq->gen ^= 1;
+	}
+}
+
+/* Non-zero when cqe carries the generation bit the CQ currently expects. */
+static inline int t4_valid_cqe(struct t4_cq *cq, struct t4_cqe *cqe)
+{
+	unsigned genbit = CQE_GENBIT(cqe);
+
+	return genbit == cq->gen;
+}
+
+/*
+ * Peek at the next hardware CQE without consuming it.
+ *
+ * Returns 0 and stores the entry in *cqe when the entry at cidx is valid,
+ * -ENODATA when it is not, and -EOVERFLOW (also setting cq->error and
+ * logging) when the entry before cidx no longer matches the bits_type_ts
+ * saved by t4_hwcq_consume().  *cqe is written only on success.
+ */
+static inline int t4_next_hw_cqe(struct t4_cq *cq, struct t4_cqe **cqe)
+{
+	u16 prev_cidx;
+
+	prev_cidx = (cq->cidx == 0) ? cq->size - 1 : cq->cidx - 1;
+
+	if (cq->queue[prev_cidx].bits_type_ts != cq->bits_type_ts) {
+		cq->error = 1;
+		printk(KERN_ERR MOD "cq overflow cqid %u\n", cq->cqid);
+		return -EOVERFLOW;
+	}
+
+	if (!t4_valid_cqe(cq, &cq->queue[cq->cidx]))
+		return -ENODATA;
+
+	*cqe = &cq->queue[cq->cidx];
+	return 0;
+}
+
+/* Next software CQE (at sw_cidx), or NULL when the software CQ is empty. */
+static inline struct t4_cqe *t4_next_sw_cqe(struct t4_cq *cq)
+{
+	if (cq->sw_in_use == 0)
+		return NULL;
+
+	return &cq->sw_queue[cq->sw_cidx];
+}
+
+/*
+ * Peek at the next CQE, preferring the software CQ over the hardware CQ.
+ *
+ * Returns -ENODATA if the CQ is flagged in error, 0 with *cqe set to the
+ * software entry at sw_cidx if any are queued, and otherwise whatever
+ * t4_next_hw_cqe() returns.
+ */
+static inline int t4_next_cqe(struct t4_cq *cq, struct t4_cqe **cqe)
+{
+	if (cq->error)
+		return -ENODATA;
+
+	if (cq->sw_in_use) {
+		*cqe = &cq->sw_queue[cq->sw_cidx];
+		return 0;
+	}
+
+	return t4_next_hw_cqe(cq, cqe);
+}
+
+/* Return qp_err from the status page overlaid on queue[size]. */
+static inline int t4_cq_in_error(struct t4_cq *cq)
+{
+	const struct t4_status_page *sp;
+
+	sp = (struct t4_status_page *)&cq->queue[cq->size];
+	return sp->qp_err;
+}
+
+/* Set qp_err in the status page overlaid on queue[size]. */
+static inline void t4_set_cq_in_error(struct t4_cq *cq)
+{
+	struct t4_status_page *sp;
+
+	sp = (struct t4_status_page *)&cq->queue[cq->size];
+	sp->qp_err = 1;
+}
+#endif
diff --git a/sys/dev/cxgbe/iw_cxgbe/user.h b/sys/dev/cxgbe/iw_cxgbe/user.h
new file mode 100644
index 0000000..59a1f43
--- /dev/null
+++ b/sys/dev/cxgbe/iw_cxgbe/user.h
@@ -0,0 +1,70 @@
+/*
+ * Copyright (c) 2009-2013 Chelsio, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ * $FreeBSD$
+ */
+#ifndef __C4IW_USER_H__
+#define __C4IW_USER_H__
+
+#define C4IW_UVERBS_ABI_VERSION 2
+
+/*
+ * Make sure that all structs defined in this file remain laid out so
+ * that they pack the same way on 32-bit and 64-bit architectures (to
+ * avoid incompatibility between 32-bit userspace and 64-bit kernels).
+ * In particular do not use pointer types -- pass pointers in __u64
+ * instead.
+ */
+struct c4iw_create_cq_resp {
+ __u64 key;
+ __u64 gts_key;
+ __u64 memsize;
+ __u32 cqid;
+ __u32 size;
+ __u32 qid_mask;
+};
+
+struct c4iw_create_qp_resp {
+ __u64 ma_sync_key;
+ __u64 sq_key;
+ __u64 rq_key;
+ __u64 sq_db_gts_key;
+ __u64 rq_db_gts_key;
+ __u64 sq_memsize;
+ __u64 rq_memsize;
+ __u32 sqid;
+ __u32 rqid;
+ __u32 sq_size;
+ __u32 rq_size;
+ __u32 qid_mask;
+ __u32 flags;
+};
+#endif
diff --git a/sys/modules/cxgbe/Makefile b/sys/modules/cxgbe/Makefile
index fb75485..d7ce647 100644
--- a/sys/modules/cxgbe/Makefile
+++ b/sys/modules/cxgbe/Makefile
@@ -6,9 +6,16 @@ SUBDIR = if_cxgbe
SUBDIR+= t4_firmware
SUBDIR+= t5_firmware
SUBDIR+= ${_tom}
+SUBDIR+= ${_iw_cxgbe}
-.if ${MACHINE_CPUARCH} == "amd64" || ${MACHINE_CPUARCH} == "i386"
+.if ${MACHINE_CPUARCH} == "amd64"
_tom= tom
+_iw_cxgbe= iw_cxgbe
.endif
+.if ${MACHINE_CPUARCH} == "i386"
+_tom= tom
+.endif
+
+
.include <bsd.subdir.mk>
diff --git a/sys/modules/cxgbe/iw_cxgbe/Makefile b/sys/modules/cxgbe/iw_cxgbe/Makefile
new file mode 100644
index 0000000..7704650
--- /dev/null
+++ b/sys/modules/cxgbe/iw_cxgbe/Makefile
@@ -0,0 +1,27 @@
+# $FreeBSD$
+
+.include <bsd.own.mk>
+
+CXGBE = ${.CURDIR}/../../../dev/cxgbe
+.PATH: ${CXGBE}/iw_cxgbe
+
+KMOD= iw_cxgbe
+SRCS= device.c cm.c provider.c mem.c cq.c qp.c resource.c ev.c id_table.c
+SRCS+= bus_if.h device_if.h opt_sched.h pci_if.h pcib_if.h opt_ktr.h
+SRCS+= opt_inet.h opt_ofed.h vnode_if.h
+CFLAGS+= -I${CXGBE} -I${.CURDIR}/../../../ofed/include -DLINUX_TYPES_DEFINED
+
+.if !defined(KERNBUILDDIR)
+.if ${MK_INET_SUPPORT} != "no"
+opt_inet.h:
+ @echo "#define INET 1" > ${.TARGET}
+ @echo "#define TCP_OFFLOAD 1" >> ${.TARGET}
+.endif
+
+.if ${MK_INET6_SUPPORT} != "no"
+opt_inet6.h:
+ @echo "#define INET6 1" > ${.TARGET}
+.endif
+.endif
+
+.include <bsd.kmod.mk>
OpenPOWER on IntegriCloud