author | kmacy <kmacy@FreeBSD.org> | 2008-05-05 18:46:18 +0000
committer | kmacy <kmacy@FreeBSD.org> | 2008-05-05 18:46:18 +0000
commit | 2ad219aa54ae0dd6df17a319286e834f57a26728
tree | 1fc14146ce2bb4be1d42e8ad0ded68371a4f095b
parent | e8c33242a59704aa01f83bdd6f3ba4b6978ff210
Import support for iWARP on the Chelsio T3 card.
Supported by Chelsio Inc.
-rw-r--r-- | sys/dev/cxgb/ulp/iw_cxgb/iw_cxgb.c | 294
-rw-r--r-- | sys/dev/cxgb/ulp/iw_cxgb/iw_cxgb.h | 168
-rw-r--r-- | sys/dev/cxgb/ulp/iw_cxgb/iw_cxgb_cm.c | 1779
-rw-r--r-- | sys/dev/cxgb/ulp/iw_cxgb/iw_cxgb_cm.h | 249
-rw-r--r-- | sys/dev/cxgb/ulp/iw_cxgb/iw_cxgb_cq.c | 276
-rw-r--r-- | sys/dev/cxgb/ulp/iw_cxgb/iw_cxgb_dbg.c | 255
-rw-r--r-- | sys/dev/cxgb/ulp/iw_cxgb/iw_cxgb_ev.c | 265
-rw-r--r-- | sys/dev/cxgb/ulp/iw_cxgb/iw_cxgb_hal.c | 1418
-rw-r--r-- | sys/dev/cxgb/ulp/iw_cxgb/iw_cxgb_hal.h | 330
-rw-r--r-- | sys/dev/cxgb/ulp/iw_cxgb/iw_cxgb_mem.c | 219
-rw-r--r-- | sys/dev/cxgb/ulp/iw_cxgb/iw_cxgb_provider.c | 1295
-rw-r--r-- | sys/dev/cxgb/ulp/iw_cxgb/iw_cxgb_provider.h | 362
-rw-r--r-- | sys/dev/cxgb/ulp/iw_cxgb/iw_cxgb_qp.c | 1052
-rw-r--r-- | sys/dev/cxgb/ulp/iw_cxgb/iw_cxgb_resource.c | 382
-rw-r--r-- | sys/dev/cxgb/ulp/iw_cxgb/iw_cxgb_resource.h | 59
-rw-r--r-- | sys/dev/cxgb/ulp/iw_cxgb/iw_cxgb_user.h | 68
-rw-r--r-- | sys/dev/cxgb/ulp/iw_cxgb/iw_cxgb_wr.h | 684
17 files changed, 9155 insertions, 0 deletions
diff --git a/sys/dev/cxgb/ulp/iw_cxgb/iw_cxgb.c b/sys/dev/cxgb/ulp/iw_cxgb/iw_cxgb.c new file mode 100644 index 0000000..b198904 --- /dev/null +++ b/sys/dev/cxgb/ulp/iw_cxgb/iw_cxgb.c @@ -0,0 +1,294 @@ +/************************************************************************** + +Copyright (c) 2007, Chelsio Inc. +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + + 1. Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimer. + + 2. Neither the name of the Chelsio Corporation nor the names of its + contributors may be used to endorse or promote products derived from + this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +POSSIBILITY OF SUCH DAMAGE. + +***************************************************************************/ +#include <sys/cdefs.h> +__FBSDID("$FreeBSD$"); + +#include <sys/param.h> +#include <sys/systm.h> +#include <sys/kernel.h> +#include <sys/bus.h> +#include <sys/module.h> +#include <sys/pciio.h> +#include <sys/conf.h> +#include <machine/bus.h> +#include <machine/resource.h> +#include <sys/bus_dma.h> +#include <sys/rman.h> +#include <sys/ioccom.h> +#include <sys/mbuf.h> +#include <sys/rwlock.h> +#include <sys/linker.h> +#include <sys/firmware.h> +#include <sys/socket.h> +#include <sys/sockio.h> +#include <sys/smp.h> +#include <sys/sysctl.h> +#include <sys/queue.h> +#include <sys/taskqueue.h> +#include <sys/proc.h> +#include <sys/eventhandler.h> + +#include <net/if.h> +#include <net/if_var.h> + +#include <netinet/in.h> + +#include <contrib/rdma/ib_verbs.h> + + +#ifdef CONFIG_DEFINED +#include <cxgb_include.h> +#include <ulp/iw_cxgb/iw_cxgb_wr.h> +#include <ulp/iw_cxgb/iw_cxgb_hal.h> +#include <ulp/iw_cxgb/iw_cxgb_provider.h> +#include <ulp/iw_cxgb/iw_cxgb_cm.h> +#include <ulp/iw_cxgb/iw_cxgb.h> +#else +#include <dev/cxgb/cxgb_include.h> +#include <dev/cxgb/ulp/iw_cxgb/iw_cxgb_wr.h> +#include <dev/cxgb/ulp/iw_cxgb/iw_cxgb_hal.h> +#include <dev/cxgb/ulp/iw_cxgb/iw_cxgb_provider.h> +#include <dev/cxgb/ulp/iw_cxgb/iw_cxgb_cm.h> +#include <dev/cxgb/ulp/iw_cxgb/iw_cxgb.h> +#endif + +/* + * XXX :-/ + * + */ + +#define idr_init(x) + +cxgb_cpl_handler_func t3c_handlers[NUM_CPL_CMDS]; + +static void open_rnic_dev(struct t3cdev *); +static void close_rnic_dev(struct t3cdev *); + +static TAILQ_HEAD( ,iwch_dev) dev_list; +static struct mtx dev_mutex; +static eventhandler_tag event_tag; + +static void +rnic_init(struct iwch_dev *rnicp) +{ + CTR2(KTR_IW_CXGB, "%s iwch_dev %p", __FUNCTION__, rnicp); + idr_init(&rnicp->cqidr); + idr_init(&rnicp->qpidr); + idr_init(&rnicp->mmidr); + mtx_init(&rnicp->lock, "iwch rnic lock", NULL, MTX_DEF|MTX_DUPOK); + + rnicp->attr.vendor_id 
= 0x168; + rnicp->attr.vendor_part_id = 7; + rnicp->attr.max_qps = T3_MAX_NUM_QP - 32; + rnicp->attr.max_wrs = (1UL << 24) - 1; + rnicp->attr.max_sge_per_wr = T3_MAX_SGE; + rnicp->attr.max_sge_per_rdma_write_wr = T3_MAX_SGE; + rnicp->attr.max_cqs = T3_MAX_NUM_CQ - 1; + rnicp->attr.max_cqes_per_cq = (1UL << 24) - 1; + rnicp->attr.max_mem_regs = cxio_num_stags(&rnicp->rdev); + rnicp->attr.max_phys_buf_entries = T3_MAX_PBL_SIZE; + rnicp->attr.max_pds = T3_MAX_NUM_PD - 1; + rnicp->attr.mem_pgsizes_bitmask = 0x7FFF; /* 4KB-128MB */ + rnicp->attr.can_resize_wq = 0; + rnicp->attr.max_rdma_reads_per_qp = 8; + rnicp->attr.max_rdma_read_resources = + rnicp->attr.max_rdma_reads_per_qp * rnicp->attr.max_qps; + rnicp->attr.max_rdma_read_qp_depth = 8; /* IRD */ + rnicp->attr.max_rdma_read_depth = + rnicp->attr.max_rdma_read_qp_depth * rnicp->attr.max_qps; + rnicp->attr.rq_overflow_handled = 0; + rnicp->attr.can_modify_ird = 0; + rnicp->attr.can_modify_ord = 0; + rnicp->attr.max_mem_windows = rnicp->attr.max_mem_regs - 1; + rnicp->attr.stag0_value = 1; + rnicp->attr.zbva_support = 1; + rnicp->attr.local_invalidate_fence = 1; + rnicp->attr.cq_overflow_detection = 1; + return; +} + +static void +open_rnic_dev(struct t3cdev *tdev) +{ + struct iwch_dev *rnicp; + static int vers_printed; + + CTR2(KTR_IW_CXGB, "%s t3cdev %p", __FUNCTION__, tdev); + if (!vers_printed++) + printf("Chelsio T3 RDMA Driver - version %s\n", + DRV_VERSION); + rnicp = (struct iwch_dev *)ib_alloc_device(sizeof(*rnicp)); + if (!rnicp) { + printf("Cannot allocate ib device\n"); + return; + } + rnicp->rdev.ulp = rnicp; + rnicp->rdev.t3cdev_p = tdev; + + mtx_lock(&dev_mutex); + + if (cxio_rdev_open(&rnicp->rdev)) { + mtx_unlock(&dev_mutex); + printf("Unable to open CXIO rdev\n"); + ib_dealloc_device(&rnicp->ibdev); + return; + } + + rnic_init(rnicp); + + TAILQ_INSERT_TAIL(&dev_list, rnicp, entry); + mtx_unlock(&dev_mutex); + + if (iwch_register_device(rnicp)) { + printf("Unable to register device\n"); + close_rnic_dev(tdev); + } +#ifdef notyet + printf("Initialized device %s\n", + pci_name(rnicp->rdev.rnic_info.pdev)); +#endif + return; +} + +static void +close_rnic_dev(struct t3cdev *tdev) +{ + struct iwch_dev *dev, *tmp; + CTR2(KTR_IW_CXGB, "%s t3cdev %p", __FUNCTION__, tdev); + mtx_lock(&dev_mutex); + + TAILQ_FOREACH_SAFE(dev, &dev_list, entry, tmp) { + if (dev->rdev.t3cdev_p == tdev) { +#ifdef notyet + list_del(&dev->entry); + iwch_unregister_device(dev); + cxio_rdev_close(&dev->rdev); + idr_destroy(&dev->cqidr); + idr_destroy(&dev->qpidr); + idr_destroy(&dev->mmidr); + ib_dealloc_device(&dev->ibdev); +#endif + break; + } + } + mtx_unlock(&dev_mutex); +} + +static ifaddr_event_handler_t +ifaddr_event_handler(void *arg, struct ifnet *ifp) +{ + printf("%s if name %s \n", __FUNCTION__, ifp->if_xname); + if (ifp->if_capabilities & IFCAP_TOE4) { + KASSERT(T3CDEV(ifp) != NULL, ("null t3cdev ptr!")); + if (cxio_hal_find_rdev_by_t3cdev(T3CDEV(ifp)) == NULL) + open_rnic_dev(T3CDEV(ifp)); + } + return 0; +} + + +static int +iwch_init_module(void) +{ + int err; + struct ifnet *ifp; + + printf("%s enter\n", __FUNCTION__); + TAILQ_INIT(&dev_list); + mtx_init(&dev_mutex, "iwch dev_list lock", NULL, MTX_DEF); + + err = cxio_hal_init(); + if (err) + return err; + err = iwch_cm_init(); + if (err) + return err; + cxio_register_ev_cb(iwch_ev_dispatch); + + /* Register for ifaddr events to dynamically add TOE devs */ + event_tag = EVENTHANDLER_REGISTER(ifaddr_event, ifaddr_event_handler, + NULL, EVENTHANDLER_PRI_ANY); + + /* Register existing TOE 
interfaces by walking the ifnet chain */ + IFNET_RLOCK(); + TAILQ_FOREACH(ifp, &ifnet, if_link) { + (void)ifaddr_event_handler(NULL, ifp); + } + IFNET_RUNLOCK(); + return 0; +} + +static void +iwch_exit_module(void) +{ + EVENTHANDLER_DEREGISTER(ifaddr_event, event_tag); + cxio_unregister_ev_cb(iwch_ev_dispatch); + iwch_cm_term(); + cxio_hal_exit(); +} + +static int +iwch_load(module_t mod, int cmd, void *arg) +{ + int err = 0; + + switch (cmd) { + case MOD_LOAD: + printf("Loading iw_cxgb.\n"); + + iwch_init_module(); + break; + case MOD_QUIESCE: + break; + case MOD_UNLOAD: + printf("Unloading iw_cxgb.\n"); + iwch_exit_module(); + break; + case MOD_SHUTDOWN: + break; + default: + err = EOPNOTSUPP; + break; + } + + return (err); +} + +static moduledata_t mod_data = { + "iw_cxgb", + iwch_load, + 0 +}; + +MODULE_VERSION(iw_cxgb, 1); +DECLARE_MODULE(iw_cxgb, mod_data, SI_SUB_EXEC, SI_ORDER_ANY); +MODULE_DEPEND(iw_cxgb, rdma_core, 1, 1, 1); +MODULE_DEPEND(iw_cxgb, if_cxgb, 1, 1, 1); +MODULE_DEPEND(iw_cxgb, t3_tom, 1, 1, 1); + diff --git a/sys/dev/cxgb/ulp/iw_cxgb/iw_cxgb.h b/sys/dev/cxgb/ulp/iw_cxgb/iw_cxgb.h new file mode 100644 index 0000000..f4b2856 --- /dev/null +++ b/sys/dev/cxgb/ulp/iw_cxgb/iw_cxgb.h @@ -0,0 +1,168 @@ +/************************************************************************** + +Copyright (c) 2007, 2008 Chelsio Inc. +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + + 1. Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimer. + + 2. Neither the name of the Chelsio Corporation nor the names of its + contributors may be used to endorse or promote products derived from + this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +POSSIBILITY OF SUCH DAMAGE. + +$FreeBSD$ + +***************************************************************************/ + +#ifndef __IWCH_H__ +#define __IWCH_H__ + +struct iwch_pd; +struct iwch_cq; +struct iwch_qp; +struct iwch_mr; + + +struct iwch_rnic_attributes { + u32 vendor_id; + u32 vendor_part_id; + u32 max_qps; + u32 max_wrs; /* Max for any SQ/RQ */ + u32 max_sge_per_wr; + u32 max_sge_per_rdma_write_wr; /* for RDMA Write WR */ + u32 max_cqs; + u32 max_cqes_per_cq; + u32 max_mem_regs; + u32 max_phys_buf_entries; /* for phys buf list */ + u32 max_pds; + + /* + * The memory page sizes supported by this RNIC. + * Bit position i in bitmap indicates page of + * size (4k)^i. Phys block list mode unsupported. + */ + u32 mem_pgsizes_bitmask; + u8 can_resize_wq; + + /* + * The maximum number of RDMA Reads that can be outstanding + * per QP with this RNIC as the target. 
+ */ + u32 max_rdma_reads_per_qp; + + /* + * The maximum number of resources used for RDMA Reads + * by this RNIC with this RNIC as the target. + */ + u32 max_rdma_read_resources; + + /* + * The max depth per QP for initiation of RDMA Read + * by this RNIC. + */ + u32 max_rdma_read_qp_depth; + + /* + * The maximum depth for initiation of RDMA Read + * operations by this RNIC on all QPs + */ + u32 max_rdma_read_depth; + u8 rq_overflow_handled; + u32 can_modify_ird; + u32 can_modify_ord; + u32 max_mem_windows; + u32 stag0_value; + u8 zbva_support; + u8 local_invalidate_fence; + u32 cq_overflow_detection; +}; + +struct iwch_dev { + struct ib_device ibdev; + struct cxio_rdev rdev; + u32 device_cap_flags; + struct iwch_rnic_attributes attr; + struct kvl cqidr; + struct kvl qpidr; + struct kvl mmidr; + struct mtx lock; + TAILQ_ENTRY(iwch_dev) entry; +}; + +#ifndef container_of +#define container_of(p, stype, field) ((stype *)(((uint8_t *)(p)) - offsetof(stype, field))) +#endif + +static inline struct iwch_dev *to_iwch_dev(struct ib_device *ibdev) +{ + return container_of(ibdev, struct iwch_dev, ibdev); +} + +static inline int t3b_device(const struct iwch_dev *rhp) +{ + return rhp->rdev.t3cdev_p->type == T3B; +} + +static inline int t3a_device(const struct iwch_dev *rhp) +{ + return rhp->rdev.t3cdev_p->type == T3A; +} + +static inline struct iwch_cq *get_chp(struct iwch_dev *rhp, u32 cqid) +{ + return kvl_lookup(&rhp->cqidr, cqid); +} + +static inline struct iwch_qp *get_qhp(struct iwch_dev *rhp, u32 qpid) +{ + return kvl_lookup(&rhp->qpidr, qpid); +} + +static inline struct iwch_mr *get_mhp(struct iwch_dev *rhp, u32 mmid) +{ + return kvl_lookup(&rhp->mmidr, mmid); +} + +static inline int insert_handle(struct iwch_dev *rhp, struct kvl *kvlp, + void *handle, u32 id) +{ + int ret; + u32 newid; + + do { + mtx_lock(&rhp->lock); + ret = kvl_alloc_above(kvlp, handle, id, &newid); + WARN_ON(ret != 0); + WARN_ON(!ret && newid != id); + mtx_unlock(&rhp->lock); + } while (ret == -EAGAIN); + + return ret; +} + +static inline void remove_handle(struct iwch_dev *rhp, struct kvl *kvlp, u32 id) +{ + mtx_lock(&rhp->lock); + kvl_delete(kvlp, id); + mtx_unlock(&rhp->lock); +} + +extern struct cxgb_client t3c_client; +extern cxgb_cpl_handler_func t3c_handlers[NUM_CPL_CMDS]; +extern void iwch_ev_dispatch(struct cxio_rdev *rdev_p, struct mbuf *m); +#endif diff --git a/sys/dev/cxgb/ulp/iw_cxgb/iw_cxgb_cm.c b/sys/dev/cxgb/ulp/iw_cxgb/iw_cxgb_cm.c new file mode 100644 index 0000000..cec4611 --- /dev/null +++ b/sys/dev/cxgb/ulp/iw_cxgb/iw_cxgb_cm.c @@ -0,0 +1,1779 @@ +/************************************************************************** + +Copyright (c) 2007, Chelsio Inc. +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + + 1. Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimer. + + 2. Neither the name of the Chelsio Corporation nor the names of its + contributors may be used to endorse or promote products derived from + this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +POSSIBILITY OF SUCH DAMAGE. + +***************************************************************************/ +#include <sys/cdefs.h> +__FBSDID("$FreeBSD$"); + +#include <sys/param.h> +#include <sys/systm.h> +#include <sys/kernel.h> +#include <sys/bus.h> +#include <sys/module.h> +#include <sys/pciio.h> +#include <sys/conf.h> +#include <machine/bus.h> +#include <machine/resource.h> +#include <sys/bus_dma.h> +#include <sys/rman.h> +#include <sys/ioccom.h> +#include <sys/mbuf.h> +#include <sys/rwlock.h> +#include <sys/linker.h> +#include <sys/firmware.h> +#include <sys/socket.h> +#include <sys/socketvar.h> +#include <sys/sockio.h> +#include <sys/smp.h> +#include <sys/sysctl.h> +#include <sys/syslog.h> +#include <sys/queue.h> +#include <sys/taskqueue.h> +#include <sys/proc.h> +#include <sys/uio.h> + +#include <net/route.h> +#include <netinet/in_systm.h> +#include <netinet/in.h> +#include <netinet/in_pcb.h> +#include <netinet/ip.h> +#include <netinet/ip_var.h> +#include <netinet/tcp_var.h> +#include <netinet/tcp.h> +#include <netinet/tcpip.h> + +#include <contrib/rdma/ib_verbs.h> + + +#ifdef CONFIG_DEFINED +#include <cxgb_include.h> +#include <ulp/tom/cxgb_tom.h> +#include <ulp/tom/cxgb_t3_ddp.h> +#include <ulp/tom/cxgb_defs.h> +#include <ulp/tom/cxgb_toepcb.h> +#include <ulp/iw_cxgb/iw_cxgb_wr.h> +#include <ulp/iw_cxgb/iw_cxgb_hal.h> +#include <ulp/iw_cxgb/iw_cxgb_provider.h> +#include <ulp/iw_cxgb/iw_cxgb_cm.h> +#include <ulp/iw_cxgb/iw_cxgb.h> +#else +#include <dev/cxgb/cxgb_include.h> +#include <dev/cxgb/ulp/tom/cxgb_tom.h> +#include <dev/ulp/tom/cxgb_t3_ddp.h> +#include <dev/cxgb/ulp/tom/cxgb_defs.h> +#include <dev/cxgb/ulp/tom/cxgb_toepcb.h> +#include <dev/cxgb/ulp/iw_cxgb/iw_cxgb_wr.h> +#include <dev/cxgb/ulp/iw_cxgb/iw_cxgb_hal.h> +#include <dev/cxgb/ulp/iw_cxgb/iw_cxgb_provider.h> +#include <dev/cxgb/ulp/iw_cxgb/iw_cxgb_cm.h> +#include <dev/cxgb/ulp/iw_cxgb/iw_cxgb.h> +#endif + +#ifdef KTR +static char *states[] = { + "idle", + "listen", + "connecting", + "mpa_wait_req", + "mpa_req_sent", + "mpa_req_rcvd", + "mpa_rep_sent", + "fpdu_mode", + "aborting", + "closing", + "moribund", + "dead", + NULL, +}; +#endif + +SYSCTL_NODE(_hw, OID_AUTO, cxgb, CTLFLAG_RD, 0, "iw_cxgb driver parameters"); + +static int ep_timeout_secs = 10; +TUNABLE_INT("hw.iw_cxgb.ep_timeout_secs", &ep_timeout_secs); +SYSCTL_UINT(_hw_cxgb, OID_AUTO, ep_timeout_secs, CTLFLAG_RDTUN, &ep_timeout_secs, 0, + "CM Endpoint operation timeout in seconds (default=10)"); + +static int mpa_rev = 1; +TUNABLE_INT("hw.iw_cxgb.mpa_rev", &mpa_rev); +SYSCTL_UINT(_hw_cxgb, OID_AUTO, mpa_rev, CTLFLAG_RDTUN, &mpa_rev, 0, + "MPA Revision, 0 supports amso1100, 1 is spec compliant. 
(default=1)"); + +static int markers_enabled = 0; +TUNABLE_INT("hw.iw_cxgb.markers_enabled", &markers_enabled); +SYSCTL_UINT(_hw_cxgb, OID_AUTO, markers_enabled, CTLFLAG_RDTUN, &markers_enabled, 0, + "Enable MPA MARKERS (default(0)=disabled)"); + +static int crc_enabled = 1; +TUNABLE_INT("hw.iw_cxgb.crc_enabled", &crc_enabled); +SYSCTL_UINT(_hw_cxgb, OID_AUTO, crc_enabled, CTLFLAG_RDTUN, &crc_enabled, 0, + "Enable MPA CRC (default(1)=enabled)"); + +static int rcv_win = 256 * 1024; +TUNABLE_INT("hw.iw_cxgb.rcv_win", &rcv_win); +SYSCTL_UINT(_hw_cxgb, OID_AUTO, rcv_win, CTLFLAG_RDTUN, &rcv_win, 0, + "TCP receive window in bytes (default=256KB)"); + +static int snd_win = 32 * 1024; +TUNABLE_INT("hw.iw_cxgb.snd_win", &snd_win); +SYSCTL_UINT(_hw_cxgb, OID_AUTO, snd_win, CTLFLAG_RDTUN, &snd_win, 0, + "TCP send window in bytes (default=32KB)"); + +static unsigned int nocong = 0; +TUNABLE_INT("hw.iw_cxgb.nocong", &nocong); +SYSCTL_UINT(_hw_cxgb, OID_AUTO, nocong, CTLFLAG_RDTUN, &nocong, 0, + "Turn off congestion control (default=0)"); + +static unsigned int cong_flavor = 1; +TUNABLE_INT("hw.iw_cxgb.cong_flavor", &cong_flavor); +SYSCTL_UINT(_hw_cxgb, OID_AUTO, cong_flavor, CTLFLAG_RDTUN, &cong_flavor, 0, + "TCP Congestion control flavor (default=1)"); + +static void ep_timeout(void *arg); +static void connect_reply_upcall(struct iwch_ep *ep, int status); +static void iwch_so_upcall(struct socket *so, void *arg, int waitflag); + +/* + * Cruft to offload socket upcalls onto thread. + */ +static struct mtx req_lock; +static TAILQ_HEAD(iwch_ep_list, iwch_ep_common) req_list; +static struct task iw_cxgb_task; +static struct taskqueue *iw_cxgb_taskq; +static void process_req(void *ctx, int pending); + +static void +start_ep_timer(struct iwch_ep *ep) +{ + CTR2(KTR_IW_CXGB, "%s ep %p", __FUNCTION__, ep); + if (callout_pending(&ep->timer)) { + CTR2(KTR_IW_CXGB, "%s stopped / restarted timer ep %p", __FUNCTION__, ep); + callout_deactivate(&ep->timer); + callout_drain(&ep->timer); + } else { + /* + * XXX this looks racy + */ + get_ep(&ep->com); + callout_init(&ep->timer, TRUE); + } + callout_reset(&ep->timer, ep_timeout_secs * hz, ep_timeout, ep); +} + +static void +stop_ep_timer(struct iwch_ep *ep) +{ + CTR2(KTR_IW_CXGB, "%s ep %p", __FUNCTION__, ep); + callout_drain(&ep->timer); + put_ep(&ep->com); +} + +static int set_tcpinfo(struct iwch_ep *ep) +{ + struct tcp_info ti; + struct sockopt sopt; + int err; + + sopt.sopt_dir = SOPT_GET; + sopt.sopt_level = IPPROTO_TCP; + sopt.sopt_name = TCP_INFO; + sopt.sopt_val = (caddr_t)&ti; + sopt.sopt_valsize = sizeof ti; + sopt.sopt_td = NULL; + + err = sogetopt(ep->com.so, &sopt); + if (err) { + printf("%s can't get tcpinfo\n", __FUNCTION__); + return -err; + } + if (!(ti.tcpi_options & TCPI_OPT_TOE)) { + printf("%s connection NOT OFFLOADED!\n", __FUNCTION__); + return -EINVAL; + } + + ep->snd_seq = ti.tcpi_snd_nxt; + ep->rcv_seq = ti.tcpi_rcv_nxt; + ep->emss = ti.__tcpi_snd_mss - sizeof(struct tcpiphdr); + ep->hwtid = TOEPCB(ep->com.so)->tp_tid; /* XXX */ + if (ti.tcpi_options & TCPI_OPT_TIMESTAMPS) + ep->emss -= 12; + if (ep->emss < 128) + ep->emss = 128; + return 0; +} + +static enum iwch_ep_state +state_read(struct iwch_ep_common *epc) +{ + enum iwch_ep_state state; + + mtx_lock(&epc->lock); + state = epc->state; + mtx_unlock(&epc->lock); + return state; +} + +static void +__state_set(struct iwch_ep_common *epc, enum iwch_ep_state new) +{ + epc->state = new; +} + +static void +state_set(struct iwch_ep_common *epc, enum iwch_ep_state new) +{ + + mtx_lock(&epc->lock); 
+ CTR3(KTR_IW_CXGB, "%s - %s -> %s", __FUNCTION__, states[epc->state], states[new]); + __state_set(epc, new); + mtx_unlock(&epc->lock); + return; +} + +static void * +alloc_ep(int size, int flags) +{ + struct iwch_ep_common *epc; + + epc = malloc(size, M_DEVBUF, flags); + if (epc) { + memset(epc, 0, size); + refcount_init(&epc->refcount, 1); + mtx_init(&epc->lock, "iwch_epc lock", NULL, MTX_DEF|MTX_DUPOK); + cv_init(&epc->waitq, "iwch_epc cv"); + } + CTR2(KTR_IW_CXGB, "%s alloc ep %p", __FUNCTION__, epc); + return epc; +} + +void __free_ep(struct iwch_ep_common *epc) +{ + CTR3(KTR_IW_CXGB, "%s ep %p state %s", __FUNCTION__, epc, states[state_read(epc)]); + KASSERT(!epc->so, ("%s warning ep->so %p \n", __FUNCTION__, epc->so)); + KASSERT(!epc->entry.tqe_prev, ("%s epc %p still on req list!\n", __FUNCTION__, epc)); + free(epc, M_DEVBUF); +} + +int +iwch_quiesce_tid(struct iwch_ep *ep) +{ +#ifdef notyet + struct cpl_set_tcb_field *req; + struct mbuf *m = get_mbuf(NULL, sizeof(*req), M_NOWAIT); + + if (m == NULL) + return (-ENOMEM); + req = (struct cpl_set_tcb_field *) mbuf_put(m, sizeof(*req)); + req->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD)); + req->wr.wr_lo = htonl(V_WR_TID(ep->hwtid)); + OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_SET_TCB_FIELD, ep->hwtid)); + req->reply = 0; + req->cpu_idx = 0; + req->word = htons(W_TCB_RX_QUIESCE); + req->mask = cpu_to_be64(1ULL << S_TCB_RX_QUIESCE); + req->val = cpu_to_be64(1 << S_TCB_RX_QUIESCE); + + m_set_priority(m, CPL_PRIORITY_DATA); + cxgb_ofld_send(ep->com.tdev, m); +#endif + return 0; +} + +int +iwch_resume_tid(struct iwch_ep *ep) +{ +#ifdef notyet + struct cpl_set_tcb_field *req; + struct mbuf *m = get_mbuf(NULL, sizeof(*req), M_NOWAIT); + + if (m == NULL) + return (-ENOMEM); + req = (struct cpl_set_tcb_field *) mbuf_put(m, sizeof(*req)); + req->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD)); + req->wr.wr_lo = htonl(V_WR_TID(ep->hwtid)); + OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_SET_TCB_FIELD, ep->hwtid)); + req->reply = 0; + req->cpu_idx = 0; + req->word = htons(W_TCB_RX_QUIESCE); + req->mask = cpu_to_be64(1ULL << S_TCB_RX_QUIESCE); + req->val = 0; + + m_set_priority(m, CPL_PRIORITY_DATA); + cxgb_ofld_send(ep->com.tdev, m); +#endif + return 0; +} + +static struct rtentry * +find_route(__be32 local_ip, __be32 peer_ip, __be16 local_port, + __be16 peer_port, u8 tos) +{ + struct route iproute; + struct sockaddr_in *dst = (struct sockaddr_in *)&iproute.ro_dst; + + bzero(&iproute, sizeof iproute); + dst->sin_family = AF_INET; + dst->sin_len = sizeof *dst; + dst->sin_addr.s_addr = peer_ip; + + rtalloc(&iproute); + return iproute.ro_rt; +} + +static void +close_socket(struct iwch_ep_common *epc) +{ + CTR4(KTR_IW_CXGB, "%s ep %p so %p state %s", __FUNCTION__, epc, epc->so, states[epc->state]); + SOCK_LOCK(epc->so); + epc->so->so_upcall = NULL; + epc->so->so_upcallarg = NULL; + epc->so->so_rcv.sb_flags &= ~SB_UPCALL; + SOCK_UNLOCK(epc->so); + soshutdown(epc->so, SHUT_WR|SHUT_RD); + epc->so = NULL; +} + +static void +shutdown_socket(struct iwch_ep_common *epc) +{ + CTR4(KTR_IW_CXGB, "%s ep %p so %p state %s", __FUNCTION__, epc, epc->so, states[epc->state]); + soshutdown(epc->so, SHUT_WR); +} + +static void +abort_socket(struct iwch_ep *ep) +{ + struct sockopt sopt; + int err; + struct linger l; + + CTR4(KTR_IW_CXGB, "%s ep %p so %p state %s", __FUNCTION__, ep, ep->com.so, states[ep->com.state]); + l.l_onoff = 1; + l.l_linger = 0; + + /* linger_time of 0 forces RST to be sent */ + sopt.sopt_dir = SOPT_SET; + sopt.sopt_level = SOL_SOCKET; + 
sopt.sopt_name = SO_LINGER; + sopt.sopt_val = (caddr_t)&l; + sopt.sopt_valsize = sizeof l; + sopt.sopt_td = NULL; + err = sosetopt(ep->com.so, &sopt); + if (err) + printf("%s can't set linger to 0, no RST! err %d\n", __FUNCTION__, err); +} + +static void +send_mpa_req(struct iwch_ep *ep) +{ + int mpalen; + struct mpa_message *mpa; + struct mbuf *m; + int err; + + CTR3(KTR_IW_CXGB, "%s ep %p pd_len %d", __FUNCTION__, ep, ep->plen); + + mpalen = sizeof(*mpa) + ep->plen; + m = m_gethdr(mpalen, M_NOWAIT); + if (m == NULL) { + connect_reply_upcall(ep, -ENOMEM); + return; + } + mpa = mtod(m, struct mpa_message *); + m->m_len = mpalen; + m->m_pkthdr.len = mpalen; + memset(mpa, 0, sizeof(*mpa)); + memcpy(mpa->key, MPA_KEY_REQ, sizeof(mpa->key)); + mpa->flags = (crc_enabled ? MPA_CRC : 0) | + (markers_enabled ? MPA_MARKERS : 0); + mpa->private_data_size = htons(ep->plen); + mpa->revision = mpa_rev; + if (ep->plen) + memcpy(mpa->private_data, ep->mpa_pkt + sizeof(*mpa), ep->plen); + + err = sosend(ep->com.so, NULL, NULL, m, NULL, MSG_DONTWAIT, ep->com.thread); + if (err) { + m_freem(m); + connect_reply_upcall(ep, -ENOMEM); + return; + } + + start_ep_timer(ep); + state_set(&ep->com, MPA_REQ_SENT); + return; +} + +static int +send_mpa_reject(struct iwch_ep *ep, const void *pdata, u8 plen) +{ + int mpalen; + struct mpa_message *mpa; + struct mbuf *m; + int err; + + CTR3(KTR_IW_CXGB, "%s ep %p plen %d", __FUNCTION__, ep, plen); + + mpalen = sizeof(*mpa) + plen; + + m = m_gethdr(mpalen, M_NOWAIT); + if (m == NULL) { + printf("%s - cannot alloc mbuf!\n", __FUNCTION__); + return (-ENOMEM); + } + mpa = mtod(m, struct mpa_message *); + m->m_len = mpalen; + m->m_pkthdr.len = mpalen; + memset(mpa, 0, sizeof(*mpa)); + memcpy(mpa->key, MPA_KEY_REP, sizeof(mpa->key)); + mpa->flags = MPA_REJECT; + mpa->revision = mpa_rev; + mpa->private_data_size = htons(plen); + if (plen) + memcpy(mpa->private_data, pdata, plen); + err = sosend(ep->com.so, NULL, NULL, m, NULL, MSG_DONTWAIT, ep->com.thread); + PANIC_IF(err); + return 0; +} + +static int +send_mpa_reply(struct iwch_ep *ep, const void *pdata, u8 plen) +{ + int mpalen; + struct mpa_message *mpa; + struct mbuf *m; + + CTR4(KTR_IW_CXGB, "%s ep %p so %p plen %d", __FUNCTION__, ep, ep->com.so, plen); + + mpalen = sizeof(*mpa) + plen; + + m = m_gethdr(mpalen, M_NOWAIT); + if (m == NULL) { + printf("%s - cannot alloc mbuf!\n", __FUNCTION__); + return (-ENOMEM); + } + mpa = mtod(m, struct mpa_message *); + m->m_len = mpalen; + m->m_pkthdr.len = mpalen; + memset(mpa, 0, sizeof(*mpa)); + memcpy(mpa->key, MPA_KEY_REP, sizeof(mpa->key)); + mpa->flags = (ep->mpa_attr.crc_enabled ? MPA_CRC : 0) | + (markers_enabled ? 
MPA_MARKERS : 0); + mpa->revision = mpa_rev; + mpa->private_data_size = htons(plen); + if (plen) + memcpy(mpa->private_data, pdata, plen); + + state_set(&ep->com, MPA_REP_SENT); + return sosend(ep->com.so, NULL, NULL, m, NULL, MSG_DONTWAIT, + ep->com.thread); +} + +static void +close_complete_upcall(struct iwch_ep *ep) +{ + struct iw_cm_event event; + + CTR4(KTR_IW_CXGB, "%s ep %p so %p state %s", __FUNCTION__, ep, ep->com.so, states[ep->com.state]); + memset(&event, 0, sizeof(event)); + event.event = IW_CM_EVENT_CLOSE; + if (ep->com.cm_id) { + CTR3(KTR_IW_CXGB, "close complete delivered ep %p cm_id %p tid %d", + ep, ep->com.cm_id, ep->hwtid); + ep->com.cm_id->event_handler(ep->com.cm_id, &event); + ep->com.cm_id->rem_ref(ep->com.cm_id); + ep->com.cm_id = NULL; + ep->com.qp = NULL; + } +} + +static void +abort_connection(struct iwch_ep *ep) +{ + CTR4(KTR_IW_CXGB, "%s ep %p so %p state %s", __FUNCTION__, ep, ep->com.so, states[ep->com.state]); + state_set(&ep->com, ABORTING); + abort_socket(ep); + close_socket(&ep->com); + close_complete_upcall(ep); + state_set(&ep->com, DEAD); + put_ep(&ep->com); +} + +static void +peer_close_upcall(struct iwch_ep *ep) +{ + struct iw_cm_event event; + + CTR4(KTR_IW_CXGB, "%s ep %p so %p state %s", __FUNCTION__, ep, ep->com.so, states[ep->com.state]); + memset(&event, 0, sizeof(event)); + event.event = IW_CM_EVENT_DISCONNECT; + if (ep->com.cm_id) { + CTR3(KTR_IW_CXGB, "peer close delivered ep %p cm_id %p tid %d", + ep, ep->com.cm_id, ep->hwtid); + ep->com.cm_id->event_handler(ep->com.cm_id, &event); + } +} + +static void +peer_abort_upcall(struct iwch_ep *ep) +{ + struct iw_cm_event event; + + CTR4(KTR_IW_CXGB, "%s ep %p so %p state %s", __FUNCTION__, ep, ep->com.so, states[ep->com.state]); + memset(&event, 0, sizeof(event)); + event.event = IW_CM_EVENT_CLOSE; + event.status = ECONNRESET; + if (ep->com.cm_id) { + CTR3(KTR_IW_CXGB, "abort delivered ep %p cm_id %p tid %d", ep, + ep->com.cm_id, ep->hwtid); + ep->com.cm_id->event_handler(ep->com.cm_id, &event); + ep->com.cm_id->rem_ref(ep->com.cm_id); + ep->com.cm_id = NULL; + ep->com.qp = NULL; + } +} + +static void +connect_reply_upcall(struct iwch_ep *ep, int status) +{ + struct iw_cm_event event; + + CTR5(KTR_IW_CXGB, "%s ep %p so %p state %s status %d", __FUNCTION__, ep, ep->com.so, states[ep->com.state], status); + memset(&event, 0, sizeof(event)); + event.event = IW_CM_EVENT_CONNECT_REPLY; + event.status = status; + event.local_addr = ep->com.local_addr; + event.remote_addr = ep->com.remote_addr; + + if ((status == 0) || (status == ECONNREFUSED)) { + event.private_data_len = ep->plen; + event.private_data = ep->mpa_pkt + sizeof(struct mpa_message); + } + if (ep->com.cm_id) { + CTR4(KTR_IW_CXGB, "%s ep %p tid %d status %d", __FUNCTION__, ep, + ep->hwtid, status); + ep->com.cm_id->event_handler(ep->com.cm_id, &event); + } + if (status < 0) { + ep->com.cm_id->rem_ref(ep->com.cm_id); + ep->com.cm_id = NULL; + ep->com.qp = NULL; + } +} + +static void +connect_request_upcall(struct iwch_ep *ep) +{ + struct iw_cm_event event; + + CTR4(KTR_IW_CXGB, "%s ep %p so %p state %s", __FUNCTION__, ep, ep->com.so, states[ep->com.state]); + memset(&event, 0, sizeof(event)); + event.event = IW_CM_EVENT_CONNECT_REQUEST; + event.local_addr = ep->com.local_addr; + event.remote_addr = ep->com.remote_addr; + event.private_data_len = ep->plen; + event.private_data = ep->mpa_pkt + sizeof(struct mpa_message); + event.provider_data = ep; + event.so = ep->com.so; + if (state_read(&ep->parent_ep->com) != DEAD) + 
ep->parent_ep->com.cm_id->event_handler( + ep->parent_ep->com.cm_id, + &event); + put_ep(&ep->parent_ep->com); + ep->parent_ep = NULL; +} + +static void +established_upcall(struct iwch_ep *ep) +{ + struct iw_cm_event event; + + CTR4(KTR_IW_CXGB, "%s ep %p so %p state %s", __FUNCTION__, ep, ep->com.so, states[ep->com.state]); + memset(&event, 0, sizeof(event)); + event.event = IW_CM_EVENT_ESTABLISHED; + if (ep->com.cm_id) { + CTR3(KTR_IW_CXGB, "%s ep %p tid %d", __FUNCTION__, ep, ep->hwtid); + ep->com.cm_id->event_handler(ep->com.cm_id, &event); + } +} + +static void +process_mpa_reply(struct iwch_ep *ep) +{ + struct mpa_message *mpa; + u16 plen; + struct iwch_qp_attributes attrs; + enum iwch_qp_attr_mask mask; + int err; + struct mbuf *top, *m; + int flags = MSG_DONTWAIT; + struct uio uio; + int len; + + CTR4(KTR_IW_CXGB, "%s ep %p so %p state %s", __FUNCTION__, ep, ep->com.so, states[ep->com.state]); + + /* + * Stop mpa timer. If it expired, then the state has + * changed and we bail since ep_timeout already aborted + * the connection. + */ + stop_ep_timer(ep); + if (state_read(&ep->com) != MPA_REQ_SENT) + return; + + uio.uio_resid = len = 1000000; + uio.uio_td = ep->com.thread; + err = soreceive(ep->com.so, NULL, &uio, &top, NULL, &flags); + if (err) { + if (err == EWOULDBLOCK) { + start_ep_timer(ep); + return; + } + err = -err; + goto err; + } + + if (ep->com.so->so_rcv.sb_mb) { + printf("%s data after soreceive called! so %p sb_mb %p top %p\n", + __FUNCTION__, ep->com.so, ep->com.so->so_rcv.sb_mb, top); + } + + m = top; + do { + /* + * If we get more than the supported amount of private data + * then we must fail this connection. + */ + if (ep->mpa_pkt_len + m->m_len > sizeof(ep->mpa_pkt)) { + err = (-EINVAL); + goto err; + } + + /* + * copy the new data into our accumulation buffer. + */ + m_copydata(m, 0, m->m_len, &(ep->mpa_pkt[ep->mpa_pkt_len])); + ep->mpa_pkt_len += m->m_len; + if (!m->m_next) + m = m->m_nextpkt; + else + m = m->m_next; + } while (m); + + m_freem(top); + + /* + * if we don't even have the mpa message, then bail. + */ + if (ep->mpa_pkt_len < sizeof(*mpa)) + return; + mpa = (struct mpa_message *)ep->mpa_pkt; + + /* Validate MPA header. */ + if (mpa->revision != mpa_rev) { + CTR2(KTR_IW_CXGB, "%s bad mpa rev %d", __FUNCTION__, mpa->revision); + err = EPROTO; + goto err; + } + if (memcmp(mpa->key, MPA_KEY_REP, sizeof(mpa->key))) { + CTR2(KTR_IW_CXGB, "%s bad mpa key |%16s|", __FUNCTION__, mpa->key); + err = EPROTO; + goto err; + } + + plen = ntohs(mpa->private_data_size); + + /* + * Fail if there's too much private data. + */ + if (plen > MPA_MAX_PRIVATE_DATA) { + CTR2(KTR_IW_CXGB, "%s plen too big %d", __FUNCTION__, plen); + err = EPROTO; + goto err; + } + + /* + * If plen does not account for pkt size + */ + if (ep->mpa_pkt_len > (sizeof(*mpa) + plen)) { + CTR2(KTR_IW_CXGB, "%s pkt too big %d", __FUNCTION__, ep->mpa_pkt_len); + err = EPROTO; + goto err; + } + + ep->plen = (u8) plen; + + /* + * If we don't have all the pdata yet, then bail. + * We'll continue process when more data arrives. + */ + if (ep->mpa_pkt_len < (sizeof(*mpa) + plen)) + return; + + if (mpa->flags & MPA_REJECT) { + err = ECONNREFUSED; + goto err; + } + + /* + * If we get here we have accumulated the entire mpa + * start reply message including private data. And + * the MPA header is valid. + */ + CTR1(KTR_IW_CXGB, "%s mpa rpl looks good!", __FUNCTION__); + state_set(&ep->com, FPDU_MODE); + ep->mpa_attr.crc_enabled = (mpa->flags & MPA_CRC) | crc_enabled ? 
1 : 0; + ep->mpa_attr.recv_marker_enabled = markers_enabled; + ep->mpa_attr.xmit_marker_enabled = mpa->flags & MPA_MARKERS ? 1 : 0; + ep->mpa_attr.version = mpa_rev; + if (set_tcpinfo(ep)) { + printf("%s set_tcpinfo error\n", __FUNCTION__); + goto err; + } + CTR5(KTR_IW_CXGB, "%s - crc_enabled=%d, recv_marker_enabled=%d, " + "xmit_marker_enabled=%d, version=%d", __FUNCTION__, + ep->mpa_attr.crc_enabled, ep->mpa_attr.recv_marker_enabled, + ep->mpa_attr.xmit_marker_enabled, ep->mpa_attr.version); + + attrs.mpa_attr = ep->mpa_attr; + attrs.max_ird = ep->ird; + attrs.max_ord = ep->ord; + attrs.llp_stream_handle = ep; + attrs.next_state = IWCH_QP_STATE_RTS; + + mask = IWCH_QP_ATTR_NEXT_STATE | + IWCH_QP_ATTR_LLP_STREAM_HANDLE | IWCH_QP_ATTR_MPA_ATTR | + IWCH_QP_ATTR_MAX_IRD | IWCH_QP_ATTR_MAX_ORD; + + /* bind QP and TID with INIT_WR */ + err = iwch_modify_qp(ep->com.qp->rhp, + ep->com.qp, mask, &attrs, 1); + if (!err) + goto out; +err: + abort_connection(ep); +out: + connect_reply_upcall(ep, err); + return; +} + +static void +process_mpa_request(struct iwch_ep *ep) +{ + struct mpa_message *mpa; + u16 plen; + int flags = MSG_DONTWAIT; + struct mbuf *top, *m; + int err; + struct uio uio; + int len; + + CTR4(KTR_IW_CXGB, "%s ep %p so %p state %s", __FUNCTION__, ep, ep->com.so, states[ep->com.state]); + + /* + * Stop mpa timer. If it expired, then the state has + * changed and we bail since ep_timeout already aborted + * the connection. + */ + stop_ep_timer(ep); + if (state_read(&ep->com) != MPA_REQ_WAIT) + return; + + uio.uio_resid = len = 1000000; + uio.uio_td = ep->com.thread; + err = soreceive(ep->com.so, NULL, &uio, &top, NULL, &flags); + if (err) { + if (err == EWOULDBLOCK) { + start_ep_timer(ep); + return; + } + err = -err; + goto err; + } + + m = top; + do { + + /* + * If we get more than the supported amount of private data + * then we must fail this connection. + */ + if (ep->mpa_pkt_len + m->m_len > sizeof(ep->mpa_pkt)) { + CTR2(KTR_IW_CXGB, "%s mpa message too big %d", __FUNCTION__, + ep->mpa_pkt_len + m->m_len); + goto err; + } + + + /* + * Copy the new data into our accumulation buffer. + */ + m_copydata(m, 0, m->m_len, &(ep->mpa_pkt[ep->mpa_pkt_len])); + ep->mpa_pkt_len += m->m_len; + + if (!m->m_next) + m = m->m_nextpkt; + else + m = m->m_next; + } while (m); + + m_freem(top); + + /* + * If we don't even have the mpa message, then bail. + * We'll continue process when more data arrives. + */ + if (ep->mpa_pkt_len < sizeof(*mpa)) { + start_ep_timer(ep); + CTR2(KTR_IW_CXGB, "%s not enough header %d...waiting...", __FUNCTION__, + ep->mpa_pkt_len); + return; + } + mpa = (struct mpa_message *) ep->mpa_pkt; + + /* + * Validate MPA Header. + */ + if (mpa->revision != mpa_rev) { + CTR2(KTR_IW_CXGB, "%s bad mpa rev %d", __FUNCTION__, mpa->revision); + goto err; + } + + if (memcmp(mpa->key, MPA_KEY_REQ, sizeof(mpa->key))) { + CTR2(KTR_IW_CXGB, "%s bad mpa key |%16s|", __FUNCTION__, mpa->key); + goto err; + } + + plen = ntohs(mpa->private_data_size); + + /* + * Fail if there's too much private data. + */ + if (plen > MPA_MAX_PRIVATE_DATA) { + CTR2(KTR_IW_CXGB, "%s plen too big %d", __FUNCTION__, plen); + goto err; + } + + /* + * If plen does not account for pkt size + */ + if (ep->mpa_pkt_len > (sizeof(*mpa) + plen)) { + CTR2(KTR_IW_CXGB, "%s more data after private data %d", __FUNCTION__, + ep->mpa_pkt_len); + goto err; + } + ep->plen = (u8) plen; + + /* + * If we don't have all the pdata yet, then bail. 
+ */ + if (ep->mpa_pkt_len < (sizeof(*mpa) + plen)) { + start_ep_timer(ep); + CTR2(KTR_IW_CXGB, "%s more mpa msg to come %d", __FUNCTION__, + ep->mpa_pkt_len); + return; + } + + /* + * If we get here we have accumulated the entire mpa + * start reply message including private data. + */ + ep->mpa_attr.crc_enabled = (mpa->flags & MPA_CRC) | crc_enabled ? 1 : 0; + ep->mpa_attr.recv_marker_enabled = markers_enabled; + ep->mpa_attr.xmit_marker_enabled = mpa->flags & MPA_MARKERS ? 1 : 0; + ep->mpa_attr.version = mpa_rev; + if (set_tcpinfo(ep)) { + printf("%s set_tcpinfo error\n", __FUNCTION__); + goto err; + } + CTR5(KTR_IW_CXGB, "%s - crc_enabled=%d, recv_marker_enabled=%d, " + "xmit_marker_enabled=%d, version=%d", __FUNCTION__, + ep->mpa_attr.crc_enabled, ep->mpa_attr.recv_marker_enabled, + ep->mpa_attr.xmit_marker_enabled, ep->mpa_attr.version); + + state_set(&ep->com, MPA_REQ_RCVD); + + /* drive upcall */ + connect_request_upcall(ep); + return; +err: + abort_connection(ep); + return; +} + +static void +process_peer_close(struct iwch_ep *ep) +{ + struct iwch_qp_attributes attrs; + int disconnect = 1; + int release = 0; + + CTR4(KTR_IW_CXGB, "%s ep %p so %p state %s", __FUNCTION__, ep, ep->com.so, states[ep->com.state]); + + mtx_lock(&ep->com.lock); + switch (ep->com.state) { + case MPA_REQ_WAIT: + __state_set(&ep->com, CLOSING); + break; + case MPA_REQ_SENT: + __state_set(&ep->com, CLOSING); + connect_reply_upcall(ep, -ECONNRESET); + break; + case MPA_REQ_RCVD: + + /* + * We're gonna mark this puppy DEAD, but keep + * the reference on it until the ULP accepts or + * rejects the CR. + */ + __state_set(&ep->com, CLOSING); + get_ep(&ep->com); + break; + case MPA_REP_SENT: + __state_set(&ep->com, CLOSING); + break; + case FPDU_MODE: + start_ep_timer(ep); + __state_set(&ep->com, CLOSING); + attrs.next_state = IWCH_QP_STATE_CLOSING; + iwch_modify_qp(ep->com.qp->rhp, ep->com.qp, + IWCH_QP_ATTR_NEXT_STATE, &attrs, 1); + peer_close_upcall(ep); + break; + case ABORTING: + disconnect = 0; + break; + case CLOSING: + __state_set(&ep->com, MORIBUND); + disconnect = 0; + break; + case MORIBUND: + stop_ep_timer(ep); + if (ep->com.cm_id && ep->com.qp) { + attrs.next_state = IWCH_QP_STATE_IDLE; + iwch_modify_qp(ep->com.qp->rhp, ep->com.qp, + IWCH_QP_ATTR_NEXT_STATE, &attrs, 1); + } + close_socket(&ep->com); + close_complete_upcall(ep); + __state_set(&ep->com, DEAD); + release = 1; + disconnect = 0; + break; + case DEAD: + disconnect = 0; + break; + default: + PANIC_IF(1); + } + mtx_unlock(&ep->com.lock); + if (disconnect) + iwch_ep_disconnect(ep, 0, M_NOWAIT); + if (release) + put_ep(&ep->com); + return; +} + +static void +process_conn_error(struct iwch_ep *ep) +{ + struct iwch_qp_attributes attrs; + int ret; + int state; + + state = state_read(&ep->com); + CTR5(KTR_IW_CXGB, "%s ep %p so %p so->so_error %u state %s", __FUNCTION__, ep, ep->com.so, ep->com.so->so_error, states[ep->com.state]); + switch (state) { + case MPA_REQ_WAIT: + stop_ep_timer(ep); + break; + case MPA_REQ_SENT: + stop_ep_timer(ep); + connect_reply_upcall(ep, -ECONNRESET); + break; + case MPA_REP_SENT: + ep->com.rpl_err = ECONNRESET; + CTR1(KTR_IW_CXGB, "waking up ep %p", ep); + break; + case MPA_REQ_RCVD: + + /* + * We're gonna mark this puppy DEAD, but keep + * the reference on it until the ULP accepts or + * rejects the CR. 
+ */ + get_ep(&ep->com); + break; + case MORIBUND: + case CLOSING: + stop_ep_timer(ep); + /*FALLTHROUGH*/ + case FPDU_MODE: + if (ep->com.cm_id && ep->com.qp) { + attrs.next_state = IWCH_QP_STATE_ERROR; + ret = iwch_modify_qp(ep->com.qp->rhp, + ep->com.qp, IWCH_QP_ATTR_NEXT_STATE, + &attrs, 1); + if (ret) + log(LOG_ERR, + "%s - qp <- error failed!\n", + __FUNCTION__); + } + peer_abort_upcall(ep); + break; + case ABORTING: + break; + case DEAD: + CTR2(KTR_IW_CXGB, "%s so_error %d IN DEAD STATE!!!!", __FUNCTION__, + ep->com.so->so_error); + return; + default: + PANIC_IF(1); + break; + } + + if (state != ABORTING) { + close_socket(&ep->com); + state_set(&ep->com, DEAD); + put_ep(&ep->com); + } + return; +} + +static void +process_close_complete(struct iwch_ep *ep) +{ + struct iwch_qp_attributes attrs; + int release = 0; + + CTR4(KTR_IW_CXGB, "%s ep %p so %p state %s", __FUNCTION__, ep, ep->com.so, states[ep->com.state]); + PANIC_IF(!ep); + + /* The cm_id may be null if we failed to connect */ + mtx_lock(&ep->com.lock); + switch (ep->com.state) { + case CLOSING: + __state_set(&ep->com, MORIBUND); + break; + case MORIBUND: + stop_ep_timer(ep); + if ((ep->com.cm_id) && (ep->com.qp)) { + attrs.next_state = IWCH_QP_STATE_IDLE; + iwch_modify_qp(ep->com.qp->rhp, + ep->com.qp, + IWCH_QP_ATTR_NEXT_STATE, + &attrs, 1); + } + close_socket(&ep->com); + close_complete_upcall(ep); + __state_set(&ep->com, DEAD); + release = 1; + break; + case ABORTING: + break; + case DEAD: + default: + PANIC_IF(1); + break; + } + mtx_unlock(&ep->com.lock); + if (release) + put_ep(&ep->com); + return; +} + +/* + * T3A does 3 things when a TERM is received: + * 1) send up a CPL_RDMA_TERMINATE message with the TERM packet + * 2) generate an async event on the QP with the TERMINATE opcode + * 3) post a TERMINATE opcde cqe into the associated CQ. + * + * For (1), we save the message in the qp for later consumer consumption. + * For (2), we move the QP into TERMINATE, post a QP event and disconnect. + * For (3), we toss the CQE in cxio_poll_cq(). + * + * terminate() handles case (1)... + */ +static int +terminate(struct t3cdev *tdev, struct mbuf *m, void *ctx) +{ + struct toepcb *toep = (struct toepcb *)ctx; + struct socket *so = toeptoso(toep); + struct iwch_ep *ep = so->so_upcallarg; + + CTR2(KTR_IW_CXGB, "%s ep %p", __FUNCTION__, ep); + m_adj(m, sizeof(struct cpl_rdma_terminate)); + CTR2(KTR_IW_CXGB, "%s saving %d bytes of term msg", __FUNCTION__, m->m_len); + m_copydata(m, 0, m->m_len, ep->com.qp->attr.terminate_buffer); + ep->com.qp->attr.terminate_msg_len = m->m_len; + ep->com.qp->attr.is_terminate_local = 0; + return CPL_RET_BUF_DONE; +} + +static int +ec_status(struct t3cdev *tdev, struct mbuf *m, void *ctx) +{ + struct toepcb *toep = (struct toepcb *)ctx; + struct socket *so = toeptoso(toep); + struct cpl_rdma_ec_status *rep = cplhdr(m); + struct iwch_ep *ep; + struct iwch_qp_attributes attrs; + int release = 0; + + ep = so->so_upcallarg; + CTR5(KTR_IW_CXGB, "%s ep %p so %p state %s ec_status %d", __FUNCTION__, ep, ep->com.so, states[ep->com.state], rep->status); + if (!so || !ep) { + panic("bogosity ep %p state %d, so %p state %x\n", ep, ep ? ep->com.state : -1, so, so ? 
so->so_state : -1); + } + mtx_lock(&ep->com.lock); + switch (ep->com.state) { + case CLOSING: + if (!rep->status) + __state_set(&ep->com, MORIBUND); + else + __state_set(&ep->com, ABORTING); + break; + case MORIBUND: + stop_ep_timer(ep); + if (!rep->status) { + if ((ep->com.cm_id) && (ep->com.qp)) { + attrs.next_state = IWCH_QP_STATE_IDLE; + iwch_modify_qp(ep->com.qp->rhp, + ep->com.qp, + IWCH_QP_ATTR_NEXT_STATE, + &attrs, 1); + } + close_socket(&ep->com); + close_complete_upcall(ep); + __state_set(&ep->com, DEAD); + release = 1; + } + break; + case DEAD: + break; + default: + panic("unknown state: %d\n", ep->com.state); + } + mtx_unlock(&ep->com.lock); + if (rep->status) { + log(LOG_ERR, "%s BAD CLOSE - Aborting tid %u\n", + __FUNCTION__, ep->hwtid); + attrs.next_state = IWCH_QP_STATE_ERROR; + iwch_modify_qp(ep->com.qp->rhp, + ep->com.qp, IWCH_QP_ATTR_NEXT_STATE, + &attrs, 1); + } + if (release) + put_ep(&ep->com); + return CPL_RET_BUF_DONE; +} + +static void +ep_timeout(void *arg) +{ + struct iwch_ep *ep = (struct iwch_ep *)arg; + struct iwch_qp_attributes attrs; + int err = 0; + + mtx_lock(&ep->com.lock); + CTR4(KTR_IW_CXGB, "%s ep %p so %p state %s", __FUNCTION__, ep, ep->com.so, states[ep->com.state]); + switch (ep->com.state) { + case MPA_REQ_SENT: + connect_reply_upcall(ep, -ETIMEDOUT); + break; + case MPA_REQ_WAIT: + break; + case CLOSING: + case MORIBUND: + if (ep->com.cm_id && ep->com.qp) + err = 1; + break; + default: + panic("unknown state: %d\n", ep->com.state); + } + __state_set(&ep->com, ABORTING); + mtx_unlock(&ep->com.lock); + if (err){ + attrs.next_state = IWCH_QP_STATE_ERROR; + iwch_modify_qp(ep->com.qp->rhp, + ep->com.qp, IWCH_QP_ATTR_NEXT_STATE, + &attrs, 1); + } + abort_connection(ep); + put_ep(&ep->com); +} + +int +iwch_reject_cr(struct iw_cm_id *cm_id, const void *pdata, u8 pdata_len) +{ + int err; + struct iwch_ep *ep = to_ep(cm_id); + CTR4(KTR_IW_CXGB, "%s ep %p so %p state %s", __FUNCTION__, ep, ep->com.so, states[ep->com.state]); + + if (state_read(&ep->com) == DEAD) { + put_ep(&ep->com); + return (-ECONNRESET); + } + PANIC_IF(state_read(&ep->com) != MPA_REQ_RCVD); + if (mpa_rev == 0) { + abort_connection(ep); + } else { + err = send_mpa_reject(ep, pdata, pdata_len); + err = soshutdown(ep->com.so, 3); + } + return 0; +} + +int +iwch_accept_cr(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param) +{ + int err; + struct iwch_qp_attributes attrs; + enum iwch_qp_attr_mask mask; + struct iwch_ep *ep = to_ep(cm_id); + struct iwch_dev *h = to_iwch_dev(cm_id->device); + struct iwch_qp *qp = get_qhp(h, conn_param->qpn); + + CTR4(KTR_IW_CXGB, "%s ep %p so %p state %s", __FUNCTION__, ep, ep->com.so, states[ep->com.state]); + if (state_read(&ep->com) == DEAD) + return (-ECONNRESET); + + PANIC_IF(state_read(&ep->com) != MPA_REQ_RCVD); + PANIC_IF(!qp); + + if ((conn_param->ord > qp->rhp->attr.max_rdma_read_qp_depth) || + (conn_param->ird > qp->rhp->attr.max_rdma_reads_per_qp)) { + abort_connection(ep); + return (-EINVAL); + } + + cm_id->add_ref(cm_id); + ep->com.cm_id = cm_id; + ep->com.qp = qp; + + ep->com.rpl_err = 0; + ep->com.rpl_done = 0; + ep->ird = conn_param->ird; + ep->ord = conn_param->ord; + CTR3(KTR_IW_CXGB, "%s ird %d ord %d", __FUNCTION__, ep->ird, ep->ord); + get_ep(&ep->com); + + /* bind QP to EP and move to RTS */ + attrs.mpa_attr = ep->mpa_attr; + attrs.max_ird = ep->ord; + attrs.max_ord = ep->ord; + attrs.llp_stream_handle = ep; + attrs.next_state = IWCH_QP_STATE_RTS; + + /* bind QP and TID with INIT_WR */ + mask = IWCH_QP_ATTR_NEXT_STATE | + 
IWCH_QP_ATTR_LLP_STREAM_HANDLE | + IWCH_QP_ATTR_MPA_ATTR | + IWCH_QP_ATTR_MAX_IRD | + IWCH_QP_ATTR_MAX_ORD; + + err = iwch_modify_qp(ep->com.qp->rhp, + ep->com.qp, mask, &attrs, 1); + + if (err) + goto err; + + err = send_mpa_reply(ep, conn_param->private_data, + conn_param->private_data_len); + if (err) + goto err; + state_set(&ep->com, FPDU_MODE); + established_upcall(ep); + put_ep(&ep->com); + return 0; +err: + ep->com.cm_id = NULL; + ep->com.qp = NULL; + cm_id->rem_ref(cm_id); + put_ep(&ep->com); + return err; +} + +static int init_sock(struct iwch_ep_common *epc) +{ + int err; + struct sockopt sopt; + int on=1; + + epc->so->so_upcall = iwch_so_upcall; + epc->so->so_upcallarg = epc; + epc->so->so_rcv.sb_flags |= SB_UPCALL; + epc->so->so_state |= SS_NBIO; + sopt.sopt_dir = SOPT_SET; + sopt.sopt_level = SOL_SOCKET; + sopt.sopt_name = SO_NO_DDP; + sopt.sopt_val = (caddr_t)&on; + sopt.sopt_valsize = sizeof on; + sopt.sopt_td = NULL; + err = sosetopt(epc->so, &sopt); + if (err) + printf("%s can't set SO_NO_DDP err %d\n", __FUNCTION__, err); + sopt.sopt_dir = SOPT_SET; + sopt.sopt_level = IPPROTO_TCP; + sopt.sopt_name = TCP_NODELAY; + sopt.sopt_val = (caddr_t)&on; + sopt.sopt_valsize = sizeof on; + sopt.sopt_td = NULL; + err = sosetopt(epc->so, &sopt); + if (err) + printf("%s can't set TCP_NODELAY err %d\n", __FUNCTION__, err); + + return 0; +} + +static int +is_loopback_dst(struct iw_cm_id *cm_id) +{ + uint16_t port = cm_id->remote_addr.sin_port; + struct ifaddr *ifa; + + cm_id->remote_addr.sin_port = 0; + ifa = ifa_ifwithaddr((struct sockaddr *)&cm_id->remote_addr); + cm_id->remote_addr.sin_port = port; + return (ifa != NULL); +} + +int +iwch_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param) +{ + int err = 0; + struct iwch_dev *h = to_iwch_dev(cm_id->device); + struct iwch_ep *ep; + struct rtentry *rt; + struct toedev *tdev; + + if (is_loopback_dst(cm_id)) { + err = -ENOSYS; + goto out; + } + + ep = alloc_ep(sizeof(*ep), M_NOWAIT); + if (!ep) { + printf("%s - cannot alloc ep.\n", __FUNCTION__); + err = (-ENOMEM); + goto out; + } + callout_init(&ep->timer, TRUE); + ep->plen = conn_param->private_data_len; + if (ep->plen) + memcpy(ep->mpa_pkt + sizeof(struct mpa_message), + conn_param->private_data, ep->plen); + ep->ird = conn_param->ird; + ep->ord = conn_param->ord; + + cm_id->add_ref(cm_id); + ep->com.cm_id = cm_id; + ep->com.qp = get_qhp(h, conn_param->qpn); + ep->com.thread = curthread; + PANIC_IF(!ep->com.qp); + CTR4(KTR_IW_CXGB, "%s qpn 0x%x qp %p cm_id %p", __FUNCTION__, conn_param->qpn, + ep->com.qp, cm_id); + + ep->com.so = cm_id->so; + err = init_sock(&ep->com); + if (err) + goto fail2; + + /* find a route */ + rt = find_route(cm_id->local_addr.sin_addr.s_addr, + cm_id->remote_addr.sin_addr.s_addr, + cm_id->local_addr.sin_port, + cm_id->remote_addr.sin_port, IPTOS_LOWDELAY); + if (!rt) { + printf("%s - cannot find route.\n", __FUNCTION__); + err = EHOSTUNREACH; + goto fail2; + } + + if (!(rt->rt_ifp->if_flags & IFCAP_TOE)) { + printf("%s - interface not TOE capable.\n", __FUNCTION__); + goto fail3; + } + tdev = TOEDEV(rt->rt_ifp); + if (tdev == NULL) { + printf("%s - No toedev for interface.\n", __FUNCTION__); + goto fail3; + } + if (!tdev->tod_can_offload(tdev, ep->com.so)) { + printf("%s - interface cannot offload!.\n", __FUNCTION__); + goto fail3; + } + RTFREE(rt); + + state_set(&ep->com, CONNECTING); + ep->com.local_addr = cm_id->local_addr; + ep->com.remote_addr = cm_id->remote_addr; + err = soconnect(ep->com.so, (struct sockaddr *)&ep->com.remote_addr, + 
ep->com.thread); + if (!err) + goto out; +fail3: + RTFREE(ep->dst); +fail2: + put_ep(&ep->com); +out: + return err; +} + +int +iwch_create_listen(struct iw_cm_id *cm_id, int backlog) +{ + int err = 0; + struct iwch_listen_ep *ep; + + ep = alloc_ep(sizeof(*ep), M_NOWAIT); + if (!ep) { + printf("%s - cannot alloc ep.\n", __FUNCTION__); + err = ENOMEM; + goto out; + } + CTR2(KTR_IW_CXGB, "%s ep %p", __FUNCTION__, ep); + cm_id->add_ref(cm_id); + ep->com.cm_id = cm_id; + ep->backlog = backlog; + ep->com.local_addr = cm_id->local_addr; + ep->com.thread = curthread; + state_set(&ep->com, LISTEN); + + ep->com.so = cm_id->so; + err = init_sock(&ep->com); + if (err) + goto fail; + + err = solisten(ep->com.so, ep->backlog, ep->com.thread); + if (!err) { + cm_id->provider_data = ep; + goto out; + } + close_socket(&ep->com); +fail: + cm_id->rem_ref(cm_id); + put_ep(&ep->com); +out: + return err; +} + +int +iwch_destroy_listen(struct iw_cm_id *cm_id) +{ + struct iwch_listen_ep *ep = to_listen_ep(cm_id); + + CTR2(KTR_IW_CXGB, "%s ep %p", __FUNCTION__, ep); + + state_set(&ep->com, DEAD); + close_socket(&ep->com); + cm_id->rem_ref(cm_id); + put_ep(&ep->com); + return 0; +} + +int +iwch_ep_disconnect(struct iwch_ep *ep, int abrupt, int flags) +{ + int close = 0; + + mtx_lock(&ep->com.lock); + + PANIC_IF(!ep); + PANIC_IF(!ep->com.so); + + CTR5(KTR_IW_CXGB, "%s ep %p so %p state %s, abrupt %d", __FUNCTION__, ep, + ep->com.so, states[ep->com.state], abrupt); + + if (ep->com.state == DEAD) { + CTR2(KTR_IW_CXGB, "%s already dead ep %p", __FUNCTION__, ep); + goto out; + } + + if (abrupt) { + if (ep->com.state != ABORTING) { + ep->com.state = ABORTING; + close = 1; + } + goto out; + } + + switch (ep->com.state) { + case MPA_REQ_WAIT: + case MPA_REQ_SENT: + case MPA_REQ_RCVD: + case MPA_REP_SENT: + case FPDU_MODE: + start_ep_timer(ep); + ep->com.state = CLOSING; + close = 1; + break; + case CLOSING: + ep->com.state = MORIBUND; + close = 1; + break; + case MORIBUND: + case ABORTING: + break; + default: + panic("unknown state: %d\n", ep->com.state); + break; + } +out: + mtx_unlock(&ep->com.lock); + if (close) { + if (abrupt) + abort_connection(ep); + else + shutdown_socket(&ep->com); + } + return 0; +} + +static void +process_data(struct iwch_ep *ep) +{ + struct sockaddr_in *local, *remote; + + CTR4(KTR_IW_CXGB, "%s ep %p so %p state %s", __FUNCTION__, ep, ep->com.so, states[ep->com.state]); + + switch (state_read(&ep->com)) { + case MPA_REQ_SENT: + process_mpa_reply(ep); + break; + case MPA_REQ_WAIT: + + /* + * XXX + * Set local and remote addrs here because when we + * dequeue the newly accepted socket, they aren't set + * yet in the pcb! + */ + in_getsockaddr(ep->com.so, (struct sockaddr **)&local); + in_getpeeraddr(ep->com.so, (struct sockaddr **)&remote); + CTR3(KTR_IW_CXGB, "%s local %s remote %s", __FUNCTION__, + inet_ntoa(local->sin_addr), + inet_ntoa(remote->sin_addr)); + ep->com.local_addr = *local; + ep->com.remote_addr = *remote; + free(local, M_SONAME); + free(remote, M_SONAME); + process_mpa_request(ep); + break; + default: + if (ep->com.so->so_rcv.sb_cc) + printf("%s Unexpected streaming data." 
+ " ep %p state %d so %p so_state %x so_rcv.sb_cc %u so_rcv.sb_mb %p\n", + __FUNCTION__, ep, state_read(&ep->com), ep->com.so, ep->com.so->so_state, + ep->com.so->so_rcv.sb_cc, ep->com.so->so_rcv.sb_mb); + break; + } + return; +} + +static void +process_connected(struct iwch_ep *ep) +{ + CTR4(KTR_IW_CXGB, "%s ep %p so %p state %s", __FUNCTION__, ep, ep->com.so, states[ep->com.state]); + if ((ep->com.so->so_state & SS_ISCONNECTED) && !ep->com.so->so_error) { + send_mpa_req(ep); + } else { + connect_reply_upcall(ep, -ep->com.so->so_error); + close_socket(&ep->com); + state_set(&ep->com, DEAD); + put_ep(&ep->com); + } +} + +static struct socket * +dequeue_socket(struct socket *head, struct sockaddr_in **remote, struct iwch_ep *child_ep) +{ + struct socket *so; + + ACCEPT_LOCK(); + so = TAILQ_FIRST(&head->so_comp); + if (!so) { + ACCEPT_UNLOCK(); + return NULL; + } + TAILQ_REMOVE(&head->so_comp, so, so_list); + head->so_qlen--; + SOCK_LOCK(so); + so->so_qstate &= ~SQ_COMP; + so->so_head = NULL; + soref(so); + so->so_rcv.sb_flags |= SB_UPCALL; + so->so_state |= SS_NBIO; + so->so_upcall = iwch_so_upcall; + so->so_upcallarg = child_ep; + PANIC_IF(!(so->so_state & SS_ISCONNECTED)); + PANIC_IF(so->so_error); + SOCK_UNLOCK(so); + ACCEPT_UNLOCK(); + soaccept(so, (struct sockaddr **)remote); + return so; +} + +static void +process_newconn(struct iwch_ep *parent_ep) +{ + struct socket *child_so; + struct iwch_ep *child_ep; + struct sockaddr_in *remote; + + CTR3(KTR_IW_CXGB, "%s parent ep %p so %p", __FUNCTION__, parent_ep, parent_ep->com.so); + child_ep = alloc_ep(sizeof(*child_ep), M_NOWAIT); + if (!child_ep) { + log(LOG_ERR, "%s - failed to allocate ep entry!\n", + __FUNCTION__); + return; + } + child_so = dequeue_socket(parent_ep->com.so, &remote, child_ep); + if (!child_so) { + log(LOG_ERR, "%s - failed to dequeue child socket!\n", + __FUNCTION__); + __free_ep(&child_ep->com); + return; + } + CTR3(KTR_IW_CXGB, "%s remote addr %s port %d", __FUNCTION__, + inet_ntoa(remote->sin_addr), ntohs(remote->sin_port)); + child_ep->com.so = child_so; + child_ep->com.cm_id = NULL; + child_ep->com.thread = parent_ep->com.thread; + child_ep->parent_ep = parent_ep; + free(remote, M_SONAME); + get_ep(&parent_ep->com); + child_ep->parent_ep = parent_ep; + callout_init(&child_ep->timer, TRUE); + state_set(&child_ep->com, MPA_REQ_WAIT); + start_ep_timer(child_ep); + + /* maybe the request has already been queued up on the socket... 
*/ + process_mpa_request(child_ep); +} + +static void +iwch_so_upcall(struct socket *so, void *arg, int waitflag) +{ + struct iwch_ep *ep = arg; + + CTR6(KTR_IW_CXGB, "%s so %p so state %x ep %p ep state(%d)=%s", __FUNCTION__, so, so->so_state, ep, ep->com.state, states[ep->com.state]); + mtx_lock(&req_lock); + if (ep && ep->com.so && !ep->com.entry.tqe_prev) { + get_ep(&ep->com); + TAILQ_INSERT_TAIL(&req_list, &ep->com, entry); + taskqueue_enqueue(iw_cxgb_taskq, &iw_cxgb_task); + } + mtx_unlock(&req_lock); +} + +static void +process_socket_event(struct iwch_ep *ep) +{ + int state = state_read(&ep->com); + struct socket *so = ep->com.so; + + CTR6(KTR_IW_CXGB, "%s so %p so state %x ep %p ep state(%d)=%s", __FUNCTION__, so, so->so_state, ep, ep->com.state, states[ep->com.state]); + if (state == CONNECTING) { + process_connected(ep); + return; + } + + if (state == LISTEN) { + process_newconn(ep); + return; + } + + /* connection error */ + if (so->so_error) { + process_conn_error(ep); + return; + } + + /* peer close */ + if ((so->so_rcv.sb_state & SBS_CANTRCVMORE) && state < CLOSING) { + process_peer_close(ep); + return; + } + + /* close complete */ + if (so->so_state & (SS_ISDISCONNECTED)) { + process_close_complete(ep); + return; + } + + /* rx data */ + process_data(ep); + return; +} + +static void +process_req(void *ctx, int pending) +{ + struct iwch_ep_common *epc; + + CTR1(KTR_IW_CXGB, "%s enter", __FUNCTION__); + mtx_lock(&req_lock); + while (!TAILQ_EMPTY(&req_list)) { + epc = TAILQ_FIRST(&req_list); + TAILQ_REMOVE(&req_list, epc, entry); + epc->entry.tqe_prev = NULL; + mtx_unlock(&req_lock); + if (epc->so) + process_socket_event((struct iwch_ep *)epc); + put_ep(epc); + mtx_lock(&req_lock); + } + mtx_unlock(&req_lock); +} + +int +iwch_cm_init(void) +{ + TAILQ_INIT(&req_list); + mtx_init(&req_lock, "iw_cxgb req_list lock", NULL, MTX_DEF); + iw_cxgb_taskq = taskqueue_create("iw_cxgb_taskq", M_NOWAIT, + taskqueue_thread_enqueue, &iw_cxgb_taskq); + if (iw_cxgb_taskq == NULL) { + printf("failed to allocate iw_cxgb taskqueue\n"); + return (ENOMEM); + } + taskqueue_start_threads(&iw_cxgb_taskq, 1, PI_NET, "iw_cxgb taskq"); + TASK_INIT(&iw_cxgb_task, 0, process_req, NULL); + t3tom_register_cpl_handler(CPL_RDMA_TERMINATE, terminate); + t3tom_register_cpl_handler(CPL_RDMA_EC_STATUS, ec_status); + return 0; +} + +void +iwch_cm_term(void) +{ + t3tom_register_cpl_handler(CPL_RDMA_TERMINATE, NULL); + t3tom_register_cpl_handler(CPL_RDMA_EC_STATUS, NULL); + taskqueue_drain(iw_cxgb_taskq, &iw_cxgb_task); + taskqueue_free(iw_cxgb_taskq); +} + diff --git a/sys/dev/cxgb/ulp/iw_cxgb/iw_cxgb_cm.h b/sys/dev/cxgb/ulp/iw_cxgb/iw_cxgb_cm.h new file mode 100644 index 0000000..4250be3 --- /dev/null +++ b/sys/dev/cxgb/ulp/iw_cxgb/iw_cxgb_cm.h @@ -0,0 +1,249 @@ +/************************************************************************** + +Copyright (c) 2007, 2008 Chelsio Inc. +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + + 1. Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimer. + + 2. Neither the name of the Chelsio Corporation nor the names of its + contributors may be used to endorse or promote products derived from + this software without specific prior written permission. 
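The iwch_so_upcall()/process_req() pair above illustrates the driver's deferral model: the socket upcall only takes a reference, links the endpoint onto req_list and pokes the taskqueue, and the real connection-manager work runs later in the taskqueue thread. The standalone sketch below models the same hand-off with POSIX threads; event_list, upcall() and worker_main() are illustrative names, not driver code.

/*
 * Illustrative analogue of the req_list/process_req hand-off: the
 * "upcall" only links an item onto a list under a lock; a single
 * worker thread drains the list and does the real work.
 */
#include <sys/queue.h>
#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>

struct event {
        TAILQ_ENTRY(event) entry;
        int id;
};

static TAILQ_HEAD(, event) event_list = TAILQ_HEAD_INITIALIZER(event_list);
static pthread_mutex_t list_lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t list_cv = PTHREAD_COND_INITIALIZER;
static int done;

/* Analogue of iwch_so_upcall(): enqueue and poke the worker. */
static void upcall(int id)
{
        struct event *ev = malloc(sizeof(*ev));

        if (ev == NULL)
                return;
        ev->id = id;
        pthread_mutex_lock(&list_lock);
        TAILQ_INSERT_TAIL(&event_list, ev, entry);
        pthread_cond_signal(&list_cv);
        pthread_mutex_unlock(&list_lock);
}

/* Analogue of process_req(): drop the lock while handling each item. */
static void *worker_main(void *arg)
{
        struct event *ev;

        pthread_mutex_lock(&list_lock);
        for (;;) {
                while (TAILQ_EMPTY(&event_list) && !done)
                        pthread_cond_wait(&list_cv, &list_lock);
                if (TAILQ_EMPTY(&event_list) && done)
                        break;
                ev = TAILQ_FIRST(&event_list);
                TAILQ_REMOVE(&event_list, ev, entry);
                pthread_mutex_unlock(&list_lock);
                printf("processing event %d\n", ev->id); /* real work here */
                free(ev);
                pthread_mutex_lock(&list_lock);
        }
        pthread_mutex_unlock(&list_lock);
        return (NULL);
}

int main(void)
{
        pthread_t tid;
        int i;

        pthread_create(&tid, NULL, worker_main, NULL);
        for (i = 0; i < 4; i++)
                upcall(i);
        pthread_mutex_lock(&list_lock);
        done = 1;
        pthread_cond_broadcast(&list_cv);
        pthread_mutex_unlock(&list_lock);
        pthread_join(tid, NULL);
        return (0);
}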
+ +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +POSSIBILITY OF SUCH DAMAGE. + +$FreeBSD$ + +***************************************************************************/ + +#ifndef _IWCH_CM_H_ +#define _IWCH_CM_H_ +#include <contrib/rdma/ib_verbs.h> +#include <contrib/rdma/iw_cm.h> +#include <sys/refcount.h> +#include <sys/condvar.h> +#include <sys/proc.h> + + +#define MPA_KEY_REQ "MPA ID Req Frame" +#define MPA_KEY_REP "MPA ID Rep Frame" + +#define MPA_MAX_PRIVATE_DATA 256 +#define MPA_REV o0 /* XXX - amso1100 uses rev 0 ! */ +#define MPA_REJECT 0x20 +#define MPA_CRC 0x40 +#define MPA_MARKERS 0x80 +#define MPA_FLAGS_MASK 0xE0 + +#define put_ep(ep) { \ + CTR4(KTR_IW_CXGB, "put_ep (via %s:%u) ep %p refcnt %d\n", __FUNCTION__, __LINE__, \ + ep, atomic_load_acq_int(&((ep)->refcount))); \ + if (refcount_release(&((ep)->refcount))) \ + __free_ep(ep); \ +} + +#define get_ep(ep) { \ + CTR4(KTR_IW_CXGB, "get_ep (via %s:%u) ep %p, refcnt %d\n", __FUNCTION__, __LINE__, \ + ep, atomic_load_acq_int(&((ep)->refcount))); \ + refcount_acquire(&((ep)->refcount)); \ +} + +struct mpa_message { + u8 key[16]; + u8 flags; + u8 revision; + __be16 private_data_size; + u8 private_data[0]; +}; + +struct terminate_message { + u8 layer_etype; + u8 ecode; + __be16 hdrct_rsvd; + u8 len_hdrs[0]; +}; + +#define TERM_MAX_LENGTH (sizeof(struct terminate_message) + 2 + 18 + 28) + +enum iwch_layers_types { + LAYER_RDMAP = 0x00, + LAYER_DDP = 0x10, + LAYER_MPA = 0x20, + RDMAP_LOCAL_CATA = 0x00, + RDMAP_REMOTE_PROT = 0x01, + RDMAP_REMOTE_OP = 0x02, + DDP_LOCAL_CATA = 0x00, + DDP_TAGGED_ERR = 0x01, + DDP_UNTAGGED_ERR = 0x02, + DDP_LLP = 0x03 +}; + +enum iwch_rdma_ecodes { + RDMAP_INV_STAG = 0x00, + RDMAP_BASE_BOUNDS = 0x01, + RDMAP_ACC_VIOL = 0x02, + RDMAP_STAG_NOT_ASSOC = 0x03, + RDMAP_TO_WRAP = 0x04, + RDMAP_INV_VERS = 0x05, + RDMAP_INV_OPCODE = 0x06, + RDMAP_STREAM_CATA = 0x07, + RDMAP_GLOBAL_CATA = 0x08, + RDMAP_CANT_INV_STAG = 0x09, + RDMAP_UNSPECIFIED = 0xff +}; + +enum iwch_ddp_ecodes { + DDPT_INV_STAG = 0x00, + DDPT_BASE_BOUNDS = 0x01, + DDPT_STAG_NOT_ASSOC = 0x02, + DDPT_TO_WRAP = 0x03, + DDPT_INV_VERS = 0x04, + DDPU_INV_QN = 0x01, + DDPU_INV_MSN_NOBUF = 0x02, + DDPU_INV_MSN_RANGE = 0x03, + DDPU_INV_MO = 0x04, + DDPU_MSG_TOOBIG = 0x05, + DDPU_INV_VERS = 0x06 +}; + +enum iwch_mpa_ecodes { + MPA_CRC_ERR = 0x02, + MPA_MARKER_ERR = 0x03 +}; + +enum iwch_ep_state { + IDLE = 0, + LISTEN, + CONNECTING, + MPA_REQ_WAIT, + MPA_REQ_SENT, + MPA_REQ_RCVD, + MPA_REP_SENT, + FPDU_MODE, + ABORTING, + CLOSING, + MORIBUND, + DEAD, +}; + +enum iwch_ep_flags { + PEER_ABORT_IN_PROGRESS = (1 << 0), + ABORT_REQ_IN_PROGRESS = (1 << 1), +}; + +struct iwch_ep_common { + TAILQ_ENTRY(iwch_ep_common) entry; + struct iw_cm_id *cm_id; + struct iwch_qp *qp; + struct t3cdev *tdev; + enum iwch_ep_state state; + u_int refcount; + struct cv waitq; + struct mtx lock; + struct 
sockaddr_in local_addr; + struct sockaddr_in remote_addr; + int rpl_err; + int rpl_done; + struct thread *thread; + struct socket *so; +}; + +struct iwch_listen_ep { + struct iwch_ep_common com; + unsigned int stid; + int backlog; +}; + +struct iwch_ep { + struct iwch_ep_common com; + struct iwch_ep *parent_ep; + struct callout timer; + unsigned int atid; + u32 hwtid; + u32 snd_seq; + u32 rcv_seq; + struct l2t_entry *l2t; + struct rtentry *dst; + struct mbuf *mpa_mbuf; + struct iwch_mpa_attributes mpa_attr; + unsigned int mpa_pkt_len; + u8 mpa_pkt[sizeof(struct mpa_message) + MPA_MAX_PRIVATE_DATA]; + u8 tos; + u16 emss; + u16 plen; + u32 ird; + u32 ord; + u32 flags; +}; + +static inline struct iwch_ep *to_ep(struct iw_cm_id *cm_id) +{ + return cm_id->provider_data; +} + +static inline struct iwch_listen_ep *to_listen_ep(struct iw_cm_id *cm_id) +{ + return cm_id->provider_data; +} + +static inline int compute_wscale(int win) +{ + int wscale = 0; + + while (wscale < 14 && (65535<<wscale) < win) + wscale++; + return wscale; +} + +static __inline void +iwch_wait(struct cv *cv, struct mtx *lock, int *rpl_done) +{ + mtx_lock(lock); + if (!*rpl_done) { + CTR0(KTR_IW_CXGB, "sleeping for rpl_done\n"); + cv_wait_unlock(cv, lock); + } + CTR1(KTR_IW_CXGB, "*rpl_done=%d\n", *rpl_done); +} + +static __inline void +iwch_wakeup(struct cv *cv, struct mtx *lock, int *rpl_done) +{ + mtx_lock(lock); + *rpl_done=1; + CTR0(KTR_IW_CXGB, "wakeup for rpl_done\n"); + cv_broadcast(cv); + mtx_unlock(lock); +} + +/* CM prototypes */ + +int iwch_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param); +int iwch_create_listen(struct iw_cm_id *cm_id, int backlog); +int iwch_destroy_listen(struct iw_cm_id *cm_id); +int iwch_reject_cr(struct iw_cm_id *cm_id, const void *pdata, u8 pdata_len); +int iwch_accept_cr(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param); +int iwch_ep_disconnect(struct iwch_ep *ep, int abrupt, int flags); +int iwch_quiesce_tid(struct iwch_ep *ep); +int iwch_resume_tid(struct iwch_ep *ep); +void __free_ep(struct iwch_ep_common *ep); +void iwch_rearp(struct iwch_ep *ep); +int iwch_ep_redirect(void *ctx, struct rtentry *old, struct rtentry *new, struct l2t_entry *l2t); + +int iwch_cm_init(void); +void iwch_cm_term(void); + +#endif /* _IWCH_CM_H_ */ diff --git a/sys/dev/cxgb/ulp/iw_cxgb/iw_cxgb_cq.c b/sys/dev/cxgb/ulp/iw_cxgb/iw_cxgb_cq.c new file mode 100644 index 0000000..c6836d5 --- /dev/null +++ b/sys/dev/cxgb/ulp/iw_cxgb/iw_cxgb_cq.c @@ -0,0 +1,276 @@ + +/************************************************************************** + +Copyright (c) 2007, Chelsio Inc. +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + + 1. Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimer. + + 2. Neither the name of the Chelsio Corporation nor the names of its + contributors may be used to endorse or promote products derived from + this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. 
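compute_wscale() above picks the smallest TCP window-scale shift (capped at 14, per RFC 1323) such that the 16-bit advertised window, scaled by that shift, covers the requested receive window. A minimal standalone check of that logic, with sample window sizes chosen only for illustration:

/*
 * Standalone check of the compute_wscale() logic above: the smallest
 * shift (at most 14) such that a 16-bit window field scaled by it can
 * cover the requested receive window.
 */
#include <stdio.h>

static int compute_wscale(int win)
{
        int wscale = 0;

        while (wscale < 14 && (65535 << wscale) < win)
                wscale++;
        return (wscale);
}

int main(void)
{
        int windows[] = { 65535, 262144, 1048576, 16777216 };
        unsigned int i;

        for (i = 0; i < sizeof(windows) / sizeof(windows[0]); i++)
                printf("win %8d -> wscale %d (covers up to %d)\n",
                    windows[i], compute_wscale(windows[i]),
                    65535 << compute_wscale(windows[i]));
        return (0);
}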
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +POSSIBILITY OF SUCH DAMAGE. + +***************************************************************************/ +#include <sys/cdefs.h> +__FBSDID("$FreeBSD$"); + +#include <sys/param.h> +#include <sys/systm.h> +#include <sys/kernel.h> +#include <sys/bus.h> +#include <sys/module.h> +#include <sys/pciio.h> +#include <sys/conf.h> +#include <machine/bus.h> +#include <machine/resource.h> +#include <sys/bus_dma.h> +#include <sys/rman.h> +#include <sys/ioccom.h> +#include <sys/mbuf.h> +#include <sys/mutex.h> +#include <sys/rwlock.h> +#include <sys/linker.h> +#include <sys/firmware.h> +#include <sys/socket.h> +#include <sys/sockio.h> +#include <sys/smp.h> +#include <sys/sysctl.h> +#include <sys/syslog.h> +#include <sys/queue.h> +#include <sys/taskqueue.h> +#include <sys/proc.h> +#include <sys/queue.h> +#include <sys/libkern.h> + +#include <netinet/in.h> +#include <contrib/rdma/ib_verbs.h> +#include <contrib/rdma/ib_umem.h> +#include <contrib/rdma/ib_user_verbs.h> + + +#ifdef CONFIG_DEFINED +#include <cxgb_include.h> +#include <ulp/iw_cxgb/iw_cxgb_wr.h> +#include <ulp/iw_cxgb/iw_cxgb_hal.h> +#include <ulp/iw_cxgb/iw_cxgb_provider.h> +#include <ulp/iw_cxgb/iw_cxgb_cm.h> +#include <ulp/iw_cxgb/iw_cxgb.h> +#include <ulp/iw_cxgb/iw_cxgb_resource.h> +#include <ulp/iw_cxgb/iw_cxgb_user.h> +#else +#include <dev/cxgb/cxgb_include.h> +#include <dev/cxgb/ulp/iw_cxgb/iw_cxgb_wr.h> +#include <dev/cxgb/ulp/iw_cxgb/iw_cxgb_hal.h> +#include <dev/cxgb/ulp/iw_cxgb/iw_cxgb_provider.h> +#include <dev/cxgb/ulp/iw_cxgb/iw_cxgb_cm.h> +#include <dev/cxgb/ulp/iw_cxgb/iw_cxgb.h> +#include <dev/cxgb/ulp/iw_cxgb/iw_cxgb_resource.h> +#include <dev/cxgb/ulp/iw_cxgb/iw_cxgb_user.h> +#endif + +/* + * Get one cq entry from cxio and map it to openib. 
+ * + * Returns: + * 0 cqe returned + * -ENOBUFS EMPTY; + * -EAGAIN caller must try again + * any other neg errno fatal error + */ +static int iwch_poll_cq_one(struct iwch_dev *rhp, struct iwch_cq *chp, + struct ib_wc *wc) +{ + struct iwch_qp *qhp = NULL; + struct t3_cqe cqe, *rd_cqe; + struct t3_wq *wq; + u32 credit = 0; + u8 cqe_flushed; + u64 cookie; + int ret = 1; + + rd_cqe = cxio_next_cqe(&chp->cq); + + if (!rd_cqe) + return 0; + + qhp = get_qhp(rhp, CQE_QPID(*rd_cqe)); + if (!qhp) + wq = NULL; + else { + mtx_lock(&qhp->lock); + wq = &(qhp->wq); + } + ret = cxio_poll_cq(wq, &(chp->cq), &cqe, &cqe_flushed, &cookie, + &credit); + if (t3a_device(chp->rhp) && credit) { + CTR3(KTR_IW_CXGB, "%s updating %d cq credits on id %d", __FUNCTION__, + credit, chp->cq.cqid); + cxio_hal_cq_op(&rhp->rdev, &chp->cq, CQ_CREDIT_UPDATE, credit); + } + + if (ret) { + ret = -EAGAIN; + goto out; + } + ret = 1; + + wc->wr_id = cookie; + wc->qp = &qhp->ibqp; + wc->vendor_err = CQE_STATUS(cqe); + + CTR4(KTR_IW_CXGB, "iwch_poll_cq_one qpid 0x%x type %d opcode %d status 0x%x", + CQE_QPID(cqe), CQE_TYPE(cqe), + CQE_OPCODE(cqe), CQE_STATUS(cqe)); + CTR3(KTR_IW_CXGB, "wrid hi 0x%x lo 0x%x cookie 0x%llx", + CQE_WRID_HI(cqe), CQE_WRID_LOW(cqe), (unsigned long long) cookie); + + if (CQE_TYPE(cqe) == 0) { + if (!CQE_STATUS(cqe)) + wc->byte_len = CQE_LEN(cqe); + else + wc->byte_len = 0; + wc->opcode = IB_WC_RECV; + } else { + switch (CQE_OPCODE(cqe)) { + case T3_RDMA_WRITE: + wc->opcode = IB_WC_RDMA_WRITE; + break; + case T3_READ_REQ: + wc->opcode = IB_WC_RDMA_READ; + wc->byte_len = CQE_LEN(cqe); + break; + case T3_SEND: + case T3_SEND_WITH_SE: + wc->opcode = IB_WC_SEND; + break; + case T3_BIND_MW: + wc->opcode = IB_WC_BIND_MW; + break; + + /* these aren't supported yet */ + case T3_SEND_WITH_INV: + case T3_SEND_WITH_SE_INV: + case T3_LOCAL_INV: + case T3_FAST_REGISTER: + default: + log(LOG_ERR, "Unexpected opcode %d " + "in the CQE received for QPID=0x%0x\n", + CQE_OPCODE(cqe), CQE_QPID(cqe)); + ret = -EINVAL; + goto out; + } + } + + if (cqe_flushed) + wc->status = IB_WC_WR_FLUSH_ERR; + else { + + switch (CQE_STATUS(cqe)) { + case TPT_ERR_SUCCESS: + wc->status = IB_WC_SUCCESS; + break; + case TPT_ERR_STAG: + wc->status = IB_WC_LOC_ACCESS_ERR; + break; + case TPT_ERR_PDID: + wc->status = IB_WC_LOC_PROT_ERR; + break; + case TPT_ERR_QPID: + case TPT_ERR_ACCESS: + wc->status = IB_WC_LOC_ACCESS_ERR; + break; + case TPT_ERR_WRAP: + wc->status = IB_WC_GENERAL_ERR; + break; + case TPT_ERR_BOUND: + wc->status = IB_WC_LOC_LEN_ERR; + break; + case TPT_ERR_INVALIDATE_SHARED_MR: + case TPT_ERR_INVALIDATE_MR_WITH_MW_BOUND: + wc->status = IB_WC_MW_BIND_ERR; + break; + case TPT_ERR_CRC: + case TPT_ERR_MARKER: + case TPT_ERR_PDU_LEN_ERR: + case TPT_ERR_OUT_OF_RQE: + case TPT_ERR_DDP_VERSION: + case TPT_ERR_RDMA_VERSION: + case TPT_ERR_DDP_QUEUE_NUM: + case TPT_ERR_MSN: + case TPT_ERR_TBIT: + case TPT_ERR_MO: + case TPT_ERR_MSN_RANGE: + case TPT_ERR_IRD_OVERFLOW: + case TPT_ERR_OPCODE: + wc->status = IB_WC_FATAL_ERR; + break; + case TPT_ERR_SWFLUSH: + wc->status = IB_WC_WR_FLUSH_ERR; + break; + default: + log(LOG_ERR, "Unexpected cqe_status 0x%x for " + "QPID=0x%0x\n", CQE_STATUS(cqe), CQE_QPID(cqe)); + ret = -EINVAL; + } + } +out: + if (wq) + mtx_unlock(&qhp->lock); + return ret; +} + +int iwch_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc) +{ + struct iwch_dev *rhp; + struct iwch_cq *chp; + int npolled; + int err = 0; + + chp = to_iwch_cq(ibcq); + rhp = chp->rhp; + + mtx_lock(&chp->lock); + for (npolled = 0; npolled < 
num_entries; ++npolled) { +#ifdef DEBUG + int i=0; +#endif + + /* + * Because T3 can post CQEs that are _not_ associated + * with a WR, we might have to poll again after removing + * one of these. + */ + do { + err = iwch_poll_cq_one(rhp, chp, wc + npolled); +#ifdef DEBUG + BUG_ON(++i > 1000); +#endif + } while (err == -EAGAIN); + if (err <= 0) + break; + } + mtx_unlock(&chp->lock); + + if (err < 0) { + return err; + } else { + return npolled; + } +} + diff --git a/sys/dev/cxgb/ulp/iw_cxgb/iw_cxgb_dbg.c b/sys/dev/cxgb/ulp/iw_cxgb/iw_cxgb_dbg.c new file mode 100644 index 0000000..4cd7ca3 --- /dev/null +++ b/sys/dev/cxgb/ulp/iw_cxgb/iw_cxgb_dbg.c @@ -0,0 +1,255 @@ + +/************************************************************************** + +Copyright (c) 2007, Chelsio Inc. +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + + 1. Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimer. + + 2. Neither the name of the Chelsio Corporation nor the names of its + contributors may be used to endorse or promote products derived from + this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +POSSIBILITY OF SUCH DAMAGE. 
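The polling loop above relies on the return convention of iwch_poll_cq_one(): 0 means the CQ is empty, -EAGAIN means a CQE was consumed but produced no work completion for the caller (so poll again), 1 means one completion was filled in, and any other negative value is fatal. The self-contained sketch below mimics that contract with a stubbed poll_one(); fake_cq and its contents are fabricated purely for illustration.

/*
 * Sketch of the poll contract: 0 = empty, -EAGAIN = CQE consumed but
 * nothing to report (poll again), 1 = one completion written,
 * other negative = hard error.
 */
#include <errno.h>
#include <stdio.h>

/* Fake CQ: 0 = empty terminator, 1 = reportable CQE, 2 = consumed-only CQE. */
static int fake_cq[] = { 1, 2, 2, 1, 1, 0 };
static int rptr;

static int poll_one(int *wc)
{
        int kind = fake_cq[rptr];

        if (kind == 0)
                return (0);             /* queue empty */
        rptr++;
        if (kind == 2)
                return (-EAGAIN);       /* CQE consumed, nothing for caller */
        *wc = rptr;                     /* pretend this is a work completion */
        return (1);
}

static int poll_cq(int num_entries, int *wc)
{
        int npolled, err = 0;

        for (npolled = 0; npolled < num_entries; ++npolled) {
                /* Skip CQEs that complete nothing, just like the driver. */
                do {
                        err = poll_one(wc + npolled);
                } while (err == -EAGAIN);
                if (err <= 0)
                        break;
        }
        return (err < 0 ? err : npolled);
}

int main(void)
{
        int wc[8];
        int n = poll_cq(8, wc), i;

        for (i = 0; i < n; i++)
                printf("completion %d: cookie %d\n", i, wc[i]);
        return (0);
}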
+ +***************************************************************************/ +#include <sys/cdefs.h> +__FBSDID("$FreeBSD$"); + +#include <sys/param.h> +#include <sys/systm.h> +#include <sys/kernel.h> +#include <sys/bus.h> +#include <sys/module.h> +#include <sys/pciio.h> +#include <sys/conf.h> +#include <machine/bus.h> +#include <machine/resource.h> +#include <sys/bus_dma.h> +#include <sys/rman.h> +#include <sys/ioccom.h> +#include <sys/mbuf.h> +#include <sys/mutex.h> +#include <sys/rwlock.h> +#include <sys/linker.h> +#include <sys/firmware.h> +#include <sys/socket.h> +#include <sys/sockio.h> +#include <sys/smp.h> +#include <sys/sysctl.h> +#include <sys/syslog.h> +#include <sys/queue.h> +#include <sys/taskqueue.h> +#include <sys/proc.h> +#include <sys/queue.h> +#include <sys/libkern.h> + +#include <netinet/in.h> + +#include <contrib/rdma/ib_verbs.h> +#include <contrib/rdma/ib_umem.h> +#include <contrib/rdma/ib_user_verbs.h> + +#ifdef DEBUG +#ifdef CONFIG_DEFINED +#include <cxgb_include.h> +#include <ulp/iw_cxgb/iw_cxgb_wr.h> +#include <ulp/iw_cxgb/iw_cxgb_hal.h> +#include <ulp/iw_cxgb/iw_cxgb_provider.h> +#include <ulp/iw_cxgb/iw_cxgb_cm.h> +#include <ulp/iw_cxgb/iw_cxgb.h> +#include <ulp/iw_cxgb/iw_cxgb_resource.h> +#include <ulp/iw_cxgb/iw_cxgb_user.h> +#else +#include <dev/cxgb/cxgb_include.h> +#include <dev/cxgb/ulp/iw_cxgb/iw_cxgb_wr.h> +#include <dev/cxgb/ulp/iw_cxgb/iw_cxgb_hal.h> +#include <dev/cxgb/ulp/iw_cxgb/iw_cxgb_provider.h> +#include <dev/cxgb/ulp/iw_cxgb/iw_cxgb_cm.h> +#include <dev/cxgb/ulp/iw_cxgb/iw_cxgb.h> +#include <dev/cxgb/ulp/iw_cxgb/iw_cxgb_resource.h> +#include <dev/cxgb/ulp/iw_cxgb/iw_cxgb_user.h> +#endif + +void cxio_dump_tpt(struct cxio_rdev *rdev, uint32_t stag) +{ + struct ch_mem_range *m; + u64 *data; + int rc; + int size = 32; + + m = kmalloc(sizeof(*m) + size, M_NOWAIT); + if (!m) { + CTR1(KTR_IW_CXGB, "%s couldn't allocate memory.", __FUNCTION__); + return; + } + m->mem_id = MEM_PMRX; + m->addr = (stag>>8) * 32 + rdev->rnic_info.tpt_base; + m->len = size; + CTR3(KTR_IW_CXGB, "%s TPT addr 0x%x len %d", __FUNCTION__, m->addr, m->len); + rc = rdev->t3cdev_p->ctl(rdev->t3cdev_p, RDMA_GET_MEM, m); + if (rc) { + CTR2(KTR_IW_CXGB, "%s toectl returned error %d", __FUNCTION__, rc); + free(m, M_DEVBUF); + return; + } + + data = (u64 *)m->buf; + while (size > 0) { + CTR2(KTR_IW_CXGB, "TPT %08x: %016llx", m->addr, (unsigned long long) *data); + size -= 8; + data++; + m->addr += 8; + } + free(m, M_DEVBUF); +} + +void cxio_dump_pbl(struct cxio_rdev *rdev, uint32_t pbl_addr, uint32_t len, u8 shift) +{ + struct ch_mem_range *m; + u64 *data; + int rc; + int size, npages; + + shift += 12; + npages = (len + (1ULL << shift) - 1) >> shift; + size = npages * sizeof(u64); + + m = kmalloc(sizeof(*m) + size, M_NOWAIT); + if (!m) { + CTR1(KTR_IW_CXGB, "%s couldn't allocate memory.", __FUNCTION__); + return; + } + m->mem_id = MEM_PMRX; + m->addr = pbl_addr; + m->len = size; + CTR4(KTR_IW_CXGB, "%s PBL addr 0x%x len %d depth %d", + __FUNCTION__, m->addr, m->len, npages); + rc = rdev->t3cdev_p->ctl(rdev->t3cdev_p, RDMA_GET_MEM, m); + if (rc) { + CTR2(KTR_IW_CXGB, "%s toectl returned error %d", __FUNCTION__, rc); + free(m, M_DEVBUF); + return; + } + + data = (u64 *)m->buf; + while (size > 0) { + CTR2(KTR_IW_CXGB, "PBL %08x: %016llx", m->addr, (unsigned long long) *data); + size -= 8; + data++; + m->addr += 8; + } + free(m, M_DEVBUF); +} + +void cxio_dump_wqe(union t3_wr *wqe) +{ + uint64_t *data = (uint64_t *)wqe; + uint32_t size = (uint32_t)(be64_to_cpu(*data) & 0xff); + + if (size 
== 0) + size = 8; + while (size > 0) { + CTR2(KTR_IW_CXGB, "WQE %p: %016llx", data, + (unsigned long long) be64_to_cpu(*data)); + size--; + data++; + } +} + +void cxio_dump_wce(struct t3_cqe *wce) +{ + uint64_t *data = (uint64_t *)wce; + int size = sizeof(*wce); + + while (size > 0) { + CTR2(KTR_IW_CXGB, "WCE %p: %016llx", data, + (unsigned long long) be64_to_cpu(*data)); + size -= 8; + data++; + } +} + +void cxio_dump_rqt(struct cxio_rdev *rdev, uint32_t hwtid, int nents) +{ + struct ch_mem_range *m; + int size = nents * 64; + u64 *data; + int rc; + + m = kmalloc(sizeof(*m) + size, M_NOWAIT); + if (!m) { + CTR1(KTR_IW_CXGB, "%s couldn't allocate memory.", __FUNCTION__); + return; + } + m->mem_id = MEM_PMRX; + m->addr = ((hwtid)<<10) + rdev->rnic_info.rqt_base; + m->len = size; + CTR3(KTR_IW_CXGB, "%s RQT addr 0x%x len %d", __FUNCTION__, m->addr, m->len); + rc = rdev->t3cdev_p->ctl(rdev->t3cdev_p, RDMA_GET_MEM, m); + if (rc) { + CTR2(KTR_IW_CXGB, "%s toectl returned error %d", __FUNCTION__, rc); + free(m, M_DEVBUF); + return; + } + + data = (u64 *)m->buf; + while (size > 0) { + CTR2(KTR_IW_CXGB, "RQT %08x: %016llx", m->addr, (unsigned long long) *data); + size -= 8; + data++; + m->addr += 8; + } + free(m, M_DEVBUF); +} + +void cxio_dump_tcb(struct cxio_rdev *rdev, uint32_t hwtid) +{ + struct ch_mem_range *m; + int size = TCB_SIZE; + uint32_t *data; + int rc; + + m = kmalloc(sizeof(*m) + size, M_NOWAIT); + if (!m) { + CTR1(KTR_IW_CXGB, "%s couldn't allocate memory.", __FUNCTION__); + return; + } + m->mem_id = MEM_CM; + m->addr = hwtid * size; + m->len = size; + CTR3(KTR_IW_CXGB, "%s TCB %d len %d", __FUNCTION__, m->addr, m->len); + rc = rdev->t3cdev_p->ctl(rdev->t3cdev_p, RDMA_GET_MEM, m); + if (rc) { + CTR2(KTR_IW_CXGB, "%s toectl returned error %d", __FUNCTION__, rc); + free(m, M_DEVBUF); + return; + } + + data = (uint32_t *)m->buf; + while (size > 0) { + printf("%2u: %08x %08x %08x %08x %08x %08x %08x %08x\n", + m->addr, + *(data+2), *(data+3), *(data),*(data+1), + *(data+6), *(data+7), *(data+4), *(data+5)); + size -= 32; + data += 8; + m->addr += 32; + } + free(m, M_DEVBUF); +} +#endif diff --git a/sys/dev/cxgb/ulp/iw_cxgb/iw_cxgb_ev.c b/sys/dev/cxgb/ulp/iw_cxgb/iw_cxgb_ev.c new file mode 100644 index 0000000..2e81547 --- /dev/null +++ b/sys/dev/cxgb/ulp/iw_cxgb/iw_cxgb_ev.c @@ -0,0 +1,265 @@ +/************************************************************************** + +Copyright (c) 2007, Chelsio Inc. +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + + 1. Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimer. + + 2. Neither the name of the Chelsio Corporation nor the names of its + contributors may be used to endorse or promote products derived from + this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. 
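The dump helpers above all follow the same shape: fetch a region of adapter memory via RDMA_GET_MEM and walk it as big-endian 64-bit words, with cxio_dump_wqe() additionally taking the WQE length in 8-byte flits from the low byte of the first word. A userland analogue of that decoding, using the byte-order macros from <sys/endian.h> on FreeBSD; dump_words() and the sample buffer are illustrative only.

/*
 * Walk a buffer of big-endian 64-bit words, taking the word count
 * from the low byte of the first word, as cxio_dump_wqe() does.
 */
#include <sys/endian.h>
#include <stdint.h>
#include <stdio.h>

static void dump_words(const uint64_t *data, size_t nwords)
{
        size_t i;

        for (i = 0; i < nwords; i++)
                printf("%3zu: %016llx\n", i,
                    (unsigned long long)be64toh(data[i]));
}

int main(void)
{
        /* Fabricated "WQE": low byte of word 0 says it is 3 flits long. */
        uint64_t wqe[3] = { htobe64(0x0000000000000003ULL),
            htobe64(0x1122334455667788ULL), htobe64(0x99aabbccddeeff00ULL) };
        size_t nflits = be64toh(wqe[0]) & 0xff;

        dump_words(wqe, nflits);
        return (0);
}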
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +POSSIBILITY OF SUCH DAMAGE. + +***************************************************************************/ +#include <sys/cdefs.h> +__FBSDID("$FreeBSD$"); + +#include <sys/param.h> +#include <sys/systm.h> +#include <sys/kernel.h> +#include <sys/bus.h> +#include <sys/module.h> +#include <sys/pciio.h> +#include <sys/conf.h> +#include <machine/bus.h> +#include <machine/resource.h> +#include <sys/bus_dma.h> +#include <sys/rman.h> +#include <sys/ioccom.h> +#include <sys/mbuf.h> +#include <sys/mutex.h> +#include <sys/rwlock.h> +#include <sys/linker.h> +#include <sys/firmware.h> +#include <sys/socket.h> +#include <sys/sockio.h> +#include <sys/smp.h> +#include <sys/sysctl.h> +#include <sys/syslog.h> +#include <sys/queue.h> +#include <sys/taskqueue.h> +#include <sys/proc.h> +#include <sys/queue.h> +#include <sys/libkern.h> + +#include <netinet/in.h> + +#include <contrib/rdma/ib_verbs.h> +#include <contrib/rdma/ib_umem.h> +#include <contrib/rdma/ib_user_verbs.h> + + +#ifdef CONFIG_DEFINED +#include <cxgb_include.h> +#include <ulp/iw_cxgb/iw_cxgb_wr.h> +#include <ulp/iw_cxgb/iw_cxgb_hal.h> +#include <ulp/iw_cxgb/iw_cxgb_provider.h> +#include <ulp/iw_cxgb/iw_cxgb_cm.h> +#include <ulp/iw_cxgb/iw_cxgb.h> +#include <ulp/iw_cxgb/iw_cxgb_resource.h> +#include <ulp/iw_cxgb/iw_cxgb_user.h> +#else +#include <dev/cxgb/cxgb_include.h> +#include <dev/cxgb/ulp/iw_cxgb/iw_cxgb_wr.h> +#include <dev/cxgb/ulp/iw_cxgb/iw_cxgb_hal.h> +#include <dev/cxgb/ulp/iw_cxgb/iw_cxgb_provider.h> +#include <dev/cxgb/ulp/iw_cxgb/iw_cxgb_cm.h> +#include <dev/cxgb/ulp/iw_cxgb/iw_cxgb.h> +#include <dev/cxgb/ulp/iw_cxgb/iw_cxgb_resource.h> +#include <dev/cxgb/ulp/iw_cxgb/iw_cxgb_user.h> +#endif + +static void +post_qp_event(struct iwch_dev *rnicp, struct iwch_qp *qhp, struct iwch_cq *chp, + struct respQ_msg_t *rsp_msg, + enum ib_event_type ib_event, + int send_term) +{ + struct ib_event event; + struct iwch_qp_attributes attrs; + + if ((qhp->attr.state == IWCH_QP_STATE_ERROR) || + (qhp->attr.state == IWCH_QP_STATE_TERMINATE)) { + CTR4(KTR_IW_CXGB, "%s AE received after RTS - " + "qp state %d qpid 0x%x status 0x%x", __FUNCTION__, + qhp->attr.state, qhp->wq.qpid, CQE_STATUS(rsp_msg->cqe)); + return; + } + + log(LOG_ERR, "%s - AE qpid 0x%x opcode %d status 0x%x " + "type %d wrid.hi 0x%x wrid.lo 0x%x \n", __FUNCTION__, + CQE_QPID(rsp_msg->cqe), CQE_OPCODE(rsp_msg->cqe), + CQE_STATUS(rsp_msg->cqe), CQE_TYPE(rsp_msg->cqe), + CQE_WRID_HI(rsp_msg->cqe), CQE_WRID_LOW(rsp_msg->cqe)); + + + event.event = ib_event; + event.device = chp->ibcq.device; + if (ib_event == IB_EVENT_CQ_ERR) + event.element.cq = &chp->ibcq; + else + event.element.qp = &qhp->ibqp; + + if (qhp->ibqp.event_handler) + (*qhp->ibqp.event_handler)(&event, qhp->ibqp.qp_context); + + if (qhp->attr.state == IWCH_QP_STATE_RTS) { + attrs.next_state = IWCH_QP_STATE_TERMINATE; + iwch_modify_qp(qhp->rhp, qhp, IWCH_QP_ATTR_NEXT_STATE, + &attrs, 1); + if (send_term) + iwch_post_terminate(qhp, rsp_msg); + } +} + +void +iwch_ev_dispatch(struct cxio_rdev *rdev_p, struct mbuf *m) +{ + struct iwch_dev *rnicp; 
+ struct respQ_msg_t *rsp_msg = (struct respQ_msg_t *) m->m_data; + struct iwch_cq *chp; + struct iwch_qp *qhp; + u32 cqid = RSPQ_CQID(rsp_msg); + + rnicp = (struct iwch_dev *) rdev_p->ulp; + mtx_lock(&rnicp->lock); + chp = get_chp(rnicp, cqid); + qhp = get_qhp(rnicp, CQE_QPID(rsp_msg->cqe)); + if (!chp || !qhp) { + log(LOG_ERR,"BAD AE cqid 0x%x qpid 0x%x opcode %d " + "status 0x%x type %d wrid.hi 0x%x wrid.lo 0x%x \n", + cqid, CQE_QPID(rsp_msg->cqe), + CQE_OPCODE(rsp_msg->cqe), CQE_STATUS(rsp_msg->cqe), + CQE_TYPE(rsp_msg->cqe), CQE_WRID_HI(rsp_msg->cqe), + CQE_WRID_LOW(rsp_msg->cqe)); + mtx_unlock(&rnicp->lock); + goto out; + } + iwch_qp_add_ref(&qhp->ibqp); + mtx_lock(&chp->lock); + ++chp->refcnt; + mtx_unlock(&chp->lock); + mtx_unlock(&rnicp->lock); + + /* + * 1) completion of our sending a TERMINATE. + * 2) incoming TERMINATE message. + */ + if ((CQE_OPCODE(rsp_msg->cqe) == T3_TERMINATE) && + (CQE_STATUS(rsp_msg->cqe) == 0)) { + if (SQ_TYPE(rsp_msg->cqe)) { + CTR3(KTR_IW_CXGB, "%s QPID 0x%x ep %p disconnecting", + __FUNCTION__, qhp->wq.qpid, qhp->ep); + iwch_ep_disconnect(qhp->ep, 0, M_NOWAIT); + } else { + CTR2(KTR_IW_CXGB, "%s post REQ_ERR AE QPID 0x%x", __FUNCTION__, + qhp->wq.qpid); + post_qp_event(rnicp, qhp, chp, rsp_msg, + IB_EVENT_QP_REQ_ERR, 0); + iwch_ep_disconnect(qhp->ep, 0, M_NOWAIT); + } + goto done; + } + + /* Bad incoming Read request */ + if (SQ_TYPE(rsp_msg->cqe) && + (CQE_OPCODE(rsp_msg->cqe) == T3_READ_RESP)) { + post_qp_event(rnicp, qhp, chp, rsp_msg, IB_EVENT_QP_REQ_ERR, 1); + goto done; + } + + /* Bad incoming write */ + if (RQ_TYPE(rsp_msg->cqe) && + (CQE_OPCODE(rsp_msg->cqe) == T3_RDMA_WRITE)) { + post_qp_event(rnicp, qhp, chp, rsp_msg, IB_EVENT_QP_REQ_ERR, 1); + goto done; + } + + switch (CQE_STATUS(rsp_msg->cqe)) { + + /* Completion Events */ + case TPT_ERR_SUCCESS: +#if 0 + /* + * Confirm the destination entry if this is a RECV completion. 
+ */ + if (qhp->ep && SQ_TYPE(rsp_msg->cqe)) + dst_confirm(qhp->ep->dst); +#endif + (*chp->ibcq.comp_handler)(&chp->ibcq, chp->ibcq.cq_context); + break; + + case TPT_ERR_STAG: + case TPT_ERR_PDID: + case TPT_ERR_QPID: + case TPT_ERR_ACCESS: + case TPT_ERR_WRAP: + case TPT_ERR_BOUND: + case TPT_ERR_INVALIDATE_SHARED_MR: + case TPT_ERR_INVALIDATE_MR_WITH_MW_BOUND: + log(LOG_ERR, "%s - CQE Err qpid 0x%x opcode %d status 0x%x " + "type %d wrid.hi 0x%x wrid.lo 0x%x \n", __FUNCTION__, + CQE_QPID(rsp_msg->cqe), CQE_OPCODE(rsp_msg->cqe), + CQE_STATUS(rsp_msg->cqe), CQE_TYPE(rsp_msg->cqe), + CQE_WRID_HI(rsp_msg->cqe), CQE_WRID_LOW(rsp_msg->cqe)); + (*chp->ibcq.comp_handler)(&chp->ibcq, chp->ibcq.cq_context); + post_qp_event(rnicp, qhp, chp, rsp_msg, IB_EVENT_QP_ACCESS_ERR, 1); + break; + + /* Device Fatal Errors */ + case TPT_ERR_ECC: + case TPT_ERR_ECC_PSTAG: + case TPT_ERR_INTERNAL_ERR: + post_qp_event(rnicp, qhp, chp, rsp_msg, IB_EVENT_DEVICE_FATAL, 1); + break; + + /* QP Fatal Errors */ + case TPT_ERR_OUT_OF_RQE: + case TPT_ERR_PBL_ADDR_BOUND: + case TPT_ERR_CRC: + case TPT_ERR_MARKER: + case TPT_ERR_PDU_LEN_ERR: + case TPT_ERR_DDP_VERSION: + case TPT_ERR_RDMA_VERSION: + case TPT_ERR_OPCODE: + case TPT_ERR_DDP_QUEUE_NUM: + case TPT_ERR_MSN: + case TPT_ERR_TBIT: + case TPT_ERR_MO: + case TPT_ERR_MSN_GAP: + case TPT_ERR_MSN_RANGE: + case TPT_ERR_RQE_ADDR_BOUND: + case TPT_ERR_IRD_OVERFLOW: + post_qp_event(rnicp, qhp, chp, rsp_msg, IB_EVENT_QP_FATAL, 1); + break; + + default: + log(LOG_ERR,"Unknown T3 status 0x%x QPID 0x%x\n", + CQE_STATUS(rsp_msg->cqe), qhp->wq.qpid); + post_qp_event(rnicp, qhp, chp, rsp_msg, IB_EVENT_QP_FATAL, 1); + break; + } +done: + mtx_lock(&chp->lock); + if (--chp->refcnt == 0) + wakeup(chp); + mtx_unlock(&chp->lock); + iwch_qp_rem_ref(&qhp->ibqp); +out: + m_free(m); +} diff --git a/sys/dev/cxgb/ulp/iw_cxgb/iw_cxgb_hal.c b/sys/dev/cxgb/ulp/iw_cxgb/iw_cxgb_hal.c new file mode 100644 index 0000000..0309b53 --- /dev/null +++ b/sys/dev/cxgb/ulp/iw_cxgb/iw_cxgb_hal.c @@ -0,0 +1,1418 @@ + +/************************************************************************** + +Copyright (c) 2007, Chelsio Inc. +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + + 1. Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimer. + + 2. Neither the name of the Chelsio Corporation nor the names of its + contributors may be used to endorse or promote products derived from + this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +POSSIBILITY OF SUCH DAMAGE. 
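At the end of iwch_ev_dispatch() above, the CQ is pinned with a manual refcnt while the event is processed, and wakeup() is called when the last reference drops so a thread tearing the CQ down can sleep until in-flight dispatches finish. A portable sketch of that pattern with a mutex and condition variable standing in for mtx/wakeup; cq_obj and the helper names are illustrative, not driver code.

/*
 * Hold a reference across dispatch; wake any waiter when the last
 * reference drops.
 */
#include <pthread.h>
#include <stdio.h>

struct cq_obj {
        pthread_mutex_t lock;
        pthread_cond_t cv;
        int refcnt;
};

static void cq_hold(struct cq_obj *cq)
{
        pthread_mutex_lock(&cq->lock);
        cq->refcnt++;
        pthread_mutex_unlock(&cq->lock);
}

static void cq_release(struct cq_obj *cq)
{
        pthread_mutex_lock(&cq->lock);
        if (--cq->refcnt == 0)
                pthread_cond_broadcast(&cq->cv);  /* analogue of wakeup(chp) */
        pthread_mutex_unlock(&cq->lock);
}

/* A destroyer would wait here until all in-flight dispatches finish. */
static void cq_wait_unused(struct cq_obj *cq)
{
        pthread_mutex_lock(&cq->lock);
        while (cq->refcnt != 0)
                pthread_cond_wait(&cq->cv, &cq->lock);
        pthread_mutex_unlock(&cq->lock);
}

int main(void)
{
        struct cq_obj cq = { PTHREAD_MUTEX_INITIALIZER,
            PTHREAD_COND_INITIALIZER, 0 };

        cq_hold(&cq);           /* event handler starts using the CQ    */
        cq_release(&cq);        /* ...and drops its reference when done */
        cq_wait_unused(&cq);
        printf("no dispatches in flight\n");
        return (0);
}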
+ +***************************************************************************/ +#include <sys/cdefs.h> +__FBSDID("$FreeBSD$"); + +#include <sys/param.h> +#include <sys/systm.h> +#include <sys/kernel.h> +#include <sys/bus.h> +#include <sys/module.h> +#include <sys/pciio.h> +#include <sys/conf.h> +#include <machine/bus.h> +#include <machine/resource.h> +#include <sys/bus_dma.h> +#include <sys/rman.h> +#include <sys/ioccom.h> +#include <sys/mbuf.h> +#include <sys/rwlock.h> +#include <sys/linker.h> +#include <sys/firmware.h> +#include <sys/socket.h> +#include <sys/sockio.h> +#include <sys/smp.h> +#include <sys/sysctl.h> +#include <sys/syslog.h> +#include <sys/queue.h> +#include <sys/taskqueue.h> +#include <sys/proc.h> +#include <sys/queue.h> + +#include <vm/vm.h> +#include <vm/pmap.h> + +#include <netinet/in.h> + +#include <contrib/rdma/ib_verbs.h> + + +#ifdef CONFIG_DEFINED +#include <cxgb_include.h> +#include <ulp/iw_cxgb/iw_cxgb_wr.h> +#include <ulp/iw_cxgb/iw_cxgb_hal.h> +#include <ulp/iw_cxgb/iw_cxgb_provider.h> +#include <ulp/iw_cxgb/iw_cxgb_cm.h> +#include <ulp/iw_cxgb/iw_cxgb.h> +#include <ulp/iw_cxgb/iw_cxgb_resource.h> +#else +#include <dev/cxgb/cxgb_include.h> +#include <dev/cxgb/ulp/iw_cxgb/iw_cxgb_wr.h> +#include <dev/cxgb/ulp/iw_cxgb/iw_cxgb_hal.h> +#include <dev/cxgb/ulp/iw_cxgb/iw_cxgb_provider.h> +#include <dev/cxgb/ulp/iw_cxgb/iw_cxgb_cm.h> +#include <dev/cxgb/ulp/iw_cxgb/iw_cxgb.h> +#include <dev/cxgb/ulp/iw_cxgb/iw_cxgb_resource.h> +#endif + +static TAILQ_HEAD( ,cxio_rdev) rdev_list; +static cxio_hal_ev_callback_func_t cxio_ev_cb = NULL; + +static struct cxio_rdev * +cxio_hal_find_rdev_by_name(char *dev_name) +{ + struct cxio_rdev *rdev; + + TAILQ_FOREACH(rdev, &rdev_list, entry) + if (!strcmp(rdev->dev_name, dev_name)) + return rdev; + return NULL; +} + +struct cxio_rdev * +cxio_hal_find_rdev_by_t3cdev(struct t3cdev *tdev) +{ + struct cxio_rdev *rdev; + + TAILQ_FOREACH(rdev, &rdev_list, entry) + if (rdev->t3cdev_p == tdev) + return rdev; + return NULL; +} + +int +cxio_hal_cq_op(struct cxio_rdev *rdev_p, struct t3_cq *cq, + enum t3_cq_opcode op, u32 credit) +{ + int ret; + struct t3_cqe *cqe; + u32 rptr; + + struct rdma_cq_op setup; + setup.id = cq->cqid; + setup.credits = (op == CQ_CREDIT_UPDATE) ? credit : 0; + setup.op = op; + ret = rdev_p->t3cdev_p->ctl(rdev_p->t3cdev_p, RDMA_CQ_OP, &setup); + + if ((ret < 0) || (op == CQ_CREDIT_UPDATE)) + return (ret); + + /* + * If the rearm returned an index other than our current index, + * then there might be CQE's in flight (being DMA'd). We must wait + * here for them to complete or the consumer can miss a notification. + */ + if (Q_PTR2IDX((cq->rptr), cq->size_log2) != ret) { + int i=0; + + rptr = cq->rptr; + + /* + * Keep the generation correct by bumping rptr until it + * matches the index returned by the rearm - 1. + */ + while (Q_PTR2IDX((rptr+1), cq->size_log2) != ret) + rptr++; + + /* + * Now rptr is the index for the (last) cqe that was + * in-flight at the time the HW rearmed the CQ. We + * spin until that CQE is valid. 
+ */ + cqe = cq->queue + Q_PTR2IDX(rptr, cq->size_log2); + while (!CQ_VLD_ENTRY(rptr, cq->size_log2, cqe)) { + DELAY(1); + if (i++ > 1000000) { + PANIC_IF(1); + log(LOG_ERR, "%s: stalled rnic\n", + rdev_p->dev_name); + return (-EIO); + } + } + + return 1; + } + + return 0; +} + +static int +cxio_hal_clear_cq_ctx(struct cxio_rdev *rdev_p, u32 cqid) +{ + struct rdma_cq_setup setup; + setup.id = cqid; + setup.base_addr = 0; /* NULL address */ + setup.size = 0; /* disaable the CQ */ + setup.credits = 0; + setup.credit_thres = 0; + setup.ovfl_mode = 0; + return (rdev_p->t3cdev_p->ctl(rdev_p->t3cdev_p, RDMA_CQ_SETUP, &setup)); +} + +static int +cxio_hal_clear_qp_ctx(struct cxio_rdev *rdev_p, u32 qpid) +{ + u64 sge_cmd; + struct t3_modify_qp_wr *wqe; + struct mbuf *m = m_gethdr(MT_DATA, M_NOWAIT); + if (m == NULL) { + CTR1(KTR_IW_CXGB, "%s m_gethdr failed", __FUNCTION__); + return (-ENOMEM); + } + wqe = mtod(m, struct t3_modify_qp_wr *); + m->m_len = m->m_pkthdr.len = sizeof(*wqe); + memset(wqe, 0, sizeof(*wqe)); + build_fw_riwrh((struct fw_riwrh *) wqe, T3_WR_QP_MOD, 3, 0, qpid, 7); + wqe->flags = htobe32(MODQP_WRITE_EC); + sge_cmd = qpid << 8 | 3; + wqe->sge_cmd = htobe64(sge_cmd); + m_set_priority(m, CPL_PRIORITY_CONTROL); + m_set_sgl(m, NULL); + m_set_sgllen(m, 0); + return (cxgb_ofld_send(rdev_p->t3cdev_p, m)); +} + +int +cxio_create_cq(struct cxio_rdev *rdev_p, struct t3_cq *cq) +{ + struct rdma_cq_setup setup; + int size = (1UL << (cq->size_log2)) * sizeof(struct t3_cqe); + + cq->cqid = cxio_hal_get_cqid(rdev_p->rscp); + if (!cq->cqid) + return (-ENOMEM); + cq->sw_queue = malloc(size, M_DEVBUF, M_NOWAIT|M_ZERO); + if (!cq->sw_queue) + return (-ENOMEM); +#if 0 + cq->queue = dma_alloc_coherent(rdev_p->rnic_info.pdev, + (1UL << (cq->size_log2)) * + sizeof(struct t3_cqe), + &(cq->dma_addr), M_NOWAIT); +#else + cq->queue = contigmalloc((1UL << (cq->size_log2))*sizeof(struct t3_cqe), + M_DEVBUF, M_NOWAIT, 0ul, ~0ul, 4096, 0); + if (cq->queue) + cq->dma_addr = vtophys(cq->queue); + else { + free(cq->sw_queue, M_DEVBUF); + return (-ENOMEM); + } +#endif + +#ifdef notyet + pci_unmap_addr_set(cq, mapping, cq->dma_addr); +#endif + memset(cq->queue, 0, size); + setup.id = cq->cqid; + setup.base_addr = (u64) (cq->dma_addr); + setup.size = 1UL << cq->size_log2; + setup.credits = 65535; + setup.credit_thres = 1; + if (rdev_p->t3cdev_p->type != T3A) + setup.ovfl_mode = 0; + else + setup.ovfl_mode = 1; + return (rdev_p->t3cdev_p->ctl(rdev_p->t3cdev_p, RDMA_CQ_SETUP, &setup)); +} + +int +cxio_resize_cq(struct cxio_rdev *rdev_p, struct t3_cq *cq) +{ + struct rdma_cq_setup setup; + setup.id = cq->cqid; + setup.base_addr = (u64) (cq->dma_addr); + setup.size = 1UL << cq->size_log2; + setup.credits = setup.size; + setup.credit_thres = setup.size; /* TBD: overflow recovery */ + setup.ovfl_mode = 1; + return (rdev_p->t3cdev_p->ctl(rdev_p->t3cdev_p, RDMA_CQ_SETUP, &setup)); +} + +static u32 +get_qpid(struct cxio_rdev *rdev_p, struct cxio_ucontext *uctx) +{ + struct cxio_qpid *entry; + u32 qpid; + int i; + + mtx_lock(&uctx->lock); + if (!TAILQ_EMPTY(&uctx->qpids)) { + + entry = TAILQ_FIRST(&uctx->qpids); + TAILQ_REMOVE(&uctx->qpids, entry, entry); + qpid = entry->qpid; + free(entry, M_DEVBUF); + } else { + qpid = cxio_hal_get_qpid(rdev_p->rscp); + if (!qpid) + goto out; + for (i = qpid+1; i & rdev_p->qpmask; i++) { + entry = malloc(sizeof *entry, M_DEVBUF, M_NOWAIT); + if (!entry) + break; + entry->qpid = i; + TAILQ_INSERT_TAIL(&uctx->qpids, entry, entry); + } + } +out: + mtx_unlock(&uctx->lock); + CTR2(KTR_IW_CXGB, 
"%s qpid 0x%x", __FUNCTION__, qpid); + return qpid; +} + +static void +put_qpid(struct cxio_rdev *rdev_p, u32 qpid, + struct cxio_ucontext *uctx) +{ + struct cxio_qpid *entry; + + entry = malloc(sizeof *entry, M_DEVBUF, M_NOWAIT); + CTR2(KTR_IW_CXGB, "%s qpid 0x%x", __FUNCTION__, qpid); + entry->qpid = qpid; + mtx_lock(&uctx->lock); + TAILQ_INSERT_TAIL(&uctx->qpids, entry, entry); + mtx_unlock(&uctx->lock); +} + +void +cxio_release_ucontext(struct cxio_rdev *rdev_p, struct cxio_ucontext *uctx) +{ + struct cxio_qpid *pos, *tmp; + + mtx_lock(&uctx->lock); + TAILQ_FOREACH_SAFE(pos, &uctx->qpids, entry, tmp) { + TAILQ_REMOVE(&uctx->qpids, pos, entry); + if (!(pos->qpid & rdev_p->qpmask)) + cxio_hal_put_qpid(rdev_p->rscp, pos->qpid); + free(pos, M_DEVBUF); + } + mtx_unlock(&uctx->lock); +} + +void +cxio_init_ucontext(struct cxio_rdev *rdev_p, struct cxio_ucontext *uctx) +{ + TAILQ_INIT(&uctx->qpids); + mtx_init(&uctx->lock, "cxio uctx", NULL, MTX_DEF|MTX_DUPOK); +} + +int +cxio_create_qp(struct cxio_rdev *rdev_p, u32 kernel_domain, + struct t3_wq *wq, struct cxio_ucontext *uctx) +{ + int depth = 1UL << wq->size_log2; + int rqsize = 1UL << wq->rq_size_log2; + + wq->qpid = get_qpid(rdev_p, uctx); + if (!wq->qpid) + return (-ENOMEM); + + wq->rq = malloc(depth * sizeof(u64), M_DEVBUF, M_NOWAIT|M_ZERO); + if (!wq->rq) + goto err1; + + wq->rq_addr = cxio_hal_rqtpool_alloc(rdev_p, rqsize); + if (!wq->rq_addr) + goto err2; + + wq->sq = malloc(depth * sizeof(struct t3_swsq), M_DEVBUF, M_NOWAIT|M_ZERO); + if (!wq->sq) + goto err3; +#if 0 + wq->queue = dma_alloc_coherent(rdev_p->rnic_info.pdev, + depth * sizeof(union t3_wr), + &(wq->dma_addr), M_NOWAIT); +#else + wq->queue = contigmalloc(depth *sizeof(union t3_wr), + M_DEVBUF, M_NOWAIT, 0ul, ~0ul, 4096, 0); + if (wq->queue) + wq->dma_addr = vtophys(wq->queue); + +#endif + if (!wq->queue) + goto err4; + + memset(wq->queue, 0, depth * sizeof(union t3_wr)); +#ifdef notyet + pci_unmap_addr_set(wq, mapping, wq->dma_addr); +#endif + wq->doorbell = rdev_p->rnic_info.kdb_addr; + if (!kernel_domain) + wq->udb = (u64)rdev_p->rnic_info.udbell_physbase + + (wq->qpid << rdev_p->qpshift); + CTR4(KTR_IW_CXGB, "%s qpid 0x%x doorbell 0x%p udb 0x%llx", __FUNCTION__, + wq->qpid, wq->doorbell, (unsigned long long) wq->udb); + return 0; +err4: + free(wq->sq, M_DEVBUF); +err3: + cxio_hal_rqtpool_free(rdev_p, wq->rq_addr, rqsize); +err2: + free(wq->rq, M_DEVBUF); +err1: + put_qpid(rdev_p, wq->qpid, uctx); + return (-ENOMEM); +} + +int +cxio_destroy_cq(struct cxio_rdev *rdev_p, struct t3_cq *cq) +{ + int err; + err = cxio_hal_clear_cq_ctx(rdev_p, cq->cqid); + free(cq->sw_queue, M_DEVBUF); +#if 0 + dma_free_coherent(&(rdev_p->rnic_info.pdev), + (1UL << (cq->size_log2)) + * sizeof(struct t3_cqe), cq->queue, + /* pci_unmap_addr(cq, mapping)*/ 0); +#else + contigfree(cq->queue,(1UL << (cq->size_log2)) + * sizeof(struct t3_cqe), M_DEVBUF); +#endif + cxio_hal_put_cqid(rdev_p->rscp, cq->cqid); + return err; +} + +int +cxio_destroy_qp(struct cxio_rdev *rdev_p, struct t3_wq *wq, + struct cxio_ucontext *uctx) +{ + +#if 0 + dma_free_coherent(&(rdev_p->rnic_info.pdev), + (1UL << (wq->size_log2)) + * sizeof(union t3_wr), wq->queue, + /* pci_unmap_addr(wq, mapping)*/ 0); +#else + contigfree(wq->queue, (1UL << (wq->size_log2)) + * sizeof(union t3_wr), M_DEVBUF); +#endif + free(wq->sq, M_DEVBUF); + cxio_hal_rqtpool_free(rdev_p, wq->rq_addr, (1UL << wq->rq_size_log2)); + free(wq->rq, M_DEVBUF); + put_qpid(rdev_p, wq->qpid, uctx); + return 0; +} + +static void +insert_recv_cqe(struct t3_wq *wq, 
struct t3_cq *cq) +{ + struct t3_cqe cqe; + + CTR5(KTR_IW_CXGB, "%s wq %p cq %p sw_rptr 0x%x sw_wptr 0x%x", __FUNCTION__, + wq, cq, cq->sw_rptr, cq->sw_wptr); + memset(&cqe, 0, sizeof(cqe)); + cqe.header = htobe32(V_CQE_STATUS(TPT_ERR_SWFLUSH) | + V_CQE_OPCODE(T3_SEND) | + V_CQE_TYPE(0) | + V_CQE_SWCQE(1) | + V_CQE_QPID(wq->qpid) | + V_CQE_GENBIT(Q_GENBIT(cq->sw_wptr, + cq->size_log2))); + *(cq->sw_queue + Q_PTR2IDX(cq->sw_wptr, cq->size_log2)) = cqe; + cq->sw_wptr++; +} + +void +cxio_flush_rq(struct t3_wq *wq, struct t3_cq *cq, int count) +{ + u32 ptr; + + CTR3(KTR_IW_CXGB, "%s wq %p cq %p", __FUNCTION__, wq, cq); + + /* flush RQ */ + CTR4(KTR_IW_CXGB, "%s rq_rptr %u rq_wptr %u skip count %u", __FUNCTION__, + wq->rq_rptr, wq->rq_wptr, count); + ptr = wq->rq_rptr + count; + while (ptr++ != wq->rq_wptr) + insert_recv_cqe(wq, cq); +} + +static void +insert_sq_cqe(struct t3_wq *wq, struct t3_cq *cq, + struct t3_swsq *sqp) +{ + struct t3_cqe cqe; + + CTR5(KTR_IW_CXGB, "%s wq %p cq %p sw_rptr 0x%x sw_wptr 0x%x", __FUNCTION__, + wq, cq, cq->sw_rptr, cq->sw_wptr); + memset(&cqe, 0, sizeof(cqe)); + cqe.header = htobe32(V_CQE_STATUS(TPT_ERR_SWFLUSH) | + V_CQE_OPCODE(sqp->opcode) | + V_CQE_TYPE(1) | + V_CQE_SWCQE(1) | + V_CQE_QPID(wq->qpid) | + V_CQE_GENBIT(Q_GENBIT(cq->sw_wptr, + cq->size_log2))); + cqe.u.scqe.wrid_hi = sqp->sq_wptr; + + *(cq->sw_queue + Q_PTR2IDX(cq->sw_wptr, cq->size_log2)) = cqe; + cq->sw_wptr++; +} + +void +cxio_flush_sq(struct t3_wq *wq, struct t3_cq *cq, int count) +{ + __u32 ptr; + struct t3_swsq *sqp = wq->sq + Q_PTR2IDX(wq->sq_rptr, wq->sq_size_log2); + + ptr = wq->sq_rptr + count; + sqp += count; + while (ptr != wq->sq_wptr) { + insert_sq_cqe(wq, cq, sqp); + sqp++; + ptr++; + } +} + +/* + * Move all CQEs from the HWCQ into the SWCQ. 
+ */ +void +cxio_flush_hw_cq(struct t3_cq *cq) +{ + struct t3_cqe *cqe, *swcqe; + + CTR3(KTR_IW_CXGB, "%s cq %p cqid 0x%x", __FUNCTION__, cq, cq->cqid); + cqe = cxio_next_hw_cqe(cq); + while (cqe) { + CTR3(KTR_IW_CXGB, "%s flushing hwcq rptr 0x%x to swcq wptr 0x%x", + __FUNCTION__, cq->rptr, cq->sw_wptr); + swcqe = cq->sw_queue + Q_PTR2IDX(cq->sw_wptr, cq->size_log2); + *swcqe = *cqe; + swcqe->header |= htobe32(V_CQE_SWCQE(1)); + cq->sw_wptr++; + cq->rptr++; + cqe = cxio_next_hw_cqe(cq); + } +} + +static int cqe_completes_wr(struct t3_cqe *cqe, struct t3_wq *wq) +{ + if (CQE_OPCODE(*cqe) == T3_TERMINATE) + return 0; + + if ((CQE_OPCODE(*cqe) == T3_RDMA_WRITE) && RQ_TYPE(*cqe)) + return 0; + + if ((CQE_OPCODE(*cqe) == T3_READ_RESP) && SQ_TYPE(*cqe)) + return 0; + + if ((CQE_OPCODE(*cqe) == T3_SEND) && RQ_TYPE(*cqe) && + Q_EMPTY(wq->rq_rptr, wq->rq_wptr)) + return 0; + + return 1; +} + +void +cxio_count_scqes(struct t3_cq *cq, struct t3_wq *wq, int *count) +{ + struct t3_cqe *cqe; + u32 ptr; + + *count = 0; + ptr = cq->sw_rptr; + while (!Q_EMPTY(ptr, cq->sw_wptr)) { + cqe = cq->sw_queue + (Q_PTR2IDX(ptr, cq->size_log2)); + if ((SQ_TYPE(*cqe) || (CQE_OPCODE(*cqe) == T3_READ_RESP)) && + (CQE_QPID(*cqe) == wq->qpid)) + (*count)++; + ptr++; + } + CTR3(KTR_IW_CXGB, "%s cq %p count %d", __FUNCTION__, cq, *count); +} + +void +cxio_count_rcqes(struct t3_cq *cq, struct t3_wq *wq, int *count) +{ + struct t3_cqe *cqe; + u32 ptr; + + *count = 0; + CTR2(KTR_IW_CXGB, "%s count zero %d", __FUNCTION__, *count); + ptr = cq->sw_rptr; + while (!Q_EMPTY(ptr, cq->sw_wptr)) { + cqe = cq->sw_queue + (Q_PTR2IDX(ptr, cq->size_log2)); + if (RQ_TYPE(*cqe) && (CQE_OPCODE(*cqe) != T3_READ_RESP) && + (CQE_QPID(*cqe) == wq->qpid) && cqe_completes_wr(cqe, wq)) + (*count)++; + ptr++; + } + CTR3(KTR_IW_CXGB, "%s cq %p count %d", __FUNCTION__, cq, *count); +} + +static int +cxio_hal_init_ctrl_cq(struct cxio_rdev *rdev_p) +{ + struct rdma_cq_setup setup; + setup.id = 0; + setup.base_addr = 0; /* NULL address */ + setup.size = 1; /* enable the CQ */ + setup.credits = 0; + + /* force SGE to redirect to RspQ and interrupt */ + setup.credit_thres = 0; + setup.ovfl_mode = 1; + return (rdev_p->t3cdev_p->ctl(rdev_p->t3cdev_p, RDMA_CQ_SETUP, &setup)); +} + +static int +cxio_hal_init_ctrl_qp(struct cxio_rdev *rdev_p) +{ + int err; + u64 sge_cmd, ctx0, ctx1; + u64 base_addr; + struct t3_modify_qp_wr *wqe; + struct mbuf *m; + + m = m_gethdr(MT_DATA, M_NOWAIT); + if (m == NULL) { + CTR1(KTR_IW_CXGB, "%s m_gethdr failed", __FUNCTION__); + return (-ENOMEM); + } + err = cxio_hal_init_ctrl_cq(rdev_p); + if (err) { + CTR2(KTR_IW_CXGB, "%s err %d initializing ctrl_cq", __FUNCTION__, err); + goto err; + } +#if 0 + rdev_p->ctrl_qp.workq = dma_alloc_coherent( + rdev_p->rnic_info.pdev, + (1 << T3_CTRL_QP_SIZE_LOG2) * + sizeof(union t3_wr), + &(rdev_p->ctrl_qp.dma_addr), + M_NOWAIT); +#else + rdev_p->ctrl_qp.workq = contigmalloc((1 << T3_CTRL_QP_SIZE_LOG2) + *sizeof(union t3_wr), M_DEVBUF, M_NOWAIT, 0ul, ~0ul, 4096, 0); + if (rdev_p->ctrl_qp.workq) + rdev_p->ctrl_qp.dma_addr = vtophys(rdev_p->ctrl_qp.workq); + +#endif + + if (!rdev_p->ctrl_qp.workq) { + CTR1(KTR_IW_CXGB, "%s dma_alloc_coherent failed", __FUNCTION__); + err = -ENOMEM; + goto err; + } +#if 0 + pci_unmap_addr_set(&rdev_p->ctrl_qp, mapping, + rdev_p->ctrl_qp.dma_addr); +#endif + rdev_p->ctrl_qp.doorbell = (void /*__iomem */ *)rdev_p->rnic_info.kdb_addr; + memset(rdev_p->ctrl_qp.workq, 0, + (1 << T3_CTRL_QP_SIZE_LOG2) * sizeof(union t3_wr)); + + mtx_init(&rdev_p->ctrl_qp.lock, "ctl-qp 
lock", NULL, MTX_DEF|MTX_DUPOK); + + /* update HW Ctrl QP context */ + base_addr = rdev_p->ctrl_qp.dma_addr; + base_addr >>= 12; + ctx0 = (V_EC_SIZE((1 << T3_CTRL_QP_SIZE_LOG2)) | + V_EC_BASE_LO((u32) base_addr & 0xffff)); + ctx0 <<= 32; + ctx0 |= V_EC_CREDITS(FW_WR_NUM); + base_addr >>= 16; + ctx1 = (u32) base_addr; + base_addr >>= 32; + ctx1 |= ((u64) (V_EC_BASE_HI((u32) base_addr & 0xf) | V_EC_RESPQ(0) | + V_EC_TYPE(0) | V_EC_GEN(1) | + V_EC_UP_TOKEN(T3_CTL_QP_TID) | F_EC_VALID)) << 32; + wqe = mtod(m, struct t3_modify_qp_wr *); + m->m_len = m->m_pkthdr.len = sizeof(*wqe); + memset(wqe, 0, sizeof(*wqe)); + build_fw_riwrh((struct fw_riwrh *) wqe, T3_WR_QP_MOD, 3, 0, + T3_CTL_QP_TID, 7); + wqe->flags = htobe32(MODQP_WRITE_EC); + sge_cmd = (3ULL << 56) | FW_RI_SGEEC_START << 8 | 3; + wqe->sge_cmd = htobe64(sge_cmd); + wqe->ctx1 = htobe64(ctx1); + wqe->ctx0 = htobe64(ctx0); + CTR3(KTR_IW_CXGB, "CtrlQP dma_addr 0x%llx workq %p size %d", + (unsigned long long) rdev_p->ctrl_qp.dma_addr, + rdev_p->ctrl_qp.workq, 1 << T3_CTRL_QP_SIZE_LOG2); + m_set_priority(m, CPL_PRIORITY_CONTROL); + m_set_sgl(m, NULL); + m_set_sgllen(m, 0); + return (cxgb_ofld_send(rdev_p->t3cdev_p, m)); +err: + m_free(m); + return err; +} + +static int +cxio_hal_destroy_ctrl_qp(struct cxio_rdev *rdev_p) +{ +#if 0 + + dma_free_coherent(&(rdev_p->rnic_info.pdev), + (1UL << T3_CTRL_QP_SIZE_LOG2) + * sizeof(union t3_wr), rdev_p->ctrl_qp.workq, + /* pci_unmap_addr(&rdev_p->ctrl_qp, mapping)*/ 0); +#else + contigfree(rdev_p->ctrl_qp.workq,(1UL << T3_CTRL_QP_SIZE_LOG2) + * sizeof(union t3_wr), M_DEVBUF); +#endif + return cxio_hal_clear_qp_ctx(rdev_p, T3_CTRL_QP_ID); +} + +/* write len bytes of data into addr (32B aligned address) + * If data is NULL, clear len byte of memory to zero. + * caller aquires the ctrl_qp lock before the call + */ +static int +cxio_hal_ctrl_qp_write_mem(struct cxio_rdev *rdev_p, u32 addr, + u32 len, void *data, int completion) +{ + u32 i, nr_wqe, copy_len; + u8 *copy_data; + u8 wr_len, utx_len; /* lenght in 8 byte flit */ + enum t3_wr_flags flag; + __be64 *wqe; + u64 utx_cmd; + addr &= 0x7FFFFFF; + nr_wqe = len % 96 ? len / 96 + 1 : len / 96; /* 96B max per WQE */ + CTR6(KTR_IW_CXGB, "cxio_hal_ctrl_qp_write_mem wptr 0x%x rptr 0x%x len %d, nr_wqe %d data %p addr 0x%0x", + rdev_p->ctrl_qp.wptr, rdev_p->ctrl_qp.rptr, len, + nr_wqe, data, addr); + utx_len = 3; /* in 32B unit */ + for (i = 0; i < nr_wqe; i++) { + if (Q_FULL(rdev_p->ctrl_qp.rptr, rdev_p->ctrl_qp.wptr, + T3_CTRL_QP_SIZE_LOG2)) { + CTR4(KTR_IW_CXGB, "%s ctrl_qp full wtpr 0x%0x rptr 0x%0x, " + "wait for more space i %d", __FUNCTION__, + rdev_p->ctrl_qp.wptr, rdev_p->ctrl_qp.rptr, i); + if (cxio_wait(&rdev_p->ctrl_qp, + &rdev_p->ctrl_qp.lock, + !Q_FULL(rdev_p->ctrl_qp.rptr, + rdev_p->ctrl_qp.wptr, + T3_CTRL_QP_SIZE_LOG2))) { + CTR1(KTR_IW_CXGB, "%s ctrl_qp workq interrupted", + __FUNCTION__); + return (-ERESTART); + } + CTR2(KTR_IW_CXGB, "%s ctrl_qp wakeup, continue posting work request " + "i %d", __FUNCTION__, i); + } + wqe = (__be64 *)(rdev_p->ctrl_qp.workq + (rdev_p->ctrl_qp.wptr % + (1 << T3_CTRL_QP_SIZE_LOG2))); + flag = 0; + if (i == (nr_wqe - 1)) { + /* last WQE */ + flag = completion ? 
T3_COMPLETION_FLAG : 0; + if (len % 32) + utx_len = len / 32 + 1; + else + utx_len = len / 32; + } + + /* + * Force a CQE to return the credit to the workq in case + * we posted more than half the max QP size of WRs + */ + if ((i != 0) && + (i % (((1 << T3_CTRL_QP_SIZE_LOG2)) >> 1) == 0)) { + flag = T3_COMPLETION_FLAG; + CTR2(KTR_IW_CXGB, "%s force completion at i %d", __FUNCTION__, i); + } + + /* build the utx mem command */ + wqe += (sizeof(struct t3_bypass_wr) >> 3); + utx_cmd = (T3_UTX_MEM_WRITE << 28) | (addr + i * 3); + utx_cmd <<= 32; + utx_cmd |= (utx_len << 28) | ((utx_len << 2) + 1); + *wqe = htobe64(utx_cmd); + wqe++; + copy_data = (u8 *) data + i * 96; + copy_len = len > 96 ? 96 : len; + + /* clear memory content if data is NULL */ + if (data) + memcpy(wqe, copy_data, copy_len); + else + memset(wqe, 0, copy_len); + if (copy_len % 32) + memset(((u8 *) wqe) + copy_len, 0, + 32 - (copy_len % 32)); + wr_len = ((sizeof(struct t3_bypass_wr)) >> 3) + 1 + + (utx_len << 2); + wqe = (__be64 *)(rdev_p->ctrl_qp.workq + (rdev_p->ctrl_qp.wptr % + (1 << T3_CTRL_QP_SIZE_LOG2))); + + /* wptr in the WRID[31:0] */ + ((union t3_wrid *)(wqe+1))->id0.low = rdev_p->ctrl_qp.wptr; + + /* + * This must be the last write with a memory barrier + * for the genbit + */ + build_fw_riwrh((struct fw_riwrh *) wqe, T3_WR_BP, flag, + Q_GENBIT(rdev_p->ctrl_qp.wptr, + T3_CTRL_QP_SIZE_LOG2), T3_CTRL_QP_ID, + wr_len); + if (flag == T3_COMPLETION_FLAG) + ring_doorbell(rdev_p->ctrl_qp.doorbell, T3_CTRL_QP_ID); + + len -= 96; + rdev_p->ctrl_qp.wptr++; + } + return 0; +} + +/* IN: stag key, pdid, perm, zbva, to, len, page_size, pbl, and pbl_size + * OUT: stag index, actual pbl_size, pbl_addr allocated. + * TBD: shared memory region support + */ +static int +__cxio_tpt_op(struct cxio_rdev *rdev_p, u32 reset_tpt_entry, + u32 *stag, u8 stag_state, u32 pdid, + enum tpt_mem_type type, enum tpt_mem_perm perm, + u32 zbva, u64 to, u32 len, u8 page_size, __be64 *pbl, + u32 *pbl_size, u32 *pbl_addr) +{ + int err; + struct tpt_entry tpt; + u32 stag_idx; + u32 wptr; + int rereg = (*stag != T3_STAG_UNSET); + + stag_state = stag_state > 0; + stag_idx = (*stag) >> 8; + + if ((!reset_tpt_entry) && !(*stag != T3_STAG_UNSET)) { + stag_idx = cxio_hal_get_stag(rdev_p->rscp); + if (!stag_idx) + return (-ENOMEM); + *stag = (stag_idx << 8) | ((*stag) & 0xFF); + } + CTR5(KTR_IW_CXGB, "%s stag_state 0x%0x type 0x%0x pdid 0x%0x, stag_idx 0x%x", + __FUNCTION__, stag_state, type, pdid, stag_idx); + + if (reset_tpt_entry) + cxio_hal_pblpool_free(rdev_p, *pbl_addr, *pbl_size << 3); + else if (!rereg) { + *pbl_addr = cxio_hal_pblpool_alloc(rdev_p, *pbl_size << 3); + if (!*pbl_addr) { + return (-ENOMEM); + } + } + + mtx_lock(&rdev_p->ctrl_qp.lock); + + /* write PBL first if any - update pbl only if pbl list exist */ + if (pbl) { + + CTR4(KTR_IW_CXGB, "%s *pdb_addr 0x%x, pbl_base 0x%x, pbl_size %d", + __FUNCTION__, *pbl_addr, rdev_p->rnic_info.pbl_base, + *pbl_size); + err = cxio_hal_ctrl_qp_write_mem(rdev_p, + (*pbl_addr >> 5), + (*pbl_size << 3), pbl, 0); + if (err) + goto ret; + } + + /* write TPT entry */ + if (reset_tpt_entry) + memset(&tpt, 0, sizeof(tpt)); + else { + tpt.valid_stag_pdid = htobe32(F_TPT_VALID | + V_TPT_STAG_KEY((*stag) & M_TPT_STAG_KEY) | + V_TPT_STAG_STATE(stag_state) | + V_TPT_STAG_TYPE(type) | V_TPT_PDID(pdid)); + PANIC_IF(page_size >= 28); + tpt.flags_pagesize_qpid = htobe32(V_TPT_PERM(perm) | + F_TPT_MW_BIND_ENABLE | + V_TPT_ADDR_TYPE((zbva ? 
TPT_ZBTO : TPT_VATO)) | + V_TPT_PAGE_SIZE(page_size)); + tpt.rsvd_pbl_addr = reset_tpt_entry ? 0 : + htobe32(V_TPT_PBL_ADDR(PBL_OFF(rdev_p, *pbl_addr)>>3)); + tpt.len = htobe32(len); + tpt.va_hi = htobe32((u32) (to >> 32)); + tpt.va_low_or_fbo = htobe32((u32) (to & 0xFFFFFFFFULL)); + tpt.rsvd_bind_cnt_or_pstag = 0; + tpt.rsvd_pbl_size = reset_tpt_entry ? 0 : + htobe32(V_TPT_PBL_SIZE((*pbl_size) >> 2)); + } + err = cxio_hal_ctrl_qp_write_mem(rdev_p, + stag_idx + + (rdev_p->rnic_info.tpt_base >> 5), + sizeof(tpt), &tpt, 1); + + /* release the stag index to free pool */ + if (reset_tpt_entry) + cxio_hal_put_stag(rdev_p->rscp, stag_idx); +ret: + wptr = rdev_p->ctrl_qp.wptr; + mtx_unlock(&rdev_p->ctrl_qp.lock); + if (!err) + if (cxio_wait(&rdev_p->ctrl_qp, + &rdev_p->ctrl_qp.lock, + SEQ32_GE(rdev_p->ctrl_qp.rptr, wptr))) + return (-ERESTART); + return err; +} + +int +cxio_register_phys_mem(struct cxio_rdev *rdev_p, u32 *stag, u32 pdid, + enum tpt_mem_perm perm, u32 zbva, u64 to, u32 len, + u8 page_size, __be64 *pbl, u32 *pbl_size, + u32 *pbl_addr) +{ + *stag = T3_STAG_UNSET; + return __cxio_tpt_op(rdev_p, 0, stag, 1, pdid, TPT_NON_SHARED_MR, perm, + zbva, to, len, page_size, pbl, pbl_size, pbl_addr); +} + +int +cxio_reregister_phys_mem(struct cxio_rdev *rdev_p, u32 *stag, u32 pdid, + enum tpt_mem_perm perm, u32 zbva, u64 to, u32 len, + u8 page_size, __be64 *pbl, u32 *pbl_size, + u32 *pbl_addr) +{ + return __cxio_tpt_op(rdev_p, 0, stag, 1, pdid, TPT_NON_SHARED_MR, perm, + zbva, to, len, page_size, pbl, pbl_size, pbl_addr); +} + +int +cxio_dereg_mem(struct cxio_rdev *rdev_p, u32 stag, u32 pbl_size, + u32 pbl_addr) +{ + return __cxio_tpt_op(rdev_p, 1, &stag, 0, 0, 0, 0, 0, 0ULL, 0, 0, NULL, + &pbl_size, &pbl_addr); +} + +int +cxio_allocate_window(struct cxio_rdev *rdev_p, u32 * stag, u32 pdid) +{ + u32 pbl_size = 0; + *stag = T3_STAG_UNSET; + return __cxio_tpt_op(rdev_p, 0, stag, 0, pdid, TPT_MW, 0, 0, 0ULL, 0, 0, + NULL, &pbl_size, NULL); +} + +int +cxio_deallocate_window(struct cxio_rdev *rdev_p, u32 stag) +{ + return __cxio_tpt_op(rdev_p, 1, &stag, 0, 0, 0, 0, 0, 0ULL, 0, 0, NULL, + NULL, NULL); +} + +int +cxio_rdma_init(struct cxio_rdev *rdev_p, struct t3_rdma_init_attr *attr) +{ + struct t3_rdma_init_wr *wqe; + struct mbuf *m = m_gethdr(MT_DATA, M_NOWAIT); + if (m == NULL) + return (-ENOMEM); + CTR2(KTR_IW_CXGB, "%s rdev_p %p", __FUNCTION__, rdev_p); + wqe = mtod(m, struct t3_rdma_init_wr *); + m->m_len = m->m_pkthdr.len = sizeof(*wqe); + wqe->wrh.op_seop_flags = htobe32(V_FW_RIWR_OP(T3_WR_INIT)); + wqe->wrh.gen_tid_len = htobe32(V_FW_RIWR_TID(attr->tid) | + V_FW_RIWR_LEN(sizeof(*wqe) >> 3)); + wqe->wrid.id1 = 0; + wqe->qpid = htobe32(attr->qpid); + wqe->pdid = htobe32(attr->pdid); + wqe->scqid = htobe32(attr->scqid); + wqe->rcqid = htobe32(attr->rcqid); + wqe->rq_addr = htobe32(attr->rq_addr - rdev_p->rnic_info.rqt_base); + wqe->rq_size = htobe32(attr->rq_size); + wqe->mpaattrs = attr->mpaattrs; + wqe->qpcaps = attr->qpcaps; + wqe->ulpdu_size = htobe16(attr->tcp_emss); + wqe->flags = htobe32(attr->flags); + wqe->ord = htobe32(attr->ord); + wqe->ird = htobe32(attr->ird); + wqe->qp_dma_addr = htobe64(attr->qp_dma_addr); + wqe->qp_dma_size = htobe32(attr->qp_dma_size); + wqe->irs = htobe32(attr->irs); + m_set_priority(m, 0); /* 0=>ToeQ; 1=>CtrlQ */ + m_set_sgl(m, NULL); + m_set_sgllen(m, 0); + return (cxgb_ofld_send(rdev_p->t3cdev_p, m)); +} + +void +cxio_register_ev_cb(cxio_hal_ev_callback_func_t ev_cb) +{ + cxio_ev_cb = ev_cb; +} + +void +cxio_unregister_ev_cb(cxio_hal_ev_callback_func_t 
ev_cb) +{ + cxio_ev_cb = NULL; +} + +static int +cxio_hal_ev_handler(struct t3cdev *t3cdev_p, struct mbuf *m) +{ + static int cnt; + struct cxio_rdev *rdev_p = NULL; + struct respQ_msg_t *rsp_msg = (struct respQ_msg_t *) m->m_data; + + CTR6(KTR_IW_CXGB, "%s cq_id 0x%x cq_ptr 0x%x genbit %0x overflow %0x an %0x", + __FUNCTION__, RSPQ_CQID(rsp_msg), RSPQ_CQPTR(rsp_msg), + RSPQ_GENBIT(rsp_msg), RSPQ_OVERFLOW(rsp_msg), RSPQ_AN(rsp_msg)); + CTR4(KTR_IW_CXGB, "se %0x notify %0x cqbranch %0x creditth %0x", + RSPQ_SE(rsp_msg), RSPQ_NOTIFY(rsp_msg), RSPQ_CQBRANCH(rsp_msg), + RSPQ_CREDIT_THRESH(rsp_msg)); + CTR4(KTR_IW_CXGB, "CQE: QPID 0x%0x type 0x%0x status 0x%0x opcode %d", + CQE_QPID(rsp_msg->cqe), + CQE_TYPE(rsp_msg->cqe), CQE_STATUS(rsp_msg->cqe), + CQE_OPCODE(rsp_msg->cqe)); + CTR3(KTR_IW_CXGB, "len 0x%0x wrid_hi_stag 0x%x wrid_low_msn 0x%x", + CQE_LEN(rsp_msg->cqe), CQE_WRID_HI(rsp_msg->cqe), CQE_WRID_LOW(rsp_msg->cqe)); + rdev_p = (struct cxio_rdev *)t3cdev_p->ulp; + if (!rdev_p) { + CTR2(KTR_IW_CXGB, "%s called by t3cdev %p with null ulp", __FUNCTION__, + t3cdev_p); + return 0; + } + if (CQE_QPID(rsp_msg->cqe) == T3_CTRL_QP_ID) { + mtx_lock(&rdev_p->ctrl_qp.lock); + rdev_p->ctrl_qp.rptr = CQE_WRID_LOW(rsp_msg->cqe) + 1; + wakeup(&rdev_p->ctrl_qp); + mtx_unlock(&rdev_p->ctrl_qp.lock); + m_free(m); + } else if (CQE_QPID(rsp_msg->cqe) == 0xfff8) + m_free(m); + else if (cxio_ev_cb) + (*cxio_ev_cb) (rdev_p, m); + else + m_free(m); + cnt++; + return 0; +} + +/* Caller takes care of locking if needed */ +int +cxio_rdev_open(struct cxio_rdev *rdev_p) +{ + struct ifnet *ifp; + int err = 0; + + if (strlen(rdev_p->dev_name)) { + if (cxio_hal_find_rdev_by_name(rdev_p->dev_name)) { + return (-EBUSY); + } + ifp = rdev_p->ifp; + if (ifp == NULL) + return (-EINVAL); + if_free(ifp); + } else if (rdev_p->t3cdev_p) { + if (cxio_hal_find_rdev_by_t3cdev(rdev_p->t3cdev_p)) + return (-EBUSY); + ifp = rdev_p->t3cdev_p->lldev; + strncpy(rdev_p->dev_name, rdev_p->t3cdev_p->name, + T3_MAX_DEV_NAME_LEN); + } else { + CTR1(KTR_IW_CXGB, "%s t3cdev_p or dev_name must be set", __FUNCTION__); + return (-EINVAL); + } + + TAILQ_INSERT_TAIL(&rdev_list, rdev_p, entry); + + CTR2(KTR_IW_CXGB, "%s opening rnic dev %s", __FUNCTION__, rdev_p->dev_name); + memset(&rdev_p->ctrl_qp, 0, sizeof(rdev_p->ctrl_qp)); + if (!rdev_p->t3cdev_p) + rdev_p->t3cdev_p = T3CDEV(ifp); + rdev_p->t3cdev_p->ulp = (void *) rdev_p; + err = rdev_p->t3cdev_p->ctl(rdev_p->t3cdev_p, RDMA_GET_PARAMS, + &(rdev_p->rnic_info)); + if (err) { + log(LOG_ERR, "%s t3cdev_p(%p)->ctl returned error %d.\n", + __FUNCTION__, rdev_p->t3cdev_p, err); + goto err1; + } + err = rdev_p->t3cdev_p->ctl(rdev_p->t3cdev_p, GET_PORTS, + &(rdev_p->port_info)); + if (err) { + log(LOG_ERR, "%s t3cdev_p(%p)->ctl returned error %d.\n", + __FUNCTION__, rdev_p->t3cdev_p, err); + goto err1; + } + + /* + * qpshift is the number of bits to shift the qpid left in order + * to get the correct address of the doorbell for that qp. 
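 * For example, assuming PAGE_SHIFT is 12 and a user doorbell region of
 * 256 pages (udbell_len >> PAGE_SHIFT == 256), the values computed below
 * work out to qpshift = 12 - ilog2(65536 >> 8) = 4, qpnr = 256 and
 * qpmask = (65536 >> 8) - 1 = 255. The real udbell_len comes from the
 * RDMA_GET_PARAMS query above, so these numbers are illustrative only.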
+ */ + cxio_init_ucontext(rdev_p, &rdev_p->uctx); + rdev_p->qpshift = PAGE_SHIFT - + ilog2(65536 >> + ilog2(rdev_p->rnic_info.udbell_len >> + PAGE_SHIFT)); + rdev_p->qpnr = rdev_p->rnic_info.udbell_len >> PAGE_SHIFT; + rdev_p->qpmask = (65536 >> ilog2(rdev_p->qpnr)) - 1; + CTR4(KTR_IW_CXGB, "cxio_rdev_open rnic %s info: tpt_base 0x%0x tpt_top 0x%0x num stags %d", + rdev_p->dev_name, rdev_p->rnic_info.tpt_base, + rdev_p->rnic_info.tpt_top, cxio_num_stags(rdev_p)); + CTR4(KTR_IW_CXGB, "pbl_base 0x%0x pbl_top 0x%0x rqt_base 0x%0x, rqt_top 0x%0x", + rdev_p->rnic_info.pbl_base, + rdev_p->rnic_info.pbl_top, rdev_p->rnic_info.rqt_base, + rdev_p->rnic_info.rqt_top); + CTR6(KTR_IW_CXGB, "udbell_len 0x%0x udbell_physbase 0x%lx kdb_addr %p qpshift %lu " + "qpnr %d qpmask 0x%x", + rdev_p->rnic_info.udbell_len, + rdev_p->rnic_info.udbell_physbase, rdev_p->rnic_info.kdb_addr, + rdev_p->qpshift, rdev_p->qpnr, rdev_p->qpmask); + + err = cxio_hal_init_ctrl_qp(rdev_p); + if (err) { + log(LOG_ERR, "%s error %d initializing ctrl_qp.\n", + __FUNCTION__, err); + goto err1; + } + err = cxio_hal_init_resource(rdev_p, cxio_num_stags(rdev_p), 0, + 0, T3_MAX_NUM_QP, T3_MAX_NUM_CQ, + T3_MAX_NUM_PD); + if (err) { + log(LOG_ERR, "%s error %d initializing hal resources.\n", + __FUNCTION__, err); + goto err2; + } + err = cxio_hal_pblpool_create(rdev_p); + if (err) { + log(LOG_ERR, "%s error %d initializing pbl mem pool.\n", + __FUNCTION__, err); + goto err3; + } + err = cxio_hal_rqtpool_create(rdev_p); + if (err) { + log(LOG_ERR, "%s error %d initializing rqt mem pool.\n", + __FUNCTION__, err); + goto err4; + } + return 0; +err4: + cxio_hal_pblpool_destroy(rdev_p); +err3: + cxio_hal_destroy_resource(rdev_p->rscp); +err2: + cxio_hal_destroy_ctrl_qp(rdev_p); +err1: + TAILQ_REMOVE(&rdev_list, rdev_p, entry); + return err; +} + +void +cxio_rdev_close(struct cxio_rdev *rdev_p) +{ + if (rdev_p) { + cxio_hal_pblpool_destroy(rdev_p); + cxio_hal_rqtpool_destroy(rdev_p); + TAILQ_REMOVE(&rdev_list, rdev_p, entry); + rdev_p->t3cdev_p->ulp = NULL; + cxio_hal_destroy_ctrl_qp(rdev_p); + cxio_hal_destroy_resource(rdev_p->rscp); + } +} + +int +cxio_hal_init(void) +{ + TAILQ_INIT(&rdev_list); +#ifdef needed + if (cxio_hal_init_rhdl_resource(T3_MAX_NUM_RI)) + return (-ENOMEM); +#endif + t3_register_cpl_handler(CPL_ASYNC_NOTIF, cxio_hal_ev_handler); + return 0; +} + +void +cxio_hal_exit(void) +{ + struct cxio_rdev *rdev, *tmp; + + t3_register_cpl_handler(CPL_ASYNC_NOTIF, NULL); + TAILQ_FOREACH_SAFE(rdev, &rdev_list, entry, tmp) + cxio_rdev_close(rdev); +#ifdef needed + cxio_hal_destroy_rhdl_resource(); +#endif +} + +static void +flush_completed_wrs(struct t3_wq *wq, struct t3_cq *cq) +{ + struct t3_swsq *sqp; + __u32 ptr = wq->sq_rptr; + int count = Q_COUNT(wq->sq_rptr, wq->sq_wptr); + + sqp = wq->sq + Q_PTR2IDX(ptr, wq->sq_size_log2); + while (count--) + if (!sqp->signaled) { + ptr++; + sqp = wq->sq + Q_PTR2IDX(ptr, wq->sq_size_log2); + } else if (sqp->complete) { + + /* + * Insert this completed cqe into the swcq. 
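 * The header is tagged with V_CQE_SWCQE(1) so that the skip path in
 * cxio_poll_cq() advances the software CQ rptr for this entry instead
 * of the hardware CQ rptr.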
+ */ + CTR3(KTR_IW_CXGB, "%s moving cqe into swcq sq idx %ld cq idx %ld", + __FUNCTION__, Q_PTR2IDX(ptr, wq->sq_size_log2), + Q_PTR2IDX(cq->sw_wptr, cq->size_log2)); + sqp->cqe.header |= htonl(V_CQE_SWCQE(1)); + *(cq->sw_queue + Q_PTR2IDX(cq->sw_wptr, cq->size_log2)) + = sqp->cqe; + cq->sw_wptr++; + sqp->signaled = 0; + break; + } else + break; +} + +static void +create_read_req_cqe(struct t3_wq *wq, struct t3_cqe *hw_cqe, + struct t3_cqe *read_cqe) +{ + read_cqe->u.scqe.wrid_hi = wq->oldest_read->sq_wptr; + read_cqe->len = wq->oldest_read->read_len; + read_cqe->header = htonl(V_CQE_QPID(CQE_QPID(*hw_cqe)) | + V_CQE_SWCQE(SW_CQE(*hw_cqe)) | + V_CQE_OPCODE(T3_READ_REQ) | + V_CQE_TYPE(1)); +} + +/* + * Return a ptr to the next read wr in the SWSQ or NULL. + */ +static void +advance_oldest_read(struct t3_wq *wq) +{ + + u32 rptr = wq->oldest_read - wq->sq + 1; + u32 wptr = Q_PTR2IDX(wq->sq_wptr, wq->sq_size_log2); + + while (Q_PTR2IDX(rptr, wq->sq_size_log2) != wptr) { + wq->oldest_read = wq->sq + Q_PTR2IDX(rptr, wq->sq_size_log2); + + if (wq->oldest_read->opcode == T3_READ_REQ) + return; + rptr++; + } + wq->oldest_read = NULL; +} + +/* + * cxio_poll_cq + * + * Caller must: + * check the validity of the first CQE, + * supply the wq assicated with the qpid. + * + * credit: cq credit to return to sge. + * cqe_flushed: 1 iff the CQE is flushed. + * cqe: copy of the polled CQE. + * + * return value: + * 0 CQE returned, + * -1 CQE skipped, try again. + */ +int +cxio_poll_cq(struct t3_wq *wq, struct t3_cq *cq, struct t3_cqe *cqe, + u8 *cqe_flushed, u64 *cookie, u32 *credit) +{ + int ret = 0; + struct t3_cqe *hw_cqe, read_cqe; + + *cqe_flushed = 0; + *credit = 0; + hw_cqe = cxio_next_cqe(cq); + + CTR5(KTR_IW_CXGB, "cxio_poll_cq CQE OOO %d qpid 0x%0x genbit %d type %d status 0x%0x", + CQE_OOO(*hw_cqe), CQE_QPID(*hw_cqe), + CQE_GENBIT(*hw_cqe), CQE_TYPE(*hw_cqe), CQE_STATUS(*hw_cqe)); + CTR4(KTR_IW_CXGB, "opcode 0x%0x len 0x%0x wrid_hi_stag 0x%x wrid_low_msn 0x%x", + CQE_OPCODE(*hw_cqe), CQE_LEN(*hw_cqe), CQE_WRID_HI(*hw_cqe), + CQE_WRID_LOW(*hw_cqe)); + + /* + * skip cqe's not affiliated with a QP. + */ + if (wq == NULL) { + ret = -1; + goto skip_cqe; + } + + /* + * Gotta tweak READ completions: + * 1) the cqe doesn't contain the sq_wptr from the wr. + * 2) opcode not reflected from the wr. + * 3) read_len not reflected from the wr. + * 4) cq_type is RQ_TYPE not SQ_TYPE. + */ + if (RQ_TYPE(*hw_cqe) && (CQE_OPCODE(*hw_cqe) == T3_READ_RESP)) { + + /* + * Don't write to the HWCQ, so create a new read req CQE + * in local memory. + */ + create_read_req_cqe(wq, hw_cqe, &read_cqe); + hw_cqe = &read_cqe; + advance_oldest_read(wq); + } + + /* + * T3A: Discard TERMINATE CQEs. + */ + if (CQE_OPCODE(*hw_cqe) == T3_TERMINATE) { + ret = -1; + wq->error = 1; + goto skip_cqe; + } + + if (CQE_STATUS(*hw_cqe) || wq->error) { + *cqe_flushed = wq->error; + wq->error = 1; + + /* + * T3A inserts errors into the CQE. We cannot return + * these as work completions. + */ + /* incoming write failures */ + if ((CQE_OPCODE(*hw_cqe) == T3_RDMA_WRITE) + && RQ_TYPE(*hw_cqe)) { + ret = -1; + goto skip_cqe; + } + /* incoming read request failures */ + if ((CQE_OPCODE(*hw_cqe) == T3_READ_RESP) && SQ_TYPE(*hw_cqe)) { + ret = -1; + goto skip_cqe; + } + + /* incoming SEND with no receive posted failures */ + if ((CQE_OPCODE(*hw_cqe) == T3_SEND) && RQ_TYPE(*hw_cqe) && + Q_EMPTY(wq->rq_rptr, wq->rq_wptr)) { + ret = -1; + goto skip_cqe; + } + goto proc_cqe; + } + + /* + * RECV completion. 
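 * The cookie handed back for an RQ completion is the wr_id stored
 * directly in the RQ ring (wq->rq[]) in proc_cqe below; only SQ
 * completions go through the SW SQ bookkeeping above.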
+ */ + if (RQ_TYPE(*hw_cqe)) { + + /* + * HW only validates 4 bits of MSN. So we must validate that + * the MSN in the SEND is the next expected MSN. If its not, + * then we complete this with TPT_ERR_MSN and mark the wq in + * error. + */ + if (__predict_false((CQE_WRID_MSN(*hw_cqe) != (wq->rq_rptr + 1)))) { + wq->error = 1; + hw_cqe->header |= htonl(V_CQE_STATUS(TPT_ERR_MSN)); + goto proc_cqe; + } + goto proc_cqe; + } + + /* + * If we get here its a send completion. + * + * Handle out of order completion. These get stuffed + * in the SW SQ. Then the SW SQ is walked to move any + * now in-order completions into the SW CQ. This handles + * 2 cases: + * 1) reaping unsignaled WRs when the first subsequent + * signaled WR is completed. + * 2) out of order read completions. + */ + if (!SW_CQE(*hw_cqe) && (CQE_WRID_SQ_WPTR(*hw_cqe) != wq->sq_rptr)) { + struct t3_swsq *sqp; + + CTR2(KTR_IW_CXGB, "%s out of order completion going in swsq at idx %ld", + __FUNCTION__, + Q_PTR2IDX(CQE_WRID_SQ_WPTR(*hw_cqe), wq->sq_size_log2)); + sqp = wq->sq + + Q_PTR2IDX(CQE_WRID_SQ_WPTR(*hw_cqe), wq->sq_size_log2); + sqp->cqe = *hw_cqe; + sqp->complete = 1; + ret = -1; + goto flush_wq; + } + +proc_cqe: + *cqe = *hw_cqe; + + /* + * Reap the associated WR(s) that are freed up with this + * completion. + */ + if (SQ_TYPE(*hw_cqe)) { + wq->sq_rptr = CQE_WRID_SQ_WPTR(*hw_cqe); + CTR2(KTR_IW_CXGB, "%s completing sq idx %ld", __FUNCTION__, + Q_PTR2IDX(wq->sq_rptr, wq->sq_size_log2)); + *cookie = (wq->sq + + Q_PTR2IDX(wq->sq_rptr, wq->sq_size_log2))->wr_id; + wq->sq_rptr++; + } else { + CTR2(KTR_IW_CXGB, "%s completing rq idx %ld", __FUNCTION__, + Q_PTR2IDX(wq->rq_rptr, wq->rq_size_log2)); + *cookie = *(wq->rq + Q_PTR2IDX(wq->rq_rptr, wq->rq_size_log2)); + wq->rq_rptr++; + } + +flush_wq: + /* + * Flush any completed cqes that are now in-order. + */ + flush_completed_wrs(wq, cq); + +skip_cqe: + if (SW_CQE(*hw_cqe)) { + CTR4(KTR_IW_CXGB, "%s cq %p cqid 0x%x skip sw cqe sw_rptr 0x%x", + __FUNCTION__, cq, cq->cqid, cq->sw_rptr); + ++cq->sw_rptr; + } else { + CTR4(KTR_IW_CXGB, "%s cq %p cqid 0x%x skip hw cqe rptr 0x%x", + __FUNCTION__, cq, cq->cqid, cq->rptr); + ++cq->rptr; + + /* + * T3A: compute credits. + */ + if (((cq->rptr - cq->wptr) > (1 << (cq->size_log2 - 1))) + || ((cq->rptr - cq->wptr) >= 128)) { + *credit = cq->rptr - cq->wptr; + cq->wptr = cq->rptr; + } + } + return ret; +} + + diff --git a/sys/dev/cxgb/ulp/iw_cxgb/iw_cxgb_hal.h b/sys/dev/cxgb/ulp/iw_cxgb/iw_cxgb_hal.h new file mode 100644 index 0000000..6a401e0 --- /dev/null +++ b/sys/dev/cxgb/ulp/iw_cxgb/iw_cxgb_hal.h @@ -0,0 +1,330 @@ +/************************************************************************** + +Copyright (c) 2007, 2008 Chelsio Inc. +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + + 1. Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimer. + + 2. Neither the name of the Chelsio Corporation nor the names of its + contributors may be used to endorse or promote products derived from + this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +POSSIBILITY OF SUCH DAMAGE. + +$FreeBSD$ + +***************************************************************************/ +#ifndef __CXIO_HAL_H__ +#define __CXIO_HAL_H__ +#include <sys/condvar.h> +#include <sys/ktr.h> + +#define T3_CTRL_QP_ID FW_RI_SGEEC_START +#define T3_CTL_QP_TID FW_RI_TID_START +#define T3_CTRL_QP_SIZE_LOG2 8 +#define T3_CTRL_CQ_ID 0 + +/* TBD */ +#define T3_MAX_NUM_RI (1<<15) +#define T3_MAX_NUM_QP (1<<15) +#define T3_MAX_NUM_CQ (1<<15) +#define T3_MAX_NUM_PD (1<<15) +#define T3_MAX_PBL_SIZE 256 +#define T3_MAX_RQ_SIZE 1024 +#define T3_MAX_NUM_STAG (1<<15) + +#define T3_STAG_UNSET 0xffffffff + +#define T3_MAX_DEV_NAME_LEN 32 + +struct cxio_hal_ctrl_qp { + u32 wptr; + u32 rptr; + struct mtx lock; /* for the wtpr, can sleep */ +#ifdef notyet + DECLARE_PCI_UNMAP_ADDR(mapping) +#endif + union t3_wr *workq; /* the work request queue */ + bus_addr_t dma_addr; /* pci bus address of the workq */ + void /* __iomem */ *doorbell; +}; + +struct cxio_hal_resource { + struct buf_ring *tpt_fifo; + struct mtx tpt_fifo_lock; + struct buf_ring *qpid_fifo; + struct mtx qpid_fifo_lock; + struct buf_ring *cqid_fifo; + struct mtx cqid_fifo_lock; + struct buf_ring *pdid_fifo; + struct mtx pdid_fifo_lock; +}; + +struct cxio_qpid { + TAILQ_ENTRY(cxio_qpid) entry; + u32 qpid; +}; + +struct cxio_ucontext { + TAILQ_HEAD(, cxio_qpid) qpids; + struct mtx lock; +}; + +struct cxio_rdev { + char dev_name[T3_MAX_DEV_NAME_LEN]; + struct t3cdev *t3cdev_p; + struct rdma_info rnic_info; + struct adap_ports port_info; + struct cxio_hal_resource *rscp; + struct cxio_hal_ctrl_qp ctrl_qp; + void *ulp; + unsigned long qpshift; + u32 qpnr; + u32 qpmask; + struct cxio_ucontext uctx; + struct gen_pool *pbl_pool; + struct gen_pool *rqt_pool; + struct ifnet *ifp; + TAILQ_ENTRY(cxio_rdev) entry; +}; + +static __inline int +cxio_num_stags(struct cxio_rdev *rdev_p) +{ + return min((int)T3_MAX_NUM_STAG, (int)((rdev_p->rnic_info.tpt_top - rdev_p->rnic_info.tpt_base) >> 5)); +} + +typedef void (*cxio_hal_ev_callback_func_t) (struct cxio_rdev * rdev_p, + struct mbuf * m); + +#define RSPQ_CQID(rsp) (be32toh(rsp->cq_ptrid) & 0xffff) +#define RSPQ_CQPTR(rsp) ((be32toh(rsp->cq_ptrid) >> 16) & 0xffff) +#define RSPQ_GENBIT(rsp) ((be32toh(rsp->flags) >> 16) & 1) +#define RSPQ_OVERFLOW(rsp) ((be32toh(rsp->flags) >> 17) & 1) +#define RSPQ_AN(rsp) ((be32toh(rsp->flags) >> 18) & 1) +#define RSPQ_SE(rsp) ((be32toh(rsp->flags) >> 19) & 1) +#define RSPQ_NOTIFY(rsp) ((be32toh(rsp->flags) >> 20) & 1) +#define RSPQ_CQBRANCH(rsp) ((be32toh(rsp->flags) >> 21) & 1) +#define RSPQ_CREDIT_THRESH(rsp) ((be32toh(rsp->flags) >> 22) & 1) + +struct respQ_msg_t { + __be32 flags; /* flit 0 */ + __be32 cq_ptrid; + __be64 rsvd; /* flit 1 */ + struct t3_cqe cqe; /* flits 2-3 */ +}; + +enum t3_cq_opcode { + CQ_ARM_AN = 0x2, + CQ_ARM_SE = 0x6, + CQ_FORCE_AN = 0x3, + CQ_CREDIT_UPDATE = 0x7 +}; + +int cxio_rdev_open(struct cxio_rdev *rdev); +void cxio_rdev_close(struct cxio_rdev *rdev); +int cxio_hal_cq_op(struct cxio_rdev *rdev, struct t3_cq *cq, + enum t3_cq_opcode op, u32 
credit); +int cxio_create_cq(struct cxio_rdev *rdev, struct t3_cq *cq); +int cxio_destroy_cq(struct cxio_rdev *rdev, struct t3_cq *cq); +int cxio_resize_cq(struct cxio_rdev *rdev, struct t3_cq *cq); +void cxio_release_ucontext(struct cxio_rdev *rdev, struct cxio_ucontext *uctx); +void cxio_init_ucontext(struct cxio_rdev *rdev, struct cxio_ucontext *uctx); +int cxio_create_qp(struct cxio_rdev *rdev, u32 kernel_domain, struct t3_wq *wq, + struct cxio_ucontext *uctx); +int cxio_destroy_qp(struct cxio_rdev *rdev, struct t3_wq *wq, + struct cxio_ucontext *uctx); +int cxio_peek_cq(struct t3_wq *wr, struct t3_cq *cq, int opcode); +int cxio_register_phys_mem(struct cxio_rdev *rdev, u32 * stag, u32 pdid, + enum tpt_mem_perm perm, u32 zbva, u64 to, u32 len, + u8 page_size, __be64 *pbl, u32 *pbl_size, + u32 *pbl_addr); +int cxio_reregister_phys_mem(struct cxio_rdev *rdev, u32 * stag, u32 pdid, + enum tpt_mem_perm perm, u32 zbva, u64 to, u32 len, + u8 page_size, __be64 *pbl, u32 *pbl_size, + u32 *pbl_addr); +int cxio_dereg_mem(struct cxio_rdev *rdev, u32 stag, u32 pbl_size, + u32 pbl_addr); +int cxio_allocate_window(struct cxio_rdev *rdev, u32 * stag, u32 pdid); +int cxio_deallocate_window(struct cxio_rdev *rdev, u32 stag); +int cxio_rdma_init(struct cxio_rdev *rdev, struct t3_rdma_init_attr *attr); +void cxio_register_ev_cb(cxio_hal_ev_callback_func_t ev_cb); +void cxio_unregister_ev_cb(cxio_hal_ev_callback_func_t ev_cb); +u32 cxio_hal_get_pdid(struct cxio_hal_resource *rscp); +void cxio_hal_put_pdid(struct cxio_hal_resource *rscp, u32 pdid); +int cxio_hal_init(void); +void cxio_hal_exit(void); +void cxio_flush_rq(struct t3_wq *wq, struct t3_cq *cq, int count); +void cxio_flush_sq(struct t3_wq *wq, struct t3_cq *cq, int count); +void cxio_count_rcqes(struct t3_cq *cq, struct t3_wq *wq, int *count); +void cxio_count_scqes(struct t3_cq *cq, struct t3_wq *wq, int *count); +void cxio_flush_hw_cq(struct t3_cq *cq); +int cxio_poll_cq(struct t3_wq *wq, struct t3_cq *cq, struct t3_cqe *cqe, + u8 *cqe_flushed, u64 *cookie, u32 *credit); + +#define MOD "iw_cxgb: " + +#ifdef DEBUG +void cxio_dump_tpt(struct cxio_rdev *rev, u32 stag); +void cxio_dump_pbl(struct cxio_rdev *rev, u32 pbl_addr, uint32_t len, u8 shift); +void cxio_dump_wqe(union t3_wr *wqe); +void cxio_dump_wce(struct t3_cqe *wce); +void cxio_dump_rqt(struct cxio_rdev *rdev, u32 hwtid, int nents); +void cxio_dump_tcb(struct cxio_rdev *rdev, u32 hwtid); +#endif + + + static unsigned char hiBitSetTab[] = { + 0, 1, 2, 2, 3, 3, 3, 3, + 4, 4, 4, 4, 4, 4, 4, 4, + 5, 5, 5, 5, 5, 5, 5, 5, + 5, 5, 5, 5, 5, 5, 5, 5, + 6, 6, 6, 6, 6, 6, 6, 6, + 6, 6, 6, 6, 6, 6, 6, 6, + 6, 6, 6, 6, 6, 6, 6, 6, + 6, 6, 6, 6, 6, 6, 6, 6, + 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7 + +}; + + +static __inline +int ilog2(unsigned long val) +{ + unsigned long tmp; + + tmp = val >> 24; + if (tmp) { + return hiBitSetTab[tmp] + 23; + } + tmp = (val >> 16) & 0xff; + if (tmp) { + return hiBitSetTab[tmp] + 15; + } + tmp = (val >> 8) & 0xff; + if (tmp) { + return hiBitSetTab[tmp] + 7; + + } + return hiBitSetTab[val & 0xff] - 1; +} + +#define cxfree(a) free((a), M_DEVBUF); +#define kmalloc(a, b) malloc((a), M_DEVBUF, (b)) +#define kzalloc(a, b) malloc((a), M_DEVBUF, (b)|M_ZERO) + +static __inline __attribute__((const)) +unsigned long roundup_pow_of_two(unsigned long n) +{ + return 1UL << flsl(n - 1); +} + +#define PAGE_ALIGN(x) 
roundup2((x), PAGE_SIZE) + +#include <sys/blist.h> +struct gen_pool { + blist_t gen_list; + daddr_t gen_base; + int gen_chunk_shift; + struct mtx gen_lock; +}; + +static __inline struct gen_pool * +gen_pool_create(daddr_t base, u_int chunk_shift, u_int len) +{ + struct gen_pool *gp; + + gp = malloc(sizeof(struct gen_pool), M_DEVBUF, M_NOWAIT); + if (gp == NULL) + return (NULL); + + gp->gen_list = blist_create(len >> chunk_shift, M_NOWAIT); + if (gp->gen_list == NULL) { + free(gp, M_DEVBUF); + return (NULL); + } + blist_free(gp->gen_list, 0, len >> chunk_shift); + gp->gen_base = base; + gp->gen_chunk_shift = chunk_shift; + mtx_init(&gp->gen_lock, "genpool", NULL, MTX_DUPOK|MTX_DEF); + + return (gp); +} + +static __inline unsigned long +gen_pool_alloc(struct gen_pool *gp, int size) +{ + int chunks; + daddr_t blkno; + + chunks = (size + (1<<gp->gen_chunk_shift) - 1) >> gp->gen_chunk_shift; + mtx_lock(&gp->gen_lock); + blkno = blist_alloc(gp->gen_list, chunks); + mtx_unlock(&gp->gen_lock); + + if (blkno == SWAPBLK_NONE) + return (0); + + return (gp->gen_base + ((1 << gp->gen_chunk_shift) * blkno)); +} + +static __inline void +gen_pool_free(struct gen_pool *gp, daddr_t address, int size) +{ + int chunks; + daddr_t blkno; + + chunks = (size + (1<<gp->gen_chunk_shift) - 1) >> gp->gen_chunk_shift; + blkno = (address - gp->gen_base) / (1 << gp->gen_chunk_shift); + mtx_lock(&gp->gen_lock); + blist_free(gp->gen_list, blkno, chunks); + mtx_unlock(&gp->gen_lock); +} + +static __inline void +gen_pool_destroy(struct gen_pool *gp) +{ + blist_destroy(gp->gen_list); + free(gp, M_DEVBUF); +} + +#define cxio_wait(ctx, lockp, cond) \ +({ \ + int __ret = 0; \ + mtx_lock(lockp); \ + while (!cond) { \ + msleep(ctx, lockp, 0, "cxio_wait", hz); \ + if (SIGPENDING(curthread)) { \ + __ret = ERESTART; \ + break; \ + } \ + } \ + mtx_unlock(lockp); \ + __ret; \ +}) +extern struct cxio_rdev *cxio_hal_find_rdev_by_t3cdev(struct t3cdev *tdev); + +#define KTR_IW_CXGB KTR_SPARE4 + +#endif diff --git a/sys/dev/cxgb/ulp/iw_cxgb/iw_cxgb_mem.c b/sys/dev/cxgb/ulp/iw_cxgb/iw_cxgb_mem.c new file mode 100644 index 0000000..df06f87 --- /dev/null +++ b/sys/dev/cxgb/ulp/iw_cxgb/iw_cxgb_mem.c @@ -0,0 +1,219 @@ +/************************************************************************** + +Copyright (c) 2007, Chelsio Inc. +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + + 1. Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimer. + + 2. Neither the name of the Chelsio Corporation nor the names of its + contributors may be used to endorse or promote products derived from + this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +POSSIBILITY OF SUCH DAMAGE. + +***************************************************************************/ +#include <sys/cdefs.h> +__FBSDID("$FreeBSD$"); + +#include <sys/param.h> +#include <sys/systm.h> +#include <sys/kernel.h> +#include <sys/bus.h> +#include <sys/module.h> +#include <sys/pciio.h> +#include <sys/conf.h> +#include <machine/bus.h> +#include <machine/resource.h> +#include <sys/bus_dma.h> +#include <sys/rman.h> +#include <sys/ioccom.h> +#include <sys/mbuf.h> +#include <sys/mutex.h> +#include <sys/rwlock.h> +#include <sys/linker.h> +#include <sys/firmware.h> +#include <sys/socket.h> +#include <sys/sockio.h> +#include <sys/smp.h> +#include <sys/sysctl.h> +#include <sys/syslog.h> +#include <sys/queue.h> +#include <sys/taskqueue.h> +#include <sys/proc.h> +#include <sys/queue.h> +#include <sys/libkern.h> + +#include <netinet/in.h> + +#include <contrib/rdma/ib_verbs.h> +#include <contrib/rdma/ib_umem.h> +#include <contrib/rdma/ib_user_verbs.h> + + +#ifdef CONFIG_DEFINED +#include <cxgb_include.h> +#include <ulp/iw_cxgb/iw_cxgb_wr.h> +#include <ulp/iw_cxgb/iw_cxgb_hal.h> +#include <ulp/iw_cxgb/iw_cxgb_provider.h> +#include <ulp/iw_cxgb/iw_cxgb_cm.h> +#include <ulp/iw_cxgb/iw_cxgb.h> +#include <ulp/iw_cxgb/iw_cxgb_resource.h> +#include <ulp/iw_cxgb/iw_cxgb_user.h> +#else +#include <dev/cxgb/cxgb_include.h> +#include <dev/cxgb/ulp/iw_cxgb/iw_cxgb_wr.h> +#include <dev/cxgb/ulp/iw_cxgb/iw_cxgb_hal.h> +#include <dev/cxgb/ulp/iw_cxgb/iw_cxgb_provider.h> +#include <dev/cxgb/ulp/iw_cxgb/iw_cxgb_cm.h> +#include <dev/cxgb/ulp/iw_cxgb/iw_cxgb.h> +#include <dev/cxgb/ulp/iw_cxgb/iw_cxgb_resource.h> +#include <dev/cxgb/ulp/iw_cxgb/iw_cxgb_user.h> +#endif + + +int iwch_register_mem(struct iwch_dev *rhp, struct iwch_pd *php, + struct iwch_mr *mhp, + int shift, + __be64 *page_list) +{ + u32 stag; + u32 mmid; + + + if (cxio_register_phys_mem(&rhp->rdev, + &stag, mhp->attr.pdid, + mhp->attr.perms, + mhp->attr.zbva, + mhp->attr.va_fbo, + mhp->attr.len, + shift-12, + page_list, + &mhp->attr.pbl_size, &mhp->attr.pbl_addr)) + return (-ENOMEM); + mhp->attr.state = 1; + mhp->attr.stag = stag; + mmid = stag >> 8; + mhp->ibmr.rkey = mhp->ibmr.lkey = stag; + insert_handle(rhp, &rhp->mmidr, mhp, mmid); + CTR3(KTR_IW_CXGB, "%s mmid 0x%x mhp %p", __FUNCTION__, mmid, mhp); + return 0; +} + +int iwch_reregister_mem(struct iwch_dev *rhp, struct iwch_pd *php, + struct iwch_mr *mhp, + int shift, + __be64 *page_list, + int npages) +{ + u32 stag; + u32 mmid; + + + /* We could support this... 
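 * (a larger page list would presumably need a bigger PBL allocation and
 * an updated TPT entry; as written the original pbl_addr is reused, so a
 * request that no longer fits is rejected with ENOMEM below).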
*/ + if (npages > mhp->attr.pbl_size) + return (-ENOMEM); + + stag = mhp->attr.stag; + if (cxio_reregister_phys_mem(&rhp->rdev, + &stag, mhp->attr.pdid, + mhp->attr.perms, + mhp->attr.zbva, + mhp->attr.va_fbo, + mhp->attr.len, + shift-12, + page_list, + &mhp->attr.pbl_size, &mhp->attr.pbl_addr)) + return (-ENOMEM); + mhp->attr.state = 1; + mhp->attr.stag = stag; + mmid = stag >> 8; + mhp->ibmr.rkey = mhp->ibmr.lkey = stag; + insert_handle(rhp, &rhp->mmidr, mhp, mmid); + CTR3(KTR_IW_CXGB, "%s mmid 0x%x mhp %p", __FUNCTION__, mmid, mhp); + return 0; +} + +int build_phys_page_list(struct ib_phys_buf *buffer_list, + int num_phys_buf, + u64 *iova_start, + u64 *total_size, + int *npages, + int *shift, + __be64 **page_list) +{ + u64 mask; + int i, j, n; + + mask = 0; + *total_size = 0; + for (i = 0; i < num_phys_buf; ++i) { + if (i != 0 && buffer_list[i].addr & ~PAGE_MASK) + return (-EINVAL); + if (i != 0 && i != num_phys_buf - 1 && + (buffer_list[i].size & ~PAGE_MASK)) + return (-EINVAL); + *total_size += buffer_list[i].size; + if (i > 0) + mask |= buffer_list[i].addr; + else + mask |= buffer_list[i].addr & PAGE_MASK; + if (i != num_phys_buf - 1) + mask |= buffer_list[i].addr + buffer_list[i].size; + else + mask |= (buffer_list[i].addr + buffer_list[i].size + + PAGE_SIZE - 1) & PAGE_MASK; + } + + if (*total_size > 0xFFFFFFFFULL) + return (-ENOMEM); + + /* Find largest page shift we can use to cover buffers */ + for (*shift = PAGE_SHIFT; *shift < 27; ++(*shift)) + if ((1ULL << *shift) & mask) + break; + + buffer_list[0].size += buffer_list[0].addr & ((1ULL << *shift) - 1); + buffer_list[0].addr &= ~0ull << *shift; + + *npages = 0; + for (i = 0; i < num_phys_buf; ++i) + *npages += (buffer_list[i].size + + (1ULL << *shift) - 1) >> *shift; + + if (!*npages) + return (-EINVAL); + + *page_list = kmalloc(sizeof(u64) * *npages, M_NOWAIT); + if (!*page_list) + return (-ENOMEM); + + n = 0; + for (i = 0; i < num_phys_buf; ++i) + for (j = 0; + j < (buffer_list[i].size + (1ULL << *shift) - 1) >> *shift; + ++j) + (*page_list)[n++] = htobe64(buffer_list[i].addr + + ((u64) j << *shift)); + + CTR6(KTR_IW_CXGB, "%s va 0x%llx mask 0x%llx shift %d len %lld pbl_size %d", + __FUNCTION__, (unsigned long long) *iova_start, + (unsigned long long) mask, *shift, (unsigned long long) *total_size, + *npages); + + return 0; + +} diff --git a/sys/dev/cxgb/ulp/iw_cxgb/iw_cxgb_provider.c b/sys/dev/cxgb/ulp/iw_cxgb/iw_cxgb_provider.c new file mode 100644 index 0000000..4ef7dc5 --- /dev/null +++ b/sys/dev/cxgb/ulp/iw_cxgb/iw_cxgb_provider.c @@ -0,0 +1,1295 @@ +/************************************************************************** + +Copyright (c) 2007, Chelsio Inc. +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + + 1. Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimer. + + 2. Neither the name of the Chelsio Corporation nor the names of its + contributors may be used to endorse or promote products derived from + this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +POSSIBILITY OF SUCH DAMAGE. + +***************************************************************************/ +#include <sys/cdefs.h> +__FBSDID("$FreeBSD$"); + +#include <sys/param.h> +#include <sys/systm.h> +#include <sys/kernel.h> +#include <sys/bus.h> +#include <sys/module.h> +#include <sys/pciio.h> +#include <sys/conf.h> +#include <machine/bus.h> +#include <machine/resource.h> +#include <sys/bus_dma.h> +#include <sys/rman.h> +#include <sys/ioccom.h> +#include <sys/mbuf.h> +#include <sys/mutex.h> +#include <sys/rwlock.h> +#include <sys/linker.h> +#include <sys/firmware.h> +#include <sys/socket.h> +#include <sys/sockio.h> +#include <sys/smp.h> +#include <sys/sysctl.h> +#include <sys/syslog.h> +#include <sys/queue.h> +#include <sys/taskqueue.h> +#include <sys/proc.h> +#include <sys/queue.h> + +#include <netinet/in.h> + + +#include <vm/vm.h> +#include <vm/pmap.h> + +#include <contrib/rdma/ib_verbs.h> +#include <contrib/rdma/ib_umem.h> +#include <contrib/rdma/ib_user_verbs.h> + + + + +#ifdef CONFIG_DEFINED +#include <cxgb_include.h> +#include <ulp/iw_cxgb/iw_cxgb_wr.h> +#include <ulp/iw_cxgb/iw_cxgb_hal.h> +#include <ulp/iw_cxgb/iw_cxgb_provider.h> +#include <ulp/iw_cxgb/iw_cxgb_cm.h> +#include <ulp/iw_cxgb/iw_cxgb.h> +#include <ulp/iw_cxgb/iw_cxgb_resource.h> +#include <ulp/iw_cxgb/iw_cxgb_user.h> +#else +#include <dev/cxgb/cxgb_include.h> +#include <dev/cxgb/ulp/iw_cxgb/iw_cxgb_wr.h> +#include <dev/cxgb/ulp/iw_cxgb/iw_cxgb_hal.h> +#include <dev/cxgb/ulp/iw_cxgb/iw_cxgb_provider.h> +#include <dev/cxgb/ulp/iw_cxgb/iw_cxgb_cm.h> +#include <dev/cxgb/ulp/iw_cxgb/iw_cxgb.h> +#include <dev/cxgb/ulp/iw_cxgb/iw_cxgb_resource.h> +#include <dev/cxgb/ulp/iw_cxgb/iw_cxgb_user.h> +#endif + + +static int +iwch_modify_port(struct ib_device *ibdev, + u8 port, int port_modify_mask, + struct ib_port_modify *props) +{ + return (-ENOSYS); +} + +static struct ib_ah * +iwch_ah_create(struct ib_pd *pd, + struct ib_ah_attr *ah_attr) +{ + return ERR_PTR(-ENOSYS); +} + +static int +iwch_ah_destroy(struct ib_ah *ah) +{ + return (-ENOSYS); +} + +static int iwch_multicast_attach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid) +{ + return (-ENOSYS); +} + +static int +iwch_multicast_detach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid) +{ + return (-ENOSYS); +} + +static int +iwch_process_mad(struct ib_device *ibdev, + int mad_flags, + u8 port_num, + struct ib_wc *in_wc, + struct ib_grh *in_grh, + struct ib_mad *in_mad, struct ib_mad *out_mad) +{ + return (-ENOSYS); +} + +static int +iwch_dealloc_ucontext(struct ib_ucontext *context) +{ + struct iwch_dev *rhp = to_iwch_dev(context->device); + struct iwch_ucontext *ucontext = to_iwch_ucontext(context); + struct iwch_mm_entry *mm, *tmp; + + CTR2(KTR_IW_CXGB, "%s context %p", __FUNCTION__, context); + TAILQ_FOREACH_SAFE(mm, &ucontext->mmaps, entry, tmp) { + TAILQ_REMOVE(&ucontext->mmaps, mm, entry); + cxfree(mm); + } + cxio_release_ucontext(&rhp->rdev, &ucontext->uctx); + cxfree(ucontext); + return 0; +} + +static struct ib_ucontext * +iwch_alloc_ucontext(struct ib_device *ibdev, struct 
ib_udata *udata) +{ + struct iwch_ucontext *context; + struct iwch_dev *rhp = to_iwch_dev(ibdev); + + CTR2(KTR_IW_CXGB, "%s ibdev %p", __FUNCTION__, ibdev); + context = malloc(sizeof(*context), M_DEVBUF, M_ZERO|M_NOWAIT); + if (!context) + return ERR_PTR(-ENOMEM); + cxio_init_ucontext(&rhp->rdev, &context->uctx); + TAILQ_INIT(&context->mmaps); + mtx_init(&context->mmap_lock, "ucontext mmap", NULL, MTX_DEF); + return &context->ibucontext; +} + +static int +iwch_destroy_cq(struct ib_cq *ib_cq) +{ + struct iwch_cq *chp; + + CTR2(KTR_IW_CXGB, "%s ib_cq %p", __FUNCTION__, ib_cq); + chp = to_iwch_cq(ib_cq); + + remove_handle(chp->rhp, &chp->rhp->cqidr, chp->cq.cqid); + mtx_lock(&chp->lock); + if (--chp->refcnt) + msleep(chp, &chp->lock, 0, "iwch_destroy_cq", 0); + mtx_unlock(&chp->lock); + + cxio_destroy_cq(&chp->rhp->rdev, &chp->cq); + cxfree(chp); + return 0; +} + +static struct ib_cq * +iwch_create_cq(struct ib_device *ibdev, int entries, int vector, + struct ib_ucontext *ib_context, + struct ib_udata *udata) +{ + struct iwch_dev *rhp; + struct iwch_cq *chp; + struct iwch_create_cq_resp uresp; + struct iwch_create_cq_req ureq; + struct iwch_ucontext *ucontext = NULL; + + CTR3(KTR_IW_CXGB, "%s ib_dev %p entries %d", __FUNCTION__, ibdev, entries); + rhp = to_iwch_dev(ibdev); + chp = malloc(sizeof(*chp), M_DEVBUF, M_NOWAIT|M_ZERO); + if (!chp) { + return ERR_PTR(-ENOMEM); + } + if (ib_context) { + ucontext = to_iwch_ucontext(ib_context); + if (!t3a_device(rhp)) { + if (ib_copy_from_udata(&ureq, udata, sizeof (ureq))) { + cxfree(chp); + return ERR_PTR(-EFAULT); + } + chp->user_rptr_addr = (u32 /*__user */*)(unsigned long)ureq.user_rptr_addr; + } + } + + if (t3a_device(rhp)) { + + /* + * T3A: Add some fluff to handle extra CQEs inserted + * for various errors. + * Additional CQE possibilities: + * TERMINATE, + * incoming RDMA WRITE Failures + * incoming RDMA READ REQUEST FAILUREs + * NOTE: We cannot ensure the CQ won't overflow. + */ + entries += 16; + } + entries = roundup_pow_of_two(entries); + chp->cq.size_log2 = ilog2(entries); + + if (cxio_create_cq(&rhp->rdev, &chp->cq)) { + cxfree(chp); + return ERR_PTR(-ENOMEM); + } + chp->rhp = rhp; + chp->ibcq.cqe = 1 << chp->cq.size_log2; + mtx_init(&chp->lock, "cxgb cq", NULL, MTX_DEF|MTX_DUPOK); + chp->refcnt = 1; + insert_handle(rhp, &rhp->cqidr, chp, chp->cq.cqid); + + if (ucontext) { + struct iwch_mm_entry *mm; + + mm = kmalloc(sizeof *mm, M_NOWAIT); + if (!mm) { + iwch_destroy_cq(&chp->ibcq); + return ERR_PTR(-ENOMEM); + } + uresp.cqid = chp->cq.cqid; + uresp.size_log2 = chp->cq.size_log2; + mtx_lock(&ucontext->mmap_lock); + uresp.key = ucontext->key; + ucontext->key += PAGE_SIZE; + mtx_unlock(&ucontext->mmap_lock); + if (ib_copy_to_udata(udata, &uresp, sizeof (uresp))) { + cxfree(mm); + iwch_destroy_cq(&chp->ibcq); + return ERR_PTR(-EFAULT); + } + mm->key = uresp.key; + mm->addr = vtophys(chp->cq.queue); + mm->len = PAGE_ALIGN((1UL << uresp.size_log2) * + sizeof (struct t3_cqe)); + insert_mmap(ucontext, mm); + } + CTR4(KTR_IW_CXGB, "created cqid 0x%0x chp %p size 0x%0x, dma_addr 0x%0llx", + chp->cq.cqid, chp, (1 << chp->cq.size_log2), + (unsigned long long) chp->cq.dma_addr); + return &chp->ibcq; +} + +static int +iwch_resize_cq(struct ib_cq *cq, int cqe, struct ib_udata *udata) +{ +#ifdef notyet + struct iwch_cq *chp = to_iwch_cq(cq); + struct t3_cq oldcq, newcq; + int ret; + + CTR3(KTR_IW_CXGB, "%s ib_cq %p cqe %d", __FUNCTION__, cq, cqe); + + /* We don't downsize... 
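 * (note that this whole resize path is compiled out under "notyet";
 * without it iwch_resize_cq() simply returns ENOSYS).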
*/ + if (cqe <= cq->cqe) + return 0; + + /* create new t3_cq with new size */ + cqe = roundup_pow_of_two(cqe+1); + newcq.size_log2 = ilog2(cqe); + + /* Dont allow resize to less than the current wce count */ + if (cqe < Q_COUNT(chp->cq.rptr, chp->cq.wptr)) { + return (-ENOMEM); + } + + /* Quiesce all QPs using this CQ */ + ret = iwch_quiesce_qps(chp); + if (ret) { + return (ret); + } + + ret = cxio_create_cq(&chp->rhp->rdev, &newcq); + if (ret) { + return (ret); + } + + /* copy CQEs */ + memcpy(newcq.queue, chp->cq.queue, (1 << chp->cq.size_log2) * + sizeof(struct t3_cqe)); + + /* old iwch_qp gets new t3_cq but keeps old cqid */ + oldcq = chp->cq; + chp->cq = newcq; + chp->cq.cqid = oldcq.cqid; + + /* resize new t3_cq to update the HW context */ + ret = cxio_resize_cq(&chp->rhp->rdev, &chp->cq); + if (ret) { + chp->cq = oldcq; + return ret; + } + chp->ibcq.cqe = (1<<chp->cq.size_log2) - 1; + + /* destroy old t3_cq */ + oldcq.cqid = newcq.cqid; + ret = cxio_destroy_cq(&chp->rhp->rdev, &oldcq); + if (ret) { + log(LOG_ERR, "%s - cxio_destroy_cq failed %d\n", + __FUNCTION__, ret); + } + + /* add user hooks here */ + + /* resume qps */ + ret = iwch_resume_qps(chp); + return ret; +#else + return (-ENOSYS); +#endif +} + +static int +iwch_arm_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags flags) +{ + struct iwch_dev *rhp; + struct iwch_cq *chp; + enum t3_cq_opcode cq_op; + int err; + u32 rptr; + + chp = to_iwch_cq(ibcq); + rhp = chp->rhp; + if ((flags & IB_CQ_SOLICITED_MASK) == IB_CQ_SOLICITED) + cq_op = CQ_ARM_SE; + else + cq_op = CQ_ARM_AN; + if (chp->user_rptr_addr) { + if (copyin(&rptr, chp->user_rptr_addr, 4)) + return (-EFAULT); + mtx_lock(&chp->lock); + chp->cq.rptr = rptr; + } else + mtx_lock(&chp->lock); + CTR2(KTR_IW_CXGB, "%s rptr 0x%x", __FUNCTION__, chp->cq.rptr); + err = cxio_hal_cq_op(&rhp->rdev, &chp->cq, cq_op, 0); + mtx_unlock(&chp->lock); + if (err < 0) + log(LOG_ERR, "Error %d rearming CQID 0x%x\n", err, + chp->cq.cqid); + if (err > 0 && !(flags & IB_CQ_REPORT_MISSED_EVENTS)) + err = 0; + return err; +} + +#ifdef notyet +static int +iwch_mmap(struct ib_ucontext *context, struct vm_area_struct *vma) +{ +#ifdef notyet + int len = vma->vm_end - vma->vm_start; + u32 key = vma->vm_pgoff << PAGE_SHIFT; + struct cxio_rdev *rdev_p; + int ret = 0; + struct iwch_mm_entry *mm; + struct iwch_ucontext *ucontext; + u64 addr; + + CTR4(KTR_IW_CXGB, "%s pgoff 0x%lx key 0x%x len %d", __FUNCTION__, vma->vm_pgoff, + key, len); + + if (vma->vm_start & (PAGE_SIZE-1)) { + return (-EINVAL); + } + + rdev_p = &(to_iwch_dev(context->device)->rdev); + ucontext = to_iwch_ucontext(context); + + mm = remove_mmap(ucontext, key, len); + if (!mm) + return (-EINVAL); + addr = mm->addr; + cxfree(mm); + + if ((addr >= rdev_p->rnic_info.udbell_physbase) && + (addr < (rdev_p->rnic_info.udbell_physbase + + rdev_p->rnic_info.udbell_len))) { + + /* + * Map T3 DB register. + */ + if (vma->vm_flags & VM_READ) { + return (-EPERM); + } + + vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot); + vma->vm_flags |= VM_DONTCOPY | VM_DONTEXPAND; + vma->vm_flags &= ~VM_MAYREAD; + ret = io_remap_pfn_range(vma, vma->vm_start, + addr >> PAGE_SHIFT, + len, vma->vm_page_prot); + } else { + + /* + * Map WQ or CQ contig dma memory... 
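 * The key being looked up here is the one handed to userspace in
 * uresp.key by iwch_create_cq()/iwch_create_qp(); remove_mmap() maps it
 * back to the physical address that insert_mmap() recorded.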
+ */ + ret = remap_pfn_range(vma, vma->vm_start, + addr >> PAGE_SHIFT, + len, vma->vm_page_prot); + } + + return ret; +#endif + return (0); +} +#endif + +static int iwch_deallocate_pd(struct ib_pd *pd) +{ + struct iwch_dev *rhp; + struct iwch_pd *php; + + php = to_iwch_pd(pd); + rhp = php->rhp; + CTR3(KTR_IW_CXGB, "%s ibpd %p pdid 0x%x", __FUNCTION__, pd, php->pdid); + cxio_hal_put_pdid(rhp->rdev.rscp, php->pdid); + cxfree(php); + return 0; +} + +static struct ib_pd *iwch_allocate_pd(struct ib_device *ibdev, + struct ib_ucontext *context, + struct ib_udata *udata) +{ + struct iwch_pd *php; + u32 pdid; + struct iwch_dev *rhp; + + CTR2(KTR_IW_CXGB, "%s ibdev %p", __FUNCTION__, ibdev); + rhp = (struct iwch_dev *) ibdev; + pdid = cxio_hal_get_pdid(rhp->rdev.rscp); + if (!pdid) + return ERR_PTR(-EINVAL); + php = malloc(sizeof(*php), M_DEVBUF, M_ZERO|M_NOWAIT); + if (!php) { + cxio_hal_put_pdid(rhp->rdev.rscp, pdid); + return ERR_PTR(-ENOMEM); + } + php->pdid = pdid; + php->rhp = rhp; + if (context) { + if (ib_copy_to_udata(udata, &php->pdid, sizeof (__u32))) { + iwch_deallocate_pd(&php->ibpd); + return ERR_PTR(-EFAULT); + } + } + CTR3(KTR_IW_CXGB, "%s pdid 0x%0x ptr 0x%p", __FUNCTION__, pdid, php); + return &php->ibpd; +} + +static int iwch_dereg_mr(struct ib_mr *ib_mr) +{ + struct iwch_dev *rhp; + struct iwch_mr *mhp; + u32 mmid; + + CTR2(KTR_IW_CXGB, "%s ib_mr %p", __FUNCTION__, ib_mr); + /* There can be no memory windows */ + if (atomic_load_acq_int(&ib_mr->usecnt)) + return (-EINVAL); + + mhp = to_iwch_mr(ib_mr); + rhp = mhp->rhp; + mmid = mhp->attr.stag >> 8; + cxio_dereg_mem(&rhp->rdev, mhp->attr.stag, mhp->attr.pbl_size, + mhp->attr.pbl_addr); + remove_handle(rhp, &rhp->mmidr, mmid); + if (mhp->kva) + cxfree((void *) (unsigned long) mhp->kva); + if (mhp->umem) + ib_umem_release(mhp->umem); + CTR3(KTR_IW_CXGB, "%s mmid 0x%x ptr %p", __FUNCTION__, mmid, mhp); + cxfree(mhp); + return 0; +} + +static struct ib_mr *iwch_register_phys_mem(struct ib_pd *pd, + struct ib_phys_buf *buffer_list, + int num_phys_buf, + int acc, + u64 *iova_start) +{ + __be64 *page_list; + int shift; + u64 total_size; + int npages; + struct iwch_dev *rhp; + struct iwch_pd *php; + struct iwch_mr *mhp; + int ret; + + CTR2(KTR_IW_CXGB, "%s ib_pd %p", __FUNCTION__, pd); + php = to_iwch_pd(pd); + rhp = php->rhp; + + mhp = malloc(sizeof(*mhp), M_DEVBUF, M_ZERO|M_NOWAIT); + if (!mhp) + return ERR_PTR(-ENOMEM); + + /* First check that we have enough alignment */ + if ((*iova_start & ~PAGE_MASK) != (buffer_list[0].addr & ~PAGE_MASK)) { + ret = -EINVAL; + goto err; + } + + if (num_phys_buf > 1 && + ((buffer_list[0].addr + buffer_list[0].size) & ~PAGE_MASK)) { + ret = -EINVAL; + goto err; + } + + ret = build_phys_page_list(buffer_list, num_phys_buf, iova_start, + &total_size, &npages, &shift, &page_list); + if (ret) + goto err; + + mhp->rhp = rhp; + mhp->attr.pdid = php->pdid; + mhp->attr.zbva = 0; + + mhp->attr.perms = iwch_ib_to_tpt_access(acc); + mhp->attr.va_fbo = *iova_start; + mhp->attr.page_size = shift - 12; + + mhp->attr.len = (u32) total_size; + mhp->attr.pbl_size = npages; + ret = iwch_register_mem(rhp, php, mhp, shift, page_list); + cxfree(page_list); + if (ret) { + goto err; + } + return &mhp->ibmr; +err: + cxfree(mhp); + return ERR_PTR(-ret); + +} + +static int iwch_reregister_phys_mem(struct ib_mr *mr, + int mr_rereg_mask, + struct ib_pd *pd, + struct ib_phys_buf *buffer_list, + int num_phys_buf, + int acc, u64 * iova_start) +{ + + struct iwch_mr mh, *mhp; + struct iwch_pd *php; + struct iwch_dev *rhp; + __be64 
*page_list = NULL; + int shift = 0; + u64 total_size; + int npages; + int ret; + + CTR3(KTR_IW_CXGB, "%s ib_mr %p ib_pd %p", __FUNCTION__, mr, pd); + + /* There can be no memory windows */ + if (atomic_load_acq_int(&mr->usecnt)) + return (-EINVAL); + + mhp = to_iwch_mr(mr); + rhp = mhp->rhp; + php = to_iwch_pd(mr->pd); + + /* make sure we are on the same adapter */ + if (rhp != php->rhp) + return (-EINVAL); + + memcpy(&mh, mhp, sizeof *mhp); + + if (mr_rereg_mask & IB_MR_REREG_PD) + php = to_iwch_pd(pd); + if (mr_rereg_mask & IB_MR_REREG_ACCESS) + mh.attr.perms = iwch_ib_to_tpt_access(acc); + if (mr_rereg_mask & IB_MR_REREG_TRANS) { + ret = build_phys_page_list(buffer_list, num_phys_buf, + iova_start, + &total_size, &npages, + &shift, &page_list); + if (ret) + return ret; + } + + ret = iwch_reregister_mem(rhp, php, &mh, shift, page_list, npages); + cxfree(page_list); + if (ret) { + return ret; + } + if (mr_rereg_mask & IB_MR_REREG_PD) + mhp->attr.pdid = php->pdid; + if (mr_rereg_mask & IB_MR_REREG_ACCESS) + mhp->attr.perms = iwch_ib_to_tpt_access(acc); + if (mr_rereg_mask & IB_MR_REREG_TRANS) { + mhp->attr.zbva = 0; + mhp->attr.va_fbo = *iova_start; + mhp->attr.page_size = shift - 12; + mhp->attr.len = (u32) total_size; + mhp->attr.pbl_size = npages; + } + + return 0; +} + + +static struct ib_mr *iwch_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, + u64 virt, int acc, struct ib_udata *udata) +{ + __be64 *pages; + int shift, i, n; + int err = 0; + struct ib_umem_chunk *chunk; + struct iwch_dev *rhp; + struct iwch_pd *php; + struct iwch_mr *mhp; + struct iwch_reg_user_mr_resp uresp; +#ifdef notyet + int j, k, len; +#endif + + CTR2(KTR_IW_CXGB, "%s ib_pd %p", __FUNCTION__, pd); + + php = to_iwch_pd(pd); + rhp = php->rhp; + mhp = malloc(sizeof(*mhp), M_DEVBUF, M_NOWAIT|M_ZERO); + if (!mhp) + return ERR_PTR(-ENOMEM); + + mhp->umem = ib_umem_get(pd->uobject->context, start, length, acc); + if (IS_ERR(mhp->umem)) { + err = PTR_ERR(mhp->umem); + cxfree(mhp); + return ERR_PTR(-err); + } + + shift = ffs(mhp->umem->page_size) - 1; + + n = 0; + TAILQ_FOREACH(chunk, &mhp->umem->chunk_list, entry) + n += chunk->nents; + + pages = kmalloc(n * sizeof(u64), M_NOWAIT); + if (!pages) { + err = -ENOMEM; + goto err; + } + + i = n = 0; + +#if 0 + TAILQ_FOREACH(chunk, &mhp->umem->chunk_list, entry) + for (j = 0; j < chunk->nmap; ++j) { + len = sg_dma_len(&chunk->page_list[j]) >> shift; + for (k = 0; k < len; ++k) { + pages[i++] = htobe64(sg_dma_address( + &chunk->page_list[j]) + + mhp->umem->page_size * k); + } + } +#endif + mhp->rhp = rhp; + mhp->attr.pdid = php->pdid; + mhp->attr.zbva = 0; + mhp->attr.perms = iwch_ib_to_tpt_access(acc); + mhp->attr.va_fbo = virt; + mhp->attr.page_size = shift - 12; + mhp->attr.len = (u32) length; + mhp->attr.pbl_size = i; + err = iwch_register_mem(rhp, php, mhp, shift, pages); + cxfree(pages); + if (err) + goto err; + + if (udata && !t3a_device(rhp)) { + uresp.pbl_addr = (mhp->attr.pbl_addr - + rhp->rdev.rnic_info.pbl_base) >> 3; + CTR2(KTR_IW_CXGB, "%s user resp pbl_addr 0x%x", __FUNCTION__, + uresp.pbl_addr); + + if (ib_copy_to_udata(udata, &uresp, sizeof (uresp))) { + iwch_dereg_mr(&mhp->ibmr); + err = EFAULT; + goto err; + } + } + + return &mhp->ibmr; + +err: + ib_umem_release(mhp->umem); + cxfree(mhp); + return ERR_PTR(-err); +} + +static struct ib_mr *iwch_get_dma_mr(struct ib_pd *pd, int acc) +{ + struct ib_phys_buf bl; + u64 kva; + struct ib_mr *ibmr; + + CTR2(KTR_IW_CXGB, "%s ib_pd %p", __FUNCTION__, pd); + + /* + * T3 only supports 32 bits of size. 
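 * The DMA MR is therefore registered below as a single physical buffer
 * of 0xffffffff bytes starting at address 0, i.e. it spans the low 4GB.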
+ */ + bl.size = 0xffffffff; + bl.addr = 0; + kva = 0; + ibmr = iwch_register_phys_mem(pd, &bl, 1, acc, &kva); + return ibmr; +} + +static struct ib_mw *iwch_alloc_mw(struct ib_pd *pd) +{ + struct iwch_dev *rhp; + struct iwch_pd *php; + struct iwch_mw *mhp; + u32 mmid; + u32 stag = 0; + int ret; + + php = to_iwch_pd(pd); + rhp = php->rhp; + mhp = malloc(sizeof(*mhp), M_DEVBUF, M_ZERO|M_NOWAIT); + if (!mhp) + return ERR_PTR(-ENOMEM); + ret = cxio_allocate_window(&rhp->rdev, &stag, php->pdid); + if (ret) { + cxfree(mhp); + return ERR_PTR(-ret); + } + mhp->rhp = rhp; + mhp->attr.pdid = php->pdid; + mhp->attr.type = TPT_MW; + mhp->attr.stag = stag; + mmid = (stag) >> 8; + insert_handle(rhp, &rhp->mmidr, mhp, mmid); + CTR4(KTR_IW_CXGB, "%s mmid 0x%x mhp %p stag 0x%x", __FUNCTION__, mmid, mhp, stag); + return &(mhp->ibmw); +} + +static int iwch_dealloc_mw(struct ib_mw *mw) +{ + struct iwch_dev *rhp; + struct iwch_mw *mhp; + u32 mmid; + + mhp = to_iwch_mw(mw); + rhp = mhp->rhp; + mmid = (mw->rkey) >> 8; + cxio_deallocate_window(&rhp->rdev, mhp->attr.stag); + remove_handle(rhp, &rhp->mmidr, mmid); + cxfree(mhp); + CTR4(KTR_IW_CXGB, "%s ib_mw %p mmid 0x%x ptr %p", __FUNCTION__, mw, mmid, mhp); + return 0; +} + +static int iwch_destroy_qp(struct ib_qp *ib_qp) +{ + struct iwch_dev *rhp; + struct iwch_qp *qhp; + struct iwch_qp_attributes attrs; + struct iwch_ucontext *ucontext; + + qhp = to_iwch_qp(ib_qp); + rhp = qhp->rhp; + + attrs.next_state = IWCH_QP_STATE_ERROR; + iwch_modify_qp(rhp, qhp, IWCH_QP_ATTR_NEXT_STATE, &attrs, 0); + mtx_lock(&qhp->lock); + if (qhp->ep) + msleep(qhp, &qhp->lock, 0, "iwch_destroy_qp1", 0); + mtx_unlock(&qhp->lock); + + remove_handle(rhp, &rhp->qpidr, qhp->wq.qpid); + + mtx_lock(&qhp->lock); + if (--qhp->refcnt) + msleep(qhp, &qhp->lock, 0, "iwch_destroy_qp2", 0); + mtx_unlock(&qhp->lock); + + ucontext = ib_qp->uobject ? to_iwch_ucontext(ib_qp->uobject->context) + : NULL; + cxio_destroy_qp(&rhp->rdev, &qhp->wq, + ucontext ? &ucontext->uctx : &rhp->rdev.uctx); + + CTR4(KTR_IW_CXGB, "%s ib_qp %p qpid 0x%0x qhp %p", __FUNCTION__, + ib_qp, qhp->wq.qpid, qhp); + cxfree(qhp); + return 0; +} + +static struct ib_qp *iwch_create_qp(struct ib_pd *pd, + struct ib_qp_init_attr *attrs, + struct ib_udata *udata) +{ + struct iwch_dev *rhp; + struct iwch_qp *qhp; + struct iwch_pd *php; + struct iwch_cq *schp; + struct iwch_cq *rchp; + struct iwch_create_qp_resp uresp; + int wqsize, sqsize, rqsize; + struct iwch_ucontext *ucontext; + + CTR2(KTR_IW_CXGB, "%s ib_pd %p", __FUNCTION__, pd); + if (attrs->qp_type != IB_QPT_RC) + return ERR_PTR(-EINVAL); + php = to_iwch_pd(pd); + rhp = php->rhp; + schp = get_chp(rhp, ((struct iwch_cq *) attrs->send_cq)->cq.cqid); + rchp = get_chp(rhp, ((struct iwch_cq *) attrs->recv_cq)->cq.cqid); + if (!schp || !rchp) + return ERR_PTR(-EINVAL); + + /* The RQT size must be # of entries + 1 rounded up to a power of two */ + rqsize = roundup_pow_of_two(attrs->cap.max_recv_wr); + if (rqsize == attrs->cap.max_recv_wr) + rqsize = roundup_pow_of_two(attrs->cap.max_recv_wr+1); + + /* T3 doesn't support RQT depth < 16 */ + if (rqsize < 16) + rqsize = 16; + + if (rqsize > T3_MAX_RQ_SIZE) + return ERR_PTR(-EINVAL); + + if (attrs->cap.max_inline_data > T3_MAX_INLINE) + return ERR_PTR(-EINVAL); + + /* + * NOTE: The SQ and total WQ sizes don't need to be + * a power of two. However, all the code assumes + * they are. EG: Q_FREECNT() and friends. 
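 * As an illustration (not values taken from the driver), max_send_wr = 100
 * and max_recv_wr = 100 give sqsize = 128, rqsize = 128 and
 * wqsize = roundup_pow_of_two(256) = 256; the caller then sees
 * max_recv_wr = 127 and max_send_wr = 128 further down.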
+ */ + sqsize = roundup_pow_of_two(attrs->cap.max_send_wr); + wqsize = roundup_pow_of_two(rqsize + sqsize); + CTR4(KTR_IW_CXGB, "%s wqsize %d sqsize %d rqsize %d", __FUNCTION__, + wqsize, sqsize, rqsize); + qhp = malloc(sizeof(*qhp), M_DEVBUF, M_ZERO|M_NOWAIT); + if (!qhp) + return ERR_PTR(-ENOMEM); + qhp->wq.size_log2 = ilog2(wqsize); + qhp->wq.rq_size_log2 = ilog2(rqsize); + qhp->wq.sq_size_log2 = ilog2(sqsize); + ucontext = pd->uobject ? to_iwch_ucontext(pd->uobject->context) : NULL; + if (cxio_create_qp(&rhp->rdev, !udata, &qhp->wq, + ucontext ? &ucontext->uctx : &rhp->rdev.uctx)) { + cxfree(qhp); + return ERR_PTR(-ENOMEM); + } + + attrs->cap.max_recv_wr = rqsize - 1; + attrs->cap.max_send_wr = sqsize; + attrs->cap.max_inline_data = T3_MAX_INLINE; + + qhp->rhp = rhp; + qhp->attr.pd = php->pdid; + qhp->attr.scq = ((struct iwch_cq *) attrs->send_cq)->cq.cqid; + qhp->attr.rcq = ((struct iwch_cq *) attrs->recv_cq)->cq.cqid; + qhp->attr.sq_num_entries = attrs->cap.max_send_wr; + qhp->attr.rq_num_entries = attrs->cap.max_recv_wr; + qhp->attr.sq_max_sges = attrs->cap.max_send_sge; + qhp->attr.sq_max_sges_rdma_write = attrs->cap.max_send_sge; + qhp->attr.rq_max_sges = attrs->cap.max_recv_sge; + qhp->attr.state = IWCH_QP_STATE_IDLE; + qhp->attr.next_state = IWCH_QP_STATE_IDLE; + + /* + * XXX - These don't get passed in from the openib user + * at create time. The CM sets them via a QP modify. + * Need to fix... I think the CM should + */ + qhp->attr.enable_rdma_read = 1; + qhp->attr.enable_rdma_write = 1; + qhp->attr.enable_bind = 1; + qhp->attr.max_ord = 1; + qhp->attr.max_ird = 1; + + mtx_init(&qhp->lock, "cxgb qp", NULL, MTX_DEF|MTX_DUPOK); + qhp->refcnt = 1; + insert_handle(rhp, &rhp->qpidr, qhp, qhp->wq.qpid); + + if (udata) { + + struct iwch_mm_entry *mm1, *mm2; + + mm1 = kmalloc(sizeof *mm1, M_NOWAIT); + if (!mm1) { + iwch_destroy_qp(&qhp->ibqp); + return ERR_PTR(-ENOMEM); + } + + mm2 = kmalloc(sizeof *mm2, M_NOWAIT); + if (!mm2) { + cxfree(mm1); + iwch_destroy_qp(&qhp->ibqp); + return ERR_PTR(-ENOMEM); + } + + uresp.qpid = qhp->wq.qpid; + uresp.size_log2 = qhp->wq.size_log2; + uresp.sq_size_log2 = qhp->wq.sq_size_log2; + uresp.rq_size_log2 = qhp->wq.rq_size_log2; + mtx_lock(&ucontext->mmap_lock); + uresp.key = ucontext->key; + ucontext->key += PAGE_SIZE; + uresp.db_key = ucontext->key; + ucontext->key += PAGE_SIZE; + mtx_unlock(&ucontext->mmap_lock); + if (ib_copy_to_udata(udata, &uresp, sizeof (uresp))) { + cxfree(mm1); + cxfree(mm2); + iwch_destroy_qp(&qhp->ibqp); + return ERR_PTR(-EFAULT); + } + mm1->key = uresp.key; + mm1->addr = vtophys(qhp->wq.queue); + mm1->len = PAGE_ALIGN(wqsize * sizeof (union t3_wr)); + insert_mmap(ucontext, mm1); + mm2->key = uresp.db_key; + mm2->addr = qhp->wq.udb & PAGE_MASK; + mm2->len = PAGE_SIZE; + insert_mmap(ucontext, mm2); + } + qhp->ibqp.qp_num = qhp->wq.qpid; + callout_init(&(qhp->timer), TRUE); + CTR6(KTR_IW_CXGB, "sq_num_entries %d, rq_num_entries %d " + "qpid 0x%0x qhp %p dma_addr 0x%llx size %d", + qhp->attr.sq_num_entries, qhp->attr.rq_num_entries, + qhp->wq.qpid, qhp, (unsigned long long) qhp->wq.dma_addr, + 1 << qhp->wq.size_log2); + return &qhp->ibqp; +} + +static int iwch_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, + int attr_mask, struct ib_udata *udata) +{ + struct iwch_dev *rhp; + struct iwch_qp *qhp; + enum iwch_qp_attr_mask mask = 0; + struct iwch_qp_attributes attrs; + + CTR2(KTR_IW_CXGB, "%s ib_qp %p", __FUNCTION__, ibqp); + + /* iwarp does not support the RTR state */ + if ((attr_mask & IB_QP_STATE) && (attr->qp_state 
== IB_QPS_RTR)) + attr_mask &= ~IB_QP_STATE; + + /* Make sure we still have something left to do */ + if (!attr_mask) + return 0; + + memset(&attrs, 0, sizeof attrs); + qhp = to_iwch_qp(ibqp); + rhp = qhp->rhp; + + attrs.next_state = iwch_convert_state(attr->qp_state); + attrs.enable_rdma_read = (attr->qp_access_flags & + IB_ACCESS_REMOTE_READ) ? 1 : 0; + attrs.enable_rdma_write = (attr->qp_access_flags & + IB_ACCESS_REMOTE_WRITE) ? 1 : 0; + attrs.enable_bind = (attr->qp_access_flags & IB_ACCESS_MW_BIND) ? 1 : 0; + + + mask |= (attr_mask & IB_QP_STATE) ? IWCH_QP_ATTR_NEXT_STATE : 0; + mask |= (attr_mask & IB_QP_ACCESS_FLAGS) ? + (IWCH_QP_ATTR_ENABLE_RDMA_READ | + IWCH_QP_ATTR_ENABLE_RDMA_WRITE | + IWCH_QP_ATTR_ENABLE_RDMA_BIND) : 0; + + return iwch_modify_qp(rhp, qhp, mask, &attrs, 0); +} + +void iwch_qp_add_ref(struct ib_qp *qp) +{ + CTR2(KTR_IW_CXGB, "%s ib_qp %p", __FUNCTION__, qp); + mtx_lock(&to_iwch_qp(qp)->lock); + to_iwch_qp(qp)->refcnt++; + mtx_unlock(&to_iwch_qp(qp)->lock); +} + +void iwch_qp_rem_ref(struct ib_qp *qp) +{ + CTR2(KTR_IW_CXGB, "%s ib_qp %p", __FUNCTION__, qp); + mtx_lock(&to_iwch_qp(qp)->lock); + if (--to_iwch_qp(qp)->refcnt == 0) + wakeup(to_iwch_qp(qp)); + mtx_unlock(&to_iwch_qp(qp)->lock); +} + +static struct ib_qp *iwch_get_qp(struct ib_device *dev, int qpn) +{ + CTR3(KTR_IW_CXGB, "%s ib_dev %p qpn 0x%x", __FUNCTION__, dev, qpn); + return (struct ib_qp *)get_qhp(to_iwch_dev(dev), qpn); +} + + +static int iwch_query_pkey(struct ib_device *ibdev, + u8 port, u16 index, u16 * pkey) +{ + CTR2(KTR_IW_CXGB, "%s ibdev %p", __FUNCTION__, ibdev); + *pkey = 0; + return 0; +} + +static int iwch_query_gid(struct ib_device *ibdev, u8 port, + int index, union ib_gid *gid) +{ + struct iwch_dev *dev; + struct port_info *pi; + + CTR5(KTR_IW_CXGB, "%s ibdev %p, port %d, index %d, gid %p", + __FUNCTION__, ibdev, port, index, gid); + dev = to_iwch_dev(ibdev); + PANIC_IF(port == 0 || port > 2); + pi = ((struct port_info *)dev->rdev.port_info.lldevs[port-1]->if_softc); + memset(&(gid->raw[0]), 0, sizeof(gid->raw)); + memcpy(&(gid->raw[0]), pi->hw_addr, 6); + return 0; +} + +static int iwch_query_device(struct ib_device *ibdev, + struct ib_device_attr *props) +{ + + struct iwch_dev *dev; + CTR2(KTR_IW_CXGB, "%s ibdev %p", __FUNCTION__, ibdev); + + dev = to_iwch_dev(ibdev); + memset(props, 0, sizeof *props); +#ifdef notyet + memcpy(&props->sys_image_guid, dev->rdev.t3cdev_p->lldev->if_addr.ifa_addr, 6); +#endif + props->device_cap_flags = dev->device_cap_flags; +#ifdef notyet + props->vendor_id = (u32)dev->rdev.rnic_info.pdev->vendor; + props->vendor_part_id = (u32)dev->rdev.rnic_info.pdev->device; +#endif + props->max_mr_size = ~0ull; + props->max_qp = dev->attr.max_qps; + props->max_qp_wr = dev->attr.max_wrs; + props->max_sge = dev->attr.max_sge_per_wr; + props->max_sge_rd = 1; + props->max_qp_rd_atom = dev->attr.max_rdma_reads_per_qp; + props->max_qp_init_rd_atom = dev->attr.max_rdma_reads_per_qp; + props->max_cq = dev->attr.max_cqs; + props->max_cqe = dev->attr.max_cqes_per_cq; + props->max_mr = dev->attr.max_mem_regs; + props->max_pd = dev->attr.max_pds; + props->local_ca_ack_delay = 0; + + return 0; +} + +static int iwch_query_port(struct ib_device *ibdev, + u8 port, struct ib_port_attr *props) +{ + CTR2(KTR_IW_CXGB, "%s ibdev %p", __FUNCTION__, ibdev); + props->max_mtu = IB_MTU_4096; + props->lid = 0; + props->lmc = 0; + props->sm_lid = 0; + props->sm_sl = 0; + props->state = IB_PORT_ACTIVE; + props->phys_state = 0; + props->port_cap_flags = + IB_PORT_CM_SUP | + 
IB_PORT_SNMP_TUNNEL_SUP | + IB_PORT_REINIT_SUP | + IB_PORT_DEVICE_MGMT_SUP | + IB_PORT_VENDOR_CLASS_SUP | IB_PORT_BOOT_MGMT_SUP; + props->gid_tbl_len = 1; + props->pkey_tbl_len = 1; + props->qkey_viol_cntr = 0; + props->active_width = 2; + props->active_speed = 2; + props->max_msg_sz = -1; + + return 0; +} + +#ifdef notyet +static ssize_t show_rev(struct class_device *cdev, char *buf) +{ + struct iwch_dev *dev = container_of(cdev, struct iwch_dev, + ibdev.class_dev); + CTR2(KTR_IW_CXGB, "%s class dev 0x%p", __FUNCTION__, cdev); + return sprintf(buf, "%d\n", dev->rdev.t3cdev_p->type); +} + +static ssize_t show_fw_ver(struct class_device *cdev, char *buf) +{ + struct iwch_dev *dev = container_of(cdev, struct iwch_dev, + ibdev.class_dev); + struct ethtool_drvinfo info; + struct net_device *lldev = dev->rdev.t3cdev_p->lldev; + + CTR2(KTR_IW_CXGB, "%s class dev 0x%p", __FUNCTION__, cdev); + lldev->ethtool_ops->get_drvinfo(lldev, &info); + return sprintf(buf, "%s\n", info.fw_version); +} + +static ssize_t show_hca(struct class_device *cdev, char *buf) +{ + struct iwch_dev *dev = container_of(cdev, struct iwch_dev, + ibdev.class_dev); + struct ethtool_drvinfo info; + struct net_device *lldev = dev->rdev.t3cdev_p->lldev; + + CTR2(KTR_IW_CXGB, "%s class dev 0x%p", __FUNCTION__, cdev); + lldev->ethtool_ops->get_drvinfo(lldev, &info); + return sprintf(buf, "%s\n", info.driver); +} + +static ssize_t show_board(struct class_device *cdev, char *buf) +{ + struct iwch_dev *dev = container_of(cdev, struct iwch_dev, + ibdev.class_dev); + CTR2(KTR_IW_CXGB, "%s class dev 0x%p", __FUNCTION__, dev); +#ifdef notyet + return sprintf(buf, "%x.%x\n", dev->rdev.rnic_info.pdev->vendor, + dev->rdev.rnic_info.pdev->device); +#else + return sprintf(buf, "%x.%x\n", 0xdead, 0xbeef); /* XXX */ +#endif +} + +static CLASS_DEVICE_ATTR(hw_rev, S_IRUGO, show_rev, NULL); +static CLASS_DEVICE_ATTR(fw_ver, S_IRUGO, show_fw_ver, NULL); +static CLASS_DEVICE_ATTR(hca_type, S_IRUGO, show_hca, NULL); +static CLASS_DEVICE_ATTR(board_id, S_IRUGO, show_board, NULL); + +static struct class_device_attribute *iwch_class_attributes[] = { + &class_device_attr_hw_rev, + &class_device_attr_fw_ver, + &class_device_attr_hca_type, + &class_device_attr_board_id +}; +#endif + +int iwch_register_device(struct iwch_dev *dev) +{ + int ret; +#ifdef notyet + int i; +#endif + CTR2(KTR_IW_CXGB, "%s iwch_dev %p", __FUNCTION__, dev); + strlcpy(dev->ibdev.name, "cxgb3_%d", IB_DEVICE_NAME_MAX); + memset(&dev->ibdev.node_guid, 0, sizeof(dev->ibdev.node_guid)); +#ifdef notyet + memcpy(&dev->ibdev.node_guid, dev->rdev.t3cdev_p->lldev->dev_addr, 6); +#endif + dev->device_cap_flags = + (IB_DEVICE_ZERO_STAG | + IB_DEVICE_SEND_W_INV | IB_DEVICE_MEM_WINDOW); + + dev->ibdev.uverbs_cmd_mask = + (1ull << IB_USER_VERBS_CMD_GET_CONTEXT) | + (1ull << IB_USER_VERBS_CMD_QUERY_DEVICE) | + (1ull << IB_USER_VERBS_CMD_QUERY_PORT) | + (1ull << IB_USER_VERBS_CMD_ALLOC_PD) | + (1ull << IB_USER_VERBS_CMD_DEALLOC_PD) | + (1ull << IB_USER_VERBS_CMD_REG_MR) | + (1ull << IB_USER_VERBS_CMD_DEREG_MR) | + (1ull << IB_USER_VERBS_CMD_CREATE_COMP_CHANNEL) | + (1ull << IB_USER_VERBS_CMD_CREATE_CQ) | + (1ull << IB_USER_VERBS_CMD_DESTROY_CQ) | + (1ull << IB_USER_VERBS_CMD_REQ_NOTIFY_CQ) | + (1ull << IB_USER_VERBS_CMD_CREATE_QP) | + (1ull << IB_USER_VERBS_CMD_MODIFY_QP) | + (1ull << IB_USER_VERBS_CMD_POLL_CQ) | + (1ull << IB_USER_VERBS_CMD_DESTROY_QP) | + (1ull << IB_USER_VERBS_CMD_POST_SEND) | + (1ull << IB_USER_VERBS_CMD_POST_RECV); + dev->ibdev.node_type = RDMA_NODE_RNIC; + 
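+	/*
+	 * uverbs_cmd_mask above is a plain 64-bit bitmap: bit N advertises
+	 * support for user-verbs command N, so the user verbs layer can
+	 * test e.g. (uverbs_cmd_mask & (1ull << IB_USER_VERBS_CMD_CREATE_QP))
+	 * before dispatching a userspace QP-create request to this provider.
+	 */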
memcpy(dev->ibdev.node_desc, IWCH_NODE_DESC, sizeof(IWCH_NODE_DESC)); + dev->ibdev.phys_port_cnt = dev->rdev.port_info.nports; + dev->ibdev.num_comp_vectors = 1; + dev->ibdev.dma_device = dev->rdev.rnic_info.pdev; + dev->ibdev.query_device = iwch_query_device; + dev->ibdev.query_port = iwch_query_port; + dev->ibdev.modify_port = iwch_modify_port; + dev->ibdev.query_pkey = iwch_query_pkey; + dev->ibdev.query_gid = iwch_query_gid; + dev->ibdev.alloc_ucontext = iwch_alloc_ucontext; + dev->ibdev.dealloc_ucontext = iwch_dealloc_ucontext; +#ifdef notyet + dev->ibdev.mmap = iwch_mmap; +#endif + dev->ibdev.alloc_pd = iwch_allocate_pd; + dev->ibdev.dealloc_pd = iwch_deallocate_pd; + dev->ibdev.create_ah = iwch_ah_create; + dev->ibdev.destroy_ah = iwch_ah_destroy; + dev->ibdev.create_qp = iwch_create_qp; + dev->ibdev.modify_qp = iwch_ib_modify_qp; + dev->ibdev.destroy_qp = iwch_destroy_qp; + dev->ibdev.create_cq = iwch_create_cq; + dev->ibdev.destroy_cq = iwch_destroy_cq; + dev->ibdev.resize_cq = iwch_resize_cq; + dev->ibdev.poll_cq = iwch_poll_cq; + dev->ibdev.get_dma_mr = iwch_get_dma_mr; + dev->ibdev.reg_phys_mr = iwch_register_phys_mem; + dev->ibdev.rereg_phys_mr = iwch_reregister_phys_mem; + dev->ibdev.reg_user_mr = iwch_reg_user_mr; + dev->ibdev.dereg_mr = iwch_dereg_mr; + dev->ibdev.alloc_mw = iwch_alloc_mw; + dev->ibdev.bind_mw = iwch_bind_mw; + dev->ibdev.dealloc_mw = iwch_dealloc_mw; + + dev->ibdev.attach_mcast = iwch_multicast_attach; + dev->ibdev.detach_mcast = iwch_multicast_detach; + dev->ibdev.process_mad = iwch_process_mad; + + dev->ibdev.req_notify_cq = iwch_arm_cq; + dev->ibdev.post_send = iwch_post_send; + dev->ibdev.post_recv = iwch_post_receive; + + + dev->ibdev.iwcm = + (struct iw_cm_verbs *) kmalloc(sizeof(struct iw_cm_verbs), + M_NOWAIT); + dev->ibdev.iwcm->connect = iwch_connect; + dev->ibdev.iwcm->accept = iwch_accept_cr; + dev->ibdev.iwcm->reject = iwch_reject_cr; + dev->ibdev.iwcm->create_listen = iwch_create_listen; + dev->ibdev.iwcm->destroy_listen = iwch_destroy_listen; + dev->ibdev.iwcm->add_ref = iwch_qp_add_ref; + dev->ibdev.iwcm->rem_ref = iwch_qp_rem_ref; + dev->ibdev.iwcm->get_qp = iwch_get_qp; + + ret = ib_register_device(&dev->ibdev); + if (ret) + goto bail1; +#ifdef notyet + for (i = 0; i < ARRAY_SIZE(iwch_class_attributes); ++i) { + ret = class_device_create_file(&dev->ibdev.class_dev, + iwch_class_attributes[i]); + if (ret) { + goto bail2; + } + } +#endif + return 0; +#ifdef notyet +bail2: +#endif + ib_unregister_device(&dev->ibdev); +bail1: + return ret; +} + +void iwch_unregister_device(struct iwch_dev *dev) +{ +#ifdef notyet + int i; + + CTR2(KTR_IW_CXGB, "%s iwch_dev %p", __FUNCTION__, dev); + + for (i = 0; i < ARRAY_SIZE(iwch_class_attributes); ++i) + class_device_remove_file(&dev->ibdev.class_dev, + iwch_class_attributes[i]); +#endif + ib_unregister_device(&dev->ibdev); + return; +} diff --git a/sys/dev/cxgb/ulp/iw_cxgb/iw_cxgb_provider.h b/sys/dev/cxgb/ulp/iw_cxgb/iw_cxgb_provider.h new file mode 100644 index 0000000..c857ce8 --- /dev/null +++ b/sys/dev/cxgb/ulp/iw_cxgb/iw_cxgb_provider.h @@ -0,0 +1,362 @@ +/************************************************************************** + +Copyright (c) 2007, 2008 Chelsio Inc. +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + + 1. Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimer. + + 2. 
Neither the name of the Chelsio Corporation nor the names of its + contributors may be used to endorse or promote products derived from + this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +POSSIBILITY OF SUCH DAMAGE. + +$FreeBSD$ + +***************************************************************************/ +#ifndef __IWCH_PROVIDER_H__ +#define __IWCH_PROVIDER_H__ + +#include <contrib/rdma/ib_verbs.h> + +struct iwch_pd { + struct ib_pd ibpd; + u32 pdid; + struct iwch_dev *rhp; +}; + +#ifndef container_of +#define container_of(p, stype, field) ((stype *)(((uint8_t *)(p)) - offsetof(stype, field))) +#endif +static __inline struct iwch_pd * +to_iwch_pd(struct ib_pd *ibpd) +{ + return container_of(ibpd, struct iwch_pd, ibpd); +} + +struct tpt_attributes { + u32 stag; + u32 state:1; + u32 type:2; + u32 rsvd:1; + enum tpt_mem_perm perms; + u32 remote_invaliate_disable:1; + u32 zbva:1; + u32 mw_bind_enable:1; + u32 page_size:5; + + u32 pdid; + u32 qpid; + u32 pbl_addr; + u32 len; + u64 va_fbo; + u32 pbl_size; +}; + +struct iwch_mr { + struct ib_mr ibmr; + struct ib_umem *umem; + struct iwch_dev *rhp; + u64 kva; + struct tpt_attributes attr; +}; + +typedef struct iwch_mw iwch_mw_handle; + +static __inline struct iwch_mr * +to_iwch_mr(struct ib_mr *ibmr) +{ + return container_of(ibmr, struct iwch_mr, ibmr); +} + +struct iwch_mw { + struct ib_mw ibmw; + struct iwch_dev *rhp; + u64 kva; + struct tpt_attributes attr; +}; + +static __inline struct iwch_mw * +to_iwch_mw(struct ib_mw *ibmw) +{ + return container_of(ibmw, struct iwch_mw, ibmw); +} + +struct iwch_cq { + struct ib_cq ibcq; + struct iwch_dev *rhp; + struct t3_cq cq; + struct mtx lock; + int refcnt; + u32 /* __user */ *user_rptr_addr; +}; + +static __inline struct iwch_cq * +to_iwch_cq(struct ib_cq *ibcq) +{ + return container_of(ibcq, struct iwch_cq, ibcq); +} + +enum IWCH_QP_FLAGS { + QP_QUIESCED = 0x01 +}; + +struct iwch_mpa_attributes { + u8 recv_marker_enabled; + u8 xmit_marker_enabled; /* iWARP: enable inbound Read Resp. */ + u8 crc_enabled; + u8 version; /* 0 or 1 */ +}; + +struct iwch_qp_attributes { + u32 scq; + u32 rcq; + u32 sq_num_entries; + u32 rq_num_entries; + u32 sq_max_sges; + u32 sq_max_sges_rdma_write; + u32 rq_max_sges; + u32 state; + u8 enable_rdma_read; + u8 enable_rdma_write; /* enable inbound Read Resp. */ + u8 enable_bind; + u8 enable_mmid0_fastreg; /* Enable STAG0 + Fast-register */ + /* + * Next QP state. If specify the current state, only the + * QP attributes will be modified. + */ + u32 max_ord; + u32 max_ird; + u32 pd; /* IN */ + u32 next_state; + char terminate_buffer[52]; + u32 terminate_msg_len; + u8 is_terminate_local; + struct iwch_mpa_attributes mpa_attr; /* IN-OUT */ + struct iwch_ep *llp_stream_handle; + char *stream_msg_buf; /* Last stream msg. 
before Idle -> RTS */ + u32 stream_msg_buf_len; /* Only on Idle -> RTS */ +}; + +struct iwch_qp { + struct ib_qp ibqp; + struct iwch_dev *rhp; + struct iwch_ep *ep; + struct iwch_qp_attributes attr; + struct t3_wq wq; + struct mtx lock; + int refcnt; + enum IWCH_QP_FLAGS flags; + struct callout timer; +}; + +static __inline int +qp_quiesced(struct iwch_qp *qhp) +{ + return qhp->flags & QP_QUIESCED; +} + +static __inline struct iwch_qp * +to_iwch_qp(struct ib_qp *ibqp) +{ + return container_of(ibqp, struct iwch_qp, ibqp); +} + +void iwch_qp_add_ref(struct ib_qp *qp); +void iwch_qp_rem_ref(struct ib_qp *qp); + +struct iwch_ucontext { + struct ib_ucontext ibucontext; + struct cxio_ucontext uctx; + u32 key; + struct mtx mmap_lock; + TAILQ_HEAD( ,iwch_mm_entry) mmaps; +}; + +static __inline struct iwch_ucontext * +to_iwch_ucontext(struct ib_ucontext *c) +{ + return container_of(c, struct iwch_ucontext, ibucontext); +} + +struct iwch_mm_entry { + TAILQ_ENTRY(iwch_mm_entry) entry; + u64 addr; + u32 key; + unsigned len; +}; + +static __inline struct iwch_mm_entry * +remove_mmap(struct iwch_ucontext *ucontext, + u32 key, unsigned len) +{ + struct iwch_mm_entry *tmp, *mm; + + mtx_lock(&ucontext->mmap_lock); + TAILQ_FOREACH_SAFE(mm, &ucontext->mmaps, entry, tmp) { + if (mm->key == key && mm->len == len) { + TAILQ_REMOVE(&ucontext->mmaps, mm, entry); + mtx_unlock(&ucontext->mmap_lock); + CTR4(KTR_IW_CXGB, "%s key 0x%x addr 0x%llx len %d\n", __FUNCTION__, + key, (unsigned long long) mm->addr, mm->len); + return mm; + } + } + mtx_unlock(&ucontext->mmap_lock); + + return NULL; +} + +static __inline void +insert_mmap(struct iwch_ucontext *ucontext, + struct iwch_mm_entry *mm) +{ + mtx_lock(&ucontext->mmap_lock); + CTR4(KTR_IW_CXGB, "%s key 0x%x addr 0x%llx len %d\n", __FUNCTION__, + mm->key, (unsigned long long) mm->addr, mm->len); + TAILQ_INSERT_TAIL(&ucontext->mmaps, mm, entry); + mtx_unlock(&ucontext->mmap_lock); +} + +enum iwch_qp_attr_mask { + IWCH_QP_ATTR_NEXT_STATE = 1 << 0, + IWCH_QP_ATTR_ENABLE_RDMA_READ = 1 << 7, + IWCH_QP_ATTR_ENABLE_RDMA_WRITE = 1 << 8, + IWCH_QP_ATTR_ENABLE_RDMA_BIND = 1 << 9, + IWCH_QP_ATTR_MAX_ORD = 1 << 11, + IWCH_QP_ATTR_MAX_IRD = 1 << 12, + IWCH_QP_ATTR_LLP_STREAM_HANDLE = 1 << 22, + IWCH_QP_ATTR_STREAM_MSG_BUFFER = 1 << 23, + IWCH_QP_ATTR_MPA_ATTR = 1 << 24, + IWCH_QP_ATTR_QP_CONTEXT_ACTIVATE = 1 << 25, + IWCH_QP_ATTR_VALID_MODIFY = (IWCH_QP_ATTR_ENABLE_RDMA_READ | + IWCH_QP_ATTR_ENABLE_RDMA_WRITE | + IWCH_QP_ATTR_MAX_ORD | + IWCH_QP_ATTR_MAX_IRD | + IWCH_QP_ATTR_LLP_STREAM_HANDLE | + IWCH_QP_ATTR_STREAM_MSG_BUFFER | + IWCH_QP_ATTR_MPA_ATTR | + IWCH_QP_ATTR_QP_CONTEXT_ACTIVATE) +}; + +int iwch_modify_qp(struct iwch_dev *rhp, + struct iwch_qp *qhp, + enum iwch_qp_attr_mask mask, + struct iwch_qp_attributes *attrs, + int internal); + +enum iwch_qp_state { + IWCH_QP_STATE_IDLE, + IWCH_QP_STATE_RTS, + IWCH_QP_STATE_ERROR, + IWCH_QP_STATE_TERMINATE, + IWCH_QP_STATE_CLOSING, + IWCH_QP_STATE_TOT +}; + +static __inline int +iwch_convert_state(enum ib_qp_state ib_state) +{ + switch (ib_state) { + case IB_QPS_RESET: + case IB_QPS_INIT: + return IWCH_QP_STATE_IDLE; + case IB_QPS_RTS: + return IWCH_QP_STATE_RTS; + case IB_QPS_SQD: + return IWCH_QP_STATE_CLOSING; + case IB_QPS_SQE: + return IWCH_QP_STATE_TERMINATE; + case IB_QPS_ERR: + return IWCH_QP_STATE_ERROR; + default: + return -1; + } +} + +static __inline u32 +iwch_ib_to_tpt_access(int acc) +{ + return (acc & IB_ACCESS_REMOTE_WRITE ? TPT_REMOTE_WRITE : 0) | + (acc & IB_ACCESS_REMOTE_READ ? 
TPT_REMOTE_READ : 0) | + (acc & IB_ACCESS_LOCAL_WRITE ? TPT_LOCAL_WRITE : 0) | + TPT_LOCAL_READ; +} + +static __inline u32 +iwch_ib_to_mwbind_access(int acc) +{ + return (acc & IB_ACCESS_REMOTE_WRITE ? T3_MEM_ACCESS_REM_WRITE : 0) | + (acc & IB_ACCESS_REMOTE_READ ? T3_MEM_ACCESS_REM_READ : 0) | + (acc & IB_ACCESS_LOCAL_WRITE ? T3_MEM_ACCESS_LOCAL_WRITE : 0) | + T3_MEM_ACCESS_LOCAL_READ; +} + +enum iwch_mmid_state { + IWCH_STAG_STATE_VALID, + IWCH_STAG_STATE_INVALID +}; + +enum iwch_qp_query_flags { + IWCH_QP_QUERY_CONTEXT_NONE = 0x0, /* No ctx; Only attrs */ + IWCH_QP_QUERY_CONTEXT_GET = 0x1, /* Get ctx + attrs */ + IWCH_QP_QUERY_CONTEXT_SUSPEND = 0x2, /* Not Supported */ + + /* + * Quiesce QP context; Consumer + * will NOT replay outstanding WR + */ + IWCH_QP_QUERY_CONTEXT_QUIESCE = 0x4, + IWCH_QP_QUERY_CONTEXT_REMOVE = 0x8, + IWCH_QP_QUERY_TEST_USERWRITE = 0x32 /* Test special */ +}; + +int iwch_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, + struct ib_send_wr **bad_wr); +int iwch_post_receive(struct ib_qp *ibqp, struct ib_recv_wr *wr, + struct ib_recv_wr **bad_wr); +int iwch_bind_mw(struct ib_qp *qp, + struct ib_mw *mw, + struct ib_mw_bind *mw_bind); +int iwch_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc); +int iwch_post_terminate(struct iwch_qp *qhp, struct respQ_msg_t *rsp_msg); +int iwch_register_device(struct iwch_dev *dev); +void iwch_unregister_device(struct iwch_dev *dev); +int iwch_quiesce_qps(struct iwch_cq *chp); +int iwch_resume_qps(struct iwch_cq *chp); +void stop_read_rep_timer(struct iwch_qp *qhp); +int iwch_register_mem(struct iwch_dev *rhp, struct iwch_pd *php, + struct iwch_mr *mhp, + int shift, + __be64 *page_list); +int iwch_reregister_mem(struct iwch_dev *rhp, struct iwch_pd *php, + struct iwch_mr *mhp, + int shift, + __be64 *page_list, + int npages); +int build_phys_page_list(struct ib_phys_buf *buffer_list, + int num_phys_buf, + u64 *iova_start, + u64 *total_size, + int *npages, + int *shift, + __be64 **page_list); + + +#define IWCH_NODE_DESC "cxgb3 Chelsio Communications" + +#endif diff --git a/sys/dev/cxgb/ulp/iw_cxgb/iw_cxgb_qp.c b/sys/dev/cxgb/ulp/iw_cxgb/iw_cxgb_qp.c new file mode 100644 index 0000000..3c203f1 --- /dev/null +++ b/sys/dev/cxgb/ulp/iw_cxgb/iw_cxgb_qp.c @@ -0,0 +1,1052 @@ + +/************************************************************************** + +Copyright (c) 2007, Chelsio Inc. +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + + 1. Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimer. + + 2. Neither the name of the Chelsio Corporation nor the names of its + contributors may be used to endorse or promote products derived from + this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +POSSIBILITY OF SUCH DAMAGE. + +***************************************************************************/ +#include <sys/cdefs.h> +__FBSDID("$FreeBSD$"); + +#include <sys/param.h> +#include <sys/systm.h> +#include <sys/kernel.h> +#include <sys/bus.h> +#include <sys/module.h> +#include <sys/pciio.h> +#include <sys/conf.h> +#include <machine/bus.h> +#include <machine/resource.h> +#include <sys/bus_dma.h> +#include <sys/rman.h> +#include <sys/ioccom.h> +#include <sys/mbuf.h> +#include <sys/mutex.h> +#include <sys/rwlock.h> +#include <sys/linker.h> +#include <sys/firmware.h> +#include <sys/socket.h> +#include <sys/sockio.h> +#include <sys/smp.h> +#include <sys/sysctl.h> +#include <sys/syslog.h> +#include <sys/queue.h> +#include <sys/taskqueue.h> +#include <sys/proc.h> +#include <sys/queue.h> + +#include <netinet/in.h> + +#include <contrib/rdma/ib_verbs.h> +#include <contrib/rdma/ib_umem.h> +#include <contrib/rdma/ib_user_verbs.h> + + +#ifdef CONFIG_DEFINED +#include <cxgb_include.h> +#include <ulp/iw_cxgb/iw_cxgb_wr.h> +#include <ulp/iw_cxgb/iw_cxgb_hal.h> +#include <ulp/iw_cxgb/iw_cxgb_provider.h> +#include <ulp/iw_cxgb/iw_cxgb_cm.h> +#include <ulp/iw_cxgb/iw_cxgb.h> +#include <ulp/iw_cxgb/iw_cxgb_resource.h> +#include <ulp/iw_cxgb/iw_cxgb_user.h> +#else +#include <dev/cxgb/cxgb_include.h> +#include <dev/cxgb/ulp/iw_cxgb/iw_cxgb_wr.h> +#include <dev/cxgb/ulp/iw_cxgb/iw_cxgb_hal.h> +#include <dev/cxgb/ulp/iw_cxgb/iw_cxgb_provider.h> +#include <dev/cxgb/ulp/iw_cxgb/iw_cxgb_cm.h> +#include <dev/cxgb/ulp/iw_cxgb/iw_cxgb.h> +#include <dev/cxgb/ulp/iw_cxgb/iw_cxgb_resource.h> +#include <dev/cxgb/ulp/iw_cxgb/iw_cxgb_user.h> +#endif + + +#define NO_SUPPORT -1 + +static int iwch_build_rdma_send(union t3_wr *wqe, struct ib_send_wr *wr, + u8 * flit_cnt) +{ + int i; + u32 plen; + + switch (wr->opcode) { + case IB_WR_SEND: + case IB_WR_SEND_WITH_IMM: + if (wr->send_flags & IB_SEND_SOLICITED) + wqe->send.rdmaop = T3_SEND_WITH_SE; + else + wqe->send.rdmaop = T3_SEND; + wqe->send.rem_stag = 0; + break; +#if 0 /* Not currently supported */ + case TYPE_SEND_INVALIDATE: + case TYPE_SEND_INVALIDATE_IMMEDIATE: + wqe->send.rdmaop = T3_SEND_WITH_INV; + wqe->send.rem_stag = htobe32(wr->wr.rdma.rkey); + break; + case TYPE_SEND_SE_INVALIDATE: + wqe->send.rdmaop = T3_SEND_WITH_SE_INV; + wqe->send.rem_stag = htobe32(wr->wr.rdma.rkey); + break; +#endif + default: + break; + } + if (wr->num_sge > T3_MAX_SGE) + return (-EINVAL); + wqe->send.reserved[0] = 0; + wqe->send.reserved[1] = 0; + wqe->send.reserved[2] = 0; + if (wr->opcode == IB_WR_SEND_WITH_IMM) { + plen = 4; + wqe->send.sgl[0].stag = wr->imm_data; + wqe->send.sgl[0].len = 0; + wqe->send.num_sgle = 0; + *flit_cnt = 5; + } else { + plen = 0; + for (i = 0; i < wr->num_sge; i++) { + if ((plen + wr->sg_list[i].length) < plen) { + return (-EMSGSIZE); + } + plen += wr->sg_list[i].length; + wqe->send.sgl[i].stag = + htobe32(wr->sg_list[i].lkey); + wqe->send.sgl[i].len = + htobe32(wr->sg_list[i].length); + wqe->send.sgl[i].to = htobe64(wr->sg_list[i].addr); + } + wqe->send.num_sgle = 
htobe32(wr->num_sge); + *flit_cnt = 4 + ((wr->num_sge) << 1); + } + wqe->send.plen = htobe32(plen); + return 0; +} + +static int iwch_build_rdma_write(union t3_wr *wqe, struct ib_send_wr *wr, + u8 *flit_cnt) +{ + int i; + u32 plen; + + if (wr->num_sge > T3_MAX_SGE) + return (-EINVAL); + wqe->write.rdmaop = T3_RDMA_WRITE; + wqe->write.reserved[0] = 0; + wqe->write.reserved[1] = 0; + wqe->write.reserved[2] = 0; + wqe->write.stag_sink = htobe32(wr->wr.rdma.rkey); + wqe->write.to_sink = htobe64(wr->wr.rdma.remote_addr); + + if (wr->opcode == IB_WR_RDMA_WRITE_WITH_IMM) { + plen = 4; + wqe->write.sgl[0].stag = wr->imm_data; + wqe->write.sgl[0].len = 0; + wqe->write.num_sgle = 0; + *flit_cnt = 6; + } else { + plen = 0; + for (i = 0; i < wr->num_sge; i++) { + if ((plen + wr->sg_list[i].length) < plen) { + return (-EMSGSIZE); + } + plen += wr->sg_list[i].length; + wqe->write.sgl[i].stag = + htobe32(wr->sg_list[i].lkey); + wqe->write.sgl[i].len = + htobe32(wr->sg_list[i].length); + wqe->write.sgl[i].to = + htobe64(wr->sg_list[i].addr); + } + wqe->write.num_sgle = htobe32(wr->num_sge); + *flit_cnt = 5 + ((wr->num_sge) << 1); + } + wqe->write.plen = htobe32(plen); + return 0; +} + +static int iwch_build_rdma_read(union t3_wr *wqe, struct ib_send_wr *wr, + u8 *flit_cnt) +{ + if (wr->num_sge > 1) + return (-EINVAL); + wqe->read.rdmaop = T3_READ_REQ; + wqe->read.reserved[0] = 0; + wqe->read.reserved[1] = 0; + wqe->read.reserved[2] = 0; + wqe->read.rem_stag = htobe32(wr->wr.rdma.rkey); + wqe->read.rem_to = htobe64(wr->wr.rdma.remote_addr); + wqe->read.local_stag = htobe32(wr->sg_list[0].lkey); + wqe->read.local_len = htobe32(wr->sg_list[0].length); + wqe->read.local_to = htobe64(wr->sg_list[0].addr); + *flit_cnt = sizeof(struct t3_rdma_read_wr) >> 3; + return 0; +} + +/* + * TBD: this is going to be moved to firmware. Missing pdid/qpid check for now. 
+ */ +static int iwch_sgl2pbl_map(struct iwch_dev *rhp, struct ib_sge *sg_list, + u32 num_sgle, u32 * pbl_addr, u8 * page_size) +{ + int i; + struct iwch_mr *mhp; + u32 offset; + for (i = 0; i < num_sgle; i++) { + + mhp = get_mhp(rhp, (sg_list[i].lkey) >> 8); + if (!mhp) { + CTR2(KTR_IW_CXGB, "%s %d", __FUNCTION__, __LINE__); + return (-EIO); + } + if (!mhp->attr.state) { + CTR2(KTR_IW_CXGB, "%s %d", __FUNCTION__, __LINE__); + return (-EIO); + } + if (mhp->attr.zbva) { + CTR2(KTR_IW_CXGB, "%s %d", __FUNCTION__, __LINE__); + return (-EIO); + } + + if (sg_list[i].addr < mhp->attr.va_fbo) { + CTR2(KTR_IW_CXGB, "%s %d", __FUNCTION__, __LINE__); + return (-EINVAL); + } + if (sg_list[i].addr + ((u64) sg_list[i].length) < + sg_list[i].addr) { + CTR2(KTR_IW_CXGB, "%s %d", __FUNCTION__, __LINE__); + return (-EINVAL); + } + if (sg_list[i].addr + ((u64) sg_list[i].length) > + mhp->attr.va_fbo + ((u64) mhp->attr.len)) { + CTR2(KTR_IW_CXGB, "%s %d", __FUNCTION__, __LINE__); + return (-EINVAL); + } + offset = sg_list[i].addr - mhp->attr.va_fbo; + offset += ((u32) mhp->attr.va_fbo) % + (1UL << (12 + mhp->attr.page_size)); + pbl_addr[i] = ((mhp->attr.pbl_addr - + rhp->rdev.rnic_info.pbl_base) >> 3) + + (offset >> (12 + mhp->attr.page_size)); + page_size[i] = mhp->attr.page_size; + } + return 0; +} + +static int iwch_build_rdma_recv(struct iwch_dev *rhp, union t3_wr *wqe, + struct ib_recv_wr *wr) +{ + int i; + if (wr->num_sge > T3_MAX_SGE) + return (-EINVAL); + wqe->recv.num_sgle = htobe32(wr->num_sge); + for (i = 0; i < wr->num_sge; i++) { + wqe->recv.sgl[i].stag = htobe32(wr->sg_list[i].lkey); + wqe->recv.sgl[i].len = htobe32(wr->sg_list[i].length); + wqe->recv.sgl[i].to = htobe64(wr->sg_list[i].addr); + } + for (; i < T3_MAX_SGE; i++) { + wqe->recv.sgl[i].stag = 0; + wqe->recv.sgl[i].len = 0; + wqe->recv.sgl[i].to = 0; + } + return 0; +} + +int iwch_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, + struct ib_send_wr **bad_wr) +{ + int err = 0; + u8 t3_wr_flit_cnt = 0; + enum t3_wr_opcode t3_wr_opcode = 0; + enum t3_wr_flags t3_wr_flags; + struct iwch_qp *qhp; + u32 idx; + union t3_wr *wqe; + u32 num_wrs; + struct t3_swsq *sqp; + + qhp = to_iwch_qp(ibqp); + mtx_lock(&qhp->lock); + if (qhp->attr.state > IWCH_QP_STATE_RTS) { + mtx_unlock(&qhp->lock); + return (-EINVAL); + } + num_wrs = Q_FREECNT(qhp->wq.sq_rptr, qhp->wq.sq_wptr, + qhp->wq.sq_size_log2); + if (num_wrs <= 0) { + mtx_unlock(&qhp->lock); + return (-ENOMEM); + } + while (wr) { + if (num_wrs == 0) { + err = -ENOMEM; + *bad_wr = wr; + break; + } + idx = Q_PTR2IDX(qhp->wq.wptr, qhp->wq.size_log2); + wqe = (union t3_wr *) (qhp->wq.queue + idx); + t3_wr_flags = 0; + if (wr->send_flags & IB_SEND_SOLICITED) + t3_wr_flags |= T3_SOLICITED_EVENT_FLAG; + if (wr->send_flags & IB_SEND_FENCE) + t3_wr_flags |= T3_READ_FENCE_FLAG; + if (wr->send_flags & IB_SEND_SIGNALED) + t3_wr_flags |= T3_COMPLETION_FLAG; + sqp = qhp->wq.sq + + Q_PTR2IDX(qhp->wq.sq_wptr, qhp->wq.sq_size_log2); + switch (wr->opcode) { + case IB_WR_SEND: + case IB_WR_SEND_WITH_IMM: + t3_wr_opcode = T3_WR_SEND; + err = iwch_build_rdma_send(wqe, wr, &t3_wr_flit_cnt); + break; + case IB_WR_RDMA_WRITE: + case IB_WR_RDMA_WRITE_WITH_IMM: + t3_wr_opcode = T3_WR_WRITE; + err = iwch_build_rdma_write(wqe, wr, &t3_wr_flit_cnt); + break; + case IB_WR_RDMA_READ: + t3_wr_opcode = T3_WR_READ; + t3_wr_flags = 0; /* T3 reads are always signaled */ + err = iwch_build_rdma_read(wqe, wr, &t3_wr_flit_cnt); + if (err) + break; + sqp->read_len = wqe->read.local_len; + if (!qhp->wq.oldest_read) + 
qhp->wq.oldest_read = sqp; + break; + default: + CTR2(KTR_IW_CXGB, "%s post of type=%d TBD!", __FUNCTION__, + wr->opcode); + err = -EINVAL; + } + if (err) { + *bad_wr = wr; + break; + } + wqe->send.wrid.id0.hi = qhp->wq.sq_wptr; + sqp->wr_id = wr->wr_id; + sqp->opcode = wr2opcode(t3_wr_opcode); + sqp->sq_wptr = qhp->wq.sq_wptr; + sqp->complete = 0; + sqp->signaled = (wr->send_flags & IB_SEND_SIGNALED); + + build_fw_riwrh((void *) wqe, t3_wr_opcode, t3_wr_flags, + Q_GENBIT(qhp->wq.wptr, qhp->wq.size_log2), + 0, t3_wr_flit_cnt); + CTR5(KTR_IW_CXGB, "%s cookie 0x%llx wq idx 0x%x swsq idx %ld opcode %d", + __FUNCTION__, (unsigned long long) wr->wr_id, idx, + Q_PTR2IDX(qhp->wq.sq_wptr, qhp->wq.sq_size_log2), + sqp->opcode); + wr = wr->next; + num_wrs--; + ++(qhp->wq.wptr); + ++(qhp->wq.sq_wptr); + } + mtx_unlock(&qhp->lock); + ring_doorbell(qhp->wq.doorbell, qhp->wq.qpid); + return err; +} + +int iwch_post_receive(struct ib_qp *ibqp, struct ib_recv_wr *wr, + struct ib_recv_wr **bad_wr) +{ + int err = 0; + struct iwch_qp *qhp; + u32 idx; + union t3_wr *wqe; + u32 num_wrs; + + qhp = to_iwch_qp(ibqp); + mtx_lock(&qhp->lock); + if (qhp->attr.state > IWCH_QP_STATE_RTS) { + mtx_unlock(&qhp->lock); + return (-EINVAL); + } + num_wrs = Q_FREECNT(qhp->wq.rq_rptr, qhp->wq.rq_wptr, + qhp->wq.rq_size_log2) - 1; + if (!wr) { + mtx_unlock(&qhp->lock); + return (-EINVAL); + } + while (wr) { + idx = Q_PTR2IDX(qhp->wq.wptr, qhp->wq.size_log2); + wqe = (union t3_wr *) (qhp->wq.queue + idx); + if (num_wrs) + err = iwch_build_rdma_recv(qhp->rhp, wqe, wr); + else + err = -ENOMEM; + if (err) { + *bad_wr = wr; + break; + } + qhp->wq.rq[Q_PTR2IDX(qhp->wq.rq_wptr, qhp->wq.rq_size_log2)] = + wr->wr_id; + build_fw_riwrh((void *) wqe, T3_WR_RCV, T3_COMPLETION_FLAG, + Q_GENBIT(qhp->wq.wptr, qhp->wq.size_log2), + 0, sizeof(struct t3_receive_wr) >> 3); + CTR6(KTR_IW_CXGB, "%s cookie 0x%llx idx 0x%x rq_wptr 0x%x rw_rptr 0x%x " + "wqe %p ", __FUNCTION__, (unsigned long long) wr->wr_id, + idx, qhp->wq.rq_wptr, qhp->wq.rq_rptr, wqe); + ++(qhp->wq.rq_wptr); + ++(qhp->wq.wptr); + wr = wr->next; + num_wrs--; + } + mtx_unlock(&qhp->lock); + ring_doorbell(qhp->wq.doorbell, qhp->wq.qpid); + return err; +} + +int iwch_bind_mw(struct ib_qp *qp, + struct ib_mw *mw, + struct ib_mw_bind *mw_bind) +{ + struct iwch_dev *rhp; + struct iwch_mw *mhp; + struct iwch_qp *qhp; + union t3_wr *wqe; + u32 pbl_addr; + u8 page_size; + u32 num_wrs; + struct ib_sge sgl; + int err=0; + enum t3_wr_flags t3_wr_flags; + u32 idx; + struct t3_swsq *sqp; + + qhp = to_iwch_qp(qp); + mhp = to_iwch_mw(mw); + rhp = qhp->rhp; + + mtx_lock(&qhp->lock); + if (qhp->attr.state > IWCH_QP_STATE_RTS) { + mtx_unlock(&qhp->lock); + return (-EINVAL); + } + num_wrs = Q_FREECNT(qhp->wq.sq_rptr, qhp->wq.sq_wptr, + qhp->wq.sq_size_log2); + if ((num_wrs) <= 0) { + mtx_unlock(&qhp->lock); + return (-ENOMEM); + } + idx = Q_PTR2IDX(qhp->wq.wptr, qhp->wq.size_log2); + CTR4(KTR_IW_CXGB, "%s: idx 0x%0x, mw 0x%p, mw_bind 0x%p", __FUNCTION__, idx, + mw, mw_bind); + wqe = (union t3_wr *) (qhp->wq.queue + idx); + + t3_wr_flags = 0; + if (mw_bind->send_flags & IB_SEND_SIGNALED) + t3_wr_flags = T3_COMPLETION_FLAG; + + sgl.addr = mw_bind->addr; + sgl.lkey = mw_bind->mr->lkey; + sgl.length = mw_bind->length; + wqe->bind.reserved = 0; + wqe->bind.type = T3_VA_BASED_TO; + + /* TBD: check perms */ + wqe->bind.perms = iwch_ib_to_mwbind_access(mw_bind->mw_access_flags); + wqe->bind.mr_stag = htobe32(mw_bind->mr->lkey); + wqe->bind.mw_stag = htobe32(mw->rkey); + wqe->bind.mw_len = 
htobe32(mw_bind->length); + wqe->bind.mw_va = htobe64(mw_bind->addr); + err = iwch_sgl2pbl_map(rhp, &sgl, 1, &pbl_addr, &page_size); + if (err) { + mtx_unlock(&qhp->lock); + return (err); + } + wqe->send.wrid.id0.hi = qhp->wq.sq_wptr; + sqp = qhp->wq.sq + Q_PTR2IDX(qhp->wq.sq_wptr, qhp->wq.sq_size_log2); + sqp->wr_id = mw_bind->wr_id; + sqp->opcode = T3_BIND_MW; + sqp->sq_wptr = qhp->wq.sq_wptr; + sqp->complete = 0; + sqp->signaled = (mw_bind->send_flags & IB_SEND_SIGNALED); + wqe->bind.mr_pbl_addr = htobe32(pbl_addr); + wqe->bind.mr_pagesz = page_size; + wqe->flit[T3_SQ_COOKIE_FLIT] = mw_bind->wr_id; + build_fw_riwrh((void *)wqe, T3_WR_BIND, t3_wr_flags, + Q_GENBIT(qhp->wq.wptr, qhp->wq.size_log2), 0, + sizeof(struct t3_bind_mw_wr) >> 3); + ++(qhp->wq.wptr); + ++(qhp->wq.sq_wptr); + mtx_unlock(&qhp->lock); + + ring_doorbell(qhp->wq.doorbell, qhp->wq.qpid); + + return err; +} + +static inline void build_term_codes(struct respQ_msg_t *rsp_msg, + u8 *layer_type, u8 *ecode) +{ + int status = TPT_ERR_INTERNAL_ERR; + int tagged = 0; + int opcode = -1; + int rqtype = 0; + int send_inv = 0; + + if (rsp_msg) { + status = CQE_STATUS(rsp_msg->cqe); + opcode = CQE_OPCODE(rsp_msg->cqe); + rqtype = RQ_TYPE(rsp_msg->cqe); + send_inv = (opcode == T3_SEND_WITH_INV) || + (opcode == T3_SEND_WITH_SE_INV); + tagged = (opcode == T3_RDMA_WRITE) || + (rqtype && (opcode == T3_READ_RESP)); + } + + switch (status) { + case TPT_ERR_STAG: + if (send_inv) { + *layer_type = LAYER_RDMAP|RDMAP_REMOTE_OP; + *ecode = RDMAP_CANT_INV_STAG; + } else { + *layer_type = LAYER_RDMAP|RDMAP_REMOTE_PROT; + *ecode = RDMAP_INV_STAG; + } + break; + case TPT_ERR_PDID: + *layer_type = LAYER_RDMAP|RDMAP_REMOTE_PROT; + if ((opcode == T3_SEND_WITH_INV) || + (opcode == T3_SEND_WITH_SE_INV)) + *ecode = RDMAP_CANT_INV_STAG; + else + *ecode = RDMAP_STAG_NOT_ASSOC; + break; + case TPT_ERR_QPID: + *layer_type = LAYER_RDMAP|RDMAP_REMOTE_PROT; + *ecode = RDMAP_STAG_NOT_ASSOC; + break; + case TPT_ERR_ACCESS: + *layer_type = LAYER_RDMAP|RDMAP_REMOTE_PROT; + *ecode = RDMAP_ACC_VIOL; + break; + case TPT_ERR_WRAP: + *layer_type = LAYER_RDMAP|RDMAP_REMOTE_PROT; + *ecode = RDMAP_TO_WRAP; + break; + case TPT_ERR_BOUND: + if (tagged) { + *layer_type = LAYER_DDP|DDP_TAGGED_ERR; + *ecode = DDPT_BASE_BOUNDS; + } else { + *layer_type = LAYER_RDMAP|RDMAP_REMOTE_PROT; + *ecode = RDMAP_BASE_BOUNDS; + } + break; + case TPT_ERR_INVALIDATE_SHARED_MR: + case TPT_ERR_INVALIDATE_MR_WITH_MW_BOUND: + *layer_type = LAYER_RDMAP|RDMAP_REMOTE_OP; + *ecode = RDMAP_CANT_INV_STAG; + break; + case TPT_ERR_ECC: + case TPT_ERR_ECC_PSTAG: + case TPT_ERR_INTERNAL_ERR: + *layer_type = LAYER_RDMAP|RDMAP_LOCAL_CATA; + *ecode = 0; + break; + case TPT_ERR_OUT_OF_RQE: + *layer_type = LAYER_DDP|DDP_UNTAGGED_ERR; + *ecode = DDPU_INV_MSN_NOBUF; + break; + case TPT_ERR_PBL_ADDR_BOUND: + *layer_type = LAYER_DDP|DDP_TAGGED_ERR; + *ecode = DDPT_BASE_BOUNDS; + break; + case TPT_ERR_CRC: + *layer_type = LAYER_MPA|DDP_LLP; + *ecode = MPA_CRC_ERR; + break; + case TPT_ERR_MARKER: + *layer_type = LAYER_MPA|DDP_LLP; + *ecode = MPA_MARKER_ERR; + break; + case TPT_ERR_PDU_LEN_ERR: + *layer_type = LAYER_DDP|DDP_UNTAGGED_ERR; + *ecode = DDPU_MSG_TOOBIG; + break; + case TPT_ERR_DDP_VERSION: + if (tagged) { + *layer_type = LAYER_DDP|DDP_TAGGED_ERR; + *ecode = DDPT_INV_VERS; + } else { + *layer_type = LAYER_DDP|DDP_UNTAGGED_ERR; + *ecode = DDPU_INV_VERS; + } + break; + case TPT_ERR_RDMA_VERSION: + *layer_type = LAYER_RDMAP|RDMAP_REMOTE_OP; + *ecode = RDMAP_INV_VERS; + break; + case TPT_ERR_OPCODE: + 
*layer_type = LAYER_RDMAP|RDMAP_REMOTE_OP; + *ecode = RDMAP_INV_OPCODE; + break; + case TPT_ERR_DDP_QUEUE_NUM: + *layer_type = LAYER_DDP|DDP_UNTAGGED_ERR; + *ecode = DDPU_INV_QN; + break; + case TPT_ERR_MSN: + case TPT_ERR_MSN_GAP: + case TPT_ERR_MSN_RANGE: + case TPT_ERR_IRD_OVERFLOW: + *layer_type = LAYER_DDP|DDP_UNTAGGED_ERR; + *ecode = DDPU_INV_MSN_RANGE; + break; + case TPT_ERR_TBIT: + *layer_type = LAYER_DDP|DDP_LOCAL_CATA; + *ecode = 0; + break; + case TPT_ERR_MO: + *layer_type = LAYER_DDP|DDP_UNTAGGED_ERR; + *ecode = DDPU_INV_MO; + break; + default: + *layer_type = LAYER_RDMAP|DDP_LOCAL_CATA; + *ecode = 0; + break; + } +} + +/* + * This posts a TERMINATE with layer=RDMA, type=catastrophic. + */ +int iwch_post_terminate(struct iwch_qp *qhp, struct respQ_msg_t *rsp_msg) +{ + union t3_wr *wqe; + struct terminate_message *term; + struct mbuf *m; + + CTR2(KTR_IW_CXGB, "%s %d", __FUNCTION__, __LINE__); + m = m_gethdr(MT_DATA, M_NOWAIT); + if (!m) { + log(LOG_ERR, "%s cannot send TERMINATE!\n", __FUNCTION__); + return (-ENOMEM); + } + wqe = mtod(m, union t3_wr *); + m->m_len = m->m_pkthdr.len = 40; + memset(wqe, 0, 40); + wqe->send.rdmaop = T3_TERMINATE; + + /* immediate data length */ + wqe->send.plen = htonl(4); + + /* immediate data starts here. */ + term = (struct terminate_message *)wqe->send.sgl; + build_term_codes(rsp_msg, &term->layer_etype, &term->ecode); + wqe->send.wrh.op_seop_flags = htobe32(V_FW_RIWR_OP(T3_WR_SEND) | + V_FW_RIWR_FLAGS(T3_COMPLETION_FLAG | T3_NOTIFY_FLAG)); + wqe->send.wrh.gen_tid_len = htobe32(V_FW_RIWR_TID(qhp->ep->hwtid)); + + m_set_priority(m, CPL_PRIORITY_DATA); + m_set_sgl(m, NULL); + m_set_sgllen(m, 0); + return cxgb_ofld_send(qhp->rhp->rdev.t3cdev_p, m); +} + +/* + * Assumes qhp lock is held. + */ +static void __flush_qp(struct iwch_qp *qhp) +{ + struct iwch_cq *rchp, *schp; + int count; + + rchp = get_chp(qhp->rhp, qhp->attr.rcq); + schp = get_chp(qhp->rhp, qhp->attr.scq); + + CTR4(KTR_IW_CXGB, "%s qhp %p rchp %p schp %p", __FUNCTION__, qhp, rchp, schp); + /* take a ref on the qhp since we must release the lock */ + qhp->refcnt++; + mtx_unlock(&qhp->lock); + + /* locking heirarchy: cq lock first, then qp lock. */ + mtx_lock(&rchp->lock); + mtx_lock(&qhp->lock); + cxio_flush_hw_cq(&rchp->cq); + cxio_count_rcqes(&rchp->cq, &qhp->wq, &count); + cxio_flush_rq(&qhp->wq, &rchp->cq, count); + mtx_unlock(&qhp->lock); + mtx_unlock(&rchp->lock); + (*rchp->ibcq.comp_handler)(&rchp->ibcq, rchp->ibcq.cq_context); + + /* locking heirarchy: cq lock first, then qp lock. */ + mtx_lock(&schp->lock); + mtx_lock(&qhp->lock); + cxio_flush_hw_cq(&schp->cq); + cxio_count_scqes(&schp->cq, &qhp->wq, &count); + cxio_flush_sq(&qhp->wq, &schp->cq, count); + mtx_unlock(&qhp->lock); + mtx_unlock(&schp->lock); + (*schp->ibcq.comp_handler)(&schp->ibcq, schp->ibcq.cq_context); + + /* deref */ + mtx_lock(&qhp->lock); + if (--qhp->refcnt == 0) + wakeup(qhp); +} + +static void flush_qp(struct iwch_qp *qhp) +{ + if (qhp->ibqp.uobject) + cxio_set_wq_in_error(&qhp->wq); + else + __flush_qp(qhp); +} + + +/* + * Return non zero if at least one RECV was pre-posted. 
+ */ +static int rqes_posted(struct iwch_qp *qhp) +{ + return fw_riwrh_opcode((struct fw_riwrh *)qhp->wq.queue) == T3_WR_RCV; +} + +static int rdma_init(struct iwch_dev *rhp, struct iwch_qp *qhp, + enum iwch_qp_attr_mask mask, + struct iwch_qp_attributes *attrs) +{ + struct t3_rdma_init_attr init_attr; + int ret; + + init_attr.tid = qhp->ep->hwtid; + init_attr.qpid = qhp->wq.qpid; + init_attr.pdid = qhp->attr.pd; + init_attr.scqid = qhp->attr.scq; + init_attr.rcqid = qhp->attr.rcq; + init_attr.rq_addr = qhp->wq.rq_addr; + init_attr.rq_size = 1 << qhp->wq.rq_size_log2; + init_attr.mpaattrs = uP_RI_MPA_IETF_ENABLE | + qhp->attr.mpa_attr.recv_marker_enabled | + (qhp->attr.mpa_attr.xmit_marker_enabled << 1) | + (qhp->attr.mpa_attr.crc_enabled << 2); + + /* + * XXX - The IWCM doesn't quite handle getting these + * attrs set before going into RTS. For now, just turn + * them on always... + */ +#if 0 + init_attr.qpcaps = qhp->attr.enableRdmaRead | + (qhp->attr.enableRdmaWrite << 1) | + (qhp->attr.enableBind << 2) | + (qhp->attr.enable_stag0_fastreg << 3) | + (qhp->attr.enable_stag0_fastreg << 4); +#else + init_attr.qpcaps = 0x1f; +#endif + init_attr.tcp_emss = qhp->ep->emss; + init_attr.ord = qhp->attr.max_ord; + init_attr.ird = qhp->attr.max_ird; + init_attr.qp_dma_addr = qhp->wq.dma_addr; + init_attr.qp_dma_size = (1UL << qhp->wq.size_log2); + init_attr.flags = rqes_posted(qhp) ? RECVS_POSTED : 0; + init_attr.irs = qhp->ep->rcv_seq; + CTR5(KTR_IW_CXGB, "%s init_attr.rq_addr 0x%x init_attr.rq_size = %d " + "flags 0x%x qpcaps 0x%x", __FUNCTION__, + init_attr.rq_addr, init_attr.rq_size, + init_attr.flags, init_attr.qpcaps); + ret = cxio_rdma_init(&rhp->rdev, &init_attr); + CTR2(KTR_IW_CXGB, "%s ret %d", __FUNCTION__, ret); + return ret; +} + +int iwch_modify_qp(struct iwch_dev *rhp, struct iwch_qp *qhp, + enum iwch_qp_attr_mask mask, + struct iwch_qp_attributes *attrs, + int internal) +{ + int ret = 0; + struct iwch_qp_attributes newattr = qhp->attr; + int disconnect = 0; + int terminate = 0; + int abort = 0; + int free = 0; + struct iwch_ep *ep = NULL; + + CTR6(KTR_IW_CXGB, "%s qhp %p qpid 0x%x ep %p state %d -> %d", __FUNCTION__, + qhp, qhp->wq.qpid, qhp->ep, qhp->attr.state, + (mask & IWCH_QP_ATTR_NEXT_STATE) ? 
attrs->next_state : -1); + + mtx_lock(&qhp->lock); + + /* Process attr changes if in IDLE */ + if (mask & IWCH_QP_ATTR_VALID_MODIFY) { + if (qhp->attr.state != IWCH_QP_STATE_IDLE) { + ret = -EIO; + goto out; + } + if (mask & IWCH_QP_ATTR_ENABLE_RDMA_READ) + newattr.enable_rdma_read = attrs->enable_rdma_read; + if (mask & IWCH_QP_ATTR_ENABLE_RDMA_WRITE) + newattr.enable_rdma_write = attrs->enable_rdma_write; + if (mask & IWCH_QP_ATTR_ENABLE_RDMA_BIND) + newattr.enable_bind = attrs->enable_bind; + if (mask & IWCH_QP_ATTR_MAX_ORD) { + if (attrs->max_ord > + rhp->attr.max_rdma_read_qp_depth) { + ret = -EINVAL; + goto out; + } + newattr.max_ord = attrs->max_ord; + } + if (mask & IWCH_QP_ATTR_MAX_IRD) { + if (attrs->max_ird > + rhp->attr.max_rdma_reads_per_qp) { + ret = -EINVAL; + goto out; + } + newattr.max_ird = attrs->max_ird; + } + qhp->attr = newattr; + } + + if (!(mask & IWCH_QP_ATTR_NEXT_STATE)) + goto out; + if (qhp->attr.state == attrs->next_state) + goto out; + + switch (qhp->attr.state) { + case IWCH_QP_STATE_IDLE: + switch (attrs->next_state) { + case IWCH_QP_STATE_RTS: + if (!(mask & IWCH_QP_ATTR_LLP_STREAM_HANDLE)) { + ret = -EINVAL; + goto out; + } + if (!(mask & IWCH_QP_ATTR_MPA_ATTR)) { + ret = -EINVAL; + goto out; + } + qhp->attr.mpa_attr = attrs->mpa_attr; + qhp->attr.llp_stream_handle = attrs->llp_stream_handle; + qhp->ep = qhp->attr.llp_stream_handle; + qhp->attr.state = IWCH_QP_STATE_RTS; + + /* + * Ref the endpoint here and deref when we + * disassociate the endpoint from the QP. This + * happens in CLOSING->IDLE transition or *->ERROR + * transition. + */ + get_ep(&qhp->ep->com); + mtx_unlock(&qhp->lock); + ret = rdma_init(rhp, qhp, mask, attrs); + mtx_lock(&qhp->lock); + if (ret) + goto err; + break; + case IWCH_QP_STATE_ERROR: + qhp->attr.state = IWCH_QP_STATE_ERROR; + flush_qp(qhp); + break; + default: + ret = -EINVAL; + goto out; + } + break; + case IWCH_QP_STATE_RTS: + switch (attrs->next_state) { + case IWCH_QP_STATE_CLOSING: + PANIC_IF(atomic_load_acq_int(&qhp->ep->com.refcount) < 2); + qhp->attr.state = IWCH_QP_STATE_CLOSING; + if (!internal) { + abort=0; + disconnect = 1; + ep = qhp->ep; + } + flush_qp(qhp); + break; + case IWCH_QP_STATE_TERMINATE: + qhp->attr.state = IWCH_QP_STATE_TERMINATE; + if (qhp->ibqp.uobject) + cxio_set_wq_in_error(&qhp->wq); + if (!internal) + terminate = 1; + break; + case IWCH_QP_STATE_ERROR: + qhp->attr.state = IWCH_QP_STATE_ERROR; + if (!internal) { + abort=1; + disconnect = 1; + ep = qhp->ep; + } + goto err; + break; + default: + ret = -EINVAL; + goto out; + } + break; + case IWCH_QP_STATE_CLOSING: + if (!internal) { + ret = -EINVAL; + goto out; + } + switch (attrs->next_state) { + case IWCH_QP_STATE_IDLE: + qhp->attr.state = IWCH_QP_STATE_IDLE; + qhp->attr.llp_stream_handle = NULL; + put_ep(&qhp->ep->com); + qhp->ep = NULL; + wakeup(qhp); + break; + case IWCH_QP_STATE_ERROR: + disconnect=1; + goto err; + default: + ret = -EINVAL; + goto err; + } + break; + case IWCH_QP_STATE_ERROR: + if (attrs->next_state != IWCH_QP_STATE_IDLE) { + ret = -EINVAL; + goto out; + } + + if (!Q_EMPTY(qhp->wq.sq_rptr, qhp->wq.sq_wptr) || + !Q_EMPTY(qhp->wq.rq_rptr, qhp->wq.rq_wptr)) { + ret = -EINVAL; + goto out; + } + qhp->attr.state = IWCH_QP_STATE_IDLE; + memset(&qhp->attr, 0, sizeof(qhp->attr)); + break; + case IWCH_QP_STATE_TERMINATE: + if (!internal) { + ret = -EINVAL; + goto out; + } + goto err; + break; + default: + log(LOG_ERR, "%s in a bad state %d\n", + __FUNCTION__, qhp->attr.state); + ret = -EINVAL; + goto err; + break; + } + goto out; +err: 
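+	/*
+	 * Common teardown path: reached when an IDLE->RTS transition fails
+	 * or when the QP is driven into ERROR.  Disassociate the QP from
+	 * its endpoint, force the ERROR state, wake any waiters and flush
+	 * both CQs; "free" makes the tail of this function drop the
+	 * endpoint reference once qhp->lock has been released.
+	 */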
+ CTR3(KTR_IW_CXGB, "%s disassociating ep %p qpid 0x%x", __FUNCTION__, qhp->ep, + qhp->wq.qpid); + + /* disassociate the LLP connection */ + qhp->attr.llp_stream_handle = NULL; + ep = qhp->ep; + qhp->ep = NULL; + qhp->attr.state = IWCH_QP_STATE_ERROR; + free=1; + wakeup(qhp); + PANIC_IF(!ep); + flush_qp(qhp); +out: + mtx_unlock(&qhp->lock); + + if (terminate) + iwch_post_terminate(qhp, NULL); + + /* + * If disconnect is 1, then we need to initiate a disconnect + * on the EP. This can be a normal close (RTS->CLOSING) or + * an abnormal close (RTS/CLOSING->ERROR). + */ + if (disconnect) + iwch_ep_disconnect(ep, abort, M_NOWAIT); + + /* + * If free is 1, then we've disassociated the EP from the QP + * and we need to dereference the EP. + */ + if (free) + put_ep(&ep->com); + + CTR2(KTR_IW_CXGB, "%s exit state %d", __FUNCTION__, qhp->attr.state); + return ret; +} + +static int quiesce_qp(struct iwch_qp *qhp) +{ + mtx_lock(&qhp->lock); + iwch_quiesce_tid(qhp->ep); + qhp->flags |= QP_QUIESCED; + mtx_unlock(&qhp->lock); + return 0; +} + +static int resume_qp(struct iwch_qp *qhp) +{ + mtx_lock(&qhp->lock); + iwch_resume_tid(qhp->ep); + qhp->flags &= ~QP_QUIESCED; + mtx_lock(&qhp->lock); + return 0; +} + +int iwch_quiesce_qps(struct iwch_cq *chp) +{ + int i; + struct iwch_qp *qhp; + + for (i=0; i < T3_MAX_NUM_QP; i++) { + qhp = get_qhp(chp->rhp, i); + if (!qhp) + continue; + if ((qhp->attr.rcq == chp->cq.cqid) && !qp_quiesced(qhp)) { + quiesce_qp(qhp); + continue; + } + if ((qhp->attr.scq == chp->cq.cqid) && !qp_quiesced(qhp)) + quiesce_qp(qhp); + } + return 0; +} + +int iwch_resume_qps(struct iwch_cq *chp) +{ + int i; + struct iwch_qp *qhp; + + for (i=0; i < T3_MAX_NUM_QP; i++) { + qhp = get_qhp(chp->rhp, i); + if (!qhp) + continue; + if ((qhp->attr.rcq == chp->cq.cqid) && qp_quiesced(qhp)) { + resume_qp(qhp); + continue; + } + if ((qhp->attr.scq == chp->cq.cqid) && qp_quiesced(qhp)) + resume_qp(qhp); + } + return 0; +} + diff --git a/sys/dev/cxgb/ulp/iw_cxgb/iw_cxgb_resource.c b/sys/dev/cxgb/ulp/iw_cxgb/iw_cxgb_resource.c new file mode 100644 index 0000000..9d3618f --- /dev/null +++ b/sys/dev/cxgb/ulp/iw_cxgb/iw_cxgb_resource.c @@ -0,0 +1,382 @@ +/************************************************************************** + +Copyright (c) 2007, Chelsio Inc. +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + + 1. Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimer. + + 2. Neither the name of the Chelsio Corporation nor the names of its + contributors may be used to endorse or promote products derived from + this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +POSSIBILITY OF SUCH DAMAGE. + +***************************************************************************/ +#include <sys/cdefs.h> +__FBSDID("$FreeBSD$"); + +#include <sys/param.h> +#include <sys/systm.h> +#include <sys/kernel.h> +#include <sys/bus.h> +#include <sys/module.h> +#include <sys/pciio.h> +#include <sys/conf.h> +#include <machine/bus.h> +#include <machine/resource.h> +#include <sys/bus_dma.h> +#include <sys/rman.h> +#include <sys/ioccom.h> +#include <sys/mbuf.h> +#include <sys/mutex.h> +#include <sys/rwlock.h> +#include <sys/linker.h> +#include <sys/firmware.h> +#include <sys/socket.h> +#include <sys/sockio.h> +#include <sys/smp.h> +#include <sys/sysctl.h> +#include <sys/syslog.h> +#include <sys/queue.h> +#include <sys/taskqueue.h> +#include <sys/proc.h> +#include <sys/queue.h> +#include <sys/libkern.h> + +#include <netinet/in.h> + +#include <contrib/rdma/ib_verbs.h> +#include <contrib/rdma/ib_umem.h> +#include <contrib/rdma/ib_user_verbs.h> + + +#ifdef CONFIG_DEFINED +#include <cxgb_include.h> +#include <ulp/iw_cxgb/iw_cxgb_wr.h> +#include <ulp/iw_cxgb/iw_cxgb_hal.h> +#include <ulp/iw_cxgb/iw_cxgb_provider.h> +#include <ulp/iw_cxgb/iw_cxgb_cm.h> +#include <ulp/iw_cxgb/iw_cxgb.h> +#include <ulp/iw_cxgb/iw_cxgb_resource.h> +#include <ulp/iw_cxgb/iw_cxgb_user.h> +#else +#include <dev/cxgb/cxgb_include.h> +#include <dev/cxgb/ulp/iw_cxgb/iw_cxgb_wr.h> +#include <dev/cxgb/ulp/iw_cxgb/iw_cxgb_hal.h> +#include <dev/cxgb/ulp/iw_cxgb/iw_cxgb_provider.h> +#include <dev/cxgb/ulp/iw_cxgb/iw_cxgb_cm.h> +#include <dev/cxgb/ulp/iw_cxgb/iw_cxgb.h> +#include <dev/cxgb/ulp/iw_cxgb/iw_cxgb_resource.h> +#include <dev/cxgb/ulp/iw_cxgb/iw_cxgb_user.h> +#endif + +#ifdef needed +static struct buf_ring *rhdl_fifo; +static struct mtx rhdl_fifo_lock; +#endif + +#define RANDOM_SIZE 16 + +static int __cxio_init_resource_fifo(struct buf_ring **fifo, + struct mtx *fifo_lock, + u32 nr, u32 skip_low, + u32 skip_high, + int randomize) +{ + u32 i, j, idx; + u32 random_bytes; + u32 rarray[16]; + mtx_init(fifo_lock, "cxio fifo", NULL, MTX_DEF|MTX_DUPOK); + + *fifo = buf_ring_alloc(nr, M_NOWAIT); + if (*fifo == NULL) + return (-ENOMEM); +#if 0 + for (i = 0; i < skip_low + skip_high; i++) { + u32 entry = 0; + + buf_ring_enqueue(*fifo, (uintptr_t) entry); + } +#endif + if (randomize) { + j = 0; + random_bytes = random(); + for (i = 0; i < RANDOM_SIZE; i++) + rarray[i] = i + skip_low; + for (i = skip_low + RANDOM_SIZE; i < nr - skip_high; i++) { + if (j >= RANDOM_SIZE) { + j = 0; + random_bytes = random(); + } + idx = (random_bytes >> (j * 2)) & 0xF; + buf_ring_enqueue(*fifo, (void *)(uintptr_t)rarray[idx]); + rarray[idx] = i; + j++; + } + for (i = 0; i < RANDOM_SIZE; i++) + buf_ring_enqueue(*fifo, (void *) (uintptr_t)rarray[i]); + } else + for (i = skip_low; i < nr - skip_high; i++) + buf_ring_enqueue(*fifo, (void *) (uintptr_t)i); +#if 0 + for (i = 0; i < skip_low + skip_high; i++) + buf_ring_dequeue(*fifo); +#endif + return 0; +} + +static int cxio_init_resource_fifo(struct buf_ring **fifo, struct mtx * fifo_lock, + u32 nr, u32 
skip_low, u32 skip_high) +{ + return (__cxio_init_resource_fifo(fifo, fifo_lock, nr, skip_low, + skip_high, 0)); +} + +static int cxio_init_resource_fifo_random(struct buf_ring **fifo, + struct mtx * fifo_lock, + u32 nr, u32 skip_low, u32 skip_high) +{ + + return (__cxio_init_resource_fifo(fifo, fifo_lock, nr, skip_low, + skip_high, 1)); +} + +static int cxio_init_qpid_fifo(struct cxio_rdev *rdev_p) +{ + u32 i; + + mtx_init(&rdev_p->rscp->qpid_fifo_lock, "qpid fifo", NULL, MTX_DEF); + + rdev_p->rscp->qpid_fifo = buf_ring_alloc(T3_MAX_NUM_QP, M_NOWAIT); + if (rdev_p->rscp->qpid_fifo == NULL) + return (-ENOMEM); + + for (i = 16; i < T3_MAX_NUM_QP; i++) + if (!(i & rdev_p->qpmask)) + buf_ring_enqueue(rdev_p->rscp->qpid_fifo, (void *) (uintptr_t)i); + return 0; +} + +#ifdef needed +int cxio_hal_init_rhdl_resource(u32 nr_rhdl) +{ + return cxio_init_resource_fifo(&rhdl_fifo, &rhdl_fifo_lock, nr_rhdl, 1, + 0); +} + +void cxio_hal_destroy_rhdl_resource(void) +{ + buf_ring_free(rhdl_fifo); +} +#endif + +/* nr_* must be power of 2 */ +int cxio_hal_init_resource(struct cxio_rdev *rdev_p, + u32 nr_tpt, u32 nr_pbl, + u32 nr_rqt, u32 nr_qpid, u32 nr_cqid, u32 nr_pdid) +{ + int err = 0; + struct cxio_hal_resource *rscp; + + rscp = malloc(sizeof(*rscp), M_DEVBUF, M_NOWAIT|M_ZERO); + if (!rscp) + return (-ENOMEM); + rdev_p->rscp = rscp; + err = cxio_init_resource_fifo_random(&rscp->tpt_fifo, + &rscp->tpt_fifo_lock, + nr_tpt, 1, 0); + if (err) + goto tpt_err; + err = cxio_init_qpid_fifo(rdev_p); + if (err) + goto qpid_err; + err = cxio_init_resource_fifo(&rscp->cqid_fifo, &rscp->cqid_fifo_lock, + nr_cqid, 1, 0); + if (err) + goto cqid_err; + err = cxio_init_resource_fifo(&rscp->pdid_fifo, &rscp->pdid_fifo_lock, + nr_pdid, 1, 0); + if (err) + goto pdid_err; + return 0; +pdid_err: + buf_ring_free(rscp->cqid_fifo); +cqid_err: + buf_ring_free(rscp->qpid_fifo); +qpid_err: + buf_ring_free(rscp->tpt_fifo); +tpt_err: + return (-ENOMEM); +} + +/* + * returns 0 if no resource available + */ +static u32 cxio_hal_get_resource(struct buf_ring *fifo, struct mtx *lock) +{ + u32 entry; + + mtx_lock(lock); + entry = (u32)(uintptr_t)buf_ring_dequeue(fifo); + mtx_unlock(lock); + return entry; +} + +static void cxio_hal_put_resource(struct buf_ring *fifo, u32 entry, struct mtx *lock) +{ + mtx_lock(lock); + buf_ring_enqueue(fifo, (void *) (uintptr_t)entry); + mtx_unlock(lock); +} + +u32 cxio_hal_get_stag(struct cxio_hal_resource *rscp) +{ + return cxio_hal_get_resource(rscp->tpt_fifo, &rscp->tpt_fifo_lock); +} + +void cxio_hal_put_stag(struct cxio_hal_resource *rscp, u32 stag) +{ + cxio_hal_put_resource(rscp->tpt_fifo, stag, &rscp->tpt_fifo_lock); +} + +u32 cxio_hal_get_qpid(struct cxio_hal_resource *rscp) +{ + u32 qpid = cxio_hal_get_resource(rscp->qpid_fifo, &rscp->qpid_fifo_lock); + CTR2(KTR_IW_CXGB, "%s qpid 0x%x", __FUNCTION__, qpid); + return qpid; +} + +void cxio_hal_put_qpid(struct cxio_hal_resource *rscp, u32 qpid) +{ + CTR2(KTR_IW_CXGB, "%s qpid 0x%x", __FUNCTION__, qpid); + cxio_hal_put_resource(rscp->qpid_fifo, qpid, &rscp->qpid_fifo_lock); +} + +u32 cxio_hal_get_cqid(struct cxio_hal_resource *rscp) +{ + return cxio_hal_get_resource(rscp->cqid_fifo, &rscp->cqid_fifo_lock); +} + +void cxio_hal_put_cqid(struct cxio_hal_resource *rscp, u32 cqid) +{ + cxio_hal_put_resource(rscp->cqid_fifo, cqid, &rscp->cqid_fifo_lock); +} + +u32 cxio_hal_get_pdid(struct cxio_hal_resource *rscp) +{ + return cxio_hal_get_resource(rscp->pdid_fifo, &rscp->pdid_fifo_lock); +} + +void cxio_hal_put_pdid(struct cxio_hal_resource *rscp, u32 
pdid) +{ + cxio_hal_put_resource(rscp->pdid_fifo, pdid, &rscp->pdid_fifo_lock); +} + +void cxio_hal_destroy_resource(struct cxio_hal_resource *rscp) +{ + buf_ring_free(rscp->tpt_fifo); + buf_ring_free(rscp->cqid_fifo); + buf_ring_free(rscp->qpid_fifo); + buf_ring_free(rscp->pdid_fifo); + free(rscp, M_DEVBUF); +} + +/* + * PBL Memory Manager. Uses Linux generic allocator. + */ + +#define MIN_PBL_SHIFT 8 /* 256B == min PBL size (32 entries) */ +#define PBL_CHUNK 2*1024*1024 + +u32 cxio_hal_pblpool_alloc(struct cxio_rdev *rdev_p, int size) +{ + unsigned long addr = gen_pool_alloc(rdev_p->pbl_pool, size); + CTR3(KTR_IW_CXGB, "%s addr 0x%x size %d", __FUNCTION__, (u32)addr, size); + return (u32)addr; +} + +void cxio_hal_pblpool_free(struct cxio_rdev *rdev_p, u32 addr, int size) +{ + CTR3(KTR_IW_CXGB, "%s addr 0x%x size %d", __FUNCTION__, addr, size); + gen_pool_free(rdev_p->pbl_pool, (unsigned long)addr, size); +} + +int cxio_hal_pblpool_create(struct cxio_rdev *rdev_p) +{ + + rdev_p->pbl_pool = gen_pool_create(rdev_p->rnic_info.pbl_base, MIN_PBL_SHIFT, + rdev_p->rnic_info.pbl_top - rdev_p->rnic_info.pbl_base); +#if 0 + if (rdev_p->pbl_pool) { + + unsigned long i; + for (i = rdev_p->rnic_info.pbl_base; + i <= rdev_p->rnic_info.pbl_top - PBL_CHUNK + 1; + i += PBL_CHUNK) + gen_pool_add(rdev_p->pbl_pool, i, PBL_CHUNK, -1); + } +#endif + return rdev_p->pbl_pool ? 0 : (-ENOMEM); +} + +void cxio_hal_pblpool_destroy(struct cxio_rdev *rdev_p) +{ + gen_pool_destroy(rdev_p->pbl_pool); +} + +/* + * RQT Memory Manager. Uses Linux generic allocator. + */ + +#define MIN_RQT_SHIFT 10 /* 1KB == mini RQT size (16 entries) */ +#define RQT_CHUNK 2*1024*1024 + +u32 cxio_hal_rqtpool_alloc(struct cxio_rdev *rdev_p, int size) +{ + unsigned long addr = gen_pool_alloc(rdev_p->rqt_pool, size << 6); + CTR3(KTR_IW_CXGB, "%s addr 0x%x size %d", __FUNCTION__, (u32)addr, size << 6); + return (u32)addr; +} + +void cxio_hal_rqtpool_free(struct cxio_rdev *rdev_p, u32 addr, int size) +{ + CTR3(KTR_IW_CXGB, "%s addr 0x%x size %d", __FUNCTION__, addr, size << 6); + gen_pool_free(rdev_p->rqt_pool, (unsigned long)addr, size << 6); +} + +int cxio_hal_rqtpool_create(struct cxio_rdev *rdev_p) +{ + + rdev_p->rqt_pool = gen_pool_create(rdev_p->rnic_info.rqt_base, + MIN_RQT_SHIFT, rdev_p->rnic_info.rqt_top - rdev_p->rnic_info.rqt_base); +#if 0 + if (rdev_p->rqt_pool) { + unsigned long i; + + for (i = rdev_p->rnic_info.rqt_base; + i <= rdev_p->rnic_info.rqt_top - RQT_CHUNK + 1; + i += RQT_CHUNK) + gen_pool_add(rdev_p->rqt_pool, i, RQT_CHUNK, -1); + } +#endif + return rdev_p->rqt_pool ? 0 : (-ENOMEM); +} + +void cxio_hal_rqtpool_destroy(struct cxio_rdev *rdev_p) +{ + gen_pool_destroy(rdev_p->rqt_pool); +} diff --git a/sys/dev/cxgb/ulp/iw_cxgb/iw_cxgb_resource.h b/sys/dev/cxgb/ulp/iw_cxgb/iw_cxgb_resource.h new file mode 100644 index 0000000..e0282a3 --- /dev/null +++ b/sys/dev/cxgb/ulp/iw_cxgb/iw_cxgb_resource.h @@ -0,0 +1,59 @@ +/************************************************************************** + +Copyright (c) 2007, 2008 Chelsio Inc. +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + + 1. Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimer. + + 2. Neither the name of the Chelsio Corporation nor the names of its + contributors may be used to endorse or promote products derived from + this software without specific prior written permission. 
+ +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +POSSIBILITY OF SUCH DAMAGE. + +$FreeBSD$ + +***************************************************************************/ +#ifndef __CXIO_RESOURCE_H__ +#define __CXIO_RESOURCE_H__ + +extern int cxio_hal_init_rhdl_resource(u32 nr_rhdl); +extern void cxio_hal_destroy_rhdl_resource(void); +extern int cxio_hal_init_resource(struct cxio_rdev *rdev_p, + u32 nr_tpt, u32 nr_pbl, + u32 nr_rqt, u32 nr_qpid, u32 nr_cqid, + u32 nr_pdid); +extern u32 cxio_hal_get_stag(struct cxio_hal_resource *rscp); +extern void cxio_hal_put_stag(struct cxio_hal_resource *rscp, u32 stag); +extern u32 cxio_hal_get_qpid(struct cxio_hal_resource *rscp); +extern void cxio_hal_put_qpid(struct cxio_hal_resource *rscp, u32 qpid); +extern u32 cxio_hal_get_cqid(struct cxio_hal_resource *rscp); +extern void cxio_hal_put_cqid(struct cxio_hal_resource *rscp, u32 cqid); +extern void cxio_hal_destroy_resource(struct cxio_hal_resource *rscp); + +#define PBL_OFF(rdev_p, a) ( (a) - (rdev_p)->rnic_info.pbl_base ) +extern int cxio_hal_pblpool_create(struct cxio_rdev *rdev_p); +extern void cxio_hal_pblpool_destroy(struct cxio_rdev *rdev_p); +extern u32 cxio_hal_pblpool_alloc(struct cxio_rdev *rdev_p, int size); +extern void cxio_hal_pblpool_free(struct cxio_rdev *rdev_p, u32 addr, int size); + +#define RQT_OFF(rdev_p, a) ( (a) - (rdev_p)->rnic_info.rqt_base ) +extern int cxio_hal_rqtpool_create(struct cxio_rdev *rdev_p); +extern void cxio_hal_rqtpool_destroy(struct cxio_rdev *rdev_p); +extern u32 cxio_hal_rqtpool_alloc(struct cxio_rdev *rdev_p, int size); +extern void cxio_hal_rqtpool_free(struct cxio_rdev *rdev_p, u32 addr, int size); +#endif diff --git a/sys/dev/cxgb/ulp/iw_cxgb/iw_cxgb_user.h b/sys/dev/cxgb/ulp/iw_cxgb/iw_cxgb_user.h new file mode 100644 index 0000000..3086a63 --- /dev/null +++ b/sys/dev/cxgb/ulp/iw_cxgb/iw_cxgb_user.h @@ -0,0 +1,68 @@ +/* + * Copyright (c) 2007, 2008 Chelsio, Inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * $FreeBSD$ + */ +#ifndef __IWCH_USER_H__ +#define __IWCH_USER_H__ + +#define IWCH_UVERBS_ABI_VERSION 1 + +/* + * Make sure that all structs defined in this file remain laid out so + * that they pack the same way on 32-bit and 64-bit architectures (to + * avoid incompatibility between 32-bit userspace and 64-bit kernels). + * In particular do not use pointer types -- pass pointers in uint64_t + * instead. + */ +struct iwch_create_cq_req { + uint64_t user_rptr_addr; +}; + +struct iwch_create_cq_resp { + uint64_t key; + uint32_t cqid; + uint32_t size_log2; +}; + +struct iwch_create_qp_resp { + uint64_t key; + uint64_t db_key; + uint32_t qpid; + uint32_t size_log2; + uint32_t sq_size_log2; + uint32_t rq_size_log2; +}; + +struct iwch_reg_user_mr_resp { + uint32_t pbl_addr; +}; +#endif diff --git a/sys/dev/cxgb/ulp/iw_cxgb/iw_cxgb_wr.h b/sys/dev/cxgb/ulp/iw_cxgb/iw_cxgb_wr.h new file mode 100644 index 0000000..bf8f2d6 --- /dev/null +++ b/sys/dev/cxgb/ulp/iw_cxgb/iw_cxgb_wr.h @@ -0,0 +1,684 @@ +/************************************************************************** + +Copyright (c) 2007, 2008 Chelsio Inc. +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + + 1. Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimer. + + 2. Neither the name of the Chelsio Corporation nor the names of its + contributors may be used to endorse or promote products derived from + this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +POSSIBILITY OF SUCH DAMAGE. 
+ +$FreeBSD$ + +***************************************************************************/ +#ifndef __CXIO_WR_H__ +#define __CXIO_WR_H__ +#define T3_MAX_SGE 4 +#define T3_MAX_INLINE 64 + +#define Q_EMPTY(rptr,wptr) ((rptr)==(wptr)) +#define Q_FULL(rptr,wptr,size_log2) ( (((wptr)-(rptr))>>(size_log2)) && \ + ((rptr)!=(wptr)) ) +#define Q_GENBIT(ptr,size_log2) (!(((ptr)>>size_log2)&0x1)) +#define Q_FREECNT(rptr,wptr,size_log2) ((1UL<<size_log2)-((wptr)-(rptr))) +#define Q_COUNT(rptr,wptr) ((wptr)-(rptr)) +#define Q_PTR2IDX(ptr,size_log2) (ptr & ((1UL<<size_log2)-1)) + +static __inline void +ring_doorbell(void /* __iomem */ *doorbell, u32 qpid) +{ + writel(doorbell, ((1<<31) | qpid)); +} + +#define SEQ32_GE(x,y) (!( (((u32) (x)) - ((u32) (y))) & 0x80000000 )) + +enum t3_wr_flags { + T3_COMPLETION_FLAG = 0x01, + T3_NOTIFY_FLAG = 0x02, + T3_SOLICITED_EVENT_FLAG = 0x04, + T3_READ_FENCE_FLAG = 0x08, + T3_LOCAL_FENCE_FLAG = 0x10 +} __attribute__ ((packed)); + +enum t3_wr_opcode { + T3_WR_BP = FW_WROPCODE_RI_BYPASS, + T3_WR_SEND = FW_WROPCODE_RI_SEND, + T3_WR_WRITE = FW_WROPCODE_RI_RDMA_WRITE, + T3_WR_READ = FW_WROPCODE_RI_RDMA_READ, + T3_WR_INV_STAG = FW_WROPCODE_RI_LOCAL_INV, + T3_WR_BIND = FW_WROPCODE_RI_BIND_MW, + T3_WR_RCV = FW_WROPCODE_RI_RECEIVE, + T3_WR_INIT = FW_WROPCODE_RI_RDMA_INIT, + T3_WR_QP_MOD = FW_WROPCODE_RI_MODIFY_QP +} __attribute__ ((packed)); + +enum t3_rdma_opcode { + T3_RDMA_WRITE, /* IETF RDMAP v1.0 ... */ + T3_READ_REQ, + T3_READ_RESP, + T3_SEND, + T3_SEND_WITH_INV, + T3_SEND_WITH_SE, + T3_SEND_WITH_SE_INV, + T3_TERMINATE, + T3_RDMA_INIT, /* CHELSIO RI specific ... */ + T3_BIND_MW, + T3_FAST_REGISTER, + T3_LOCAL_INV, + T3_QP_MOD, + T3_BYPASS +} __attribute__ ((packed)); + +static inline enum t3_rdma_opcode wr2opcode(enum t3_wr_opcode wrop) +{ + switch (wrop) { + case T3_WR_BP: return T3_BYPASS; + case T3_WR_SEND: return T3_SEND; + case T3_WR_WRITE: return T3_RDMA_WRITE; + case T3_WR_READ: return T3_READ_REQ; + case T3_WR_INV_STAG: return T3_LOCAL_INV; + case T3_WR_BIND: return T3_BIND_MW; + case T3_WR_INIT: return T3_RDMA_INIT; + case T3_WR_QP_MOD: return T3_QP_MOD; + default: break; + } + return -1; +} + + +/* Work request id */ +union t3_wrid { + struct { + u32 hi; + u32 low; + } id0; + u64 id1; +}; + +#define WRID(wrid) (wrid.id1) +#define WRID_GEN(wrid) (wrid.id0.wr_gen) +#define WRID_IDX(wrid) (wrid.id0.wr_idx) +#define WRID_LO(wrid) (wrid.id0.wr_lo) + +struct fw_riwrh { + __be32 op_seop_flags; + __be32 gen_tid_len; +}; + +#define S_FW_RIWR_OP 24 +#define M_FW_RIWR_OP 0xff +#define V_FW_RIWR_OP(x) ((x) << S_FW_RIWR_OP) +#define G_FW_RIWR_OP(x) ((((x) >> S_FW_RIWR_OP)) & M_FW_RIWR_OP) + +#define S_FW_RIWR_SOPEOP 22 +#define M_FW_RIWR_SOPEOP 0x3 +#define V_FW_RIWR_SOPEOP(x) ((x) << S_FW_RIWR_SOPEOP) + +#define S_FW_RIWR_FLAGS 8 +#define M_FW_RIWR_FLAGS 0x3fffff +#define V_FW_RIWR_FLAGS(x) ((x) << S_FW_RIWR_FLAGS) +#define G_FW_RIWR_FLAGS(x) ((((x) >> S_FW_RIWR_FLAGS)) & M_FW_RIWR_FLAGS) + +#define S_FW_RIWR_TID 8 +#define V_FW_RIWR_TID(x) ((x) << S_FW_RIWR_TID) + +#define S_FW_RIWR_LEN 0 +#define V_FW_RIWR_LEN(x) ((x) << S_FW_RIWR_LEN) + +#define S_FW_RIWR_GEN 31 +#define V_FW_RIWR_GEN(x) ((x) << S_FW_RIWR_GEN) + +struct t3_sge { + __be32 stag; + __be32 len; + __be64 to; +}; + +/* If num_sgle is zero, flit 5+ contains immediate data.*/ +struct t3_send_wr { + struct fw_riwrh wrh; /* 0 */ + union t3_wrid wrid; /* 1 */ + + u8 rdmaop; /* 2 */ + u8 reserved[3]; + __be32 rem_stag; + __be32 plen; /* 3 */ + __be32 num_sgle; + struct t3_sge sgl[T3_MAX_SGE]; /* 4+ */ +}; + 
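
The Q_* helpers near the top of this header treat the read and write pointers as free-running 32-bit counters: the low size_log2 bits select the slot, and the bit just above them yields the generation bit that flips on every wrap. Below is a minimal, self-contained sketch of that arithmetic for a hypothetical 4-entry queue; it is an illustration only, not driver code, and the EX_* names are invented.

/* Standalone sketch of the free-running rptr/wptr arithmetic (illustration only). */
#include <assert.h>
#include <stdint.h>

#define SZ_LOG2 2                              /* hypothetical 4-entry queue */
#define EX_PTR2IDX(p)    ((p) & ((1UL << SZ_LOG2) - 1))
#define EX_GENBIT(p)     (!(((p) >> SZ_LOG2) & 0x1))
#define EX_FREECNT(r, w) ((1UL << SZ_LOG2) - ((w) - (r)))

int
main(void)
{
        uint32_t rptr = 0, wptr = 0;

        wptr += 4;                              /* post 4 WRs: queue is full */
        assert(EX_FREECNT(rptr, wptr) == 0);
        rptr += 1;                              /* consumer retires one WR */
        assert(EX_FREECNT(rptr, wptr) == 1);
        wptr += 1;                              /* next WR wraps... */
        assert(EX_PTR2IDX(wptr - 1) == 0);      /* ...back to slot 0 */
        assert(EX_GENBIT(wptr - 1) != EX_GENBIT(0));    /* gen bit flipped */
        return (0);
}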
+struct t3_local_inv_wr { + struct fw_riwrh wrh; /* 0 */ + union t3_wrid wrid; /* 1 */ + __be32 stag; /* 2 */ + __be32 reserved3; +}; + +struct t3_rdma_write_wr { + struct fw_riwrh wrh; /* 0 */ + union t3_wrid wrid; /* 1 */ + u8 rdmaop; /* 2 */ + u8 reserved[3]; + __be32 stag_sink; + __be64 to_sink; /* 3 */ + __be32 plen; /* 4 */ + __be32 num_sgle; + struct t3_sge sgl[T3_MAX_SGE]; /* 5+ */ +}; + +struct t3_rdma_read_wr { + struct fw_riwrh wrh; /* 0 */ + union t3_wrid wrid; /* 1 */ + u8 rdmaop; /* 2 */ + u8 reserved[3]; + __be32 rem_stag; + __be64 rem_to; /* 3 */ + __be32 local_stag; /* 4 */ + __be32 local_len; + __be64 local_to; /* 5 */ +}; + +enum t3_addr_type { + T3_VA_BASED_TO = 0x0, + T3_ZERO_BASED_TO = 0x1 +} __attribute__ ((packed)); + +enum t3_mem_perms { + T3_MEM_ACCESS_LOCAL_READ = 0x1, + T3_MEM_ACCESS_LOCAL_WRITE = 0x2, + T3_MEM_ACCESS_REM_READ = 0x4, + T3_MEM_ACCESS_REM_WRITE = 0x8 +} __attribute__ ((packed)); + +struct t3_bind_mw_wr { + struct fw_riwrh wrh; /* 0 */ + union t3_wrid wrid; /* 1 */ + u16 reserved; /* 2 */ + u8 type; + u8 perms; + __be32 mr_stag; + __be32 mw_stag; /* 3 */ + __be32 mw_len; + __be64 mw_va; /* 4 */ + __be32 mr_pbl_addr; /* 5 */ + u8 reserved2[3]; + u8 mr_pagesz; +}; + +struct t3_receive_wr { + struct fw_riwrh wrh; /* 0 */ + union t3_wrid wrid; /* 1 */ + u8 pagesz[T3_MAX_SGE]; + __be32 num_sgle; /* 2 */ + struct t3_sge sgl[T3_MAX_SGE]; /* 3+ */ + __be32 pbl_addr[T3_MAX_SGE]; +}; + +struct t3_bypass_wr { + struct fw_riwrh wrh; + union t3_wrid wrid; /* 1 */ +}; + +struct t3_modify_qp_wr { + struct fw_riwrh wrh; /* 0 */ + union t3_wrid wrid; /* 1 */ + __be32 flags; /* 2 */ + __be32 quiesce; /* 2 */ + __be32 max_ird; /* 3 */ + __be32 max_ord; /* 3 */ + __be64 sge_cmd; /* 4 */ + __be64 ctx1; /* 5 */ + __be64 ctx0; /* 6 */ +}; + +enum t3_modify_qp_flags { + MODQP_QUIESCE = 0x01, + MODQP_MAX_IRD = 0x02, + MODQP_MAX_ORD = 0x04, + MODQP_WRITE_EC = 0x08, + MODQP_READ_EC = 0x10, +}; + + +enum t3_mpa_attrs { + uP_RI_MPA_RX_MARKER_ENABLE = 0x1, + uP_RI_MPA_TX_MARKER_ENABLE = 0x2, + uP_RI_MPA_CRC_ENABLE = 0x4, + uP_RI_MPA_IETF_ENABLE = 0x8 +} __attribute__ ((packed)); + +enum t3_qp_caps { + uP_RI_QP_RDMA_READ_ENABLE = 0x01, + uP_RI_QP_RDMA_WRITE_ENABLE = 0x02, + uP_RI_QP_BIND_ENABLE = 0x04, + uP_RI_QP_FAST_REGISTER_ENABLE = 0x08, + uP_RI_QP_STAG0_ENABLE = 0x10 +} __attribute__ ((packed)); + +struct t3_rdma_init_attr { + u32 tid; + u32 qpid; + u32 pdid; + u32 scqid; + u32 rcqid; + u32 rq_addr; + u32 rq_size; + enum t3_mpa_attrs mpaattrs; + enum t3_qp_caps qpcaps; + u16 tcp_emss; + u32 ord; + u32 ird; + u64 qp_dma_addr; + u32 qp_dma_size; + u32 flags; + u32 irs; +}; + +struct t3_rdma_init_wr { + struct fw_riwrh wrh; /* 0 */ + union t3_wrid wrid; /* 1 */ + __be32 qpid; /* 2 */ + __be32 pdid; + __be32 scqid; /* 3 */ + __be32 rcqid; + __be32 rq_addr; /* 4 */ + __be32 rq_size; + u8 mpaattrs; /* 5 */ + u8 qpcaps; + __be16 ulpdu_size; + __be32 flags; /* bits 31-1 - reservered */ + /* bit 0 - set if RECV posted */ + __be32 ord; /* 6 */ + __be32 ird; + __be64 qp_dma_addr; /* 7 */ + __be32 qp_dma_size; /* 8 */ + u32 irs; +}; + +struct t3_genbit { + u64 flit[15]; + __be64 genbit; +}; + +enum rdma_init_wr_flags { + RECVS_POSTED = 1, +}; + +union t3_wr { + struct t3_send_wr send; + struct t3_rdma_write_wr write; + struct t3_rdma_read_wr read; + struct t3_receive_wr recv; + struct t3_local_inv_wr local_inv; + struct t3_bind_mw_wr bind; + struct t3_bypass_wr bypass; + struct t3_rdma_init_wr init; + struct t3_modify_qp_wr qp_mod; + struct t3_genbit genbit; + u64 flit[16]; +}; + 
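
All of the request layouts above share one 128-byte WQE slot through union t3_wr (sixteen 64-bit flits), and struct t3_genbit overlays the final flit so the generation bit can be mirrored there after the rest of the WQE is written (the "2nd gen bit" step in build_fw_riwrh() further down). The stand-alone mock below illustrates just that overlay idea; the mock_wqe type and its field names are invented for illustration and are not part of the driver.

/* Self-contained mock of the 16-flit WQE slot with a last-flit genbit overlay. */
#include <assert.h>
#include <stdint.h>
#include <string.h>

union mock_wqe {
        struct {
                uint64_t header;                /* stands in for fw_riwrh + wrid */
                uint64_t payload[14];           /* stands in for the WR body */
        } wr;
        struct {
                uint64_t flit[15];
                uint64_t genbit;                /* last flit, written last */
        } genbit;
        uint64_t flit[16];
};

int
main(void)
{
        union mock_wqe wqe;

        assert(sizeof(wqe) == 16 * sizeof(uint64_t));   /* 128 bytes */
        memset(&wqe, 0, sizeof(wqe));
        wqe.wr.header = 1;                      /* body goes in first... */
        wqe.genbit.genbit = 1;                  /* ...generation bit goes in last */
        assert(wqe.flit[15] == 1);              /* overlay really is the last flit */
        return (0);
}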
+#define T3_SQ_CQE_FLIT 13 +#define T3_SQ_COOKIE_FLIT 14 + +#define T3_RQ_COOKIE_FLIT 13 +#define T3_RQ_CQE_FLIT 14 + +static inline enum t3_wr_opcode fw_riwrh_opcode(struct fw_riwrh *wqe) +{ + return G_FW_RIWR_OP(be32toh(wqe->op_seop_flags)); +} + +static inline void build_fw_riwrh(struct fw_riwrh *wqe, enum t3_wr_opcode op, + enum t3_wr_flags flags, u8 genbit, u32 tid, + u8 len) +{ + wqe->op_seop_flags = htobe32(V_FW_RIWR_OP(op) | + V_FW_RIWR_SOPEOP(M_FW_RIWR_SOPEOP) | + V_FW_RIWR_FLAGS(flags)); + wmb(); + wqe->gen_tid_len = htobe32(V_FW_RIWR_GEN(genbit) | + V_FW_RIWR_TID(tid) | + V_FW_RIWR_LEN(len)); + /* 2nd gen bit... */ + ((union t3_wr *)wqe)->genbit.genbit = htobe64(genbit); +} + +/* + * T3 ULP2_TX commands + */ +enum t3_utx_mem_op { + T3_UTX_MEM_READ = 2, + T3_UTX_MEM_WRITE = 3 +}; + +/* T3 MC7 RDMA TPT entry format */ + +enum tpt_mem_type { + TPT_NON_SHARED_MR = 0x0, + TPT_SHARED_MR = 0x1, + TPT_MW = 0x2, + TPT_MW_RELAXED_PROTECTION = 0x3 +}; + +enum tpt_addr_type { + TPT_ZBTO = 0, + TPT_VATO = 1 +}; + +enum tpt_mem_perm { + TPT_LOCAL_READ = 0x8, + TPT_LOCAL_WRITE = 0x4, + TPT_REMOTE_READ = 0x2, + TPT_REMOTE_WRITE = 0x1 +}; + +struct tpt_entry { + __be32 valid_stag_pdid; + __be32 flags_pagesize_qpid; + + __be32 rsvd_pbl_addr; + __be32 len; + __be32 va_hi; + __be32 va_low_or_fbo; + + __be32 rsvd_bind_cnt_or_pstag; + __be32 rsvd_pbl_size; +}; + +#define S_TPT_VALID 31 +#define V_TPT_VALID(x) ((x) << S_TPT_VALID) +#define F_TPT_VALID V_TPT_VALID(1U) + +#define S_TPT_STAG_KEY 23 +#define M_TPT_STAG_KEY 0xFF +#define V_TPT_STAG_KEY(x) ((x) << S_TPT_STAG_KEY) +#define G_TPT_STAG_KEY(x) (((x) >> S_TPT_STAG_KEY) & M_TPT_STAG_KEY) + +#define S_TPT_STAG_STATE 22 +#define V_TPT_STAG_STATE(x) ((x) << S_TPT_STAG_STATE) +#define F_TPT_STAG_STATE V_TPT_STAG_STATE(1U) + +#define S_TPT_STAG_TYPE 20 +#define M_TPT_STAG_TYPE 0x3 +#define V_TPT_STAG_TYPE(x) ((x) << S_TPT_STAG_TYPE) +#define G_TPT_STAG_TYPE(x) (((x) >> S_TPT_STAG_TYPE) & M_TPT_STAG_TYPE) + +#define S_TPT_PDID 0 +#define M_TPT_PDID 0xFFFFF +#define V_TPT_PDID(x) ((x) << S_TPT_PDID) +#define G_TPT_PDID(x) (((x) >> S_TPT_PDID) & M_TPT_PDID) + +#define S_TPT_PERM 28 +#define M_TPT_PERM 0xF +#define V_TPT_PERM(x) ((x) << S_TPT_PERM) +#define G_TPT_PERM(x) (((x) >> S_TPT_PERM) & M_TPT_PERM) + +#define S_TPT_REM_INV_DIS 27 +#define V_TPT_REM_INV_DIS(x) ((x) << S_TPT_REM_INV_DIS) +#define F_TPT_REM_INV_DIS V_TPT_REM_INV_DIS(1U) + +#define S_TPT_ADDR_TYPE 26 +#define V_TPT_ADDR_TYPE(x) ((x) << S_TPT_ADDR_TYPE) +#define F_TPT_ADDR_TYPE V_TPT_ADDR_TYPE(1U) + +#define S_TPT_MW_BIND_ENABLE 25 +#define V_TPT_MW_BIND_ENABLE(x) ((x) << S_TPT_MW_BIND_ENABLE) +#define F_TPT_MW_BIND_ENABLE V_TPT_MW_BIND_ENABLE(1U) + +#define S_TPT_PAGE_SIZE 20 +#define M_TPT_PAGE_SIZE 0x1F +#define V_TPT_PAGE_SIZE(x) ((x) << S_TPT_PAGE_SIZE) +#define G_TPT_PAGE_SIZE(x) (((x) >> S_TPT_PAGE_SIZE) & M_TPT_PAGE_SIZE) + +#define S_TPT_PBL_ADDR 0 +#define M_TPT_PBL_ADDR 0x1FFFFFFF +#define V_TPT_PBL_ADDR(x) ((x) << S_TPT_PBL_ADDR) +#define G_TPT_PBL_ADDR(x) (((x) >> S_TPT_PBL_ADDR) & M_TPT_PBL_ADDR) + +#define S_TPT_QPID 0 +#define M_TPT_QPID 0xFFFFF +#define V_TPT_QPID(x) ((x) << S_TPT_QPID) +#define G_TPT_QPID(x) (((x) >> S_TPT_QPID) & M_TPT_QPID) + +#define S_TPT_PSTAG 0 +#define M_TPT_PSTAG 0xFFFFFF +#define V_TPT_PSTAG(x) ((x) << S_TPT_PSTAG) +#define G_TPT_PSTAG(x) (((x) >> S_TPT_PSTAG) & M_TPT_PSTAG) + +#define S_TPT_PBL_SIZE 0 +#define M_TPT_PBL_SIZE 0xFFFFF +#define V_TPT_PBL_SIZE(x) ((x) << S_TPT_PBL_SIZE) +#define G_TPT_PBL_SIZE(x) (((x) >> S_TPT_PBL_SIZE) & 
M_TPT_PBL_SIZE)
+
+/*
+ * CQE defs
+ */
+struct t3_cqe {
+ __be32 header;
+ __be32 len;
+ union {
+ struct {
+ __be32 stag;
+ __be32 msn;
+ } rcqe;
+ struct {
+ u32 wrid_hi;
+ u32 wrid_low;
+ } scqe;
+ } u;
+};
+
+#define S_CQE_OOO 31
+#define M_CQE_OOO 0x1
+#define G_CQE_OOO(x) ((((x) >> S_CQE_OOO)) & M_CQE_OOO)
+#define V_CEQ_OOO(x) ((x)<<S_CQE_OOO)
+
+#define S_CQE_QPID 12
+#define M_CQE_QPID 0x7FFFF
+#define G_CQE_QPID(x) ((((x) >> S_CQE_QPID)) & M_CQE_QPID)
+#define V_CQE_QPID(x) ((x)<<S_CQE_QPID)
+
+#define S_CQE_SWCQE 11
+#define M_CQE_SWCQE 0x1
+#define G_CQE_SWCQE(x) ((((x) >> S_CQE_SWCQE)) & M_CQE_SWCQE)
+#define V_CQE_SWCQE(x) ((x)<<S_CQE_SWCQE)
+
+#define S_CQE_GENBIT 10
+#define M_CQE_GENBIT 0x1
+#define G_CQE_GENBIT(x) (((x) >> S_CQE_GENBIT) & M_CQE_GENBIT)
+#define V_CQE_GENBIT(x) ((x)<<S_CQE_GENBIT)
+
+#define S_CQE_STATUS 5
+#define M_CQE_STATUS 0x1F
+#define G_CQE_STATUS(x) ((((x) >> S_CQE_STATUS)) & M_CQE_STATUS)
+#define V_CQE_STATUS(x) ((x)<<S_CQE_STATUS)
+
+#define S_CQE_TYPE 4
+#define M_CQE_TYPE 0x1
+#define G_CQE_TYPE(x) ((((x) >> S_CQE_TYPE)) & M_CQE_TYPE)
+#define V_CQE_TYPE(x) ((x)<<S_CQE_TYPE)
+
+#define S_CQE_OPCODE 0
+#define M_CQE_OPCODE 0xF
+#define G_CQE_OPCODE(x) ((((x) >> S_CQE_OPCODE)) & M_CQE_OPCODE)
+#define V_CQE_OPCODE(x) ((x)<<S_CQE_OPCODE)
+
+#define SW_CQE(x) (G_CQE_SWCQE(be32toh((x).header)))
+#define CQE_OOO(x) (G_CQE_OOO(be32toh((x).header)))
+#define CQE_QPID(x) (G_CQE_QPID(be32toh((x).header)))
+#define CQE_GENBIT(x) (G_CQE_GENBIT(be32toh((x).header)))
+#define CQE_TYPE(x) (G_CQE_TYPE(be32toh((x).header)))
+#define SQ_TYPE(x) (CQE_TYPE((x)))
+#define RQ_TYPE(x) (!CQE_TYPE((x)))
+#define CQE_STATUS(x) (G_CQE_STATUS(be32toh((x).header)))
+#define CQE_OPCODE(x) (G_CQE_OPCODE(be32toh((x).header)))
+
+#define CQE_LEN(x) (be32toh((x).len))
+
+/* used for RQ completion processing */
+#define CQE_WRID_STAG(x) (be32toh((x).u.rcqe.stag))
+#define CQE_WRID_MSN(x) (be32toh((x).u.rcqe.msn))
+
+/* used for SQ completion processing */
+#define CQE_WRID_SQ_WPTR(x) ((x).u.scqe.wrid_hi)
+#define CQE_WRID_WPTR(x) ((x).u.scqe.wrid_low)
+
+/* generic accessor macros */
+#define CQE_WRID_HI(x) ((x).u.scqe.wrid_hi)
+#define CQE_WRID_LOW(x) ((x).u.scqe.wrid_low)
+
+#define TPT_ERR_SUCCESS 0x0
+#define TPT_ERR_STAG 0x1 /* STAG invalid: either the */
+ /* STAG is off limit, being 0, */
+ /* or STAG_key mismatch */
+#define TPT_ERR_PDID 0x2 /* PDID mismatch */
+#define TPT_ERR_QPID 0x3 /* QPID mismatch */
+#define TPT_ERR_ACCESS 0x4 /* Invalid access right */
+#define TPT_ERR_WRAP 0x5 /* Wrap error */
+#define TPT_ERR_BOUND 0x6 /* base and bounds violation */
+#define TPT_ERR_INVALIDATE_SHARED_MR 0x7 /* attempt to invalidate a */
+ /* shared memory region */
+#define TPT_ERR_INVALIDATE_MR_WITH_MW_BOUND 0x8 /* attempt to invalidate a */
+ /* MR with a bound MW */
+#define TPT_ERR_ECC 0x9 /* ECC error detected */
+#define TPT_ERR_ECC_PSTAG 0xA /* ECC error detected when */
+ /* reading PSTAG for a MW */
+ /* Invalidate */
+#define TPT_ERR_PBL_ADDR_BOUND 0xB /* pbl addr out of bounds: */
+ /* software error */
+#define TPT_ERR_SWFLUSH 0xC /* SW FLUSHED */
+#define TPT_ERR_CRC 0x10 /* CRC error */
+#define TPT_ERR_MARKER 0x11 /* Marker error */
+#define TPT_ERR_PDU_LEN_ERR 0x12 /* invalid PDU length */
+#define TPT_ERR_OUT_OF_RQE 0x13 /* out of RQE */
+#define TPT_ERR_DDP_VERSION 0x14 /* wrong DDP version */
+#define TPT_ERR_RDMA_VERSION 0x15 /* wrong RDMA version */
+#define TPT_ERR_OPCODE 0x16 /* invalid rdma opcode */
+#define TPT_ERR_DDP_QUEUE_NUM 0x17 /* invalid ddp queue number */
+#define TPT_ERR_MSN 0x18 /* MSN error */
+#define TPT_ERR_TBIT 0x19 /* tag bit not set correctly */
+#define TPT_ERR_MO 0x1A /* MO not 0 for TERMINATE */
+ /* or READ_REQ */
+#define TPT_ERR_MSN_GAP 0x1B
+#define TPT_ERR_MSN_RANGE 0x1C
+#define TPT_ERR_IRD_OVERFLOW 0x1D
+#define TPT_ERR_RQE_ADDR_BOUND 0x1E /* RQE addr out of bounds: */
+ /* software error */
+#define TPT_ERR_INTERNAL_ERR 0x1F /* internal error (opcode */
+ /* mismatch) */
+
+struct t3_swsq {
+ uint64_t wr_id;
+ struct t3_cqe cqe;
+ uint32_t sq_wptr;
+ uint32_t read_len;
+ int opcode;
+ int complete;
+ int signaled;
+};
+
+/*
+ * A T3 WQ implements both the SQ and RQ.
+ */
+struct t3_wq {
+ union t3_wr *queue; /* DMA accessible memory */
+ bus_addr_t dma_addr; /* DMA address for HW */
+#ifdef notyet
+ DECLARE_PCI_UNMAP_ADDR(mapping) /* unmap kruft */
+#endif
+ u32 error; /* 1 once we go to ERROR */
+ u32 qpid;
+ u32 wptr; /* idx to next available WR slot */
+ u32 size_log2; /* total wq size */
+ struct t3_swsq *sq; /* SW SQ */
+ struct t3_swsq *oldest_read; /* tracks oldest pending read */
+ u32 sq_wptr; /* sq_wptr - sq_rptr == count of */
+ u32 sq_rptr; /* pending wrs */
+ u32 sq_size_log2; /* sq size */
+ u64 *rq; /* SW RQ (holds consumer wr_ids) */
+ u32 rq_wptr; /* rq_wptr - rq_rptr == count of */
+ u32 rq_rptr; /* pending wrs */
+ u64 *rq_oldest_wr; /* oldest wr on the SW RQ */
+ u32 rq_size_log2; /* rq size */
+ u32 rq_addr; /* rq adapter address */
+ void /* __iomem */ *doorbell; /* kernel db */
+ u64 udb; /* user db if any */
+};
+
+struct t3_cq {
+ u32 cqid;
+ u32 rptr;
+ u32 wptr;
+ u32 size_log2;
+ bus_addr_t dma_addr;
+#ifdef notyet
+ DECLARE_PCI_UNMAP_ADDR(mapping)
+#endif
+ struct t3_cqe *queue;
+ struct t3_cqe *sw_queue;
+ u32 sw_rptr;
+ u32 sw_wptr;
+};
+
+#define CQ_VLD_ENTRY(ptr,size_log2,cqe) (Q_GENBIT(ptr,size_log2) == \
+ CQE_GENBIT(*cqe))
+
+static inline void cxio_set_wq_in_error(struct t3_wq *wq)
+{
+ wq->queue->flit[13] = 1;
+}
+
+static inline struct t3_cqe *cxio_next_hw_cqe(struct t3_cq *cq)
+{
+ struct t3_cqe *cqe;
+
+ cqe = cq->queue + (Q_PTR2IDX(cq->rptr, cq->size_log2));
+ if (CQ_VLD_ENTRY(cq->rptr, cq->size_log2, cqe))
+ return cqe;
+ return NULL;
+}
+
+static inline struct t3_cqe *cxio_next_sw_cqe(struct t3_cq *cq)
+{
+ struct t3_cqe *cqe;
+
+ if (!Q_EMPTY(cq->sw_rptr, cq->sw_wptr)) {
+ cqe = cq->sw_queue + (Q_PTR2IDX(cq->sw_rptr, cq->size_log2));
+ return cqe;
+ }
+ return NULL;
+}
+
+static inline struct t3_cqe *cxio_next_cqe(struct t3_cq *cq)
+{
+ struct t3_cqe *cqe;
+
+ if (!Q_EMPTY(cq->sw_rptr, cq->sw_wptr)) {
+ cqe = cq->sw_queue + (Q_PTR2IDX(cq->sw_rptr, cq->size_log2));
+ return cqe;
+ }
+ cqe = cq->queue + (Q_PTR2IDX(cq->rptr, cq->size_log2));
+ if (CQ_VLD_ENTRY(cq->rptr, cq->size_log2, cqe))
+ return cqe;
+ return NULL;
+}
+
+#endif
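
To close, a sketch of how the inline CQ helpers above are meant to be combined: cxio_next_cqe() returns software CQEs first (sw_rptr/sw_wptr) and otherwise the hardware CQE at rptr, but only when CQ_VLD_ENTRY() sees the expected generation bit. The loop below is a hypothetical consumer rather than code from this import; handle_cqe() is an invented placeholder for whatever completion processing the caller does, and the real consumers live in the CQ poll path elsewhere in this import.

/* Hypothetical consumer of the CQ helpers above -- not part of the driver. */
extern void handle_cqe(const struct t3_cqe *cqe);    /* invented placeholder */

static inline void
example_poll_cq(struct t3_cq *cq)
{
        struct t3_cqe *cqe;

        while ((cqe = cxio_next_cqe(cq)) != NULL) {
                handle_cqe(cqe);
                /*
                 * Software CQEs are consumed from sw_rptr, hardware CQEs
                 * from rptr.  For hardware CQEs, CQ_VLD_ENTRY() has already
                 * matched the entry's generation bit against the one implied
                 * by rptr, so the entry is known to be complete.
                 */
                if (SW_CQE(*cqe))
                        cq->sw_rptr++;
                else
                        cq->rptr++;
        }
}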