author     Linus Torvalds <torvalds@linux-foundation.org>  2017-09-03 17:49:17 -0700
committer  Linus Torvalds <torvalds@linux-foundation.org>  2017-09-03 17:49:17 -0700
commit     aa9d4648c2fbb455df7750ade1b73dd9ad9b3690 (patch)
tree       bc4590c27e6f30ec0612b28f3f38a539535b9930 /drivers/infiniband/sw
parent     906dde0f355bd97c080c215811ae7db1137c4af8 (diff)
parent     8eb19e8e7c8658226d8b7e75728e6dfa2ef32717 (diff)
Merge tag 'for-linus-ioctl' of git://git.kernel.org/pub/scm/linux/kernel/git/dledford/rdma

Pull rdma updates from Doug Ledford:
 "This is a big pull request. Of note is that I'm sending you the new
  ioctl API for the rdma subsystem. We put it up on linux-api@, but
  didn't get much response. The API is complex, but it solves two
  different problems in one go:

  1) The bi-directional nature of the RDMA file write calls, which
     created the security hole we had to handle (and for which the fix
     is now causing problems for systems in production; we were a bit
     overzealous in the fix, and the ability to open a device, then
     fork, then create new queue pairs on the device and use them is
     broken).

  2) The bloat caused by different vendors implementing extensions to
     the base verbs API. Each vendor's hardware is slightly different,
     and the hardware might be suitable for one extension but not
     another. By the time we add generic extensions for all the
     different ways that the different hardware can offload things,
     the API becomes bloated. Things like our completion structs have
     started to exceed a cache line in size because of all the
     elements needed to support this. That in turn shows up heavily in
     the performance graphs with a noticeable drop in performance on
     100Gigabit links as our completion structs go from occupying one
     cache line to 1+.

  This API makes things like the completion structs modular in a very
  similar way to netlink so that your structs can only include the
  items needed for the offloads/features you are actually using on a
  given queue pair. In that way we support everything, but only use
  what we need, and our structs stay smaller.

  The ioctl API is better explained by the posting on linux-api@ than
  I can explain it here, so I'll just leave it at that.

  The rest of the pull request is typical stuff. Updates for the 4.14
  kernel merge window:

   - Lots of hfi1 driver updates (mixed with a few qib and core
     updates as well)
   - rxe updates
   - various mlx updates
   - Set default roce type to RoCEv2
   - Several larger fixes for bnxt_re that were too big for -rc
   - Several larger fixes for qedr that, likewise, were too big for -rc
   - Misc core changes
   - Make the hns_roce driver compilable on arches other than aarch64
     so we can more easily debug build issues related to it
   - Add rdma-netlink infrastructure updates
   - Add automatic IRQ affinity infrastructure
   - Add 32bit lid support
   - Lots of misc fixes across the subsystem from random people
   - Autoloading of RDMA netlink modules
   - PCI pool cleanups from Romain Perier
   - mlx5 driver feature additions and fixes
   - Hardware tag matching feature
   - Fix sleeping in atomic when resolving roce ah
   - Add experimental ioctl interface as posted to linux-api@"

* tag 'for-linus-ioctl' of git://git.kernel.org/pub/scm/linux/kernel/git/dledford/rdma: (328 commits)
  IB/core: Expose ioctl interface through experimental Kconfig
  IB/core: Assign root to all drivers
  IB/core: Add completion queue (cq) object actions
  IB/core: Add legacy driver's user-data
  IB/core: Export ioctl enum types to user-space
  IB/core: Explicitly destroy an object while keeping uobject
  IB/core: Add macros for declaring methods and attributes
  IB/core: Add uverbs merge trees functionality
  IB/core: Add DEVICE object and root tree structure
  IB/core: Declare an object instead of declaring only type attributes
  IB/core: Add new ioctl interface
  RDMA/vmw_pvrdma: Fix a signedness
  RDMA/vmw_pvrdma: Report network header type in WC
  IB/core: Add might_sleep() annotation to ib_init_ah_from_wc()
  IB/cm: Fix sleeping in atomic when RoCE is used
  IB/core: Add support to finalize objects in one transaction
  IB/core: Add a generic way to execute an operation on a uobject
  Documentation: Hardware tag matching
  IB/mlx5: Support IB_SRQT_TM
  net/mlx5: Add XRQ support
  ...

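To picture the "modular in a very similar way to netlink" point above, here is a minimal, self-contained TLV-style sketch in C. The struct, attribute IDs, and helper are hypothetical illustrations of the idea, not the kernel's uverbs ioctl layout:

/*
 * Hypothetical TLV-style attribute encoding, sketched only to illustrate the
 * "pay for what you use" idea from the commit message.  Not the uverbs API.
 */
#include <stdint.h>
#include <string.h>

struct attr_hdr {
	uint16_t type;		/* which optional field follows */
	uint16_t len;		/* payload length in bytes */
};

enum {
	ATTR_WR_ID    = 1,	/* always present */
	ATTR_INV_RKEY = 2,	/* only if the QP uses invalidate offload */
	ATTR_TM_INFO  = 3,	/* only if the QP uses tag matching */
};

/* Append one attribute; returns the new write offset (4-byte aligned). */
static size_t put_attr(uint8_t *buf, size_t off, uint16_t type,
		       const void *payload, uint16_t len)
{
	struct attr_hdr hdr = { .type = type, .len = len };

	memcpy(buf + off, &hdr, sizeof(hdr));
	memcpy(buf + off + sizeof(hdr), payload, len);
	return off + sizeof(hdr) + ((len + 3u) & ~3u);
}

A completion that uses no offloads carries only ATTR_WR_ID and stays within one cache line; a tag-matching QP adds ATTR_TM_INFO without growing everyone else's struct.
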
Diffstat (limited to 'drivers/infiniband/sw')
-rw-r--r--  drivers/infiniband/sw/rdmavt/ah.c           |  10
-rw-r--r--  drivers/infiniband/sw/rdmavt/cq.c           |   2
-rw-r--r--  drivers/infiniband/sw/rdmavt/mr.c           | 170
-rw-r--r--  drivers/infiniband/sw/rdmavt/qp.c           | 348
-rw-r--r--  drivers/infiniband/sw/rdmavt/trace_mr.h     |  62
-rw-r--r--  drivers/infiniband/sw/rdmavt/trace_tx.h     |  11
-rw-r--r--  drivers/infiniband/sw/rdmavt/vt.c           |   9
-rw-r--r--  drivers/infiniband/sw/rxe/rxe.c             |   1
-rw-r--r--  drivers/infiniband/sw/rxe/rxe.h             |   2
-rw-r--r--  drivers/infiniband/sw/rxe/rxe_av.c          |   7
-rw-r--r--  drivers/infiniband/sw/rxe/rxe_cq.c          |  19
-rw-r--r--  drivers/infiniband/sw/rxe/rxe_hw_counters.c |   2
-rw-r--r--  drivers/infiniband/sw/rxe/rxe_loc.h         |   6
-rw-r--r--  drivers/infiniband/sw/rxe/rxe_mmap.c        |   2
-rw-r--r--  drivers/infiniband/sw/rxe/rxe_mr.c          |  12
-rw-r--r--  drivers/infiniband/sw/rxe/rxe_net.c         |  26
-rw-r--r--  drivers/infiniband/sw/rxe/rxe_pool.c        |   2
-rw-r--r--  drivers/infiniband/sw/rxe/rxe_qp.c          |   9
-rw-r--r--  drivers/infiniband/sw/rxe/rxe_req.c         |   6
-rw-r--r--  drivers/infiniband/sw/rxe/rxe_resp.c        |   2
-rw-r--r--  drivers/infiniband/sw/rxe/rxe_task.c        |   4
-rw-r--r--  drivers/infiniband/sw/rxe/rxe_verbs.c       |  67
-rw-r--r--  drivers/infiniband/sw/rxe/rxe_verbs.h       |   2
23 files changed, 600 insertions(+), 181 deletions(-)
diff --git a/drivers/infiniband/sw/rdmavt/ah.c b/drivers/infiniband/sw/rdmavt/ah.c
index a96d4aa..ba3639a 100644
--- a/drivers/infiniband/sw/rdmavt/ah.c
+++ b/drivers/infiniband/sw/rdmavt/ah.c
@@ -66,8 +66,6 @@ int rvt_check_ah(struct ib_device *ibdev,
int port_num = rdma_ah_get_port_num(ah_attr);
struct ib_port_attr port_attr;
struct rvt_dev_info *rdi = ib_to_rvt(ibdev);
- enum rdma_link_layer link = rdma_port_get_link_layer(ibdev, port_num);
- u32 dlid = rdma_ah_get_dlid(ah_attr);
u8 ah_flags = rdma_ah_get_ah_flags(ah_attr);
u8 static_rate = rdma_ah_get_static_rate(ah_attr);
@@ -83,14 +81,6 @@ int rvt_check_ah(struct ib_device *ibdev,
if ((ah_flags & IB_AH_GRH) &&
rdma_ah_read_grh(ah_attr)->sgid_index >= port_attr.gid_tbl_len)
return -EINVAL;
- if (link != IB_LINK_LAYER_ETHERNET) {
- if (dlid == 0)
- return -EINVAL;
- if (dlid >= be16_to_cpu(IB_MULTICAST_LID_BASE) &&
- dlid != be16_to_cpu(IB_LID_PERMISSIVE) &&
- !(ah_flags & IB_AH_GRH))
- return -EINVAL;
- }
if (rdi->driver_f.check_ah)
return rdi->driver_f.check_ah(ibdev, ah_attr);
return 0;
diff --git a/drivers/infiniband/sw/rdmavt/cq.c b/drivers/infiniband/sw/rdmavt/cq.c
index 0ae2ff8..97d71e4 100644
--- a/drivers/infiniband/sw/rdmavt/cq.c
+++ b/drivers/infiniband/sw/rdmavt/cq.c
@@ -107,7 +107,7 @@ void rvt_cq_enter(struct rvt_cq *cq, struct ib_wc *entry, bool solicited)
wc->uqueue[head].src_qp = entry->src_qp;
wc->uqueue[head].wc_flags = entry->wc_flags;
wc->uqueue[head].pkey_index = entry->pkey_index;
- wc->uqueue[head].slid = entry->slid;
+ wc->uqueue[head].slid = ib_lid_cpu16(entry->slid);
wc->uqueue[head].sl = entry->sl;
wc->uqueue[head].dlid_path_bits = entry->dlid_path_bits;
wc->uqueue[head].port_num = entry->port_num;
diff --git a/drivers/infiniband/sw/rdmavt/mr.c b/drivers/infiniband/sw/rdmavt/mr.c
index aa5f9ea3..4271351 100644
--- a/drivers/infiniband/sw/rdmavt/mr.c
+++ b/drivers/infiniband/sw/rdmavt/mr.c
@@ -441,6 +441,105 @@ bail_umem:
}
/**
+ * rvt_dereg_clean_qp_cb - callback from iterator
+ * @qp - the qp
+ * @v - the mregion (as u64)
+ *
+ * This routine is the callback invoked for every QP; for QPs
+ * in the same PD as the MR, it calls rvt_qp_mr_clean() to
+ * potentially clean up references.
+ */
+static void rvt_dereg_clean_qp_cb(struct rvt_qp *qp, u64 v)
+{
+ struct rvt_mregion *mr = (struct rvt_mregion *)v;
+
+ /* skip PDs that are not ours */
+ if (mr->pd != qp->ibqp.pd)
+ return;
+ rvt_qp_mr_clean(qp, mr->lkey);
+}
+
+/**
+ * rvt_dereg_clean_qps - find QPs for reference cleanup
+ * @mr - the MR that is being deregistered
+ *
+ * This routine iterates RC QPs looking for references
+ * to the lkey noted in mr.
+ */
+static void rvt_dereg_clean_qps(struct rvt_mregion *mr)
+{
+ struct rvt_dev_info *rdi = ib_to_rvt(mr->pd->device);
+
+ rvt_qp_iter(rdi, (u64)mr, rvt_dereg_clean_qp_cb);
+}
+
+/**
+ * rvt_check_refs - check references
+ * @mr - the mregion
+ * @t - the caller identification
+ *
+ * This routine checks whether the MR still holds references
+ * while it is being de-registered.
+ *
+ * If the count is non-zero, the code calls a clean routine then
+ * waits up to a timeout for the count to reach zero.
+ */
+static int rvt_check_refs(struct rvt_mregion *mr, const char *t)
+{
+ unsigned long timeout;
+ struct rvt_dev_info *rdi = ib_to_rvt(mr->pd->device);
+
+ if (percpu_ref_is_zero(&mr->refcount))
+ return 0;
+ /* avoid dma mr */
+ if (mr->lkey)
+ rvt_dereg_clean_qps(mr);
+ timeout = wait_for_completion_timeout(&mr->comp, 5 * HZ);
+ if (!timeout) {
+ rvt_pr_err(rdi,
+ "%s timeout mr %p pd %p lkey %x refcount %ld\n",
+ t, mr, mr->pd, mr->lkey,
+ atomic_long_read(&mr->refcount.count));
+ rvt_get_mr(mr);
+ return -EBUSY;
+ }
+ return 0;
+}
+
+/**
+ * rvt_mr_has_lkey - return true if the MR matches the lkey
+ * @mr - the mregion
+ * @lkey - the lkey
+ */
+bool rvt_mr_has_lkey(struct rvt_mregion *mr, u32 lkey)
+{
+ return mr && lkey == mr->lkey;
+}
+
+/**
+ * rvt_ss_has_lkey - return true if the sge state references the lkey
+ * @ss - the sge state
+ * @lkey - the lkey to look for
+ *
+ * This code tests whether any MR in the indicated
+ * sge state uses the lkey.
+ */
+bool rvt_ss_has_lkey(struct rvt_sge_state *ss, u32 lkey)
+{
+ int i;
+ bool rval = false;
+
+ if (!ss->num_sge)
+ return rval;
+ /* first one */
+ rval = rvt_mr_has_lkey(ss->sge.mr, lkey);
+ /* any others */
+ for (i = 0; !rval && i < ss->num_sge - 1; i++)
+ rval = rvt_mr_has_lkey(ss->sg_list[i].mr, lkey);
+ return rval;
+}
+
+/**
* rvt_dereg_mr - unregister and free a memory region
* @ibmr: the memory region to free
*
@@ -453,22 +552,14 @@ bail_umem:
int rvt_dereg_mr(struct ib_mr *ibmr)
{
struct rvt_mr *mr = to_imr(ibmr);
- struct rvt_dev_info *rdi = ib_to_rvt(ibmr->pd->device);
- int ret = 0;
- unsigned long timeout;
+ int ret;
rvt_free_lkey(&mr->mr);
rvt_put_mr(&mr->mr); /* will set completion if last */
- timeout = wait_for_completion_timeout(&mr->mr.comp, 5 * HZ);
- if (!timeout) {
- rvt_pr_err(rdi,
- "rvt_dereg_mr timeout mr %p pd %p\n",
- mr, mr->mr.pd);
- rvt_get_mr(&mr->mr);
- ret = -EBUSY;
+ ret = rvt_check_refs(&mr->mr, __func__);
+ if (ret)
goto out;
- }
rvt_deinit_mregion(&mr->mr);
if (mr->umem)
ib_umem_release(mr->umem);
@@ -761,16 +852,12 @@ int rvt_dealloc_fmr(struct ib_fmr *ibfmr)
{
struct rvt_fmr *fmr = to_ifmr(ibfmr);
int ret = 0;
- unsigned long timeout;
rvt_free_lkey(&fmr->mr);
rvt_put_mr(&fmr->mr); /* will set completion if last */
- timeout = wait_for_completion_timeout(&fmr->mr.comp, 5 * HZ);
- if (!timeout) {
- rvt_get_mr(&fmr->mr);
- ret = -EBUSY;
+ ret = rvt_check_refs(&fmr->mr, __func__);
+ if (ret)
goto out;
- }
rvt_deinit_mregion(&fmr->mr);
kfree(fmr);
out:
@@ -778,23 +865,52 @@ out:
}
/**
+ * rvt_sge_adjacent - is isge compressible
+ * @last_sge: last outgoing SGE written
+ * @sge: SGE to check
+ *
+ * If adjacent will update last_sge to add length.
+ *
+ * Return: true if isge is adjacent to last sge
+ */
+static inline bool rvt_sge_adjacent(struct rvt_sge *last_sge,
+ struct ib_sge *sge)
+{
+ if (last_sge && sge->lkey == last_sge->mr->lkey &&
+ ((uint64_t)(last_sge->vaddr + last_sge->length) == sge->addr)) {
+ if (sge->lkey) {
+ if (unlikely((sge->addr - last_sge->mr->user_base +
+ sge->length > last_sge->mr->length)))
+ return false; /* overrun, caller will catch */
+ } else {
+ last_sge->length += sge->length;
+ }
+ last_sge->sge_length += sge->length;
+ trace_rvt_sge_adjacent(last_sge, sge);
+ return true;
+ }
+ return false;
+}
+
+/**
* rvt_lkey_ok - check IB SGE for validity and initialize
* @rkt: table containing lkey to check SGE against
* @pd: protection domain
* @isge: outgoing internal SGE
+ * @last_sge: last outgoing SGE written
* @sge: SGE to check
* @acc: access flags
*
* Check the IB SGE for validity and initialize our internal version
* of it.
*
- * Return: 1 if valid and successful, otherwise returns 0.
- *
- * increments the reference count upon success
+ * Increments the reference count when a new sge is stored.
*
+ * Return: 0 if compressed, 1 if added, otherwise returns -errno.
*/
int rvt_lkey_ok(struct rvt_lkey_table *rkt, struct rvt_pd *pd,
- struct rvt_sge *isge, struct ib_sge *sge, int acc)
+ struct rvt_sge *isge, struct rvt_sge *last_sge,
+ struct ib_sge *sge, int acc)
{
struct rvt_mregion *mr;
unsigned n, m;
@@ -804,12 +920,14 @@ int rvt_lkey_ok(struct rvt_lkey_table *rkt, struct rvt_pd *pd,
* We use LKEY == zero for kernel virtual addresses
* (see rvt_get_dma_mr() and dma_virt_ops).
*/
- rcu_read_lock();
if (sge->lkey == 0) {
struct rvt_dev_info *dev = ib_to_rvt(pd->ibpd.device);
if (pd->user)
- goto bail;
+ return -EINVAL;
+ if (rvt_sge_adjacent(last_sge, sge))
+ return 0;
+ rcu_read_lock();
mr = rcu_dereference(dev->dma_mr);
if (!mr)
goto bail;
@@ -824,6 +942,9 @@ int rvt_lkey_ok(struct rvt_lkey_table *rkt, struct rvt_pd *pd,
isge->n = 0;
goto ok;
}
+ if (rvt_sge_adjacent(last_sge, sge))
+ return 0;
+ rcu_read_lock();
mr = rcu_dereference(rkt->table[sge->lkey >> rkt->shift]);
if (!mr)
goto bail;
@@ -874,12 +995,13 @@ int rvt_lkey_ok(struct rvt_lkey_table *rkt, struct rvt_pd *pd,
isge->m = m;
isge->n = n;
ok:
+ trace_rvt_sge_new(isge, sge);
return 1;
bail_unref:
rvt_put_mr(mr);
bail:
rcu_read_unlock();
- return 0;
+ return -EINVAL;
}
EXPORT_SYMBOL(rvt_lkey_ok);
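The hunk above changes the rvt_lkey_ok() contract from a 0/1 boolean to a three-way result: 0 when the SGE was coalesced into the previous entry, 1 when a new internal SGE was stored, and a negative errno on failure. A minimal stand-alone sketch of that convention follows; the types and helper names are simplified stand-ins, not the rdmavt API:

/*
 * Simplified stand-ins illustrating the 0 / 1 / -errno convention that the
 * new rvt_lkey_ok() uses; this is not the rdmavt implementation.
 */
#include <stdint.h>
#include <errno.h>

struct user_sge { uint64_t addr; uint32_t length; uint32_t lkey; };
struct int_sge  { uint64_t vaddr; uint32_t length; uint32_t lkey; };

/* Store one SGE, or merge it into the previous one when it is adjacent. */
static int store_sge(struct int_sge *slot, struct int_sge *last,
		     const struct user_sge *sge)
{
	if (sge->length == 0)
		return -EINVAL;
	if (last && last->lkey == sge->lkey &&
	    last->vaddr + last->length == sge->addr) {
		last->length += sge->length;	/* coalesced */
		return 0;
	}
	slot->vaddr = sge->addr;
	slot->length = sge->length;
	slot->lkey = sge->lkey;
	return 1;				/* new slot consumed */
}

/* Caller pattern: advance the output index only when a slot was used. */
static int build_sg_list(struct int_sge *out, const struct user_sge *in,
			 int nsge)
{
	struct int_sge *last = NULL;
	int i, j = 0, ret;

	for (i = 0; i < nsge; i++) {
		ret = store_sge(&out[j], last, &in[i]);
		if (ret < 0)
			return ret;
		if (ret)
			last = &out[j];
		j += ret;	/* stays put when the SGE was coalesced */
	}
	return j;		/* number of internal SGEs actually used */
}

This is the same pattern rvt_post_one_wr() adopts in the qp.c hunk below, where "j += ret" keeps coalesced SGEs from consuming an extra slot.
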
diff --git a/drivers/infiniband/sw/rdmavt/qp.c b/drivers/infiniband/sw/rdmavt/qp.c
index 8876ee7..22df09a 100644
--- a/drivers/infiniband/sw/rdmavt/qp.c
+++ b/drivers/infiniband/sw/rdmavt/qp.c
@@ -52,6 +52,7 @@
#include <linux/slab.h>
#include <rdma/ib_verbs.h>
#include <rdma/ib_hdrs.h>
+#include <rdma/opa_addr.h>
#include "qp.h"
#include "vt.h"
#include "trace.h"
@@ -421,15 +422,6 @@ bail:
return ret;
}
-static void free_qpn(struct rvt_qpn_table *qpt, u32 qpn)
-{
- struct rvt_qpn_map *map;
-
- map = qpt->map + qpn / RVT_BITS_PER_PAGE;
- if (map->page)
- clear_bit(qpn & RVT_BITS_PER_PAGE_MASK, map->page);
-}
-
/**
* rvt_clear_mr_refs - Drop help mr refs
* @qp: rvt qp data structure
@@ -448,13 +440,9 @@ static void rvt_clear_mr_refs(struct rvt_qp *qp, int clr_sends)
if (clr_sends) {
while (qp->s_last != qp->s_head) {
struct rvt_swqe *wqe = rvt_get_swqe_ptr(qp, qp->s_last);
- unsigned i;
- for (i = 0; i < wqe->wr.num_sge; i++) {
- struct rvt_sge *sge = &wqe->sg_list[i];
+ rvt_put_swqe(wqe);
- rvt_put_mr(sge->mr);
- }
if (qp->ibqp.qp_type == IB_QPT_UD ||
qp->ibqp.qp_type == IB_QPT_SMI ||
qp->ibqp.qp_type == IB_QPT_GSI)
@@ -470,10 +458,7 @@ static void rvt_clear_mr_refs(struct rvt_qp *qp, int clr_sends)
}
}
- if (qp->ibqp.qp_type != IB_QPT_RC)
- return;
-
- for (n = 0; n < rvt_max_atomic(rdi); n++) {
+ for (n = 0; qp->s_ack_queue && n < rvt_max_atomic(rdi); n++) {
struct rvt_ack_entry *e = &qp->s_ack_queue[n];
if (e->rdma_sge.mr) {
@@ -484,6 +469,113 @@ static void rvt_clear_mr_refs(struct rvt_qp *qp, int clr_sends)
}
/**
+ * rvt_swqe_has_lkey - return true if lkey is used by swqe
+ * @wqe - the send wqe
+ * @lkey - the lkey
+ *
+ * Test the swqe for using lkey
+ */
+static bool rvt_swqe_has_lkey(struct rvt_swqe *wqe, u32 lkey)
+{
+ int i;
+
+ for (i = 0; i < wqe->wr.num_sge; i++) {
+ struct rvt_sge *sge = &wqe->sg_list[i];
+
+ if (rvt_mr_has_lkey(sge->mr, lkey))
+ return true;
+ }
+ return false;
+}
+
+/**
+ * rvt_qp_sends_has_lkey - return true if qp sends use lkey
+ * @qp - the rvt_qp
+ * @lkey - the lkey
+ */
+static bool rvt_qp_sends_has_lkey(struct rvt_qp *qp, u32 lkey)
+{
+ u32 s_last = qp->s_last;
+
+ while (s_last != qp->s_head) {
+ struct rvt_swqe *wqe = rvt_get_swqe_ptr(qp, s_last);
+
+ if (rvt_swqe_has_lkey(wqe, lkey))
+ return true;
+
+ if (++s_last >= qp->s_size)
+ s_last = 0;
+ }
+ if (qp->s_rdma_mr)
+ if (rvt_mr_has_lkey(qp->s_rdma_mr, lkey))
+ return true;
+ return false;
+}
+
+/**
+ * rvt_qp_acks_has_lkey - return true if acks have lkey
+ * @qp - the qp
+ * @lkey - the lkey
+ */
+static bool rvt_qp_acks_has_lkey(struct rvt_qp *qp, u32 lkey)
+{
+ int i;
+ struct rvt_dev_info *rdi = ib_to_rvt(qp->ibqp.device);
+
+ for (i = 0; qp->s_ack_queue && i < rvt_max_atomic(rdi); i++) {
+ struct rvt_ack_entry *e = &qp->s_ack_queue[i];
+
+ if (rvt_mr_has_lkey(e->rdma_sge.mr, lkey))
+ return true;
+ }
+ return false;
+}
+
+/*
+ * rvt_qp_mr_clean - clean up remote ops for lkey
+ * @qp - the qp
+ * @lkey - the lkey that is being de-registered
+ *
+ * This routine checks if the lkey is being used by
+ * the qp.
+ *
+ * If so, the qp is put into an error state to eliminate
+ * any references from the qp.
+ */
+void rvt_qp_mr_clean(struct rvt_qp *qp, u32 lkey)
+{
+ bool lastwqe = false;
+
+ if (qp->ibqp.qp_type == IB_QPT_SMI ||
+ qp->ibqp.qp_type == IB_QPT_GSI)
+ /* avoid special QPs */
+ return;
+ spin_lock_irq(&qp->r_lock);
+ spin_lock(&qp->s_hlock);
+ spin_lock(&qp->s_lock);
+
+ if (qp->state == IB_QPS_ERR || qp->state == IB_QPS_RESET)
+ goto check_lwqe;
+
+ if (rvt_ss_has_lkey(&qp->r_sge, lkey) ||
+ rvt_qp_sends_has_lkey(qp, lkey) ||
+ rvt_qp_acks_has_lkey(qp, lkey))
+ lastwqe = rvt_error_qp(qp, IB_WC_LOC_PROT_ERR);
+check_lwqe:
+ spin_unlock(&qp->s_lock);
+ spin_unlock(&qp->s_hlock);
+ spin_unlock_irq(&qp->r_lock);
+ if (lastwqe) {
+ struct ib_event ev;
+
+ ev.device = qp->ibqp.device;
+ ev.element.qp = &qp->ibqp;
+ ev.event = IB_EVENT_QP_LAST_WQE_REACHED;
+ qp->ibqp.event_handler(&ev, qp->ibqp.qp_context);
+ }
+}
+
+/**
* rvt_remove_qp - remove qp form table
* @rdi: rvt dev struct
* @qp: qp to remove
@@ -645,6 +737,19 @@ static void rvt_reset_qp(struct rvt_dev_info *rdi, struct rvt_qp *qp,
lockdep_assert_held(&qp->s_lock);
}
+/** rvt_free_qpn - Free a qpn from the bit map
+ * @qpt: QP table
+ * @qpn: queue pair number to free
+ */
+static void rvt_free_qpn(struct rvt_qpn_table *qpt, u32 qpn)
+{
+ struct rvt_qpn_map *map;
+
+ map = qpt->map + (qpn & RVT_QPN_MASK) / RVT_BITS_PER_PAGE;
+ if (map->page)
+ clear_bit(qpn & RVT_BITS_PER_PAGE_MASK, map->page);
+}
+
/**
* rvt_create_qp - create a queue pair for a device
* @ibpd: the protection domain who's device we create the queue pair for
@@ -914,7 +1019,7 @@ bail_ip:
kref_put(&qp->ip->ref, rvt_release_mmap_info);
bail_qpn:
- free_qpn(&rdi->qp_dev->qpn_table, qp->ibqp.qp_num);
+ rvt_free_qpn(&rdi->qp_dev->qpn_table, qp->ibqp.qp_num);
bail_rq_wq:
if (!qp->ip)
@@ -1062,6 +1167,7 @@ int rvt_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
int mig = 0;
int pmtu = 0; /* for gcc warning only */
enum rdma_link_layer link;
+ int opa_ah;
link = rdma_port_get_link_layer(ibqp->device, qp->port_num);
@@ -1072,6 +1178,7 @@ int rvt_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
cur_state = attr_mask & IB_QP_CUR_STATE ?
attr->cur_qp_state : qp->state;
new_state = attr_mask & IB_QP_STATE ? attr->qp_state : cur_state;
+ opa_ah = rdma_cap_opa_ah(ibqp->device, qp->port_num);
if (!ib_modify_qp_is_ok(cur_state, new_state, ibqp->qp_type,
attr_mask, link))
@@ -1082,17 +1189,31 @@ int rvt_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
goto inval;
if (attr_mask & IB_QP_AV) {
- if (rdma_ah_get_dlid(&attr->ah_attr) >=
- be16_to_cpu(IB_MULTICAST_LID_BASE))
- goto inval;
+ if (opa_ah) {
+ if (rdma_ah_get_dlid(&attr->ah_attr) >=
+ opa_get_mcast_base(OPA_MCAST_NR))
+ goto inval;
+ } else {
+ if (rdma_ah_get_dlid(&attr->ah_attr) >=
+ be16_to_cpu(IB_MULTICAST_LID_BASE))
+ goto inval;
+ }
+
if (rvt_check_ah(qp->ibqp.device, &attr->ah_attr))
goto inval;
}
if (attr_mask & IB_QP_ALT_PATH) {
- if (rdma_ah_get_dlid(&attr->alt_ah_attr) >=
- be16_to_cpu(IB_MULTICAST_LID_BASE))
- goto inval;
+ if (opa_ah) {
+ if (rdma_ah_get_dlid(&attr->alt_ah_attr) >=
+ opa_get_mcast_base(OPA_MCAST_NR))
+ goto inval;
+ } else {
+ if (rdma_ah_get_dlid(&attr->alt_ah_attr) >=
+ be16_to_cpu(IB_MULTICAST_LID_BASE))
+ goto inval;
+ }
+
if (rvt_check_ah(qp->ibqp.device, &attr->alt_ah_attr))
goto inval;
if (attr->alt_pkey_index >= rvt_get_npkeys(rdi))
@@ -1239,7 +1360,6 @@ int rvt_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
if (attr_mask & IB_QP_PATH_MTU) {
qp->pmtu = rdi->driver_f.mtu_from_qp(rdi, qp, pmtu);
- qp->path_mtu = rdi->driver_f.mtu_to_path_mtu(qp->pmtu);
qp->log_pmtu = ilog2(qp->pmtu);
}
@@ -1301,19 +1421,6 @@ inval:
return -EINVAL;
}
-/** rvt_free_qpn - Free a qpn from the bit map
- * @qpt: QP table
- * @qpn: queue pair number to free
- */
-static void rvt_free_qpn(struct rvt_qpn_table *qpt, u32 qpn)
-{
- struct rvt_qpn_map *map;
-
- map = qpt->map + qpn / RVT_BITS_PER_PAGE;
- if (map->page)
- clear_bit(qpn & RVT_BITS_PER_PAGE_MASK, map->page);
-}
-
/**
* rvt_destroy_qp - destroy a queue pair
* @ibqp: the queue pair to destroy
@@ -1375,7 +1482,7 @@ int rvt_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
attr->qp_state = qp->state;
attr->cur_qp_state = attr->qp_state;
- attr->path_mtu = qp->path_mtu;
+ attr->path_mtu = rdi->driver_f.mtu_to_path_mtu(qp->pmtu);
attr->path_mig_state = qp->s_mig_state;
attr->qkey = qp->qkey;
attr->rq_psn = qp->r_psn & rdi->dparms.psn_mask;
@@ -1695,22 +1802,23 @@ static int rvt_post_one_wr(struct rvt_qp *qp,
wqe->length = 0;
j = 0;
if (wr->num_sge) {
+ struct rvt_sge *last_sge = NULL;
+
acc = wr->opcode >= IB_WR_RDMA_READ ?
IB_ACCESS_LOCAL_WRITE : 0;
for (i = 0; i < wr->num_sge; i++) {
u32 length = wr->sg_list[i].length;
- int ok;
if (length == 0)
continue;
- ok = rvt_lkey_ok(rkt, pd, &wqe->sg_list[j],
- &wr->sg_list[i], acc);
- if (!ok) {
- ret = -EINVAL;
+ ret = rvt_lkey_ok(rkt, pd, &wqe->sg_list[j], last_sge,
+ &wr->sg_list[i], acc);
+ if (unlikely(ret < 0))
goto bail_inval_free;
- }
wqe->length += length;
- j++;
+ if (ret)
+ last_sge = &wqe->sg_list[j];
+ j += ret;
}
wqe->wr.num_sge = j;
}
@@ -1757,7 +1865,7 @@ static int rvt_post_one_wr(struct rvt_qp *qp,
wqe->wr.send_flags &= ~RVT_SEND_RESERVE_USED;
qp->s_avail--;
}
- trace_rvt_post_one_wr(qp, wqe);
+ trace_rvt_post_one_wr(qp, wqe, wr->num_sge);
smp_wmb(); /* see request builders */
qp->s_head = next;
@@ -2065,3 +2173,147 @@ enum hrtimer_restart rvt_rc_rnr_retry(struct hrtimer *t)
return HRTIMER_NORESTART;
}
EXPORT_SYMBOL(rvt_rc_rnr_retry);
+
+/**
+ * rvt_qp_iter_init - initialize an iterator for walking QPs
+ * @rdi - rvt devinfo
+ * @v - u64 value
+ *
+ * This returns an iterator suitable for iterating QPs
+ * in the system.
+ *
+ * The @cb is a user defined callback and @v is a 64
+ * bit value passed to and relevant for processing in the
+ * @cb. An example use case would be to alter QP processing
+ * based on criteria not part of the rvt_qp.
+ *
+ * Use cases that require memory allocation to succeed
+ * must preallocate appropriately.
+ *
+ * Return: a pointer to an rvt_qp_iter or NULL
+ */
+struct rvt_qp_iter *rvt_qp_iter_init(struct rvt_dev_info *rdi,
+ u64 v,
+ void (*cb)(struct rvt_qp *qp, u64 v))
+{
+ struct rvt_qp_iter *i;
+
+ i = kzalloc(sizeof(*i), GFP_KERNEL);
+ if (!i)
+ return NULL;
+
+ i->rdi = rdi;
+ /* number of special QPs (SMI/GSI) for device */
+ i->specials = rdi->ibdev.phys_port_cnt * 2;
+ i->v = v;
+ i->cb = cb;
+
+ return i;
+}
+EXPORT_SYMBOL(rvt_qp_iter_init);
+
+/**
+ * rvt_qp_iter_next - return the next QP in iter
+ * @iter - the iterator
+ *
+ * Fine grained QP iterator suitable for use
+ * with debugfs seq_file mechanisms.
+ *
+ * Updates iter->qp with the current QP when the return
+ * value is 0.
+ *
+ * Return: 0 - iter->qp is valid, 1 - no more QPs
+ */
+int rvt_qp_iter_next(struct rvt_qp_iter *iter)
+ __must_hold(RCU)
+{
+ int n = iter->n;
+ int ret = 1;
+ struct rvt_qp *pqp = iter->qp;
+ struct rvt_qp *qp;
+ struct rvt_dev_info *rdi = iter->rdi;
+
+ /*
+ * The approach is to consider the special qps
+ * as additional table entries before the
+ * real hash table. Since the qp code sets
+ * the qp->next hash link to NULL, this works just fine.
+ *
+ * iter->specials is 2 * # ports
+ *
+ * n = 0..iter->specials is the special qp indices
+ *
+ * n = iter->specials..rdi->qp_dev->qp_table_size+iter->specials are
+ * the potential hash bucket entries
+ *
+ */
+ for (; n < rdi->qp_dev->qp_table_size + iter->specials; n++) {
+ if (pqp) {
+ qp = rcu_dereference(pqp->next);
+ } else {
+ if (n < iter->specials) {
+ struct rvt_ibport *rvp;
+ int pidx;
+
+ pidx = n % rdi->ibdev.phys_port_cnt;
+ rvp = rdi->ports[pidx];
+ qp = rcu_dereference(rvp->qp[n & 1]);
+ } else {
+ qp = rcu_dereference(
+ rdi->qp_dev->qp_table[
+ (n - iter->specials)]);
+ }
+ }
+ pqp = qp;
+ if (qp) {
+ iter->qp = qp;
+ iter->n = n;
+ return 0;
+ }
+ }
+ return ret;
+}
+EXPORT_SYMBOL(rvt_qp_iter_next);
+
+/**
+ * rvt_qp_iter - iterate all QPs
+ * @rdi - rvt devinfo
+ * @v - a 64 bit value
+ * @cb - a callback
+ *
+ * This provides a way for iterating all QPs.
+ *
+ * The @cb is a user defined callback and @v is a 64
+ * bit value passed to and relevant for processing in the
+ * cb. An example use case would be to alter QP processing
+ * based on criteria not part of the rvt_qp.
+ *
+ * The code has an internal iterator to simplify
+ * non seq_file use cases.
+ */
+void rvt_qp_iter(struct rvt_dev_info *rdi,
+ u64 v,
+ void (*cb)(struct rvt_qp *qp, u64 v))
+{
+ int ret;
+ struct rvt_qp_iter i = {
+ .rdi = rdi,
+ .specials = rdi->ibdev.phys_port_cnt * 2,
+ .v = v,
+ .cb = cb
+ };
+
+ rcu_read_lock();
+ do {
+ ret = rvt_qp_iter_next(&i);
+ if (!ret) {
+ rvt_get_qp(i.qp);
+ rcu_read_unlock();
+ i.cb(i.qp, i.v);
+ rcu_read_lock();
+ rvt_put_qp(i.qp);
+ }
+ } while (!ret);
+ rcu_read_unlock();
+}
+EXPORT_SYMBOL(rvt_qp_iter);
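
As a usage illustration for the new iterator, a driver-side helper might count QPs of a given type by passing caller state through the 64-bit value, just as the MR dereg path does with the mregion pointer. This is a hedged sketch under that assumption; count_rc_qp() and count_rc_qps() are invented names, not part of rdmavt:

/*
 * Hedged usage sketch for rvt_qp_iter(): the u64 argument carries a pointer
 * to caller state.  The helper names are invented for illustration only.
 */
#include <linux/atomic.h>
#include <rdma/rdma_vt.h>
#include <rdma/rdmavt_qp.h>

static void count_rc_qp(struct rvt_qp *qp, u64 v)
{
	atomic_t *nrc = (atomic_t *)v;

	/* called with a reference held on qp, outside the RCU read section */
	if (qp->ibqp.qp_type == IB_QPT_RC)
		atomic_inc(nrc);
}

static u32 count_rc_qps(struct rvt_dev_info *rdi)
{
	atomic_t nrc = ATOMIC_INIT(0);

	rvt_qp_iter(rdi, (u64)&nrc, count_rc_qp);
	return atomic_read(&nrc);
}

The finer-grained rvt_qp_iter_init()/rvt_qp_iter_next() pair serves the debugfs seq_file case, where the caller holds the RCU read lock across the walk.
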
diff --git a/drivers/infiniband/sw/rdmavt/trace_mr.h b/drivers/infiniband/sw/rdmavt/trace_mr.h
index 3318a6c..976e482 100644
--- a/drivers/infiniband/sw/rdmavt/trace_mr.h
+++ b/drivers/infiniband/sw/rdmavt/trace_mr.h
@@ -103,6 +103,68 @@ DEFINE_EVENT(
TP_PROTO(struct rvt_mregion *mr, u16 m, u16 n, void *v, size_t len),
TP_ARGS(mr, m, n, v, len));
+DECLARE_EVENT_CLASS(
+ rvt_sge_template,
+ TP_PROTO(struct rvt_sge *sge, struct ib_sge *isge),
+ TP_ARGS(sge, isge),
+ TP_STRUCT__entry(
+ RDI_DEV_ENTRY(ib_to_rvt(sge->mr->pd->device))
+ __field(struct rvt_mregion *, mr)
+ __field(struct rvt_sge *, sge)
+ __field(struct ib_sge *, isge)
+ __field(void *, vaddr)
+ __field(u64, ivaddr)
+ __field(u32, lkey)
+ __field(u32, sge_length)
+ __field(u32, length)
+ __field(u32, ilength)
+ __field(int, user)
+ __field(u16, m)
+ __field(u16, n)
+ ),
+ TP_fast_assign(
+ RDI_DEV_ASSIGN(ib_to_rvt(sge->mr->pd->device));
+ __entry->mr = sge->mr;
+ __entry->sge = sge;
+ __entry->isge = isge;
+ __entry->vaddr = sge->vaddr;
+ __entry->ivaddr = isge->addr;
+ __entry->lkey = sge->mr->lkey;
+ __entry->sge_length = sge->sge_length;
+ __entry->length = sge->length;
+ __entry->ilength = isge->length;
+ __entry->m = sge->m;
+ __entry->n = sge->n;
+ __entry->user = ibpd_to_rvtpd(sge->mr->pd)->user;
+ ),
+ TP_printk(
+ "[%s] mr %p sge %p isge %p vaddr %p ivaddr %llx lkey %x sge_length %u length %u ilength %u m %u n %u user %u",
+ __get_str(dev),
+ __entry->mr,
+ __entry->sge,
+ __entry->isge,
+ __entry->vaddr,
+ __entry->ivaddr,
+ __entry->lkey,
+ __entry->sge_length,
+ __entry->length,
+ __entry->ilength,
+ __entry->m,
+ __entry->n,
+ __entry->user
+ )
+);
+
+DEFINE_EVENT(
+ rvt_sge_template, rvt_sge_adjacent,
+ TP_PROTO(struct rvt_sge *sge, struct ib_sge *isge),
+ TP_ARGS(sge, isge));
+
+DEFINE_EVENT(
+ rvt_sge_template, rvt_sge_new,
+ TP_PROTO(struct rvt_sge *sge, struct ib_sge *isge),
+ TP_ARGS(sge, isge));
+
#endif /* __RVT_TRACE_MR_H */
#undef TRACE_INCLUDE_PATH
diff --git a/drivers/infiniband/sw/rdmavt/trace_tx.h b/drivers/infiniband/sw/rdmavt/trace_tx.h
index a613a22..0ef25fc 100644
--- a/drivers/infiniband/sw/rdmavt/trace_tx.h
+++ b/drivers/infiniband/sw/rdmavt/trace_tx.h
@@ -84,12 +84,12 @@ __print_symbolic(opcode, \
wr_opcode_name(RESERVED10))
#define POS_PRN \
-"[%s] wqe %p wr_id %llx send_flags %x qpn %x qpt %u psn %x lpsn %x ssn %x length %u opcode 0x%.2x,%s size %u avail %u head %u last %u pid %u num_sge %u"
+"[%s] wqe %p wr_id %llx send_flags %x qpn %x qpt %u psn %x lpsn %x ssn %x length %u opcode 0x%.2x,%s size %u avail %u head %u last %u pid %u num_sge %u wr_num_sge %u"
TRACE_EVENT(
rvt_post_one_wr,
- TP_PROTO(struct rvt_qp *qp, struct rvt_swqe *wqe),
- TP_ARGS(qp, wqe),
+ TP_PROTO(struct rvt_qp *qp, struct rvt_swqe *wqe, int wr_num_sge),
+ TP_ARGS(qp, wqe, wr_num_sge),
TP_STRUCT__entry(
RDI_DEV_ENTRY(ib_to_rvt(qp->ibqp.device))
__field(u64, wr_id)
@@ -108,6 +108,7 @@ TRACE_EVENT(
__field(int, send_flags)
__field(pid_t, pid)
__field(int, num_sge)
+ __field(int, wr_num_sge)
),
TP_fast_assign(
RDI_DEV_ASSIGN(ib_to_rvt(qp->ibqp.device))
@@ -127,6 +128,7 @@ TRACE_EVENT(
__entry->ssn = wqe->ssn;
__entry->send_flags = wqe->wr.send_flags;
__entry->num_sge = wqe->wr.num_sge;
+ __entry->wr_num_sge = wr_num_sge;
),
TP_printk(
POS_PRN,
@@ -146,7 +148,8 @@ TRACE_EVENT(
__entry->head,
__entry->last,
__entry->pid,
- __entry->num_sge
+ __entry->num_sge,
+ __entry->wr_num_sge
)
);
diff --git a/drivers/infiniband/sw/rdmavt/vt.c b/drivers/infiniband/sw/rdmavt/vt.c
index 0d7c6bb..64bdd44 100644
--- a/drivers/infiniband/sw/rdmavt/vt.c
+++ b/drivers/infiniband/sw/rdmavt/vt.c
@@ -202,8 +202,13 @@ static int rvt_modify_port(struct ib_device *ibdev, u8 port_num,
return -EINVAL;
rvp = rdi->ports[port_index];
- rvp->port_cap_flags |= props->set_port_cap_mask;
- rvp->port_cap_flags &= ~props->clr_port_cap_mask;
+ if (port_modify_mask & IB_PORT_OPA_MASK_CHG) {
+ rvp->port_cap3_flags |= props->set_port_cap_mask;
+ rvp->port_cap3_flags &= ~props->clr_port_cap_mask;
+ } else {
+ rvp->port_cap_flags |= props->set_port_cap_mask;
+ rvp->port_cap_flags &= ~props->clr_port_cap_mask;
+ }
if (props->set_port_cap_mask || props->clr_port_cap_mask)
rdi->driver_f.cap_mask_chg(rdi, port_num);
diff --git a/drivers/infiniband/sw/rxe/rxe.c b/drivers/infiniband/sw/rxe/rxe.c
index c21c913..8c3d30b 100644
--- a/drivers/infiniband/sw/rxe/rxe.c
+++ b/drivers/infiniband/sw/rxe/rxe.c
@@ -38,7 +38,6 @@
MODULE_AUTHOR("Bob Pearson, Frank Zago, John Groves, Kamal Heib");
MODULE_DESCRIPTION("Soft RDMA transport");
MODULE_LICENSE("Dual BSD/GPL");
-MODULE_VERSION("0.2");
/* free resources for all ports on a device */
static void rxe_cleanup_ports(struct rxe_dev *rxe)
diff --git a/drivers/infiniband/sw/rxe/rxe.h b/drivers/infiniband/sw/rxe/rxe.h
index 1ac5b85..6447d73 100644
--- a/drivers/infiniband/sw/rxe/rxe.h
+++ b/drivers/infiniband/sw/rxe/rxe.h
@@ -97,7 +97,7 @@ int rxe_rcv(struct sk_buff *skb);
void rxe_dev_put(struct rxe_dev *rxe);
struct rxe_dev *net_to_rxe(struct net_device *ndev);
-struct rxe_dev *get_rxe_by_name(const char* name);
+struct rxe_dev *get_rxe_by_name(const char *name);
void rxe_port_up(struct rxe_dev *rxe);
void rxe_port_down(struct rxe_dev *rxe);
diff --git a/drivers/infiniband/sw/rxe/rxe_av.c b/drivers/infiniband/sw/rxe/rxe_av.c
index 5bddf46..1cc9e2e 100644
--- a/drivers/infiniband/sw/rxe/rxe_av.c
+++ b/drivers/infiniband/sw/rxe/rxe_av.c
@@ -38,18 +38,13 @@ int rxe_av_chk_attr(struct rxe_dev *rxe, struct rdma_ah_attr *attr)
{
struct rxe_port *port;
- if (rdma_ah_get_port_num(attr) != 1) {
- pr_info("invalid port_num = %d\n", rdma_ah_get_port_num(attr));
- return -EINVAL;
- }
-
port = &rxe->port;
if (rdma_ah_get_ah_flags(attr) & IB_AH_GRH) {
u8 sgid_index = rdma_ah_read_grh(attr)->sgid_index;
if (sgid_index > port->attr.gid_tbl_len) {
- pr_info("invalid sgid index = %d\n", sgid_index);
+ pr_warn("invalid sgid index = %d\n", sgid_index);
return -EINVAL;
}
}
diff --git a/drivers/infiniband/sw/rxe/rxe_cq.c b/drivers/infiniband/sw/rxe/rxe_cq.c
index 49fe42c..c4aabf7 100644
--- a/drivers/infiniband/sw/rxe/rxe_cq.c
+++ b/drivers/infiniband/sw/rxe/rxe_cq.c
@@ -69,6 +69,14 @@ err1:
static void rxe_send_complete(unsigned long data)
{
struct rxe_cq *cq = (struct rxe_cq *)data;
+ unsigned long flags;
+
+ spin_lock_irqsave(&cq->cq_lock, flags);
+ if (cq->is_dying) {
+ spin_unlock_irqrestore(&cq->cq_lock, flags);
+ return;
+ }
+ spin_unlock_irqrestore(&cq->cq_lock, flags);
cq->ibcq.comp_handler(&cq->ibcq, cq->ibcq.cq_context);
}
@@ -97,6 +105,8 @@ int rxe_cq_from_init(struct rxe_dev *rxe, struct rxe_cq *cq, int cqe,
if (udata)
cq->is_user = 1;
+ cq->is_dying = false;
+
tasklet_init(&cq->comp_task, rxe_send_complete, (unsigned long)cq);
spin_lock_init(&cq->cq_lock);
@@ -156,6 +166,15 @@ int rxe_cq_post(struct rxe_cq *cq, struct rxe_cqe *cqe, int solicited)
return 0;
}
+void rxe_cq_disable(struct rxe_cq *cq)
+{
+ unsigned long flags;
+
+ spin_lock_irqsave(&cq->cq_lock, flags);
+ cq->is_dying = true;
+ spin_unlock_irqrestore(&cq->cq_lock, flags);
+}
+
void rxe_cq_cleanup(struct rxe_pool_entry *arg)
{
struct rxe_cq *cq = container_of(arg, typeof(*cq), pelem);
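
The is_dying/cq_lock pairing added above is a general quiescing pattern: teardown flips a flag under the same lock the callback checks, so a tasklet that is already scheduled becomes a no-op instead of touching a CQ that is being destroyed. A generic, stand-alone sketch of the same idea; "struct cq_like" and its helpers are illustrative names, not the rxe structures:

/*
 * Generic form of the quiescing pattern used by rxe_cq_disable() above.
 */
#include <linux/spinlock.h>
#include <linux/interrupt.h>

struct cq_like {
	spinlock_t lock;
	bool dying;
	struct tasklet_struct work;
};

static void cq_like_complete(unsigned long data)
{
	struct cq_like *cq = (struct cq_like *)data;
	unsigned long flags;

	spin_lock_irqsave(&cq->lock, flags);
	if (cq->dying) {		/* teardown already started */
		spin_unlock_irqrestore(&cq->lock, flags);
		return;
	}
	spin_unlock_irqrestore(&cq->lock, flags);

	/* ... deliver completions to the consumer here ... */
}

static void cq_like_disable(struct cq_like *cq)
{
	unsigned long flags;

	spin_lock_irqsave(&cq->lock, flags);
	cq->dying = true;		/* later callback runs become no-ops */
	spin_unlock_irqrestore(&cq->lock, flags);
}
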
diff --git a/drivers/infiniband/sw/rxe/rxe_hw_counters.c b/drivers/infiniband/sw/rxe/rxe_hw_counters.c
index 7ef90aa..6aeb7a1 100644
--- a/drivers/infiniband/sw/rxe/rxe_hw_counters.c
+++ b/drivers/infiniband/sw/rxe/rxe_hw_counters.c
@@ -33,7 +33,7 @@
#include "rxe.h"
#include "rxe_hw_counters.h"
-const char * const rxe_counter_name[] = {
+static const char * const rxe_counter_name[] = {
[RXE_CNT_SENT_PKTS] = "sent_pkts",
[RXE_CNT_RCVD_PKTS] = "rcvd_pkts",
[RXE_CNT_DUP_REQ] = "duplicate_request",
diff --git a/drivers/infiniband/sw/rxe/rxe_loc.h b/drivers/infiniband/sw/rxe/rxe_loc.h
index d6299ed..77b3ed0 100644
--- a/drivers/infiniband/sw/rxe/rxe_loc.h
+++ b/drivers/infiniband/sw/rxe/rxe_loc.h
@@ -64,6 +64,8 @@ int rxe_cq_resize_queue(struct rxe_cq *cq, int new_cqe, struct ib_udata *udata);
int rxe_cq_post(struct rxe_cq *cq, struct rxe_cqe *cqe, int solicited);
+void rxe_cq_disable(struct rxe_cq *cq);
+
void rxe_cq_cleanup(struct rxe_pool_entry *arg);
/* rxe_mcast.c */
@@ -219,8 +221,6 @@ static inline void rxe_advance_resp_resource(struct rxe_qp *qp)
void retransmit_timer(unsigned long data);
void rnr_nak_timer(unsigned long data);
-void dump_qp(struct rxe_qp *qp);
-
/* rxe_srq.c */
#define IB_SRQ_INIT_MASK (~IB_SRQ_LIMIT)
@@ -250,7 +250,7 @@ void rxe_resp_queue_pkt(struct rxe_dev *rxe,
void rxe_comp_queue_pkt(struct rxe_dev *rxe,
struct rxe_qp *qp, struct sk_buff *skb);
-static inline unsigned wr_opcode_mask(int opcode, struct rxe_qp *qp)
+static inline unsigned int wr_opcode_mask(int opcode, struct rxe_qp *qp)
{
return rxe_wr_opcode_info[opcode].mask[qp->ibqp.qp_type];
}
diff --git a/drivers/infiniband/sw/rxe/rxe_mmap.c b/drivers/infiniband/sw/rxe/rxe_mmap.c
index bd812e0..d22431e 100644
--- a/drivers/infiniband/sw/rxe/rxe_mmap.c
+++ b/drivers/infiniband/sw/rxe/rxe_mmap.c
@@ -76,7 +76,7 @@ static void rxe_vma_close(struct vm_area_struct *vma)
kref_put(&ip->ref, rxe_mmap_release);
}
-static struct vm_operations_struct rxe_vm_ops = {
+static const struct vm_operations_struct rxe_vm_ops = {
.open = rxe_vma_open,
.close = rxe_vma_close,
};
diff --git a/drivers/infiniband/sw/rxe/rxe_mr.c b/drivers/infiniband/sw/rxe/rxe_mr.c
index e37cc89..5c2684b 100644
--- a/drivers/infiniband/sw/rxe/rxe_mr.c
+++ b/drivers/infiniband/sw/rxe/rxe_mr.c
@@ -367,11 +367,11 @@ int rxe_mem_copy(struct rxe_mem *mem, u64 iova, void *addr, int length,
dest = (dir == to_mem_obj) ?
((void *)(uintptr_t)iova) : addr;
+ memcpy(dest, src, length);
+
if (crcp)
*crcp = rxe_crc32(to_rdev(mem->pd->ibpd.device),
- *crcp, src, length);
-
- memcpy(dest, src, length);
+ *crcp, dest, length);
return 0;
}
@@ -401,11 +401,11 @@ int rxe_mem_copy(struct rxe_mem *mem, u64 iova, void *addr, int length,
if (bytes > length)
bytes = length;
+ memcpy(dest, src, bytes);
+
if (crcp)
crc = rxe_crc32(to_rdev(mem->pd->ibpd.device),
- crc, src, bytes);
-
- memcpy(dest, src, bytes);
+ crc, dest, bytes);
length -= bytes;
addr += bytes;
diff --git a/drivers/infiniband/sw/rxe/rxe_net.c b/drivers/infiniband/sw/rxe/rxe_net.c
index 08f3f90..59dee10 100644
--- a/drivers/infiniband/sw/rxe/rxe_net.c
+++ b/drivers/infiniband/sw/rxe/rxe_net.c
@@ -191,7 +191,7 @@ static struct dst_entry *rxe_find_route(struct rxe_dev *rxe,
if (qp_type(qp) == IB_QPT_RC)
dst = sk_dst_get(qp->sk->sk);
- if (!dst || !(dst->obsolete && dst->ops->check(dst, 0))) {
+ if (!dst || !dst_check(dst, qp->dst_cookie)) {
if (dst)
dst_release(dst);
@@ -209,6 +209,11 @@ static struct dst_entry *rxe_find_route(struct rxe_dev *rxe,
saddr6 = &av->sgid_addr._sockaddr_in6.sin6_addr;
daddr6 = &av->dgid_addr._sockaddr_in6.sin6_addr;
dst = rxe_find_route6(rxe->ndev, saddr6, daddr6);
+#if IS_ENABLED(CONFIG_IPV6)
+ if (dst)
+ qp->dst_cookie =
+ rt6_get_cookie((struct rt6_info *)dst);
+#endif
}
}
@@ -337,7 +342,7 @@ static void prepare_ipv6_hdr(struct dst_entry *dst, struct sk_buff *skb,
memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
IPCB(skb)->flags &= ~(IPSKB_XFRM_TUNNEL_SIZE | IPSKB_XFRM_TRANSFORMED
| IPSKB_REROUTED);
- skb_dst_set(skb, dst);
+ skb_dst_set(skb, dst_clone(dst));
__skb_push(skb, sizeof(*ip6h));
skb_reset_network_header(skb);
@@ -388,7 +393,7 @@ static int prepare6(struct rxe_dev *rxe, struct rxe_pkt_info *pkt,
struct sk_buff *skb, struct rxe_av *av)
{
struct rxe_qp *qp = pkt->qp;
- struct dst_entry *dst = NULL;
+ struct dst_entry *dst;
struct in6_addr *saddr = &av->sgid_addr._sockaddr_in6.sin6_addr;
struct in6_addr *daddr = &av->dgid_addr._sockaddr_in6.sin6_addr;
@@ -460,12 +465,17 @@ int rxe_send(struct rxe_dev *rxe, struct rxe_pkt_info *pkt, struct sk_buff *skb)
nskb->destructor = rxe_skb_tx_dtor;
nskb->sk = pkt->qp->sk->sk;
+ rxe_add_ref(pkt->qp);
+ atomic_inc(&pkt->qp->skb_out);
+
if (av->network_type == RDMA_NETWORK_IPV4) {
err = ip_local_out(dev_net(skb_dst(skb)->dev), nskb->sk, nskb);
} else if (av->network_type == RDMA_NETWORK_IPV6) {
err = ip6_local_out(dev_net(skb_dst(skb)->dev), nskb->sk, nskb);
} else {
pr_err("Unknown layer 3 protocol: %d\n", av->network_type);
+ atomic_dec(&pkt->qp->skb_out);
+ rxe_drop_ref(pkt->qp);
kfree_skb(nskb);
return -EINVAL;
}
@@ -475,10 +485,7 @@ int rxe_send(struct rxe_dev *rxe, struct rxe_pkt_info *pkt, struct sk_buff *skb)
return -EAGAIN;
}
- rxe_add_ref(pkt->qp);
- atomic_inc(&pkt->qp->skb_out);
kfree_skb(skb);
-
return 0;
}
@@ -644,8 +651,13 @@ static int rxe_notify(struct notifier_block *not_blk,
pr_info("%s changed mtu to %d\n", ndev->name, ndev->mtu);
rxe_set_mtu(rxe, ndev->mtu);
break;
- case NETDEV_REBOOT:
case NETDEV_CHANGE:
+ if (netif_running(ndev) && netif_carrier_ok(ndev))
+ rxe_port_up(rxe);
+ else
+ rxe_port_down(rxe);
+ break;
+ case NETDEV_REBOOT:
case NETDEV_GOING_DOWN:
case NETDEV_CHANGEADDR:
case NETDEV_CHANGENAME:
diff --git a/drivers/infiniband/sw/rxe/rxe_pool.c b/drivers/infiniband/sw/rxe/rxe_pool.c
index 75d11ee..c1b5f38 100644
--- a/drivers/infiniband/sw/rxe/rxe_pool.c
+++ b/drivers/infiniband/sw/rxe/rxe_pool.c
@@ -188,7 +188,7 @@ int rxe_pool_init(
struct rxe_dev *rxe,
struct rxe_pool *pool,
enum rxe_elem_type type,
- unsigned max_elem)
+ unsigned int max_elem)
{
int err = 0;
size_t size = rxe_type_info[type].size;
diff --git a/drivers/infiniband/sw/rxe/rxe_qp.c b/drivers/infiniband/sw/rxe/rxe_qp.c
index 80ccc7c..00bda93 100644
--- a/drivers/infiniband/sw/rxe/rxe_qp.c
+++ b/drivers/infiniband/sw/rxe/rxe_qp.c
@@ -851,13 +851,8 @@ void rxe_qp_cleanup(struct rxe_pool_entry *arg)
qp->resp.mr = NULL;
}
- if (qp_type(qp) == IB_QPT_RC) {
- struct dst_entry *dst = NULL;
-
- dst = sk_dst_get(qp->sk->sk);
- if (dst)
- dst_release(dst);
- }
+ if (qp_type(qp) == IB_QPT_RC)
+ sk_dst_reset(qp->sk->sk);
free_rd_atomic_resources(qp);
diff --git a/drivers/infiniband/sw/rxe/rxe_req.c b/drivers/infiniband/sw/rxe/rxe_req.c
index 7ee465d..d84222f 100644
--- a/drivers/infiniband/sw/rxe/rxe_req.c
+++ b/drivers/infiniband/sw/rxe/rxe_req.c
@@ -43,7 +43,7 @@ static int next_opcode(struct rxe_qp *qp, struct rxe_send_wqe *wqe,
static inline void retry_first_write_send(struct rxe_qp *qp,
struct rxe_send_wqe *wqe,
- unsigned mask, int npsn)
+ unsigned int mask, int npsn)
{
int i;
@@ -594,8 +594,10 @@ int rxe_requester(void *arg)
rxe_add_ref(qp);
next_wqe:
- if (unlikely(!qp->valid))
+ if (unlikely(!qp->valid)) {
+ rxe_drain_req_pkts(qp, true);
goto exit;
+ }
if (unlikely(qp->req.state == QP_STATE_ERROR)) {
rxe_drain_req_pkts(qp, true);
diff --git a/drivers/infiniband/sw/rxe/rxe_resp.c b/drivers/infiniband/sw/rxe/rxe_resp.c
index a958ee9..4240866 100644
--- a/drivers/infiniband/sw/rxe/rxe_resp.c
+++ b/drivers/infiniband/sw/rxe/rxe_resp.c
@@ -1055,7 +1055,7 @@ static struct resp_res *find_resource(struct rxe_qp *qp, u32 psn)
{
int i;
- for (i = 0; i < qp->attr.max_rd_atomic; i++) {
+ for (i = 0; i < qp->attr.max_dest_rd_atomic; i++) {
struct resp_res *res = &qp->resp.resources[i];
if (res->type == 0)
diff --git a/drivers/infiniband/sw/rxe/rxe_task.c b/drivers/infiniband/sw/rxe/rxe_task.c
index d2a14a1..ea3810b 100644
--- a/drivers/infiniband/sw/rxe/rxe_task.c
+++ b/drivers/infiniband/sw/rxe/rxe_task.c
@@ -78,7 +78,7 @@ void rxe_do_task(unsigned long data)
default:
spin_unlock_irqrestore(&task->state_lock, flags);
- pr_warn("bad state = %d in rxe_do_task\n", task->state);
+ pr_warn("%s failed with bad state %d\n", __func__, task->state);
return;
}
@@ -105,7 +105,7 @@ void rxe_do_task(unsigned long data)
break;
default:
- pr_warn("bad state = %d in rxe_do_task\n",
+ pr_warn("%s failed with bad state %d\n", __func__,
task->state);
}
spin_unlock_irqrestore(&task->state_lock, flags);
diff --git a/drivers/infiniband/sw/rxe/rxe_verbs.c b/drivers/infiniband/sw/rxe/rxe_verbs.c
index af90a7d..0b362f4 100644
--- a/drivers/infiniband/sw/rxe/rxe_verbs.c
+++ b/drivers/infiniband/sw/rxe/rxe_verbs.c
@@ -51,40 +51,16 @@ static int rxe_query_device(struct ib_device *dev,
return 0;
}
-static void rxe_eth_speed_to_ib_speed(int speed, u8 *active_speed,
- u8 *active_width)
-{
- if (speed <= 1000) {
- *active_width = IB_WIDTH_1X;
- *active_speed = IB_SPEED_SDR;
- } else if (speed <= 10000) {
- *active_width = IB_WIDTH_1X;
- *active_speed = IB_SPEED_FDR10;
- } else if (speed <= 20000) {
- *active_width = IB_WIDTH_4X;
- *active_speed = IB_SPEED_DDR;
- } else if (speed <= 30000) {
- *active_width = IB_WIDTH_4X;
- *active_speed = IB_SPEED_QDR;
- } else if (speed <= 40000) {
- *active_width = IB_WIDTH_4X;
- *active_speed = IB_SPEED_FDR10;
- } else {
- *active_width = IB_WIDTH_4X;
- *active_speed = IB_SPEED_EDR;
- }
-}
-
static int rxe_query_port(struct ib_device *dev,
u8 port_num, struct ib_port_attr *attr)
{
struct rxe_dev *rxe = to_rdev(dev);
struct rxe_port *port;
- u32 speed;
+ int rc = -EINVAL;
if (unlikely(port_num != 1)) {
pr_warn("invalid port_number %d\n", port_num);
- goto err1;
+ goto out;
}
port = &rxe->port;
@@ -93,29 +69,12 @@ static int rxe_query_port(struct ib_device *dev,
*attr = port->attr;
mutex_lock(&rxe->usdev_lock);
- if (rxe->ndev->ethtool_ops->get_link_ksettings) {
- struct ethtool_link_ksettings ks;
-
- rxe->ndev->ethtool_ops->get_link_ksettings(rxe->ndev, &ks);
- speed = ks.base.speed;
- } else if (rxe->ndev->ethtool_ops->get_settings) {
- struct ethtool_cmd cmd;
-
- rxe->ndev->ethtool_ops->get_settings(rxe->ndev, &cmd);
- speed = cmd.speed;
- } else {
- pr_warn("%s speed is unknown, defaulting to 1000\n",
- rxe->ndev->name);
- speed = 1000;
- }
- rxe_eth_speed_to_ib_speed(speed, &attr->active_speed,
- &attr->active_width);
+ rc = ib_get_eth_speed(dev, port_num, &attr->active_speed,
+ &attr->active_width);
mutex_unlock(&rxe->usdev_lock);
- return 0;
-
-err1:
- return -EINVAL;
+out:
+ return rc;
}
static int rxe_query_gid(struct ib_device *device,
@@ -960,6 +919,8 @@ static int rxe_destroy_cq(struct ib_cq *ibcq)
{
struct rxe_cq *cq = to_rcq(ibcq);
+ rxe_cq_disable(cq);
+
rxe_drop_ref(cq);
return 0;
}
@@ -1210,8 +1171,8 @@ static int rxe_detach_mcast(struct ib_qp *ibqp, union ib_gid *mgid, u16 mlid)
return rxe_mcast_drop_grp_elem(rxe, qp, mgid);
}
-static ssize_t rxe_show_parent(struct device *device,
- struct device_attribute *attr, char *buf)
+static ssize_t parent_show(struct device *device,
+ struct device_attribute *attr, char *buf)
{
struct rxe_dev *rxe = container_of(device, struct rxe_dev,
ib_dev.dev);
@@ -1219,7 +1180,7 @@ static ssize_t rxe_show_parent(struct device *device,
return snprintf(buf, 16, "%s\n", rxe_parent_name(rxe, 1));
}
-static DEVICE_ATTR(parent, S_IRUGO, rxe_show_parent, NULL);
+static DEVICE_ATTR_RO(parent);
static struct device_attribute *rxe_dev_attributes[] = {
&dev_attr_parent,
@@ -1336,15 +1297,15 @@ int rxe_register_device(struct rxe_dev *rxe)
err = ib_register_device(dev, NULL);
if (err) {
- pr_warn("rxe_register_device failed, err = %d\n", err);
+ pr_warn("%s failed with error %d\n", __func__, err);
goto err1;
}
for (i = 0; i < ARRAY_SIZE(rxe_dev_attributes); ++i) {
err = device_create_file(&dev->dev, rxe_dev_attributes[i]);
if (err) {
- pr_warn("device_create_file failed, i = %d, err = %d\n",
- i, err);
+ pr_warn("%s failed with error %d for attr number %d\n",
+ __func__, err, i);
goto err2;
}
}
diff --git a/drivers/infiniband/sw/rxe/rxe_verbs.h b/drivers/infiniband/sw/rxe/rxe_verbs.h
index 5a180fb..0c2dbe4 100644
--- a/drivers/infiniband/sw/rxe/rxe_verbs.h
+++ b/drivers/infiniband/sw/rxe/rxe_verbs.h
@@ -89,6 +89,7 @@ struct rxe_cq {
struct rxe_queue *queue;
spinlock_t cq_lock;
u8 notify;
+ bool is_dying;
int is_user;
struct tasklet_struct comp_task;
};
@@ -247,6 +248,7 @@ struct rxe_qp {
struct rxe_rq rq;
struct socket *sk;
+ u32 dst_cookie;
struct rxe_av pri_av;
struct rxe_av alt_av;