summaryrefslogtreecommitdiffstats
path: root/drivers/infiniband/hw/mlx4/srq.c
diff options
context:
space:
mode:
authorYishai Hadas <yishaih@mellanox.com>2015-02-08 11:49:34 +0200
committerDavid S. Miller <davem@davemloft.net>2015-02-09 14:03:53 -0800
commit35f05dabf95ac3ebc4c15bafd6833f7a3046e66f (patch)
treea39bb7c432f4e36467e61b316050e41ecd408b1f /drivers/infiniband/hw/mlx4/srq.c
parent824c25c1abe70a527646056f6911d181facde9cc (diff)
downloadop-kernel-dev-35f05dabf95ac3ebc4c15bafd6833f7a3046e66f.zip
op-kernel-dev-35f05dabf95ac3ebc4c15bafd6833f7a3046e66f.tar.gz
IB/mlx4: Reset flow support for IB kernel ULPs
The driver exposes interfaces that directly relate to HW state. Upon fatal error, consumers of these interfaces (ULPs) that rely on completion of all their posted work-request could hang, thereby introducing dependencies in shutdown order. To prevent this from happening, we manage the relevant resources (CQs, QPs) that are used by the device. Upon a fatal error, we now generate simulated completions for outstanding WQEs that were not completed at the time the HW was reset. It includes invoking the completion event handler for all involved CQs so that the ULPs will poll those CQs. When polled we return simulated CQEs with IB_WC_WR_FLUSH_ERR return code enabling ULPs to clean up their resources and not wait forever for completions upon receiving remove_one. The above change requires an extra check in the data path to make sure that when device is in error state, the simulated CQEs will be returned and no further WQEs will be posted. Signed-off-by: Yishai Hadas <yishaih@mellanox.com> Signed-off-by: Or Gerlitz <ogerlitz@mellanox.com> Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'drivers/infiniband/hw/mlx4/srq.c')
-rw-r--r--drivers/infiniband/hw/mlx4/srq.c8
1 files changed, 8 insertions, 0 deletions
diff --git a/drivers/infiniband/hw/mlx4/srq.c b/drivers/infiniband/hw/mlx4/srq.c
index 62d9285..dce5dfe 100644
--- a/drivers/infiniband/hw/mlx4/srq.c
+++ b/drivers/infiniband/hw/mlx4/srq.c
@@ -316,8 +316,15 @@ int mlx4_ib_post_srq_recv(struct ib_srq *ibsrq, struct ib_recv_wr *wr,
int err = 0;
int nreq;
int i;
+ struct mlx4_ib_dev *mdev = to_mdev(ibsrq->device);
spin_lock_irqsave(&srq->lock, flags);
+ if (mdev->dev->persist->state & MLX4_DEVICE_STATE_INTERNAL_ERROR) {
+ err = -EIO;
+ *bad_wr = wr;
+ nreq = 0;
+ goto out;
+ }
for (nreq = 0; wr; ++nreq, wr = wr->next) {
if (unlikely(wr->num_sge > srq->msrq.max_gs)) {
@@ -362,6 +369,7 @@ int mlx4_ib_post_srq_recv(struct ib_srq *ibsrq, struct ib_recv_wr *wr,
*srq->db.db = cpu_to_be32(srq->wqe_ctr);
}
+out:
spin_unlock_irqrestore(&srq->lock, flags);
OpenPOWER on IntegriCloud