summaryrefslogtreecommitdiffstats
path: root/drivers/infiniband
diff options
context:
space:
mode:
authorAlex Tabachnik <alext@mellanox.com>2012-09-23 15:17:44 +0000
committerRoland Dreier <roland@purestorage.com>2012-10-03 21:26:49 -0700
commit5a33a6694942bc86e487d00cd8feebeae5f14546 (patch)
treec12494add1644126fb8435e85bae72dd49269952 /drivers/infiniband
parent7a9a2970b5c1c2ce73d4bb84edaa7ebf13e0c841 (diff)
downloadop-kernel-dev-5a33a6694942bc86e487d00cd8feebeae5f14546.zip
op-kernel-dev-5a33a6694942bc86e487d00cd8feebeae5f14546.tar.gz
IB/iser: Add more RX CQs to scale out processing of SCSI responses
RX/TX CQs will now be selected from a per HCA pool. For the RX flow this has the effect of using different interrupt vectors when using low level drivers (such as mlx4) that map the "vector" param provided by the ULP on CQ creation to a dedicated IRQ/MSI-X vector. This allows the RX flow processing of IO responses to be distributed across multiple CPUs. QPs (--> iSER sessions) are assigned to CQs in round robin order using the CQ with the minimum number of sessions attached to it. Signed-off-by: Or Gerlitz <ogerlitz@mellanox.com> Signed-off-by: Alex Tabachnik <alext@mellanox.com> Signed-off-by: Roland Dreier <roland@purestorage.com>
Diffstat (limited to 'drivers/infiniband')
-rw-r--r--drivers/infiniband/ulp/iser/iscsi_iser.h17
-rw-r--r--drivers/infiniband/ulp/iser/iser_verbs.c130
2 files changed, 103 insertions, 44 deletions
diff --git a/drivers/infiniband/ulp/iser/iscsi_iser.h b/drivers/infiniband/ulp/iser/iscsi_iser.h
index 296be43..ef7d3be 100644
--- a/drivers/infiniband/ulp/iser/iscsi_iser.h
+++ b/drivers/infiniband/ulp/iser/iscsi_iser.h
@@ -177,6 +177,7 @@ struct iser_data_buf {
/* fwd declarations */
struct iser_device;
+struct iser_cq_desc;
struct iscsi_iser_conn;
struct iscsi_iser_task;
struct iscsi_endpoint;
@@ -226,16 +227,21 @@ struct iser_rx_desc {
char pad[ISER_RX_PAD_SIZE];
} __attribute__((packed));
+#define ISER_MAX_CQ 4
+
struct iser_device {
struct ib_device *ib_device;
struct ib_pd *pd;
- struct ib_cq *rx_cq;
- struct ib_cq *tx_cq;
+ struct ib_cq *rx_cq[ISER_MAX_CQ];
+ struct ib_cq *tx_cq[ISER_MAX_CQ];
struct ib_mr *mr;
- struct tasklet_struct cq_tasklet;
+ struct tasklet_struct cq_tasklet[ISER_MAX_CQ];
struct ib_event_handler event_handler;
struct list_head ig_list; /* entry in ig devices list */
int refcount;
+ int cq_active_qps[ISER_MAX_CQ];
+ int cqs_used;
+ struct iser_cq_desc *cq_desc;
};
struct iser_conn {
@@ -287,6 +293,11 @@ struct iser_page_vec {
int data_size;
};
+struct iser_cq_desc {
+ struct iser_device *device;
+ int cq_index;
+};
+
struct iser_global {
struct mutex device_list_mutex;/* */
struct list_head device_list; /* all iSER devices */
diff --git a/drivers/infiniband/ulp/iser/iser_verbs.c b/drivers/infiniband/ulp/iser/iser_verbs.c
index 2dddabd..95a49af 100644
--- a/drivers/infiniband/ulp/iser/iser_verbs.c
+++ b/drivers/infiniband/ulp/iser/iser_verbs.c
@@ -70,32 +70,50 @@ static void iser_event_handler(struct ib_event_handler *handler,
*/
static int iser_create_device_ib_res(struct iser_device *device)
{
+ int i, j;
+ struct iser_cq_desc *cq_desc;
+
+ device->cqs_used = min(ISER_MAX_CQ, device->ib_device->num_comp_vectors);
+ iser_err("using %d CQs, device %s supports %d vectors\n", device->cqs_used,
+ device->ib_device->name, device->ib_device->num_comp_vectors);
+
+ device->cq_desc = kmalloc(sizeof(struct iser_cq_desc) * device->cqs_used,
+ GFP_KERNEL);
+ if (device->cq_desc == NULL)
+ goto cq_desc_err;
+ cq_desc = device->cq_desc;
+
device->pd = ib_alloc_pd(device->ib_device);
if (IS_ERR(device->pd))
goto pd_err;
- device->rx_cq = ib_create_cq(device->ib_device,
- iser_cq_callback,
- iser_cq_event_callback,
- (void *)device,
- ISER_MAX_RX_CQ_LEN, 0);
- if (IS_ERR(device->rx_cq))
- goto rx_cq_err;
+ for (i = 0; i < device->cqs_used; i++) {
+ cq_desc[i].device = device;
+ cq_desc[i].cq_index = i;
+
+ device->rx_cq[i] = ib_create_cq(device->ib_device,
+ iser_cq_callback,
+ iser_cq_event_callback,
+ (void *)&cq_desc[i],
+ ISER_MAX_RX_CQ_LEN, i);
+ if (IS_ERR(device->rx_cq[i]))
+ goto cq_err;
- device->tx_cq = ib_create_cq(device->ib_device,
- NULL, iser_cq_event_callback,
- (void *)device,
- ISER_MAX_TX_CQ_LEN, 0);
+ device->tx_cq[i] = ib_create_cq(device->ib_device,
+ NULL, iser_cq_event_callback,
+ (void *)&cq_desc[i],
+ ISER_MAX_TX_CQ_LEN, i);
- if (IS_ERR(device->tx_cq))
- goto tx_cq_err;
+ if (IS_ERR(device->tx_cq[i]))
+ goto cq_err;
- if (ib_req_notify_cq(device->rx_cq, IB_CQ_NEXT_COMP))
- goto cq_arm_err;
+ if (ib_req_notify_cq(device->rx_cq[i], IB_CQ_NEXT_COMP))
+ goto cq_err;
- tasklet_init(&device->cq_tasklet,
- iser_cq_tasklet_fn,
- (unsigned long)device);
+ tasklet_init(&device->cq_tasklet[i],
+ iser_cq_tasklet_fn,
+ (unsigned long)&cq_desc[i]);
+ }
device->mr = ib_get_dma_mr(device->pd, IB_ACCESS_LOCAL_WRITE |
IB_ACCESS_REMOTE_WRITE |
@@ -113,14 +131,19 @@ static int iser_create_device_ib_res(struct iser_device *device)
handler_err:
ib_dereg_mr(device->mr);
dma_mr_err:
- tasklet_kill(&device->cq_tasklet);
-cq_arm_err:
- ib_destroy_cq(device->tx_cq);
-tx_cq_err:
- ib_destroy_cq(device->rx_cq);
-rx_cq_err:
+ for (j = 0; j < device->cqs_used; j++)
+ tasklet_kill(&device->cq_tasklet[j]);
+cq_err:
+ for (j = 0; j < i; j++) {
+ if (device->tx_cq[j])
+ ib_destroy_cq(device->tx_cq[j]);
+ if (device->rx_cq[j])
+ ib_destroy_cq(device->rx_cq[j]);
+ }
ib_dealloc_pd(device->pd);
pd_err:
+ kfree(device->cq_desc);
+cq_desc_err:
iser_err("failed to allocate an IB resource\n");
return -1;
}
@@ -131,18 +154,24 @@ pd_err:
*/
static void iser_free_device_ib_res(struct iser_device *device)
{
+ int i;
BUG_ON(device->mr == NULL);
- tasklet_kill(&device->cq_tasklet);
+ for (i = 0; i < device->cqs_used; i++) {
+ tasklet_kill(&device->cq_tasklet[i]);
+ (void)ib_destroy_cq(device->tx_cq[i]);
+ (void)ib_destroy_cq(device->rx_cq[i]);
+ device->tx_cq[i] = NULL;
+ device->rx_cq[i] = NULL;
+ }
+
(void)ib_unregister_event_handler(&device->event_handler);
(void)ib_dereg_mr(device->mr);
- (void)ib_destroy_cq(device->tx_cq);
- (void)ib_destroy_cq(device->rx_cq);
(void)ib_dealloc_pd(device->pd);
+ kfree(device->cq_desc);
+
device->mr = NULL;
- device->tx_cq = NULL;
- device->rx_cq = NULL;
device->pd = NULL;
}
@@ -157,6 +186,7 @@ static int iser_create_ib_conn_res(struct iser_conn *ib_conn)
struct ib_qp_init_attr init_attr;
int req_err, resp_err, ret = -ENOMEM;
struct ib_fmr_pool_param params;
+ int index, min_index = 0;
BUG_ON(ib_conn->device == NULL);
@@ -220,10 +250,20 @@ static int iser_create_ib_conn_res(struct iser_conn *ib_conn)
memset(&init_attr, 0, sizeof init_attr);
+ mutex_lock(&ig.connlist_mutex);
+ /* select the CQ with the minimal number of usages */
+ for (index = 0; index < device->cqs_used; index++)
+ if (device->cq_active_qps[index] <
+ device->cq_active_qps[min_index])
+ min_index = index;
+ device->cq_active_qps[min_index]++;
+ mutex_unlock(&ig.connlist_mutex);
+ iser_err("cq index %d used for ib_conn %p\n", min_index, ib_conn);
+
init_attr.event_handler = iser_qp_event_callback;
init_attr.qp_context = (void *)ib_conn;
- init_attr.send_cq = device->tx_cq;
- init_attr.recv_cq = device->rx_cq;
+ init_attr.send_cq = device->tx_cq[min_index];
+ init_attr.recv_cq = device->rx_cq[min_index];
init_attr.cap.max_send_wr = ISER_QP_MAX_REQ_DTOS;
init_attr.cap.max_recv_wr = ISER_QP_MAX_RECV_DTOS;
init_attr.cap.max_send_sge = 2;
@@ -252,6 +292,7 @@ out_err:
*/
static int iser_free_ib_conn_res(struct iser_conn *ib_conn, int can_destroy_id)
{
+ int cq_index;
BUG_ON(ib_conn == NULL);
iser_err("freeing conn %p cma_id %p fmr pool %p qp %p\n",
@@ -262,9 +303,12 @@ static int iser_free_ib_conn_res(struct iser_conn *ib_conn, int can_destroy_id)
if (ib_conn->fmr_pool != NULL)
ib_destroy_fmr_pool(ib_conn->fmr_pool);
- if (ib_conn->qp != NULL)
- rdma_destroy_qp(ib_conn->cma_id);
+ if (ib_conn->qp != NULL) {
+ cq_index = ((struct iser_cq_desc *)ib_conn->qp->recv_cq->cq_context)->cq_index;
+ ib_conn->device->cq_active_qps[cq_index]--;
+ rdma_destroy_qp(ib_conn->cma_id);
+ }
/* if cma handler context, the caller acts s.t the cma destroy the id */
if (ib_conn->cma_id != NULL && can_destroy_id)
rdma_destroy_id(ib_conn->cma_id);
@@ -791,9 +835,9 @@ static void iser_handle_comp_error(struct iser_tx_desc *desc,
}
}
-static int iser_drain_tx_cq(struct iser_device *device)
+static int iser_drain_tx_cq(struct iser_device *device, int cq_index)
{
- struct ib_cq *cq = device->tx_cq;
+ struct ib_cq *cq = device->tx_cq[cq_index];
struct ib_wc wc;
struct iser_tx_desc *tx_desc;
struct iser_conn *ib_conn;
@@ -822,8 +866,10 @@ static int iser_drain_tx_cq(struct iser_device *device)
static void iser_cq_tasklet_fn(unsigned long data)
{
- struct iser_device *device = (struct iser_device *)data;
- struct ib_cq *cq = device->rx_cq;
+ struct iser_cq_desc *cq_desc = (struct iser_cq_desc *)data;
+ struct iser_device *device = cq_desc->device;
+ int cq_index = cq_desc->cq_index;
+ struct ib_cq *cq = device->rx_cq[cq_index];
struct ib_wc wc;
struct iser_rx_desc *desc;
unsigned long xfer_len;
@@ -851,19 +897,21 @@ static void iser_cq_tasklet_fn(unsigned long data)
}
completed_rx++;
if (!(completed_rx & 63))
- completed_tx += iser_drain_tx_cq(device);
+ completed_tx += iser_drain_tx_cq(device, cq_index);
}
/* #warning "it is assumed here that arming CQ only once its empty" *
* " would not cause interrupts to be missed" */
ib_req_notify_cq(cq, IB_CQ_NEXT_COMP);
- completed_tx += iser_drain_tx_cq(device);
+ completed_tx += iser_drain_tx_cq(device, cq_index);
iser_dbg("got %d rx %d tx completions\n", completed_rx, completed_tx);
}
static void iser_cq_callback(struct ib_cq *cq, void *cq_context)
{
- struct iser_device *device = (struct iser_device *)cq_context;
+ struct iser_cq_desc *cq_desc = (struct iser_cq_desc *)cq_context;
+ struct iser_device *device = cq_desc->device;
+ int cq_index = cq_desc->cq_index;
- tasklet_schedule(&device->cq_tasklet);
+ tasklet_schedule(&device->cq_tasklet[cq_index]);
}
OpenPOWER on IntegriCloud