author     Doug Ledford <dledford@redhat.com>  2015-12-15 14:10:44 -0500
committer  Doug Ledford <dledford@redhat.com>  2015-12-15 14:10:44 -0500
commit     c6333f9f9f7646e311248a09e8ed96126a97aba8
tree       1a86124c5b43fa858f1d8d7beb99bcd29d6d6ab7
parent     9f9499ae8e6415cefc4fe0a96ad0e27864353c89
parent     cfeb91b375ad5f52665e00d374a4e403d2e6063e
Merge branch 'rdma-cq.2' of git://git.infradead.org/users/hch/rdma into 4.5/rdma-cq
Signed-off-by: Doug Ledford <dledford@redhat.com>
Conflicts:
	drivers/infiniband/ulp/srp/ib_srp.c - conflicts with changes to
	ib_srp.c introduced during the 4.4-rc updates
32 files changed, 886 insertions(+), 940 deletions(-)
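Before the per-file hunks, it may help to sketch the completion pattern this branch converges on: every work request now carries a struct ib_cqe whose ->done() callback replaces the old opaque wr_id cookie. The sketch below uses invented names (my_request, my_send_done, my_post_send); only struct ib_cqe, ib_post_send() and container_of() are real interfaces shown in the diff.

```c
#include <rdma/ib_verbs.h>

/* Hypothetical per-request context embedding the new completion entry. */
struct my_request {
	struct ib_cqe	cqe;	/* must outlive the posted WR */
	struct ib_sge	sge;
};

/* Typed completion handler; no more decoding of wr_id cookies. */
static void my_send_done(struct ib_cq *cq, struct ib_wc *wc)
{
	struct my_request *req =
		container_of(wc->wr_cqe, struct my_request, cqe);

	if (unlikely(wc->status != IB_WC_SUCCESS)) {
		pr_err("send failed: %s\n", ib_wc_status_msg(wc->status));
		return;
	}
	/* ... complete req ... */
}

static int my_post_send(struct ib_qp *qp, struct my_request *req)
{
	struct ib_send_wr *bad_wr;
	struct ib_send_wr wr = {
		.wr_cqe	    = &req->cqe,	/* replaces wr.wr_id */
		.sg_list    = &req->sge,
		.num_sge    = 1,
		.opcode	    = IB_WR_SEND,
		.send_flags = IB_SEND_SIGNALED,
	};

	req->cqe.done = my_send_done;
	return ib_post_send(qp, &wr, &bad_wr);
}
```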
diff --git a/Documentation/kernel-per-CPU-kthreads.txt b/Documentation/kernel-per-CPU-kthreads.txt index f4cbfe0..edec3a3 100644 --- a/Documentation/kernel-per-CPU-kthreads.txt +++ b/Documentation/kernel-per-CPU-kthreads.txt @@ -90,7 +90,7 @@ BLOCK_SOFTIRQ: Do all of the following: from being initiated from tasks that might run on the CPU to be de-jittered. (It is OK to force this CPU offline and then bring it back online before you start your application.) -BLOCK_IOPOLL_SOFTIRQ: Do all of the following: +IRQ_POLL_SOFTIRQ: Do all of the following: 1. Force block-device interrupts onto some other CPU. 2. Initiate any block I/O and block-I/O polling on other CPUs. 3. Once your application has started, prevent CPU-hotplug operations diff --git a/block/Makefile b/block/Makefile index 00ecc97..e850474 100644 --- a/block/Makefile +++ b/block/Makefile @@ -5,7 +5,7 @@ obj-$(CONFIG_BLOCK) := bio.o elevator.o blk-core.o blk-tag.o blk-sysfs.o \ blk-flush.o blk-settings.o blk-ioc.o blk-map.o \ blk-exec.o blk-merge.o blk-softirq.o blk-timeout.o \ - blk-iopoll.o blk-lib.o blk-mq.o blk-mq-tag.o \ + blk-lib.o blk-mq.o blk-mq-tag.o \ blk-mq-sysfs.o blk-mq-cpu.o blk-mq-cpumap.o ioctl.o \ genhd.o scsi_ioctl.o partition-generic.o ioprio.o \ partitions/ diff --git a/drivers/infiniband/Kconfig b/drivers/infiniband/Kconfig index aa26f3c..282ec0b 100644 --- a/drivers/infiniband/Kconfig +++ b/drivers/infiniband/Kconfig @@ -5,6 +5,7 @@ menuconfig INFINIBAND depends on NET depends on INET depends on m || IPV6 != m + select IRQ_POLL ---help--- Core support for InfiniBand (IB). Make sure to also select any protocols you wish to use as well as drivers for your diff --git a/drivers/infiniband/core/Makefile b/drivers/infiniband/core/Makefile index d43a899..ae48d8740 100644 --- a/drivers/infiniband/core/Makefile +++ b/drivers/infiniband/core/Makefile @@ -8,7 +8,7 @@ obj-$(CONFIG_INFINIBAND_USER_MAD) += ib_umad.o obj-$(CONFIG_INFINIBAND_USER_ACCESS) += ib_uverbs.o ib_ucm.o \ $(user_access-y) -ib_core-y := packer.o ud_header.o verbs.o sysfs.o \ +ib_core-y := packer.o ud_header.o verbs.o cq.o sysfs.o \ device.o fmr_pool.o cache.o netlink.o \ roce_gid_mgmt.o ib_core-$(CONFIG_INFINIBAND_USER_MEM) += umem.o diff --git a/drivers/infiniband/core/cq.c b/drivers/infiniband/core/cq.c new file mode 100644 index 0000000..a754fc7 --- /dev/null +++ b/drivers/infiniband/core/cq.c @@ -0,0 +1,209 @@ +/* + * Copyright (c) 2015 HGST, a Western Digital Company. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. 
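The hunks above rename BLOCK_IOPOLL_SOFTIRQ to IRQ_POLL_SOFTIRQ and make the InfiniBand core select IRQ_POLL: the old blk-iopoll engine has become the generic <linux/irq_poll.h> facility that cq.c builds on. As a reminder of its contract, a minimal hypothetical user (my_dev, my_poll and my_consume_events are invented; the irq_poll_* calls are the real API):

```c
#include <linux/irq_poll.h>

struct my_dev {
	struct irq_poll	iop;
};

/* Stub standing in for real event consumption, for illustration only. */
static int my_consume_events(struct my_dev *md, int budget)
{
	return 0;	/* number of events drained, at most budget */
}

static int my_poll(struct irq_poll *iop, int budget)
{
	struct my_dev *md = container_of(iop, struct my_dev, iop);
	int done = my_consume_events(md, budget);

	if (done < budget)
		irq_poll_complete(iop);	/* under budget: stop polling */
	return done;
}

/* Setup:  irq_poll_init(&md->iop, 64, my_poll);
 * From the hard-IRQ handler:  irq_poll_sched(&md->iop);
 */
```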
+ */ +#include <linux/module.h> +#include <linux/err.h> +#include <linux/slab.h> +#include <rdma/ib_verbs.h> + +/* # of WCs to poll for with a single call to ib_poll_cq */ +#define IB_POLL_BATCH 16 + +/* # of WCs to iterate over before yielding */ +#define IB_POLL_BUDGET_IRQ 256 +#define IB_POLL_BUDGET_WORKQUEUE 65536 + +#define IB_POLL_FLAGS \ + (IB_CQ_NEXT_COMP | IB_CQ_REPORT_MISSED_EVENTS) + +static int __ib_process_cq(struct ib_cq *cq, int budget) +{ + int i, n, completed = 0; + + while ((n = ib_poll_cq(cq, IB_POLL_BATCH, cq->wc)) > 0) { + for (i = 0; i < n; i++) { + struct ib_wc *wc = &cq->wc[i]; + + if (wc->wr_cqe) + wc->wr_cqe->done(cq, wc); + else + WARN_ON_ONCE(wc->status == IB_WC_SUCCESS); + } + + completed += n; + + if (n != IB_POLL_BATCH || + (budget != -1 && completed >= budget)) + break; + } + + return completed; +} + +/** + * ib_process_direct_cq - process a CQ in caller context + * @cq: CQ to process + * @budget: number of CQEs to poll for + * + * This function is used to process all outstanding CQ entries on a + * %IB_POLL_DIRECT CQ. It does not offload CQ processing to a different + * context and does not ask for completion interrupts from the HCA. + * + * Note: for compatibility reasons -1 can be passed in %budget for unlimited + * polling. Do not use this feature in new code, it will be removed soon. + */ +int ib_process_cq_direct(struct ib_cq *cq, int budget) +{ + WARN_ON_ONCE(cq->poll_ctx != IB_POLL_DIRECT); + + return __ib_process_cq(cq, budget); +} +EXPORT_SYMBOL(ib_process_cq_direct); + +static void ib_cq_completion_direct(struct ib_cq *cq, void *private) +{ + WARN_ONCE(1, "got unsolicited completion for CQ 0x%p\n", cq); +} + +static int ib_poll_handler(struct irq_poll *iop, int budget) +{ + struct ib_cq *cq = container_of(iop, struct ib_cq, iop); + int completed; + + completed = __ib_process_cq(cq, budget); + if (completed < budget) { + irq_poll_complete(&cq->iop); + if (ib_req_notify_cq(cq, IB_POLL_FLAGS) > 0) + irq_poll_sched(&cq->iop); + } + + return completed; +} + +static void ib_cq_completion_softirq(struct ib_cq *cq, void *private) +{ + irq_poll_sched(&cq->iop); +} + +static void ib_cq_poll_work(struct work_struct *work) +{ + struct ib_cq *cq = container_of(work, struct ib_cq, work); + int completed; + + completed = __ib_process_cq(cq, IB_POLL_BUDGET_WORKQUEUE); + if (completed >= IB_POLL_BUDGET_WORKQUEUE || + ib_req_notify_cq(cq, IB_POLL_FLAGS) > 0) + queue_work(ib_comp_wq, &cq->work); +} + +static void ib_cq_completion_workqueue(struct ib_cq *cq, void *private) +{ + queue_work(ib_comp_wq, &cq->work); +} + +/** + * ib_alloc_cq - allocate a completion queue + * @dev: device to allocate the CQ for + * @private: driver private data, accessible from cq->cq_context + * @nr_cqe: number of CQEs to allocate + * @comp_vector: HCA completion vectors for this CQ + * @poll_ctx: context to poll the CQ from. + * + * This is the proper interface to allocate a CQ for in-kernel users. A + * CQ allocated with this interface will automatically be polled from the + * specified context. The ULP needs must use wr->wr_cqe instead of wr->wr_id + * to use this CQ abstraction. 
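For an IB_POLL_DIRECT CQ the core never polls at all; the ULP reaps completions from its own context, as the SRP initiator does for its send queue later in this merge. A hedged sketch (my_reap_sends is an invented name):

```c
static void my_reap_sends(struct ib_cq *send_cq)
{
	/*
	 * Process up to 16 queued completions; each one fires the
	 * ib_cqe ->done() handler of its work request. Per the kernel-doc
	 * above, a budget of -1 (unlimited) is for legacy callers only.
	 */
	ib_process_cq_direct(send_cq, 16);
}
```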
+ */ +struct ib_cq *ib_alloc_cq(struct ib_device *dev, void *private, + int nr_cqe, int comp_vector, enum ib_poll_context poll_ctx) +{ + struct ib_cq_init_attr cq_attr = { + .cqe = nr_cqe, + .comp_vector = comp_vector, + }; + struct ib_cq *cq; + int ret = -ENOMEM; + + cq = dev->create_cq(dev, &cq_attr, NULL, NULL); + if (IS_ERR(cq)) + return cq; + + cq->device = dev; + cq->uobject = NULL; + cq->event_handler = NULL; + cq->cq_context = private; + cq->poll_ctx = poll_ctx; + atomic_set(&cq->usecnt, 0); + + cq->wc = kmalloc_array(IB_POLL_BATCH, sizeof(*cq->wc), GFP_KERNEL); + if (!cq->wc) + goto out_destroy_cq; + + switch (cq->poll_ctx) { + case IB_POLL_DIRECT: + cq->comp_handler = ib_cq_completion_direct; + break; + case IB_POLL_SOFTIRQ: + cq->comp_handler = ib_cq_completion_softirq; + + irq_poll_init(&cq->iop, IB_POLL_BUDGET_IRQ, ib_poll_handler); + ib_req_notify_cq(cq, IB_CQ_NEXT_COMP); + break; + case IB_POLL_WORKQUEUE: + cq->comp_handler = ib_cq_completion_workqueue; + INIT_WORK(&cq->work, ib_cq_poll_work); + ib_req_notify_cq(cq, IB_CQ_NEXT_COMP); + break; + default: + ret = -EINVAL; + goto out_free_wc; + } + + return cq; + +out_free_wc: + kfree(cq->wc); +out_destroy_cq: + cq->device->destroy_cq(cq); + return ERR_PTR(ret); +} +EXPORT_SYMBOL(ib_alloc_cq); + +/** + * ib_free_cq - free a completion queue + * @cq: completion queue to free. + */ +void ib_free_cq(struct ib_cq *cq) +{ + int ret; + + if (WARN_ON_ONCE(atomic_read(&cq->usecnt))) + return; + + switch (cq->poll_ctx) { + case IB_POLL_DIRECT: + break; + case IB_POLL_SOFTIRQ: + irq_poll_disable(&cq->iop); + break; + case IB_POLL_WORKQUEUE: + flush_work(&cq->work); + break; + default: + WARN_ON_ONCE(1); + } + + kfree(cq->wc); + ret = cq->device->destroy_cq(cq); + WARN_ON_ONCE(ret); +} +EXPORT_SYMBOL(ib_free_cq); diff --git a/drivers/infiniband/core/device.c b/drivers/infiniband/core/device.c index 179e813..6421d23 100644 --- a/drivers/infiniband/core/device.c +++ b/drivers/infiniband/core/device.c @@ -58,6 +58,7 @@ struct ib_client_data { bool going_down; }; +struct workqueue_struct *ib_comp_wq; struct workqueue_struct *ib_wq; EXPORT_SYMBOL_GPL(ib_wq); @@ -954,10 +955,18 @@ static int __init ib_core_init(void) if (!ib_wq) return -ENOMEM; + ib_comp_wq = alloc_workqueue("ib-comp-wq", + WQ_UNBOUND | WQ_HIGHPRI | WQ_MEM_RECLAIM, + WQ_UNBOUND_MAX_ACTIVE); + if (!ib_comp_wq) { + ret = -ENOMEM; + goto err; + } + ret = class_register(&ib_class); if (ret) { printk(KERN_WARNING "Couldn't create InfiniBand device class\n"); - goto err; + goto err_comp; } ret = ibnl_init(); @@ -972,7 +981,8 @@ static int __init ib_core_init(void) err_sysfs: class_unregister(&ib_class); - +err_comp: + destroy_workqueue(ib_comp_wq); err: destroy_workqueue(ib_wq); return ret; @@ -983,6 +993,7 @@ static void __exit ib_core_cleanup(void) ib_cache_cleanup(); ibnl_cleanup(); class_unregister(&ib_class); + destroy_workqueue(ib_comp_wq); /* Make sure that any pending umem accounting work is done. 
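A lifecycle sketch of the new allocator, with the trade-off between the three polling contexts spelled out in comments (function names and sizes here are illustrative, not from the patch):

```c
static struct ib_cq *my_setup_cq(struct ib_device *dev, void *priv)
{
	/*
	 * IB_POLL_SOFTIRQ:   lowest latency, handlers must not sleep.
	 * IB_POLL_WORKQUEUE: handlers may sleep; runs on the unbound,
	 *                    high-priority ib_comp_wq created above.
	 * IB_POLL_DIRECT:    no core polling; the ULP must call
	 *                    ib_process_cq_direct() itself.
	 */
	return ib_alloc_cq(dev, priv, 128 /* nr_cqe */, 0 /* comp vector */,
			   IB_POLL_WORKQUEUE);	/* ERR_PTR() on failure */
}

static void my_teardown_cq(struct ib_cq *cq)
{
	/* flushes the work item or disables irq_poll as appropriate */
	ib_free_cq(cq);
}
```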
*/ destroy_workqueue(ib_wq); } diff --git a/drivers/infiniband/ulp/ipoib/ipoib_cm.c b/drivers/infiniband/ulp/ipoib/ipoib_cm.c index 3ae9726..9b014f1 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_cm.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_cm.c @@ -70,7 +70,6 @@ static struct ib_qp_attr ipoib_cm_err_attr = { #define IPOIB_CM_RX_DRAIN_WRID 0xffffffff static struct ib_send_wr ipoib_cm_rx_drain_wr = { - .wr_id = IPOIB_CM_RX_DRAIN_WRID, .opcode = IB_WR_SEND, }; @@ -223,6 +222,7 @@ static void ipoib_cm_start_rx_drain(struct ipoib_dev_priv *priv) * error" WC will be immediately generated for each WR we post. */ p = list_entry(priv->cm.rx_flush_list.next, typeof(*p), list); + ipoib_cm_rx_drain_wr.wr_id = IPOIB_CM_RX_DRAIN_WRID; if (ib_post_send(p->qp, &ipoib_cm_rx_drain_wr, &bad_wr)) ipoib_warn(priv, "failed to post drain wr\n"); diff --git a/drivers/infiniband/ulp/iser/iscsi_iser.h b/drivers/infiniband/ulp/iser/iscsi_iser.h index 8a5998e..2411680 100644 --- a/drivers/infiniband/ulp/iser/iscsi_iser.h +++ b/drivers/infiniband/ulp/iser/iscsi_iser.h @@ -151,16 +151,12 @@ - ISER_MAX_RX_MISC_PDUS) / \ (1 + ISER_INFLIGHT_DATAOUTS)) -#define ISER_WC_BATCH_COUNT 16 #define ISER_SIGNAL_CMD_COUNT 32 #define ISER_VER 0x10 #define ISER_WSV 0x08 #define ISER_RSV 0x04 -#define ISER_FASTREG_LI_WRID 0xffffffffffffffffULL -#define ISER_BEACON_WRID 0xfffffffffffffffeULL - /** * struct iser_hdr - iSER header * @@ -269,7 +265,7 @@ enum iser_desc_type { #define ISER_MAX_WRS 7 /** - * struct iser_tx_desc - iSER TX descriptor (for send wr_id) + * struct iser_tx_desc - iSER TX descriptor * * @iser_header: iser header * @iscsi_header: iscsi header @@ -293,6 +289,7 @@ struct iser_tx_desc { u64 dma_addr; struct ib_sge tx_sg[2]; int num_sge; + struct ib_cqe cqe; bool mapped; u8 wr_idx; union iser_wr { @@ -306,9 +303,10 @@ struct iser_tx_desc { }; #define ISER_RX_PAD_SIZE (256 - (ISER_RX_PAYLOAD_SIZE + \ - sizeof(u64) + sizeof(struct ib_sge))) + sizeof(u64) + sizeof(struct ib_sge) + \ + sizeof(struct ib_cqe))) /** - * struct iser_rx_desc - iSER RX descriptor (for recv wr_id) + * struct iser_rx_desc - iSER RX descriptor * * @iser_header: iser header * @iscsi_header: iscsi header @@ -323,7 +321,27 @@ struct iser_rx_desc { char data[ISER_RECV_DATA_SEG_LEN]; u64 dma_addr; struct ib_sge rx_sg; + struct ib_cqe cqe; char pad[ISER_RX_PAD_SIZE]; +} __packed; + +/** + * struct iser_login_desc - iSER login descriptor + * + * @req: pointer to login request buffer + * @resp: pointer to login response buffer + * @req_dma: DMA address of login request buffer + * @rsp_dma: DMA address of login response buffer + * @sge: IB sge for login post recv + * @cqe: completion handler + */ +struct iser_login_desc { + void *req; + void *rsp; + u64 req_dma; + u64 rsp_dma; + struct ib_sge sge; + struct ib_cqe cqe; } __attribute__((packed)); struct iser_conn; @@ -333,18 +351,12 @@ struct iscsi_iser_task; /** * struct iser_comp - iSER completion context * - * @device: pointer to device handle * @cq: completion queue - * @wcs: work completion array - * @tasklet: Tasklet handle * @active_qps: Number of active QPs attached * to completion context */ struct iser_comp { - struct iser_device *device; struct ib_cq *cq; - struct ib_wc wcs[ISER_WC_BATCH_COUNT]; - struct tasklet_struct tasklet; int active_qps; }; @@ -475,10 +487,11 @@ struct iser_fr_pool { * @rx_wr: receive work request for batch posts * @device: reference to iser device * @comp: iser completion context - * @pi_support: Indicate device T10-PI support - * @beacon: beacon send wr to signal all 
flush errors were drained - * @flush_comp: completes when all connection completions consumed * @fr_pool: connection fast registration poool + * @pi_support: Indicate device T10-PI support + * @last: last send wr to signal all flush errors were drained + * @last_cqe: cqe handler for last wr + * @last_comp: completes when all connection completions consumed */ struct ib_conn { struct rdma_cm_id *cma_id; @@ -488,10 +501,12 @@ struct ib_conn { struct ib_recv_wr rx_wr[ISER_MIN_POSTED_RX]; struct iser_device *device; struct iser_comp *comp; - bool pi_support; - struct ib_send_wr beacon; - struct completion flush_comp; struct iser_fr_pool fr_pool; + bool pi_support; + struct ib_send_wr last; + struct ib_cqe last_cqe; + struct ib_cqe reg_cqe; + struct completion last_comp; }; /** @@ -514,11 +529,7 @@ struct ib_conn { * @up_completion: connection establishment completed * (state is ISER_CONN_UP) * @conn_list: entry in ig conn list - * @login_buf: login data buffer (stores login parameters) - * @login_req_buf: login request buffer - * @login_req_dma: login request buffer dma address - * @login_resp_buf: login response buffer - * @login_resp_dma: login response buffer dma address + * @login_desc: login descriptor * @rx_desc_head: head of rx_descs cyclic buffer * @rx_descs: rx buffers array (cyclic buffer) * @num_rx_descs: number of rx descriptors @@ -541,10 +552,7 @@ struct iser_conn { struct completion ib_completion; struct completion up_completion; struct list_head conn_list; - - char *login_buf; - char *login_req_buf, *login_resp_buf; - u64 login_req_dma, login_resp_dma; + struct iser_login_desc login_desc; unsigned int rx_desc_head; struct iser_rx_desc *rx_descs; u32 num_rx_descs; @@ -633,12 +641,14 @@ int iser_conn_terminate(struct iser_conn *iser_conn); void iser_release_work(struct work_struct *work); -void iser_rcv_completion(struct iser_rx_desc *desc, - unsigned long dto_xfer_len, - struct ib_conn *ib_conn); - -void iser_snd_completion(struct iser_tx_desc *desc, - struct ib_conn *ib_conn); +void iser_err_comp(struct ib_wc *wc, const char *type); +void iser_login_rsp(struct ib_cq *cq, struct ib_wc *wc); +void iser_task_rsp(struct ib_cq *cq, struct ib_wc *wc); +void iser_cmd_comp(struct ib_cq *cq, struct ib_wc *wc); +void iser_ctrl_comp(struct ib_cq *cq, struct ib_wc *wc); +void iser_dataout_comp(struct ib_cq *cq, struct ib_wc *wc); +void iser_reg_comp(struct ib_cq *cq, struct ib_wc *wc); +void iser_last_comp(struct ib_cq *cq, struct ib_wc *wc); void iser_task_rdma_init(struct iscsi_iser_task *task); @@ -719,4 +729,28 @@ iser_tx_next_wr(struct iser_tx_desc *tx_desc) return cur_wr; } +static inline struct iser_conn * +to_iser_conn(struct ib_conn *ib_conn) +{ + return container_of(ib_conn, struct iser_conn, ib_conn); +} + +static inline struct iser_rx_desc * +iser_rx(struct ib_cqe *cqe) +{ + return container_of(cqe, struct iser_rx_desc, cqe); +} + +static inline struct iser_tx_desc * +iser_tx(struct ib_cqe *cqe) +{ + return container_of(cqe, struct iser_tx_desc, cqe); +} + +static inline struct iser_login_desc * +iser_login(struct ib_cqe *cqe) +{ + return container_of(cqe, struct iser_login_desc, cqe); +} + #endif diff --git a/drivers/infiniband/ulp/iser/iser_initiator.c b/drivers/infiniband/ulp/iser/iser_initiator.c index ffd00c4..44ecb89 100644 --- a/drivers/infiniband/ulp/iser/iser_initiator.c +++ b/drivers/infiniband/ulp/iser/iser_initiator.c @@ -174,73 +174,63 @@ static void iser_create_send_desc(struct iser_conn *iser_conn, static void iser_free_login_buf(struct iser_conn *iser_conn) { 
struct iser_device *device = iser_conn->ib_conn.device; + struct iser_login_desc *desc = &iser_conn->login_desc; - if (!iser_conn->login_buf) + if (!desc->req) return; - if (iser_conn->login_req_dma) - ib_dma_unmap_single(device->ib_device, - iser_conn->login_req_dma, - ISCSI_DEF_MAX_RECV_SEG_LEN, DMA_TO_DEVICE); + ib_dma_unmap_single(device->ib_device, desc->req_dma, + ISCSI_DEF_MAX_RECV_SEG_LEN, DMA_TO_DEVICE); - if (iser_conn->login_resp_dma) - ib_dma_unmap_single(device->ib_device, - iser_conn->login_resp_dma, - ISER_RX_LOGIN_SIZE, DMA_FROM_DEVICE); + ib_dma_unmap_single(device->ib_device, desc->rsp_dma, + ISER_RX_LOGIN_SIZE, DMA_FROM_DEVICE); - kfree(iser_conn->login_buf); + kfree(desc->req); + kfree(desc->rsp); /* make sure we never redo any unmapping */ - iser_conn->login_req_dma = 0; - iser_conn->login_resp_dma = 0; - iser_conn->login_buf = NULL; + desc->req = NULL; + desc->rsp = NULL; } static int iser_alloc_login_buf(struct iser_conn *iser_conn) { struct iser_device *device = iser_conn->ib_conn.device; - int req_err, resp_err; - - BUG_ON(device == NULL); - - iser_conn->login_buf = kmalloc(ISCSI_DEF_MAX_RECV_SEG_LEN + - ISER_RX_LOGIN_SIZE, GFP_KERNEL); - if (!iser_conn->login_buf) - goto out_err; - - iser_conn->login_req_buf = iser_conn->login_buf; - iser_conn->login_resp_buf = iser_conn->login_buf + - ISCSI_DEF_MAX_RECV_SEG_LEN; - - iser_conn->login_req_dma = ib_dma_map_single(device->ib_device, - iser_conn->login_req_buf, - ISCSI_DEF_MAX_RECV_SEG_LEN, - DMA_TO_DEVICE); - - iser_conn->login_resp_dma = ib_dma_map_single(device->ib_device, - iser_conn->login_resp_buf, - ISER_RX_LOGIN_SIZE, - DMA_FROM_DEVICE); - - req_err = ib_dma_mapping_error(device->ib_device, - iser_conn->login_req_dma); - resp_err = ib_dma_mapping_error(device->ib_device, - iser_conn->login_resp_dma); - - if (req_err || resp_err) { - if (req_err) - iser_conn->login_req_dma = 0; - if (resp_err) - iser_conn->login_resp_dma = 0; - goto free_login_buf; - } + struct iser_login_desc *desc = &iser_conn->login_desc; + + desc->req = kmalloc(ISCSI_DEF_MAX_RECV_SEG_LEN, GFP_KERNEL); + if (!desc->req) + return -ENOMEM; + + desc->req_dma = ib_dma_map_single(device->ib_device, desc->req, + ISCSI_DEF_MAX_RECV_SEG_LEN, + DMA_TO_DEVICE); + if (ib_dma_mapping_error(device->ib_device, + desc->req_dma)) + goto free_req; + + desc->rsp = kmalloc(ISER_RX_LOGIN_SIZE, GFP_KERNEL); + if (!desc->rsp) + goto unmap_req; + + desc->rsp_dma = ib_dma_map_single(device->ib_device, desc->rsp, + ISER_RX_LOGIN_SIZE, + DMA_FROM_DEVICE); + if (ib_dma_mapping_error(device->ib_device, + desc->rsp_dma)) + goto free_rsp; + return 0; -free_login_buf: - iser_free_login_buf(iser_conn); +free_rsp: + kfree(desc->rsp); +unmap_req: + ib_dma_unmap_single(device->ib_device, desc->req_dma, + ISCSI_DEF_MAX_RECV_SEG_LEN, + DMA_TO_DEVICE); +free_req: + kfree(desc->req); -out_err: - iser_err("unable to alloc or map login buf\n"); return -ENOMEM; } @@ -280,11 +270,11 @@ int iser_alloc_rx_descriptors(struct iser_conn *iser_conn, goto rx_desc_dma_map_failed; rx_desc->dma_addr = dma_addr; - + rx_desc->cqe.done = iser_task_rsp; rx_sg = &rx_desc->rx_sg; - rx_sg->addr = rx_desc->dma_addr; + rx_sg->addr = rx_desc->dma_addr; rx_sg->length = ISER_RX_PAYLOAD_SIZE; - rx_sg->lkey = device->pd->local_dma_lkey; + rx_sg->lkey = device->pd->local_dma_lkey; } iser_conn->rx_desc_head = 0; @@ -383,6 +373,7 @@ int iser_send_command(struct iscsi_conn *conn, /* build the tx desc regd header and add it to the tx desc dto */ tx_desc->type = ISCSI_TX_SCSI_COMMAND; + tx_desc->cqe.done = 
iser_cmd_comp; iser_create_send_desc(iser_conn, tx_desc); if (hdr->flags & ISCSI_FLAG_CMD_READ) { @@ -464,6 +455,7 @@ int iser_send_data_out(struct iscsi_conn *conn, } tx_desc->type = ISCSI_TX_DATAOUT; + tx_desc->cqe.done = iser_dataout_comp; tx_desc->iser_header.flags = ISER_VER; memcpy(&tx_desc->iscsi_header, hdr, sizeof(struct iscsi_hdr)); @@ -513,6 +505,7 @@ int iser_send_control(struct iscsi_conn *conn, /* build the tx desc regd header and add it to the tx desc dto */ mdesc->type = ISCSI_TX_CONTROL; + mdesc->cqe.done = iser_ctrl_comp; iser_create_send_desc(iser_conn, mdesc); device = iser_conn->ib_conn.device; @@ -520,25 +513,25 @@ int iser_send_control(struct iscsi_conn *conn, data_seg_len = ntoh24(task->hdr->dlength); if (data_seg_len > 0) { + struct iser_login_desc *desc = &iser_conn->login_desc; struct ib_sge *tx_dsg = &mdesc->tx_sg[1]; + if (task != conn->login_task) { iser_err("data present on non login task!!!\n"); goto send_control_error; } - ib_dma_sync_single_for_cpu(device->ib_device, - iser_conn->login_req_dma, task->data_count, - DMA_TO_DEVICE); + ib_dma_sync_single_for_cpu(device->ib_device, desc->req_dma, + task->data_count, DMA_TO_DEVICE); - memcpy(iser_conn->login_req_buf, task->data, task->data_count); + memcpy(desc->req, task->data, task->data_count); - ib_dma_sync_single_for_device(device->ib_device, - iser_conn->login_req_dma, task->data_count, - DMA_TO_DEVICE); + ib_dma_sync_single_for_device(device->ib_device, desc->req_dma, + task->data_count, DMA_TO_DEVICE); - tx_dsg->addr = iser_conn->login_req_dma; - tx_dsg->length = task->data_count; - tx_dsg->lkey = device->pd->local_dma_lkey; + tx_dsg->addr = desc->req_dma; + tx_dsg->length = task->data_count; + tx_dsg->lkey = device->pd->local_dma_lkey; mdesc->num_sge = 2; } @@ -562,41 +555,69 @@ send_control_error: return err; } -/** - * iser_rcv_dto_completion - recv DTO completion - */ -void iser_rcv_completion(struct iser_rx_desc *rx_desc, - unsigned long rx_xfer_len, - struct ib_conn *ib_conn) +void iser_login_rsp(struct ib_cq *cq, struct ib_wc *wc) { - struct iser_conn *iser_conn = container_of(ib_conn, struct iser_conn, - ib_conn); + struct ib_conn *ib_conn = wc->qp->qp_context; + struct iser_conn *iser_conn = to_iser_conn(ib_conn); + struct iser_login_desc *desc = iser_login(wc->wr_cqe); struct iscsi_hdr *hdr; - u64 rx_dma; - int rx_buflen, outstanding, count, err; + char *data; + int length; - /* differentiate between login to all other PDUs */ - if ((char *)rx_desc == iser_conn->login_resp_buf) { - rx_dma = iser_conn->login_resp_dma; - rx_buflen = ISER_RX_LOGIN_SIZE; - } else { - rx_dma = rx_desc->dma_addr; - rx_buflen = ISER_RX_PAYLOAD_SIZE; + if (unlikely(wc->status != IB_WC_SUCCESS)) { + iser_err_comp(wc, "login_rsp"); + return; } - ib_dma_sync_single_for_cpu(ib_conn->device->ib_device, rx_dma, - rx_buflen, DMA_FROM_DEVICE); + ib_dma_sync_single_for_cpu(ib_conn->device->ib_device, + desc->rsp_dma, ISER_RX_LOGIN_SIZE, + DMA_FROM_DEVICE); - hdr = &rx_desc->iscsi_header; + hdr = desc->rsp + sizeof(struct iser_hdr); + data = desc->rsp + ISER_HEADERS_LEN; + length = wc->byte_len - ISER_HEADERS_LEN; iser_dbg("op 0x%x itt 0x%x dlen %d\n", hdr->opcode, - hdr->itt, (int)(rx_xfer_len - ISER_HEADERS_LEN)); + hdr->itt, length); + + iscsi_iser_recv(iser_conn->iscsi_conn, hdr, data, length); + + ib_dma_sync_single_for_device(ib_conn->device->ib_device, + desc->rsp_dma, ISER_RX_LOGIN_SIZE, + DMA_FROM_DEVICE); + + ib_conn->post_recv_buf_count--; +} + +void iser_task_rsp(struct ib_cq *cq, struct ib_wc *wc) +{ + struct 
ib_conn *ib_conn = wc->qp->qp_context; + struct iser_conn *iser_conn = to_iser_conn(ib_conn); + struct iser_rx_desc *desc = iser_rx(wc->wr_cqe); + struct iscsi_hdr *hdr; + int length; + int outstanding, count, err; - iscsi_iser_recv(iser_conn->iscsi_conn, hdr, rx_desc->data, - rx_xfer_len - ISER_HEADERS_LEN); + if (unlikely(wc->status != IB_WC_SUCCESS)) { + iser_err_comp(wc, "task_rsp"); + return; + } - ib_dma_sync_single_for_device(ib_conn->device->ib_device, rx_dma, - rx_buflen, DMA_FROM_DEVICE); + ib_dma_sync_single_for_cpu(ib_conn->device->ib_device, + desc->dma_addr, ISER_RX_PAYLOAD_SIZE, + DMA_FROM_DEVICE); + + hdr = &desc->iscsi_header; + length = wc->byte_len - ISER_HEADERS_LEN; + + iser_dbg("op 0x%x itt 0x%x dlen %d\n", hdr->opcode, + hdr->itt, length); + + iscsi_iser_recv(iser_conn->iscsi_conn, hdr, desc->data, length); + + ib_dma_sync_single_for_device(ib_conn->device->ib_device, + desc->dma_addr, ISER_RX_PAYLOAD_SIZE, + DMA_FROM_DEVICE); /* decrementing conn->post_recv_buf_count only --after-- freeing the * * task eliminates the need to worry on tasks which are completed in * @@ -604,9 +625,6 @@ void iser_rcv_completion(struct iser_rx_desc *rx_desc, * for the posted rx bufs refcount to become zero handles everything */ ib_conn->post_recv_buf_count--; - if (rx_dma == iser_conn->login_resp_dma) - return; - outstanding = ib_conn->post_recv_buf_count; if (outstanding + iser_conn->min_posted_rx <= iser_conn->qp_max_recv_dtos) { count = min(iser_conn->qp_max_recv_dtos - outstanding, @@ -617,26 +635,47 @@ void iser_rcv_completion(struct iser_rx_desc *rx_desc, } } -void iser_snd_completion(struct iser_tx_desc *tx_desc, - struct ib_conn *ib_conn) +void iser_cmd_comp(struct ib_cq *cq, struct ib_wc *wc) +{ + if (unlikely(wc->status != IB_WC_SUCCESS)) + iser_err_comp(wc, "command"); +} + +void iser_ctrl_comp(struct ib_cq *cq, struct ib_wc *wc) { + struct iser_tx_desc *desc = iser_tx(wc->wr_cqe); struct iscsi_task *task; - struct iser_device *device = ib_conn->device; - if (tx_desc->type == ISCSI_TX_DATAOUT) { - ib_dma_unmap_single(device->ib_device, tx_desc->dma_addr, - ISER_HEADERS_LEN, DMA_TO_DEVICE); - kmem_cache_free(ig.desc_cache, tx_desc); - tx_desc = NULL; + if (unlikely(wc->status != IB_WC_SUCCESS)) { + iser_err_comp(wc, "control"); + return; } - if (tx_desc && tx_desc->type == ISCSI_TX_CONTROL) { - /* this arithmetic is legal by libiscsi dd_data allocation */ - task = (void *) ((long)(void *)tx_desc - - sizeof(struct iscsi_task)); - if (task->hdr->itt == RESERVED_ITT) - iscsi_put_task(task); - } + /* this arithmetic is legal by libiscsi dd_data allocation */ + task = (void *)desc - sizeof(struct iscsi_task); + if (task->hdr->itt == RESERVED_ITT) + iscsi_put_task(task); +} + +void iser_dataout_comp(struct ib_cq *cq, struct ib_wc *wc) +{ + struct iser_tx_desc *desc = iser_tx(wc->wr_cqe); + struct ib_conn *ib_conn = wc->qp->qp_context; + struct iser_device *device = ib_conn->device; + + if (unlikely(wc->status != IB_WC_SUCCESS)) + iser_err_comp(wc, "dataout"); + + ib_dma_unmap_single(device->ib_device, desc->dma_addr, + ISER_HEADERS_LEN, DMA_TO_DEVICE); + kmem_cache_free(ig.desc_cache, desc); +} + +void iser_last_comp(struct ib_cq *cq, struct ib_wc *wc) +{ + struct ib_conn *ib_conn = wc->qp->qp_context; + + complete(&ib_conn->last_comp); } void iser_task_rdma_init(struct iscsi_iser_task *iser_task) diff --git a/drivers/infiniband/ulp/iser/iser_memory.c b/drivers/infiniband/ulp/iser/iser_memory.c index ea765fb..76ca09b 100644 --- a/drivers/infiniband/ulp/iser/iser_memory.c +++ 
b/drivers/infiniband/ulp/iser/iser_memory.c @@ -67,6 +67,11 @@ static struct iser_reg_ops fmr_ops = { .reg_desc_put = iser_reg_desc_put_fmr, }; +void iser_reg_comp(struct ib_cq *cq, struct ib_wc *wc) +{ + iser_err_comp(wc, "memreg"); +} + int iser_assign_reg_ops(struct iser_device *device) { struct ib_device_attr *dev_attr = &device->dev_attr; @@ -414,12 +419,14 @@ iser_set_prot_checks(struct scsi_cmnd *sc, u8 *mask) } static void -iser_inv_rkey(struct ib_send_wr *inv_wr, struct ib_mr *mr) +iser_inv_rkey(struct ib_send_wr *inv_wr, + struct ib_mr *mr, + struct ib_cqe *cqe) { u32 rkey; inv_wr->opcode = IB_WR_LOCAL_INV; - inv_wr->wr_id = ISER_FASTREG_LI_WRID; + inv_wr->wr_cqe = cqe; inv_wr->ex.invalidate_rkey = mr->rkey; inv_wr->send_flags = 0; inv_wr->num_sge = 0; @@ -437,6 +444,7 @@ iser_reg_sig_mr(struct iscsi_iser_task *iser_task, { struct iser_tx_desc *tx_desc = &iser_task->desc; struct ib_sig_attrs *sig_attrs = &tx_desc->sig_attrs; + struct ib_cqe *cqe = &iser_task->iser_conn->ib_conn.reg_cqe; struct ib_sig_handover_wr *wr; int ret; @@ -448,11 +456,11 @@ iser_reg_sig_mr(struct iscsi_iser_task *iser_task, iser_set_prot_checks(iser_task->sc, &sig_attrs->check_mask); if (!pi_ctx->sig_mr_valid) - iser_inv_rkey(iser_tx_next_wr(tx_desc), pi_ctx->sig_mr); + iser_inv_rkey(iser_tx_next_wr(tx_desc), pi_ctx->sig_mr, cqe); wr = sig_handover_wr(iser_tx_next_wr(tx_desc)); wr->wr.opcode = IB_WR_REG_SIG_MR; - wr->wr.wr_id = ISER_FASTREG_LI_WRID; + wr->wr.wr_cqe = cqe; wr->wr.sg_list = &data_reg->sge; wr->wr.num_sge = 1; wr->wr.send_flags = 0; @@ -485,12 +493,13 @@ static int iser_fast_reg_mr(struct iscsi_iser_task *iser_task, struct iser_mem_reg *reg) { struct iser_tx_desc *tx_desc = &iser_task->desc; + struct ib_cqe *cqe = &iser_task->iser_conn->ib_conn.reg_cqe; struct ib_mr *mr = rsc->mr; struct ib_reg_wr *wr; int n; if (!rsc->mr_valid) - iser_inv_rkey(iser_tx_next_wr(tx_desc), mr); + iser_inv_rkey(iser_tx_next_wr(tx_desc), mr, cqe); n = ib_map_mr_sg(mr, mem->sg, mem->size, SIZE_4K); if (unlikely(n != mem->size)) { @@ -501,7 +510,7 @@ static int iser_fast_reg_mr(struct iscsi_iser_task *iser_task, wr = reg_wr(iser_tx_next_wr(tx_desc)); wr->wr.opcode = IB_WR_REG_MR; - wr->wr.wr_id = ISER_FASTREG_LI_WRID; + wr->wr.wr_cqe = cqe; wr->wr.send_flags = 0; wr->wr.num_sge = 0; wr->mr = mr; diff --git a/drivers/infiniband/ulp/iser/iser_verbs.c b/drivers/infiniband/ulp/iser/iser_verbs.c index 42f4da6..2f2fc42 100644 --- a/drivers/infiniband/ulp/iser/iser_verbs.c +++ b/drivers/infiniband/ulp/iser/iser_verbs.c @@ -44,17 +44,6 @@ #define ISER_MAX_CQ_LEN (ISER_MAX_RX_LEN + ISER_MAX_TX_LEN + \ ISCSI_ISER_MAX_CONN) -static int iser_cq_poll_limit = 512; - -static void iser_cq_tasklet_fn(unsigned long data); -static void iser_cq_callback(struct ib_cq *cq, void *cq_context); - -static void iser_cq_event_callback(struct ib_event *cause, void *context) -{ - iser_err("cq event %s (%d)\n", - ib_event_msg(cause->event), cause->event); -} - static void iser_qp_event_callback(struct ib_event *cause, void *context) { iser_err("qp event %s (%d)\n", @@ -110,27 +99,14 @@ static int iser_create_device_ib_res(struct iser_device *device) goto pd_err; for (i = 0; i < device->comps_used; i++) { - struct ib_cq_init_attr cq_attr = {}; struct iser_comp *comp = &device->comps[i]; - comp->device = device; - cq_attr.cqe = max_cqe; - cq_attr.comp_vector = i; - comp->cq = ib_create_cq(device->ib_device, - iser_cq_callback, - iser_cq_event_callback, - (void *)comp, - &cq_attr); + comp->cq = ib_alloc_cq(device->ib_device, comp, max_cqe, i, + 
IB_POLL_SOFTIRQ); if (IS_ERR(comp->cq)) { comp->cq = NULL; goto cq_err; } - - if (ib_req_notify_cq(comp->cq, IB_CQ_NEXT_COMP)) - goto cq_err; - - tasklet_init(&comp->tasklet, iser_cq_tasklet_fn, - (unsigned long)comp); } if (!iser_always_reg) { @@ -140,7 +116,7 @@ static int iser_create_device_ib_res(struct iser_device *device) device->mr = ib_get_dma_mr(device->pd, access); if (IS_ERR(device->mr)) - goto dma_mr_err; + goto cq_err; } INIT_IB_EVENT_HANDLER(&device->event_handler, device->ib_device, @@ -153,15 +129,12 @@ static int iser_create_device_ib_res(struct iser_device *device) handler_err: if (device->mr) ib_dereg_mr(device->mr); -dma_mr_err: - for (i = 0; i < device->comps_used; i++) - tasklet_kill(&device->comps[i].tasklet); cq_err: for (i = 0; i < device->comps_used; i++) { struct iser_comp *comp = &device->comps[i]; if (comp->cq) - ib_destroy_cq(comp->cq); + ib_free_cq(comp->cq); } ib_dealloc_pd(device->pd); pd_err: @@ -182,8 +155,7 @@ static void iser_free_device_ib_res(struct iser_device *device) for (i = 0; i < device->comps_used; i++) { struct iser_comp *comp = &device->comps[i]; - tasklet_kill(&comp->tasklet); - ib_destroy_cq(comp->cq); + ib_free_cq(comp->cq); comp->cq = NULL; } @@ -461,8 +433,7 @@ void iser_free_fastreg_pool(struct ib_conn *ib_conn) */ static int iser_create_ib_conn_res(struct ib_conn *ib_conn) { - struct iser_conn *iser_conn = container_of(ib_conn, struct iser_conn, - ib_conn); + struct iser_conn *iser_conn = to_iser_conn(ib_conn); struct iser_device *device; struct ib_device_attr *dev_attr; struct ib_qp_init_attr init_attr; @@ -724,13 +695,13 @@ int iser_conn_terminate(struct iser_conn *iser_conn) iser_conn, err); /* post an indication that all flush errors were consumed */ - err = ib_post_send(ib_conn->qp, &ib_conn->beacon, &bad_wr); + err = ib_post_send(ib_conn->qp, &ib_conn->last, &bad_wr); if (err) { - iser_err("conn %p failed to post beacon", ib_conn); + iser_err("conn %p failed to post last wr", ib_conn); return 1; } - wait_for_completion(&ib_conn->flush_comp); + wait_for_completion(&ib_conn->last_comp); } return 1; @@ -967,14 +938,21 @@ static int iser_cma_handler(struct rdma_cm_id *cma_id, struct rdma_cm_event *eve void iser_conn_init(struct iser_conn *iser_conn) { + struct ib_conn *ib_conn = &iser_conn->ib_conn; + iser_conn->state = ISER_CONN_INIT; - iser_conn->ib_conn.post_recv_buf_count = 0; - init_completion(&iser_conn->ib_conn.flush_comp); init_completion(&iser_conn->stop_completion); init_completion(&iser_conn->ib_completion); init_completion(&iser_conn->up_completion); INIT_LIST_HEAD(&iser_conn->conn_list); mutex_init(&iser_conn->state_mutex); + + ib_conn->post_recv_buf_count = 0; + ib_conn->reg_cqe.done = iser_reg_comp; + ib_conn->last_cqe.done = iser_last_comp; + ib_conn->last.wr_cqe = &ib_conn->last_cqe; + ib_conn->last.opcode = IB_WR_SEND; + init_completion(&ib_conn->last_comp); } /** @@ -1000,9 +978,6 @@ int iser_connect(struct iser_conn *iser_conn, iser_conn->state = ISER_CONN_PENDING; - ib_conn->beacon.wr_id = ISER_BEACON_WRID; - ib_conn->beacon.opcode = IB_WR_SEND; - ib_conn->cma_id = rdma_create_id(&init_net, iser_cma_handler, (void *)iser_conn, RDMA_PS_TCP, IB_QPT_RC); @@ -1045,56 +1020,60 @@ connect_failure: int iser_post_recvl(struct iser_conn *iser_conn) { - struct ib_recv_wr rx_wr, *rx_wr_failed; struct ib_conn *ib_conn = &iser_conn->ib_conn; - struct ib_sge sge; + struct iser_login_desc *desc = &iser_conn->login_desc; + struct ib_recv_wr wr, *wr_failed; int ib_ret; - sge.addr = iser_conn->login_resp_dma; - sge.length = 
ISER_RX_LOGIN_SIZE; - sge.lkey = ib_conn->device->pd->local_dma_lkey; + desc->sge.addr = desc->rsp_dma; + desc->sge.length = ISER_RX_LOGIN_SIZE; + desc->sge.lkey = ib_conn->device->pd->local_dma_lkey; - rx_wr.wr_id = (uintptr_t)iser_conn->login_resp_buf; - rx_wr.sg_list = &sge; - rx_wr.num_sge = 1; - rx_wr.next = NULL; + desc->cqe.done = iser_login_rsp; + wr.wr_cqe = &desc->cqe; + wr.sg_list = &desc->sge; + wr.num_sge = 1; + wr.next = NULL; ib_conn->post_recv_buf_count++; - ib_ret = ib_post_recv(ib_conn->qp, &rx_wr, &rx_wr_failed); + ib_ret = ib_post_recv(ib_conn->qp, &wr, &wr_failed); if (ib_ret) { iser_err("ib_post_recv failed ret=%d\n", ib_ret); ib_conn->post_recv_buf_count--; } + return ib_ret; } int iser_post_recvm(struct iser_conn *iser_conn, int count) { - struct ib_recv_wr *rx_wr, *rx_wr_failed; - int i, ib_ret; struct ib_conn *ib_conn = &iser_conn->ib_conn; unsigned int my_rx_head = iser_conn->rx_desc_head; struct iser_rx_desc *rx_desc; + struct ib_recv_wr *wr, *wr_failed; + int i, ib_ret; - for (rx_wr = ib_conn->rx_wr, i = 0; i < count; i++, rx_wr++) { - rx_desc = &iser_conn->rx_descs[my_rx_head]; - rx_wr->wr_id = (uintptr_t)rx_desc; - rx_wr->sg_list = &rx_desc->rx_sg; - rx_wr->num_sge = 1; - rx_wr->next = rx_wr + 1; + for (wr = ib_conn->rx_wr, i = 0; i < count; i++, wr++) { + rx_desc = &iser_conn->rx_descs[my_rx_head]; + rx_desc->cqe.done = iser_task_rsp; + wr->wr_cqe = &rx_desc->cqe; + wr->sg_list = &rx_desc->rx_sg; + wr->num_sge = 1; + wr->next = wr + 1; my_rx_head = (my_rx_head + 1) & iser_conn->qp_max_recv_dtos_mask; } - rx_wr--; - rx_wr->next = NULL; /* mark end of work requests list */ + wr--; + wr->next = NULL; /* mark end of work requests list */ ib_conn->post_recv_buf_count += count; - ib_ret = ib_post_recv(ib_conn->qp, ib_conn->rx_wr, &rx_wr_failed); + ib_ret = ib_post_recv(ib_conn->qp, ib_conn->rx_wr, &wr_failed); if (ib_ret) { iser_err("ib_post_recv failed ret=%d\n", ib_ret); ib_conn->post_recv_buf_count -= count; } else iser_conn->rx_desc_head = my_rx_head; + return ib_ret; } @@ -1115,7 +1094,7 @@ int iser_post_send(struct ib_conn *ib_conn, struct iser_tx_desc *tx_desc, DMA_TO_DEVICE); wr->next = NULL; - wr->wr_id = (uintptr_t)tx_desc; + wr->wr_cqe = &tx_desc->cqe; wr->sg_list = tx_desc->tx_sg; wr->num_sge = tx_desc->num_sge; wr->opcode = IB_WR_SEND; @@ -1129,149 +1108,6 @@ int iser_post_send(struct ib_conn *ib_conn, struct iser_tx_desc *tx_desc, return ib_ret; } -/** - * is_iser_tx_desc - Indicate if the completion wr_id - * is a TX descriptor or not. - * @iser_conn: iser connection - * @wr_id: completion WR identifier - * - * Since we cannot rely on wc opcode in FLUSH errors - * we must work around it by checking if the wr_id address - * falls in the iser connection rx_descs buffer. If so - * it is an RX descriptor, otherwize it is a TX. - */ -static inline bool -is_iser_tx_desc(struct iser_conn *iser_conn, void *wr_id) -{ - void *start = iser_conn->rx_descs; - int len = iser_conn->num_rx_descs * sizeof(*iser_conn->rx_descs); - - if (wr_id >= start && wr_id < start + len) - return false; - - return true; -} - -/** - * iser_handle_comp_error() - Handle error completion - * @ib_conn: connection RDMA resources - * @wc: work completion - * - * Notes: We may handle a FLUSH error completion and in this case - * we only cleanup in case TX type was DATAOUT. For non-FLUSH - * error completion we should also notify iscsi layer that - * connection is failed (in case we passed bind stage). 
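The receive-side conversion above shows the main payoff: because each descriptor carries its own ib_cqe, the wr_id guesswork that is_iser_tx_desc() performed (removed just below) is unnecessary even for flush errors. The same pattern reduced to a sketch, with invented names (my_rx_desc, my_recv_done, my_post_recv):

```c
struct my_rx_desc {
	struct ib_cqe	cqe;
	struct ib_sge	sge;
};

static void my_recv_done(struct ib_cq *cq, struct ib_wc *wc)
{
	struct my_rx_desc *rx =
		container_of(wc->wr_cqe, struct my_rx_desc, cqe);
	/* reached for successful receives and for flush errors alike */
}

static int my_post_recv(struct ib_qp *qp, struct my_rx_desc *rx)
{
	struct ib_recv_wr *bad_wr;
	struct ib_recv_wr wr = {
		.wr_cqe	 = &rx->cqe,
		.sg_list = &rx->sge,
		.num_sge = 1,
	};

	rx->cqe.done = my_recv_done;
	return ib_post_recv(qp, &wr, &bad_wr);
}
```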
- */ -static void -iser_handle_comp_error(struct ib_conn *ib_conn, - struct ib_wc *wc) -{ - void *wr_id = (void *)(uintptr_t)wc->wr_id; - struct iser_conn *iser_conn = container_of(ib_conn, struct iser_conn, - ib_conn); - - if (wc->status != IB_WC_WR_FLUSH_ERR) - if (iser_conn->iscsi_conn) - iscsi_conn_failure(iser_conn->iscsi_conn, - ISCSI_ERR_CONN_FAILED); - - if (wc->wr_id == ISER_FASTREG_LI_WRID) - return; - - if (is_iser_tx_desc(iser_conn, wr_id)) { - struct iser_tx_desc *desc = wr_id; - - if (desc->type == ISCSI_TX_DATAOUT) - kmem_cache_free(ig.desc_cache, desc); - } else { - ib_conn->post_recv_buf_count--; - } -} - -/** - * iser_handle_wc - handle a single work completion - * @wc: work completion - * - * Soft-IRQ context, work completion can be either - * SEND or RECV, and can turn out successful or - * with error (or flush error). - */ -static void iser_handle_wc(struct ib_wc *wc) -{ - struct ib_conn *ib_conn; - struct iser_tx_desc *tx_desc; - struct iser_rx_desc *rx_desc; - - ib_conn = wc->qp->qp_context; - if (likely(wc->status == IB_WC_SUCCESS)) { - if (wc->opcode == IB_WC_RECV) { - rx_desc = (struct iser_rx_desc *)(uintptr_t)wc->wr_id; - iser_rcv_completion(rx_desc, wc->byte_len, - ib_conn); - } else - if (wc->opcode == IB_WC_SEND) { - tx_desc = (struct iser_tx_desc *)(uintptr_t)wc->wr_id; - iser_snd_completion(tx_desc, ib_conn); - } else { - iser_err("Unknown wc opcode %d\n", wc->opcode); - } - } else { - if (wc->status != IB_WC_WR_FLUSH_ERR) - iser_err("%s (%d): wr id %llx vend_err %x\n", - ib_wc_status_msg(wc->status), wc->status, - wc->wr_id, wc->vendor_err); - else - iser_dbg("%s (%d): wr id %llx\n", - ib_wc_status_msg(wc->status), wc->status, - wc->wr_id); - - if (wc->wr_id == ISER_BEACON_WRID) - /* all flush errors were consumed */ - complete(&ib_conn->flush_comp); - else - iser_handle_comp_error(ib_conn, wc); - } -} - -/** - * iser_cq_tasklet_fn - iSER completion polling loop - * @data: iSER completion context - * - * Soft-IRQ context, polling connection CQ until - * either CQ was empty or we exausted polling budget - */ -static void iser_cq_tasklet_fn(unsigned long data) -{ - struct iser_comp *comp = (struct iser_comp *)data; - struct ib_cq *cq = comp->cq; - struct ib_wc *const wcs = comp->wcs; - int i, n, completed = 0; - - while ((n = ib_poll_cq(cq, ARRAY_SIZE(comp->wcs), wcs)) > 0) { - for (i = 0; i < n; i++) - iser_handle_wc(&wcs[i]); - - completed += n; - if (completed >= iser_cq_poll_limit) - break; - } - - /* - * It is assumed here that arming CQ only once its empty - * would not cause interrupts to be missed. 
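The removed tasklet simply re-armed the CQ and assumed no completions could be missed in the window. The new core handler (ib_poll_handler, added earlier in this series) closes that race explicitly with IB_CQ_REPORT_MISSED_EVENTS; the idiom, isolated into a hypothetical helper:

```c
static void my_rearm_or_repoll(struct ib_cq *cq, struct irq_poll *iop)
{
	/*
	 * A positive return means completions arrived between the last
	 * poll and the rearm; keep polling instead of waiting for an
	 * interrupt that will never be generated for them.
	 */
	if (ib_req_notify_cq(cq, IB_CQ_NEXT_COMP |
				 IB_CQ_REPORT_MISSED_EVENTS) > 0)
		irq_poll_sched(iop);
}
```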
- */ - ib_req_notify_cq(cq, IB_CQ_NEXT_COMP); - - iser_dbg("got %d completions\n", completed); -} - -static void iser_cq_callback(struct ib_cq *cq, void *cq_context) -{ - struct iser_comp *comp = cq_context; - - tasklet_schedule(&comp->tasklet); -} - u8 iser_check_task_pi_status(struct iscsi_iser_task *iser_task, enum iser_data_dir cmd_dir, sector_t *sector) { @@ -1319,3 +1155,21 @@ err: /* Not alot we can do here, return ambiguous guard error */ return 0x1; } + +void iser_err_comp(struct ib_wc *wc, const char *type) +{ + if (wc->status != IB_WC_WR_FLUSH_ERR) { + struct iser_conn *iser_conn = to_iser_conn(wc->qp->qp_context); + + iser_err("%s failure: %s (%d) vend_err %x\n", type, + ib_wc_status_msg(wc->status), wc->status, + wc->vendor_err); + + if (iser_conn->iscsi_conn) + iscsi_conn_failure(iser_conn->iscsi_conn, + ISCSI_ERR_CONN_FAILED); + } else { + iser_dbg("%s failure: %s (%d)\n", type, + ib_wc_status_msg(wc->status), wc->status); + } +} diff --git a/drivers/infiniband/ulp/srp/ib_srp.c b/drivers/infiniband/ulp/srp/ib_srp.c index 3db9a65..20fa332 100644 --- a/drivers/infiniband/ulp/srp/ib_srp.c +++ b/drivers/infiniband/ulp/srp/ib_srp.c @@ -132,8 +132,9 @@ MODULE_PARM_DESC(ch_count, static void srp_add_one(struct ib_device *device); static void srp_remove_one(struct ib_device *device, void *client_data); -static void srp_recv_completion(struct ib_cq *cq, void *ch_ptr); -static void srp_send_completion(struct ib_cq *cq, void *ch_ptr); +static void srp_recv_done(struct ib_cq *cq, struct ib_wc *wc); +static void srp_handle_qp_err(struct ib_cq *cq, struct ib_wc *wc, + const char *opname); static int srp_cm_handler(struct ib_cm_id *cm_id, struct ib_cm_event *event); static struct scsi_transport_template *ib_srp_transport_template; @@ -445,6 +446,17 @@ static struct srp_fr_pool *srp_alloc_fr_pool(struct srp_target_port *target) dev->max_pages_per_mr); } +static void srp_drain_done(struct ib_cq *cq, struct ib_wc *wc) +{ + struct srp_rdma_ch *ch = cq->cq_context; + + complete(&ch->done); +} + +static struct ib_cqe srp_drain_cqe = { + .done = srp_drain_done, +}; + /** * srp_destroy_qp() - destroy an RDMA queue pair * @ch: SRP RDMA channel. @@ -457,10 +469,11 @@ static struct srp_fr_pool *srp_alloc_fr_pool(struct srp_target_port *target) static void srp_destroy_qp(struct srp_rdma_ch *ch) { static struct ib_qp_attr attr = { .qp_state = IB_QPS_ERR }; - static struct ib_recv_wr wr = { .wr_id = SRP_LAST_WR_ID }; + static struct ib_recv_wr wr = { 0 }; struct ib_recv_wr *bad_wr; int ret; + wr.wr_cqe = &srp_drain_cqe; /* Destroying a QP and reusing ch->done is only safe if not connected */ WARN_ON_ONCE(ch->connected); @@ -489,34 +502,27 @@ static int srp_create_ch_ib(struct srp_rdma_ch *ch) struct ib_fmr_pool *fmr_pool = NULL; struct srp_fr_pool *fr_pool = NULL; const int m = dev->use_fast_reg ? 
3 : 1; - struct ib_cq_init_attr cq_attr = {}; int ret; init_attr = kzalloc(sizeof *init_attr, GFP_KERNEL); if (!init_attr) return -ENOMEM; - /* + 1 for SRP_LAST_WR_ID */ - cq_attr.cqe = target->queue_size + 1; - cq_attr.comp_vector = ch->comp_vector; - recv_cq = ib_create_cq(dev->dev, srp_recv_completion, NULL, ch, - &cq_attr); + /* queue_size + 1 for ib_drain_qp */ + recv_cq = ib_alloc_cq(dev->dev, ch, target->queue_size + 1, + ch->comp_vector, IB_POLL_SOFTIRQ); if (IS_ERR(recv_cq)) { ret = PTR_ERR(recv_cq); goto err; } - cq_attr.cqe = m * target->queue_size; - cq_attr.comp_vector = ch->comp_vector; - send_cq = ib_create_cq(dev->dev, srp_send_completion, NULL, ch, - &cq_attr); + send_cq = ib_alloc_cq(dev->dev, ch, m * target->queue_size, + ch->comp_vector, IB_POLL_DIRECT); if (IS_ERR(send_cq)) { ret = PTR_ERR(send_cq); goto err_recv_cq; } - ib_req_notify_cq(recv_cq, IB_CQ_NEXT_COMP); - init_attr->event_handler = srp_qp_event; init_attr->cap.max_send_wr = m * target->queue_size; init_attr->cap.max_recv_wr = target->queue_size + 1; @@ -558,9 +564,9 @@ static int srp_create_ch_ib(struct srp_rdma_ch *ch) if (ch->qp) srp_destroy_qp(ch); if (ch->recv_cq) - ib_destroy_cq(ch->recv_cq); + ib_free_cq(ch->recv_cq); if (ch->send_cq) - ib_destroy_cq(ch->send_cq); + ib_free_cq(ch->send_cq); ch->qp = qp; ch->recv_cq = recv_cq; @@ -580,13 +586,13 @@ static int srp_create_ch_ib(struct srp_rdma_ch *ch) return 0; err_qp: - ib_destroy_qp(qp); + srp_destroy_qp(ch); err_send_cq: - ib_destroy_cq(send_cq); + ib_free_cq(send_cq); err_recv_cq: - ib_destroy_cq(recv_cq); + ib_free_cq(recv_cq); err: kfree(init_attr); @@ -622,9 +628,10 @@ static void srp_free_ch_ib(struct srp_target_port *target, if (ch->fmr_pool) ib_destroy_fmr_pool(ch->fmr_pool); } + srp_destroy_qp(ch); - ib_destroy_cq(ch->send_cq); - ib_destroy_cq(ch->recv_cq); + ib_free_cq(ch->send_cq); + ib_free_cq(ch->recv_cq); /* * Avoid that the SCSI error handler tries to use this channel after @@ -1041,18 +1048,25 @@ out: return ret <= 0 ? 
ret : -ENODEV; } -static int srp_inv_rkey(struct srp_rdma_ch *ch, u32 rkey) +static void srp_inv_rkey_err_done(struct ib_cq *cq, struct ib_wc *wc) +{ + srp_handle_qp_err(cq, wc, "INV RKEY"); +} + +static int srp_inv_rkey(struct srp_request *req, struct srp_rdma_ch *ch, + u32 rkey) { struct ib_send_wr *bad_wr; struct ib_send_wr wr = { .opcode = IB_WR_LOCAL_INV, - .wr_id = LOCAL_INV_WR_ID_MASK, .next = NULL, .num_sge = 0, .send_flags = 0, .ex.invalidate_rkey = rkey, }; + wr.wr_cqe = &req->reg_cqe; + req->reg_cqe.done = srp_inv_rkey_err_done; return ib_post_send(ch->qp, &wr, &bad_wr); } @@ -1074,7 +1088,7 @@ static void srp_unmap_data(struct scsi_cmnd *scmnd, struct srp_fr_desc **pfr; for (i = req->nmdesc, pfr = req->fr_list; i > 0; i--, pfr++) { - res = srp_inv_rkey(ch, (*pfr)->mr->rkey); + res = srp_inv_rkey(req, ch, (*pfr)->mr->rkey); if (res < 0) { shost_printk(KERN_ERR, target->scsi_host, PFX "Queueing INV WR for rkey %#x failed (%d)\n", @@ -1312,7 +1326,13 @@ reset_state: return 0; } +static void srp_reg_mr_err_done(struct ib_cq *cq, struct ib_wc *wc) +{ + srp_handle_qp_err(cq, wc, "FAST REG"); +} + static int srp_map_finish_fr(struct srp_map_state *state, + struct srp_request *req, struct srp_rdma_ch *ch, int sg_nents) { struct srp_target_port *target = ch->target; @@ -1349,9 +1369,11 @@ static int srp_map_finish_fr(struct srp_map_state *state, if (unlikely(n < 0)) return n; + req->reg_cqe.done = srp_reg_mr_err_done; + wr.wr.next = NULL; wr.wr.opcode = IB_WR_REG_MR; - wr.wr.wr_id = FAST_REG_WR_ID_MASK; + wr.wr.wr_cqe = &req->reg_cqe; wr.wr.num_sge = 0; wr.wr.send_flags = 0; wr.mr = desc->mr; @@ -1455,7 +1477,7 @@ static int srp_map_sg_fr(struct srp_map_state *state, struct srp_rdma_ch *ch, while (count) { int i, n; - n = srp_map_finish_fr(state, ch, count); + n = srp_map_finish_fr(state, req, ch, count); if (unlikely(n < 0)) return n; @@ -1524,7 +1546,7 @@ static int srp_map_idb(struct srp_rdma_ch *ch, struct srp_request *req, #ifdef CONFIG_NEED_SG_DMA_LENGTH idb_sg->dma_length = idb_sg->length; /* hack^2 */ #endif - ret = srp_map_finish_fr(&state, ch, 1); + ret = srp_map_finish_fr(&state, req, ch, 1); if (ret < 0) return ret; } else if (dev->use_fmr) { @@ -1719,7 +1741,7 @@ static struct srp_iu *__srp_get_tx_iu(struct srp_rdma_ch *ch, s32 rsv = (iu_type == SRP_IU_TSK_MGMT) ? 
0 : SRP_TSK_MGMT_SQ_SIZE; struct srp_iu *iu; - srp_send_completion(ch->send_cq, ch); + ib_process_cq_direct(ch->send_cq, -1); if (list_empty(&ch->free_tx)) return NULL; @@ -1739,6 +1761,19 @@ static struct srp_iu *__srp_get_tx_iu(struct srp_rdma_ch *ch, return iu; } +static void srp_send_done(struct ib_cq *cq, struct ib_wc *wc) +{ + struct srp_iu *iu = container_of(wc->wr_cqe, struct srp_iu, cqe); + struct srp_rdma_ch *ch = cq->cq_context; + + if (unlikely(wc->status != IB_WC_SUCCESS)) { + srp_handle_qp_err(cq, wc, "SEND"); + return; + } + + list_add(&iu->list, &ch->free_tx); +} + static int srp_post_send(struct srp_rdma_ch *ch, struct srp_iu *iu, int len) { struct srp_target_port *target = ch->target; @@ -1749,8 +1784,10 @@ static int srp_post_send(struct srp_rdma_ch *ch, struct srp_iu *iu, int len) list.length = len; list.lkey = target->lkey; + iu->cqe.done = srp_send_done; + wr.next = NULL; - wr.wr_id = (uintptr_t) iu; + wr.wr_cqe = &iu->cqe; wr.sg_list = &list; wr.num_sge = 1; wr.opcode = IB_WR_SEND; @@ -1769,8 +1806,10 @@ static int srp_post_recv(struct srp_rdma_ch *ch, struct srp_iu *iu) list.length = iu->size; list.lkey = target->lkey; + iu->cqe.done = srp_recv_done; + wr.next = NULL; - wr.wr_id = (uintptr_t) iu; + wr.wr_cqe = &iu->cqe; wr.sg_list = &list; wr.num_sge = 1; @@ -1902,14 +1941,20 @@ static void srp_process_aer_req(struct srp_rdma_ch *ch, "problems processing SRP_AER_REQ\n"); } -static void srp_handle_recv(struct srp_rdma_ch *ch, struct ib_wc *wc) +static void srp_recv_done(struct ib_cq *cq, struct ib_wc *wc) { + struct srp_iu *iu = container_of(wc->wr_cqe, struct srp_iu, cqe); + struct srp_rdma_ch *ch = cq->cq_context; struct srp_target_port *target = ch->target; struct ib_device *dev = target->srp_host->srp_dev->dev; - struct srp_iu *iu = (struct srp_iu *) (uintptr_t) wc->wr_id; int res; u8 opcode; + if (unlikely(wc->status != IB_WC_SUCCESS)) { + srp_handle_qp_err(cq, wc, "RECV"); + return; + } + ib_dma_sync_single_for_cpu(dev, iu->dma, ch->max_ti_iu_len, DMA_FROM_DEVICE); @@ -1972,68 +2017,22 @@ static void srp_tl_err_work(struct work_struct *work) srp_start_tl_fail_timers(target->rport); } -static void srp_handle_qp_err(u64 wr_id, enum ib_wc_status wc_status, - bool send_err, struct srp_rdma_ch *ch) +static void srp_handle_qp_err(struct ib_cq *cq, struct ib_wc *wc, + const char *opname) { + struct srp_rdma_ch *ch = cq->cq_context; struct srp_target_port *target = ch->target; - if (wr_id == SRP_LAST_WR_ID) { - complete(&ch->done); - return; - } - if (ch->connected && !target->qp_in_error) { - if (wr_id & LOCAL_INV_WR_ID_MASK) { - shost_printk(KERN_ERR, target->scsi_host, PFX - "LOCAL_INV failed with status %s (%d)\n", - ib_wc_status_msg(wc_status), wc_status); - } else if (wr_id & FAST_REG_WR_ID_MASK) { - shost_printk(KERN_ERR, target->scsi_host, PFX - "FAST_REG_MR failed status %s (%d)\n", - ib_wc_status_msg(wc_status), wc_status); - } else { - shost_printk(KERN_ERR, target->scsi_host, - PFX "failed %s status %s (%d) for iu %p\n", - send_err ? 
"send" : "receive", - ib_wc_status_msg(wc_status), wc_status, - (void *)(uintptr_t)wr_id); - } + shost_printk(KERN_ERR, target->scsi_host, + PFX "failed %s status %s (%d) for CQE %p\n", + opname, ib_wc_status_msg(wc->status), wc->status, + wc->wr_cqe); queue_work(system_long_wq, &target->tl_err_work); } target->qp_in_error = true; } -static void srp_recv_completion(struct ib_cq *cq, void *ch_ptr) -{ - struct srp_rdma_ch *ch = ch_ptr; - struct ib_wc wc; - - ib_req_notify_cq(cq, IB_CQ_NEXT_COMP); - while (ib_poll_cq(cq, 1, &wc) > 0) { - if (likely(wc.status == IB_WC_SUCCESS)) { - srp_handle_recv(ch, &wc); - } else { - srp_handle_qp_err(wc.wr_id, wc.status, false, ch); - } - } -} - -static void srp_send_completion(struct ib_cq *cq, void *ch_ptr) -{ - struct srp_rdma_ch *ch = ch_ptr; - struct ib_wc wc; - struct srp_iu *iu; - - while (ib_poll_cq(cq, 1, &wc) > 0) { - if (likely(wc.status == IB_WC_SUCCESS)) { - iu = (struct srp_iu *) (uintptr_t) wc.wr_id; - list_add(&iu->list, &ch->free_tx); - } else { - srp_handle_qp_err(wc.wr_id, wc.status, true, ch); - } - } -} - static int srp_queuecommand(struct Scsi_Host *shost, struct scsi_cmnd *scmnd) { struct srp_target_port *target = host_to_target(shost); @@ -3587,8 +3586,6 @@ static int __init srp_init_module(void) { int ret; - BUILD_BUG_ON(FIELD_SIZEOF(struct ib_wc, wr_id) < sizeof(void *)); - if (srp_sg_tablesize) { pr_warn("srp_sg_tablesize is deprecated, please use cmd_sg_entries\n"); if (!cmd_sg_entries) diff --git a/drivers/infiniband/ulp/srp/ib_srp.h b/drivers/infiniband/ulp/srp/ib_srp.h index f6af531..9e05ce4 100644 --- a/drivers/infiniband/ulp/srp/ib_srp.h +++ b/drivers/infiniband/ulp/srp/ib_srp.h @@ -66,11 +66,6 @@ enum { SRP_TAG_TSK_MGMT = 1U << 31, SRP_MAX_PAGES_PER_MR = 512, - - LOCAL_INV_WR_ID_MASK = 1, - FAST_REG_WR_ID_MASK = 2, - - SRP_LAST_WR_ID = 0xfffffffcU, }; enum srp_target_state { @@ -128,6 +123,7 @@ struct srp_request { struct srp_direct_buf *indirect_desc; dma_addr_t indirect_dma_addr; short nmdesc; + struct ib_cqe reg_cqe; }; /** @@ -231,6 +227,7 @@ struct srp_iu { void *buf; size_t size; enum dma_data_direction direction; + struct ib_cqe cqe; }; /** diff --git a/drivers/infiniband/ulp/srpt/ib_srpt.c b/drivers/infiniband/ulp/srpt/ib_srpt.c index 2e2fe81..8068aff 100644 --- a/drivers/infiniband/ulp/srpt/ib_srpt.c +++ b/drivers/infiniband/ulp/srpt/ib_srpt.c @@ -93,6 +93,8 @@ MODULE_PARM_DESC(srpt_service_guid, static struct ib_client srpt_client; static void srpt_release_channel(struct srpt_rdma_ch *ch); static int srpt_queue_status(struct se_cmd *cmd); +static void srpt_recv_done(struct ib_cq *cq, struct ib_wc *wc); +static void srpt_send_done(struct ib_cq *cq, struct ib_wc *wc); /** * opposite_dma_dir() - Swap DMA_TO_DEVICE and DMA_FROM_DEVICE. 
@@ -778,12 +780,12 @@ static int srpt_post_recv(struct srpt_device *sdev, struct ib_recv_wr wr, *bad_wr; BUG_ON(!sdev); - wr.wr_id = encode_wr_id(SRPT_RECV, ioctx->ioctx.index); - list.addr = ioctx->ioctx.dma; list.length = srp_max_req_size; list.lkey = sdev->pd->local_dma_lkey; + ioctx->ioctx.cqe.done = srpt_recv_done; + wr.wr_cqe = &ioctx->ioctx.cqe; wr.next = NULL; wr.sg_list = &list; wr.num_sge = 1; @@ -819,8 +821,9 @@ static int srpt_post_send(struct srpt_rdma_ch *ch, list.length = len; list.lkey = sdev->pd->local_dma_lkey; + ioctx->ioctx.cqe.done = srpt_send_done; wr.next = NULL; - wr.wr_id = encode_wr_id(SRPT_SEND, ioctx->ioctx.index); + wr.wr_cqe = &ioctx->ioctx.cqe; wr.sg_list = &list; wr.num_sge = 1; wr.opcode = IB_WR_SEND; @@ -1052,13 +1055,13 @@ static void srpt_unmap_sg_to_ib_sge(struct srpt_rdma_ch *ch, BUG_ON(!ch); BUG_ON(!ioctx); - BUG_ON(ioctx->n_rdma && !ioctx->rdma_ius); + BUG_ON(ioctx->n_rdma && !ioctx->rdma_wrs); while (ioctx->n_rdma) - kfree(ioctx->rdma_ius[--ioctx->n_rdma].sge); + kfree(ioctx->rdma_wrs[--ioctx->n_rdma].wr.sg_list); - kfree(ioctx->rdma_ius); - ioctx->rdma_ius = NULL; + kfree(ioctx->rdma_wrs); + ioctx->rdma_wrs = NULL; if (ioctx->mapped_sg_count) { sg = ioctx->sg; @@ -1082,7 +1085,7 @@ static int srpt_map_sg_to_ib_sge(struct srpt_rdma_ch *ch, struct scatterlist *sg, *sg_orig; int sg_cnt; enum dma_data_direction dir; - struct rdma_iu *riu; + struct ib_rdma_wr *riu; struct srp_direct_buf *db; dma_addr_t dma_addr; struct ib_sge *sge; @@ -1109,23 +1112,24 @@ static int srpt_map_sg_to_ib_sge(struct srpt_rdma_ch *ch, ioctx->mapped_sg_count = count; - if (ioctx->rdma_ius && ioctx->n_rdma_ius) - nrdma = ioctx->n_rdma_ius; + if (ioctx->rdma_wrs && ioctx->n_rdma_wrs) + nrdma = ioctx->n_rdma_wrs; else { nrdma = (count + SRPT_DEF_SG_PER_WQE - 1) / SRPT_DEF_SG_PER_WQE + ioctx->n_rbuf; - ioctx->rdma_ius = kzalloc(nrdma * sizeof *riu, GFP_KERNEL); - if (!ioctx->rdma_ius) + ioctx->rdma_wrs = kcalloc(nrdma, sizeof(*ioctx->rdma_wrs), + GFP_KERNEL); + if (!ioctx->rdma_wrs) goto free_mem; - ioctx->n_rdma_ius = nrdma; + ioctx->n_rdma_wrs = nrdma; } db = ioctx->rbufs; tsize = cmd->data_length; dma_len = ib_sg_dma_len(dev, &sg[0]); - riu = ioctx->rdma_ius; + riu = ioctx->rdma_wrs; /* * For each remote desc - calculate the #ib_sge. 
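The srpt hunks that follow replace the driver-private struct rdma_iu with the generic struct ib_rdma_wr from <rdma/ib_verbs.h>, which embeds the plain send WR. Filling one looks roughly like this (helper name invented):

```c
static void my_fill_rdma_write(struct ib_rdma_wr *rwr, struct ib_sge *sgl,
			       int nsge, u64 remote_addr, u32 rkey)
{
	rwr->wr.opcode	 = IB_WR_RDMA_WRITE;
	rwr->wr.sg_list	 = sgl;
	rwr->wr.num_sge	 = nsge;
	rwr->remote_addr = remote_addr;
	rwr->rkey	 = rkey;
}
```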
@@ -1139,9 +1143,9 @@ static int srpt_map_sg_to_ib_sge(struct srpt_rdma_ch *ch, j < count && i < ioctx->n_rbuf && tsize > 0; ++i, ++riu, ++db) { rsize = be32_to_cpu(db->len); raddr = be64_to_cpu(db->va); - riu->raddr = raddr; + riu->remote_addr = raddr; riu->rkey = be32_to_cpu(db->key); - riu->sge_cnt = 0; + riu->wr.num_sge = 0; /* calculate how many sge required for this remote_buf */ while (rsize > 0 && tsize > 0) { @@ -1165,33 +1169,35 @@ static int srpt_map_sg_to_ib_sge(struct srpt_rdma_ch *ch, rsize = 0; } - ++riu->sge_cnt; + ++riu->wr.num_sge; - if (rsize > 0 && riu->sge_cnt == SRPT_DEF_SG_PER_WQE) { + if (rsize > 0 && + riu->wr.num_sge == SRPT_DEF_SG_PER_WQE) { ++ioctx->n_rdma; - riu->sge = - kmalloc(riu->sge_cnt * sizeof *riu->sge, - GFP_KERNEL); - if (!riu->sge) + riu->wr.sg_list = kmalloc_array(riu->wr.num_sge, + sizeof(*riu->wr.sg_list), + GFP_KERNEL); + if (!riu->wr.sg_list) goto free_mem; ++riu; - riu->sge_cnt = 0; - riu->raddr = raddr; + riu->wr.num_sge = 0; + riu->remote_addr = raddr; riu->rkey = be32_to_cpu(db->key); } } ++ioctx->n_rdma; - riu->sge = kmalloc(riu->sge_cnt * sizeof *riu->sge, - GFP_KERNEL); - if (!riu->sge) + riu->wr.sg_list = kmalloc_array(riu->wr.num_sge, + sizeof(*riu->wr.sg_list), + GFP_KERNEL); + if (!riu->wr.sg_list) goto free_mem; } db = ioctx->rbufs; tsize = cmd->data_length; - riu = ioctx->rdma_ius; + riu = ioctx->rdma_wrs; sg = sg_orig; dma_len = ib_sg_dma_len(dev, &sg[0]); dma_addr = ib_sg_dma_address(dev, &sg[0]); @@ -1200,7 +1206,7 @@ static int srpt_map_sg_to_ib_sge(struct srpt_rdma_ch *ch, for (i = 0, j = 0; j < count && i < ioctx->n_rbuf && tsize > 0; ++i, ++riu, ++db) { rsize = be32_to_cpu(db->len); - sge = riu->sge; + sge = riu->wr.sg_list; k = 0; while (rsize > 0 && tsize > 0) { @@ -1232,9 +1238,9 @@ static int srpt_map_sg_to_ib_sge(struct srpt_rdma_ch *ch, } ++k; - if (k == riu->sge_cnt && rsize > 0 && tsize > 0) { + if (k == riu->wr.num_sge && rsize > 0 && tsize > 0) { ++riu; - sge = riu->sge; + sge = riu->wr.sg_list; k = 0; } else if (rsize > 0 && tsize > 0) ++sge; @@ -1277,8 +1283,8 @@ static struct srpt_send_ioctx *srpt_get_send_ioctx(struct srpt_rdma_ch *ch) ioctx->n_rbuf = 0; ioctx->rbufs = NULL; ioctx->n_rdma = 0; - ioctx->n_rdma_ius = 0; - ioctx->rdma_ius = NULL; + ioctx->n_rdma_wrs = 0; + ioctx->rdma_wrs = NULL; ioctx->mapped_sg_count = 0; init_completion(&ioctx->tx_done); ioctx->queue_status_only = false; @@ -1380,118 +1386,44 @@ out: } /** - * srpt_handle_send_err_comp() - Process an IB_WC_SEND error completion. - */ -static void srpt_handle_send_err_comp(struct srpt_rdma_ch *ch, u64 wr_id) -{ - struct srpt_send_ioctx *ioctx; - enum srpt_command_state state; - u32 index; - - atomic_inc(&ch->sq_wr_avail); - - index = idx_from_wr_id(wr_id); - ioctx = ch->ioctx_ring[index]; - state = srpt_get_cmd_state(ioctx); - - WARN_ON(state != SRPT_STATE_CMD_RSP_SENT - && state != SRPT_STATE_MGMT_RSP_SENT - && state != SRPT_STATE_NEED_DATA - && state != SRPT_STATE_DONE); - - /* If SRP_RSP sending failed, undo the ch->req_lim change. */ - if (state == SRPT_STATE_CMD_RSP_SENT - || state == SRPT_STATE_MGMT_RSP_SENT) - atomic_dec(&ch->req_lim); - - srpt_abort_cmd(ioctx); -} - -/** - * srpt_handle_send_comp() - Process an IB send completion notification. 
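A side note on the allocation change in this hunk: kmalloc_array() is preferred over an open-coded kmalloc(n * size, ...) because it fails cleanly on multiplication overflow instead of returning a short buffer. Sketch (helper name invented):

```c
#include <linux/slab.h>

static struct ib_sge *my_alloc_sg_list(unsigned int nsge)
{
	/* NULL on allocation failure *or* on nsge * sizeof() overflow */
	return kmalloc_array(nsge, sizeof(struct ib_sge), GFP_KERNEL);
}
```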
- */ -static void srpt_handle_send_comp(struct srpt_rdma_ch *ch, - struct srpt_send_ioctx *ioctx) -{ - enum srpt_command_state state; - - atomic_inc(&ch->sq_wr_avail); - - state = srpt_set_cmd_state(ioctx, SRPT_STATE_DONE); - - if (WARN_ON(state != SRPT_STATE_CMD_RSP_SENT - && state != SRPT_STATE_MGMT_RSP_SENT - && state != SRPT_STATE_DONE)) - pr_debug("state = %d\n", state); - - if (state != SRPT_STATE_DONE) { - srpt_unmap_sg_to_ib_sge(ch, ioctx); - transport_generic_free_cmd(&ioctx->cmd, 0); - } else { - pr_err("IB completion has been received too late for" - " wr_id = %u.\n", ioctx->ioctx.index); - } -} - -/** - * srpt_handle_rdma_comp() - Process an IB RDMA completion notification. - * * XXX: what is now target_execute_cmd used to be asynchronous, and unmapping * the data that has been transferred via IB RDMA had to be postponed until the * check_stop_free() callback. None of this is necessary anymore and needs to * be cleaned up. */ -static void srpt_handle_rdma_comp(struct srpt_rdma_ch *ch, - struct srpt_send_ioctx *ioctx, - enum srpt_opcode opcode) +static void srpt_rdma_read_done(struct ib_cq *cq, struct ib_wc *wc) { + struct srpt_rdma_ch *ch = cq->cq_context; + struct srpt_send_ioctx *ioctx = + container_of(wc->wr_cqe, struct srpt_send_ioctx, ioctx.cqe); + WARN_ON(ioctx->n_rdma <= 0); atomic_add(ioctx->n_rdma, &ch->sq_wr_avail); - if (opcode == SRPT_RDMA_READ_LAST) { - if (srpt_test_and_set_cmd_state(ioctx, SRPT_STATE_NEED_DATA, - SRPT_STATE_DATA_IN)) - target_execute_cmd(&ioctx->cmd); - else - pr_err("%s[%d]: wrong state = %d\n", __func__, - __LINE__, srpt_get_cmd_state(ioctx)); - } else if (opcode == SRPT_RDMA_ABORT) { - ioctx->rdma_aborted = true; - } else { - WARN(true, "unexpected opcode %d\n", opcode); + if (unlikely(wc->status != IB_WC_SUCCESS)) { + pr_info("RDMA_READ for ioctx 0x%p failed with status %d\n", + ioctx, wc->status); + srpt_abort_cmd(ioctx); + return; } + + if (srpt_test_and_set_cmd_state(ioctx, SRPT_STATE_NEED_DATA, + SRPT_STATE_DATA_IN)) + target_execute_cmd(&ioctx->cmd); + else + pr_err("%s[%d]: wrong state = %d\n", __func__, + __LINE__, srpt_get_cmd_state(ioctx)); } -/** - * srpt_handle_rdma_err_comp() - Process an IB RDMA error completion. 
- */ -static void srpt_handle_rdma_err_comp(struct srpt_rdma_ch *ch, - struct srpt_send_ioctx *ioctx, - enum srpt_opcode opcode) +static void srpt_rdma_write_done(struct ib_cq *cq, struct ib_wc *wc) { - enum srpt_command_state state; + struct srpt_send_ioctx *ioctx = + container_of(wc->wr_cqe, struct srpt_send_ioctx, ioctx.cqe); - state = srpt_get_cmd_state(ioctx); - switch (opcode) { - case SRPT_RDMA_READ_LAST: - if (ioctx->n_rdma <= 0) { - pr_err("Received invalid RDMA read" - " error completion with idx %d\n", - ioctx->ioctx.index); - break; - } - atomic_add(ioctx->n_rdma, &ch->sq_wr_avail); - if (state == SRPT_STATE_NEED_DATA) - srpt_abort_cmd(ioctx); - else - pr_err("%s[%d]: wrong state = %d\n", - __func__, __LINE__, state); - break; - case SRPT_RDMA_WRITE_LAST: - break; - default: - pr_err("%s[%d]: opcode = %u\n", __func__, __LINE__, opcode); - break; + if (unlikely(wc->status != IB_WC_SUCCESS)) { + pr_info("RDMA_WRITE for ioctx 0x%p failed with status %d\n", + ioctx, wc->status); + srpt_abort_cmd(ioctx); } } @@ -1926,32 +1858,26 @@ out: return; } -static void srpt_process_rcv_completion(struct ib_cq *cq, - struct srpt_rdma_ch *ch, - struct ib_wc *wc) +static void srpt_recv_done(struct ib_cq *cq, struct ib_wc *wc) { - struct srpt_device *sdev = ch->sport->sdev; - struct srpt_recv_ioctx *ioctx; - u32 index; + struct srpt_rdma_ch *ch = cq->cq_context; + struct srpt_recv_ioctx *ioctx = + container_of(wc->wr_cqe, struct srpt_recv_ioctx, ioctx.cqe); - index = idx_from_wr_id(wc->wr_id); if (wc->status == IB_WC_SUCCESS) { int req_lim; req_lim = atomic_dec_return(&ch->req_lim); if (unlikely(req_lim < 0)) pr_err("req_lim = %d < 0\n", req_lim); - ioctx = sdev->ioctx_ring[index]; srpt_handle_new_iu(ch, ioctx, NULL); } else { - pr_info("receiving failed for idx %u with status %d\n", - index, wc->status); + pr_info("receiving failed for ioctx %p with status %d\n", + ioctx, wc->status); } } /** - * srpt_process_send_completion() - Process an IB send completion. - * * Note: Although this has not yet been observed during tests, at least in * theory it is possible that the srpt_get_send_ioctx() call invoked by * srpt_handle_new_iu() fails. This is possible because the req_lim_delta @@ -1964,108 +1890,51 @@ static void srpt_process_rcv_completion(struct ib_cq *cq, * are queued on cmd_wait_list. The code below processes these delayed * requests one at a time. 
*/ -static void srpt_process_send_completion(struct ib_cq *cq, - struct srpt_rdma_ch *ch, - struct ib_wc *wc) +static void srpt_send_done(struct ib_cq *cq, struct ib_wc *wc) { - struct srpt_send_ioctx *send_ioctx; - uint32_t index; - enum srpt_opcode opcode; + struct srpt_rdma_ch *ch = cq->cq_context; + struct srpt_send_ioctx *ioctx = + container_of(wc->wr_cqe, struct srpt_send_ioctx, ioctx.cqe); + enum srpt_command_state state; - index = idx_from_wr_id(wc->wr_id); - opcode = opcode_from_wr_id(wc->wr_id); - send_ioctx = ch->ioctx_ring[index]; - if (wc->status == IB_WC_SUCCESS) { - if (opcode == SRPT_SEND) - srpt_handle_send_comp(ch, send_ioctx); - else { - WARN_ON(opcode != SRPT_RDMA_ABORT && - wc->opcode != IB_WC_RDMA_READ); - srpt_handle_rdma_comp(ch, send_ioctx, opcode); - } + state = srpt_set_cmd_state(ioctx, SRPT_STATE_DONE); + + WARN_ON(state != SRPT_STATE_CMD_RSP_SENT && + state != SRPT_STATE_MGMT_RSP_SENT); + + atomic_inc(&ch->sq_wr_avail); + + if (wc->status != IB_WC_SUCCESS) { + pr_info("sending response for ioctx 0x%p failed" + " with status %d\n", ioctx, wc->status); + + atomic_dec(&ch->req_lim); + srpt_abort_cmd(ioctx); + goto out; + } + + if (state != SRPT_STATE_DONE) { + srpt_unmap_sg_to_ib_sge(ch, ioctx); + transport_generic_free_cmd(&ioctx->cmd, 0); } else { - if (opcode == SRPT_SEND) { - pr_info("sending response for idx %u failed" - " with status %d\n", index, wc->status); - srpt_handle_send_err_comp(ch, wc->wr_id); - } else if (opcode != SRPT_RDMA_MID) { - pr_info("RDMA t %d for idx %u failed with" - " status %d\n", opcode, index, wc->status); - srpt_handle_rdma_err_comp(ch, send_ioctx, opcode); - } + pr_err("IB completion has been received too late for" + " wr_id = %u.\n", ioctx->ioctx.index); } - while (unlikely(opcode == SRPT_SEND - && !list_empty(&ch->cmd_wait_list) - && srpt_get_ch_state(ch) == CH_LIVE - && (send_ioctx = srpt_get_send_ioctx(ch)) != NULL)) { +out: + while (!list_empty(&ch->cmd_wait_list) && + srpt_get_ch_state(ch) == CH_LIVE && + (ioctx = srpt_get_send_ioctx(ch)) != NULL) { struct srpt_recv_ioctx *recv_ioctx; recv_ioctx = list_first_entry(&ch->cmd_wait_list, struct srpt_recv_ioctx, wait_list); list_del(&recv_ioctx->wait_list); - srpt_handle_new_iu(ch, recv_ioctx, send_ioctx); + srpt_handle_new_iu(ch, recv_ioctx, ioctx); } } -static void srpt_process_completion(struct ib_cq *cq, struct srpt_rdma_ch *ch) -{ - struct ib_wc *const wc = ch->wc; - int i, n; - - WARN_ON(cq != ch->cq); - - ib_req_notify_cq(cq, IB_CQ_NEXT_COMP); - while ((n = ib_poll_cq(cq, ARRAY_SIZE(ch->wc), wc)) > 0) { - for (i = 0; i < n; i++) { - if (opcode_from_wr_id(wc[i].wr_id) == SRPT_RECV) - srpt_process_rcv_completion(cq, ch, &wc[i]); - else - srpt_process_send_completion(cq, ch, &wc[i]); - } - } -} - -/** - * srpt_completion() - IB completion queue callback function. - * - * Notes: - * - It is guaranteed that a completion handler will never be invoked - * concurrently on two different CPUs for the same completion queue. See also - * Documentation/infiniband/core_locking.txt and the implementation of - * handle_edge_irq() in kernel/irq/chip.c. - * - When threaded IRQs are enabled, completion handlers are invoked in thread - * context instead of interrupt context. - */ -static void srpt_completion(struct ib_cq *cq, void *ctx) -{ - struct srpt_rdma_ch *ch = ctx; - - wake_up_interruptible(&ch->wait_queue); -} - -static int srpt_compl_thread(void *arg) -{ - struct srpt_rdma_ch *ch; - - /* Hibernation / freezing of the SRPT kernel thread is not supported. 
*/ - current->flags |= PF_NOFREEZE; - - ch = arg; - BUG_ON(!ch); - pr_info("Session %s: kernel thread %s (PID %d) started\n", - ch->sess_name, ch->thread->comm, current->pid); - while (!kthread_should_stop()) { - wait_event_interruptible(ch->wait_queue, - (srpt_process_completion(ch->cq, ch), - kthread_should_stop())); - } - pr_info("Session %s: kernel thread %s (PID %d) stopped\n", - ch->sess_name, ch->thread->comm, current->pid); - return 0; -} - /** * srpt_create_ch_ib() - Create receive and send completion queues. */ @@ -2075,7 +1944,6 @@ static int srpt_create_ch_ib(struct srpt_rdma_ch *ch) struct srpt_port *sport = ch->sport; struct srpt_device *sdev = sport->sdev; u32 srp_sq_size = sport->port_attrib.srp_sq_size; - struct ib_cq_init_attr cq_attr = {}; int ret; WARN_ON(ch->rq_size < 1); @@ -2086,9 +1954,8 @@ static int srpt_create_ch_ib(struct srpt_rdma_ch *ch) goto out; retry: - cq_attr.cqe = ch->rq_size + srp_sq_size; - ch->cq = ib_create_cq(sdev->device, srpt_completion, NULL, ch, - &cq_attr); + ch->cq = ib_alloc_cq(sdev->device, ch, ch->rq_size + srp_sq_size, + 0 /* XXX: spread CQs */, IB_POLL_WORKQUEUE); if (IS_ERR(ch->cq)) { ret = PTR_ERR(ch->cq); pr_err("failed to create CQ cqe= %d ret= %d\n", @@ -2131,18 +1998,6 @@ retry: if (ret) goto err_destroy_qp; - init_waitqueue_head(&ch->wait_queue); - - pr_debug("creating thread for session %s\n", ch->sess_name); - - ch->thread = kthread_run(srpt_compl_thread, ch, "ib_srpt_compl"); - if (IS_ERR(ch->thread)) { - pr_err("failed to create kernel thread %ld\n", - PTR_ERR(ch->thread)); - ch->thread = NULL; - goto err_destroy_qp; - } - out: kfree(qp_init); return ret; @@ -2150,17 +2005,14 @@ out: err_destroy_qp: ib_destroy_qp(ch->qp); err_destroy_cq: - ib_destroy_cq(ch->cq); + ib_free_cq(ch->cq); goto out; } static void srpt_destroy_ch_ib(struct srpt_rdma_ch *ch) { - if (ch->thread) - kthread_stop(ch->thread); - ib_destroy_qp(ch->qp); - ib_destroy_cq(ch->cq); + ib_free_cq(ch->cq); } /** @@ -2821,12 +2673,8 @@ static int srpt_cm_handler(struct ib_cm_id *cm_id, struct ib_cm_event *event) static int srpt_perform_rdmas(struct srpt_rdma_ch *ch, struct srpt_send_ioctx *ioctx) { - struct ib_rdma_wr wr; struct ib_send_wr *bad_wr; - struct rdma_iu *riu; - int i; - int ret; - int sq_wr_avail; + int sq_wr_avail, ret, i; enum dma_data_direction dir; const int n_rdma = ioctx->n_rdma; @@ -2842,59 +2690,32 @@ static int srpt_perform_rdmas(struct srpt_rdma_ch *ch, } } - ioctx->rdma_aborted = false; - ret = 0; - riu = ioctx->rdma_ius; - memset(&wr, 0, sizeof wr); - - for (i = 0; i < n_rdma; ++i, ++riu) { - if (dir == DMA_FROM_DEVICE) { - wr.wr.opcode = IB_WR_RDMA_WRITE; - wr.wr.wr_id = encode_wr_id(i == n_rdma - 1 ? - SRPT_RDMA_WRITE_LAST : - SRPT_RDMA_MID, - ioctx->ioctx.index); - } else { - wr.wr.opcode = IB_WR_RDMA_READ; - wr.wr.wr_id = encode_wr_id(i == n_rdma - 1 ? - SRPT_RDMA_READ_LAST : - SRPT_RDMA_MID, - ioctx->ioctx.index); - } - wr.wr.next = NULL; - wr.remote_addr = riu->raddr; - wr.rkey = riu->rkey; - wr.wr.num_sge = riu->sge_cnt; - wr.wr.sg_list = riu->sge; + for (i = 0; i < n_rdma; i++) { + struct ib_send_wr *wr = &ioctx->rdma_wrs[i].wr; - /* only get completion event for the last rdma write */ - if (i == (n_rdma - 1) && dir == DMA_TO_DEVICE) - wr.wr.send_flags = IB_SEND_SIGNALED; + wr->opcode = (dir == DMA_FROM_DEVICE) ? 
+ IB_WR_RDMA_WRITE : IB_WR_RDMA_READ; - ret = ib_post_send(ch->qp, &wr.wr, &bad_wr); - if (ret) - break; + if (i == n_rdma - 1) { + /* only get completion event for the last rdma read */ + if (dir == DMA_TO_DEVICE) { + wr->send_flags = IB_SEND_SIGNALED; + ioctx->rdma_cqe.done = srpt_rdma_read_done; + } else { + ioctx->rdma_cqe.done = srpt_rdma_write_done; + } + wr->wr_cqe = &ioctx->rdma_cqe; + wr->next = NULL; + } else { + wr->wr_cqe = NULL; + wr->next = &ioctx->rdma_wrs[i + 1].wr; + } } + ret = ib_post_send(ch->qp, &ioctx->rdma_wrs->wr, &bad_wr); if (ret) pr_err("%s[%d]: ib_post_send() returned %d for %d/%d\n", __func__, __LINE__, ret, i, n_rdma); - if (ret && i > 0) { - wr.wr.num_sge = 0; - wr.wr.wr_id = encode_wr_id(SRPT_RDMA_ABORT, ioctx->ioctx.index); - wr.wr.send_flags = IB_SEND_SIGNALED; - while (ch->state == CH_LIVE && - ib_post_send(ch->qp, &wr.wr, &bad_wr) != 0) { - pr_info("Trying to abort failed RDMA transfer [%d]\n", - ioctx->ioctx.index); - msleep(1000); - } - while (ch->state != CH_RELEASING && !ioctx->rdma_aborted) { - pr_info("Waiting until RDMA abort finished [%d]\n", - ioctx->ioctx.index); - msleep(1000); - } - } out: if (unlikely(dir == DMA_TO_DEVICE && ret < 0)) atomic_add(n_rdma, &ch->sq_wr_avail); diff --git a/drivers/infiniband/ulp/srpt/ib_srpt.h b/drivers/infiniband/ulp/srpt/ib_srpt.h index 5faad8ac..a98b86b 100644 --- a/drivers/infiniband/ulp/srpt/ib_srpt.h +++ b/drivers/infiniband/ulp/srpt/ib_srpt.h @@ -128,36 +128,6 @@ enum { DEFAULT_MAX_RDMA_SIZE = 65536, }; -enum srpt_opcode { - SRPT_RECV, - SRPT_SEND, - SRPT_RDMA_MID, - SRPT_RDMA_ABORT, - SRPT_RDMA_READ_LAST, - SRPT_RDMA_WRITE_LAST, -}; - -static inline u64 encode_wr_id(u8 opcode, u32 idx) -{ - return ((u64)opcode << 32) | idx; -} -static inline enum srpt_opcode opcode_from_wr_id(u64 wr_id) -{ - return wr_id >> 32; -} -static inline u32 idx_from_wr_id(u64 wr_id) -{ - return (u32)wr_id; -} - -struct rdma_iu { - u64 raddr; - u32 rkey; - struct ib_sge *sge; - u32 sge_cnt; - int mem_id; -}; - /** * enum srpt_command_state - SCSI command state managed by SRPT. * @SRPT_STATE_NEW: New command arrived and is being processed. @@ -189,6 +159,7 @@ enum srpt_command_state { * @index: Index of the I/O context in its ioctx_ring array. */ struct srpt_ioctx { + struct ib_cqe cqe; void *buf; dma_addr_t dma; uint32_t index; @@ -215,32 +186,30 @@ struct srpt_recv_ioctx { * @sg: Pointer to sg-list associated with this I/O context. * @sg_cnt: SG-list size. * @mapped_sg_count: ib_dma_map_sg() return value. - * @n_rdma_ius: Number of elements in the rdma_ius array. - * @rdma_ius: Array with information about the RDMA mapping. + * @n_rdma_wrs: Number of elements in the rdma_wrs array. + * @rdma_wrs: Array with information about the RDMA mapping. * @tag: Tag of the received SRP information unit. * @spinlock: Protects 'state'. * @state: I/O context state. - * @rdma_aborted: If initiating a multipart RDMA transfer failed, whether - * the already initiated transfers have finished. * @cmd: Target core command data structure. * @sense_data: SCSI sense data. 
*/ struct srpt_send_ioctx { struct srpt_ioctx ioctx; struct srpt_rdma_ch *ch; - struct rdma_iu *rdma_ius; + struct ib_rdma_wr *rdma_wrs; + struct ib_cqe rdma_cqe; struct srp_direct_buf *rbufs; struct srp_direct_buf single_rbuf; struct scatterlist *sg; struct list_head free_list; spinlock_t spinlock; enum srpt_command_state state; - bool rdma_aborted; struct se_cmd cmd; struct completion tx_done; int sg_cnt; int mapped_sg_count; - u16 n_rdma_ius; + u16 n_rdma_wrs; u8 n_rdma; u8 n_rbuf; bool queue_status_only; @@ -267,9 +236,6 @@ enum rdma_ch_state { /** * struct srpt_rdma_ch - RDMA channel. - * @wait_queue: Allows the kernel thread to wait for more work. - * @thread: Kernel thread that processes the IB queues associated with - * the channel. * @cm_id: IB CM ID associated with the channel. * @qp: IB queue pair used for communicating over this channel. * @cq: IB completion queue for this channel. @@ -299,8 +265,6 @@ enum rdma_ch_state { * @release_done: Enables waiting for srpt_release_channel() completion. */ struct srpt_rdma_ch { - wait_queue_head_t wait_queue; - struct task_struct *thread; struct ib_cm_id *cm_id; struct ib_qp *qp; struct ib_cq *cq; diff --git a/drivers/scsi/Kconfig b/drivers/scsi/Kconfig index 64eed87..9900e61 100644 --- a/drivers/scsi/Kconfig +++ b/drivers/scsi/Kconfig @@ -1104,6 +1104,7 @@ config SCSI_IPR tristate "IBM Power Linux RAID adapter support" depends on PCI && SCSI && ATA select FW_LOADER + select IRQ_POLL ---help--- This driver supports the IBM Power Linux family RAID adapters. This includes IBM pSeries 5712, 5703, 5709, and 570A, as well diff --git a/drivers/scsi/be2iscsi/Kconfig b/drivers/scsi/be2iscsi/Kconfig index 4e7cad2..bad5f32 100644 --- a/drivers/scsi/be2iscsi/Kconfig +++ b/drivers/scsi/be2iscsi/Kconfig @@ -3,6 +3,7 @@ config BE2ISCSI depends on PCI && SCSI && NET select SCSI_ISCSI_ATTRS select ISCSI_BOOT_SYSFS + select IRQ_POLL help This driver implements the iSCSI functionality for Emulex diff --git a/drivers/scsi/be2iscsi/be.h b/drivers/scsi/be2iscsi/be.h index 77f992e..a41c643 100644 --- a/drivers/scsi/be2iscsi/be.h +++ b/drivers/scsi/be2iscsi/be.h @@ -20,7 +20,7 @@ #include <linux/pci.h> #include <linux/if_vlan.h> -#include <linux/blk-iopoll.h> +#include <linux/irq_poll.h> #define FW_VER_LEN 32 #define MCC_Q_LEN 128 #define MCC_CQ_LEN 256 @@ -101,7 +101,7 @@ struct be_eq_obj { struct beiscsi_hba *phba; struct be_queue_info *cq; struct work_struct work_cqs; /* Work Item */ - struct blk_iopoll iopoll; + struct irq_poll iopoll; }; struct be_mcc_obj { diff --git a/drivers/scsi/be2iscsi/be_iscsi.c b/drivers/scsi/be2iscsi/be_iscsi.c index b7087ba..022e87b 100644 --- a/drivers/scsi/be2iscsi/be_iscsi.c +++ b/drivers/scsi/be2iscsi/be_iscsi.c @@ -1292,9 +1292,9 @@ static void beiscsi_flush_cq(struct beiscsi_hba *phba) for (i = 0; i < phba->num_cpus; i++) { pbe_eq = &phwi_context->be_eq[i]; - blk_iopoll_disable(&pbe_eq->iopoll); + irq_poll_disable(&pbe_eq->iopoll); beiscsi_process_cq(pbe_eq); - blk_iopoll_enable(&pbe_eq->iopoll); + irq_poll_enable(&pbe_eq->iopoll); } } diff --git a/drivers/scsi/be2iscsi/be_main.c b/drivers/scsi/be2iscsi/be_main.c index fe0c514..cb9072a 100644 --- a/drivers/scsi/be2iscsi/be_main.c +++ b/drivers/scsi/be2iscsi/be_main.c @@ -910,8 +910,7 @@ static irqreturn_t be_isr_msix(int irq, void *dev_id) num_eq_processed = 0; while (eqe->dw[offsetof(struct amap_eq_entry, valid) / 32] & EQE_VALID_MASK) { - if (!blk_iopoll_sched_prep(&pbe_eq->iopoll)) - blk_iopoll_sched(&pbe_eq->iopoll); + irq_poll_sched(&pbe_eq->iopoll); 
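This be_isr_msix() hunk shows the calling-convention change behind every blk_iopoll → irq_poll conversion in this series: the blk_iopoll_sched_prep()/blk_iopoll_sched() pair collapses into a single irq_poll_sched() call (the disable and already-scheduled tests move inside it, see lib/irq_poll.c further below), and because irq_poll_init() now also enables the instance, the separate *_enable() calls disappear from the probe paths that follow. A hedged sketch of the resulting driver lifecycle — my_dev, my_poll(), my_isr() and the budget of 64 are invented for illustration, only the irq_poll_* calls are the API added here:

#include <linux/interrupt.h>
#include <linux/irq_poll.h>

struct my_dev {
	struct irq_poll	iopoll;
	/* ... completion ring, registers (hypothetical) ... */
};

/* Consume up to @budget completions; finishing under budget ends
 * polled mode until the next irq_poll_sched(). */
static int my_poll(struct irq_poll *iop, int budget)
{
	struct my_dev *dev = container_of(iop, struct my_dev, iopoll);
	int done = 0;

	/* ... reap completions from dev's ring, counting them in done ... */

	if (done < budget)
		irq_poll_complete(iop);
	return done;
}

static irqreturn_t my_isr(int irq, void *data)
{
	struct my_dev *dev = data;

	/* One call; no blk_iopoll_sched_prep() step anymore. */
	irq_poll_sched(&dev->iopoll);
	return IRQ_HANDLED;
}

static void my_dev_start(struct my_dev *dev)
{
	/* Initializes and enables; no separate irq_poll_enable() needed. */
	irq_poll_init(&dev->iopoll, 64, my_poll);
}

static void my_dev_stop(struct my_dev *dev)
{
	/* Blocks until any in-flight poll has completed. */
	irq_poll_disable(&dev->iopoll);
}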
AMAP_SET_BITS(struct amap_eq_entry, valid, eqe, 0); queue_tail_inc(eq); @@ -972,8 +971,7 @@ static irqreturn_t be_isr(int irq, void *dev_id) spin_unlock_irqrestore(&phba->isr_lock, flags); num_mcceq_processed++; } else { - if (!blk_iopoll_sched_prep(&pbe_eq->iopoll)) - blk_iopoll_sched(&pbe_eq->iopoll); + irq_poll_sched(&pbe_eq->iopoll); num_ioeq_processed++; } AMAP_SET_BITS(struct amap_eq_entry, valid, eqe, 0); @@ -2295,7 +2293,7 @@ void beiscsi_process_all_cqs(struct work_struct *work) hwi_ring_eq_db(phba, pbe_eq->q.id, 0, 0, 1, 1); } -static int be_iopoll(struct blk_iopoll *iop, int budget) +static int be_iopoll(struct irq_poll *iop, int budget) { unsigned int ret; struct beiscsi_hba *phba; @@ -2306,7 +2304,7 @@ static int be_iopoll(struct blk_iopoll *iop, int budget) pbe_eq->cq_count += ret; if (ret < budget) { phba = pbe_eq->phba; - blk_iopoll_complete(iop); + irq_poll_complete(iop); beiscsi_log(phba, KERN_INFO, BEISCSI_LOG_CONFIG | BEISCSI_LOG_IO, "BM_%d : rearm pbe_eq->q.id =%d\n", @@ -5293,7 +5291,7 @@ static void beiscsi_quiesce(struct beiscsi_hba *phba, for (i = 0; i < phba->num_cpus; i++) { pbe_eq = &phwi_context->be_eq[i]; - blk_iopoll_disable(&pbe_eq->iopoll); + irq_poll_disable(&pbe_eq->iopoll); } if (unload_state == BEISCSI_CLEAN_UNLOAD) { @@ -5579,9 +5577,8 @@ static void beiscsi_eeh_resume(struct pci_dev *pdev) for (i = 0; i < phba->num_cpus; i++) { pbe_eq = &phwi_context->be_eq[i]; - blk_iopoll_init(&pbe_eq->iopoll, be_iopoll_budget, + irq_poll_init(&pbe_eq->iopoll, be_iopoll_budget, be_iopoll); - blk_iopoll_enable(&pbe_eq->iopoll); } i = (phba->msix_enabled) ? i : 0; @@ -5752,9 +5749,8 @@ static int beiscsi_dev_probe(struct pci_dev *pcidev, for (i = 0; i < phba->num_cpus; i++) { pbe_eq = &phwi_context->be_eq[i]; - blk_iopoll_init(&pbe_eq->iopoll, be_iopoll_budget, + irq_poll_init(&pbe_eq->iopoll, be_iopoll_budget, be_iopoll); - blk_iopoll_enable(&pbe_eq->iopoll); } i = (phba->msix_enabled) ? i : 0; @@ -5795,7 +5791,7 @@ free_blkenbld: destroy_workqueue(phba->wq); for (i = 0; i < phba->num_cpus; i++) { pbe_eq = &phwi_context->be_eq[i]; - blk_iopoll_disable(&pbe_eq->iopoll); + irq_poll_disable(&pbe_eq->iopoll); } free_twq: beiscsi_clean_port(phba); diff --git a/drivers/scsi/ipr.c b/drivers/scsi/ipr.c index 536cd5a..82031e0 100644 --- a/drivers/scsi/ipr.c +++ b/drivers/scsi/ipr.c @@ -3638,7 +3638,7 @@ static struct device_attribute ipr_ioa_reset_attr = { .store = ipr_store_reset_adapter }; -static int ipr_iopoll(struct blk_iopoll *iop, int budget); +static int ipr_iopoll(struct irq_poll *iop, int budget); /** * ipr_show_iopoll_weight - Show ipr polling mode * @dev: class device struct @@ -3681,34 +3681,33 @@ static ssize_t ipr_store_iopoll_weight(struct device *dev, int i; if (!ioa_cfg->sis64) { - dev_info(&ioa_cfg->pdev->dev, "blk-iopoll not supported on this adapter\n"); + dev_info(&ioa_cfg->pdev->dev, "irq_poll not supported on this adapter\n"); return -EINVAL; } if (kstrtoul(buf, 10, &user_iopoll_weight)) return -EINVAL; if (user_iopoll_weight > 256) { - dev_info(&ioa_cfg->pdev->dev, "Invalid blk-iopoll weight. It must be less than 256\n"); + dev_info(&ioa_cfg->pdev->dev, "Invalid irq_poll weight. 
It must be less than 256\n"); return -EINVAL; } if (user_iopoll_weight == ioa_cfg->iopoll_weight) { - dev_info(&ioa_cfg->pdev->dev, "Current blk-iopoll weight has the same weight\n"); + dev_info(&ioa_cfg->pdev->dev, "Current irq_poll weight has the same weight\n"); return strlen(buf); } if (ioa_cfg->iopoll_weight && ioa_cfg->sis64 && ioa_cfg->nvectors > 1) { for (i = 1; i < ioa_cfg->hrrq_num; i++) - blk_iopoll_disable(&ioa_cfg->hrrq[i].iopoll); + irq_poll_disable(&ioa_cfg->hrrq[i].iopoll); } spin_lock_irqsave(shost->host_lock, lock_flags); ioa_cfg->iopoll_weight = user_iopoll_weight; if (ioa_cfg->iopoll_weight && ioa_cfg->sis64 && ioa_cfg->nvectors > 1) { for (i = 1; i < ioa_cfg->hrrq_num; i++) { - blk_iopoll_init(&ioa_cfg->hrrq[i].iopoll, + irq_poll_init(&ioa_cfg->hrrq[i].iopoll, ioa_cfg->iopoll_weight, ipr_iopoll); - blk_iopoll_enable(&ioa_cfg->hrrq[i].iopoll); } } spin_unlock_irqrestore(shost->host_lock, lock_flags); @@ -5569,7 +5568,7 @@ static int ipr_process_hrrq(struct ipr_hrr_queue *hrr_queue, int budget, return num_hrrq; } -static int ipr_iopoll(struct blk_iopoll *iop, int budget) +static int ipr_iopoll(struct irq_poll *iop, int budget) { struct ipr_ioa_cfg *ioa_cfg; struct ipr_hrr_queue *hrrq; @@ -5585,7 +5584,7 @@ static int ipr_iopoll(struct blk_iopoll *iop, int budget) completed_ops = ipr_process_hrrq(hrrq, budget, &doneq); if (completed_ops < budget) - blk_iopoll_complete(iop); + irq_poll_complete(iop); spin_unlock_irqrestore(hrrq->lock, hrrq_flags); list_for_each_entry_safe(ipr_cmd, temp, &doneq, queue) { @@ -5693,8 +5692,7 @@ static irqreturn_t ipr_isr_mhrrq(int irq, void *devp) if (ioa_cfg->iopoll_weight && ioa_cfg->sis64 && ioa_cfg->nvectors > 1) { if ((be32_to_cpu(*hrrq->hrrq_curr) & IPR_HRRQ_TOGGLE_BIT) == hrrq->toggle_bit) { - if (!blk_iopoll_sched_prep(&hrrq->iopoll)) - blk_iopoll_sched(&hrrq->iopoll); + irq_poll_sched(&hrrq->iopoll); spin_unlock_irqrestore(hrrq->lock, hrrq_flags); return IRQ_HANDLED; } @@ -10405,9 +10403,8 @@ static int ipr_probe(struct pci_dev *pdev, const struct pci_device_id *dev_id) if (ioa_cfg->iopoll_weight && ioa_cfg->sis64 && ioa_cfg->nvectors > 1) { for (i = 1; i < ioa_cfg->hrrq_num; i++) { - blk_iopoll_init(&ioa_cfg->hrrq[i].iopoll, + irq_poll_init(&ioa_cfg->hrrq[i].iopoll, ioa_cfg->iopoll_weight, ipr_iopoll); - blk_iopoll_enable(&ioa_cfg->hrrq[i].iopoll); } } @@ -10436,7 +10433,7 @@ static void ipr_shutdown(struct pci_dev *pdev) if (ioa_cfg->iopoll_weight && ioa_cfg->sis64 && ioa_cfg->nvectors > 1) { ioa_cfg->iopoll_weight = 0; for (i = 1; i < ioa_cfg->hrrq_num; i++) - blk_iopoll_disable(&ioa_cfg->hrrq[i].iopoll); + irq_poll_disable(&ioa_cfg->hrrq[i].iopoll); } while (ioa_cfg->in_reset_reload) { diff --git a/drivers/scsi/ipr.h b/drivers/scsi/ipr.h index a34c7a5..56c5706 100644 --- a/drivers/scsi/ipr.h +++ b/drivers/scsi/ipr.h @@ -32,7 +32,7 @@ #include <linux/libata.h> #include <linux/list.h> #include <linux/kref.h> -#include <linux/blk-iopoll.h> +#include <linux/irq_poll.h> #include <scsi/scsi.h> #include <scsi/scsi_cmnd.h> @@ -517,7 +517,7 @@ struct ipr_hrr_queue { u8 allow_cmds:1; u8 removing_ioa:1; - struct blk_iopoll iopoll; + struct irq_poll iopoll; }; /* Command packet structure */ diff --git a/include/linux/blk-iopoll.h b/include/linux/blk-iopoll.h deleted file mode 100644 index 77ae77c..0000000 --- a/include/linux/blk-iopoll.h +++ /dev/null @@ -1,46 +0,0 @@ -#ifndef BLK_IOPOLL_H -#define BLK_IOPOLL_H - -struct blk_iopoll; -typedef int (blk_iopoll_fn)(struct blk_iopoll *, int); - -struct blk_iopoll { - struct list_head list; - 
unsigned long state; - unsigned long data; - int weight; - int max; - blk_iopoll_fn *poll; -}; - -enum { - IOPOLL_F_SCHED = 0, - IOPOLL_F_DISABLE = 1, -}; - -/* - * Returns 0 if we successfully set the IOPOLL_F_SCHED bit, indicating - * that we were the first to acquire this iop for scheduling. If this iop - * is currently disabled, return "failure". - */ -static inline int blk_iopoll_sched_prep(struct blk_iopoll *iop) -{ - if (!test_bit(IOPOLL_F_DISABLE, &iop->state)) - return test_and_set_bit(IOPOLL_F_SCHED, &iop->state); - - return 1; -} - -static inline int blk_iopoll_disable_pending(struct blk_iopoll *iop) -{ - return test_bit(IOPOLL_F_DISABLE, &iop->state); -} - -extern void blk_iopoll_sched(struct blk_iopoll *); -extern void blk_iopoll_init(struct blk_iopoll *, int, blk_iopoll_fn *); -extern void blk_iopoll_complete(struct blk_iopoll *); -extern void __blk_iopoll_complete(struct blk_iopoll *); -extern void blk_iopoll_enable(struct blk_iopoll *); -extern void blk_iopoll_disable(struct blk_iopoll *); - -#endif diff --git a/include/linux/interrupt.h b/include/linux/interrupt.h index ad16809..7ff98c2 100644 --- a/include/linux/interrupt.h +++ b/include/linux/interrupt.h @@ -412,7 +412,7 @@ enum NET_TX_SOFTIRQ, NET_RX_SOFTIRQ, BLOCK_SOFTIRQ, - BLOCK_IOPOLL_SOFTIRQ, + IRQ_POLL_SOFTIRQ, TASKLET_SOFTIRQ, SCHED_SOFTIRQ, HRTIMER_SOFTIRQ, /* Unused, but kept as tools rely on the diff --git a/include/linux/irq_poll.h b/include/linux/irq_poll.h new file mode 100644 index 0000000..3e8c1b8 --- /dev/null +++ b/include/linux/irq_poll.h @@ -0,0 +1,25 @@ +#ifndef IRQ_POLL_H +#define IRQ_POLL_H + +struct irq_poll; +typedef int (irq_poll_fn)(struct irq_poll *, int); + +struct irq_poll { + struct list_head list; + unsigned long state; + int weight; + irq_poll_fn *poll; +}; + +enum { + IRQ_POLL_F_SCHED = 0, + IRQ_POLL_F_DISABLE = 1, +}; + +extern void irq_poll_sched(struct irq_poll *); +extern void irq_poll_init(struct irq_poll *, int, irq_poll_fn *); +extern void irq_poll_complete(struct irq_poll *); +extern void irq_poll_enable(struct irq_poll *); +extern void irq_poll_disable(struct irq_poll *); + +#endif diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h index 120da1d..89ef8d1 100644 --- a/include/rdma/ib_verbs.h +++ b/include/rdma/ib_verbs.h @@ -49,6 +49,7 @@ #include <linux/scatterlist.h> #include <linux/workqueue.h> #include <linux/socket.h> +#include <linux/irq_poll.h> #include <uapi/linux/if_ether.h> #include <linux/atomic.h> @@ -56,6 +57,7 @@ #include <asm/uaccess.h> extern struct workqueue_struct *ib_wq; +extern struct workqueue_struct *ib_comp_wq; union ib_gid { u8 raw[16]; @@ -758,7 +760,10 @@ enum ib_wc_flags { }; struct ib_wc { - u64 wr_id; + union { + u64 wr_id; + struct ib_cqe *wr_cqe; + }; enum ib_wc_status status; enum ib_wc_opcode opcode; u32 vendor_err; @@ -1079,9 +1084,16 @@ struct ib_mw_bind_info { int mw_access_flags; }; +struct ib_cqe { + void (*done)(struct ib_cq *cq, struct ib_wc *wc); +}; + struct ib_send_wr { struct ib_send_wr *next; - u64 wr_id; + union { + u64 wr_id; + struct ib_cqe *wr_cqe; + }; struct ib_sge *sg_list; int num_sge; enum ib_wr_opcode opcode; @@ -1175,7 +1187,10 @@ static inline struct ib_sig_handover_wr *sig_handover_wr(struct ib_send_wr *wr) struct ib_recv_wr { struct ib_recv_wr *next; - u64 wr_id; + union { + u64 wr_id; + struct ib_cqe *wr_cqe; + }; struct ib_sge *sg_list; int num_sge; }; @@ -1307,6 +1322,12 @@ struct ib_ah { typedef void (*ib_comp_handler)(struct ib_cq *cq, void *cq_context); +enum ib_poll_context { + IB_POLL_DIRECT, /* caller 
context, no hw completions */ + IB_POLL_SOFTIRQ, /* poll from softirq context */ + IB_POLL_WORKQUEUE, /* poll from workqueue */ +}; + struct ib_cq { struct ib_device *device; struct ib_uobject *uobject; @@ -1315,6 +1336,12 @@ struct ib_cq { void *cq_context; int cqe; atomic_t usecnt; /* count number of work queues */ + enum ib_poll_context poll_ctx; + struct ib_wc *wc; + union { + struct irq_poll iop; + struct work_struct work; + }; }; struct ib_srq { @@ -2454,6 +2481,11 @@ static inline int ib_post_recv(struct ib_qp *qp, return qp->device->post_recv(qp, recv_wr, bad_recv_wr); } +struct ib_cq *ib_alloc_cq(struct ib_device *dev, void *private, + int nr_cqe, int comp_vector, enum ib_poll_context poll_ctx); +void ib_free_cq(struct ib_cq *cq); +int ib_process_cq_direct(struct ib_cq *cq, int budget); + /** * ib_create_cq - Creates a CQ on the specified device. * @device: The device on which to create the CQ. diff --git a/include/trace/events/irq.h b/include/trace/events/irq.h index ff8f6c0..f95f25e 100644 --- a/include/trace/events/irq.h +++ b/include/trace/events/irq.h @@ -15,7 +15,7 @@ struct softirq_action; softirq_name(NET_TX) \ softirq_name(NET_RX) \ softirq_name(BLOCK) \ - softirq_name(BLOCK_IOPOLL) \ + softirq_name(IRQ_POLL) \ softirq_name(TASKLET) \ softirq_name(SCHED) \ softirq_name(HRTIMER) \ diff --git a/lib/Kconfig b/lib/Kconfig index f0df318..e00e196 100644 --- a/lib/Kconfig +++ b/lib/Kconfig @@ -475,6 +475,11 @@ config DDR information. This data is useful for drivers handling DDR SDRAM controllers. +config IRQ_POLL + bool "IRQ polling library" + help + Helper library to poll interrupt mitigation using polling. + config MPILIB tristate select CLZ_TAB diff --git a/lib/Makefile b/lib/Makefile index 7f1de26..1478ae2 100644 --- a/lib/Makefile +++ b/lib/Makefile @@ -164,6 +164,7 @@ obj-$(CONFIG_GENERIC_NET_UTILS) += net_utils.o obj-$(CONFIG_SG_SPLIT) += sg_split.o obj-$(CONFIG_STMP_DEVICE) += stmp_device.o +obj-$(CONFIG_IRQ_POLL) += irq_poll.o libfdt_files = fdt.o fdt_ro.o fdt_wip.o fdt_rw.o fdt_sw.o fdt_strerror.o \ fdt_empty_tree.o diff --git a/block/blk-iopoll.c b/lib/irq_poll.c index 0736729..2836620 100644 --- a/block/blk-iopoll.c +++ b/lib/irq_poll.c @@ -6,84 +6,84 @@ #include <linux/module.h> #include <linux/init.h> #include <linux/bio.h> -#include <linux/blkdev.h> #include <linux/interrupt.h> #include <linux/cpu.h> -#include <linux/blk-iopoll.h> +#include <linux/irq_poll.h> #include <linux/delay.h> -#include "blk.h" - -static unsigned int blk_iopoll_budget __read_mostly = 256; +static unsigned int irq_poll_budget __read_mostly = 256; static DEFINE_PER_CPU(struct list_head, blk_cpu_iopoll); /** - * blk_iopoll_sched - Schedule a run of the iopoll handler + * irq_poll_sched - Schedule a run of the iopoll handler * @iop: The parent iopoll structure * * Description: - * Add this blk_iopoll structure to the pending poll list and trigger the - * raise of the blk iopoll softirq. The driver must already have gotten a - * successful return from blk_iopoll_sched_prep() before calling this. + * Add this irq_poll structure to the pending poll list and trigger the + * raise of the blk iopoll softirq. 
**/ -void blk_iopoll_sched(struct blk_iopoll *iop) +void irq_poll_sched(struct irq_poll *iop) { unsigned long flags; + if (test_bit(IRQ_POLL_F_DISABLE, &iop->state)) + return; + if (!test_and_set_bit(IRQ_POLL_F_SCHED, &iop->state)) + return; + local_irq_save(flags); list_add_tail(&iop->list, this_cpu_ptr(&blk_cpu_iopoll)); - __raise_softirq_irqoff(BLOCK_IOPOLL_SOFTIRQ); + __raise_softirq_irqoff(IRQ_POLL_SOFTIRQ); local_irq_restore(flags); } -EXPORT_SYMBOL(blk_iopoll_sched); +EXPORT_SYMBOL(irq_poll_sched); /** - * __blk_iopoll_complete - Mark this @iop as un-polled again + * __irq_poll_complete - Mark this @iop as un-polled again * @iop: The parent iopoll structure * * Description: - * See blk_iopoll_complete(). This function must be called with interrupts + * See irq_poll_complete(). This function must be called with interrupts * disabled. **/ -void __blk_iopoll_complete(struct blk_iopoll *iop) +static void __irq_poll_complete(struct irq_poll *iop) { list_del(&iop->list); smp_mb__before_atomic(); - clear_bit_unlock(IOPOLL_F_SCHED, &iop->state); + clear_bit_unlock(IRQ_POLL_F_SCHED, &iop->state); } -EXPORT_SYMBOL(__blk_iopoll_complete); /** - * blk_iopoll_complete - Mark this @iop as un-polled again + * irq_poll_complete - Mark this @iop as un-polled again * @iop: The parent iopoll structure * * Description: * If a driver consumes less than the assigned budget in its run of the * iopoll handler, it'll end the polled mode by calling this function. The - * iopoll handler will not be invoked again before blk_iopoll_sched_prep() + * iopoll handler will not be invoked again before irq_poll_sched() * is called. **/ -void blk_iopoll_complete(struct blk_iopoll *iop) +void irq_poll_complete(struct irq_poll *iop) { unsigned long flags; local_irq_save(flags); - __blk_iopoll_complete(iop); + __irq_poll_complete(iop); local_irq_restore(flags); } -EXPORT_SYMBOL(blk_iopoll_complete); +EXPORT_SYMBOL(irq_poll_complete); -static void blk_iopoll_softirq(struct softirq_action *h) +static void irq_poll_softirq(struct softirq_action *h) { struct list_head *list = this_cpu_ptr(&blk_cpu_iopoll); - int rearm = 0, budget = blk_iopoll_budget; + int rearm = 0, budget = irq_poll_budget; unsigned long start_time = jiffies; local_irq_disable(); while (!list_empty(list)) { - struct blk_iopoll *iop; + struct irq_poll *iop; int work, weight; /* @@ -101,11 +101,11 @@ static void blk_iopoll_softirq(struct softirq_action *h) * entries to the tail of this list, and only ->poll() * calls can remove this head entry from the list. */ - iop = list_entry(list->next, struct blk_iopoll, list); + iop = list_entry(list->next, struct irq_poll, list); weight = iop->weight; work = 0; - if (test_bit(IOPOLL_F_SCHED, &iop->state)) + if (test_bit(IRQ_POLL_F_SCHED, &iop->state)) work = iop->poll(iop, weight); budget -= work; @@ -121,72 +121,70 @@ static void blk_iopoll_softirq(struct softirq_action *h) * move the instance around on the list at-will. */ if (work >= weight) { - if (blk_iopoll_disable_pending(iop)) - __blk_iopoll_complete(iop); + if (test_bit(IRQ_POLL_F_DISABLE, &iop->state)) + __irq_poll_complete(iop); else list_move_tail(&iop->list, list); } } if (rearm) - __raise_softirq_irqoff(BLOCK_IOPOLL_SOFTIRQ); + __raise_softirq_irqoff(IRQ_POLL_SOFTIRQ); local_irq_enable(); } /** - * blk_iopoll_disable - Disable iopoll on this @iop + * irq_poll_disable - Disable iopoll on this @iop * @iop: The parent iopoll structure * * Description: * Disable io polling and wait for any pending callbacks to have completed. 
**/ -void blk_iopoll_disable(struct blk_iopoll *iop) +void irq_poll_disable(struct irq_poll *iop) { - set_bit(IOPOLL_F_DISABLE, &iop->state); - while (test_and_set_bit(IOPOLL_F_SCHED, &iop->state)) + set_bit(IRQ_POLL_F_DISABLE, &iop->state); + while (test_and_set_bit(IRQ_POLL_F_SCHED, &iop->state)) msleep(1); - clear_bit(IOPOLL_F_DISABLE, &iop->state); + clear_bit(IRQ_POLL_F_DISABLE, &iop->state); } -EXPORT_SYMBOL(blk_iopoll_disable); +EXPORT_SYMBOL(irq_poll_disable); /** - * blk_iopoll_enable - Enable iopoll on this @iop + * irq_poll_enable - Enable iopoll on this @iop * @iop: The parent iopoll structure * * Description: * Enable iopoll on this @iop. Note that the handler run will not be * scheduled, it will only mark it as active. **/ -void blk_iopoll_enable(struct blk_iopoll *iop) +void irq_poll_enable(struct irq_poll *iop) { - BUG_ON(!test_bit(IOPOLL_F_SCHED, &iop->state)); + BUG_ON(!test_bit(IRQ_POLL_F_SCHED, &iop->state)); smp_mb__before_atomic(); - clear_bit_unlock(IOPOLL_F_SCHED, &iop->state); + clear_bit_unlock(IRQ_POLL_F_SCHED, &iop->state); } -EXPORT_SYMBOL(blk_iopoll_enable); +EXPORT_SYMBOL(irq_poll_enable); /** - * blk_iopoll_init - Initialize this @iop + * irq_poll_init - Initialize this @iop * @iop: The parent iopoll structure * @weight: The default weight (or command completion budget) * @poll_fn: The handler to invoke * * Description: - * Initialize this blk_iopoll structure. Before being actively used, the - * driver must call blk_iopoll_enable(). + * Initialize and enable this irq_poll structure. **/ -void blk_iopoll_init(struct blk_iopoll *iop, int weight, blk_iopoll_fn *poll_fn) +void irq_poll_init(struct irq_poll *iop, int weight, irq_poll_fn *poll_fn) { memset(iop, 0, sizeof(*iop)); INIT_LIST_HEAD(&iop->list); iop->weight = weight; iop->poll = poll_fn; - set_bit(IOPOLL_F_SCHED, &iop->state); } -EXPORT_SYMBOL(blk_iopoll_init); +EXPORT_SYMBOL(irq_poll_init); -static int blk_iopoll_cpu_notify(struct notifier_block *self, +static int irq_poll_cpu_notify(struct notifier_block *self, unsigned long action, void *hcpu) { /* @@ -199,26 +197,26 @@ static int blk_iopoll_cpu_notify(struct notifier_block *self, local_irq_disable(); list_splice_init(&per_cpu(blk_cpu_iopoll, cpu), this_cpu_ptr(&blk_cpu_iopoll)); - __raise_softirq_irqoff(BLOCK_IOPOLL_SOFTIRQ); + __raise_softirq_irqoff(IRQ_POLL_SOFTIRQ); local_irq_enable(); } return NOTIFY_OK; } -static struct notifier_block blk_iopoll_cpu_notifier = { - .notifier_call = blk_iopoll_cpu_notify, +static struct notifier_block irq_poll_cpu_notifier = { + .notifier_call = irq_poll_cpu_notify, }; -static __init int blk_iopoll_setup(void) +static __init int irq_poll_setup(void) { int i; for_each_possible_cpu(i) INIT_LIST_HEAD(&per_cpu(blk_cpu_iopoll, i)); - open_softirq(BLOCK_IOPOLL_SOFTIRQ, blk_iopoll_softirq); - register_hotcpu_notifier(&blk_iopoll_cpu_notifier); + open_softirq(IRQ_POLL_SOFTIRQ, irq_poll_softirq); + register_hotcpu_notifier(&irq_poll_cpu_notifier); return 0; } -subsys_initcall(blk_iopoll_setup); +subsys_initcall(irq_poll_setup); diff --git a/tools/lib/traceevent/event-parse.c b/tools/lib/traceevent/event-parse.c index 2a912df..af5a316 100644 --- a/tools/lib/traceevent/event-parse.c +++ b/tools/lib/traceevent/event-parse.c @@ -3746,7 +3746,7 @@ static const struct flag flags[] = { { "NET_TX_SOFTIRQ", 2 }, { "NET_RX_SOFTIRQ", 3 }, { "BLOCK_SOFTIRQ", 4 }, - { "BLOCK_IOPOLL_SOFTIRQ", 5 }, + { "IRQ_POLL_SOFTIRQ", 5 }, { "TASKLET_SOFTIRQ", 6 }, { "SCHED_SOFTIRQ", 7 }, { "HRTIMER_SOFTIRQ", 8 }, diff --git 
a/tools/perf/util/trace-event-parse.c b/tools/perf/util/trace-event-parse.c index 8ff7d62..33b52ea 100644 --- a/tools/perf/util/trace-event-parse.c +++ b/tools/perf/util/trace-event-parse.c @@ -209,7 +209,7 @@ static const struct flag flags[] = { { "NET_TX_SOFTIRQ", 2 }, { "NET_RX_SOFTIRQ", 3 }, { "BLOCK_SOFTIRQ", 4 }, - { "BLOCK_IOPOLL_SOFTIRQ", 5 }, + { "IRQ_POLL_SOFTIRQ", 5 }, { "TASKLET_SOFTIRQ", 6 }, { "SCHED_SOFTIRQ", 7 }, { "HRTIMER_SOFTIRQ", 8 },
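For reference, the consumer-side CQ setup that the ib_srpt hunks above convert to: ib_alloc_cq() sizes the queue, selects a completion vector, and picks a dispatch mode (IB_POLL_DIRECT, IB_POLL_SOFTIRQ via irq_poll, or IB_POLL_WORKQUEUE), while ib_free_cq() replaces ib_destroy_cq(). A sketch mirroring srpt_create_ch_ib() — my_create_cq() and my_destroy_cq() are hypothetical wrappers; the ib_* calls are the API added by this series:

#include <rdma/ib_verbs.h>

/* IB_POLL_WORKQUEUE runs completions from ib_comp_wq, so the ib_cqe
 * ->done callbacks may sleep; IB_POLL_SOFTIRQ would not allow that. */
static struct ib_cq *my_create_cq(struct ib_device *device, void *priv,
				  int nr_cqe)
{
	/* @priv comes back as cq->cq_context in every ->done callback,
	 * like the srpt_rdma_ch pointer in the handlers above.
	 * The caller must check the result with IS_ERR(). */
	return ib_alloc_cq(device, priv, nr_cqe, 0 /* comp_vector */,
			   IB_POLL_WORKQUEUE);
}

static void my_destroy_cq(struct ib_cq *cq)
{
	ib_free_cq(cq);		/* pairs with ib_alloc_cq() */
}

With IB_POLL_DIRECT no dispatching happens at all; the consumer instead polls on its own by calling ib_process_cq_direct(cq, budget) from its own context.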