From a00ebd1cf12c378a1d4f7a1d6daf1d76c1eaad82 Mon Sep 17 00:00:00 2001 From: Lars Ellenberg Date: Thu, 11 May 2017 10:21:46 +0200 Subject: drbd: fix request leak introduced by locking/atomic, kref: Kill kref_sub() When killing kref_sub(), the unconditional additional kref_get() was not properly paired with the necessary kref_put(), causing a leak of struct drbd_requests (~ 224 Bytes) per submitted bio, and breaking DRBD in general, as the destructor of those "drbd_requests" does more than just the mempoll_free(). Fixes: bdfafc4ffdd2 ("locking/atomic, kref: Kill kref_sub()") Signed-off-by: Lars Ellenberg Cc: stable@vger.kernel.org # v4.11 Signed-off-by: Jens Axboe --- drivers/block/drbd/drbd_req.c | 27 +++++++++++++++------------ 1 file changed, 15 insertions(+), 12 deletions(-) diff --git a/drivers/block/drbd/drbd_req.c b/drivers/block/drbd/drbd_req.c index b5730e1..6566243 100644 --- a/drivers/block/drbd/drbd_req.c +++ b/drivers/block/drbd/drbd_req.c @@ -315,24 +315,32 @@ void drbd_req_complete(struct drbd_request *req, struct bio_and_error *m) } /* still holds resource->req_lock */ -static int drbd_req_put_completion_ref(struct drbd_request *req, struct bio_and_error *m, int put) +static void drbd_req_put_completion_ref(struct drbd_request *req, struct bio_and_error *m, int put) { struct drbd_device *device = req->device; D_ASSERT(device, m || (req->rq_state & RQ_POSTPONED)); + if (!put) + return; + if (!atomic_sub_and_test(put, &req->completion_ref)) - return 0; + return; drbd_req_complete(req, m); + /* local completion may still come in later, + * we need to keep the req object around. */ + if (req->rq_state & RQ_LOCAL_ABORTED) + return; + if (req->rq_state & RQ_POSTPONED) { /* don't destroy the req object just yet, * but queue it for retry */ drbd_restart_request(req); - return 0; + return; } - return 1; + kref_put(&req->kref, drbd_req_destroy); } static void set_if_null_req_next(struct drbd_peer_device *peer_device, struct drbd_request *req) @@ -519,12 +527,8 @@ static void mod_rq_state(struct drbd_request *req, struct bio_and_error *m, if (req->i.waiting) wake_up(&device->misc_wait); - if (c_put) { - if (drbd_req_put_completion_ref(req, m, c_put)) - kref_put(&req->kref, drbd_req_destroy); - } else { - kref_put(&req->kref, drbd_req_destroy); - } + drbd_req_put_completion_ref(req, m, c_put); + kref_put(&req->kref, drbd_req_destroy); } static void drbd_report_io_error(struct drbd_device *device, struct drbd_request *req) @@ -1366,8 +1370,7 @@ nodata: } out: - if (drbd_req_put_completion_ref(req, &m, 1)) - kref_put(&req->kref, drbd_req_destroy); + drbd_req_put_completion_ref(req, &m, 1); spin_unlock_irq(&resource->req_lock); /* Even though above is a kref_put(), this is safe. -- cgit v1.1 From 2d4456c73a487abe53863e10641c2f73537edf5c Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Thu, 11 May 2017 10:27:35 -0500 Subject: block: xen-blkback: add null check to avoid null pointer dereference Add null check before calling xen_blkif_put() to avoid potential null pointer dereference. Addresses-Coverity-ID: 1350942 Cc: Juergen Gross Signed-off-by: Gustavo A. R. Silva Signed-off-by: Konrad Rzeszutek Wilk --- drivers/block/xen-blkback/xenbus.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/drivers/block/xen-blkback/xenbus.c b/drivers/block/xen-blkback/xenbus.c index 8fe61b5..1f3dfaa 100644 --- a/drivers/block/xen-blkback/xenbus.c +++ b/drivers/block/xen-blkback/xenbus.c @@ -504,11 +504,13 @@ static int xen_blkbk_remove(struct xenbus_device *dev) dev_set_drvdata(&dev->dev, NULL); - if (be->blkif) + if (be->blkif) { xen_blkif_disconnect(be->blkif); - /* Put the reference we set in xen_blkif_alloc(). */ - xen_blkif_put(be->blkif); + /* Put the reference we set in xen_blkif_alloc(). */ + xen_blkif_put(be->blkif); + } + kfree(be->mode); kfree(be); return 0; -- cgit v1.1 From 69c8ebf83213e6165b13d94ec599b861467ee2dc Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Tue, 16 May 2017 12:22:22 +0200 Subject: fuseblk: Fix warning in super_setup_bdi_name() Commit 5f7f7543f52e "fuse: Convert to separately allocated bdi" didn't properly handle fuseblk filesystem. When fuse_bdi_init() is called for that filesystem type, sb->s_bdi is already initialized (by set_bdev_super()) to point to block device's bdi and consequently super_setup_bdi_name() complains about this fact when reseting bdi to the private one. Fix the problem by properly dropping bdi reference in fuse_bdi_init() before creating a private bdi in super_setup_bdi_name(). Fixes: 5f7f7543f52e ("fuse: Convert to separately allocated bdi") Reported-by: Rakesh Pandit Tested-by: Rakesh Pandit Signed-off-by: Jan Kara Signed-off-by: Jens Axboe --- fs/fuse/inode.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c index 73cf051..9da1a61 100644 --- a/fs/fuse/inode.c +++ b/fs/fuse/inode.c @@ -972,8 +972,15 @@ static int fuse_bdi_init(struct fuse_conn *fc, struct super_block *sb) int err; char *suffix = ""; - if (sb->s_bdev) + if (sb->s_bdev) { suffix = "-fuseblk"; + /* + * sb->s_bdi points to blkdev's bdi however we want to redirect + * it to our private bdi... + */ + bdi_put(sb->s_bdi); + sb->s_bdi = &noop_backing_dev_info; + } err = super_setup_bdi_name(sb, "%u:%u%s", MAJOR(fc->dev), MINOR(fc->dev), suffix); if (err) -- cgit v1.1 From 5f3394530fbe90d3bcd1c204618960bc50236578 Mon Sep 17 00:00:00 2001 From: Shaohua Li Date: Fri, 19 May 2017 08:04:59 -0700 Subject: blktrace: fix integer parse sscanf is a very poor way to parse integer. For example, I input "discard" for act_mask, it gets 0xd and completely messes up. Using correct API to do integer parse. This patch also makes attributes accept any base of integer. Signed-off-by: Shaohua Li Signed-off-by: Jens Axboe --- kernel/trace/blktrace.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/kernel/trace/blktrace.c b/kernel/trace/blktrace.c index bd8ae8d..193c5f5 100644 --- a/kernel/trace/blktrace.c +++ b/kernel/trace/blktrace.c @@ -1662,14 +1662,14 @@ static ssize_t sysfs_blk_trace_attr_store(struct device *dev, goto out; if (attr == &dev_attr_act_mask) { - if (sscanf(buf, "%llx", &value) != 1) { + if (kstrtoull(buf, 0, &value)) { /* Assume it is a list of trace category names */ ret = blk_trace_str2mask(buf); if (ret < 0) goto out; value = ret; } - } else if (sscanf(buf, "%llu", &value) != 1) + } else if (kstrtoull(buf, 0, &value)) goto out; ret = -ENXIO; -- cgit v1.1 From f63572dff1421b6ca6abce71d46e03411e605c94 Mon Sep 17 00:00:00 2001 From: Jon Derrick Date: Fri, 5 May 2017 14:52:06 -0600 Subject: nvme: unmap CMB and remove sysfs file in reset path CMB doesn't get unmapped until removal while getting remapped on every reset. Add the unmapping and sysfs file removal to the reset path in nvme_pci_disable to match the mapping path in nvme_pci_enable. Fixes: 202021c1a ("nvme : Add sysfs entry for NVMe CMBs when appropriate") Signed-off-by: Jon Derrick Acked-by: Keith Busch Reviewed-By: Stephen Bates Cc: # 4.9+ Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- drivers/nvme/host/pci.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index 56a315b..0866f64 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -1510,6 +1510,11 @@ static inline void nvme_release_cmb(struct nvme_dev *dev) if (dev->cmb) { iounmap(dev->cmb); dev->cmb = NULL; + if (dev->cmbsz) { + sysfs_remove_file_from_group(&dev->ctrl.device->kobj, + &dev_attr_cmb.attr, NULL); + dev->cmbsz = 0; + } } } @@ -1783,6 +1788,7 @@ static void nvme_pci_disable(struct nvme_dev *dev) { struct pci_dev *pdev = to_pci_dev(dev->dev); + nvme_release_cmb(dev); pci_free_irq_vectors(pdev); if (pci_is_enabled(pdev)) { @@ -2188,7 +2194,6 @@ static void nvme_remove(struct pci_dev *pdev) nvme_dev_disable(dev, true); nvme_dev_remove_admin(dev); nvme_free_queues(dev, 0); - nvme_release_cmb(dev); nvme_release_prp_pools(dev); nvme_dev_unmap(dev); nvme_put_ctrl(&dev->ctrl); -- cgit v1.1 From 4123109050a869a8871e58a50f28f383d41e49ad Mon Sep 17 00:00:00 2001 From: James Smart Date: Fri, 5 May 2017 16:13:02 -0700 Subject: nvme-fc: correct port role bits FC Port roles is a bit mask, not individual values. Correct nvme definitions to unique bits. Signed-off-by: James Smart Reviewed-by: Johannes Thumshirn Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- include/linux/nvme-fc-driver.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/include/linux/nvme-fc-driver.h b/include/linux/nvme-fc-driver.h index 0db3715..12e344b 100644 --- a/include/linux/nvme-fc-driver.h +++ b/include/linux/nvme-fc-driver.h @@ -27,8 +27,8 @@ /* FC Port role bitmask - can merge with FC Port Roles in fc transport */ #define FC_PORT_ROLE_NVME_INITIATOR 0x10 -#define FC_PORT_ROLE_NVME_TARGET 0x11 -#define FC_PORT_ROLE_NVME_DISCOVERY 0x12 +#define FC_PORT_ROLE_NVME_TARGET 0x20 +#define FC_PORT_ROLE_NVME_DISCOVERY 0x40 /** -- cgit v1.1 From 85e6a6adf8de7f992e01d2c3c59d9875d658b276 Mon Sep 17 00:00:00 2001 From: James Smart Date: Fri, 5 May 2017 16:13:15 -0700 Subject: nvme-fc: require target or discovery role for fc-nvme targets In order to create an association, the remoteport must be serving either a target role or a discovery role. Signed-off-by: James Smart Reviewed-by: Johannes Thumshirn Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- drivers/nvme/host/fc.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/nvme/host/fc.c b/drivers/nvme/host/fc.c index 70e689b..912d457 100644 --- a/drivers/nvme/host/fc.c +++ b/drivers/nvme/host/fc.c @@ -2720,6 +2720,12 @@ nvme_fc_init_ctrl(struct device *dev, struct nvmf_ctrl_options *opts, unsigned long flags; int ret, idx; + if (!(rport->remoteport.port_role & + (FC_PORT_ROLE_NVME_DISCOVERY | FC_PORT_ROLE_NVME_TARGET))) { + ret = -EBADR; + goto out_fail; + } + ctrl = kzalloc(sizeof(*ctrl), GFP_KERNEL); if (!ctrl) { ret = -ENOMEM; -- cgit v1.1 From 2952a879bacbfae8b03fd886754e64fe14b8041e Mon Sep 17 00:00:00 2001 From: James Smart Date: Tue, 25 Apr 2017 15:32:01 -0700 Subject: nvme-fc: stop queues on error detection Per the recommendation by Sagi on: http://lists.infradead.org/pipermail/linux-nvme/2017-April/009261.html Rather than waiting for reset work thread to stop queues and abort the ios, immediately stop the queues on error detection. Reset thread will restop the queues (as it's called on other paths), but it does not appear to have a side effect. Signed-off-by: James Smart Reviewed-by: Johannes Thumshirn Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- drivers/nvme/host/fc.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/nvme/host/fc.c b/drivers/nvme/host/fc.c index 912d457..dca7165 100644 --- a/drivers/nvme/host/fc.c +++ b/drivers/nvme/host/fc.c @@ -1754,6 +1754,10 @@ nvme_fc_error_recovery(struct nvme_fc_ctrl *ctrl, char *errmsg) dev_info(ctrl->ctrl.device, "NVME-FC{%d}: resetting controller\n", ctrl->cnum); + /* stop the queues on error, cleanup is in reset thread */ + if (ctrl->queue_count > 1) + nvme_stop_queues(&ctrl->ctrl); + if (!nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_RECONNECTING)) { dev_err(ctrl->ctrl.device, "NVME-FC{%d}: error_recovery: Couldn't change state " -- cgit v1.1 From 4b8ba5fa525bc8bdaaed2a5c5433f0f2008d7bc5 Mon Sep 17 00:00:00 2001 From: James Smart Date: Tue, 25 Apr 2017 16:23:09 -0700 Subject: nvmet-fc: remove target cpu scheduling flag Remove NVMET_FCTGTFEAT_NEEDS_CMD_CPUSCHED. It's unnecessary. Signed-off-by: James Smart Reviewed-by: Johannes Thumshirn Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- drivers/nvme/target/fc.c | 4 +--- drivers/nvme/target/fcloop.c | 1 - drivers/scsi/lpfc/lpfc_nvmet.c | 1 - include/linux/nvme-fc-driver.h | 12 ++---------- 4 files changed, 3 insertions(+), 15 deletions(-) diff --git a/drivers/nvme/target/fc.c b/drivers/nvme/target/fc.c index 62eba29..2006fae 100644 --- a/drivers/nvme/target/fc.c +++ b/drivers/nvme/target/fc.c @@ -517,9 +517,7 @@ nvmet_fc_queue_to_cpu(struct nvmet_fc_tgtport *tgtport, int qid) { int cpu, idx, cnt; - if (!(tgtport->ops->target_features & - NVMET_FCTGTFEAT_NEEDS_CMD_CPUSCHED) || - tgtport->ops->max_hw_queues == 1) + if (tgtport->ops->max_hw_queues == 1) return WORK_CPU_UNBOUND; /* Simple cpu selection based on qid modulo active cpu count */ diff --git a/drivers/nvme/target/fcloop.c b/drivers/nvme/target/fcloop.c index 15551ef..294a661 100644 --- a/drivers/nvme/target/fcloop.c +++ b/drivers/nvme/target/fcloop.c @@ -698,7 +698,6 @@ static struct nvmet_fc_target_template tgttemplate = { .dma_boundary = FCLOOP_DMABOUND_4G, /* optional features */ .target_features = NVMET_FCTGTFEAT_CMD_IN_ISR | - NVMET_FCTGTFEAT_NEEDS_CMD_CPUSCHED | NVMET_FCTGTFEAT_OPDONE_IN_ISR, /* sizes of additional private data for data structures */ .target_priv_sz = sizeof(struct fcloop_tport), diff --git a/drivers/scsi/lpfc/lpfc_nvmet.c b/drivers/scsi/lpfc/lpfc_nvmet.c index 94434e6..0488580 100644 --- a/drivers/scsi/lpfc/lpfc_nvmet.c +++ b/drivers/scsi/lpfc/lpfc_nvmet.c @@ -764,7 +764,6 @@ lpfc_nvmet_create_targetport(struct lpfc_hba *phba) lpfc_tgttemplate.max_sgl_segments = phba->cfg_nvme_seg_cnt + 1; lpfc_tgttemplate.max_hw_queues = phba->cfg_nvme_io_channel; lpfc_tgttemplate.target_features = NVMET_FCTGTFEAT_READDATA_RSP | - NVMET_FCTGTFEAT_NEEDS_CMD_CPUSCHED | NVMET_FCTGTFEAT_CMD_IN_ISR | NVMET_FCTGTFEAT_OPDONE_IN_ISR; diff --git a/include/linux/nvme-fc-driver.h b/include/linux/nvme-fc-driver.h index 12e344b..6c8c5d8 100644 --- a/include/linux/nvme-fc-driver.h +++ b/include/linux/nvme-fc-driver.h @@ -642,15 +642,7 @@ enum { * sequence in one LLDD operation. Errors during Data * sequence transmit must not allow RSP sequence to be sent. */ - NVMET_FCTGTFEAT_NEEDS_CMD_CPUSCHED = (1 << 1), - /* Bit 1: When 0, the LLDD will deliver FCP CMD - * on the CPU it should be affinitized to. Thus work will - * be scheduled on the cpu received on. When 1, the LLDD - * may not deliver the CMD on the CPU it should be worked - * on. The transport should pick a cpu to schedule the work - * on. - */ - NVMET_FCTGTFEAT_CMD_IN_ISR = (1 << 2), + NVMET_FCTGTFEAT_CMD_IN_ISR = (1 << 1), /* Bit 2: When 0, the LLDD is calling the cmd rcv handler * in a non-isr context, allowing the transport to finish * op completion in the calling context. When 1, the LLDD @@ -658,7 +650,7 @@ enum { * requiring the transport to transition to a workqueue * for op completion. */ - NVMET_FCTGTFEAT_OPDONE_IN_ISR = (1 << 3), + NVMET_FCTGTFEAT_OPDONE_IN_ISR = (1 << 2), /* Bit 3: When 0, the LLDD is calling the op done handler * in a non-isr context, allowing the transport to finish * op completion in the calling context. When 1, the LLDD -- cgit v1.1 From 549f01ae7b913355bea76100d3f17694bc9ec769 Mon Sep 17 00:00:00 2001 From: Vijay Immanuel Date: Mon, 8 May 2017 16:38:35 -0700 Subject: nvmet: release the sq ref on rdma read errors On rdma read errors, release the sq ref that was taken when the req was initialized. This avoids a hang in nvmet_sq_destroy() when the queue is being freed. Signed-off-by: Vijay Immanuel Reviewed-by: Sagi Grimberg Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- drivers/nvme/target/core.c | 6 ++++++ drivers/nvme/target/nvmet.h | 1 + drivers/nvme/target/rdma.c | 1 + 3 files changed, 8 insertions(+) diff --git a/drivers/nvme/target/core.c b/drivers/nvme/target/core.c index cf90713..eb9399a 100644 --- a/drivers/nvme/target/core.c +++ b/drivers/nvme/target/core.c @@ -529,6 +529,12 @@ fail: } EXPORT_SYMBOL_GPL(nvmet_req_init); +void nvmet_req_uninit(struct nvmet_req *req) +{ + percpu_ref_put(&req->sq->ref); +} +EXPORT_SYMBOL_GPL(nvmet_req_uninit); + static inline bool nvmet_cc_en(u32 cc) { return cc & 0x1; diff --git a/drivers/nvme/target/nvmet.h b/drivers/nvme/target/nvmet.h index 7cb77ba..cfc5c7f 100644 --- a/drivers/nvme/target/nvmet.h +++ b/drivers/nvme/target/nvmet.h @@ -261,6 +261,7 @@ u16 nvmet_parse_fabrics_cmd(struct nvmet_req *req); bool nvmet_req_init(struct nvmet_req *req, struct nvmet_cq *cq, struct nvmet_sq *sq, struct nvmet_fabrics_ops *ops); +void nvmet_req_uninit(struct nvmet_req *req); void nvmet_req_complete(struct nvmet_req *req, u16 status); void nvmet_cq_setup(struct nvmet_ctrl *ctrl, struct nvmet_cq *cq, u16 qid, diff --git a/drivers/nvme/target/rdma.c b/drivers/nvme/target/rdma.c index 99c6901..9e45cde 100644 --- a/drivers/nvme/target/rdma.c +++ b/drivers/nvme/target/rdma.c @@ -567,6 +567,7 @@ static void nvmet_rdma_read_data_done(struct ib_cq *cq, struct ib_wc *wc) rsp->n_rdma = 0; if (unlikely(wc->status != IB_WC_SUCCESS)) { + nvmet_req_uninit(&rsp->req); nvmet_rdma_release_rsp(rsp); if (wc->status != IB_WC_WR_FLUSH_ERR) { pr_info("RDMA READ for CQE 0x%p failed with status %s (%d).\n", -- cgit v1.1