From 8a82dbf19129dde9e6fc9ab25a00dbc7569abe6a Mon Sep 17 00:00:00 2001 From: James Smart Date: Mon, 9 Oct 2017 13:39:44 -0700 Subject: nvme-fc: fix iowait hang MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add missing iowait head initialization. Fix irqsave vs irq: wait_event_lock_irq() doesn't do irq save/restore Fixes: 36715cf4b366 ("nvme_fc: replace ioabort msleep loop with completion”) Cc: # 4.13 Signed-off-by: James Smart Reviewed-by: Sagi Grimberg Reviewed-by: Himanshu Madhani Tested-by: Himanshu Madhani Signed-off-by: Christoph Hellwig --- drivers/nvme/host/fc.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'drivers/nvme/host/fc.c') diff --git a/drivers/nvme/host/fc.c b/drivers/nvme/host/fc.c index af075e9..8182b19 100644 --- a/drivers/nvme/host/fc.c +++ b/drivers/nvme/host/fc.c @@ -2545,10 +2545,10 @@ nvme_fc_delete_association(struct nvme_fc_ctrl *ctrl) nvme_fc_abort_aen_ops(ctrl); /* wait for all io that had to be aborted */ - spin_lock_irqsave(&ctrl->lock, flags); + spin_lock_irq(&ctrl->lock); wait_event_lock_irq(ctrl->ioabort_wait, ctrl->iocnt == 0, ctrl->lock); ctrl->flags &= ~FCCTRL_TERMIO; - spin_unlock_irqrestore(&ctrl->lock, flags); + spin_unlock_irq(&ctrl->lock); nvme_fc_term_aen_ops(ctrl); @@ -2760,6 +2760,7 @@ nvme_fc_init_ctrl(struct device *dev, struct nvmf_ctrl_options *opts, ctrl->rport = rport; ctrl->dev = lport->dev; ctrl->cnum = idx; + init_waitqueue_head(&ctrl->ioabort_wait); get_device(ctrl->dev); kref_init(&ctrl->ref); -- cgit v1.1 From 17c4dc6eb7e1b2fb1ce6a52467e3be635224606e Mon Sep 17 00:00:00 2001 From: James Smart Date: Mon, 9 Oct 2017 16:39:22 -0700 Subject: nvme-fc: retry initial controller connections 3 times Currently, if a frame is lost of command fails as part of initial association create for a new controller, the new controller connection request will immediately fail. Add in an immediate 3 retry loop before giving up. Signed-off-by: James Smart Signed-off-by: Christoph Hellwig --- drivers/nvme/host/fc.c | 32 ++++++++++++++++++++++++++++++-- 1 file changed, 30 insertions(+), 2 deletions(-) (limited to 'drivers/nvme/host/fc.c') diff --git a/drivers/nvme/host/fc.c b/drivers/nvme/host/fc.c index 8182b19..be49d0f 100644 --- a/drivers/nvme/host/fc.c +++ b/drivers/nvme/host/fc.c @@ -2734,7 +2734,7 @@ nvme_fc_init_ctrl(struct device *dev, struct nvmf_ctrl_options *opts, { struct nvme_fc_ctrl *ctrl; unsigned long flags; - int ret, idx; + int ret, idx, retry; if (!(rport->remoteport.port_role & (FC_PORT_ROLE_NVME_DISCOVERY | FC_PORT_ROLE_NVME_TARGET))) { @@ -2826,9 +2826,37 @@ nvme_fc_init_ctrl(struct device *dev, struct nvmf_ctrl_options *opts, list_add_tail(&ctrl->ctrl_list, &rport->ctrl_list); spin_unlock_irqrestore(&rport->lock, flags); - ret = nvme_fc_create_association(ctrl); + /* + * It's possible that transactions used to create the association + * may fail. Examples: CreateAssociation LS or CreateIOConnection + * LS gets dropped/corrupted/fails; or a frame gets dropped or a + * command times out for one of the actions to init the controller + * (Connect, Get/Set_Property, Set_Features, etc). Many of these + * transport errors (frame drop, LS failure) inherently must kill + * the association. The transport is coded so that any command used + * to create the association (prior to a LIVE state transition + * while NEW or RECONNECTING) will fail if it completes in error or + * times out. + * + * As such: as the connect request was mostly likely due to a + * udev event that discovered the remote port, meaning there is + * not an admin or script there to restart if the connect + * request fails, retry the initial connection creation up to + * three times before giving up and declaring failure. + */ + for (retry = 0; retry < 3; retry++) { + ret = nvme_fc_create_association(ctrl); + if (!ret) + break; + } + if (ret) { + /* couldn't schedule retry - fail out */ + dev_err(ctrl->ctrl.device, + "NVME-FC{%d}: Connect retry failed\n", ctrl->cnum); + ctrl->ctrl.opts = NULL; + /* initiate nvme ctrl ref counting teardown */ nvme_uninit_ctrl(&ctrl->ctrl); nvme_put_ctrl(&ctrl->ctrl); -- cgit v1.1