author     jimharris <jimharris@FreeBSD.org>    2013-03-26 19:50:46 +0000
committer  jimharris <jimharris@FreeBSD.org>    2013-03-26 19:50:46 +0000
commit     93fd264895a68a10d395cb75c7f67339f8811d4a (patch)
tree       82359dedd1ed281aa3fd524aef64c2d6bb5a5a28 /sys/dev/nvme
parent     bd33256583e92ae27c4215f57aa7bbdee3d50799 (diff)
Add controller reset capability to nvme(4) and the ability to explicitly
invoke it from nvmecontrol(8).
Controller reset will be performed in cases where I/O requests are
repeatedly timing out, where the controller reports an unrecoverable
condition, or where a reset is explicitly requested via ioctl or by an
nvme consumer. Since the controller may be in a state where it cannot
even process queue deletion requests, the reset makes no attempt to
clean anything up on the controller first.
Sponsored by: Intel
Reviewed by: carl
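
For context, the reset added here can be driven from userland through the new
NVME_RESET_CONTROLLER ioctl defined in the nvme.h hunk below. The following is
a minimal sketch of such a caller, assuming the controller device node is
/dev/nvme0; it is an illustration of the ioctl only, not the actual
nvmecontrol(8) source.

	#include <sys/ioctl.h>
	#include <dev/nvme/nvme.h>

	#include <err.h>
	#include <fcntl.h>
	#include <unistd.h>

	int
	main(void)
	{
		int fd;

		/* Controller node path is an assumption for this sketch. */
		fd = open("/dev/nvme0", O_RDWR);
		if (fd < 0)
			err(1, "open");

		/* Lands in nvme_ctrlr_ioctl(), which calls nvme_ctrlr_reset(). */
		if (ioctl(fd, NVME_RESET_CONTROLLER) < 0)
			err(1, "ioctl");

		close(fd);
		return (0);
	}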
Diffstat (limited to 'sys/dev/nvme')
-rw-r--r--   sys/dev/nvme/nvme.c           6
-rw-r--r--   sys/dev/nvme/nvme.h           1
-rw-r--r--   sys/dev/nvme/nvme_ctrlr.c    64
-rw-r--r--   sys/dev/nvme/nvme_ns.c       17
-rw-r--r--   sys/dev/nvme/nvme_private.h  18
-rw-r--r--   sys/dev/nvme/nvme_qpair.c   207
6 files changed, 214 insertions, 99 deletions
diff --git a/sys/dev/nvme/nvme.c b/sys/dev/nvme/nvme.c
index b5e010e..7630a1d 100644
--- a/sys/dev/nvme/nvme.c
+++ b/sys/dev/nvme/nvme.c
@@ -255,7 +255,7 @@ nvme_payload_map(void *arg, bus_dma_segment_t *seg, int nseg, int error)
 		}
 	}
 
-	nvme_qpair_submit_cmd(tr->qpair, tr);
+	nvme_qpair_submit_tracker(tr->qpair, tr);
 }
 
 static int
@@ -274,11 +274,11 @@ nvme_attach(device_t dev)
	 * to cc.en==0.  This is because we don't really know what status
	 * the controller was left in when boot handed off to OS.
	 */
-	status = nvme_ctrlr_reset(ctrlr);
+	status = nvme_ctrlr_hw_reset(ctrlr);
 	if (status != 0)
 		return (status);
 
-	status = nvme_ctrlr_reset(ctrlr);
+	status = nvme_ctrlr_hw_reset(ctrlr);
 	if (status != 0)
 		return (status);
diff --git a/sys/dev/nvme/nvme.h b/sys/dev/nvme/nvme.h
index a9d214e..654294a 100644
--- a/sys/dev/nvme/nvme.h
+++ b/sys/dev/nvme/nvme.h
@@ -37,6 +37,7 @@
 #define NVME_IDENTIFY_NAMESPACE	_IOR('n', 1, struct nvme_namespace_data)
 #define NVME_IO_TEST		_IOWR('n', 2, struct nvme_io_test)
 #define NVME_BIO_TEST		_IOWR('n', 4, struct nvme_io_test)
+#define NVME_RESET_CONTROLLER	_IO('n', 5)
 
 /*
  * Use to mark a command to apply to all namespaces, or to retrieve global
diff --git a/sys/dev/nvme/nvme_ctrlr.c b/sys/dev/nvme/nvme_ctrlr.c
index 668ac7f..4a4956d 100644
--- a/sys/dev/nvme/nvme_ctrlr.c
+++ b/sys/dev/nvme/nvme_ctrlr.c
@@ -405,13 +405,31 @@ nvme_ctrlr_enable(struct nvme_controller *ctrlr)
 }
 
 int
-nvme_ctrlr_reset(struct nvme_controller *ctrlr)
+nvme_ctrlr_hw_reset(struct nvme_controller *ctrlr)
 {
+	int i;
+
+	nvme_admin_qpair_disable(&ctrlr->adminq);
+	for (i = 0; i < ctrlr->num_io_queues; i++)
+		nvme_io_qpair_disable(&ctrlr->ioq[i]);
+
+	DELAY(100*1000);
 
 	nvme_ctrlr_disable(ctrlr);
 	return (nvme_ctrlr_enable(ctrlr));
 }
 
+void
+nvme_ctrlr_reset(struct nvme_controller *ctrlr)
+{
+	int status;
+
+	status = nvme_ctrlr_hw_reset(ctrlr);
+	DELAY(100*1000);
+	if (status == 0)
+		nvme_ctrlr_start(ctrlr);
+}
+
 static int
 nvme_ctrlr_identify(struct nvme_controller *ctrlr)
 {
@@ -626,6 +644,9 @@ void
 nvme_ctrlr_start(void *ctrlr_arg)
 {
 	struct nvme_controller *ctrlr = ctrlr_arg;
+	int i;
+
+	nvme_admin_qpair_enable(&ctrlr->adminq);
 
 	if (nvme_ctrlr_identify(ctrlr) != 0)
 		goto err;
@@ -642,16 +663,26 @@ nvme_ctrlr_start(void *ctrlr_arg)
 	nvme_ctrlr_configure_aer(ctrlr);
 	nvme_ctrlr_configure_int_coalescing(ctrlr);
 
+	for (i = 0; i < ctrlr->num_io_queues; i++)
+		nvme_io_qpair_enable(&ctrlr->ioq[i]);
+
 	ctrlr->is_started = TRUE;
 
 err:
-	/*
-	 * Initialize sysctls, even if controller failed to start, to
-	 * assist with debugging admin queue pair.
-	 */
-	nvme_sysctl_initialize_ctrlr(ctrlr);
-	config_intrhook_disestablish(&ctrlr->config_hook);
+	if (ctrlr->num_start_attempts == 0) {
+		/*
+		 * Initialize sysctls, even if controller failed to start, to
+		 * assist with debugging admin queue pair.  Only run this
+		 * code on the initial start attempt though, and not
+		 * subsequent start attempts due to controller-level resets.
+		 */
+		nvme_sysctl_initialize_ctrlr(ctrlr);
+		config_intrhook_disestablish(&ctrlr->config_hook);
+	}
+
+	ctrlr->num_start_attempts++;
 }
 
 static void
@@ -730,6 +761,9 @@ nvme_ctrlr_ioctl(struct cdev *cdev, u_long cmd, caddr_t arg, int flag,
 			return (ENXIO);
 		memcpy(arg, &ctrlr->cdata, sizeof(ctrlr->cdata));
 		break;
+	case NVME_RESET_CONTROLLER:
+		nvme_ctrlr_reset(ctrlr);
+		break;
 	default:
 		return (ENOTTY);
 	}
@@ -752,6 +786,7 @@ nvme_ctrlr_construct(struct nvme_controller *ctrlr, device_t dev)
 
 	ctrlr->dev = dev;
 	ctrlr->is_started = FALSE;
+	ctrlr->num_start_attempts = 0;
 
 	status = nvme_ctrlr_allocate_bar(ctrlr);
 
@@ -835,14 +870,10 @@ intx:
 void
 nvme_ctrlr_destruct(struct nvme_controller *ctrlr, device_t dev)
 {
-	struct nvme_namespace	*ns;
 	int			i;
 
-	for (i = 0; i < NVME_MAX_NAMESPACES; i++) {
-		ns = &ctrlr->ns[i];
-		if (ns->cdev)
-			destroy_dev(ns->cdev);
-	}
+	for (i = 0; i < NVME_MAX_NAMESPACES; i++)
+		nvme_ns_destruct(&ctrlr->ns[i]);
 
 	if (ctrlr->cdev)
 		destroy_dev(ctrlr->cdev);
@@ -853,13 +884,6 @@ nvme_ctrlr_destruct(struct nvme_controller *ctrlr, device_t dev)
 
 	free(ctrlr->ioq, M_NVME);
 
-	/* Manually abort outstanding async event requests. */
-	for (i = 0; i < ctrlr->num_aers; i++) {
-		nvme_qpair_manual_abort_request(&ctrlr->adminq,
-		    ctrlr->aer[i].req, NVME_SCT_GENERIC,
-		    NVME_SC_ABORTED_SQ_DELETION, FALSE);
-	}
-
 	nvme_admin_qpair_destroy(&ctrlr->adminq);
 
 	if (ctrlr->resource != NULL) {
diff --git a/sys/dev/nvme/nvme_ns.c b/sys/dev/nvme/nvme_ns.c
index a7d7c6f..27ff504 100644
--- a/sys/dev/nvme/nvme_ns.c
+++ b/sys/dev/nvme/nvme_ns.c
@@ -345,6 +345,13 @@ nvme_ns_construct(struct nvme_namespace *ns, uint16_t id,
 	if (ctrlr->cdata.vwc.present)
 		ns->flags |= NVME_NS_FLUSH_SUPPORTED;
 
+	/*
+	 * cdev may have already been created, if we are reconstructing the
+	 * namespace after a controller-level reset.
+	 */
+	if (ns->cdev != NULL)
+		return (0);
+
 	/*
 	 * MAKEDEV_ETERNAL was added in r210923, for cdevs that will never
 	 * be destroyed.  This avoids refcounting on the cdev object.
@@ -361,9 +368,15 @@ nvme_ns_construct(struct nvme_namespace *ns, uint16_t id,
 	    device_get_unit(ctrlr->dev), ns->id);
 #endif
 
-	if (ns->cdev) {
+	if (ns->cdev != NULL)
 		ns->cdev->si_drv1 = ns;
-	}
 
 	return (0);
 }
+
+void nvme_ns_destruct(struct nvme_namespace *ns)
+{
+
+	if (ns->cdev != NULL)
+		destroy_dev(ns->cdev);
+}
diff --git a/sys/dev/nvme/nvme_private.h b/sys/dev/nvme/nvme_private.h
index 4876bd5..695ce5e 100644
--- a/sys/dev/nvme/nvme_private.h
+++ b/sys/dev/nvme/nvme_private.h
@@ -180,6 +180,8 @@ struct nvme_qpair {
 
 	struct nvme_tracker	**act_tr;
 
+	boolean_t		is_enabled;
+
 	struct mtx		lock __aligned(CACHE_LINE_SIZE);
 
 } __aligned(CACHE_LINE_SIZE);
@@ -233,6 +235,7 @@ struct nvme_controller {
 	struct intr_config_hook	config_hook;
 	uint32_t		ns_identified;
 	uint32_t		queues_created;
+	uint32_t		num_start_attempts;
 
 	/* For shared legacy interrupt. */
 	int			rid;
@@ -361,7 +364,8 @@ void	nvme_payload_map_uio(void *arg, bus_dma_segment_t *seg, int nseg,
 
 int	nvme_ctrlr_construct(struct nvme_controller *ctrlr, device_t dev);
 void	nvme_ctrlr_destruct(struct nvme_controller *ctrlr, device_t dev);
-int	nvme_ctrlr_reset(struct nvme_controller *ctrlr);
+int	nvme_ctrlr_hw_reset(struct nvme_controller *ctrlr);
+void	nvme_ctrlr_reset(struct nvme_controller *ctrlr);
 /* ctrlr defined as void * to allow use with config_intrhook. */
 void	nvme_ctrlr_start(void *ctrlr_arg);
 void	nvme_ctrlr_submit_admin_request(struct nvme_controller *ctrlr,
@@ -373,21 +377,23 @@ void	nvme_qpair_construct(struct nvme_qpair *qpair, uint32_t id,
 			     uint16_t vector, uint32_t num_entries,
 			     uint32_t num_trackers, uint32_t max_xfer_size,
 			     struct nvme_controller *ctrlr);
-void	nvme_qpair_submit_cmd(struct nvme_qpair *qpair,
-			      struct nvme_tracker *tr);
+void	nvme_qpair_submit_tracker(struct nvme_qpair *qpair,
+				  struct nvme_tracker *tr);
 void	nvme_qpair_process_completions(struct nvme_qpair *qpair);
 void	nvme_qpair_submit_request(struct nvme_qpair *qpair,
 				  struct nvme_request *req);
-void	nvme_qpair_manual_abort_request(struct nvme_qpair *qpair,
-					struct nvme_request *req, uint32_t sct,
-					uint32_t sc, boolean_t print_on_error);
 
+void	nvme_admin_qpair_enable(struct nvme_qpair *qpair);
+void	nvme_admin_qpair_disable(struct nvme_qpair *qpair);
 void	nvme_admin_qpair_destroy(struct nvme_qpair *qpair);
 
+void	nvme_io_qpair_enable(struct nvme_qpair *qpair);
+void	nvme_io_qpair_disable(struct nvme_qpair *qpair);
 void	nvme_io_qpair_destroy(struct nvme_qpair *qpair);
 
 int	nvme_ns_construct(struct nvme_namespace *ns, uint16_t id,
 			  struct nvme_controller *ctrlr);
+void	nvme_ns_destruct(struct nvme_namespace *ns);
 
 int	nvme_ns_physio(struct cdev *dev, struct uio *uio, int ioflag);
diff --git a/sys/dev/nvme/nvme_qpair.c b/sys/dev/nvme/nvme_qpair.c
index 25b1a89..f98125f 100644
--- a/sys/dev/nvme/nvme_qpair.c
+++ b/sys/dev/nvme/nvme_qpair.c
@@ -87,23 +87,6 @@ nvme_completion_is_retry(const struct nvme_completion *cpl)
 	}
 }
 
-static struct nvme_tracker *
-nvme_qpair_find_tracker(struct nvme_qpair *qpair, struct nvme_request *req)
-{
-	struct nvme_tracker	*tr;
-	uint32_t		i;
-
-	KASSERT(req != NULL, ("%s: called with NULL req\n", __func__));
-
-	for (i = 0; i < qpair->num_entries; ++i) {
-		tr = qpair->act_tr[i];
-		if (tr != NULL && tr->req == req)
-			return (tr);
-	}
-
-	return (NULL);
-}
-
 static void
 nvme_qpair_construct_tracker(struct nvme_qpair *qpair, struct nvme_tracker *tr,
     uint16_t cid)
@@ -147,7 +130,7 @@ nvme_qpair_complete_tracker(struct nvme_qpair *qpair, struct nvme_tracker *tr,
 	callout_stop(&tr->timer);
 
 	if (retry)
-		nvme_qpair_submit_cmd(qpair, tr);
+		nvme_qpair_submit_tracker(qpair, tr);
 	else {
 		if (req->payload_size > 0 || req->uio != NULL)
 			bus_dmamap_unload(qpair->dma_tag,
@@ -169,6 +152,21 @@ nvme_qpair_complete_tracker(struct nvme_qpair *qpair, struct nvme_tracker *tr,
 	mtx_unlock(&qpair->lock);
 }
 
+static void
+nvme_qpair_manual_complete_tracker(struct nvme_qpair *qpair,
+    struct nvme_tracker *tr, uint32_t sct, uint32_t sc,
+    boolean_t print_on_error)
+{
+	struct nvme_completion	cpl;
+
+	memset(&cpl, 0, sizeof(cpl));
+	cpl.sqid = qpair->id;
+	cpl.cid = tr->cid;
+	cpl.sf_sct = sct;
+	cpl.sf_sc = sc;
+	nvme_qpair_complete_tracker(qpair, tr, &cpl, print_on_error);
+}
+
 void
 nvme_qpair_process_completions(struct nvme_qpair *qpair)
 {
@@ -177,6 +175,15 @@ nvme_qpair_process_completions(struct nvme_qpair *qpair)
 
 	qpair->num_intr_handler_calls++;
 
+	if (!qpair->is_enabled)
+		/*
+		 * qpair is not enabled, likely because a controller reset is
+		 * in progress.  Ignore the interrupt - any I/O that was
+		 * associated with this interrupt will get retried when the
+		 * reset is complete.
+		 */
+		return;
+
 	while (1) {
 		cpl = &qpair->cpl[qpair->cq_head];
@@ -236,15 +243,6 @@ nvme_qpair_construct(struct nvme_qpair *qpair, uint32_t id,
 	qpair->max_xfer_size = max_xfer_size;
 	qpair->ctrlr = ctrlr;
 
-	/*
-	 * First time through the completion queue, HW will set phase
-	 * bit on completions to 1.  So set this to 1 here, indicating
-	 * we're looking for a 1 to know which entries have completed.
-	 * we'll toggle the bit each time when the completion queue
-	 * rolls over.
-	 */
-	qpair->phase = 1;
-
 	if (ctrlr->msix_enabled) {
 
 		/*
@@ -271,7 +269,6 @@ nvme_qpair_construct(struct nvme_qpair *qpair, uint32_t id,
 
 	qpair->num_cmds = 0;
 	qpair->num_intr_handler_calls = 0;
-	qpair->sq_head = qpair->sq_tail = qpair->cq_head = 0;
 
 	/* TODO: error checking on contigmalloc, bus_dmamap_load calls */
 	qpair->cmd = contigmalloc(qpair->num_entries *
@@ -341,10 +338,30 @@ nvme_qpair_destroy(struct nvme_qpair *qpair)
 	}
 }
 
+static void
+nvme_admin_qpair_abort_aers(struct nvme_qpair *qpair)
+{
+	struct nvme_tracker	*tr;
+
+	tr = TAILQ_FIRST(&qpair->outstanding_tr);
+	while (tr != NULL) {
+		if (tr->req->cmd.opc == NVME_OPC_ASYNC_EVENT_REQUEST) {
+			nvme_qpair_manual_complete_tracker(qpair, tr,
+			    NVME_SCT_GENERIC, NVME_SC_ABORTED_SQ_DELETION,
+			    FALSE);
+			tr = TAILQ_FIRST(&qpair->outstanding_tr);
+		} else {
+			tr = TAILQ_NEXT(tr, tailq);
+		}
+	}
+}
+
 void
 nvme_admin_qpair_destroy(struct nvme_qpair *qpair)
 {
 
+	nvme_admin_qpair_abort_aers(qpair);
+
 	/*
 	 * For NVMe, you don't send delete queue commands for the admin
 	 * queue, so we just need to unload and free the cmd and cpl memory.
@@ -413,39 +430,6 @@ nvme_io_qpair_destroy(struct nvme_qpair *qpair)
 }
 
 static void
-nvme_qpair_manual_abort_tracker(struct nvme_qpair *qpair,
-    struct nvme_tracker *tr, uint32_t sct, uint32_t sc,
-    boolean_t print_on_error)
-{
-	struct nvme_completion	cpl;
-
-	memset(&cpl, 0, sizeof(cpl));
-	cpl.sqid = qpair->id;
-	cpl.cid = tr->cid;
-	cpl.sf_sct = sct;
-	cpl.sf_sc = sc;
-	nvme_qpair_complete_tracker(qpair, tr, &cpl, print_on_error);
-}
-
-void
-nvme_qpair_manual_abort_request(struct nvme_qpair *qpair,
-    struct nvme_request *req, uint32_t sct, uint32_t sc,
-    boolean_t print_on_error)
-{
-	struct nvme_tracker	*tr;
-
-	tr = nvme_qpair_find_tracker(qpair, req);
-
-	if (tr == NULL) {
-		printf("%s: request not found\n", __func__);
-		nvme_dump_command(&req->cmd);
-		return;
-	}
-
-	nvme_qpair_manual_abort_tracker(qpair, tr, sct, sc, print_on_error);
-}
-
-static void
 nvme_abort_complete(void *arg, const struct nvme_completion *status)
 {
 	struct nvme_tracker	*tr = arg;
@@ -463,7 +447,7 @@ nvme_abort_complete(void *arg, const struct nvme_completion *status)
 		 * status, and then complete the I/O's tracker manually.
 		 */
 		printf("abort command failed, aborting command manually\n");
-		nvme_qpair_manual_abort_tracker(tr->qpair, tr,
+		nvme_qpair_manual_complete_tracker(tr->qpair, tr,
 		    NVME_SCT_GENERIC, NVME_SC_ABORTED_BY_REQUEST, TRUE);
 	}
 }
@@ -478,10 +462,12 @@ nvme_timeout(void *arg)
 }
 
 void
-nvme_qpair_submit_cmd(struct nvme_qpair *qpair, struct nvme_tracker *tr)
+nvme_qpair_submit_tracker(struct nvme_qpair *qpair, struct nvme_tracker *tr)
 {
 	struct nvme_request	*req;
 
+	mtx_assert(&qpair->lock, MA_OWNED);
+
 	req = tr->req;
 	req->cmd.cid = tr->cid;
 	qpair->act_tr[tr->cid] = tr;
@@ -517,11 +503,14 @@ _nvme_qpair_submit_request(struct nvme_qpair *qpair, struct nvme_request *req)
 
 	tr = TAILQ_FIRST(&qpair->free_tr);
 
-	if (tr == NULL) {
+	if (tr == NULL || !qpair->is_enabled) {
 		/*
-		 * No tracker is available.  Put the request on the qpair's
-		 * request queue to be processed when a tracker frees up
-		 * via a command completion.
+		 * No tracker is available, or the qpair is disabled due to
+		 * an in-progress controller-level reset.
+		 *
+		 * Put the request on the qpair's request queue to be processed
+		 * when a tracker frees up via a command completion or when
+		 * the controller reset is completed.
 		 */
 		STAILQ_INSERT_TAIL(&qpair->queued_req, req, stailq);
 		return;
@@ -540,7 +529,7 @@ _nvme_qpair_submit_request(struct nvme_qpair *qpair, struct nvme_request *req)
 			if (err != 0)
 				panic("bus_dmamap_load returned non-zero!\n");
 		} else
-			nvme_qpair_submit_cmd(tr->qpair, tr);
+			nvme_qpair_submit_tracker(tr->qpair, tr);
 	} else {
 		err = bus_dmamap_load_uio(tr->qpair->dma_tag,
 		    tr->payload_dma_map, req->uio,
@@ -558,3 +547,85 @@ nvme_qpair_submit_request(struct nvme_qpair *qpair, struct nvme_request *req)
 	_nvme_qpair_submit_request(qpair, req);
 	mtx_unlock(&qpair->lock);
 }
+
+static void
+nvme_qpair_enable(struct nvme_qpair *qpair)
+{
+
+	qpair->is_enabled = TRUE;
+	qpair->sq_head = qpair->sq_tail = qpair->cq_head = 0;
+
+	/*
+	 * First time through the completion queue, HW will set phase
+	 * bit on completions to 1.  So set this to 1 here, indicating
+	 * we're looking for a 1 to know which entries have completed.
+	 * we'll toggle the bit each time when the completion queue
+	 * rolls over.
+	 */
+	qpair->phase = 1;
+
+	memset(qpair->cmd, 0,
+	    qpair->num_entries * sizeof(struct nvme_command));
+	memset(qpair->cpl, 0,
+	    qpair->num_entries * sizeof(struct nvme_completion));
+}
+
+void
+nvme_admin_qpair_enable(struct nvme_qpair *qpair)
+{
+
+	nvme_qpair_enable(qpair);
+}
+
+void
+nvme_io_qpair_enable(struct nvme_qpair *qpair)
+{
+	STAILQ_HEAD(, nvme_request)	temp;
+	struct nvme_tracker		*tr;
+	struct nvme_request		*req;
+
+	mtx_lock(&qpair->lock);
+
+	nvme_qpair_enable(qpair);
+
+	TAILQ_FOREACH(tr, &qpair->outstanding_tr, tailq)
+		nvme_qpair_submit_tracker(qpair, tr);
+
+	STAILQ_INIT(&temp);
+	STAILQ_SWAP(&qpair->queued_req, &temp, nvme_request);
+
+	while (!STAILQ_EMPTY(&temp)) {
+		req = STAILQ_FIRST(&temp);
+		STAILQ_REMOVE_HEAD(&temp, stailq);
+		_nvme_qpair_submit_request(qpair, req);
+	}
+
+	mtx_unlock(&qpair->lock);
+}
+
+static void
+nvme_qpair_disable(struct nvme_qpair *qpair)
+{
+	struct nvme_tracker	*tr;
+
+	qpair->is_enabled = FALSE;
+	mtx_lock(&qpair->lock);
+	TAILQ_FOREACH(tr, &qpair->outstanding_tr, tailq)
+		callout_stop(&tr->timer);
+	mtx_unlock(&qpair->lock);
+}
+
+void
+nvme_admin_qpair_disable(struct nvme_qpair *qpair)
+{
+
+	nvme_qpair_disable(qpair);
+	nvme_admin_qpair_abort_aers(qpair);
+}
+
+void
+nvme_io_qpair_disable(struct nvme_qpair *qpair)
+{
+
+	nvme_qpair_disable(qpair);
+}
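
A note on the hunks that move the phase-bit and queue-head initialization out
of nvme_qpair_construct() and into nvme_qpair_enable(): that state must be
redone on every reset, not just at attach, because the hardware starts a fresh
pass over the zeroed completion queue after each enable. For readers
unfamiliar with the NVMe completion-queue convention, here is a simplified,
self-contained sketch of how the phase bit distinguishes new completions from
stale ones. The types below are illustrative only, not the driver's real
struct nvme_completion or queue-pair structures.

	#include <stddef.h>
	#include <stdint.h>

	/* Illustrative completion entry: just enough to show the convention. */
	struct cpl_entry {
		uint16_t	cid;		/* command identifier */
		uint16_t	phase;		/* phase tag written by hardware */
	};

	struct cq {
		struct cpl_entry	*entries;	/* zeroed at (re-)enable */
		uint32_t		num_entries;
		uint32_t		head;
		uint16_t		phase;		/* value that means "new" */
	};

	/*
	 * Mirrors what nvme_qpair_enable() does above: reset the head and
	 * expect phase==1 on the hardware's first pass over zeroed memory.
	 */
	static void
	cq_enable(struct cq *cq)
	{
		cq->head = 0;
		cq->phase = 1;
	}

	/* Return the next new completion, or NULL once the queue is drained. */
	static struct cpl_entry *
	cq_poll(struct cq *cq)
	{
		struct cpl_entry *cpl = &cq->entries[cq->head];

		if (cpl->phase != cq->phase)
			return (NULL);	/* stale entry from the previous pass */

		if (++cq->head == cq->num_entries) {
			cq->head = 0;
			cq->phase = !cq->phase;	/* opposite bit marks next pass */
		}
		return (cpl);
	}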