From 711dabaf432762eec4942b5f5f7611e7df825260 Mon Sep 17 00:00:00 2001 From: jimharris Date: Tue, 26 Mar 2013 19:58:17 +0000 Subject: Add handling for controller fatal status (csts.cfs). On any I/O timeout, check for csts.cfs==1. If set, the controller is reporting fatal status and we reset the controller immediately, rather than trying to abort the timed out command. This changeset also includes deferring the controller start portion of the reset to a separate task. This ensures we are always performing a controller start operation from a consistent context. Sponsored by: Intel Reviewed by: carl --- sys/dev/nvme/nvme_ctrlr.c | 17 ++++++++++++++++- sys/dev/nvme/nvme_private.h | 3 +++ sys/dev/nvme/nvme_qpair.c | 20 ++++++++++++++++++-- 3 files changed, 37 insertions(+), 3 deletions(-) (limited to 'sys/dev/nvme') diff --git a/sys/dev/nvme/nvme_ctrlr.c b/sys/dev/nvme/nvme_ctrlr.c index a07f9f0..f759a60 100644 --- a/sys/dev/nvme/nvme_ctrlr.c +++ b/sys/dev/nvme/nvme_ctrlr.c @@ -427,7 +427,7 @@ nvme_ctrlr_reset(struct nvme_controller *ctrlr) status = nvme_ctrlr_hw_reset(ctrlr); DELAY(100*1000); if (status == 0) - nvme_ctrlr_start(ctrlr); + taskqueue_enqueue(ctrlr->taskqueue, &ctrlr->restart_task); } static int @@ -686,6 +686,14 @@ err: } static void +nvme_ctrlr_restart_task(void *arg, int pending) +{ + struct nvme_controller *ctrlr = arg; + + nvme_ctrlr_start(ctrlr); +} + +static void nvme_ctrlr_intx_handler(void *arg) { struct nvme_controller *ctrlr = arg; @@ -864,6 +872,11 @@ intx: ctrlr->cdev->si_drv1 = (void *)ctrlr; + TASK_INIT(&ctrlr->restart_task, 0, nvme_ctrlr_restart_task, ctrlr); + ctrlr->taskqueue = taskqueue_create("nvme_taskq", M_WAITOK, + taskqueue_thread_enqueue, &ctrlr->taskqueue); + taskqueue_start_threads(&ctrlr->taskqueue, 1, PI_DISK, "nvme taskq"); + return (0); } @@ -872,6 +885,8 @@ nvme_ctrlr_destruct(struct nvme_controller *ctrlr, device_t dev) { int i; + taskqueue_free(ctrlr->taskqueue); + for (i = 0; i < NVME_MAX_NAMESPACES; i++) 
nvme_ns_destruct(&ctrlr->ns[i]); diff --git a/sys/dev/nvme/nvme_private.h b/sys/dev/nvme/nvme_private.h index 695ce5e..2fde631 100644 --- a/sys/dev/nvme/nvme_private.h +++ b/sys/dev/nvme/nvme_private.h @@ -36,6 +36,7 @@ #include <sys/mutex.h> #include <sys/rman.h> #include <sys/systm.h> +#include <sys/taskqueue.h> #include <vm/uma.h> @@ -236,6 +237,8 @@ struct nvme_controller { uint32_t ns_identified; uint32_t queues_created; uint32_t num_start_attempts; + struct task restart_task; + struct taskqueue *taskqueue; /* For shared legacy interrupt. */ int rid; diff --git a/sys/dev/nvme/nvme_qpair.c b/sys/dev/nvme/nvme_qpair.c index f98125f..db9abf2 100644 --- a/sys/dev/nvme/nvme_qpair.c +++ b/sys/dev/nvme/nvme_qpair.c @@ -98,7 +98,7 @@ nvme_qpair_construct_tracker(struct nvme_qpair *qpair, struct nvme_tracker *tr, bus_dmamap_load(qpair->dma_tag, tr->prp_dma_map, tr->prp, sizeof(tr->prp), nvme_single_map, &tr->prp_bus_addr, 0); - callout_init_mtx(&tr->timer, &qpair->lock, 0); + callout_init(&tr->timer, 1); tr->cid = cid; tr->qpair = qpair; } @@ -456,8 +456,24 @@ static void nvme_timeout(void *arg) { struct nvme_tracker *tr = arg; + struct nvme_qpair *qpair = tr->qpair; + struct nvme_controller *ctrlr = qpair->ctrlr; + union csts_register csts; - nvme_ctrlr_cmd_abort(tr->qpair->ctrlr, tr->cid, tr->qpair->id, + csts.raw = nvme_mmio_read_4(ctrlr, csts); + if (csts.bits.cfs == 1) { + /* + * The controller is reporting fatal status. Don't bother + * trying to abort the timed out command - proceed + * immediately to a controller-level reset. + */ + device_printf(ctrlr->dev, + "controller reports fatal status, resetting...\n"); + nvme_ctrlr_reset(ctrlr); + return; + } + + nvme_ctrlr_cmd_abort(ctrlr, tr->cid, qpair->id, nvme_abort_complete, tr); } -- cgit v1.1