summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorjimharris <jimharris@FreeBSD.org>2013-10-08 16:00:12 +0000
committerjimharris <jimharris@FreeBSD.org>2013-10-08 16:00:12 +0000
commit64e2a5a8e6e7ec29b095128042e02877af474bfa (patch)
treef9ff055d457112f01ee8b03b6d0e4f1c3be8e20b
parent9cdb85e5c10e1d1e964b04d57d17b35b32e55c0c (diff)
downloadFreeBSD-src-64e2a5a8e6e7ec29b095128042e02877af474bfa.zip
FreeBSD-src-64e2a5a8e6e7ec29b095128042e02877af474bfa.tar.gz
Log and then disable asynchronous notification of persistent events after
they occur. This prevents repeated notifications of the same event. Status of these events may be viewed at any time by viewing the SMART/Health Info Page using nvmecontrol, whether or not asynchronous event notifications for those events are enabled. This log page can be viewed using: nvmecontrol logpage -p 2 <ctrlr id> Future enhancements may re-enable these notifications on a periodic basis so that if the notified condition persists, it will continue to be logged. Sponsored by: Intel Reviewed by: carl Approved by: re (hrs) MFC after: 1 week
-rw-r--r--sys/dev/nvme/nvme_ctrlr.c60
-rw-r--r--sys/dev/nvme/nvme_private.h3
2 files changed, 56 insertions, 7 deletions
diff --git a/sys/dev/nvme/nvme_ctrlr.c b/sys/dev/nvme/nvme_ctrlr.c
index 33e77a8..322d5a4 100644
--- a/sys/dev/nvme/nvme_ctrlr.c
+++ b/sys/dev/nvme/nvme_ctrlr.c
@@ -617,9 +617,35 @@ nvme_ctrlr_get_log_page_size(struct nvme_controller *ctrlr, uint8_t page_id)
}
static void
+nvme_ctrlr_log_critical_warnings(struct nvme_controller *ctrlr,
+ union nvme_critical_warning_state state)
+{
+
+ if (state.bits.available_spare == 1)
+ nvme_printf(ctrlr, "available spare space below threshold\n");
+
+ if (state.bits.temperature == 1)
+ nvme_printf(ctrlr, "temperature above threshold\n");
+
+ if (state.bits.device_reliability == 1)
+ nvme_printf(ctrlr, "device reliability degraded\n");
+
+ if (state.bits.read_only == 1)
+ nvme_printf(ctrlr, "media placed in read only mode\n");
+
+ if (state.bits.volatile_memory_backup == 1)
+ nvme_printf(ctrlr, "volatile memory backup device failed\n");
+
+ if (state.bits.reserved != 0)
+ nvme_printf(ctrlr,
+ "unknown critical warning(s): state = 0x%02x\n", state.raw);
+}
+
+static void
nvme_ctrlr_async_event_log_page_cb(void *arg, const struct nvme_completion *cpl)
{
- struct nvme_async_event_request *aer = arg;
+ struct nvme_async_event_request *aer = arg;
+ struct nvme_health_information_page *health_info;
/*
* If the log page fetch for some reason completed with an error,
@@ -629,13 +655,33 @@ nvme_ctrlr_async_event_log_page_cb(void *arg, const struct nvme_completion *cpl)
if (nvme_completion_is_error(cpl))
nvme_notify_async_consumers(aer->ctrlr, &aer->cpl,
aer->log_page_id, NULL, 0);
- else
+ else {
+ if (aer->log_page_id == NVME_LOG_HEALTH_INFORMATION) {
+ health_info = (struct nvme_health_information_page *)
+ aer->log_page_buffer;
+ nvme_ctrlr_log_critical_warnings(aer->ctrlr,
+ health_info->critical_warning);
+ /*
+ * Critical warnings reported through the
+ * SMART/health log page are persistent, so
+ * clear the associated bits in the async event
+ * config so that we do not receive repeated
+ * notifications for the same event.
+ */
+ aer->ctrlr->async_event_config.raw &=
+ ~health_info->critical_warning.raw;
+ nvme_ctrlr_cmd_set_async_event_config(aer->ctrlr,
+ aer->ctrlr->async_event_config, NULL, NULL);
+ }
+
+
/*
* Pass the cpl data from the original async event completion,
* not the log page fetch.
*/
nvme_notify_async_consumers(aer->ctrlr, &aer->cpl,
aer->log_page_id, aer->log_page_buffer, aer->log_page_size);
+ }
/*
* Repost another asynchronous event request to replace the one
@@ -709,12 +755,11 @@ static void
nvme_ctrlr_configure_aer(struct nvme_controller *ctrlr)
{
struct nvme_completion_poll_status status;
- union nvme_critical_warning_state state;
struct nvme_async_event_request *aer;
uint32_t i;
- state.raw = 0xFF;
- state.bits.reserved = 0;
+ ctrlr->async_event_config.raw = 0xFF;
+ ctrlr->async_event_config.bits.reserved = 0;
status.done = FALSE;
nvme_ctrlr_cmd_get_feature(ctrlr, NVME_FEAT_TEMPERATURE_THRESHOLD,
@@ -725,10 +770,11 @@ nvme_ctrlr_configure_aer(struct nvme_controller *ctrlr)
(status.cpl.cdw0 & 0xFFFF) == 0xFFFF ||
(status.cpl.cdw0 & 0xFFFF) == 0x0000) {
nvme_printf(ctrlr, "temperature threshold not supported\n");
- state.bits.temperature = 0;
+ ctrlr->async_event_config.bits.temperature = 0;
}
- nvme_ctrlr_cmd_set_async_event_config(ctrlr, state, NULL, NULL);
+ nvme_ctrlr_cmd_set_async_event_config(ctrlr,
+ ctrlr->async_event_config, NULL, NULL);
/* aerl is a zero-based value, so we need to add 1 here. */
ctrlr->num_aers = min(NVME_MAX_ASYNC_EVENTS, (ctrlr->cdata.aerl+1));
diff --git a/sys/dev/nvme/nvme_private.h b/sys/dev/nvme/nvme_private.h
index 1c2333e..f6bd041 100644
--- a/sys/dev/nvme/nvme_private.h
+++ b/sys/dev/nvme/nvme_private.h
@@ -322,6 +322,9 @@ struct nvme_controller {
struct cdev *cdev;
+ /** bit mask of warning types currently enabled for async events */
+ union nvme_critical_warning_state async_event_config;
+
uint32_t num_aers;
struct nvme_async_event_request aer[NVME_MAX_ASYNC_EVENTS];
OpenPOWER on IntegriCloud