diff options
author | trociny <trociny@FreeBSD.org> | 2013-02-25 20:09:07 +0000 |
---|---|---|
committer | trociny <trociny@FreeBSD.org> | 2013-02-25 20:09:07 +0000 |
commit | 8690e69f6a4dba617b6c0fadf7cb2139a60500b8 (patch) | |
tree | 7c3a030feb23f647bd2528a4d8e72a4d5621913d /sbin | |
parent | 1f658c88bed0f72839a6caee6fdc9731cfd3a516 (diff) | |
download | FreeBSD-src-8690e69f6a4dba617b6c0fadf7cb2139a60500b8.zip FreeBSD-src-8690e69f6a4dba617b6c0fadf7cb2139a60500b8.tar.gz |
Add i/o error counters to hastd(8) and make hastctl(8) display
them. This may be useful for detecting problems with HAST disks.
Discussed with and reviewed by: pjd
MFC after: 1 week
Diffstat (limited to 'sbin')
-rw-r--r-- | sbin/hastctl/hastctl.c | 6 | ||||
-rw-r--r-- | sbin/hastd/control.c | 18 | ||||
-rw-r--r-- | sbin/hastd/hast.h | 12 | ||||
-rw-r--r-- | sbin/hastd/primary.c | 18 | ||||
-rw-r--r-- | sbin/hastd/secondary.c | 18 |
5 files changed, 71 insertions, 1 deletion
diff --git a/sbin/hastctl/hastctl.c b/sbin/hastctl/hastctl.c index 0bd47f2..503072c 100644 --- a/sbin/hastctl/hastctl.c +++ b/sbin/hastctl/hastctl.c @@ -351,6 +351,12 @@ control_status(struct nv *nv) (uint64_t)nv_get_uint64(nv, "stat_flush%u", ii)); printf(" activemap updates: %ju\n", (uint64_t)nv_get_uint64(nv, "stat_activemap_update%u", ii)); + printf(" local errors: " + "read: %ju, write: %ju, delete: %ju, flush: %ju\n", + (uintmax_t)nv_get_uint64(nv, "stat_read_error%u", ii), + (uintmax_t)nv_get_uint64(nv, "stat_write_error%u", ii), + (uintmax_t)nv_get_uint64(nv, "stat_delete_error%u", ii), + (uintmax_t)nv_get_uint64(nv, "stat_flush_error%u", ii)); } return (ret); } diff --git a/sbin/hastd/control.c b/sbin/hastd/control.c index 925fd32..3619fc6 100644 --- a/sbin/hastd/control.c +++ b/sbin/hastd/control.c @@ -207,6 +207,14 @@ control_status_worker(struct hast_resource *res, struct nv *nvout, "stat_flush%u", no); nv_add_uint64(nvout, nv_get_uint64(cnvin, "stat_activemap_update"), "stat_activemap_update%u", no); + nv_add_uint64(nvout, nv_get_uint64(cnvin, "stat_read_error"), + "stat_read_error%u", no); + nv_add_uint64(nvout, nv_get_uint64(cnvin, "stat_write_error"), + "stat_write_error%u", no); + nv_add_uint64(nvout, nv_get_uint64(cnvin, "stat_delete_error"), + "stat_delete_error%u", no); + nv_add_uint64(nvout, nv_get_uint64(cnvin, "stat_flush_error"), + "stat_flush_error%u", no); end: if (cnvin != NULL) nv_free(cnvin); @@ -459,6 +467,16 @@ ctrl_thread(void *arg) nv_add_uint64(nvout, res->hr_stat_flush, "stat_flush"); nv_add_uint64(nvout, res->hr_stat_activemap_update, "stat_activemap_update"); + nv_add_uint64(nvout, res->hr_stat_read_error, + "stat_read_error"); + nv_add_uint64(nvout, res->hr_stat_write_error + + res->hr_stat_activemap_write_error, + "stat_write_error"); + nv_add_uint64(nvout, res->hr_stat_delete_error, + "stat_delete_error"); + nv_add_uint64(nvout, res->hr_stat_flush_error + + res->hr_stat_activemap_flush_error, + "stat_flush_error"); 
nv_add_int16(nvout, 0, "error"); break; case CONTROL_RELOAD: diff --git a/sbin/hastd/hast.h b/sbin/hastd/hast.h index c7a6b49..b757994 100644 --- a/sbin/hastd/hast.h +++ b/sbin/hastd/hast.h @@ -239,6 +239,18 @@ struct hast_resource { uint64_t hr_stat_flush; /* Number of activemap updates. */ uint64_t hr_stat_activemap_update; + /* Number of local read errors. */ + uint64_t hr_stat_read_error; + /* Number of local write errors. */ + uint64_t hr_stat_write_error; + /* Number of local delete errors. */ + uint64_t hr_stat_delete_error; + /* Number of flush errors. */ + uint64_t hr_stat_flush_error; + /* Number of activemap write errors. */ + uint64_t hr_stat_activemap_write_error; + /* Number of activemap flush errors. */ + uint64_t hr_stat_activemap_flush_error; /* Next resource. */ TAILQ_ENTRY(hast_resource) hr_next; diff --git a/sbin/hastd/primary.c b/sbin/hastd/primary.c index fb49ef6..a9dfa2b 100644 --- a/sbin/hastd/primary.c +++ b/sbin/hastd/primary.c @@ -303,6 +303,7 @@ hast_activemap_flush(struct hast_resource *res) if (pwrite(res->hr_localfd, buf, size, METADATA_SIZE) != (ssize_t)size) { pjdlog_errno(LOG_ERR, "Unable to flush activemap to disk"); + res->hr_stat_activemap_write_error++; return (-1); } if (res->hr_metaflush == 1 && g_flush(res->hr_localfd) == -1) { @@ -313,6 +314,7 @@ hast_activemap_flush(struct hast_resource *res) } else { pjdlog_errno(LOG_ERR, "Unable to flush disk cache on activemap update"); + res->hr_stat_activemap_flush_error++; return (-1); } } @@ -1936,6 +1938,22 @@ ggate_send_thread(void *arg) "G_GATE_CMD_DONE failed"); } } + if (hio->hio_errors[0]) { + switch (ggio->gctl_cmd) { + case BIO_READ: + res->hr_stat_read_error++; + break; + case BIO_WRITE: + res->hr_stat_write_error++; + break; + case BIO_DELETE: + res->hr_stat_delete_error++; + break; + case BIO_FLUSH: + res->hr_stat_flush_error++; + break; + } + } pjdlog_debug(2, "ggate_send: (%p) Moving request to the free queue.", hio); QUEUE_INSERT2(hio, free); diff --git 
a/sbin/hastd/secondary.c b/sbin/hastd/secondary.c index 71524e9..febdc05 100644 --- a/sbin/hastd/secondary.c +++ b/sbin/hastd/secondary.c @@ -765,6 +765,7 @@ disk_thread(void *arg) pjdlog_errno(LOG_WARNING, "Unable to store cleared activemap"); free(map); + res->hr_stat_activemap_write_error++; break; } free(map); @@ -883,8 +884,23 @@ send_thread(void *arg) PJDLOG_ABORT("Unexpected command (cmd=%hhu).", hio->hio_cmd); } - if (hio->hio_error != 0) + if (hio->hio_error != 0) { + switch (hio->hio_cmd) { + case HIO_READ: + res->hr_stat_read_error++; + break; + case HIO_WRITE: + res->hr_stat_write_error++; + break; + case HIO_DELETE: + res->hr_stat_delete_error++; + break; + case HIO_FLUSH: + res->hr_stat_flush_error++; + break; + } nv_add_int16(nvout, hio->hio_error, "error"); + } if (hast_proto_send(res, res->hr_remoteout, nvout, data, length) == -1) { secondary_exit(EX_TEMPFAIL, "Unable to send reply"); |