summary | refs | log | tree | commit | diff | stats
path: root/sbin/hastd
diff options
context:
space:
mode:
author: trociny <trociny@FreeBSD.org> 2013-02-25 20:09:07 +0000
committer: trociny <trociny@FreeBSD.org> 2013-02-25 20:09:07 +0000
commit: 8690e69f6a4dba617b6c0fadf7cb2139a60500b8 (patch)
tree: 7c3a030feb23f647bd2528a4d8e72a4d5621913d /sbin/hastd
parent: 1f658c88bed0f72839a6caee6fdc9731cfd3a516 (diff)
downloadFreeBSD-src-8690e69f6a4dba617b6c0fadf7cb2139a60500b8.zip
FreeBSD-src-8690e69f6a4dba617b6c0fadf7cb2139a60500b8.tar.gz
Add I/O error counters to hastd(8) and make hastctl(8) display them.
This may be useful for detecting problems with HAST disks.

Discussed with and reviewed by: pjd
MFC after: 1 week
Diffstat (limited to 'sbin/hastd')
-rw-r--r-- sbin/hastd/control.c 18
-rw-r--r-- sbin/hastd/hast.h 12
-rw-r--r-- sbin/hastd/primary.c 18
-rw-r--r-- sbin/hastd/secondary.c 18
4 files changed, 65 insertions, 1 deletion
diff --git a/sbin/hastd/control.c b/sbin/hastd/control.c
index 925fd32..3619fc6 100644
--- a/sbin/hastd/control.c
+++ b/sbin/hastd/control.c
@@ -207,6 +207,14 @@ control_status_worker(struct hast_resource *res, struct nv *nvout,
"stat_flush%u", no);
nv_add_uint64(nvout, nv_get_uint64(cnvin, "stat_activemap_update"),
"stat_activemap_update%u", no);
+ nv_add_uint64(nvout, nv_get_uint64(cnvin, "stat_read_error"),
+ "stat_read_error%u", no);
+ nv_add_uint64(nvout, nv_get_uint64(cnvin, "stat_write_error"),
+ "stat_write_error%u", no);
+ nv_add_uint64(nvout, nv_get_uint64(cnvin, "stat_delete_error"),
+ "stat_delete_error%u", no);
+ nv_add_uint64(nvout, nv_get_uint64(cnvin, "stat_flush_error"),
+ "stat_flush_error%u", no);
end:
if (cnvin != NULL)
nv_free(cnvin);
@@ -459,6 +467,16 @@ ctrl_thread(void *arg)
nv_add_uint64(nvout, res->hr_stat_flush, "stat_flush");
nv_add_uint64(nvout, res->hr_stat_activemap_update,
"stat_activemap_update");
+ nv_add_uint64(nvout, res->hr_stat_read_error,
+ "stat_read_error");
+ nv_add_uint64(nvout, res->hr_stat_write_error +
+ res->hr_stat_activemap_write_error,
+ "stat_write_error");
+ nv_add_uint64(nvout, res->hr_stat_delete_error,
+ "stat_delete_error");
+ nv_add_uint64(nvout, res->hr_stat_flush_error +
+ res->hr_stat_activemap_flush_error,
+ "stat_flush_error");
nv_add_int16(nvout, 0, "error");
break;
case CONTROL_RELOAD:
diff --git a/sbin/hastd/hast.h b/sbin/hastd/hast.h
index c7a6b49..b757994 100644
--- a/sbin/hastd/hast.h
+++ b/sbin/hastd/hast.h
@@ -239,6 +239,18 @@ struct hast_resource {
uint64_t hr_stat_flush;
/* Number of activemap updates. */
uint64_t hr_stat_activemap_update;
+ /* Number of local read errors. */
+ uint64_t hr_stat_read_error;
+ /* Number of local write errors. */
+ uint64_t hr_stat_write_error;
+ /* Number of local delete errors. */
+ uint64_t hr_stat_delete_error;
+ /* Number of local flush errors. */
+ uint64_t hr_stat_flush_error;
+ /* Number of activemap write errors. */
+ uint64_t hr_stat_activemap_write_error;
+ /* Number of activemap flush errors. */
+ uint64_t hr_stat_activemap_flush_error;
/* Next resource. */
TAILQ_ENTRY(hast_resource) hr_next;
diff --git a/sbin/hastd/primary.c b/sbin/hastd/primary.c
index fb49ef6..a9dfa2b 100644
--- a/sbin/hastd/primary.c
+++ b/sbin/hastd/primary.c
@@ -303,6 +303,7 @@ hast_activemap_flush(struct hast_resource *res)
if (pwrite(res->hr_localfd, buf, size, METADATA_SIZE) !=
(ssize_t)size) {
pjdlog_errno(LOG_ERR, "Unable to flush activemap to disk");
+ res->hr_stat_activemap_write_error++;
return (-1);
}
if (res->hr_metaflush == 1 && g_flush(res->hr_localfd) == -1) {
@@ -313,6 +314,7 @@ hast_activemap_flush(struct hast_resource *res)
} else {
pjdlog_errno(LOG_ERR,
"Unable to flush disk cache on activemap update");
+ res->hr_stat_activemap_flush_error++;
return (-1);
}
}
@@ -1936,6 +1938,22 @@ ggate_send_thread(void *arg)
"G_GATE_CMD_DONE failed");
}
}
+ if (hio->hio_errors[0]) {
+ switch (ggio->gctl_cmd) {
+ case BIO_READ:
+ res->hr_stat_read_error++;
+ break;
+ case BIO_WRITE:
+ res->hr_stat_write_error++;
+ break;
+ case BIO_DELETE:
+ res->hr_stat_delete_error++;
+ break;
+ case BIO_FLUSH:
+ res->hr_stat_flush_error++;
+ break;
+ }
+ }
pjdlog_debug(2,
"ggate_send: (%p) Moving request to the free queue.", hio);
QUEUE_INSERT2(hio, free);
diff --git a/sbin/hastd/secondary.c b/sbin/hastd/secondary.c
index 71524e9..febdc05 100644
--- a/sbin/hastd/secondary.c
+++ b/sbin/hastd/secondary.c
@@ -765,6 +765,7 @@ disk_thread(void *arg)
pjdlog_errno(LOG_WARNING,
"Unable to store cleared activemap");
free(map);
+ res->hr_stat_activemap_write_error++;
break;
}
free(map);
@@ -883,8 +884,23 @@ send_thread(void *arg)
PJDLOG_ABORT("Unexpected command (cmd=%hhu).",
hio->hio_cmd);
}
- if (hio->hio_error != 0)
+ if (hio->hio_error != 0) {
+ switch (hio->hio_cmd) {
+ case HIO_READ:
+ res->hr_stat_read_error++;
+ break;
+ case HIO_WRITE:
+ res->hr_stat_write_error++;
+ break;
+ case HIO_DELETE:
+ res->hr_stat_delete_error++;
+ break;
+ case HIO_FLUSH:
+ res->hr_stat_flush_error++;
+ break;
+ }
nv_add_int16(nvout, hio->hio_error, "error");
+ }
if (hast_proto_send(res, res->hr_remoteout, nvout, data,
length) == -1) {
secondary_exit(EX_TEMPFAIL, "Unable to send reply");
OpenPOWER on IntegriCloud