author     phk <phk@FreeBSD.org>    2003-02-09 17:04:57 +0000
committer  phk <phk@FreeBSD.org>    2003-02-09 17:04:57 +0000
commit     ee425616b768cca2e028b7d9cf9b06f11c383e73 (patch)
tree       c9948b59943ec45a609a2f6bdd484f92a5170557
parent     aa5fb3b42f7db355c41e057868665ba93060e768 (diff)
Update the statistics collection code to track busy time instead of
idle time.

Statistics now default to "on" and can be turned off with
sysctl kern.geom.collectstats=0

Performance impact of statistics collection is on the order of 800 nsec
per consumer/provider set on a 700MHz Athlon.
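
As an aside (not part of this commit), the knob can also be flipped from a
C program instead of the sysctl(8) command line; a minimal sketch using
sysctlbyname(3), equivalent to running "sysctl kern.geom.collectstats=0"
as root:

    #include <sys/types.h>
    #include <sys/sysctl.h>
    #include <stdio.h>

    int
    main(void)
    {
        int off = 0, cur;
        size_t len = sizeof(cur);

        /* Fetch the current value and write 0 back in the same call. */
        if (sysctlbyname("kern.geom.collectstats", &cur, &len,
            &off, sizeof(off)) == -1) {
            perror("sysctlbyname");
            return (1);
        }
        printf("kern.geom.collectstats was %d, now 0\n", cur);
        return (0);
    }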
-rw-r--r--  sys/geom/geom_io.c     80
-rw-r--r--  sys/geom/geom_kern.c    2
-rw-r--r--  sys/geom/geom_stats.h  42
-rw-r--r--  sys/geom/notes         48
4 files changed, 125 insertions, 47 deletions
diff --git a/sys/geom/geom_io.c b/sys/geom/geom_io.c
index 865f584..1912587 100644
--- a/sys/geom/geom_io.c
+++ b/sys/geom/geom_io.c
@@ -277,24 +277,12 @@ g_io_request(struct bio *bp, struct g_consumer *cp)
bp->bio_completed = 0;
if (g_collectstats) {
- /* Collect statistics */
- binuptime(&bp->bio_t0);
- if (cp->stat->nop == cp->stat->nend) {
- /* Consumer is idle */
- bt = bp->bio_t0;
- bintime_sub(&bt, &cp->stat->wentidle);
- bintime_add(&cp->stat->it, &bt);
- if (pp->stat->nop == pp->stat->nend) {
- /*
- * NB: Provider can only be idle if the
- * consumer is but we cannot trust them
- * to have gone idle at the same time.
- */
- bt = bp->bio_t0;
- bintime_sub(&bt, &pp->stat->wentidle);
- bintime_add(&pp->stat->it, &bt);
- }
- }
+ binuptime(&bt);
+ bp->bio_t0 = bt;
+ if (cp->stat->nop == cp->stat->nend)
+ cp->stat->wentbusy = bt; /* Consumer is idle */
+ if (pp->stat->nop == pp->stat->nend)
+ pp->stat->wentbusy = bt; /* Provider is idle */
}
cp->stat->nop++;
pp->stat->nop++;
@@ -311,7 +299,7 @@ g_io_deliver(struct bio *bp, int error)
{
struct g_consumer *cp;
struct g_provider *pp;
- struct bintime t1;
+ struct bintime t1, dt;
int idx;
cp = bp->bio_from;
@@ -326,31 +314,33 @@ g_io_deliver(struct bio *bp, int error)
bp, cp, cp->geom->name, pp, pp->name, bp->bio_cmd, error,
(intmax_t)bp->bio_offset, (intmax_t)bp->bio_length);
- switch (bp->bio_cmd) {
- case BIO_READ: idx = G_STAT_IDX_READ; break;
- case BIO_WRITE: idx = G_STAT_IDX_WRITE; break;
- case BIO_DELETE: idx = G_STAT_IDX_DELETE; break;
- case BIO_GETATTR: idx = -1; break;
- case BIO_SETATTR: idx = -1; break;
- default:
- panic("unknown bio_cmd in g_io_deliver");
- break;
- }
-
- /* Collect statistics */
if (g_collectstats) {
+ switch (bp->bio_cmd) {
+ case BIO_READ: idx = G_STAT_IDX_READ; break;
+ case BIO_WRITE: idx = G_STAT_IDX_WRITE; break;
+ case BIO_DELETE: idx = G_STAT_IDX_DELETE; break;
+ case BIO_GETATTR: idx = -1; break;
+ case BIO_SETATTR: idx = -1; break;
+ default:
+ panic("unknown bio_cmd in g_io_deliver");
+ break;
+ }
binuptime(&t1);
- pp->stat->wentidle = t1;
- cp->stat->wentidle = t1;
-
+ /* Raise the "inconsistent" flag for userland */
+ atomic_set_acq_int(&cp->stat->updating, 1);
+ atomic_set_acq_int(&pp->stat->updating, 1);
if (idx >= 0) {
- bintime_sub(&t1, &bp->bio_t0);
- bintime_add(&cp->stat->ops[idx].dt, &t1);
- bintime_add(&pp->stat->ops[idx].dt, &t1);
+ /* Account the service time */
+ dt = t1;
+ bintime_sub(&dt, &bp->bio_t0);
+ bintime_add(&cp->stat->ops[idx].dt, &dt);
+ bintime_add(&pp->stat->ops[idx].dt, &dt);
+ /* ... and the metrics */
pp->stat->ops[idx].nbyte += bp->bio_completed;
cp->stat->ops[idx].nbyte += bp->bio_completed;
pp->stat->ops[idx].nop++;
cp->stat->ops[idx].nop++;
+ /* ... and any errors */
if (error == ENOMEM) {
cp->stat->ops[idx].nmem++;
pp->stat->ops[idx].nmem++;
@@ -359,10 +349,22 @@ g_io_deliver(struct bio *bp, int error)
pp->stat->ops[idx].nerr++;
}
}
+ /* Account for busy time on the consumer */
+ dt = t1;
+ bintime_sub(&dt, &cp->stat->wentbusy);
+ bintime_add(&cp->stat->bt, &dt);
+ cp->stat->wentbusy = t1;
+ /* Account for busy time on the provider */
+ dt = t1;
+ bintime_sub(&dt, &pp->stat->wentbusy);
+ bintime_add(&pp->stat->bt, &dt);
+ pp->stat->wentbusy = t1;
+ /* Mark the structures as consistent again */
+ atomic_store_rel_int(&cp->stat->updating, 0);
+ atomic_store_rel_int(&pp->stat->updating, 0);
}
-
- pp->stat->nend++; /* In reverse order of g_io_request() */
cp->stat->nend++;
+ pp->stat->nend++;
if (error == ENOMEM) {
printf("ENOMEM %p on %p(%s)\n", bp, pp, pp->name);
diff --git a/sys/geom/geom_kern.c b/sys/geom/geom_kern.c
index a925d9a..567cded 100644
--- a/sys/geom/geom_kern.c
+++ b/sys/geom/geom_kern.c
@@ -58,7 +58,7 @@ struct sx topology_lock;
static struct proc *g_up_proc;
int g_debugflags;
-int g_collectstats;
+int g_collectstats = 1;
/*
* G_UP and G_DOWN are the two threads which push I/O through the
diff --git a/sys/geom/geom_stats.h b/sys/geom/geom_stats.h
index 8a0cf83..159cba1 100644
--- a/sys/geom/geom_stats.h
+++ b/sys/geom/geom_stats.h
@@ -34,25 +34,53 @@
#define GEOM_STATS_DEVICE "geom.stats"
/*
- * A g_stat contains the statistics we collect on consumers and
- * providers.
+ * A g_stat contains the statistics the kernel collects on consumers and
+ * providers. See libgeom(3) for how to get hold of these.
*/
struct g_stat {
- void *id;
- uint64_t nop;
- uint64_t nend;
- struct bintime it;
- struct bintime wentidle;
+ int updating;
+ /*
+ * If non-zero, the structure is being
+ * updated by the kernel and the contents
+ * should not be used.
+ */
+
+ void *id;
+ /* GEOM-identifier for the consumer/provider */
+
+ uint64_t nop;
+ /* Number of requests started */
+
+ uint64_t nend;
+ /* Number of requests completed */
+
+ struct bintime bt;
+ /* Accumulated busy time */
+
+ struct bintime wentbusy;
+ /* Busy time accounted for until here */
struct {
uint64_t nop;
+ /* Number of requests completed */
+
uint64_t nbyte;
+ /* Number of bytes completed */
+
uint64_t nmem;
+ /* Number of ENOMEM request errors */
+
uint64_t nerr;
+ /* Number of other request errors */
+
struct bintime dt;
+ /* Accumulated request processing time */
+
} ops[3];
+
#define G_STAT_IDX_READ 0
#define G_STAT_IDX_WRITE 1
#define G_STAT_IDX_DELETE 2
+
};
#endif /* _GEOM_GEOM_STATS_H_ */
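
A short illustration of how the per-operation fields are meant to be
combined (this sketch is not part of the commit; the userland include path
for geom_stats.h is an assumption, and libgeom(3) is the intended way to
obtain a struct g_stat):

    #include <sys/types.h>
    #include <sys/time.h>
    #include <stdio.h>
    #include <geom/geom_stats.h>    /* include path is an assumption */

    /* A bintime is whole seconds plus a fraction scaled by 2^64. */
    static double
    bintime2dbl(const struct bintime *bt)
    {
        return ((double)bt->sec +
            (double)bt->frac / 18446744073709551616.0);
    }

    /* Average read service time and read throughput from one snapshot. */
    static void
    report_reads(const struct g_stat *gsp)
    {
        double dt;

        if (gsp->ops[G_STAT_IDX_READ].nop == 0)
            return;
        dt = bintime2dbl(&gsp->ops[G_STAT_IDX_READ].dt);
        printf("avg read service time: %.6f s\n",
            dt / gsp->ops[G_STAT_IDX_READ].nop);
        if (dt > 0.0)
            printf("read throughput: %.0f bytes/s\n",
                (double)gsp->ops[G_STAT_IDX_READ].nbyte / dt);
    }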
diff --git a/sys/geom/notes b/sys/geom/notes
index 88e0f52..eff24c5 100644
--- a/sys/geom/notes
+++ b/sys/geom/notes
@@ -38,3 +38,51 @@ by cloning all children before I/O is requested on any of them.
Notice that cloning an "extra" child and calling g_std_done() on
it directly opens another race since the assumption is that
g_std_done() only is called in the g_up thread.
+
+-----------------------------------------------------------------------
+Statistics collection
+
+Statistics collection can run at three levels controlled by the
+"kern.geom.collectstats" sysctl.
+
+At level zero, only the numbers of transactions started and completed
+are counted, and this is only because GEOM internally uses the difference
+between the two as a sanity check.
+
+At level one we collect the full statistics. Higher levels are
+reserved for future use. Statistics are collected independently
+on both the provider and the consumer, because multiple consumers
+can be active against the same provider at the same time.
+
+The statistics collection falls into two parts:
+
+The first and simpler part consists of g_io_request() timestamping
+the struct bio when the request is first started and g_io_deliver()
+updating the consumer's and provider's statistics based on fields in
+the bio when it is completed. There are no concurrency or locking
+concerns in this part. The statistics collected consist of the number
+of requests, number of bytes, number of ENOMEM errors, number of
+other errors and duration of the request for each of the three
+major request types: BIO_READ, BIO_WRITE and BIO_DELETE.
+
+The second part tries to keep track of the "busy%".
+
+If in g_io_request() we find that there are no outstanding requests
+(based on the counters for scheduled and completed requests being
+equal), we set a timestamp in the "wentbusy" field. Since there
+are no outstanding requests, and as long as there is only one thread
+pushing the g_down queue, we cannot possibly conflict with
+g_io_deliver() until we ship the current request down.
+
+In g_io_deliver() we calculate the delta-T from wentbusy and add this
+to the "bt" field, and set wentbusy to the current timestamp. We
+take care to do this before we increment the "requests completed"
+counter, since that prevents g_io_request() from touching the
+"wentbusy" timestamp concurrently.
+
+The statistics data is made available to userland through the use
+of a special allocator (in geom_stats.c) which through a device
+allows userland to mmap(2) the pages containing the statistics data.
+In order to indicate to userland when the data in a statistics
+structure might be inconsistent, g_io_deliver() atomically sets a
+flag "updating" and resets it when the structure is again consistent.