diff options
-rw-r--r-- | sys/geom/geom_io.c | 80 | ||||
-rw-r--r-- | sys/geom/geom_kern.c | 2 | ||||
-rw-r--r-- | sys/geom/geom_stats.h | 42 | ||||
-rw-r--r-- | sys/geom/notes | 48 |
4 files changed, 125 insertions, 47 deletions
diff --git a/sys/geom/geom_io.c b/sys/geom/geom_io.c index 865f584..1912587 100644 --- a/sys/geom/geom_io.c +++ b/sys/geom/geom_io.c @@ -277,24 +277,12 @@ g_io_request(struct bio *bp, struct g_consumer *cp) bp->bio_completed = 0; if (g_collectstats) { - /* Collect statistics */ - binuptime(&bp->bio_t0); - if (cp->stat->nop == cp->stat->nend) { - /* Consumer is idle */ - bt = bp->bio_t0; - bintime_sub(&bt, &cp->stat->wentidle); - bintime_add(&cp->stat->it, &bt); - if (pp->stat->nop == pp->stat->nend) { - /* - * NB: Provider can only be idle if the - * consumer is but we cannot trust them - * to have gone idle at the same time. - */ - bt = bp->bio_t0; - bintime_sub(&bt, &pp->stat->wentidle); - bintime_add(&pp->stat->it, &bt); - } - } + binuptime(&bt); + bp->bio_t0 = bt; + if (cp->stat->nop == cp->stat->nend) + cp->stat->wentbusy = bt; /* Consumer is idle */ + if (pp->stat->nop == pp->stat->nend) + pp->stat->wentbusy = bt; /* Provider is idle */ } cp->stat->nop++; pp->stat->nop++; @@ -311,7 +299,7 @@ g_io_deliver(struct bio *bp, int error) { struct g_consumer *cp; struct g_provider *pp; - struct bintime t1; + struct bintime t1, dt; int idx; cp = bp->bio_from; @@ -326,31 +314,33 @@ g_io_deliver(struct bio *bp, int error) bp, cp, cp->geom->name, pp, pp->name, bp->bio_cmd, error, (intmax_t)bp->bio_offset, (intmax_t)bp->bio_length); - switch (bp->bio_cmd) { - case BIO_READ: idx = G_STAT_IDX_READ; break; - case BIO_WRITE: idx = G_STAT_IDX_WRITE; break; - case BIO_DELETE: idx = G_STAT_IDX_DELETE; break; - case BIO_GETATTR: idx = -1; break; - case BIO_SETATTR: idx = -1; break; - default: - panic("unknown bio_cmd in g_io_deliver"); - break; - } - - /* Collect statistics */ if (g_collectstats) { + switch (bp->bio_cmd) { + case BIO_READ: idx = G_STAT_IDX_READ; break; + case BIO_WRITE: idx = G_STAT_IDX_WRITE; break; + case BIO_DELETE: idx = G_STAT_IDX_DELETE; break; + case BIO_GETATTR: idx = -1; break; + case BIO_SETATTR: idx = -1; break; + default: + panic("unknown bio_cmd in 
g_io_deliver"); + break; + } binuptime(&t1); - pp->stat->wentidle = t1; - cp->stat->wentidle = t1; - + /* Raise the "inconsistent" flag for userland */ + atomic_set_acq_int(&cp->stat->updating, 1); + atomic_set_acq_int(&pp->stat->updating, 1); if (idx >= 0) { - bintime_sub(&t1, &bp->bio_t0); - bintime_add(&cp->stat->ops[idx].dt, &t1); - bintime_add(&pp->stat->ops[idx].dt, &t1); + /* Account the service time */ + dt = t1; + bintime_sub(&dt, &bp->bio_t0); + bintime_add(&cp->stat->ops[idx].dt, &dt); + bintime_add(&pp->stat->ops[idx].dt, &dt); + /* ... and the metrics */ pp->stat->ops[idx].nbyte += bp->bio_completed; cp->stat->ops[idx].nbyte += bp->bio_completed; pp->stat->ops[idx].nop++; cp->stat->ops[idx].nop++; + /* ... and any errors */ if (error == ENOMEM) { cp->stat->ops[idx].nmem++; pp->stat->ops[idx].nmem++; @@ -359,10 +349,22 @@ g_io_deliver(struct bio *bp, int error) pp->stat->ops[idx].nerr++; } } + /* Account for busy time on the consumer */ + dt = t1; + bintime_sub(&dt, &cp->stat->wentbusy); + bintime_add(&cp->stat->bt, &dt); + cp->stat->wentbusy = t1; + /* Account for busy time on the provider */ + dt = t1; + bintime_sub(&dt, &pp->stat->wentbusy); + bintime_add(&pp->stat->bt, &dt); + pp->stat->wentbusy = t1; + /* Mark the structures as consistent again */ + atomic_store_rel_int(&cp->stat->updating, 0); + atomic_store_rel_int(&pp->stat->updating, 0); } - - pp->stat->nend++; /* In reverse order of g_io_request() */ cp->stat->nend++; + pp->stat->nend++; if (error == ENOMEM) { printf("ENOMEM %p on %p(%s)\n", bp, pp, pp->name); diff --git a/sys/geom/geom_kern.c b/sys/geom/geom_kern.c index a925d9a..567cded 100644 --- a/sys/geom/geom_kern.c +++ b/sys/geom/geom_kern.c @@ -58,7 +58,7 @@ struct sx topology_lock; static struct proc *g_up_proc; int g_debugflags; -int g_collectstats; +int g_collectstats = 1; /* * G_UP and G_DOWN are the two threads which push I/O through the diff --git a/sys/geom/geom_stats.h b/sys/geom/geom_stats.h index 8a0cf83..159cba1 100644 --- 
a/sys/geom/geom_stats.h +++ b/sys/geom/geom_stats.h @@ -34,25 +34,53 @@ #define GEOM_STATS_DEVICE "geom.stats" /* - * A g_stat contains the statistics we collect on consumers and - * providers. + * A g_stat contains the statistics the kernel collect on consumers and + * providers. See libgeom(3) for how to get hold of these. */ struct g_stat { - void *id; - uint64_t nop; - uint64_t nend; - struct bintime it; - struct bintime wentidle; + int updating; + /* + * If non-zero, the structure is being + * updated by the kernel and the contents + * should not be used. + */ + + void *id; + /* GEOM-identifier for the consumer/provider */ + + uint64_t nop; + /* Number of requests started */ + + uint64_t nend; + /* Number of requests completed */ + + struct bintime bt; + /* Accumulated busy time */ + + struct bintime wentbusy; + /* Busy time accounted for until here */ struct { uint64_t nop; + /* Number of requests completed */ + uint64_t nbyte; + /* Number of bytes completed */ + uint64_t nmem; + /* Number of ENOMEM request errors */ + uint64_t nerr; + /* Number of other request errors */ + struct bintime dt; + /* Accumulated request processing time */ + } ops[3]; + #define G_STAT_IDX_READ 0 #define G_STAT_IDX_WRITE 1 #define G_STAT_IDX_DELETE 2 + }; #endif /* _GEOM_GEOM_STATS_H_ */ diff --git a/sys/geom/notes b/sys/geom/notes index 88e0f52..eff24c5 100644 --- a/sys/geom/notes +++ b/sys/geom/notes @@ -38,3 +38,51 @@ by cloning all children before I/O is request on any of them. Notice that cloning an "extra" child and calling g_std_done() on it directly opens another race since the assumption is that g_std_done() only is called in the g_up thread. + +----------------------------------------------------------------------- +Statistics collection + +Statistics collection can run at three levels controlled by the +"kern.geom.collectstats" sysctl. 
+ +At level zero, only the number of transactions started and completed +are counted, and this is only because GEOM internally uses the difference +between these two as a sanity check. + +At level one we collect the full statistics. Higher levels are +reserved for future use. Statistics are collected independently +on both the provider and the consumer, because multiple consumers +can be active against the same provider at the same time. + +The statistics collection falls into two parts: + +The first and simpler part consists of g_io_request() timestamping +the struct bio when the request is first started and g_io_deliver() +updating the consumer's and provider's statistics based on fields in +the bio when it is completed. There are no concurrency or locking +concerns in this part. The statistics collected consist of the number +of requests, number of bytes, number of ENOMEM errors, number of +other errors and duration of the request for each of the three +major request types: BIO_READ, BIO_WRITE and BIO_DELETE. + +The second part is trying to keep track of the "busy%". + +If in g_io_request() we find that there are no outstanding requests +(based on the counters for scheduled and completed requests being +equal), we set a timestamp in the "wentbusy" field. Since there +are no outstanding requests, and as long as there is only one thread +pushing the g_down queue, we cannot possibly conflict with +g_io_deliver() until we ship the current request down. + +In g_io_deliver() we calculate the delta-T from wentbusy and add this +to the "bt" field, and set wentbusy to the current timestamp. We +take care to do this before we increment the "requests completed" +counter, since that prevents g_io_request() from touching the +"wentbusy" timestamp concurrently. + +The statistics data is made available to userland through the use +of a special allocator (in geom_stats.c) which through a device +allows userland to mmap(2) the pages containing the statistics data. 
+In order to indicate to userland when the data in a statistics +structure might be inconsistent, g_io_deliver() atomically sets a +flag "updating" and resets it when the structure is again consistent. |