summaryrefslogtreecommitdiffstats
path: root/sys
diff options
context:
space:
mode:
authorphk <phk@FreeBSD.org>2003-02-07 23:08:24 +0000
committerphk <phk@FreeBSD.org>2003-02-07 23:08:24 +0000
commite8f2dea5ddc06968c8deacd684154cd56b9664ff (patch)
tree926f1fc791d8d4e6893f5573cb0b3fed3a603f7c /sys
parent3c791c8729585027435477ccde6a53be4ba01225 (diff)
downloadFreeBSD-src-e8f2dea5ddc06968c8deacd684154cd56b9664ff.zip
FreeBSD-src-e8f2dea5ddc06968c8deacd684154cd56b9664ff.tar.gz
Commit the correct copy of the g_stat structure.
Add debug.sizeof.g_stat sysctl. Set the id field of the g_stat when we create consumers and providers. Remove biocount from consumer, we will use the counters in the g_stat structure instead. Replace one field which will need to be atomically manipulated with two fields which will not (stat.nop and stat.nend). Change add companion field to bio_children: bio_inbed for the exact same reason. Don't output the biocount in the confdot output. Fix KASSERT in g_io_request(). Add sysctl kern.geom.collectstats defaulting to off. Collect the following raw statistics conditioned on this sysctl: for each consumer and provider { total number of operations started. total number of operations completed. time last operation completed. sum of idle-time. for each of BIO_READ, BIO_WRITE and BIO_DELETE { number of operations completed. number of bytes completed. number of ENOMEM errors. number of other errors. sum of transaction time. } } API for getting hold of these statistics data not included yet.
Diffstat (limited to 'sys')
-rw-r--r--sys/geom/geom.h10
-rw-r--r--sys/geom/geom_dev.c2
-rw-r--r--sys/geom/geom_dump.c4
-rw-r--r--sys/geom/geom_int.h1
-rw-r--r--sys/geom/geom_io.c103
-rw-r--r--sys/geom/geom_kern.c6
-rw-r--r--sys/geom/geom_subr.c8
-rw-r--r--sys/geom/notes40
-rw-r--r--sys/sys/bio.h1
9 files changed, 139 insertions, 36 deletions
diff --git a/sys/geom/geom.h b/sys/geom/geom.h
index dfb582b..f406fb9 100644
--- a/sys/geom/geom.h
+++ b/sys/geom/geom.h
@@ -151,12 +151,10 @@ struct g_stat {
uint64_t nmem;
uint64_t nerr;
struct bintime dt;
- } ops[5];
+ } ops[3];
#define G_STAT_IDX_READ 0
-#define G_STAT_IDX_WRITE 2
-#define G_STAT_IDX_DELETE 3
-#define G_STAT_IDX_GETATTR 4
-#define G_STAT_IDX_SETATTR 5
+#define G_STAT_IDX_WRITE 1
+#define G_STAT_IDX_DELETE 2
};
/*
@@ -173,8 +171,6 @@ struct g_consumer {
LIST_ENTRY(g_consumer) consumers; /* XXX: better name */
int acr, acw, ace;
struct g_event *event;
-
- int biocount;
int spoiled;
struct g_stat stat;
};
diff --git a/sys/geom/geom_dev.c b/sys/geom/geom_dev.c
index 99f4cc7..24234b9 100644
--- a/sys/geom/geom_dev.c
+++ b/sys/geom/geom_dev.c
@@ -430,7 +430,7 @@ g_dev_orphan(struct g_consumer *cp)
gp = cp->geom;
g_trace(G_T_TOPOLOGY, "g_dev_orphan(%p(%s))", cp, gp->name);
g_topology_assert();
- if (cp->biocount > 0)
+ if (cp->stat.nop != cp->stat.nend) /* XXX ? */
return;
dev = gp->softc;
if (dev->si_flags & SI_DUMPDEV)
diff --git a/sys/geom/geom_dump.c b/sys/geom/geom_dump.c
index f65803c..561ae51 100644
--- a/sys/geom/geom_dump.c
+++ b/sys/geom/geom_dump.c
@@ -59,8 +59,8 @@ static void
g_confdot_consumer(struct sbuf *sb, struct g_consumer *cp)
{
- sbuf_printf(sb, "z%p [label=\"r%dw%de%d\\nbio #%d\"];\n",
- cp, cp->acr, cp->acw, cp->ace, cp->biocount);
+ sbuf_printf(sb, "z%p [label=\"r%dw%de%d\"];\n",
+ cp, cp->acr, cp->acw, cp->ace);
if (cp->provider)
sbuf_printf(sb, "z%p -> z%p;\n", cp, cp->provider);
}
diff --git a/sys/geom/geom_int.h b/sys/geom/geom_int.h
index 3224a2d..5962c86 100644
--- a/sys/geom/geom_int.h
+++ b/sys/geom/geom_int.h
@@ -40,6 +40,7 @@ TAILQ_HEAD(g_tailq_head, g_geom);
TAILQ_HEAD(event_tailq_head, g_event);
extern struct event_tailq_head events;
+extern int g_collectstats;
extern int g_debugflags;
/* 1 G_T_TOPOLOGY */
/* 2 G_T_BIO */
diff --git a/sys/geom/geom_io.c b/sys/geom/geom_io.c
index 9a6d215..842595a 100644
--- a/sys/geom/geom_io.c
+++ b/sys/geom/geom_io.c
@@ -154,7 +154,7 @@ g_clone_bio(struct bio *bp)
bp2->bio_offset = bp->bio_offset;
bp2->bio_data = bp->bio_data;
bp2->bio_attribute = bp->bio_attribute;
- bp->bio_children++; /* XXX: atomic ? */
+ bp->bio_children++;
}
/* g_trace(G_T_BIO, "g_clone_bio(%p) = %p", bp, bp2); */
return(bp2);
@@ -261,24 +261,46 @@ g_io_check(struct bio *bp)
void
g_io_request(struct bio *bp, struct g_consumer *cp)
{
+ struct g_provider *pp;
+ struct bintime bt;
+ pp = cp->provider;
KASSERT(cp != NULL, ("NULL cp in g_io_request"));
KASSERT(bp != NULL, ("NULL bp in g_io_request"));
KASSERT(bp->bio_data != NULL, ("NULL bp->data in g_io_request"));
- KASSERT(bp->bio_to == NULL, ("consumer not attached in g_io_request"));
+ KASSERT(pp != NULL, ("consumer not attached in g_io_request"));
+
bp->bio_from = cp;
- bp->bio_to = cp->provider;
+ bp->bio_to = pp;
bp->bio_error = 0;
bp->bio_completed = 0;
- /* begin_stats(&bp->stats); */
+ if (g_collectstats) {
+ /* Collect statistics */
+ binuptime(&bp->bio_t0);
+ if (cp->stat.nop == cp->stat.nend) {
+ /* Consumer is idle */
+ bt = bp->bio_t0;
+ bintime_sub(&bt, &cp->stat.wentidle);
+ bintime_add(&cp->stat.it, &bt);
+ if (pp->stat.nop == pp->stat.nend) {
+ /*
+ * NB: Provider can only be idle if the
+ * consumer is but we cannot trust them
+ * to have gone idle at the same time.
+ */
+ bt = bp->bio_t0;
+ bintime_sub(&bt, &pp->stat.wentidle);
+ bintime_add(&pp->stat.it, &bt);
+ }
+ }
+ }
+ cp->stat.nop++;
+ pp->stat.nop++;
- atomic_add_int(&cp->biocount, 1);
-
/* Pass it on down. */
g_trace(G_T_BIO, "bio_request(%p) from %p(%s) to %p(%s) cmd %d",
- bp, bp->bio_from, bp->bio_from->geom->name,
- bp->bio_to, bp->bio_to->name, bp->bio_cmd);
+ bp, cp, cp->geom->name, pp, pp->name, bp->bio_cmd);
g_bioq_enqueue_tail(bp, &g_bio_run_down);
wakeup(&g_wait_down);
}
@@ -286,32 +308,69 @@ g_io_request(struct bio *bp, struct g_consumer *cp)
void
g_io_deliver(struct bio *bp, int error)
{
+ struct g_consumer *cp;
+ struct g_provider *pp;
+ struct bintime t1;
+ int idx;
+ cp = bp->bio_from;
+ pp = bp->bio_to;
KASSERT(bp != NULL, ("NULL bp in g_io_deliver"));
- KASSERT(bp->bio_from != NULL, ("NULL bio_from in g_io_deliver"));
- KASSERT(bp->bio_from->geom != NULL,
- ("NULL bio_from->geom in g_io_deliver"));
- KASSERT(bp->bio_to != NULL, ("NULL bio_to in g_io_deliver"));
+ KASSERT(cp != NULL, ("NULL bio_from in g_io_deliver"));
+ KASSERT(cp->geom != NULL, ("NULL bio_from->geom in g_io_deliver"));
+ KASSERT(pp != NULL, ("NULL bio_to in g_io_deliver"));
g_trace(G_T_BIO,
"g_io_deliver(%p) from %p(%s) to %p(%s) cmd %d error %d off %jd len %jd",
- bp, bp->bio_from, bp->bio_from->geom->name,
- bp->bio_to, bp->bio_to->name, bp->bio_cmd, error,
+ bp, cp, cp->geom->name, pp, pp->name, bp->bio_cmd, error,
(intmax_t)bp->bio_offset, (intmax_t)bp->bio_length);
- /* finish_stats(&bp->stats); */
+
+ switch (bp->bio_cmd) {
+ case BIO_READ: idx = G_STAT_IDX_READ; break;
+ case BIO_WRITE: idx = G_STAT_IDX_WRITE; break;
+ case BIO_DELETE: idx = G_STAT_IDX_DELETE; break;
+ case BIO_GETATTR: idx = -1; break;
+ case BIO_SETATTR: idx = -1; break;
+ default:
+ panic("unknown bio_cmd in g_io_deliver");
+ break;
+ }
+
+ /* Collect statistics */
+ if (g_collectstats) {
+ binuptime(&t1);
+ pp->stat.wentidle = t1;
+ cp->stat.wentidle = t1;
+
+ if (idx >= 0) {
+ bintime_sub(&t1, &bp->bio_t0);
+ bintime_add(&cp->stat.ops[idx].dt, &t1);
+ bintime_add(&pp->stat.ops[idx].dt, &t1);
+ pp->stat.ops[idx].nbyte += bp->bio_completed;
+ cp->stat.ops[idx].nbyte += bp->bio_completed;
+ pp->stat.ops[idx].nop++;
+ cp->stat.ops[idx].nop++;
+ if (error == ENOMEM) {
+ cp->stat.ops[idx].nmem++;
+ pp->stat.ops[idx].nmem++;
+ } else if (error != 0) {
+ cp->stat.ops[idx].nerr++;
+ pp->stat.ops[idx].nerr++;
+ }
+ }
+ }
+
+ pp->stat.nend++; /* In reverse order of g_io_request() */
+ cp->stat.nend++;
if (error == ENOMEM) {
- printf("ENOMEM %p on %p(%s)\n",
- bp, bp->bio_to, bp->bio_to->name);
- g_io_request(bp, bp->bio_from);
+ printf("ENOMEM %p on %p(%s)\n", bp, pp, pp->name);
+ g_io_request(bp, cp);
pace++;
return;
}
-
bp->bio_error = error;
-
g_bioq_enqueue_tail(bp, &g_bio_run_up);
-
wakeup(&g_wait_up);
}
@@ -362,8 +421,6 @@ g_io_schedule_up(struct thread *tp __unused)
break;
cp = bp->bio_from;
-
- atomic_add_int(&cp->biocount, -1);
biodone(bp);
}
}
diff --git a/sys/geom/geom_kern.c b/sys/geom/geom_kern.c
index fb9a249..a885c28 100644
--- a/sys/geom/geom_kern.c
+++ b/sys/geom/geom_kern.c
@@ -57,6 +57,7 @@ struct sx topology_lock;
static struct proc *g_up_proc;
int g_debugflags;
+int g_collectstats;
/*
* G_UP and G_DOWN are the two threads which push I/O through the
@@ -230,6 +231,9 @@ SYSCTL_PROC(_kern_geom, OID_AUTO, conftxt, CTLTYPE_STRING|CTLFLAG_RD,
SYSCTL_INT(_kern_geom, OID_AUTO, debugflags, CTLFLAG_RW,
&g_debugflags, 0, "");
+SYSCTL_INT(_kern_geom, OID_AUTO, collectstats, CTLFLAG_RW,
+ &g_collectstats, 0, "");
+
SYSCTL_INT(_debug_sizeof, OID_AUTO, g_class, CTLFLAG_RD,
0, sizeof(struct g_class), "");
SYSCTL_INT(_debug_sizeof, OID_AUTO, g_geom, CTLFLAG_RD,
@@ -242,3 +246,5 @@ SYSCTL_INT(_debug_sizeof, OID_AUTO, g_bioq, CTLFLAG_RD,
0, sizeof(struct g_bioq), "");
SYSCTL_INT(_debug_sizeof, OID_AUTO, g_event, CTLFLAG_RD,
0, sizeof(struct g_event), "");
+SYSCTL_INT(_debug_sizeof, OID_AUTO, g_stat, CTLFLAG_RD,
+ 0, sizeof(struct g_stat), "");
diff --git a/sys/geom/geom_subr.c b/sys/geom/geom_subr.c
index 0d28978..d8b3054 100644
--- a/sys/geom/geom_subr.c
+++ b/sys/geom/geom_subr.c
@@ -145,6 +145,7 @@ g_new_consumer(struct g_geom *gp)
cp = g_malloc(sizeof *cp, M_ZERO);
cp->protect = 0x020016602;
cp->geom = gp;
+ cp->stat.id = cp;
LIST_INSERT_HEAD(&gp->consumer, cp, consumer);
return(cp);
}
@@ -184,6 +185,7 @@ g_new_providerf(struct g_geom *gp, const char *fmt, ...)
LIST_INIT(&pp->consumers);
pp->error = ENXIO;
pp->geom = gp;
+ pp->stat.id = pp;
LIST_INSERT_HEAD(&gp->provider, pp, provider);
g_nproviders++;
g_post_event(EV_NEW_PROVIDER, NULL, NULL, pp, NULL);
@@ -327,7 +329,7 @@ g_detach(struct g_consumer *cp)
KASSERT(cp->acr == 0, ("detach but nonzero acr"));
KASSERT(cp->acw == 0, ("detach but nonzero acw"));
KASSERT(cp->ace == 0, ("detach but nonzero ace"));
- KASSERT(cp->biocount == 0, ("detach but nonzero biocount"));
+ KASSERT(cp->stat.nop == cp->stat.nend, ("detach with active requests"));
pp = cp->provider;
LIST_REMOVE(cp, consumers);
cp->provider = NULL;
@@ -499,8 +501,8 @@ g_std_done(struct bio *bp)
bp2->bio_error = bp->bio_error;
bp2->bio_completed += bp->bio_completed;
g_destroy_bio(bp);
- bp2->bio_children--; /* XXX: atomic ? */
- if (bp2->bio_children == 0)
+ bp2->bio_inbed++;
+ if (bp2->bio_children == bp2->bio_inbed)
g_io_deliver(bp2, bp2->bio_error);
}
diff --git a/sys/geom/notes b/sys/geom/notes
new file mode 100644
index 0000000..88e0f52
--- /dev/null
+++ b/sys/geom/notes
@@ -0,0 +1,40 @@
+$FreeBSD$
+
+For the lack of a better place to put them, this file will contain
+notes on some of the more intricate details of geom.
+
+-----------------------------------------------------------------------
+Locking of bio_children and bio_inbed
+
+bio_children is used by g_std_done() and g_clone_bio() to keep track
+of children cloned off a request. g_clone_bio will increment the
+bio_children counter for each time it is called and g_std_done will
+increment bio_inbed for every call, and if the two counters are
+equal, call g_io_deliver() on the parent bio.
+
+The general assumption is that g_clone_bio() is called only in
+the g_down thread, and g_std_done() only in the g_up thread and
+therefore the two fields do not generally need locking. These
+restrictions are not enforced by the code, but only with great
+care should they be violated.
+
+It is the responsibility of the class implementation to avoid the
+following race condition: A class intend to split a bio in two
+children. It clones the bio, and requests I/O on the child.
+This I/O operation completes before the second child is cloned
+and g_std_done() sees the counters both equal 1 and finishes off
+the bio.
+
+There is no race present in the common case where the bio is split
+in multiple parts in the class start method and the I/O is requested
+on another GEOM class below: There is only one g_down thread and
+the class below will not get its start method run until we return
+from our start method, and consequently the I/O cannot complete
+prematurely.
+
+In all other cases, this race needs to be mitigated, for instance
+by cloning all children before I/O is request on any of them.
+
+Notice that cloning an "extra" child and calling g_std_done() on
+it directly opens another race since the assumption is that
+g_std_done() only is called in the g_up thread.
diff --git a/sys/sys/bio.h b/sys/sys/bio.h
index 2ef28bc..3169593 100644
--- a/sys/sys/bio.h
+++ b/sys/sys/bio.h
@@ -71,6 +71,7 @@ struct bio {
off_t bio_length; /* Like bio_bcount */
off_t bio_completed; /* Inverse of bio_resid */
u_int bio_children; /* Number of spawned bios */
+ u_int bio_inbed; /* Children safely home by now */
struct bio *bio_parent; /* Pointer to parent */
struct bintime bio_t0; /* Time request started */
OpenPOWER on IntegriCloud