author | scottl <scottl@FreeBSD.org> | 2014-01-07 01:32:23 +0000 |
---|---|---|
committer | scottl <scottl@FreeBSD.org> | 2014-01-07 01:32:23 +0000 |
commit | 0a34594b9cd7c8b87f719ed058da6be2b756a8e5 (patch) | |
tree | 9702de6a6a50f2bb1a6829d66c26686ca7a160cc | |
parent | 1bce546983c144fd6d05af45e88abd3186b87b1b (diff) | |
download | FreeBSD-src-0a34594b9cd7c8b87f719ed058da6be2b756a8e5.zip FreeBSD-src-0a34594b9cd7c8b87f719ed058da6be2b756a8e5.tar.gz |
MFC Alexander Motin's GEOM direct dispatch work:
r256603:
Introduce a new function, devstat_end_transaction_bio_bt(), which takes an extra
argument specifying the present time. Use it to move binuptime() out of the lock,
substantially reducing lock congestion when a slow timecounter is used.
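A minimal sketch of the pattern this enables (g_disk_done() in the diff below is the real instance); the softc layout here is hypothetical:

```c
#include <sys/param.h>
#include <sys/time.h>
#include <sys/bio.h>
#include <sys/devicestat.h>
#include <sys/lock.h>
#include <sys/mutex.h>

struct example_softc {			/* hypothetical driver softc */
	struct mtx	done_mtx;
	struct devstat	*devstat;
};

static void
example_done(struct example_softc *sc, struct bio *bp)
{
	struct bintime now;

	binuptime(&now);	/* timecounter read happens outside the lock */
	mtx_lock(&sc->done_mtx);
	devstat_end_transaction_bio_bt(sc->devstat, bp, &now);
	mtx_unlock(&sc->done_mtx);
}
```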
r256606:
Move g_io_deliver() out of the lock, as required for direct dispatch.
Move g_destroy_bio() out as well, to shrink the lock scope even further.
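Condensed from the g_concat_done() hunk in the diff below: the parent-bio bookkeeping stays under the softc lock, but the lock is dropped before g_io_deliver(), and g_destroy_bio() runs outside it too (sc_lock here stands for the hypothetical per-geom mutex):

```c
static void
example_child_done(struct bio *bp)	/* completion handler for a cloned bio */
{
	struct example_softc *sc;
	struct bio *pbp;

	pbp = bp->bio_parent;
	sc = pbp->bio_to->geom->softc;
	mtx_lock(&sc->sc_lock);
	if (pbp->bio_error == 0)
		pbp->bio_error = bp->bio_error;
	pbp->bio_completed += bp->bio_completed;
	pbp->bio_inbed++;
	if (pbp->bio_children == pbp->bio_inbed) {
		mtx_unlock(&sc->sc_lock);	/* deliver outside the lock */
		g_io_deliver(pbp, pbp->bio_error);
	} else
		mtx_unlock(&sc->sc_lock);
	g_destroy_bio(bp);			/* destroy outside it as well */
}
```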
r256607:
Fix passing an uninitialized bio_resid argument to g_trace().
r256610:
Add unmapped I/O support to GEOM RAID.
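The unmapped support leans on one arithmetic pattern that recurs through the RAID and concat hunks below; shown here with comments. `addr` carries the running byte offset into the parent request (kept as a fake pointer even when the bio has no mapping), and `cbp` is a clone from g_clone_bio():

```c
	if ((bp->bio_flags & BIO_UNMAPPED) != 0) {
		/* Fold the byte offset into the inherited page array. */
		cbp->bio_ma_offset += (uintptr_t)addr;
		cbp->bio_ma += cbp->bio_ma_offset / PAGE_SIZE;
		cbp->bio_ma_offset %= PAGE_SIZE;
		/* Pages spanned by the (offset, length) window. */
		cbp->bio_ma_n = round_page(cbp->bio_ma_offset +
		    cbp->bio_length) / PAGE_SIZE;
	} else
		cbp->bio_data = addr;
```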
r256830:
Restore BIO_UNMAPPED and BIO_TRANSIENT_MAPPING in biodone() when unmapping
a temporarily mapped buffer. That fixes a double unmap if biodone() is called
twice for the same BIO (but with different done methods).
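A sketch of what that amounts to, assuming the biodone() side looks roughly like this (vfs_bio.c itself is not among the hunks shown below): after the transient mapping is torn down, the flags are put back so a second biodone() sees an unmapped bio again instead of unmapping twice.

```c
	if ((bp->bio_flags & BIO_TRANSIENT_MAPPING) != 0) {
		/* ...release the transient KVA mapping here... */
		bp->bio_flags &= ~BIO_TRANSIENT_MAPPING;
		bp->bio_flags |= BIO_UNMAPPED;	/* back to pre-mapping state */
	}
```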
r256880:
Merge GEOM direct dispatch changes from the projects/camlock branch.
When safety requirements are met, this allows I/O requests to bypass the
GEOM g_up/g_down threads and execute directly in the caller's context,
avoiding CPU bottlenecks in g_up/g_down as well as several context
switches per I/O.
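The gate for direct dispatch, condensed from the g_io_request() hunk below (the real hunk adds a further condition involving BIO_UNMAPPED and THREAD_CAN_SLEEP()): both endpoints must opt in via the new flags, the caller must not already be a GEOM thread, and at least half of the kernel stack must remain.

```c
	int direct;

#ifdef GET_STACK_USAGE
	direct = (cp->flags & G_CF_DIRECT_SEND) != 0 &&
	    (pp->flags & G_PF_DIRECT_RECEIVE) != 0 &&
	    !g_is_geom_thread(curthread);
	if (direct) {
		/* Block direct execution if less than half the stack is left. */
		size_t st, su;

		GET_STACK_USAGE(st, su);
		if (su * 2 > st)
			direct = 0;
	}
#else
	direct = 0;	/* no stack introspection: always queue to g_down */
#endif
```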
r259247:
Fix a bug introduced in r256607: bio_resid has to be recalculated here, since
the sizes of the original and completed requests may differ due to truncation
at the end of the media.
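As it lands in the g_dev_done() hunk below: the parent's residual is recomputed from the parent's own length and the child's completed byte count, rather than copied from the child, since truncation at the end of the media can make the two request sizes differ.

```c
	/* bp is the parent bio, bp2 the completed (possibly truncated) child. */
	bp->bio_completed = bp2->bio_completed;
	bp->bio_resid = bp->bio_length - bp2->bio_completed;
```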
Testing of the stable/10 merge was done by Netflix, but all of the credit
goes to Alexander and iX Systems.
Submitted by: mav
Sponsored by: iX Systems
43 files changed, 586 insertions, 295 deletions
diff --git a/sys/cam/ata/ata_da.c b/sys/cam/ata/ata_da.c index 062e805..e476a3e 100644 --- a/sys/cam/ata/ata_da.c +++ b/sys/cam/ata/ata_da.c @@ -1254,7 +1254,7 @@ adaregister(struct cam_periph *periph, void *arg) maxio = min(maxio, 256 * softc->params.secsize); softc->disk->d_maxsize = maxio; softc->disk->d_unit = periph->unit_number; - softc->disk->d_flags = 0; + softc->disk->d_flags = DISKFLAG_DIRECT_COMPLETION; if (softc->flags & ADA_FLAG_CAN_FLUSHCACHE) softc->disk->d_flags |= DISKFLAG_CANFLUSHCACHE; if (softc->flags & ADA_FLAG_CAN_TRIM) { diff --git a/sys/cam/scsi/scsi_da.c b/sys/cam/scsi/scsi_da.c index 1ba7382..59a332c 100644 --- a/sys/cam/scsi/scsi_da.c +++ b/sys/cam/scsi/scsi_da.c @@ -2133,7 +2133,7 @@ daregister(struct cam_periph *periph, void *arg) else softc->disk->d_maxsize = cpi.maxio; softc->disk->d_unit = periph->unit_number; - softc->disk->d_flags = 0; + softc->disk->d_flags = DISKFLAG_DIRECT_COMPLETION; if ((softc->quirks & DA_Q_NO_SYNC_CACHE) == 0) softc->disk->d_flags |= DISKFLAG_CANFLUSHCACHE; if ((cpi.hba_misc & PIM_UNMAPPED) != 0) diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev_geom.c b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev_geom.c index 72401d2..741a60b 100644 --- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev_geom.c +++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev_geom.c @@ -147,6 +147,7 @@ vdev_geom_attach(struct g_provider *pp) ZFS_LOG(1, "Used existing consumer for %s.", pp->name); } } + cp->flags |= G_CF_DIRECT_SEND | G_CF_DIRECT_RECEIVE; return (cp); } diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zvol.c b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zvol.c index 72d4502..753927d 100644 --- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zvol.c +++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zvol.c @@ -2153,6 +2153,7 @@ zvol_geom_create(const char *name) gp->start = zvol_geom_start; gp->access = zvol_geom_access; pp = g_new_providerf(gp, "%s/%s", ZVOL_DRIVER, name); + pp->flags |= G_PF_DIRECT_RECEIVE | G_PF_DIRECT_SEND; pp->sectorsize = DEV_BSIZE; zv = kmem_zalloc(sizeof(*zv), KM_SLEEP); @@ -2256,18 +2257,20 @@ zvol_geom_start(struct bio *bp) zvol_state_t *zv; boolean_t first; + zv = bp->bio_to->private; + ASSERT(zv != NULL); switch (bp->bio_cmd) { + case BIO_FLUSH: + if (!THREAD_CAN_SLEEP()) + goto enqueue; + zil_commit(zv->zv_zilog, ZVOL_OBJ); + g_io_deliver(bp, 0); + break; case BIO_READ: case BIO_WRITE: - case BIO_FLUSH: - zv = bp->bio_to->private; - ASSERT(zv != NULL); - mtx_lock(&zv->zv_queue_mtx); - first = (bioq_first(&zv->zv_queue) == NULL); - bioq_insert_tail(&zv->zv_queue, bp); - mtx_unlock(&zv->zv_queue_mtx); - if (first) - wakeup_one(&zv->zv_queue); + if (!THREAD_CAN_SLEEP()) + goto enqueue; + zvol_strategy(bp); break; case BIO_GETATTR: case BIO_DELETE: @@ -2275,6 +2278,15 @@ zvol_geom_start(struct bio *bp) g_io_deliver(bp, EOPNOTSUPP); break; } + return; + +enqueue: + mtx_lock(&zv->zv_queue_mtx); + first = (bioq_first(&zv->zv_queue) == NULL); + bioq_insert_tail(&zv->zv_queue, bp); + mtx_unlock(&zv->zv_queue_mtx); + if (first) + wakeup_one(&zv->zv_queue); } static void @@ -2449,6 +2461,7 @@ zvol_rename_minor(struct g_geom *gp, const char *newname) g_wither_provider(pp, ENXIO); pp = g_new_providerf(gp, "%s/%s", ZVOL_DRIVER, newname); + pp->flags |= G_PF_DIRECT_RECEIVE | G_PF_DIRECT_SEND; pp->sectorsize = DEV_BSIZE; pp->mediasize = zv->zv_volsize; pp->private = zv; diff --git a/sys/dev/md/md.c b/sys/dev/md/md.c index f0d1aec..8ae51d1 100644 --- a/sys/dev/md/md.c +++ 
b/sys/dev/md/md.c @@ -189,6 +189,7 @@ struct md_s { LIST_ENTRY(md_s) list; struct bio_queue_head bio_queue; struct mtx queue_mtx; + struct mtx stat_mtx; struct cdev *dev; enum md_types type; off_t mediasize; @@ -415,8 +416,11 @@ g_md_start(struct bio *bp) struct md_s *sc; sc = bp->bio_to->geom->softc; - if ((bp->bio_cmd == BIO_READ) || (bp->bio_cmd == BIO_WRITE)) + if ((bp->bio_cmd == BIO_READ) || (bp->bio_cmd == BIO_WRITE)) { + mtx_lock(&sc->stat_mtx); devstat_start_transaction_bio(sc->devstat, bp); + mtx_unlock(&sc->stat_mtx); + } mtx_lock(&sc->queue_mtx); bioq_disksort(&sc->bio_queue, bp); mtx_unlock(&sc->queue_mtx); @@ -987,6 +991,7 @@ mdnew(int unit, int *errp, enum md_types type) sc->type = type; bioq_init(&sc->bio_queue); mtx_init(&sc->queue_mtx, "md bio queue", NULL, MTX_DEF); + mtx_init(&sc->stat_mtx, "md stat", NULL, MTX_DEF); sc->unit = unit; sprintf(sc->name, "md%d", unit); LIST_INSERT_HEAD(&md_softc_list, sc, list); @@ -994,6 +999,7 @@ mdnew(int unit, int *errp, enum md_types type) if (error == 0) return (sc); LIST_REMOVE(sc, list); + mtx_destroy(&sc->stat_mtx); mtx_destroy(&sc->queue_mtx); free_unr(md_uh, sc->unit); free(sc, M_MD); @@ -1011,6 +1017,7 @@ mdinit(struct md_s *sc) gp = g_new_geomf(&g_md_class, "md%d", sc->unit); gp->softc = sc; pp = g_new_providerf(gp, "md%d", sc->unit); + pp->flags |= G_PF_DIRECT_SEND | G_PF_DIRECT_RECEIVE; pp->mediasize = sc->mediasize; pp->sectorsize = sc->sectorsize; switch (sc->type) { @@ -1206,6 +1213,7 @@ mddestroy(struct md_s *sc, struct thread *td) while (!(sc->flags & MD_EXITING)) msleep(sc->procp, &sc->queue_mtx, PRIBIO, "mddestroy", hz / 10); mtx_unlock(&sc->queue_mtx); + mtx_destroy(&sc->stat_mtx); mtx_destroy(&sc->queue_mtx); if (sc->vnode != NULL) { vn_lock(sc->vnode, LK_EXCLUSIVE | LK_RETRY); diff --git a/sys/geom/concat/g_concat.c b/sys/geom/concat/g_concat.c index 9143166..2efc1b5 100644 --- a/sys/geom/concat/g_concat.c +++ b/sys/geom/concat/g_concat.c @@ -239,6 +239,27 @@ g_concat_kernel_dump(struct bio *bp) } static void +g_concat_done(struct bio *bp) +{ + struct g_concat_softc *sc; + struct bio *pbp; + + pbp = bp->bio_parent; + sc = pbp->bio_to->geom->softc; + mtx_lock(&sc->sc_lock); + if (pbp->bio_error == 0) + pbp->bio_error = bp->bio_error; + pbp->bio_completed += bp->bio_completed; + pbp->bio_inbed++; + if (pbp->bio_children == pbp->bio_inbed) { + mtx_unlock(&sc->sc_lock); + g_io_deliver(pbp, pbp->bio_error); + } else + mtx_unlock(&sc->sc_lock); + g_destroy_bio(bp); +} + +static void g_concat_flush(struct g_concat_softc *sc, struct bio *bp) { struct bio_queue_head queue; @@ -250,23 +271,19 @@ g_concat_flush(struct g_concat_softc *sc, struct bio *bp) for (no = 0; no < sc->sc_ndisks; no++) { cbp = g_clone_bio(bp); if (cbp == NULL) { - for (cbp = bioq_first(&queue); cbp != NULL; - cbp = bioq_first(&queue)) { - bioq_remove(&queue, cbp); + while ((cbp = bioq_takefirst(&queue)) != NULL) g_destroy_bio(cbp); - } if (bp->bio_error == 0) bp->bio_error = ENOMEM; g_io_deliver(bp, bp->bio_error); return; } bioq_insert_tail(&queue, cbp); - cbp->bio_done = g_std_done; + cbp->bio_done = g_concat_done; cbp->bio_caller1 = sc->sc_disks[no].d_consumer; cbp->bio_to = sc->sc_disks[no].d_consumer->provider; } - for (cbp = bioq_first(&queue); cbp != NULL; cbp = bioq_first(&queue)) { - bioq_remove(&queue, cbp); + while ((cbp = bioq_takefirst(&queue)) != NULL) { G_CONCAT_LOGREQ(cbp, "Sending request."); cp = cbp->bio_caller1; cbp->bio_caller1 = NULL; @@ -320,7 +337,10 @@ g_concat_start(struct bio *bp) offset = bp->bio_offset; length = 
bp->bio_length; - addr = bp->bio_data; + if ((bp->bio_flags & BIO_UNMAPPED) != 0) + addr = NULL; + else + addr = bp->bio_data; end = offset + length; bioq_init(&queue); @@ -338,11 +358,8 @@ g_concat_start(struct bio *bp) cbp = g_clone_bio(bp); if (cbp == NULL) { - for (cbp = bioq_first(&queue); cbp != NULL; - cbp = bioq_first(&queue)) { - bioq_remove(&queue, cbp); + while ((cbp = bioq_takefirst(&queue)) != NULL) g_destroy_bio(cbp); - } if (bp->bio_error == 0) bp->bio_error = ENOMEM; g_io_deliver(bp, bp->bio_error); @@ -352,11 +369,21 @@ g_concat_start(struct bio *bp) /* * Fill in the component buf structure. */ - cbp->bio_done = g_std_done; + if (len == bp->bio_length) + cbp->bio_done = g_std_done; + else + cbp->bio_done = g_concat_done; cbp->bio_offset = off; - cbp->bio_data = addr; - addr += len; cbp->bio_length = len; + if ((bp->bio_flags & BIO_UNMAPPED) != 0) { + cbp->bio_ma_offset += (uintptr_t)addr; + cbp->bio_ma += cbp->bio_ma_offset / PAGE_SIZE; + cbp->bio_ma_offset %= PAGE_SIZE; + cbp->bio_ma_n = round_page(cbp->bio_ma_offset + + cbp->bio_length) / PAGE_SIZE; + } else + cbp->bio_data = addr; + addr += len; cbp->bio_to = disk->d_consumer->provider; cbp->bio_caller1 = disk; @@ -366,8 +393,7 @@ g_concat_start(struct bio *bp) KASSERT(length == 0, ("Length is still greater than 0 (class=%s, name=%s).", bp->bio_to->geom->class->name, bp->bio_to->geom->name)); - for (cbp = bioq_first(&queue); cbp != NULL; cbp = bioq_first(&queue)) { - bioq_remove(&queue, cbp); + while ((cbp = bioq_takefirst(&queue)) != NULL) { G_CONCAT_LOGREQ(cbp, "Sending request."); disk = cbp->bio_caller1; cbp->bio_caller1 = NULL; @@ -379,7 +405,7 @@ static void g_concat_check_and_run(struct g_concat_softc *sc) { struct g_concat_disk *disk; - struct g_provider *pp; + struct g_provider *dp, *pp; u_int no, sectorsize = 0; off_t start; @@ -388,20 +414,27 @@ g_concat_check_and_run(struct g_concat_softc *sc) return; pp = g_new_providerf(sc->sc_geom, "concat/%s", sc->sc_name); + pp->flags |= G_PF_DIRECT_SEND | G_PF_DIRECT_RECEIVE | + G_PF_ACCEPT_UNMAPPED; start = 0; for (no = 0; no < sc->sc_ndisks; no++) { disk = &sc->sc_disks[no]; + dp = disk->d_consumer->provider; disk->d_start = start; - disk->d_end = disk->d_start + - disk->d_consumer->provider->mediasize; + disk->d_end = disk->d_start + dp->mediasize; if (sc->sc_type == G_CONCAT_TYPE_AUTOMATIC) - disk->d_end -= disk->d_consumer->provider->sectorsize; + disk->d_end -= dp->sectorsize; start = disk->d_end; if (no == 0) - sectorsize = disk->d_consumer->provider->sectorsize; - else { - sectorsize = lcm(sectorsize, - disk->d_consumer->provider->sectorsize); + sectorsize = dp->sectorsize; + else + sectorsize = lcm(sectorsize, dp->sectorsize); + + /* A provider underneath us doesn't support unmapped */ + if ((dp->flags & G_PF_ACCEPT_UNMAPPED) == 0) { + G_CONCAT_DEBUG(1, "Cancelling unmapped " + "because of %s.", dp->name); + pp->flags &= ~G_PF_ACCEPT_UNMAPPED; } } pp->sectorsize = sectorsize; @@ -468,6 +501,7 @@ g_concat_add_disk(struct g_concat_softc *sc, struct g_provider *pp, u_int no) fcp = LIST_FIRST(&gp->consumer); cp = g_new_consumer(gp); + cp->flags |= G_CF_DIRECT_SEND | G_CF_DIRECT_RECEIVE; error = g_attach(cp, pp); if (error != 0) { g_destroy_consumer(cp); @@ -557,6 +591,7 @@ g_concat_create(struct g_class *mp, const struct g_concat_metadata *md, for (no = 0; no < sc->sc_ndisks; no++) sc->sc_disks[no].d_consumer = NULL; sc->sc_type = type; + mtx_init(&sc->sc_lock, "gconcat lock", NULL, MTX_DEF); gp->softc = sc; sc->sc_geom = gp; @@ -605,6 +640,7 @@ 
g_concat_destroy(struct g_concat_softc *sc, boolean_t force) KASSERT(sc->sc_provider == NULL, ("Provider still exists? (device=%s)", gp->name)); free(sc->sc_disks, M_CONCAT); + mtx_destroy(&sc->sc_lock); free(sc, M_CONCAT); G_CONCAT_DEBUG(0, "Device %s destroyed.", gp->name); diff --git a/sys/geom/concat/g_concat.h b/sys/geom/concat/g_concat.h index 1c1e6f5..c2ea366 100644 --- a/sys/geom/concat/g_concat.h +++ b/sys/geom/concat/g_concat.h @@ -83,6 +83,7 @@ struct g_concat_softc { struct g_concat_disk *sc_disks; uint16_t sc_ndisks; + struct mtx sc_lock; }; #define sc_name sc_geom->name #endif /* _KERNEL */ diff --git a/sys/geom/gate/g_gate.c b/sys/geom/gate/g_gate.c index 0727ecd..eed4abb 100644 --- a/sys/geom/gate/g_gate.c +++ b/sys/geom/gate/g_gate.c @@ -91,6 +91,7 @@ static struct mtx g_gate_units_lock; static int g_gate_destroy(struct g_gate_softc *sc, boolean_t force) { + struct bio_queue_head queue; struct g_provider *pp; struct g_consumer *cp; struct g_geom *gp; @@ -113,21 +114,22 @@ g_gate_destroy(struct g_gate_softc *sc, boolean_t force) pp->flags |= G_PF_WITHER; g_orphan_provider(pp, ENXIO); callout_drain(&sc->sc_callout); + bioq_init(&queue); mtx_lock(&sc->sc_queue_mtx); - while ((bp = bioq_first(&sc->sc_inqueue)) != NULL) { - bioq_remove(&sc->sc_inqueue, bp); + while ((bp = bioq_takefirst(&sc->sc_inqueue)) != NULL) { sc->sc_queue_count--; - G_GATE_LOGREQ(1, bp, "Request canceled."); - g_io_deliver(bp, ENXIO); + bioq_insert_tail(&queue, bp); } - while ((bp = bioq_first(&sc->sc_outqueue)) != NULL) { - bioq_remove(&sc->sc_outqueue, bp); + while ((bp = bioq_takefirst(&sc->sc_outqueue)) != NULL) { sc->sc_queue_count--; - G_GATE_LOGREQ(1, bp, "Request canceled."); - g_io_deliver(bp, ENXIO); + bioq_insert_tail(&queue, bp); } mtx_unlock(&sc->sc_queue_mtx); g_topology_unlock(); + while ((bp = bioq_takefirst(&queue)) != NULL) { + G_GATE_LOGREQ(1, bp, "Request canceled."); + g_io_deliver(bp, ENXIO); + } mtx_lock(&g_gate_units_lock); /* One reference is ours. 
*/ sc->sc_ref--; @@ -334,6 +336,7 @@ g_gate_getunit(int unit, int *errorp) static void g_gate_guard(void *arg) { + struct bio_queue_head queue; struct g_gate_softc *sc; struct bintime curtime; struct bio *bp, *bp2; @@ -341,24 +344,27 @@ g_gate_guard(void *arg) sc = arg; binuptime(&curtime); g_gate_hold(sc->sc_unit, NULL); + bioq_init(&queue); mtx_lock(&sc->sc_queue_mtx); TAILQ_FOREACH_SAFE(bp, &sc->sc_inqueue.queue, bio_queue, bp2) { if (curtime.sec - bp->bio_t0.sec < 5) continue; bioq_remove(&sc->sc_inqueue, bp); sc->sc_queue_count--; - G_GATE_LOGREQ(1, bp, "Request timeout."); - g_io_deliver(bp, EIO); + bioq_insert_tail(&queue, bp); } TAILQ_FOREACH_SAFE(bp, &sc->sc_outqueue.queue, bio_queue, bp2) { if (curtime.sec - bp->bio_t0.sec < 5) continue; bioq_remove(&sc->sc_outqueue, bp); sc->sc_queue_count--; + bioq_insert_tail(&queue, bp); + } + mtx_unlock(&sc->sc_queue_mtx); + while ((bp = bioq_takefirst(&queue)) != NULL) { G_GATE_LOGREQ(1, bp, "Request timeout."); g_io_deliver(bp, EIO); } - mtx_unlock(&sc->sc_queue_mtx); if ((sc->sc_flags & G_GATE_FLAG_DESTROY) == 0) { callout_reset(&sc->sc_callout, sc->sc_timeout * hz, g_gate_guard, sc); @@ -542,6 +548,7 @@ g_gate_create(struct g_gate_ctl_create *ggio) if (ropp != NULL) { cp = g_new_consumer(gp); + cp->flags |= G_CF_DIRECT_SEND | G_CF_DIRECT_RECEIVE; error = g_attach(cp, ropp); if (error != 0) { G_GATE_DEBUG(1, "Unable to attach to %s.", ropp->name); @@ -560,6 +567,7 @@ g_gate_create(struct g_gate_ctl_create *ggio) ggio->gctl_unit = sc->sc_unit; pp = g_new_providerf(gp, "%s", name); + pp->flags |= G_PF_DIRECT_SEND | G_PF_DIRECT_RECEIVE; pp->mediasize = ggio->gctl_mediasize; pp->sectorsize = ggio->gctl_sectorsize; sc->sc_provider = pp; @@ -636,6 +644,7 @@ g_gate_modify(struct g_gate_softc *sc, struct g_gate_ctl_modify *ggio) return (EINVAL); } cp = g_new_consumer(sc->sc_provider->geom); + cp->flags |= G_CF_DIRECT_SEND | G_CF_DIRECT_RECEIVE; error = g_attach(cp, pp); if (error != 0) { G_GATE_DEBUG(1, "Unable to attach to %s.", diff --git a/sys/geom/geom.h b/sys/geom/geom.h index 660bf6e..1c1fdb03 100644 --- a/sys/geom/geom.h +++ b/sys/geom/geom.h @@ -177,6 +177,8 @@ struct g_consumer { int flags; #define G_CF_SPOILED 0x1 #define G_CF_ORPHAN 0x4 +#define G_CF_DIRECT_SEND 0x10 +#define G_CF_DIRECT_RECEIVE 0x20 struct devstat *stat; u_int nstart, nend; @@ -206,6 +208,8 @@ struct g_provider { #define G_PF_WITHER 0x2 #define G_PF_ORPHAN 0x4 #define G_PF_ACCEPT_UNMAPPED 0x8 +#define G_PF_DIRECT_SEND 0x10 +#define G_PF_DIRECT_RECEIVE 0x20 /* Two fields for the implementing class to use */ void *private; @@ -393,6 +397,8 @@ g_free(void *ptr) }; \ DECLARE_MODULE(name, name##_mod, SI_SUB_DRIVERS, SI_ORDER_FIRST); +int g_is_geom_thread(struct thread *td); + #endif /* _KERNEL */ /* geom_ctl.c */ diff --git a/sys/geom/geom_dev.c b/sys/geom/geom_dev.c index 17f24f8..2c113e6 100644 --- a/sys/geom/geom_dev.c +++ b/sys/geom/geom_dev.c @@ -222,6 +222,7 @@ g_dev_taste(struct g_class *mp, struct g_provider *pp, int insist __unused) mtx_init(&sc->sc_mtx, "g_dev", NULL, MTX_DEF); cp = g_new_consumer(gp); cp->private = sc; + cp->flags |= G_CF_DIRECT_SEND | G_CF_DIRECT_RECEIVE; error = g_attach(cp, pp); KASSERT(error == 0, ("g_dev_taste(%s) failed to g_attach, err=%d", pp->name, error)); @@ -485,16 +486,16 @@ g_dev_done(struct bio *bp2) sc = cp->private; bp = bp2->bio_parent; bp->bio_error = bp2->bio_error; - if (bp->bio_error != 0) { + bp->bio_completed = bp2->bio_completed; + bp->bio_resid = bp->bio_length - bp2->bio_completed; + if (bp2->bio_error != 0) { 
g_trace(G_T_BIO, "g_dev_done(%p) had error %d", - bp2, bp->bio_error); + bp2, bp2->bio_error); bp->bio_flags |= BIO_ERROR; } else { g_trace(G_T_BIO, "g_dev_done(%p/%p) resid %ld completed %jd", - bp2, bp, bp->bio_resid, (intmax_t)bp2->bio_completed); + bp2, bp, bp2->bio_resid, (intmax_t)bp2->bio_completed); } - bp->bio_resid = bp->bio_length - bp2->bio_completed; - bp->bio_completed = bp2->bio_completed; g_destroy_bio(bp2); destroy = 0; mtx_lock(&sc->sc_mtx); diff --git a/sys/geom/geom_disk.c b/sys/geom/geom_disk.c index 732b35d..e0b9776 100644 --- a/sys/geom/geom_disk.c +++ b/sys/geom/geom_disk.c @@ -66,6 +66,7 @@ struct g_disk_softc { struct sysctl_oid *sysctl_tree; char led[64]; uint32_t state; + struct mtx start_mtx; }; static g_access_t g_disk_access; @@ -229,6 +230,7 @@ g_disk_setstate(struct bio *bp, struct g_disk_softc *sc) static void g_disk_done(struct bio *bp) { + struct bintime now; struct bio *bp2; struct g_disk_softc *sc; @@ -237,19 +239,40 @@ g_disk_done(struct bio *bp) bp2 = bp->bio_parent; sc = bp2->bio_to->private; bp->bio_completed = bp->bio_length - bp->bio_resid; + binuptime(&now); mtx_lock(&sc->done_mtx); if (bp2->bio_error == 0) bp2->bio_error = bp->bio_error; bp2->bio_completed += bp->bio_completed; if ((bp->bio_cmd & (BIO_READ|BIO_WRITE|BIO_DELETE)) != 0) - devstat_end_transaction_bio(sc->dp->d_devstat, bp); - g_destroy_bio(bp); + devstat_end_transaction_bio_bt(sc->dp->d_devstat, bp, &now); bp2->bio_inbed++; if (bp2->bio_children == bp2->bio_inbed) { + mtx_unlock(&sc->done_mtx); bp2->bio_resid = bp2->bio_bcount - bp2->bio_completed; g_io_deliver(bp2, bp2->bio_error); + } else + mtx_unlock(&sc->done_mtx); + g_destroy_bio(bp); +} + +static void +g_disk_done_single(struct bio *bp) +{ + struct bintime now; + struct g_disk_softc *sc; + + bp->bio_completed = bp->bio_length - bp->bio_resid; + bp->bio_done = (void *)bp->bio_to; + bp->bio_to = LIST_FIRST(&bp->bio_disk->d_geom->provider); + if ((bp->bio_cmd & (BIO_READ|BIO_WRITE|BIO_DELETE)) != 0) { + binuptime(&now); + sc = bp->bio_to->private; + mtx_lock(&sc->done_mtx); + devstat_end_transaction_bio_bt(sc->dp->d_devstat, bp, &now); + mtx_unlock(&sc->done_mtx); } - mtx_unlock(&sc->done_mtx); + g_io_deliver(bp, bp->bio_error); } static int @@ -277,7 +300,7 @@ g_disk_start(struct bio *bp) struct disk *dp; struct g_disk_softc *sc; int error; - off_t off; + off_t d_maxsize, off; sc = bp->bio_to->private; if (sc == NULL || (dp = sc->dp) == NULL || dp->d_destroyed) { @@ -294,6 +317,22 @@ g_disk_start(struct bio *bp) /* fall-through */ case BIO_READ: case BIO_WRITE: + d_maxsize = (bp->bio_cmd == BIO_DELETE) ? + dp->d_delmaxsize : dp->d_maxsize; + if (bp->bio_length <= d_maxsize) { + bp->bio_disk = dp; + bp->bio_to = (void *)bp->bio_done; + bp->bio_done = g_disk_done_single; + bp->bio_pblkno = bp->bio_offset / dp->d_sectorsize; + bp->bio_bcount = bp->bio_length; + mtx_lock(&sc->start_mtx); + devstat_start_transaction_bio(dp->d_devstat, bp); + mtx_unlock(&sc->start_mtx); + g_disk_lock_giant(dp); + dp->d_strategy(bp); + g_disk_unlock_giant(dp); + break; + } off = 0; bp3 = NULL; bp2 = g_clone_bio(bp); @@ -302,10 +341,6 @@ g_disk_start(struct bio *bp) break; } do { - off_t d_maxsize; - - d_maxsize = (bp->bio_cmd == BIO_DELETE) ? 
- dp->d_delmaxsize : dp->d_maxsize; bp2->bio_offset += off; bp2->bio_length -= off; if ((bp->bio_flags & BIO_UNMAPPED) == 0) { @@ -346,7 +381,9 @@ g_disk_start(struct bio *bp) bp2->bio_pblkno = bp2->bio_offset / dp->d_sectorsize; bp2->bio_bcount = bp2->bio_length; bp2->bio_disk = dp; + mtx_lock(&sc->start_mtx); devstat_start_transaction_bio(dp->d_devstat, bp2); + mtx_unlock(&sc->start_mtx); g_disk_lock_giant(dp); dp->d_strategy(bp2); g_disk_unlock_giant(dp); @@ -402,15 +439,11 @@ g_disk_start(struct bio *bp) error = EOPNOTSUPP; break; } - bp2 = g_clone_bio(bp); - if (bp2 == NULL) { - g_io_deliver(bp, ENOMEM); - return; - } - bp2->bio_done = g_disk_done; - bp2->bio_disk = dp; + bp->bio_disk = dp; + bp->bio_to = (void *)bp->bio_done; + bp->bio_done = g_disk_done_single; g_disk_lock_giant(dp); - dp->d_strategy(bp2); + dp->d_strategy(bp); g_disk_unlock_giant(dp); break; default: @@ -515,17 +548,24 @@ g_disk_create(void *arg, int flag) g_topology_assert(); dp = arg; sc = g_malloc(sizeof(*sc), M_WAITOK | M_ZERO); + mtx_init(&sc->start_mtx, "g_disk_start", NULL, MTX_DEF); mtx_init(&sc->done_mtx, "g_disk_done", NULL, MTX_DEF); sc->dp = dp; gp = g_new_geomf(&g_disk_class, "%s%d", dp->d_name, dp->d_unit); gp->softc = sc; pp = g_new_providerf(gp, "%s", gp->name); + devstat_remove_entry(pp->stat); + pp->stat = NULL; + dp->d_devstat->id = pp; pp->mediasize = dp->d_mediasize; pp->sectorsize = dp->d_sectorsize; pp->stripeoffset = dp->d_stripeoffset; pp->stripesize = dp->d_stripesize; if ((dp->d_flags & DISKFLAG_UNMAPPED_BIO) != 0) pp->flags |= G_PF_ACCEPT_UNMAPPED; + if ((dp->d_flags & DISKFLAG_DIRECT_COMPLETION) != 0) + pp->flags |= G_PF_DIRECT_SEND; + pp->flags |= G_PF_DIRECT_RECEIVE; if (bootverbose) printf("GEOM: new disk %s\n", gp->name); sysctl_ctx_init(&sc->sysctl_ctx); @@ -574,6 +614,7 @@ g_disk_providergone(struct g_provider *pp) pp->private = NULL; pp->geom->softc = NULL; mtx_destroy(&sc->done_mtx); + mtx_destroy(&sc->start_mtx); g_free(sc); } diff --git a/sys/geom/geom_disk.h b/sys/geom/geom_disk.h index 852047b..b092146 100644 --- a/sys/geom/geom_disk.h +++ b/sys/geom/geom_disk.h @@ -107,6 +107,7 @@ struct disk { #define DISKFLAG_CANDELETE 0x4 #define DISKFLAG_CANFLUSHCACHE 0x8 #define DISKFLAG_UNMAPPED_BIO 0x10 +#define DISKFLAG_DIRECT_COMPLETION 0x20 struct disk *disk_alloc(void); void disk_create(struct disk *disk, int version); diff --git a/sys/geom/geom_int.h b/sys/geom/geom_int.h index 149a283..22f42e2 100644 --- a/sys/geom/geom_int.h +++ b/sys/geom/geom_int.h @@ -39,6 +39,9 @@ LIST_HEAD(class_list_head, g_class); TAILQ_HEAD(g_tailq_head, g_geom); extern int g_collectstats; +#define G_STATS_PROVIDERS 1 /* Collect I/O stats for providers */ +#define G_STATS_CONSUMERS 2 /* Collect I/O stats for consumers */ + extern int g_debugflags; /* * 1 G_T_TOPOLOGY diff --git a/sys/geom/geom_io.c b/sys/geom/geom_io.c index 0e79920..0b8e118 100644 --- a/sys/geom/geom_io.c +++ b/sys/geom/geom_io.c @@ -65,6 +65,8 @@ __FBSDID("$FreeBSD$"); #include <vm/vm_extern.h> #include <vm/vm_map.h> +static int g_io_transient_map_bio(struct bio *bp); + static struct g_bioq g_bio_run_down; static struct g_bioq g_bio_run_up; static struct g_bioq g_bio_run_task; @@ -310,6 +312,8 @@ g_io_check(struct bio *bp) { struct g_consumer *cp; struct g_provider *pp; + off_t excess; + int error; cp = bp->bio_from; pp = bp->bio_to; @@ -354,11 +358,44 @@ g_io_check(struct bio *bp) return (EIO); if (bp->bio_offset > pp->mediasize) return (EIO); + + /* Truncate requests to the end of providers media. 
*/ + excess = bp->bio_offset + bp->bio_length; + if (excess > bp->bio_to->mediasize) { + KASSERT((bp->bio_flags & BIO_UNMAPPED) == 0 || + round_page(bp->bio_ma_offset + + bp->bio_length) / PAGE_SIZE == bp->bio_ma_n, + ("excess bio %p too short", bp)); + excess -= bp->bio_to->mediasize; + bp->bio_length -= excess; + if ((bp->bio_flags & BIO_UNMAPPED) != 0) { + bp->bio_ma_n = round_page(bp->bio_ma_offset + + bp->bio_length) / PAGE_SIZE; + } + if (excess > 0) + CTR3(KTR_GEOM, "g_down truncated bio " + "%p provider %s by %d", bp, + bp->bio_to->name, excess); + } + + /* Deliver zero length transfers right here. */ + if (bp->bio_length == 0) { + CTR2(KTR_GEOM, "g_down terminated 0-length " + "bp %p provider %s", bp, bp->bio_to->name); + return (0); + } + + if ((bp->bio_flags & BIO_UNMAPPED) != 0 && + (bp->bio_to->flags & G_PF_ACCEPT_UNMAPPED) == 0 && + (bp->bio_cmd == BIO_READ || bp->bio_cmd == BIO_WRITE)) { + if ((error = g_io_transient_map_bio(bp)) >= 0) + return (error); + } break; default: break; } - return (0); + return (EJUSTRETURN); } /* @@ -422,7 +459,8 @@ void g_io_request(struct bio *bp, struct g_consumer *cp) { struct g_provider *pp; - int first; + struct mtx *mtxp; + int direct, error, first; KASSERT(cp != NULL, ("NULL cp in g_io_request")); KASSERT(bp != NULL, ("NULL bp in g_io_request")); @@ -472,48 +510,81 @@ g_io_request(struct bio *bp, struct g_consumer *cp) KASSERT(!(bp->bio_flags & BIO_ONQUEUE), ("Bio already on queue bp=%p", bp)); - bp->bio_flags |= BIO_ONQUEUE; - - if (g_collectstats) + if ((g_collectstats & G_STATS_CONSUMERS) != 0 || + ((g_collectstats & G_STATS_PROVIDERS) != 0 && pp->stat != NULL)) binuptime(&bp->bio_t0); else getbinuptime(&bp->bio_t0); +#ifdef GET_STACK_USAGE + direct = (cp->flags & G_CF_DIRECT_SEND) && + (pp->flags & G_PF_DIRECT_RECEIVE) && + !g_is_geom_thread(curthread) && + (((pp->flags & G_PF_ACCEPT_UNMAPPED) == 0 && + (bp->bio_flags & BIO_UNMAPPED) != 0) || THREAD_CAN_SLEEP()); + if (direct) { + /* Block direct execution if less then half of stack left. */ + size_t st, su; + GET_STACK_USAGE(st, su); + if (su * 2 > st) + direct = 0; + } +#else + direct = 0; +#endif + + if (!TAILQ_EMPTY(&g_classifier_tailq) && !bp->bio_classifier1) { + g_bioq_lock(&g_bio_run_down); + g_run_classifiers(bp); + g_bioq_unlock(&g_bio_run_down); + } + /* * The statistics collection is lockless, as such, but we * can not update one instance of the statistics from more * than one thread at a time, so grab the lock first. - * - * We also use the lock to protect the list of classifiers. */ - g_bioq_lock(&g_bio_run_down); - - if (!TAILQ_EMPTY(&g_classifier_tailq) && !bp->bio_classifier1) - g_run_classifiers(bp); - - if (g_collectstats & 1) + mtxp = mtx_pool_find(mtxpool_sleep, pp); + mtx_lock(mtxp); + if (g_collectstats & G_STATS_PROVIDERS) devstat_start_transaction(pp->stat, &bp->bio_t0); - if (g_collectstats & 2) + if (g_collectstats & G_STATS_CONSUMERS) devstat_start_transaction(cp->stat, &bp->bio_t0); - pp->nstart++; cp->nstart++; - first = TAILQ_EMPTY(&g_bio_run_down.bio_queue); - TAILQ_INSERT_TAIL(&g_bio_run_down.bio_queue, bp, bio_queue); - g_bio_run_down.bio_queue_length++; - g_bioq_unlock(&g_bio_run_down); + mtx_unlock(mtxp); - /* Pass it on down. 
*/ - if (first) - wakeup(&g_wait_down); + if (direct) { + error = g_io_check(bp); + if (error >= 0) { + CTR3(KTR_GEOM, "g_io_request g_io_check on bp %p " + "provider %s returned %d", bp, bp->bio_to->name, + error); + g_io_deliver(bp, error); + return; + } + bp->bio_to->geom->start(bp); + } else { + g_bioq_lock(&g_bio_run_down); + first = TAILQ_EMPTY(&g_bio_run_down.bio_queue); + TAILQ_INSERT_TAIL(&g_bio_run_down.bio_queue, bp, bio_queue); + bp->bio_flags |= BIO_ONQUEUE; + g_bio_run_down.bio_queue_length++; + g_bioq_unlock(&g_bio_run_down); + /* Pass it on down. */ + if (first) + wakeup(&g_wait_down); + } } void g_io_deliver(struct bio *bp, int error) { + struct bintime now; struct g_consumer *cp; struct g_provider *pp; - int first; + struct mtx *mtxp; + int direct, first; KASSERT(bp != NULL, ("NULL bp in g_io_deliver")); pp = bp->bio_to; @@ -559,31 +630,55 @@ g_io_deliver(struct bio *bp, int error) bp->bio_bcount = bp->bio_length; bp->bio_resid = bp->bio_bcount - bp->bio_completed; +#ifdef GET_STACK_USAGE + direct = (pp->flags & G_PF_DIRECT_SEND) && + (cp->flags & G_CF_DIRECT_RECEIVE) && + !g_is_geom_thread(curthread); + if (direct) { + /* Block direct execution if less then half of stack left. */ + size_t st, su; + GET_STACK_USAGE(st, su); + if (su * 2 > st) + direct = 0; + } +#else + direct = 0; +#endif + /* * The statistics collection is lockless, as such, but we * can not update one instance of the statistics from more * than one thread at a time, so grab the lock first. */ - g_bioq_lock(&g_bio_run_up); - if (g_collectstats & 1) - devstat_end_transaction_bio(pp->stat, bp); - if (g_collectstats & 2) - devstat_end_transaction_bio(cp->stat, bp); - + if ((g_collectstats & G_STATS_CONSUMERS) != 0 || + ((g_collectstats & G_STATS_PROVIDERS) != 0 && pp->stat != NULL)) + binuptime(&now); + mtxp = mtx_pool_find(mtxpool_sleep, cp); + mtx_lock(mtxp); + if (g_collectstats & G_STATS_PROVIDERS) + devstat_end_transaction_bio_bt(pp->stat, bp, &now); + if (g_collectstats & G_STATS_CONSUMERS) + devstat_end_transaction_bio_bt(cp->stat, bp, &now); cp->nend++; pp->nend++; + mtx_unlock(mtxp); + if (error != ENOMEM) { bp->bio_error = error; - first = TAILQ_EMPTY(&g_bio_run_up.bio_queue); - TAILQ_INSERT_TAIL(&g_bio_run_up.bio_queue, bp, bio_queue); - bp->bio_flags |= BIO_ONQUEUE; - g_bio_run_up.bio_queue_length++; - g_bioq_unlock(&g_bio_run_up); - if (first) - wakeup(&g_wait_up); + if (direct) { + biodone(bp); + } else { + g_bioq_lock(&g_bio_run_up); + first = TAILQ_EMPTY(&g_bio_run_up.bio_queue); + TAILQ_INSERT_TAIL(&g_bio_run_up.bio_queue, bp, bio_queue); + bp->bio_flags |= BIO_ONQUEUE; + g_bio_run_up.bio_queue_length++; + g_bioq_unlock(&g_bio_run_up); + if (first) + wakeup(&g_wait_up); + } return; } - g_bioq_unlock(&g_bio_run_up); if (bootverbose) printf("ENOMEM %p on %p(%s)\n", bp, pp, pp->name); @@ -639,11 +734,10 @@ retry: if (vmem_alloc(transient_arena, size, M_BESTFIT | M_NOWAIT, &addr)) { if (transient_map_retries != 0 && retried >= transient_map_retries) { - g_io_deliver(bp, EDEADLK/* XXXKIB */); CTR2(KTR_GEOM, "g_down cannot map bp %p provider %s", bp, bp->bio_to->name); atomic_add_int(&transient_map_hard_failures, 1); - return (1); + return (EDEADLK/* XXXKIB */); } else { /* * Naive attempt to quisce the I/O to get more @@ -663,14 +757,13 @@ retry: bp->bio_data = (caddr_t)addr + bp->bio_ma_offset; bp->bio_flags |= BIO_TRANSIENT_MAPPING; bp->bio_flags &= ~BIO_UNMAPPED; - return (0); + return (EJUSTRETURN); } void g_io_schedule_down(struct thread *tp __unused) { struct bio *bp; - off_t excess; int 
error; for(;;) { @@ -689,59 +782,15 @@ g_io_schedule_down(struct thread *tp __unused) pause("g_down", hz/10); pace--; } + CTR2(KTR_GEOM, "g_down processing bp %p provider %s", bp, + bp->bio_to->name); error = g_io_check(bp); - if (error) { + if (error >= 0) { CTR3(KTR_GEOM, "g_down g_io_check on bp %p provider " "%s returned %d", bp, bp->bio_to->name, error); g_io_deliver(bp, error); continue; } - CTR2(KTR_GEOM, "g_down processing bp %p provider %s", bp, - bp->bio_to->name); - switch (bp->bio_cmd) { - case BIO_READ: - case BIO_WRITE: - case BIO_DELETE: - /* Truncate requests to the end of providers media. */ - /* - * XXX: What if we truncate because of offset being - * bad, not length? - */ - excess = bp->bio_offset + bp->bio_length; - if (excess > bp->bio_to->mediasize) { - KASSERT((bp->bio_flags & BIO_UNMAPPED) == 0 || - round_page(bp->bio_ma_offset + - bp->bio_length) / PAGE_SIZE == bp->bio_ma_n, - ("excess bio %p too short", bp)); - excess -= bp->bio_to->mediasize; - bp->bio_length -= excess; - if ((bp->bio_flags & BIO_UNMAPPED) != 0) { - bp->bio_ma_n = round_page( - bp->bio_ma_offset + - bp->bio_length) / PAGE_SIZE; - } - if (excess > 0) - CTR3(KTR_GEOM, "g_down truncated bio " - "%p provider %s by %d", bp, - bp->bio_to->name, excess); - } - /* Deliver zero length transfers right here. */ - if (bp->bio_length == 0) { - g_io_deliver(bp, 0); - CTR2(KTR_GEOM, "g_down terminated 0-length " - "bp %p provider %s", bp, bp->bio_to->name); - continue; - } - break; - default: - break; - } - if ((bp->bio_flags & BIO_UNMAPPED) != 0 && - (bp->bio_to->flags & G_PF_ACCEPT_UNMAPPED) == 0 && - (bp->bio_cmd == BIO_READ || bp->bio_cmd == BIO_WRITE)) { - if (g_io_transient_map_bio(bp)) - continue; - } THREAD_NO_SLEEPING(); CTR4(KTR_GEOM, "g_down starting bp %p provider %s off %ld " "len %ld", bp, bp->bio_to->name, bp->bio_offset, diff --git a/sys/geom/geom_kern.c b/sys/geom/geom_kern.c index 2e65bfb..79afb14 100644 --- a/sys/geom/geom_kern.c +++ b/sys/geom/geom_kern.c @@ -124,6 +124,13 @@ g_event_procbody(void *arg) /* NOTREACHED */ } +int +g_is_geom_thread(struct thread *td) +{ + + return (td == g_up_td || td == g_down_td || td == g_event_td); +} + static void geom_shutdown(void *foo __unused) { diff --git a/sys/geom/geom_slice.c b/sys/geom/geom_slice.c index 976d840..f40a4de 100644 --- a/sys/geom/geom_slice.c +++ b/sys/geom/geom_slice.c @@ -396,8 +396,10 @@ g_slice_config(struct g_geom *gp, u_int idx, int how, off_t offset, off_t length pp->stripeoffset = pp2->stripeoffset + offset; if (pp->stripesize > 0) pp->stripeoffset %= pp->stripesize; - if (gsp->nhotspot == 0) + if (gsp->nhotspot == 0) { pp->flags |= pp2->flags & G_PF_ACCEPT_UNMAPPED; + pp->flags |= G_PF_DIRECT_SEND | G_PF_DIRECT_RECEIVE; + } if (0 && bootverbose) printf("GEOM: Configure %s, start %jd length %jd end %jd\n", pp->name, (intmax_t)offset, (intmax_t)length, @@ -430,16 +432,20 @@ g_slice_conf_hot(struct g_geom *gp, u_int idx, off_t offset, off_t length, int r { struct g_slicer *gsp; struct g_slice_hot *gsl, *gsl2; + struct g_consumer *cp; struct g_provider *pp; g_trace(G_T_TOPOLOGY, "g_slice_conf_hot(%s, idx: %d, off: %jd, len: %jd)", gp->name, idx, (intmax_t)offset, (intmax_t)length); g_topology_assert(); gsp = gp->softc; - /* Deny unmapped I/O if hotspots are used. */ + /* Deny unmapped I/O and direct dispatch if hotspots are used. 
*/ if (gsp->nhotspot == 0) { LIST_FOREACH(pp, &gp->provider, provider) - pp->flags &= ~G_PF_ACCEPT_UNMAPPED; + pp->flags &= ~(G_PF_ACCEPT_UNMAPPED | + G_PF_DIRECT_SEND | G_PF_DIRECT_RECEIVE); + LIST_FOREACH(cp, &gp->consumer, consumer) + cp->flags &= ~(G_CF_DIRECT_SEND | G_CF_DIRECT_RECEIVE); } gsl = gsp->hotspot; if(idx >= gsp->nhotspot) { @@ -511,6 +517,7 @@ g_slice_new(struct g_class *mp, u_int slices, struct g_provider *pp, struct g_co if (gp->class->destroy_geom == NULL) gp->class->destroy_geom = g_slice_destroy_geom; cp = g_new_consumer(gp); + cp->flags |= G_CF_DIRECT_SEND | G_CF_DIRECT_RECEIVE; error = g_attach(cp, pp); if (error == 0) error = g_access(cp, 1, 0, 0); diff --git a/sys/geom/geom_vfs.c b/sys/geom/geom_vfs.c index 92f1ad2..eda4b75 100644 --- a/sys/geom/geom_vfs.c +++ b/sys/geom/geom_vfs.c @@ -102,14 +102,10 @@ g_vfs_done(struct bio *bip) /* * Collect statistics on synchronous and asynchronous read * and write counts for disks that have associated filesystems. - * Since this run by the g_up thread it is single threaded and - * we do not need to use atomic increments on the counters. */ bp = bip->bio_caller2; vp = bp->b_vp; - if (vp == NULL) { - mp = NULL; - } else { + if (vp != NULL) { /* * If not a disk vnode, use its associated mount point * otherwise use the mountpoint associated with the disk. @@ -122,20 +118,20 @@ g_vfs_done(struct bio *bip) mp = vp->v_mount; else mp = cdevp->si_mountpt; - VI_UNLOCK(vp); - } - if (mp != NULL) { - if (bp->b_iocmd == BIO_WRITE) { - if (LK_HOLDER(bp->b_lock.lk_lock) == LK_KERNPROC) - mp->mnt_stat.f_asyncwrites++; - else - mp->mnt_stat.f_syncwrites++; - } else { - if (LK_HOLDER(bp->b_lock.lk_lock) == LK_KERNPROC) - mp->mnt_stat.f_asyncreads++; - else - mp->mnt_stat.f_syncreads++; + if (mp != NULL) { + if (bp->b_iocmd == BIO_READ) { + if (LK_HOLDER(bp->b_lock.lk_lock) == LK_KERNPROC) + mp->mnt_stat.f_asyncreads++; + else + mp->mnt_stat.f_syncreads++; + } else if (bp->b_iocmd == BIO_WRITE) { + if (LK_HOLDER(bp->b_lock.lk_lock) == LK_KERNPROC) + mp->mnt_stat.f_asyncwrites++; + else + mp->mnt_stat.f_syncwrites++; + } } + VI_UNLOCK(vp); } cp = bip->bio_from; @@ -260,6 +256,7 @@ g_vfs_open(struct vnode *vp, struct g_consumer **cpp, const char *fsname, int wr vnode_create_vobject(vp, pp->mediasize, curthread); *cpp = cp; cp->private = vp; + cp->flags |= G_CF_DIRECT_SEND | G_CF_DIRECT_RECEIVE; bo->bo_ops = g_vfs_bufops; bo->bo_private = cp; bo->bo_bsize = pp->sectorsize; diff --git a/sys/geom/mirror/g_mirror.c b/sys/geom/mirror/g_mirror.c index 04233fe..b4be912 100644 --- a/sys/geom/mirror/g_mirror.c +++ b/sys/geom/mirror/g_mirror.c @@ -394,6 +394,7 @@ g_mirror_connect_disk(struct g_mirror_disk *disk, struct g_provider *pp) g_topology_lock(); cp = g_new_consumer(disk->d_softc->sc_geom); + cp->flags |= G_CF_DIRECT_RECEIVE; error = g_attach(cp, pp); if (error != 0) { g_destroy_consumer(cp); @@ -554,6 +555,7 @@ g_mirror_destroy_device(struct g_mirror_softc *sc) g_topology_unlock(); mtx_destroy(&sc->sc_queue_mtx); mtx_destroy(&sc->sc_events_mtx); + mtx_destroy(&sc->sc_done_mtx); sx_xunlock(&sc->sc_lock); sx_destroy(&sc->sc_lock); } @@ -852,6 +854,27 @@ g_mirror_unidle(struct g_mirror_softc *sc) } static void +g_mirror_flush_done(struct bio *bp) +{ + struct g_mirror_softc *sc; + struct bio *pbp; + + pbp = bp->bio_parent; + sc = pbp->bio_to->geom->softc; + mtx_lock(&sc->sc_done_mtx); + if (pbp->bio_error == 0) + pbp->bio_error = bp->bio_error; + pbp->bio_completed += bp->bio_completed; + pbp->bio_inbed++; + if (pbp->bio_children == pbp->bio_inbed) { + 
mtx_unlock(&sc->sc_done_mtx); + g_io_deliver(pbp, pbp->bio_error); + } else + mtx_unlock(&sc->sc_done_mtx); + g_destroy_bio(bp); +} + +static void g_mirror_done(struct bio *bp) { struct g_mirror_softc *sc; @@ -1037,23 +1060,19 @@ g_mirror_flush(struct g_mirror_softc *sc, struct bio *bp) continue; cbp = g_clone_bio(bp); if (cbp == NULL) { - for (cbp = bioq_first(&queue); cbp != NULL; - cbp = bioq_first(&queue)) { - bioq_remove(&queue, cbp); + while ((cbp = bioq_takefirst(&queue)) != NULL) g_destroy_bio(cbp); - } if (bp->bio_error == 0) bp->bio_error = ENOMEM; g_io_deliver(bp, bp->bio_error); return; } bioq_insert_tail(&queue, cbp); - cbp->bio_done = g_std_done; + cbp->bio_done = g_mirror_flush_done; cbp->bio_caller1 = disk; cbp->bio_to = disk->d_consumer->provider; } - for (cbp = bioq_first(&queue); cbp != NULL; cbp = bioq_first(&queue)) { - bioq_remove(&queue, cbp); + while ((cbp = bioq_takefirst(&queue)) != NULL) { G_MIRROR_LOGREQ(3, cbp, "Sending request."); disk = cbp->bio_caller1; cbp->bio_caller1 = NULL; @@ -1538,11 +1557,8 @@ g_mirror_request_split(struct g_mirror_softc *sc, struct bio *bp) continue; cbp = g_clone_bio(bp); if (cbp == NULL) { - for (cbp = bioq_first(&queue); cbp != NULL; - cbp = bioq_first(&queue)) { + while ((cbp = bioq_takefirst(&queue)) != NULL) bioq_remove(&queue, cbp); - g_destroy_bio(cbp); - } if (bp->bio_error == 0) bp->bio_error = ENOMEM; g_io_deliver(bp, bp->bio_error); @@ -1561,8 +1577,7 @@ g_mirror_request_split(struct g_mirror_softc *sc, struct bio *bp) offset += cbp->bio_length; data += cbp->bio_length; } - for (cbp = bioq_first(&queue); cbp != NULL; cbp = bioq_first(&queue)) { - bioq_remove(&queue, cbp); + while ((cbp = bioq_takefirst(&queue)) != NULL) { G_MIRROR_LOGREQ(3, cbp, "Sending request."); disk = cbp->bio_caller1; cbp->bio_caller1 = NULL; @@ -1643,11 +1658,8 @@ g_mirror_register_request(struct bio *bp) continue; cbp = g_clone_bio(bp); if (cbp == NULL) { - for (cbp = bioq_first(&queue); cbp != NULL; - cbp = bioq_first(&queue)) { - bioq_remove(&queue, cbp); + while ((cbp = bioq_takefirst(&queue)) != NULL) g_destroy_bio(cbp); - } if (bp->bio_error == 0) bp->bio_error = ENOMEM; g_io_deliver(bp, bp->bio_error); @@ -1662,9 +1674,7 @@ g_mirror_register_request(struct bio *bp) ("Consumer %s not opened (r%dw%de%d).", cp->provider->name, cp->acr, cp->acw, cp->ace)); } - for (cbp = bioq_first(&queue); cbp != NULL; - cbp = bioq_first(&queue)) { - bioq_remove(&queue, cbp); + while ((cbp = bioq_takefirst(&queue)) != NULL) { G_MIRROR_LOGREQ(3, cbp, "Sending request."); cp = cbp->bio_caller1; cbp->bio_caller1 = NULL; @@ -1920,6 +1930,7 @@ g_mirror_sync_start(struct g_mirror_disk *disk) sx_xunlock(&sc->sc_lock); g_topology_lock(); cp = g_new_consumer(sc->sc_sync.ds_geom); + cp->flags |= G_CF_DIRECT_SEND | G_CF_DIRECT_RECEIVE; error = g_attach(cp, sc->sc_provider); KASSERT(error == 0, ("Cannot attach to %s (error=%d).", sc->sc_name, error)); @@ -2034,6 +2045,7 @@ g_mirror_launch_provider(struct g_mirror_softc *sc) g_topology_lock(); pp = g_new_providerf(sc->sc_geom, "mirror/%s", sc->sc_name); + pp->flags |= G_PF_DIRECT_RECEIVE; pp->mediasize = sc->sc_mediasize; pp->sectorsize = sc->sc_sectorsize; pp->stripesize = 0; @@ -2082,10 +2094,8 @@ g_mirror_destroy_provider(struct g_mirror_softc *sc) g_topology_lock(); g_error_provider(sc->sc_provider, ENXIO); mtx_lock(&sc->sc_queue_mtx); - while ((bp = bioq_first(&sc->sc_queue)) != NULL) { - bioq_remove(&sc->sc_queue, bp); + while ((bp = bioq_takefirst(&sc->sc_queue)) != NULL) g_io_deliver(bp, ENXIO); - } 
mtx_unlock(&sc->sc_queue_mtx); G_MIRROR_DEBUG(0, "Device %s: provider %s destroyed.", sc->sc_name, sc->sc_provider->name); @@ -2896,6 +2906,7 @@ g_mirror_create(struct g_class *mp, const struct g_mirror_metadata *md) TAILQ_INIT(&sc->sc_events); mtx_init(&sc->sc_events_mtx, "gmirror:events", NULL, MTX_DEF); callout_init(&sc->sc_callout, CALLOUT_MPSAFE); + mtx_init(&sc->sc_done_mtx, "gmirror:done", NULL, MTX_DEF); sc->sc_state = G_MIRROR_DEVICE_STATE_STARTING; gp->softc = sc; sc->sc_geom = gp; @@ -2914,6 +2925,7 @@ g_mirror_create(struct g_class *mp, const struct g_mirror_metadata *md) G_MIRROR_DEBUG(1, "Cannot create kernel thread for %s.", sc->sc_name); g_destroy_geom(sc->sc_sync.ds_geom); + mtx_destroy(&sc->sc_done_mtx); mtx_destroy(&sc->sc_events_mtx); mtx_destroy(&sc->sc_queue_mtx); sx_destroy(&sc->sc_lock); diff --git a/sys/geom/mirror/g_mirror.h b/sys/geom/mirror/g_mirror.h index 44ea18a..96270c8 100644 --- a/sys/geom/mirror/g_mirror.h +++ b/sys/geom/mirror/g_mirror.h @@ -212,6 +212,8 @@ struct g_mirror_softc { struct callout sc_callout; struct root_hold_token *sc_rootmount; + + struct mtx sc_done_mtx; }; #define sc_name sc_geom->name diff --git a/sys/geom/multipath/g_multipath.c b/sys/geom/multipath/g_multipath.c index 72cd2c5..6bc1d6e 100644 --- a/sys/geom/multipath/g_multipath.c +++ b/sys/geom/multipath/g_multipath.c @@ -442,6 +442,7 @@ g_multipath_create(struct g_class *mp, struct g_multipath_metadata *md) gp->dumpconf = g_multipath_dumpconf; pp = g_new_providerf(gp, "multipath/%s", md->md_name); + pp->flags |= G_PF_DIRECT_SEND | G_PF_DIRECT_RECEIVE; if (md->md_size != 0) { pp->mediasize = md->md_size - ((md->md_uuid[0] != 0) ? md->md_sectorsize : 0); @@ -479,6 +480,7 @@ g_multipath_add_disk(struct g_geom *gp, struct g_provider *pp) } nxtcp = LIST_FIRST(&gp->consumer); cp = g_new_consumer(gp); + cp->flags |= G_CF_DIRECT_SEND | G_CF_DIRECT_RECEIVE; cp->private = NULL; cp->index = MP_NEW; error = g_attach(cp, pp); diff --git a/sys/geom/nop/g_nop.c b/sys/geom/nop/g_nop.c index a1a1ad1..bd72d78 100644 --- a/sys/geom/nop/g_nop.c +++ b/sys/geom/nop/g_nop.c @@ -107,6 +107,7 @@ g_nop_start(struct bio *bp) gp = bp->bio_to->geom; sc = gp->softc; G_NOP_LOGREQ(bp, "Request received."); + mtx_lock(&sc->sc_lock); switch (bp->bio_cmd) { case BIO_READ: sc->sc_reads++; @@ -119,6 +120,7 @@ g_nop_start(struct bio *bp) failprob = sc->sc_wfailprob; break; } + mtx_unlock(&sc->sc_lock); if (failprob > 0) { u_int rval; @@ -224,6 +226,7 @@ g_nop_create(struct gctl_req *req, struct g_class *mp, struct g_provider *pp, sc->sc_writes = 0; sc->sc_readbytes = 0; sc->sc_wrotebytes = 0; + mtx_init(&sc->sc_lock, "gnop lock", NULL, MTX_DEF); gp->softc = sc; gp->start = g_nop_start; gp->orphan = g_nop_orphan; @@ -232,10 +235,12 @@ g_nop_create(struct gctl_req *req, struct g_class *mp, struct g_provider *pp, gp->dumpconf = g_nop_dumpconf; newpp = g_new_providerf(gp, "%s", gp->name); + newpp->flags |= G_PF_DIRECT_SEND | G_PF_DIRECT_RECEIVE; newpp->mediasize = size; newpp->sectorsize = secsize; cp = g_new_consumer(gp); + cp->flags |= G_CF_DIRECT_SEND | G_CF_DIRECT_RECEIVE; error = g_attach(cp, pp); if (error != 0) { gctl_error(req, "Cannot attach to provider %s.", pp->name); @@ -251,6 +256,7 @@ fail: g_detach(cp); g_destroy_consumer(cp); g_destroy_provider(newpp); + mtx_destroy(&sc->sc_lock); g_free(gp->softc); g_destroy_geom(gp); return (error); @@ -259,10 +265,12 @@ fail: static int g_nop_destroy(struct g_geom *gp, boolean_t force) { + struct g_nop_softc *sc; struct g_provider *pp; g_topology_assert(); - if 
(gp->softc == NULL) + sc = gp->softc; + if (sc == NULL) return (ENXIO); pp = LIST_FIRST(&gp->provider); if (pp != NULL && (pp->acr != 0 || pp->acw != 0 || pp->ace != 0)) { @@ -277,8 +285,9 @@ g_nop_destroy(struct g_geom *gp, boolean_t force) } else { G_NOP_DEBUG(0, "Device %s removed.", gp->name); } - g_free(gp->softc); gp->softc = NULL; + mtx_destroy(&sc->sc_lock); + g_free(sc); g_wither_geom(gp, ENXIO); return (0); diff --git a/sys/geom/nop/g_nop.h b/sys/geom/nop/g_nop.h index da555ec..3e37c05 100644 --- a/sys/geom/nop/g_nop.h +++ b/sys/geom/nop/g_nop.h @@ -65,6 +65,7 @@ struct g_nop_softc { uintmax_t sc_writes; uintmax_t sc_readbytes; uintmax_t sc_wrotebytes; + struct mtx sc_lock; }; #endif /* _KERNEL */ diff --git a/sys/geom/part/g_part.c b/sys/geom/part/g_part.c index db46dd3..15536e1 100644 --- a/sys/geom/part/g_part.c +++ b/sys/geom/part/g_part.c @@ -416,6 +416,7 @@ g_part_new_provider(struct g_geom *gp, struct g_part_table *table, sbuf_finish(sb); entry->gpe_pp = g_new_providerf(gp, "%s", sbuf_data(sb)); sbuf_delete(sb); + entry->gpe_pp->flags |= G_PF_DIRECT_SEND | G_PF_DIRECT_RECEIVE; entry->gpe_pp->private = entry; /* Close the circle. */ } entry->gpe_pp->index = entry->gpe_index - 1; /* index is 1-based. */ @@ -928,6 +929,7 @@ g_part_ctl_create(struct gctl_req *req, struct g_part_parms *gpp) LIST_INIT(&table->gpt_entry); if (null == NULL) { cp = g_new_consumer(gp); + cp->flags |= G_CF_DIRECT_SEND | G_CF_DIRECT_RECEIVE; error = g_attach(cp, pp); if (error == 0) error = g_access(cp, 1, 1, 1); @@ -1884,6 +1886,7 @@ g_part_taste(struct g_class *mp, struct g_provider *pp, int flags __unused) */ gp = g_new_geomf(mp, "%s", pp->name); cp = g_new_consumer(gp); + cp->flags |= G_CF_DIRECT_SEND | G_CF_DIRECT_RECEIVE; error = g_attach(cp, pp); if (error == 0) error = g_access(cp, 1, 0, 0); diff --git a/sys/geom/raid/g_raid.c b/sys/geom/raid/g_raid.c index 41a1f96..a161f8a 100644 --- a/sys/geom/raid/g_raid.c +++ b/sys/geom/raid/g_raid.c @@ -792,6 +792,7 @@ g_raid_open_consumer(struct g_raid_softc *sc, const char *name) if (pp == NULL) return (NULL); cp = g_new_consumer(sc->sc_geom); + cp->flags |= G_CF_DIRECT_RECEIVE; if (g_attach(cp, pp) != 0) { g_destroy_consumer(cp); return (NULL); @@ -993,20 +994,15 @@ g_raid_tr_flush_common(struct g_raid_tr_object *tr, struct bio *bp) cbp->bio_caller1 = sd; bioq_insert_tail(&queue, cbp); } - for (cbp = bioq_first(&queue); cbp != NULL; - cbp = bioq_first(&queue)) { - bioq_remove(&queue, cbp); + while ((cbp = bioq_takefirst(&queue)) != NULL) { sd = cbp->bio_caller1; cbp->bio_caller1 = NULL; g_raid_subdisk_iostart(sd, cbp); } return; failure: - for (cbp = bioq_first(&queue); cbp != NULL; - cbp = bioq_first(&queue)) { - bioq_remove(&queue, cbp); + while ((cbp = bioq_takefirst(&queue)) != NULL) g_destroy_bio(cbp); - } if (bp->bio_error == 0) bp->bio_error = ENOMEM; g_raid_iodone(bp, bp->bio_error); @@ -1639,11 +1635,13 @@ static void g_raid_launch_provider(struct g_raid_volume *vol) { struct g_raid_disk *disk; + struct g_raid_subdisk *sd; struct g_raid_softc *sc; struct g_provider *pp; char name[G_RAID_MAX_VOLUMENAME]; char announce_buf[80], buf1[32]; off_t off; + int i; sc = vol->v_softc; sx_assert(&sc->sc_lock, SX_LOCKED); @@ -1673,6 +1671,18 @@ g_raid_launch_provider(struct g_raid_volume *vol) } pp = g_new_providerf(sc->sc_geom, "%s", name); + pp->flags |= G_PF_DIRECT_RECEIVE; + if (vol->v_tr->tro_class->trc_accept_unmapped) { + pp->flags |= G_PF_ACCEPT_UNMAPPED; + for (i = 0; i < vol->v_disks_count; i++) { + sd = &vol->v_subdisks[i]; + if (sd->sd_state == 
G_RAID_SUBDISK_S_NONE) + continue; + if ((sd->sd_disk->d_consumer->provider->flags & + G_PF_ACCEPT_UNMAPPED) == 0) + pp->flags &= ~G_PF_ACCEPT_UNMAPPED; + } + } pp->private = vol; pp->mediasize = vol->v_mediasize; pp->sectorsize = vol->v_sectorsize; @@ -2247,6 +2257,7 @@ g_raid_taste(struct g_class *mp, struct g_provider *pp, int flags __unused) */ gp->orphan = g_raid_taste_orphan; cp = g_new_consumer(gp); + cp->flags |= G_CF_DIRECT_RECEIVE; g_attach(cp, pp); geom = NULL; diff --git a/sys/geom/raid/g_raid.h b/sys/geom/raid/g_raid.h index 993b100..8a96fa9 100644 --- a/sys/geom/raid/g_raid.h +++ b/sys/geom/raid/g_raid.h @@ -376,6 +376,7 @@ struct g_raid_tr_class { KOBJ_CLASS_FIELDS; int trc_enable; int trc_priority; + int trc_accept_unmapped; LIST_ENTRY(g_raid_tr_class) trc_list; }; diff --git a/sys/geom/raid/md_ddf.c b/sys/geom/raid/md_ddf.c index 5a17301..4e1545b 100644 --- a/sys/geom/raid/md_ddf.c +++ b/sys/geom/raid/md_ddf.c @@ -2143,6 +2143,7 @@ g_raid_md_taste_ddf(struct g_raid_md_object *md, struct g_class *mp, } rcp = g_new_consumer(geom); + rcp->flags |= G_CF_DIRECT_RECEIVE; g_attach(rcp, pp); if (g_access(rcp, 1, 1, 1) != 0) ; //goto fail1; diff --git a/sys/geom/raid/md_intel.c b/sys/geom/raid/md_intel.c index eeb42d5..11917f5 100644 --- a/sys/geom/raid/md_intel.c +++ b/sys/geom/raid/md_intel.c @@ -1477,6 +1477,7 @@ search: } rcp = g_new_consumer(geom); + rcp->flags |= G_CF_DIRECT_RECEIVE; g_attach(rcp, pp); if (g_access(rcp, 1, 1, 1) != 0) ; //goto fail1; diff --git a/sys/geom/raid/md_jmicron.c b/sys/geom/raid/md_jmicron.c index a062215..2da4a33 100644 --- a/sys/geom/raid/md_jmicron.c +++ b/sys/geom/raid/md_jmicron.c @@ -923,6 +923,7 @@ search: } rcp = g_new_consumer(geom); + rcp->flags |= G_CF_DIRECT_RECEIVE; g_attach(rcp, pp); if (g_access(rcp, 1, 1, 1) != 0) ; //goto fail1; diff --git a/sys/geom/raid/md_nvidia.c b/sys/geom/raid/md_nvidia.c index 92d9f71..25cc2cc 100644 --- a/sys/geom/raid/md_nvidia.c +++ b/sys/geom/raid/md_nvidia.c @@ -919,6 +919,7 @@ search: } rcp = g_new_consumer(geom); + rcp->flags |= G_CF_DIRECT_RECEIVE; g_attach(rcp, pp); if (g_access(rcp, 1, 1, 1) != 0) ; //goto fail1; diff --git a/sys/geom/raid/md_promise.c b/sys/geom/raid/md_promise.c index 0007b20..b1e4427 100644 --- a/sys/geom/raid/md_promise.c +++ b/sys/geom/raid/md_promise.c @@ -1176,6 +1176,7 @@ search: } rcp = g_new_consumer(geom); + rcp->flags |= G_CF_DIRECT_RECEIVE; g_attach(rcp, pp); if (g_access(rcp, 1, 1, 1) != 0) ; //goto fail1; diff --git a/sys/geom/raid/md_sii.c b/sys/geom/raid/md_sii.c index 03bb03b..149b336 100644 --- a/sys/geom/raid/md_sii.c +++ b/sys/geom/raid/md_sii.c @@ -1012,6 +1012,7 @@ search: } rcp = g_new_consumer(geom); + rcp->flags |= G_CF_DIRECT_RECEIVE; g_attach(rcp, pp); if (g_access(rcp, 1, 1, 1) != 0) ; //goto fail1; diff --git a/sys/geom/raid/tr_concat.c b/sys/geom/raid/tr_concat.c index 60db472..44951d4 100644 --- a/sys/geom/raid/tr_concat.c +++ b/sys/geom/raid/tr_concat.c @@ -74,7 +74,8 @@ static struct g_raid_tr_class g_raid_tr_concat_class = { g_raid_tr_concat_methods, sizeof(struct g_raid_tr_concat_object), .trc_enable = 1, - .trc_priority = 50 + .trc_priority = 50, + .trc_accept_unmapped = 1 }; static int @@ -227,7 +228,10 @@ g_raid_tr_iostart_concat(struct g_raid_tr_object *tr, struct bio *bp) offset = bp->bio_offset; remain = bp->bio_length; - addr = bp->bio_data; + if ((bp->bio_flags & BIO_UNMAPPED) != 0) + addr = NULL; + else + addr = bp->bio_data; no = 0; while (no < vol->v_disks_count && offset >= vol->v_subdisks[no].sd_size) { @@ -244,8 +248,16 @@ 
g_raid_tr_iostart_concat(struct g_raid_tr_object *tr, struct bio *bp) if (cbp == NULL) goto failure; cbp->bio_offset = offset; - cbp->bio_data = addr; cbp->bio_length = length; + if ((bp->bio_flags & BIO_UNMAPPED) != 0 && + bp->bio_cmd != BIO_DELETE) { + cbp->bio_ma_offset += (uintptr_t)addr; + cbp->bio_ma += cbp->bio_ma_offset / PAGE_SIZE; + cbp->bio_ma_offset %= PAGE_SIZE; + cbp->bio_ma_n = round_page(cbp->bio_ma_offset + + cbp->bio_length) / PAGE_SIZE; + } else + cbp->bio_data = addr; cbp->bio_caller1 = sd; bioq_insert_tail(&queue, cbp); remain -= length; @@ -257,20 +269,15 @@ g_raid_tr_iostart_concat(struct g_raid_tr_object *tr, struct bio *bp) ("Request ends after volume end (%ju, %ju)", bp->bio_offset, bp->bio_length)); } while (remain > 0); - for (cbp = bioq_first(&queue); cbp != NULL; - cbp = bioq_first(&queue)) { - bioq_remove(&queue, cbp); + while ((cbp = bioq_takefirst(&queue)) != NULL) { sd = cbp->bio_caller1; cbp->bio_caller1 = NULL; g_raid_subdisk_iostart(sd, cbp); } return; failure: - for (cbp = bioq_first(&queue); cbp != NULL; - cbp = bioq_first(&queue)) { - bioq_remove(&queue, cbp); + while ((cbp = bioq_takefirst(&queue)) != NULL) g_destroy_bio(cbp); - } if (bp->bio_error == 0) bp->bio_error = ENOMEM; g_raid_iodone(bp, bp->bio_error); diff --git a/sys/geom/raid/tr_raid0.c b/sys/geom/raid/tr_raid0.c index 7873be8..40d35fd 100644 --- a/sys/geom/raid/tr_raid0.c +++ b/sys/geom/raid/tr_raid0.c @@ -74,7 +74,8 @@ static struct g_raid_tr_class g_raid_tr_raid0_class = { g_raid_tr_raid0_methods, sizeof(struct g_raid_tr_raid0_object), .trc_enable = 1, - .trc_priority = 100 + .trc_priority = 100, + .trc_accept_unmapped = 1 }; static int @@ -204,7 +205,10 @@ g_raid_tr_iostart_raid0(struct g_raid_tr_object *tr, struct bio *bp) g_raid_tr_flush_common(tr, bp); return; } - addr = bp->bio_data; + if ((bp->bio_flags & BIO_UNMAPPED) != 0) + addr = NULL; + else + addr = bp->bio_data; strip_size = vol->v_strip_size; /* Stripe number. 
*/ @@ -225,8 +229,16 @@ g_raid_tr_iostart_raid0(struct g_raid_tr_object *tr, struct bio *bp) if (cbp == NULL) goto failure; cbp->bio_offset = offset + start; - cbp->bio_data = addr; cbp->bio_length = length; + if ((bp->bio_flags & BIO_UNMAPPED) != 0 && + bp->bio_cmd != BIO_DELETE) { + cbp->bio_ma_offset += (uintptr_t)addr; + cbp->bio_ma += cbp->bio_ma_offset / PAGE_SIZE; + cbp->bio_ma_offset %= PAGE_SIZE; + cbp->bio_ma_n = round_page(cbp->bio_ma_offset + + cbp->bio_length) / PAGE_SIZE; + } else + cbp->bio_data = addr; cbp->bio_caller1 = &vol->v_subdisks[no]; bioq_insert_tail(&queue, cbp); if (++no >= vol->v_disks_count) { @@ -238,20 +250,15 @@ g_raid_tr_iostart_raid0(struct g_raid_tr_object *tr, struct bio *bp) addr += length; start = 0; } while (remain > 0); - for (cbp = bioq_first(&queue); cbp != NULL; - cbp = bioq_first(&queue)) { - bioq_remove(&queue, cbp); + while ((cbp = bioq_takefirst(&queue)) != NULL) { sd = cbp->bio_caller1; cbp->bio_caller1 = NULL; g_raid_subdisk_iostart(sd, cbp); } return; failure: - for (cbp = bioq_first(&queue); cbp != NULL; - cbp = bioq_first(&queue)) { - bioq_remove(&queue, cbp); + while ((cbp = bioq_takefirst(&queue)) != NULL) g_destroy_bio(cbp); - } if (bp->bio_error == 0) bp->bio_error = ENOMEM; g_raid_iodone(bp, bp->bio_error); diff --git a/sys/geom/raid/tr_raid1.c b/sys/geom/raid/tr_raid1.c index 4465e32..833655b 100644 --- a/sys/geom/raid/tr_raid1.c +++ b/sys/geom/raid/tr_raid1.c @@ -130,7 +130,8 @@ static struct g_raid_tr_class g_raid_tr_raid1_class = { g_raid_tr_raid1_methods, sizeof(struct g_raid_tr_raid1_object), .trc_enable = 1, - .trc_priority = 100 + .trc_priority = 100, + .trc_accept_unmapped = 1 }; static void g_raid_tr_raid1_rebuild_abort(struct g_raid_tr_object *tr); @@ -594,20 +595,15 @@ g_raid_tr_iostart_raid1_write(struct g_raid_tr_object *tr, struct bio *bp) cbp->bio_caller1 = sd; bioq_insert_tail(&queue, cbp); } - for (cbp = bioq_first(&queue); cbp != NULL; - cbp = bioq_first(&queue)) { - bioq_remove(&queue, cbp); + while ((cbp = bioq_takefirst(&queue)) != NULL) { sd = cbp->bio_caller1; cbp->bio_caller1 = NULL; g_raid_subdisk_iostart(sd, cbp); } return; failure: - for (cbp = bioq_first(&queue); cbp != NULL; - cbp = bioq_first(&queue)) { - bioq_remove(&queue, cbp); + while ((cbp = bioq_takefirst(&queue)) != NULL) g_destroy_bio(cbp); - } if (bp->bio_error == 0) bp->bio_error = ENOMEM; g_raid_iodone(bp, bp->bio_error); diff --git a/sys/geom/raid/tr_raid1e.c b/sys/geom/raid/tr_raid1e.c index 679b766..d283606 100644 --- a/sys/geom/raid/tr_raid1e.c +++ b/sys/geom/raid/tr_raid1e.c @@ -134,7 +134,8 @@ static struct g_raid_tr_class g_raid_tr_raid1e_class = { g_raid_tr_raid1e_methods, sizeof(struct g_raid_tr_raid1e_object), .trc_enable = 1, - .trc_priority = 200 + .trc_priority = 200, + .trc_accept_unmapped = 1 }; static void g_raid_tr_raid1e_rebuild_abort(struct g_raid_tr_object *tr); @@ -701,7 +702,10 @@ g_raid_tr_iostart_raid1e_read(struct g_raid_tr_object *tr, struct bio *bp) int best; vol = tr->tro_volume; - addr = bp->bio_data; + if ((bp->bio_flags & BIO_UNMAPPED) != 0) + addr = NULL; + else + addr = bp->bio_data; strip_size = vol->v_strip_size; V2P(vol, bp->bio_offset, &no, &offset, &start); remain = bp->bio_length; @@ -721,8 +725,15 @@ g_raid_tr_iostart_raid1e_read(struct g_raid_tr_object *tr, struct bio *bp) if (cbp == NULL) goto failure; cbp->bio_offset = offset + start; - cbp->bio_data = addr; cbp->bio_length = length; + if ((bp->bio_flags & BIO_UNMAPPED) != 0) { + cbp->bio_ma_offset += (uintptr_t)addr; + cbp->bio_ma += 
diff --git a/sys/geom/raid/tr_raid1.c b/sys/geom/raid/tr_raid1.c
index 4465e32..833655b 100644
--- a/sys/geom/raid/tr_raid1.c
+++ b/sys/geom/raid/tr_raid1.c
@@ -130,7 +130,8 @@ static struct g_raid_tr_class g_raid_tr_raid1_class = {
 	g_raid_tr_raid1_methods,
 	sizeof(struct g_raid_tr_raid1_object),
 	.trc_enable = 1,
-	.trc_priority = 100
+	.trc_priority = 100,
+	.trc_accept_unmapped = 1
 };
 
 static void g_raid_tr_raid1_rebuild_abort(struct g_raid_tr_object *tr);
@@ -594,20 +595,15 @@ g_raid_tr_iostart_raid1_write(struct g_raid_tr_object *tr, struct bio *bp)
 		cbp->bio_caller1 = sd;
 		bioq_insert_tail(&queue, cbp);
 	}
-	for (cbp = bioq_first(&queue); cbp != NULL;
-	    cbp = bioq_first(&queue)) {
-		bioq_remove(&queue, cbp);
+	while ((cbp = bioq_takefirst(&queue)) != NULL) {
 		sd = cbp->bio_caller1;
 		cbp->bio_caller1 = NULL;
 		g_raid_subdisk_iostart(sd, cbp);
 	}
 	return;
 failure:
-	for (cbp = bioq_first(&queue); cbp != NULL;
-	    cbp = bioq_first(&queue)) {
-		bioq_remove(&queue, cbp);
+	while ((cbp = bioq_takefirst(&queue)) != NULL)
 		g_destroy_bio(cbp);
-	}
 	if (bp->bio_error == 0)
 		bp->bio_error = ENOMEM;
 	g_raid_iodone(bp, bp->bio_error);
diff --git a/sys/geom/raid/tr_raid1e.c b/sys/geom/raid/tr_raid1e.c
index 679b766..d283606 100644
--- a/sys/geom/raid/tr_raid1e.c
+++ b/sys/geom/raid/tr_raid1e.c
@@ -134,7 +134,8 @@ static struct g_raid_tr_class g_raid_tr_raid1e_class = {
 	g_raid_tr_raid1e_methods,
 	sizeof(struct g_raid_tr_raid1e_object),
 	.trc_enable = 1,
-	.trc_priority = 200
+	.trc_priority = 200,
+	.trc_accept_unmapped = 1
 };
 
 static void g_raid_tr_raid1e_rebuild_abort(struct g_raid_tr_object *tr);
@@ -701,7 +702,10 @@ g_raid_tr_iostart_raid1e_read(struct g_raid_tr_object *tr, struct bio *bp)
 	int best;
 
 	vol = tr->tro_volume;
-	addr = bp->bio_data;
+	if ((bp->bio_flags & BIO_UNMAPPED) != 0)
+		addr = NULL;
+	else
+		addr = bp->bio_data;
 	strip_size = vol->v_strip_size;
 	V2P(vol, bp->bio_offset, &no, &offset, &start);
 	remain = bp->bio_length;
@@ -721,8 +725,15 @@ g_raid_tr_iostart_raid1e_read(struct g_raid_tr_object *tr, struct bio *bp)
 		if (cbp == NULL)
 			goto failure;
 		cbp->bio_offset = offset + start;
-		cbp->bio_data = addr;
 		cbp->bio_length = length;
+		if ((bp->bio_flags & BIO_UNMAPPED) != 0) {
+			cbp->bio_ma_offset += (uintptr_t)addr;
+			cbp->bio_ma += cbp->bio_ma_offset / PAGE_SIZE;
+			cbp->bio_ma_offset %= PAGE_SIZE;
+			cbp->bio_ma_n = round_page(cbp->bio_ma_offset +
+			    cbp->bio_length) / PAGE_SIZE;
+		} else
+			cbp->bio_data = addr;
 		cbp->bio_caller1 = &vol->v_subdisks[no];
 		bioq_insert_tail(&queue, cbp);
 		no += N - best;
@@ -734,20 +745,15 @@ g_raid_tr_iostart_raid1e_read(struct g_raid_tr_object *tr, struct bio *bp)
 		addr += length;
 		start = 0;
 	}
-	for (cbp = bioq_first(&queue); cbp != NULL;
-	    cbp = bioq_first(&queue)) {
-		bioq_remove(&queue, cbp);
+	while ((cbp = bioq_takefirst(&queue)) != NULL) {
 		sd = cbp->bio_caller1;
 		cbp->bio_caller1 = NULL;
 		g_raid_subdisk_iostart(sd, cbp);
 	}
 	return;
 failure:
-	for (cbp = bioq_first(&queue); cbp != NULL;
-	    cbp = bioq_first(&queue)) {
-		bioq_remove(&queue, cbp);
+	while ((cbp = bioq_takefirst(&queue)) != NULL)
 		g_destroy_bio(cbp);
-	}
 	if (bp->bio_error == 0)
 		bp->bio_error = ENOMEM;
 	g_raid_iodone(bp, bp->bio_error);
@@ -766,7 +772,10 @@ g_raid_tr_iostart_raid1e_write(struct g_raid_tr_object *tr, struct bio *bp)
 	int i;
 
 	vol = tr->tro_volume;
-	addr = bp->bio_data;
+	if ((bp->bio_flags & BIO_UNMAPPED) != 0)
+		addr = NULL;
+	else
+		addr = bp->bio_data;
 	strip_size = vol->v_strip_size;
 	V2P(vol, bp->bio_offset, &no, &offset, &start);
 	remain = bp->bio_length;
@@ -791,8 +800,16 @@ g_raid_tr_iostart_raid1e_write(struct g_raid_tr_object *tr, struct bio *bp)
 		if (cbp == NULL)
 			goto failure;
 		cbp->bio_offset = offset + start;
-		cbp->bio_data = addr;
 		cbp->bio_length = length;
+		if ((bp->bio_flags & BIO_UNMAPPED) != 0 &&
+		    bp->bio_cmd != BIO_DELETE) {
+			cbp->bio_ma_offset += (uintptr_t)addr;
+			cbp->bio_ma += cbp->bio_ma_offset / PAGE_SIZE;
+			cbp->bio_ma_offset %= PAGE_SIZE;
+			cbp->bio_ma_n = round_page(cbp->bio_ma_offset +
+			    cbp->bio_length) / PAGE_SIZE;
+		} else
+			cbp->bio_data = addr;
 		cbp->bio_caller1 = sd;
 		bioq_insert_tail(&queue, cbp);
 nextdisk:
@@ -806,20 +823,15 @@ nextdisk:
 		addr += length;
 		start = 0;
 	}
-	for (cbp = bioq_first(&queue); cbp != NULL;
-	    cbp = bioq_first(&queue)) {
-		bioq_remove(&queue, cbp);
+	while ((cbp = bioq_takefirst(&queue)) != NULL) {
 		sd = cbp->bio_caller1;
 		cbp->bio_caller1 = NULL;
 		g_raid_subdisk_iostart(sd, cbp);
 	}
 	return;
 failure:
-	for (cbp = bioq_first(&queue); cbp != NULL;
-	    cbp = bioq_first(&queue)) {
-		bioq_remove(&queue, cbp);
+	while ((cbp = bioq_takefirst(&queue)) != NULL)
 		g_destroy_bio(cbp);
-	}
 	if (bp->bio_error == 0)
 		bp->bio_error = ENOMEM;
 	g_raid_iodone(bp, bp->bio_error);
@@ -1030,6 +1042,9 @@ rebuild_round_done:
 		cbp->bio_offset = offset + start;
 		cbp->bio_length = bp->bio_length;
 		cbp->bio_data = bp->bio_data;
+		cbp->bio_ma = bp->bio_ma;
+		cbp->bio_ma_offset = bp->bio_ma_offset;
+		cbp->bio_ma_n = bp->bio_ma_n;
 		g_destroy_bio(bp);
 		nsd = &vol->v_subdisks[disk];
 		G_RAID_LOGREQ(2, cbp, "Retrying read from %d",
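The rebuild/retry hunk above shows the other half of unmapped support: when a failed read is re-issued as a fresh bio, the page-array descriptors must travel along with bio_data, since either representation may be the live one. A sketch of the invariant (field names as in struct bio; the trimmed struct and helper are illustrative):

	#include <stdint.h>

	struct vm_page;

	struct bio_sketch {			/* trimmed stand-in for struct bio */
		void		*bio_data;	/* mapped (KVA) representation */
		struct vm_page	**bio_ma;	/* unmapped representation: pages, */
		uintptr_t	bio_ma_offset;	/* offset into the first page, */
		int		bio_ma_n;	/* and page count */
	};

	/* Re-issuing a request must preserve whichever representation is live. */
	static void
	copy_buffer_descriptors(struct bio_sketch *dst, const struct bio_sketch *src)
	{
		dst->bio_data = src->bio_data;
		dst->bio_ma = src->bio_ma;
		dst->bio_ma_offset = src->bio_ma_offset;
		dst->bio_ma_n = src->bio_ma_n;
	}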
diff --git a/sys/geom/raid/tr_raid5.c b/sys/geom/raid/tr_raid5.c
index 6e54d16..c90845e 100644
--- a/sys/geom/raid/tr_raid5.c
+++ b/sys/geom/raid/tr_raid5.c
@@ -324,20 +324,15 @@ g_raid_tr_iostart_raid5_read(struct g_raid_tr_object *tr, struct bio *bp)
 		addr += length;
 		start = 0;
 	} while (remain > 0);
-	for (cbp = bioq_first(&queue); cbp != NULL;
-	    cbp = bioq_first(&queue)) {
-		bioq_remove(&queue, cbp);
+	while ((cbp = bioq_takefirst(&queue)) != NULL) {
 		sd = cbp->bio_caller1;
 		cbp->bio_caller1 = NULL;
 		g_raid_subdisk_iostart(sd, cbp);
 	}
 	return;
 failure:
-	for (cbp = bioq_first(&queue); cbp != NULL;
-	    cbp = bioq_first(&queue)) {
-		bioq_remove(&queue, cbp);
+	while ((cbp = bioq_takefirst(&queue)) != NULL)
 		g_destroy_bio(cbp);
-	}
 	if (bp->bio_error == 0)
 		bp->bio_error = ENOMEM;
 	g_raid_iodone(bp, bp->bio_error);
diff --git a/sys/geom/stripe/g_stripe.c b/sys/geom/stripe/g_stripe.c
index 575ec5f..b5d77c5 100644
--- a/sys/geom/stripe/g_stripe.c
+++ b/sys/geom/stripe/g_stripe.c
@@ -284,22 +284,25 @@ g_stripe_done(struct bio *bp)
 
 	pbp = bp->bio_parent;
 	sc = pbp->bio_to->geom->softc;
-	if (pbp->bio_error == 0)
-		pbp->bio_error = bp->bio_error;
-	pbp->bio_completed += bp->bio_completed;
 	if (bp->bio_cmd == BIO_READ && bp->bio_caller1 != NULL) {
 		g_stripe_copy(sc, bp->bio_data, bp->bio_caller1,
 		    bp->bio_offset, bp->bio_length, 1);
 		bp->bio_data = bp->bio_caller1;
 		bp->bio_caller1 = NULL;
 	}
-	g_destroy_bio(bp);
+	mtx_lock(&sc->sc_lock);
+	if (pbp->bio_error == 0)
+		pbp->bio_error = bp->bio_error;
+	pbp->bio_completed += bp->bio_completed;
 	pbp->bio_inbed++;
 	if (pbp->bio_children == pbp->bio_inbed) {
+		mtx_unlock(&sc->sc_lock);
 		if (pbp->bio_driver1 != NULL)
 			uma_zfree(g_stripe_zone, pbp->bio_driver1);
 		g_io_deliver(pbp, pbp->bio_error);
-	}
+	} else
+		mtx_unlock(&sc->sc_lock);
+	g_destroy_bio(bp);
 }
 
 static int
@@ -442,7 +445,6 @@ g_stripe_start_economic(struct bio *bp, u_int no, off_t offset, off_t length)
 
 	sc = bp->bio_to->geom->softc;
 
-	addr = bp->bio_data;
 	stripesize = sc->sc_stripesize;
 
 	cbp = g_clone_bio(bp);
@@ -454,10 +456,18 @@ g_stripe_start_economic(struct bio *bp, u_int no, off_t offset, off_t length)
 	/*
 	 * Fill in the component buf structure.
 	 */
-	cbp->bio_done = g_std_done;
+	if (bp->bio_length == length)
+		cbp->bio_done = g_std_done;	/* Optimized lockless case. */
+	else
+		cbp->bio_done = g_stripe_done;
 	cbp->bio_offset = offset;
-	cbp->bio_data = addr;
 	cbp->bio_length = length;
+	if ((bp->bio_flags & BIO_UNMAPPED) != 0) {
+		bp->bio_ma_n = round_page(bp->bio_ma_offset +
+		    bp->bio_length) / PAGE_SIZE;
+		addr = NULL;
+	} else
+		addr = bp->bio_data;
 	cbp->bio_caller2 = sc->sc_disks[no];
 
 	/* offset -= offset % stripesize; */
@@ -479,14 +489,21 @@ g_stripe_start_economic(struct bio *bp, u_int no, off_t offset, off_t length)
 		/*
 		 * Fill in the component buf structure.
 		 */
-		cbp->bio_done = g_std_done;
+		cbp->bio_done = g_stripe_done;
 		cbp->bio_offset = offset;
-		cbp->bio_data = addr;
 		/*
 		 * MIN() is in case when
 		 * (bp->bio_length % sc->sc_stripesize) != 0.
 		 */
 		cbp->bio_length = MIN(stripesize, length);
+		if ((bp->bio_flags & BIO_UNMAPPED) != 0) {
+			cbp->bio_ma_offset += (uintptr_t)addr;
+			cbp->bio_ma += cbp->bio_ma_offset / PAGE_SIZE;
+			cbp->bio_ma_offset %= PAGE_SIZE;
+			cbp->bio_ma_n = round_page(cbp->bio_ma_offset +
+			    cbp->bio_length) / PAGE_SIZE;
+		} else
+			cbp->bio_data = addr;
 		cbp->bio_caller2 = sc->sc_disks[no];
 	}
 
@@ -536,15 +553,15 @@ g_stripe_flush(struct g_stripe_softc *sc, struct bio *bp)
 			return;
 		}
 		bioq_insert_tail(&queue, cbp);
-		cbp->bio_done = g_std_done;
-		cbp->bio_caller1 = sc->sc_disks[no];
+		cbp->bio_done = g_stripe_done;
+		cbp->bio_caller2 = sc->sc_disks[no];
 		cbp->bio_to = sc->sc_disks[no]->provider;
 	}
 	for (cbp = bioq_first(&queue); cbp != NULL; cbp = bioq_first(&queue)) {
 		bioq_remove(&queue, cbp);
 		G_STRIPE_LOGREQ(cbp, "Sending request.");
-		cp = cbp->bio_caller1;
-		cbp->bio_caller1 = NULL;
+		cp = cbp->bio_caller2;
+		cbp->bio_caller2 = NULL;
 		g_io_request(cbp, cp);
 	}
 }
@@ -613,9 +630,12 @@ g_stripe_start(struct bio *bp)
 	 * 3. Request size is bigger than stripesize * ndisks. If it isn't,
 	 *    there will be no need to send more than one I/O request to
 	 *    a provider, so there is nothing to optmize.
+	 * and
+	 * 4. Request is not unmapped.
 	 */
 	if (g_stripe_fast && bp->bio_length <= MAXPHYS &&
-	    bp->bio_length >= stripesize * sc->sc_ndisks) {
+	    bp->bio_length >= stripesize * sc->sc_ndisks &&
+	    (bp->bio_flags & BIO_UNMAPPED) == 0) {
 		fast = 1;
 	}
 	error = 0;
@@ -642,6 +662,7 @@ g_stripe_start(struct bio *bp)
 
 static void
 g_stripe_check_and_run(struct g_stripe_softc *sc)
 {
+	struct g_provider *dp;
 	off_t mediasize, ms;
 	u_int no, sectorsize = 0;
@@ -651,6 +672,9 @@ g_stripe_check_and_run(struct g_stripe_softc *sc)
 
 	sc->sc_provider = g_new_providerf(sc->sc_geom, "stripe/%s",
 	    sc->sc_name);
+	sc->sc_provider->flags |= G_PF_DIRECT_SEND | G_PF_DIRECT_RECEIVE;
+	if (g_stripe_fast == 0)
+		sc->sc_provider->flags |= G_PF_ACCEPT_UNMAPPED;
 	/*
 	 * Find the smallest disk.
 	 */
@@ -660,14 +684,21 @@ g_stripe_check_and_run(struct g_stripe_softc *sc)
 	mediasize -= mediasize % sc->sc_stripesize;
 	sectorsize = sc->sc_disks[0]->provider->sectorsize;
 	for (no = 1; no < sc->sc_ndisks; no++) {
-		ms = sc->sc_disks[no]->provider->mediasize;
+		dp = sc->sc_disks[no]->provider;
+		ms = dp->mediasize;
 		if (sc->sc_type == G_STRIPE_TYPE_AUTOMATIC)
-			ms -= sc->sc_disks[no]->provider->sectorsize;
+			ms -= dp->sectorsize;
 		ms -= ms % sc->sc_stripesize;
 		if (ms < mediasize)
 			mediasize = ms;
-		sectorsize = lcm(sectorsize,
-		    sc->sc_disks[no]->provider->sectorsize);
+		sectorsize = lcm(sectorsize, dp->sectorsize);
+
+		/* A provider underneath us doesn't support unmapped */
+		if ((dp->flags & G_PF_ACCEPT_UNMAPPED) == 0) {
+			G_STRIPE_DEBUG(1, "Cancelling unmapped "
+			    "because of %s.", dp->name);
+			sc->sc_provider->flags &= ~G_PF_ACCEPT_UNMAPPED;
+		}
 	}
 	sc->sc_provider->sectorsize = sectorsize;
 	sc->sc_provider->mediasize = mediasize * sc->sc_ndisks;
@@ -729,6 +760,7 @@ g_stripe_add_disk(struct g_stripe_softc *sc, struct g_provider *pp, u_int no)
 	fcp = LIST_FIRST(&gp->consumer);
 
 	cp = g_new_consumer(gp);
+	cp->flags |= G_CF_DIRECT_SEND | G_CF_DIRECT_RECEIVE;
 	cp->private = NULL;
 	cp->index = no;
 	error = g_attach(cp, pp);
@@ -830,6 +862,7 @@ g_stripe_create(struct g_class *mp, const struct g_stripe_metadata *md,
 	for (no = 0; no < sc->sc_ndisks; no++)
 		sc->sc_disks[no] = NULL;
 	sc->sc_type = type;
+	mtx_init(&sc->sc_lock, "gstripe lock", NULL, MTX_DEF);
 
 	gp->softc = sc;
 	sc->sc_geom = gp;
@@ -878,6 +911,7 @@ g_stripe_destroy(struct g_stripe_softc *sc, boolean_t force)
 	KASSERT(sc->sc_provider == NULL, ("Provider still exists? (device=%s)",
 	    gp->name));
 	free(sc->sc_disks, M_STRIPE);
+	mtx_destroy(&sc->sc_lock);
 	free(sc, M_STRIPE);
 	G_STRIPE_DEBUG(0, "Device %s destroyed.", gp->name);
 	g_wither_geom(gp, ENXIO);
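Why g_stripe now needs sc_lock: with G_PF_DIRECT_SEND/G_PF_DIRECT_RECEIVE set, g_stripe_done() may run concurrently from several completion contexts instead of serially in the g_up thread, so the parent bio's accounting fields become shared state. The shape of the pattern as a standalone userspace sketch (a pthread mutex stands in for the softc mutex; all names here are illustrative):

	#include <pthread.h>

	struct parent_io {			/* stands in for the parent bio */
		pthread_mutex_t	lock;		/* stands in for softc->sc_lock */
		int		error;		/* first child error wins */
		long		completed;
		int		inbed, children;
	};

	/* Returns 1 when the last child is in and the parent may be delivered. */
	static int
	child_done(struct parent_io *p, int child_error, long child_completed)
	{
		int finished;

		pthread_mutex_lock(&p->lock);
		if (p->error == 0)
			p->error = child_error;
		p->completed += child_completed;
		finished = (++p->inbed == p->children);
		pthread_mutex_unlock(&p->lock);	/* drop before delivering upward */
		return (finished);
	}

Note that the single-stripe path above keeps g_std_done ("Optimized lockless case") and skips the mutex entirely, since a lone child needs no shared accounting.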
(device=%s)", gp->name)); free(sc->sc_disks, M_STRIPE); + mtx_destroy(&sc->sc_lock); free(sc, M_STRIPE); G_STRIPE_DEBUG(0, "Device %s destroyed.", gp->name); g_wither_geom(gp, ENXIO); diff --git a/sys/geom/stripe/g_stripe.h b/sys/geom/stripe/g_stripe.h index 2720c6f..fe4452b 100644 --- a/sys/geom/stripe/g_stripe.h +++ b/sys/geom/stripe/g_stripe.h @@ -76,6 +76,7 @@ struct g_stripe_softc { uint16_t sc_ndisks; uint32_t sc_stripesize; uint32_t sc_stripebits; + struct mtx sc_lock; }; #define sc_name sc_geom->name #endif /* _KERNEL */ diff --git a/sys/geom/zero/g_zero.c b/sys/geom/zero/g_zero.c index 311db54..8cdfd90 100644 --- a/sys/geom/zero/g_zero.c +++ b/sys/geom/zero/g_zero.c @@ -106,6 +106,7 @@ g_zero_init(struct g_class *mp) gp->start = g_zero_start; gp->access = g_std_access; gpp = pp = g_new_providerf(gp, "%s", gp->name); + pp->flags |= G_PF_DIRECT_SEND | G_PF_DIRECT_RECEIVE; if (!g_zero_clear) pp->flags |= G_PF_ACCEPT_UNMAPPED; pp->mediasize = 1152921504606846976LLU; diff --git a/sys/kern/subr_devstat.c b/sys/kern/subr_devstat.c index c44ef27..6800ce3 100644 --- a/sys/kern/subr_devstat.c +++ b/sys/kern/subr_devstat.c @@ -131,6 +131,7 @@ devstat_new_entry(const void *dev_name, ds = devstat_alloc(); mtx_lock(&devstat_mutex); if (unit_number == -1) { + ds->unit_number = unit_number; ds->id = dev_name; binuptime(&ds->creation_time); devstat_generation++; @@ -242,7 +243,7 @@ devstat_remove_entry(struct devstat *ds) /* Remove this entry from the devstat queue */ atomic_add_acq_int(&ds->sequence1, 1); - if (ds->id == NULL) { + if (ds->unit_number != -1) { devstat_num_devs--; STAILQ_REMOVE(devstat_head, ds, devstat, dev_links); } @@ -374,6 +375,14 @@ devstat_end_transaction(struct devstat *ds, uint32_t bytes, void devstat_end_transaction_bio(struct devstat *ds, struct bio *bp) { + + devstat_end_transaction_bio_bt(ds, bp, NULL); +} + +void +devstat_end_transaction_bio_bt(struct devstat *ds, struct bio *bp, + struct bintime *now) +{ devstat_trans_flags flg; /* sanity check */ @@ -390,7 +399,7 @@ devstat_end_transaction_bio(struct devstat *ds, struct bio *bp) flg = DEVSTAT_NO_DATA; devstat_end_transaction(ds, bp->bio_bcount - bp->bio_resid, - DEVSTAT_TAG_SIMPLE, flg, NULL, &bp->bio_t0); + DEVSTAT_TAG_SIMPLE, flg, now, &bp->bio_t0); DTRACE_DEVSTAT_BIO_DONE(); } diff --git a/sys/kern/vfs_bio.c b/sys/kern/vfs_bio.c index 3812813..d69bba9 100644 --- a/sys/kern/vfs_bio.c +++ b/sys/kern/vfs_bio.c @@ -3557,15 +3557,15 @@ biodone(struct bio *bp) struct mtx *mtxp; void (*done)(struct bio *); vm_offset_t start, end; - int transient; if ((bp->bio_flags & BIO_TRANSIENT_MAPPING) != 0) { + bp->bio_flags &= ~BIO_TRANSIENT_MAPPING; + bp->bio_flags |= BIO_UNMAPPED; start = trunc_page((vm_offset_t)bp->bio_data); end = round_page((vm_offset_t)bp->bio_data + bp->bio_length); - transient = 1; - } else { - transient = 0; - start = end = 0; + pmap_qremove(start, OFF_TO_IDX(end - start)); + vmem_free(transient_arena, start, end - start); + atomic_add_int(&inflight_transient_maps, -1); } done = bp->bio_done; if (done == NULL) { @@ -3578,11 +3578,6 @@ biodone(struct bio *bp) bp->bio_flags |= BIO_DONE; done(bp); } - if (transient) { - pmap_qremove(start, OFF_TO_IDX(end - start)); - vmem_free(transient_arena, start, end - start); - atomic_add_int(&inflight_transient_maps, -1); - } } /* diff --git a/sys/sys/devicestat.h b/sys/sys/devicestat.h index 1a017cb..bce0570 100644 --- a/sys/sys/devicestat.h +++ b/sys/sys/devicestat.h @@ -199,6 +199,8 @@ void devstat_end_transaction(struct devstat *ds, u_int32_t bytes, 
diff --git a/sys/sys/devicestat.h b/sys/sys/devicestat.h
index 1a017cb..bce0570 100644
--- a/sys/sys/devicestat.h
+++ b/sys/sys/devicestat.h
@@ -199,6 +199,8 @@ void devstat_end_transaction(struct devstat *ds, u_int32_t bytes,
 			     devstat_trans_flags flags,
 			     struct bintime *now, struct bintime *then);
 void devstat_end_transaction_bio(struct devstat *ds, struct bio *bp);
+void devstat_end_transaction_bio_bt(struct devstat *ds, struct bio *bp,
+    struct bintime *now);
 #endif
 
 #endif /* _DEVICESTAT_H */
diff --git a/sys/sys/proc.h b/sys/sys/proc.h
index 5443b61..fce1f8a 100644
--- a/sys/sys/proc.h
+++ b/sys/sys/proc.h
@@ -793,6 +793,8 @@ extern pid_t pid_max;
 #define	THREAD_SLEEPING_OK()	((curthread)->td_no_sleeping--)
 
+#define	THREAD_CAN_SLEEP()	((curthread)->td_no_sleeping == 0)
+
 #define	PIDHASH(pid)	(&pidhashtbl[(pid) & pidhash])
 extern LIST_HEAD(pidhashhead, proc) *pidhashtbl;
 extern u_long pidhash;
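THREAD_CAN_SLEEP() is the predicate that makes direct dispatch safe for providers that may block: when a request arrives in a context that is not allowed to sleep, it is queued for a worker instead of being served inline. The decision, sketched (THREAD_CAN_SLEEP() is the real macro added above; the two handlers are illustrative stand-ins for a driver's inline and queued paths):

	/*
	 * Sketch of the dispatch decision a provider's start routine makes
	 * under direct dispatch.
	 */
	static void
	start_request(struct bio *bp)
	{
		if (THREAD_CAN_SLEEP())
			handle_inline(bp);	/* direct dispatch: no queue,
						   no context switch */
		else
			enqueue_for_worker(bp);	/* defer to a thread that
						   is allowed to sleep */
	}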