summaryrefslogtreecommitdiffstats
path: root/sys/geom/stripe
diff options
context:
space:
mode:
authorscottl <scottl@FreeBSD.org>2014-01-07 01:32:23 +0000
committerscottl <scottl@FreeBSD.org>2014-01-07 01:32:23 +0000
commit0a34594b9cd7c8b87f719ed058da6be2b756a8e5 (patch)
tree9702de6a6a50f2bb1a6829d66c26686ca7a160cc /sys/geom/stripe
parent1bce546983c144fd6d05af45e88abd3186b87b1b (diff)
downloadFreeBSD-src-0a34594b9cd7c8b87f719ed058da6be2b756a8e5.zip
FreeBSD-src-0a34594b9cd7c8b87f719ed058da6be2b756a8e5.tar.gz
MFC Alexander Motin's GEOM direct dispatch work:
r256603: Introduce new function devstat_end_transaction_bio_bt(), adding new argument to specify present time. Use this function to move binuptime() out of lock, substantially reducing lock congestion when slow timecounter is used. r256606: Move g_io_deliver() out of the lock, as required for direct dispatch. Move g_destroy_bio() out too to reduce lock scope even more. r256607: Fix passing uninitialized bio_resid argument to g_trace(). r256610: Add unmapped I/O support to GEOM RAID. r256830: Restore BIO_UNMAPPED and BIO_TRANSIENT_MAPPING in biodone() when unmapping temporary mapped buffer. That fixes double unmap if biodone() called twice for the same BIO (but with different done methods). r256880: Merge GEOM direct dispatch changes from the projects/camlock branch. When safety requirements are met, it allows to avoid passing I/O requests to GEOM g_up/g_down thread, executing them directly in the caller context. That allows to avoid CPU bottlenecks in g_up/g_down threads, plus avoid several context switches per I/O. r259247: Fix bug introduced at r256607. We have to recalculate bp_resid here since sizes of original and completed requests may differ due to end of media. Testing of the stable/10 merge was done by Netflix, but all of the credit goes to Alexander and iX Systems. Submitted by: mav Sponsored by: iX Systems
Diffstat (limited to 'sys/geom/stripe')
-rw-r--r--sys/geom/stripe/g_stripe.c72
-rw-r--r--sys/geom/stripe/g_stripe.h1
2 files changed, 54 insertions, 19 deletions
diff --git a/sys/geom/stripe/g_stripe.c b/sys/geom/stripe/g_stripe.c
index 575ec5f..b5d77c5 100644
--- a/sys/geom/stripe/g_stripe.c
+++ b/sys/geom/stripe/g_stripe.c
@@ -284,22 +284,25 @@ g_stripe_done(struct bio *bp)
pbp = bp->bio_parent;
sc = pbp->bio_to->geom->softc;
- if (pbp->bio_error == 0)
- pbp->bio_error = bp->bio_error;
- pbp->bio_completed += bp->bio_completed;
if (bp->bio_cmd == BIO_READ && bp->bio_caller1 != NULL) {
g_stripe_copy(sc, bp->bio_data, bp->bio_caller1, bp->bio_offset,
bp->bio_length, 1);
bp->bio_data = bp->bio_caller1;
bp->bio_caller1 = NULL;
}
- g_destroy_bio(bp);
+ mtx_lock(&sc->sc_lock);
+ if (pbp->bio_error == 0)
+ pbp->bio_error = bp->bio_error;
+ pbp->bio_completed += bp->bio_completed;
pbp->bio_inbed++;
if (pbp->bio_children == pbp->bio_inbed) {
+ mtx_unlock(&sc->sc_lock);
if (pbp->bio_driver1 != NULL)
uma_zfree(g_stripe_zone, pbp->bio_driver1);
g_io_deliver(pbp, pbp->bio_error);
- }
+ } else
+ mtx_unlock(&sc->sc_lock);
+ g_destroy_bio(bp);
}
static int
@@ -442,7 +445,6 @@ g_stripe_start_economic(struct bio *bp, u_int no, off_t offset, off_t length)
sc = bp->bio_to->geom->softc;
- addr = bp->bio_data;
stripesize = sc->sc_stripesize;
cbp = g_clone_bio(bp);
@@ -454,10 +456,18 @@ g_stripe_start_economic(struct bio *bp, u_int no, off_t offset, off_t length)
/*
* Fill in the component buf structure.
*/
- cbp->bio_done = g_std_done;
+ if (bp->bio_length == length)
+ cbp->bio_done = g_std_done; /* Optimized lockless case. */
+ else
+ cbp->bio_done = g_stripe_done;
cbp->bio_offset = offset;
- cbp->bio_data = addr;
cbp->bio_length = length;
+ if ((bp->bio_flags & BIO_UNMAPPED) != 0) {
+ bp->bio_ma_n = round_page(bp->bio_ma_offset +
+ bp->bio_length) / PAGE_SIZE;
+ addr = NULL;
+ } else
+ addr = bp->bio_data;
cbp->bio_caller2 = sc->sc_disks[no];
/* offset -= offset % stripesize; */
@@ -479,14 +489,21 @@ g_stripe_start_economic(struct bio *bp, u_int no, off_t offset, off_t length)
/*
* Fill in the component buf structure.
*/
- cbp->bio_done = g_std_done;
+ cbp->bio_done = g_stripe_done;
cbp->bio_offset = offset;
- cbp->bio_data = addr;
/*
* MIN() is in case when
* (bp->bio_length % sc->sc_stripesize) != 0.
*/
cbp->bio_length = MIN(stripesize, length);
+ if ((bp->bio_flags & BIO_UNMAPPED) != 0) {
+ cbp->bio_ma_offset += (uintptr_t)addr;
+ cbp->bio_ma += cbp->bio_ma_offset / PAGE_SIZE;
+ cbp->bio_ma_offset %= PAGE_SIZE;
+ cbp->bio_ma_n = round_page(cbp->bio_ma_offset +
+ cbp->bio_length) / PAGE_SIZE;
+ } else
+ cbp->bio_data = addr;
cbp->bio_caller2 = sc->sc_disks[no];
}
@@ -536,15 +553,15 @@ g_stripe_flush(struct g_stripe_softc *sc, struct bio *bp)
return;
}
bioq_insert_tail(&queue, cbp);
- cbp->bio_done = g_std_done;
- cbp->bio_caller1 = sc->sc_disks[no];
+ cbp->bio_done = g_stripe_done;
+ cbp->bio_caller2 = sc->sc_disks[no];
cbp->bio_to = sc->sc_disks[no]->provider;
}
for (cbp = bioq_first(&queue); cbp != NULL; cbp = bioq_first(&queue)) {
bioq_remove(&queue, cbp);
G_STRIPE_LOGREQ(cbp, "Sending request.");
- cp = cbp->bio_caller1;
- cbp->bio_caller1 = NULL;
+ cp = cbp->bio_caller2;
+ cbp->bio_caller2 = NULL;
g_io_request(cbp, cp);
}
}
@@ -613,9 +630,12 @@ g_stripe_start(struct bio *bp)
* 3. Request size is bigger than stripesize * ndisks. If it isn't,
* there will be no need to send more than one I/O request to
* a provider, so there is nothing to optmize.
+ * and
+ * 4. Request is not unmapped.
*/
if (g_stripe_fast && bp->bio_length <= MAXPHYS &&
- bp->bio_length >= stripesize * sc->sc_ndisks) {
+ bp->bio_length >= stripesize * sc->sc_ndisks &&
+ (bp->bio_flags & BIO_UNMAPPED) == 0) {
fast = 1;
}
error = 0;
@@ -642,6 +662,7 @@ g_stripe_start(struct bio *bp)
static void
g_stripe_check_and_run(struct g_stripe_softc *sc)
{
+ struct g_provider *dp;
off_t mediasize, ms;
u_int no, sectorsize = 0;
@@ -651,6 +672,9 @@ g_stripe_check_and_run(struct g_stripe_softc *sc)
sc->sc_provider = g_new_providerf(sc->sc_geom, "stripe/%s",
sc->sc_name);
+ sc->sc_provider->flags |= G_PF_DIRECT_SEND | G_PF_DIRECT_RECEIVE;
+ if (g_stripe_fast == 0)
+ sc->sc_provider->flags |= G_PF_ACCEPT_UNMAPPED;
/*
* Find the smallest disk.
*/
@@ -660,14 +684,21 @@ g_stripe_check_and_run(struct g_stripe_softc *sc)
mediasize -= mediasize % sc->sc_stripesize;
sectorsize = sc->sc_disks[0]->provider->sectorsize;
for (no = 1; no < sc->sc_ndisks; no++) {
- ms = sc->sc_disks[no]->provider->mediasize;
+ dp = sc->sc_disks[no]->provider;
+ ms = dp->mediasize;
if (sc->sc_type == G_STRIPE_TYPE_AUTOMATIC)
- ms -= sc->sc_disks[no]->provider->sectorsize;
+ ms -= dp->sectorsize;
ms -= ms % sc->sc_stripesize;
if (ms < mediasize)
mediasize = ms;
- sectorsize = lcm(sectorsize,
- sc->sc_disks[no]->provider->sectorsize);
+ sectorsize = lcm(sectorsize, dp->sectorsize);
+
+ /* A provider underneath us doesn't support unmapped */
+ if ((dp->flags & G_PF_ACCEPT_UNMAPPED) == 0) {
+ G_STRIPE_DEBUG(1, "Cancelling unmapped "
+ "because of %s.", dp->name);
+ sc->sc_provider->flags &= ~G_PF_ACCEPT_UNMAPPED;
+ }
}
sc->sc_provider->sectorsize = sectorsize;
sc->sc_provider->mediasize = mediasize * sc->sc_ndisks;
@@ -729,6 +760,7 @@ g_stripe_add_disk(struct g_stripe_softc *sc, struct g_provider *pp, u_int no)
fcp = LIST_FIRST(&gp->consumer);
cp = g_new_consumer(gp);
+ cp->flags |= G_CF_DIRECT_SEND | G_CF_DIRECT_RECEIVE;
cp->private = NULL;
cp->index = no;
error = g_attach(cp, pp);
@@ -830,6 +862,7 @@ g_stripe_create(struct g_class *mp, const struct g_stripe_metadata *md,
for (no = 0; no < sc->sc_ndisks; no++)
sc->sc_disks[no] = NULL;
sc->sc_type = type;
+ mtx_init(&sc->sc_lock, "gstripe lock", NULL, MTX_DEF);
gp->softc = sc;
sc->sc_geom = gp;
@@ -878,6 +911,7 @@ g_stripe_destroy(struct g_stripe_softc *sc, boolean_t force)
KASSERT(sc->sc_provider == NULL, ("Provider still exists? (device=%s)",
gp->name));
free(sc->sc_disks, M_STRIPE);
+ mtx_destroy(&sc->sc_lock);
free(sc, M_STRIPE);
G_STRIPE_DEBUG(0, "Device %s destroyed.", gp->name);
g_wither_geom(gp, ENXIO);
diff --git a/sys/geom/stripe/g_stripe.h b/sys/geom/stripe/g_stripe.h
index 2720c6f..fe4452b 100644
--- a/sys/geom/stripe/g_stripe.h
+++ b/sys/geom/stripe/g_stripe.h
@@ -76,6 +76,7 @@ struct g_stripe_softc {
uint16_t sc_ndisks;
uint32_t sc_stripesize;
uint32_t sc_stripebits;
+ struct mtx sc_lock;
};
#define sc_name sc_geom->name
#endif /* _KERNEL */
OpenPOWER on IntegriCloud