diff options
author | pjd <pjd@FreeBSD.org> | 2004-07-09 14:30:09 +0000 |
---|---|---|
committer | pjd <pjd@FreeBSD.org> | 2004-07-09 14:30:09 +0000 |
commit | 227209a4dab50a9d07ecccb9250eaf351858312e (patch) | |
tree | 95e16ec23e3854cb034e6ca1f97212d8cb3e0f7f /sys | |
parent | 5be6baa85c81107025f55b6b0b6b9997d83fb135 (diff) | |
download | FreeBSD-src-227209a4dab50a9d07ecccb9250eaf351858312e.zip FreeBSD-src-227209a4dab50a9d07ecccb9250eaf351858312e.tar.gz |
Implement "FAST" mode for GEOM_STRIPE class and turn it on by default.
In this mode you can set up even a very small stripe size and you can be
sure that only one I/O request will be sent to every disk in the stripe.
It consumes some more memory, but if allocation fails, it will fall
back to "ECONOMIC" mode.
It is about 10 times faster for small stripe sizes than "ECONOMIC" mode
and other RAID0 implementations. It is even recommended to use this
mode with a small stripe size, so that our requests are always split.
One can still use "ECONOMIC" mode by setting kern.geom.stripe.fast to 0.
It is also possible to configure the maximum memory which "FAST" mode can consume,
by setting kern.geom.stripe.maxmem from /boot/loader.conf.
Diffstat (limited to 'sys')
-rw-r--r-- | sys/geom/stripe/g_stripe.c | 425 |
1 files changed, 345 insertions, 80 deletions
diff --git a/sys/geom/stripe/g_stripe.c b/sys/geom/stripe/g_stripe.c index cac06f8..6dc2fb2 100644 --- a/sys/geom/stripe/g_stripe.c +++ b/sys/geom/stripe/g_stripe.c @@ -36,17 +36,15 @@ __FBSDID("$FreeBSD$"); #include <sys/bio.h> #include <sys/sysctl.h> #include <sys/malloc.h> +#include <vm/uma.h> #include <geom/geom.h> #include <geom/stripe/g_stripe.h> +#define MAX_IO_SIZE (DFLTPHYS * 2) static MALLOC_DEFINE(M_STRIPE, "stripe data", "GEOM_STRIPE Data"); -SYSCTL_DECL(_kern_geom); -SYSCTL_NODE(_kern_geom, OID_AUTO, stripe, CTLFLAG_RW, 0, "GEOM_STRIPE stuff"); -static u_int g_stripe_debug = 0; -SYSCTL_UINT(_kern_geom_stripe, OID_AUTO, debug, CTLFLAG_RW, &g_stripe_debug, 0, - "Debug level"); +static uma_zone_t g_stripe_zone; static int g_stripe_destroy(struct g_stripe_softc *sc, boolean_t force); static int g_stripe_destroy_geom(struct gctl_req *req, struct g_class *mp, @@ -55,14 +53,42 @@ static int g_stripe_destroy_geom(struct gctl_req *req, struct g_class *mp, static g_taste_t g_stripe_taste; static g_ctl_req_t g_stripe_config; static g_dumpconf_t g_stripe_dumpconf; +static g_init_t g_stripe_init; +static g_fini_t g_stripe_fini; struct g_class g_stripe_class = { .name = G_STRIPE_CLASS_NAME, .ctlreq = g_stripe_config, .taste = g_stripe_taste, - .destroy_geom = g_stripe_destroy_geom + .destroy_geom = g_stripe_destroy_geom, + .init = g_stripe_init, + .fini = g_stripe_fini }; +SYSCTL_DECL(_kern_geom); +SYSCTL_NODE(_kern_geom, OID_AUTO, stripe, CTLFLAG_RW, 0, "GEOM_STRIPE stuff"); +static u_int g_stripe_debug = 0; +SYSCTL_UINT(_kern_geom_stripe, OID_AUTO, debug, CTLFLAG_RW, &g_stripe_debug, 0, + "Debug level"); +static int g_stripe_fast = 1; +TUNABLE_INT("kern.geom.stripe.fast", &g_stripe_fast); +static int +g_sysctl_stripe_fast(SYSCTL_HANDLER_ARGS) +{ + int error, fast; + + fast = g_stripe_fast; + error = sysctl_handle_int(oidp, &fast, sizeof(fast), req); + if (error == 0 && req->newptr != NULL) + g_stripe_fast = fast; + return (error); +} 
+SYSCTL_PROC(_kern_geom_stripe, OID_AUTO, fast, CTLTYPE_INT | CTLFLAG_RW, + NULL, 0, g_sysctl_stripe_fast, "I", "Fast, but memory-consuming mode"); +static u_int g_stripe_maxmem = MAX_IO_SIZE * 10; +TUNABLE_INT("kern.geom.stripe.maxmem", &g_stripe_maxmem); +SYSCTL_UINT(_kern_geom_stripe, OID_AUTO, maxmem, CTLFLAG_RD, &g_stripe_maxmem, + 0, "Maximum memory that could be allocated in \"fast\" mode (in bytes)"); /* * Greatest Common Divisor. @@ -90,6 +116,23 @@ lcm(u_int a, u_int b) return ((a * b) / gcd(a, b)); } +static void +g_stripe_init(struct g_class *mp __unused) +{ + + g_stripe_zone = uma_zcreate("g_stripe_zone", MAX_IO_SIZE, NULL, NULL, + NULL, NULL, 0, 0); + g_stripe_maxmem -= g_stripe_maxmem % MAX_IO_SIZE; + uma_zone_set_max(g_stripe_zone, g_stripe_maxmem / MAX_IO_SIZE); +} + +static void +g_stripe_fini(struct g_class *mp __unused) +{ + + uma_zdestroy(g_stripe_zone); +} + /* * Return the number of valid disks. */ @@ -205,19 +248,278 @@ g_stripe_access(struct g_provider *pp, int dr, int dw, int de) } static void -g_stripe_start(struct bio *bp) +g_stripe_copy(struct g_stripe_softc *sc, char *src, char *dst, off_t offset, + off_t length, int mode) +{ + u_int stripesize; + size_t len; + + stripesize = sc->sc_stripesize; + len = (size_t)(stripesize - (offset & (stripesize - 1))); + do { + bcopy(src, dst, len); + if (mode) { + dst += len + stripesize * (sc->sc_ndisks - 1); + src += len; + } else { + dst += len; + src += len + stripesize * (sc->sc_ndisks - 1); + } + length -= len; + KASSERT(length >= 0, + ("Length < 0 (stripesize=%zu, offset=%jd, length=%jd).", + (size_t)stripesize, (intmax_t)offset, (intmax_t)length)); + if (length > stripesize) + len = stripesize; + else + len = length; + } while (length > 0); +} + +static void +g_stripe_done(struct bio *bp) { - struct g_provider *pp; struct g_stripe_softc *sc; - off_t off, start, length, nstripe; + struct bio *pbp; + + pbp = bp->bio_parent; + sc = pbp->bio_to->geom->softc; + if (pbp->bio_error == 0) + 
pbp->bio_error = bp->bio_error; + pbp->bio_completed += bp->bio_completed; + if (bp->bio_cmd == BIO_READ && bp->bio_driver1 != NULL) { + g_stripe_copy(sc, bp->bio_data, bp->bio_driver1, bp->bio_offset, + bp->bio_length, 1); + bp->bio_data = bp->bio_driver1; + bp->bio_driver1 = NULL; + } + g_destroy_bio(bp); + pbp->bio_inbed++; + if (pbp->bio_children == pbp->bio_inbed) { + if (pbp->bio_caller1 != NULL) + uma_zfree(g_stripe_zone, pbp->bio_caller1); + g_io_deliver(pbp, pbp->bio_error); + } +} + +static int +g_stripe_start_fast(struct bio *bp, u_int no, off_t offset, off_t length) +{ + TAILQ_HEAD(, bio) queue = TAILQ_HEAD_INITIALIZER(queue); + u_int nparts = 0, stripesize; + struct g_stripe_softc *sc; + char *addr, *data = NULL; struct bio *cbp; - u_int sectorsize; + int error; + + sc = bp->bio_to->geom->softc; + + addr = bp->bio_data; + stripesize = sc->sc_stripesize; + + cbp = g_clone_bio(bp); + if (cbp == NULL) { + error = ENOMEM; + goto failure; + } + TAILQ_INSERT_TAIL(&queue, cbp, bio_queue); + nparts++; + /* + * Fill in the component buf structure. + */ + cbp->bio_done = g_stripe_done; + cbp->bio_offset = offset; + cbp->bio_data = addr; + cbp->bio_driver1 = NULL; + cbp->bio_length = length; + cbp->bio_driver2 = sc->sc_disks[no]; + + /* offset -= offset % stripesize; */ + offset -= offset & (stripesize - 1); + addr += length; + length = bp->bio_length - length; + for (no++; length > 0; no++, length -= stripesize, addr += stripesize) { + if (no > sc->sc_ndisks - 1) { + no = 0; + offset += stripesize; + } + if (nparts >= sc->sc_ndisks) { + cbp = TAILQ_NEXT(cbp, bio_queue); + if (cbp == NULL) + cbp = TAILQ_FIRST(&queue); + nparts++; + /* + * Update bio structure. + */ + /* + * MIN() is in case when + * (bp->bio_length % sc->sc_stripesize) != 0. 
+ */ + cbp->bio_length += MIN(stripesize, length); + if (cbp->bio_driver1 == NULL) { + cbp->bio_driver1 = cbp->bio_data; + cbp->bio_data = NULL; + if (data == NULL) { + data = uma_zalloc(g_stripe_zone, + M_NOWAIT); + if (data == NULL) { + error = ENOMEM; + goto failure; + } + } + } + } else { + cbp = g_clone_bio(bp); + if (cbp == NULL) { + error = ENOMEM; + goto failure; + } + TAILQ_INSERT_TAIL(&queue, cbp, bio_queue); + nparts++; + /* + * Fill in the component buf structure. + */ + cbp->bio_done = g_stripe_done; + cbp->bio_offset = offset; + cbp->bio_data = addr; + cbp->bio_driver1 = NULL; + /* + * MIN() is in case when + * (bp->bio_length % sc->sc_stripesize) != 0. + */ + cbp->bio_length = MIN(stripesize, length); + cbp->bio_driver2 = sc->sc_disks[no]; + } + } + if (data != NULL) + bp->bio_caller1 = data; + /* + * Fire off all allocated requests! + */ + while ((cbp = TAILQ_FIRST(&queue)) != NULL) { + struct g_consumer *cp; + + TAILQ_REMOVE(&queue, cbp, bio_queue); + cp = cbp->bio_driver2; + cbp->bio_driver2 = NULL; + cbp->bio_to = cp->provider; + if (cbp->bio_driver1 != NULL) { + cbp->bio_data = data; + if (bp->bio_cmd == BIO_WRITE) { + g_stripe_copy(sc, cbp->bio_driver1, data, + cbp->bio_offset, cbp->bio_length, 0); + } + data += cbp->bio_length; + } + G_STRIPE_LOGREQ(cbp, "Sending request."); + g_io_request(cbp, cp); + } + return (0); +failure: + if (data != NULL) + uma_zfree(g_stripe_zone, data); + while ((cbp = TAILQ_FIRST(&queue)) != NULL) { + TAILQ_REMOVE(&queue, cbp, bio_queue); + if (cbp->bio_driver1 != NULL) { + cbp->bio_data = cbp->bio_driver1; + cbp->bio_driver1 = NULL; + } + g_destroy_bio(cbp); + } + return (error); +} + +static int +g_stripe_start_economic(struct bio *bp, u_int no, off_t offset, off_t length) +{ + TAILQ_HEAD(, bio) queue = TAILQ_HEAD_INITIALIZER(queue); + struct g_stripe_softc *sc; uint32_t stripesize; - uint16_t no; + struct bio *cbp; char *addr; + int error; + + sc = bp->bio_to->geom->softc; + + addr = bp->bio_data; + stripesize = 
sc->sc_stripesize; + + cbp = g_clone_bio(bp); + if (cbp == NULL) { + error = ENOMEM; + goto failure; + } + TAILQ_INSERT_TAIL(&queue, cbp, bio_queue); + /* + * Fill in the component buf structure. + */ + cbp->bio_done = g_std_done; + cbp->bio_offset = offset; + cbp->bio_data = addr; + cbp->bio_length = length; + cbp->bio_driver2 = sc->sc_disks[no]; + + /* offset -= offset % stripesize; */ + offset -= offset & (stripesize - 1); + addr += length; + length = bp->bio_length - length; + for (no++; length > 0; no++, length -= stripesize, addr += stripesize) { + if (no > sc->sc_ndisks - 1) { + no = 0; + offset += stripesize; + } + cbp = g_clone_bio(bp); + if (cbp == NULL) { + error = ENOMEM; + goto failure; + } + TAILQ_INSERT_TAIL(&queue, cbp, bio_queue); + + /* + * Fill in the component buf structure. + */ + cbp->bio_done = g_std_done; + cbp->bio_offset = offset; + cbp->bio_data = addr; + /* + * MIN() is in case when + * (bp->bio_length % sc->sc_stripesize) != 0. + */ + cbp->bio_length = MIN(stripesize, length); + + cbp->bio_driver2 = sc->sc_disks[no]; + } + /* + * Fire off all allocated requests! + */ + while ((cbp = TAILQ_FIRST(&queue)) != NULL) { + struct g_consumer *cp; + + TAILQ_REMOVE(&queue, cbp, bio_queue); + cp = cbp->bio_driver2; + cbp->bio_driver2 = NULL; + cbp->bio_to = cp->provider; + G_STRIPE_LOGREQ(cbp, "Sending request."); + g_io_request(cbp, cp); + } + return (0); +failure: + while ((cbp = TAILQ_FIRST(&queue)) != NULL) { + TAILQ_REMOVE(&queue, cbp, bio_queue); + g_destroy_bio(cbp); + } + return (error); +} + +static void +g_stripe_start(struct bio *bp) +{ + off_t offset, start, length, nstripe; + struct g_stripe_softc *sc; + u_int no, stripesize; + int error, fast = 0; - pp = bp->bio_to; - sc = pp->geom->softc; + sc = bp->bio_to->geom->softc; /* * If sc == NULL, provider's error should be set and g_stripe_start() * should not be called at all. 
@@ -243,12 +545,10 @@ g_stripe_start(struct bio *bp) return; } - addr = bp->bio_data; - sectorsize = sc->sc_provider->sectorsize; stripesize = sc->sc_stripesize; /* - * Calcucations are quite messy, but fast I hope. + * Calculations are quite messy, but fast I hope. */ /* Stripe number. */ @@ -260,76 +560,41 @@ g_stripe_start(struct bio *bp) /* start = bp->bio_offset % stripesize; */ start = bp->bio_offset & (stripesize - 1); /* Start position in disk. */ - /* off = (nstripe / sc->sc_ndisks) * stripesize + start; */ - off = ((nstripe / sc->sc_ndisks) << sc->sc_stripebits) + start; + /* offset = (nstripe / sc->sc_ndisks) * stripesize + start; */ + offset = ((nstripe / sc->sc_ndisks) << sc->sc_stripebits) + start; /* Length of data to operate. */ length = MIN(bp->bio_length, stripesize - start); - cbp = g_clone_bio(bp); - if (cbp == NULL) { - /* - * Deny all request. This is pointless - * to split rest of the request, bacause - * we're setting bio_error here, so all - * request will be denied, anyway. - */ - bp->bio_completed = bp->bio_length; - if (bp->bio_error == 0) - bp->bio_error = ENOMEM; - g_io_deliver(bp, bp->bio_error); - return; + /* + * Do use "fast" mode when: + * 1. "Fast" mode is ON. + * and + * 2. Request size is less than or equal to MAX_IO_SIZE (128kB), + * which should always be true. + * and + * 3. Request size is bigger than stripesize * ndisks. If it isn't, + * there will be no need to send more than one I/O request to + * a provider, so there is nothing to optmize. + */ + if (g_stripe_fast && bp->bio_length <= MAX_IO_SIZE && + bp->bio_length >= stripesize * sc->sc_ndisks) { + fast = 1; } + error = 0; + if (fast) + error = g_stripe_start_fast(bp, no, offset, length); /* - * Fill in the component buf structure. + * Do use "economic" when: + * 1. "Economic" mode is ON. + * or + * 2. "Fast" mode failed. It can only failed if there is no memory. 
*/ - cbp->bio_done = g_std_done; - cbp->bio_offset = off; - cbp->bio_data = addr; - cbp->bio_length = length; - cbp->bio_to = sc->sc_disks[no]->provider; - G_STRIPE_LOGREQ(cbp, "Sending request."); - g_io_request(cbp, sc->sc_disks[no]); - - /* off -= off % stripesize; */ - off -= off & (stripesize - 1); - addr += length; - length = bp->bio_length - length; - for (no++; length > 0; no++, length -= stripesize, addr += stripesize) { - if (no > sc->sc_ndisks - 1) { - no = 0; - off += stripesize; - } - cbp = g_clone_bio(bp); - if (cbp == NULL) { - /* - * Deny remaining part. This is pointless - * to split rest of the request, bacause - * we're setting bio_error here, so all - * request will be denied, anyway. - */ - bp->bio_completed += length; - if (bp->bio_error == 0) - bp->bio_error = ENOMEM; - if (bp->bio_completed == bp->bio_length) - g_io_deliver(bp, bp->bio_error); - return; - } - - /* - * Fill in the component buf structure. - */ - cbp->bio_done = g_std_done; - cbp->bio_offset = off; - cbp->bio_data = addr; - /* - * MIN() is in case when - * (bp->bio_length % sc->sc_stripesize) != 0. - */ - cbp->bio_length = MIN(stripesize, length); - - cbp->bio_to = sc->sc_disks[no]->provider; - G_STRIPE_LOGREQ(cbp, "Sending request."); - g_io_request(cbp, sc->sc_disks[no]); + if (!fast || error != 0) + error = g_stripe_start_economic(bp, no, offset, length); + if (error != 0) { + if (bp->bio_error == 0) + bp->bio_error = error; + g_io_deliver(bp, bp->bio_error); } } |