From f1e539874655ae9e74c1644fd54133b19f1b14e2 Mon Sep 17 00:00:00 2001 From: Mikulas Patocka Date: Thu, 10 Dec 2009 23:51:58 +0000 Subject: dm io: remove extra bi_io_vec region hack Remove the hack where we allocate an extra bi_io_vec to store additional private data. This hack prevents us from supporting barriers in dm-raid1 without first making another little block layer change. Instead of doing that, this patch eliminates the bi_io_vec abuse by storing the region number directly in the low bits of bi_private. We need to store two things for each bio, the pointer to the main io structure and, if parallel writes were requested, an index indicating which of these writes this bio belongs to. There can be at most BITS_PER_LONG regions - 32 or 64. The index (region number) was stored in the last (hidden) bio vector and the pointer to struct io was stored in bi_private. This patch now aligns "struct io" on BITS_PER_LONG bytes and stores the region number in the low BITS_PER_LONG bits of bi_private. Signed-off-by: Mikulas Patocka Signed-off-by: Alasdair G Kergon --- drivers/md/dm-io.c | 89 +++++++++++++++++++++++++++++++++--------------------- 1 file changed, 55 insertions(+), 34 deletions(-) (limited to 'drivers') diff --git a/drivers/md/dm-io.c b/drivers/md/dm-io.c index b0d264e..f6a714c 100644 --- a/drivers/md/dm-io.c +++ b/drivers/md/dm-io.c @@ -16,12 +16,19 @@ #include #include +#define DM_MSG_PREFIX "io" + +#define DM_IO_MAX_REGIONS BITS_PER_LONG + struct dm_io_client { mempool_t *pool; struct bio_set *bios; }; -/* FIXME: can we shrink this ? */ +/* + * Aligning 'struct io' reduces the number of bits required to store + * its address. Refer to store_io_and_region_in_bio() below. + */ struct io { unsigned long error_bits; unsigned long eopnotsupp_bits; @@ -30,7 +37,7 @@ struct io { struct dm_io_client *client; io_notify_fn callback; void *context; -}; +} __attribute__((aligned(DM_IO_MAX_REGIONS))); static struct kmem_cache *_dm_io_cache; @@ -92,18 +99,29 @@ EXPORT_SYMBOL(dm_io_client_destroy); /*----------------------------------------------------------------- * We need to keep track of which region a bio is doing io for. - * In order to save a memory allocation we store this the last - * bvec which we know is unused (blech). - * XXX This is ugly and can OOPS with some configs... find another way. + * To avoid a memory allocation to store just 5 or 6 bits, we + * ensure the 'struct io' pointer is aligned so enough low bits are + * always zero and then combine it with the region number directly in + * bi_private. *---------------------------------------------------------------*/ -static inline void bio_set_region(struct bio *bio, unsigned region) +static void store_io_and_region_in_bio(struct bio *bio, struct io *io, + unsigned region) { - bio->bi_io_vec[bio->bi_max_vecs].bv_len = region; + if (unlikely(!IS_ALIGNED((unsigned long)io, DM_IO_MAX_REGIONS))) { + DMCRIT("Unaligned struct io pointer %p", io); + BUG(); + } + + bio->bi_private = (void *)((unsigned long)io | region); } -static inline unsigned bio_get_region(struct bio *bio) +static void retrieve_io_and_region_from_bio(struct bio *bio, struct io **io, + unsigned *region) { - return bio->bi_io_vec[bio->bi_max_vecs].bv_len; + unsigned long val = (unsigned long)bio->bi_private; + + *io = (void *)(val & -(unsigned long)DM_IO_MAX_REGIONS); + *region = val & (DM_IO_MAX_REGIONS - 1); } /*----------------------------------------------------------------- @@ -144,10 +162,8 @@ static void endio(struct bio *bio, int error) /* * The bio destructor in bio_put() may use the io object. */ - io = bio->bi_private; - region = bio_get_region(bio); + retrieve_io_and_region_from_bio(bio, &io, ®ion); - bio->bi_max_vecs++; bio_put(bio); dec_count(io, region, error); @@ -247,7 +263,10 @@ static void vm_dp_init(struct dpages *dp, void *data) static void dm_bio_destructor(struct bio *bio) { - struct io *io = bio->bi_private; + unsigned region; + struct io *io; + + retrieve_io_and_region_from_bio(bio, &io, ®ion); bio_free(bio, io->client->bios); } @@ -292,24 +311,17 @@ static void do_region(int rw, unsigned region, struct dm_io_region *where, while (remaining) { /* - * Allocate a suitably sized-bio: we add an extra - * bvec for bio_get/set_region() and decrement bi_max_vecs - * to hide it from bio_add_page(). + * Allocate a suitably sized-bio. */ num_bvecs = dm_sector_div_up(remaining, (PAGE_SIZE >> SECTOR_SHIFT)); - num_bvecs = 1 + min_t(int, bio_get_nr_vecs(where->bdev), - num_bvecs); - if (unlikely(num_bvecs > BIO_MAX_PAGES)) - num_bvecs = BIO_MAX_PAGES; + num_bvecs = min_t(int, bio_get_nr_vecs(where->bdev), num_bvecs); bio = bio_alloc_bioset(GFP_NOIO, num_bvecs, io->client->bios); bio->bi_sector = where->sector + (where->count - remaining); bio->bi_bdev = where->bdev; bio->bi_end_io = endio; - bio->bi_private = io; bio->bi_destructor = dm_bio_destructor; - bio->bi_max_vecs--; - bio_set_region(bio, region); + store_io_and_region_in_bio(bio, io, region); /* * Try and add as many pages as possible. @@ -337,6 +349,8 @@ static void dispatch_io(int rw, unsigned int num_regions, int i; struct dpages old_pages = *dp; + BUG_ON(num_regions > DM_IO_MAX_REGIONS); + if (sync) rw |= (1 << BIO_RW_SYNCIO) | (1 << BIO_RW_UNPLUG); @@ -361,7 +375,14 @@ static int sync_io(struct dm_io_client *client, unsigned int num_regions, struct dm_io_region *where, int rw, struct dpages *dp, unsigned long *error_bits) { - struct io io; + /* + * gcc <= 4.3 can't do the alignment for stack variables, so we must + * align it on our own. + * volatile prevents the optimizer from removing or reusing + * "io_" field from the stack frame (allowed in ANSI C). + */ + volatile char io_[sizeof(struct io) + __alignof__(struct io) - 1]; + struct io *io = (struct io *)PTR_ALIGN(&io_, __alignof__(struct io)); if (num_regions > 1 && (rw & RW_MASK) != WRITE) { WARN_ON(1); @@ -369,33 +390,33 @@ static int sync_io(struct dm_io_client *client, unsigned int num_regions, } retry: - io.error_bits = 0; - io.eopnotsupp_bits = 0; - atomic_set(&io.count, 1); /* see dispatch_io() */ - io.sleeper = current; - io.client = client; + io->error_bits = 0; + io->eopnotsupp_bits = 0; + atomic_set(&io->count, 1); /* see dispatch_io() */ + io->sleeper = current; + io->client = client; - dispatch_io(rw, num_regions, where, dp, &io, 1); + dispatch_io(rw, num_regions, where, dp, io, 1); while (1) { set_current_state(TASK_UNINTERRUPTIBLE); - if (!atomic_read(&io.count)) + if (!atomic_read(&io->count)) break; io_schedule(); } set_current_state(TASK_RUNNING); - if (io.eopnotsupp_bits && (rw & (1 << BIO_RW_BARRIER))) { + if (io->eopnotsupp_bits && (rw & (1 << BIO_RW_BARRIER))) { rw &= ~(1 << BIO_RW_BARRIER); goto retry; } if (error_bits) - *error_bits = io.error_bits; + *error_bits = io->error_bits; - return io.error_bits ? -EIO : 0; + return io->error_bits ? -EIO : 0; } static int async_io(struct dm_io_client *client, unsigned int num_regions, -- cgit v1.1