diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2012-12-21 17:08:06 -0800 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2012-12-21 17:08:06 -0800 |
commit | b49249d10324d0fd6fb29725c2807dfd80d0edbc (patch) | |
tree | 9a8fa724e6c9f9283530979c6e32a311c74999d5 /drivers/md | |
parent | 10532b560bacf23766f9c7dc09778b31b198ff45 (diff) | |
parent | 45e621d45e24ffc4cb2b2935e8438987b860063a (diff) | |
download | op-kernel-dev-b49249d10324d0fd6fb29725c2807dfd80d0edbc.zip op-kernel-dev-b49249d10324d0fd6fb29725c2807dfd80d0edbc.tar.gz |
Merge tag 'dm-3.8-fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/agk/linux-dm
Pull dm update from Alasdair G Kergon:
"Miscellaneous device-mapper fixes, cleanups and performance
improvements.
Of particular note:
- Disable broken WRITE SAME support in all targets except linear and
striped. Use it when kcopyd is zeroing blocks.
- Remove several mempools from targets by moving the data into the
bio's new front_pad area (which dm calls 'per_bio_data').
- Fix a race in thin provisioning if discards are misused.
- Prevent userspace from interfering with the ioctl parameters and
use kmalloc for the data buffer if it's small instead of vmalloc.
- Throttle some annoying error messages when I/O fails."
* tag 'dm-3.8-fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/agk/linux-dm: (36 commits)
dm stripe: add WRITE SAME support
dm: remove map_info
dm snapshot: do not use map_context
dm thin: dont use map_context
dm raid1: dont use map_context
dm flakey: dont use map_context
dm raid1: rename read_record to bio_record
dm: move target request nr to dm_target_io
dm snapshot: use per_bio_data
dm verity: use per_bio_data
dm raid1: use per_bio_data
dm: introduce per_bio_data
dm kcopyd: add WRITE SAME support to dm_kcopyd_zero
dm linear: add WRITE SAME support
dm: add WRITE SAME support
dm: prepare to support WRITE SAME
dm ioctl: use kmalloc if possible
dm ioctl: remove PF_MEMALLOC
dm persistent data: improve space map block alloc failure message
dm thin: use DMERR_LIMIT for errors
...
Diffstat (limited to 'drivers/md')
28 files changed, 473 insertions, 433 deletions
diff --git a/drivers/md/dm-bio-prison.c b/drivers/md/dm-bio-prison.c index e4e8415..aefb78e 100644 --- a/drivers/md/dm-bio-prison.c +++ b/drivers/md/dm-bio-prison.c @@ -208,31 +208,6 @@ void dm_cell_release(struct dm_bio_prison_cell *cell, struct bio_list *bios) EXPORT_SYMBOL_GPL(dm_cell_release); /* - * There are a couple of places where we put a bio into a cell briefly - * before taking it out again. In these situations we know that no other - * bio may be in the cell. This function releases the cell, and also does - * a sanity check. - */ -static void __cell_release_singleton(struct dm_bio_prison_cell *cell, struct bio *bio) -{ - BUG_ON(cell->holder != bio); - BUG_ON(!bio_list_empty(&cell->bios)); - - __cell_release(cell, NULL); -} - -void dm_cell_release_singleton(struct dm_bio_prison_cell *cell, struct bio *bio) -{ - unsigned long flags; - struct dm_bio_prison *prison = cell->prison; - - spin_lock_irqsave(&prison->lock, flags); - __cell_release_singleton(cell, bio); - spin_unlock_irqrestore(&prison->lock, flags); -} -EXPORT_SYMBOL_GPL(dm_cell_release_singleton); - -/* * Sometimes we don't want the holder, just the additional bios. 
*/ static void __cell_release_no_holder(struct dm_bio_prison_cell *cell, struct bio_list *inmates) diff --git a/drivers/md/dm-bio-prison.h b/drivers/md/dm-bio-prison.h index 4e0ac376..53d1a7a 100644 --- a/drivers/md/dm-bio-prison.h +++ b/drivers/md/dm-bio-prison.h @@ -44,7 +44,6 @@ int dm_bio_detain(struct dm_bio_prison *prison, struct dm_cell_key *key, struct bio *inmate, struct dm_bio_prison_cell **ref); void dm_cell_release(struct dm_bio_prison_cell *cell, struct bio_list *bios); -void dm_cell_release_singleton(struct dm_bio_prison_cell *cell, struct bio *bio); // FIXME: bio arg not needed void dm_cell_release_no_holder(struct dm_bio_prison_cell *cell, struct bio_list *inmates); void dm_cell_error(struct dm_bio_prison_cell *cell); diff --git a/drivers/md/dm-crypt.c b/drivers/md/dm-crypt.c index bbf459b..f7369f9 100644 --- a/drivers/md/dm-crypt.c +++ b/drivers/md/dm-crypt.c @@ -1689,8 +1689,7 @@ bad: return ret; } -static int crypt_map(struct dm_target *ti, struct bio *bio, - union map_info *map_context) +static int crypt_map(struct dm_target *ti, struct bio *bio) { struct dm_crypt_io *io; struct crypt_config *cc = ti->private; @@ -1846,7 +1845,7 @@ static int crypt_iterate_devices(struct dm_target *ti, static struct target_type crypt_target = { .name = "crypt", - .version = {1, 11, 0}, + .version = {1, 12, 0}, .module = THIS_MODULE, .ctr = crypt_ctr, .dtr = crypt_dtr, diff --git a/drivers/md/dm-delay.c b/drivers/md/dm-delay.c index f53846f..cc1bd04 100644 --- a/drivers/md/dm-delay.c +++ b/drivers/md/dm-delay.c @@ -274,8 +274,7 @@ static void delay_resume(struct dm_target *ti) atomic_set(&dc->may_delay, 1); } -static int delay_map(struct dm_target *ti, struct bio *bio, - union map_info *map_context) +static int delay_map(struct dm_target *ti, struct bio *bio) { struct delay_c *dc = ti->private; @@ -338,7 +337,7 @@ out: static struct target_type delay_target = { .name = "delay", - .version = {1, 1, 0}, + .version = {1, 2, 0}, .module = THIS_MODULE, .ctr = 
delay_ctr, .dtr = delay_dtr, diff --git a/drivers/md/dm-flakey.c b/drivers/md/dm-flakey.c index cc15543..9721f2f 100644 --- a/drivers/md/dm-flakey.c +++ b/drivers/md/dm-flakey.c @@ -39,6 +39,10 @@ enum feature_flag_bits { DROP_WRITES }; +struct per_bio_data { + bool bio_submitted; +}; + static int parse_features(struct dm_arg_set *as, struct flakey_c *fc, struct dm_target *ti) { @@ -214,6 +218,7 @@ static int flakey_ctr(struct dm_target *ti, unsigned int argc, char **argv) ti->num_flush_requests = 1; ti->num_discard_requests = 1; + ti->per_bio_data_size = sizeof(struct per_bio_data); ti->private = fc; return 0; @@ -265,11 +270,12 @@ static void corrupt_bio_data(struct bio *bio, struct flakey_c *fc) } } -static int flakey_map(struct dm_target *ti, struct bio *bio, - union map_info *map_context) +static int flakey_map(struct dm_target *ti, struct bio *bio) { struct flakey_c *fc = ti->private; unsigned elapsed; + struct per_bio_data *pb = dm_per_bio_data(bio, sizeof(struct per_bio_data)); + pb->bio_submitted = false; /* Are we alive ? */ elapsed = (jiffies - fc->start_time) / HZ; @@ -277,7 +283,7 @@ static int flakey_map(struct dm_target *ti, struct bio *bio, /* * Flag this bio as submitted while down. */ - map_context->ll = 1; + pb->bio_submitted = true; /* * Map reads as normal. @@ -314,17 +320,16 @@ map_bio: return DM_MAPIO_REMAPPED; } -static int flakey_end_io(struct dm_target *ti, struct bio *bio, - int error, union map_info *map_context) +static int flakey_end_io(struct dm_target *ti, struct bio *bio, int error) { struct flakey_c *fc = ti->private; - unsigned bio_submitted_while_down = map_context->ll; + struct per_bio_data *pb = dm_per_bio_data(bio, sizeof(struct per_bio_data)); /* * Corrupt successful READs while in down state. * If flags were specified, only corrupt those that match. 
*/ - if (fc->corrupt_bio_byte && !error && bio_submitted_while_down && + if (fc->corrupt_bio_byte && !error && pb->bio_submitted && (bio_data_dir(bio) == READ) && (fc->corrupt_bio_rw == READ) && all_corrupt_bio_flags_match(bio, fc)) corrupt_bio_data(bio, fc); @@ -406,7 +411,7 @@ static int flakey_iterate_devices(struct dm_target *ti, iterate_devices_callout_ static struct target_type flakey_target = { .name = "flakey", - .version = {1, 2, 0}, + .version = {1, 3, 0}, .module = THIS_MODULE, .ctr = flakey_ctr, .dtr = flakey_dtr, diff --git a/drivers/md/dm-io.c b/drivers/md/dm-io.c index 1c46f97..ea49834 100644 --- a/drivers/md/dm-io.c +++ b/drivers/md/dm-io.c @@ -287,7 +287,8 @@ static void do_region(int rw, unsigned region, struct dm_io_region *where, unsigned num_bvecs; sector_t remaining = where->count; struct request_queue *q = bdev_get_queue(where->bdev); - sector_t discard_sectors; + unsigned short logical_block_size = queue_logical_block_size(q); + sector_t num_sectors; /* * where->count may be zero if rw holds a flush and we need to @@ -297,7 +298,7 @@ static void do_region(int rw, unsigned region, struct dm_io_region *where, /* * Allocate a suitably sized-bio. */ - if (rw & REQ_DISCARD) + if ((rw & REQ_DISCARD) || (rw & REQ_WRITE_SAME)) num_bvecs = 1; else num_bvecs = min_t(int, bio_get_nr_vecs(where->bdev), @@ -310,9 +311,21 @@ static void do_region(int rw, unsigned region, struct dm_io_region *where, store_io_and_region_in_bio(bio, io, region); if (rw & REQ_DISCARD) { - discard_sectors = min_t(sector_t, q->limits.max_discard_sectors, remaining); - bio->bi_size = discard_sectors << SECTOR_SHIFT; - remaining -= discard_sectors; + num_sectors = min_t(sector_t, q->limits.max_discard_sectors, remaining); + bio->bi_size = num_sectors << SECTOR_SHIFT; + remaining -= num_sectors; + } else if (rw & REQ_WRITE_SAME) { + /* + * WRITE SAME only uses a single page. 
+ */ + dp->get_page(dp, &page, &len, &offset); + bio_add_page(bio, page, logical_block_size, offset); + num_sectors = min_t(sector_t, q->limits.max_write_same_sectors, remaining); + bio->bi_size = num_sectors << SECTOR_SHIFT; + + offset = 0; + remaining -= num_sectors; + dp->next_page(dp); } else while (remaining) { /* * Try and add as many pages as possible. diff --git a/drivers/md/dm-ioctl.c b/drivers/md/dm-ioctl.c index afd9598..0666b5d 100644 --- a/drivers/md/dm-ioctl.c +++ b/drivers/md/dm-ioctl.c @@ -1543,7 +1543,21 @@ static int check_version(unsigned int cmd, struct dm_ioctl __user *user) return r; } -static int copy_params(struct dm_ioctl __user *user, struct dm_ioctl **param) +#define DM_PARAMS_VMALLOC 0x0001 /* Params alloced with vmalloc not kmalloc */ +#define DM_WIPE_BUFFER 0x0010 /* Wipe input buffer before returning from ioctl */ + +static void free_params(struct dm_ioctl *param, size_t param_size, int param_flags) +{ + if (param_flags & DM_WIPE_BUFFER) + memset(param, 0, param_size); + + if (param_flags & DM_PARAMS_VMALLOC) + vfree(param); + else + kfree(param); +} + +static int copy_params(struct dm_ioctl __user *user, struct dm_ioctl **param, int *param_flags) { struct dm_ioctl tmp, *dmi; int secure_data; @@ -1556,7 +1570,21 @@ static int copy_params(struct dm_ioctl __user *user, struct dm_ioctl **param) secure_data = tmp.flags & DM_SECURE_DATA_FLAG; - dmi = vmalloc(tmp.data_size); + *param_flags = secure_data ? DM_WIPE_BUFFER : 0; + + /* + * Try to avoid low memory issues when a device is suspended. + * Use kmalloc() rather than vmalloc() when we can. 
+ */ + dmi = NULL; + if (tmp.data_size <= KMALLOC_MAX_SIZE) + dmi = kmalloc(tmp.data_size, GFP_NOIO | __GFP_NORETRY | __GFP_NOMEMALLOC | __GFP_NOWARN); + + if (!dmi) { + dmi = __vmalloc(tmp.data_size, GFP_NOIO | __GFP_REPEAT | __GFP_HIGH, PAGE_KERNEL); + *param_flags |= DM_PARAMS_VMALLOC; + } + if (!dmi) { if (secure_data && clear_user(user, tmp.data_size)) return -EFAULT; @@ -1566,6 +1594,14 @@ static int copy_params(struct dm_ioctl __user *user, struct dm_ioctl **param) if (copy_from_user(dmi, user, tmp.data_size)) goto bad; + /* + * Abort if something changed the ioctl data while it was being copied. + */ + if (dmi->data_size != tmp.data_size) { + DMERR("rejecting ioctl: data size modified while processing parameters"); + goto bad; + } + /* Wipe the user buffer so we do not return it to userspace */ if (secure_data && clear_user(user, tmp.data_size)) goto bad; @@ -1574,9 +1610,8 @@ static int copy_params(struct dm_ioctl __user *user, struct dm_ioctl **param) return 0; bad: - if (secure_data) - memset(dmi, 0, tmp.data_size); - vfree(dmi); + free_params(dmi, tmp.data_size, *param_flags); + return -EFAULT; } @@ -1613,7 +1648,7 @@ static int validate_params(uint cmd, struct dm_ioctl *param) static int ctl_ioctl(uint command, struct dm_ioctl __user *user) { int r = 0; - int wipe_buffer; + int param_flags; unsigned int cmd; struct dm_ioctl *uninitialized_var(param); ioctl_fn fn = NULL; @@ -1649,24 +1684,14 @@ static int ctl_ioctl(uint command, struct dm_ioctl __user *user) } /* - * Trying to avoid low memory issues when a device is - * suspended. - */ - current->flags |= PF_MEMALLOC; - - /* * Copy the parameters into kernel space. 
*/ - r = copy_params(user, &param); - - current->flags &= ~PF_MEMALLOC; + r = copy_params(user, &param, &param_flags); if (r) return r; input_param_size = param->data_size; - wipe_buffer = param->flags & DM_SECURE_DATA_FLAG; - r = validate_params(cmd, param); if (r) goto out; @@ -1681,10 +1706,7 @@ static int ctl_ioctl(uint command, struct dm_ioctl __user *user) r = -EFAULT; out: - if (wipe_buffer) - memset(param, 0, input_param_size); - - vfree(param); + free_params(param, input_param_size, param_flags); return r; } diff --git a/drivers/md/dm-kcopyd.c b/drivers/md/dm-kcopyd.c index bed444c..68c0267 100644 --- a/drivers/md/dm-kcopyd.c +++ b/drivers/md/dm-kcopyd.c @@ -349,7 +349,7 @@ static void complete_io(unsigned long error, void *context) struct dm_kcopyd_client *kc = job->kc; if (error) { - if (job->rw == WRITE) + if (job->rw & WRITE) job->write_err |= error; else job->read_err = 1; @@ -361,7 +361,7 @@ static void complete_io(unsigned long error, void *context) } } - if (job->rw == WRITE) + if (job->rw & WRITE) push(&kc->complete_jobs, job); else { @@ -432,7 +432,7 @@ static int process_jobs(struct list_head *jobs, struct dm_kcopyd_client *kc, if (r < 0) { /* error this rogue job */ - if (job->rw == WRITE) + if (job->rw & WRITE) job->write_err = (unsigned long) -1L; else job->read_err = 1; @@ -585,6 +585,7 @@ int dm_kcopyd_copy(struct dm_kcopyd_client *kc, struct dm_io_region *from, unsigned int flags, dm_kcopyd_notify_fn fn, void *context) { struct kcopyd_job *job; + int i; /* * Allocate an array of jobs consisting of one master job @@ -611,7 +612,16 @@ int dm_kcopyd_copy(struct dm_kcopyd_client *kc, struct dm_io_region *from, memset(&job->source, 0, sizeof job->source); job->source.count = job->dests[0].count; job->pages = &zero_page_list; - job->rw = WRITE; + + /* + * Use WRITE SAME to optimize zeroing if all dests support it. 
+ */ + job->rw = WRITE | REQ_WRITE_SAME; + for (i = 0; i < job->num_dests; i++) + if (!bdev_write_same(job->dests[i].bdev)) { + job->rw = WRITE; + break; + } } job->fn = fn; diff --git a/drivers/md/dm-linear.c b/drivers/md/dm-linear.c index 1bf19a9..328cad5 100644 --- a/drivers/md/dm-linear.c +++ b/drivers/md/dm-linear.c @@ -55,6 +55,7 @@ static int linear_ctr(struct dm_target *ti, unsigned int argc, char **argv) ti->num_flush_requests = 1; ti->num_discard_requests = 1; + ti->num_write_same_requests = 1; ti->private = lc; return 0; @@ -87,8 +88,7 @@ static void linear_map_bio(struct dm_target *ti, struct bio *bio) bio->bi_sector = linear_map_sector(ti, bio->bi_sector); } -static int linear_map(struct dm_target *ti, struct bio *bio, - union map_info *map_context) +static int linear_map(struct dm_target *ti, struct bio *bio) { linear_map_bio(ti, bio); @@ -155,7 +155,7 @@ static int linear_iterate_devices(struct dm_target *ti, static struct target_type linear_target = { .name = "linear", - .version = {1, 1, 0}, + .version = {1, 2, 0}, .module = THIS_MODULE, .ctr = linear_ctr, .dtr = linear_dtr, diff --git a/drivers/md/dm-raid.c b/drivers/md/dm-raid.c index 45d94a7..3d8984e 100644 --- a/drivers/md/dm-raid.c +++ b/drivers/md/dm-raid.c @@ -295,9 +295,11 @@ static int validate_region_size(struct raid_set *rs, unsigned long region_size) * Choose a reasonable default. All figures in sectors. 
*/ if (min_region_size > (1 << 13)) { + /* If not a power of 2, make it the next power of 2 */ + if (min_region_size & (min_region_size - 1)) + region_size = 1 << fls(region_size); DMINFO("Choosing default region size of %lu sectors", region_size); - region_size = min_region_size; } else { DMINFO("Choosing default region size of 4MiB"); region_size = 1 << 13; /* sectors */ @@ -1216,7 +1218,7 @@ static void raid_dtr(struct dm_target *ti) context_free(rs); } -static int raid_map(struct dm_target *ti, struct bio *bio, union map_info *map_context) +static int raid_map(struct dm_target *ti, struct bio *bio) { struct raid_set *rs = ti->private; struct mddev *mddev = &rs->md; @@ -1430,7 +1432,7 @@ static void raid_resume(struct dm_target *ti) static struct target_type raid_target = { .name = "raid", - .version = {1, 3, 1}, + .version = {1, 4, 0}, .module = THIS_MODULE, .ctr = raid_ctr, .dtr = raid_dtr, diff --git a/drivers/md/dm-raid1.c b/drivers/md/dm-raid1.c index fd61f98..fa51918 100644 --- a/drivers/md/dm-raid1.c +++ b/drivers/md/dm-raid1.c @@ -61,7 +61,6 @@ struct mirror_set { struct dm_region_hash *rh; struct dm_kcopyd_client *kcopyd_client; struct dm_io_client *io_client; - mempool_t *read_record_pool; /* recovery */ region_t nr_regions; @@ -139,14 +138,13 @@ static void dispatch_bios(void *context, struct bio_list *bio_list) queue_bio(ms, bio, WRITE); } -#define MIN_READ_RECORDS 20 -struct dm_raid1_read_record { +struct dm_raid1_bio_record { struct mirror *m; + /* if details->bi_bdev == NULL, details were not saved */ struct dm_bio_details details; + region_t write_region; }; -static struct kmem_cache *_dm_raid1_read_record_cache; - /* * Every mirror should look like this one. 
*/ @@ -876,19 +874,9 @@ static struct mirror_set *alloc_context(unsigned int nr_mirrors, atomic_set(&ms->suspend, 0); atomic_set(&ms->default_mirror, DEFAULT_MIRROR); - ms->read_record_pool = mempool_create_slab_pool(MIN_READ_RECORDS, - _dm_raid1_read_record_cache); - - if (!ms->read_record_pool) { - ti->error = "Error creating mirror read_record_pool"; - kfree(ms); - return NULL; - } - ms->io_client = dm_io_client_create(); if (IS_ERR(ms->io_client)) { ti->error = "Error creating dm_io client"; - mempool_destroy(ms->read_record_pool); kfree(ms); return NULL; } @@ -900,7 +888,6 @@ static struct mirror_set *alloc_context(unsigned int nr_mirrors, if (IS_ERR(ms->rh)) { ti->error = "Error creating dirty region hash"; dm_io_client_destroy(ms->io_client); - mempool_destroy(ms->read_record_pool); kfree(ms); return NULL; } @@ -916,7 +903,6 @@ static void free_context(struct mirror_set *ms, struct dm_target *ti, dm_io_client_destroy(ms->io_client); dm_region_hash_destroy(ms->rh); - mempool_destroy(ms->read_record_pool); kfree(ms); } @@ -1088,6 +1074,7 @@ static int mirror_ctr(struct dm_target *ti, unsigned int argc, char **argv) ti->num_flush_requests = 1; ti->num_discard_requests = 1; + ti->per_bio_data_size = sizeof(struct dm_raid1_bio_record); ti->discard_zeroes_data_unsupported = true; ms->kmirrord_wq = alloc_workqueue("kmirrord", @@ -1155,18 +1142,20 @@ static void mirror_dtr(struct dm_target *ti) /* * Mirror mapping function */ -static int mirror_map(struct dm_target *ti, struct bio *bio, - union map_info *map_context) +static int mirror_map(struct dm_target *ti, struct bio *bio) { int r, rw = bio_rw(bio); struct mirror *m; struct mirror_set *ms = ti->private; - struct dm_raid1_read_record *read_record = NULL; struct dm_dirty_log *log = dm_rh_dirty_log(ms->rh); + struct dm_raid1_bio_record *bio_record = + dm_per_bio_data(bio, sizeof(struct dm_raid1_bio_record)); + + bio_record->details.bi_bdev = NULL; if (rw == WRITE) { /* Save region for mirror_end_io() handler */ - 
map_context->ll = dm_rh_bio_to_region(ms->rh, bio); + bio_record->write_region = dm_rh_bio_to_region(ms->rh, bio); queue_bio(ms, bio, rw); return DM_MAPIO_SUBMITTED; } @@ -1194,33 +1183,29 @@ static int mirror_map(struct dm_target *ti, struct bio *bio, if (unlikely(!m)) return -EIO; - read_record = mempool_alloc(ms->read_record_pool, GFP_NOIO); - if (likely(read_record)) { - dm_bio_record(&read_record->details, bio); - map_context->ptr = read_record; - read_record->m = m; - } + dm_bio_record(&bio_record->details, bio); + bio_record->m = m; map_bio(m, bio); return DM_MAPIO_REMAPPED; } -static int mirror_end_io(struct dm_target *ti, struct bio *bio, - int error, union map_info *map_context) +static int mirror_end_io(struct dm_target *ti, struct bio *bio, int error) { int rw = bio_rw(bio); struct mirror_set *ms = (struct mirror_set *) ti->private; struct mirror *m = NULL; struct dm_bio_details *bd = NULL; - struct dm_raid1_read_record *read_record = map_context->ptr; + struct dm_raid1_bio_record *bio_record = + dm_per_bio_data(bio, sizeof(struct dm_raid1_bio_record)); /* * We need to dec pending if this was a write. */ if (rw == WRITE) { if (!(bio->bi_rw & (REQ_FLUSH | REQ_DISCARD))) - dm_rh_dec(ms->rh, map_context->ll); + dm_rh_dec(ms->rh, bio_record->write_region); return error; } @@ -1231,7 +1216,7 @@ static int mirror_end_io(struct dm_target *ti, struct bio *bio, goto out; if (unlikely(error)) { - if (!read_record) { + if (!bio_record->details.bi_bdev) { /* * There wasn't enough memory to record necessary * information for a retry or there was no other @@ -1241,7 +1226,7 @@ static int mirror_end_io(struct dm_target *ti, struct bio *bio, return -EIO; } - m = read_record->m; + m = bio_record->m; DMERR("Mirror read failed from %s. Trying alternative device.", m->dev->name); @@ -1253,22 +1238,18 @@ static int mirror_end_io(struct dm_target *ti, struct bio *bio, * mirror. 
*/ if (default_ok(m) || mirror_available(ms, bio)) { - bd = &read_record->details; + bd = &bio_record->details; dm_bio_restore(bd, bio); - mempool_free(read_record, ms->read_record_pool); - map_context->ptr = NULL; + bio_record->details.bi_bdev = NULL; queue_bio(ms, bio, rw); - return 1; + return DM_ENDIO_INCOMPLETE; } DMERR("All replicated volumes dead, failing I/O"); } out: - if (read_record) { - mempool_free(read_record, ms->read_record_pool); - map_context->ptr = NULL; - } + bio_record->details.bi_bdev = NULL; return error; } @@ -1422,7 +1403,7 @@ static int mirror_iterate_devices(struct dm_target *ti, static struct target_type mirror_target = { .name = "mirror", - .version = {1, 12, 1}, + .version = {1, 13, 1}, .module = THIS_MODULE, .ctr = mirror_ctr, .dtr = mirror_dtr, @@ -1439,13 +1420,6 @@ static int __init dm_mirror_init(void) { int r; - _dm_raid1_read_record_cache = KMEM_CACHE(dm_raid1_read_record, 0); - if (!_dm_raid1_read_record_cache) { - DMERR("Can't allocate dm_raid1_read_record cache"); - r = -ENOMEM; - goto bad_cache; - } - r = dm_register_target(&mirror_target); if (r < 0) { DMERR("Failed to register mirror target"); @@ -1455,15 +1429,12 @@ static int __init dm_mirror_init(void) return 0; bad_target: - kmem_cache_destroy(_dm_raid1_read_record_cache); -bad_cache: return r; } static void __exit dm_mirror_exit(void) { dm_unregister_target(&mirror_target); - kmem_cache_destroy(_dm_raid1_read_record_cache); } /* Module hooks */ diff --git a/drivers/md/dm-snap.c b/drivers/md/dm-snap.c index a143921..59fc18a 100644 --- a/drivers/md/dm-snap.c +++ b/drivers/md/dm-snap.c @@ -79,7 +79,6 @@ struct dm_snapshot { /* Chunks with outstanding reads */ spinlock_t tracked_chunk_lock; - mempool_t *tracked_chunk_pool; struct hlist_head tracked_chunk_hash[DM_TRACKED_CHUNK_HASH_SIZE]; /* The on disk metadata handler */ @@ -191,35 +190,38 @@ struct dm_snap_tracked_chunk { chunk_t chunk; }; -static struct kmem_cache *tracked_chunk_cache; +static void 
init_tracked_chunk(struct bio *bio) +{ + struct dm_snap_tracked_chunk *c = dm_per_bio_data(bio, sizeof(struct dm_snap_tracked_chunk)); + INIT_HLIST_NODE(&c->node); +} -static struct dm_snap_tracked_chunk *track_chunk(struct dm_snapshot *s, - chunk_t chunk) +static bool is_bio_tracked(struct bio *bio) { - struct dm_snap_tracked_chunk *c = mempool_alloc(s->tracked_chunk_pool, - GFP_NOIO); - unsigned long flags; + struct dm_snap_tracked_chunk *c = dm_per_bio_data(bio, sizeof(struct dm_snap_tracked_chunk)); + return !hlist_unhashed(&c->node); +} + +static void track_chunk(struct dm_snapshot *s, struct bio *bio, chunk_t chunk) +{ + struct dm_snap_tracked_chunk *c = dm_per_bio_data(bio, sizeof(struct dm_snap_tracked_chunk)); c->chunk = chunk; - spin_lock_irqsave(&s->tracked_chunk_lock, flags); + spin_lock_irq(&s->tracked_chunk_lock); hlist_add_head(&c->node, &s->tracked_chunk_hash[DM_TRACKED_CHUNK_HASH(chunk)]); - spin_unlock_irqrestore(&s->tracked_chunk_lock, flags); - - return c; + spin_unlock_irq(&s->tracked_chunk_lock); } -static void stop_tracking_chunk(struct dm_snapshot *s, - struct dm_snap_tracked_chunk *c) +static void stop_tracking_chunk(struct dm_snapshot *s, struct bio *bio) { + struct dm_snap_tracked_chunk *c = dm_per_bio_data(bio, sizeof(struct dm_snap_tracked_chunk)); unsigned long flags; spin_lock_irqsave(&s->tracked_chunk_lock, flags); hlist_del(&c->node); spin_unlock_irqrestore(&s->tracked_chunk_lock, flags); - - mempool_free(c, s->tracked_chunk_pool); } static int __chunk_is_tracked(struct dm_snapshot *s, chunk_t chunk) @@ -1120,14 +1122,6 @@ static int snapshot_ctr(struct dm_target *ti, unsigned int argc, char **argv) goto bad_pending_pool; } - s->tracked_chunk_pool = mempool_create_slab_pool(MIN_IOS, - tracked_chunk_cache); - if (!s->tracked_chunk_pool) { - ti->error = "Could not allocate tracked_chunk mempool for " - "tracking reads"; - goto bad_tracked_chunk_pool; - } - for (i = 0; i < DM_TRACKED_CHUNK_HASH_SIZE; i++) 
INIT_HLIST_HEAD(&s->tracked_chunk_hash[i]); @@ -1135,6 +1129,7 @@ static int snapshot_ctr(struct dm_target *ti, unsigned int argc, char **argv) ti->private = s; ti->num_flush_requests = num_flush_requests; + ti->per_bio_data_size = sizeof(struct dm_snap_tracked_chunk); /* Add snapshot to the list of snapshots for this origin */ /* Exceptions aren't triggered till snapshot_resume() is called */ @@ -1183,9 +1178,6 @@ bad_read_metadata: unregister_snapshot(s); bad_load_and_register: - mempool_destroy(s->tracked_chunk_pool); - -bad_tracked_chunk_pool: mempool_destroy(s->pending_pool); bad_pending_pool: @@ -1290,8 +1282,6 @@ static void snapshot_dtr(struct dm_target *ti) BUG_ON(!hlist_empty(&s->tracked_chunk_hash[i])); #endif - mempool_destroy(s->tracked_chunk_pool); - __free_exceptions(s); mempool_destroy(s->pending_pool); @@ -1577,8 +1567,7 @@ static void remap_exception(struct dm_snapshot *s, struct dm_exception *e, s->store->chunk_mask); } -static int snapshot_map(struct dm_target *ti, struct bio *bio, - union map_info *map_context) +static int snapshot_map(struct dm_target *ti, struct bio *bio) { struct dm_exception *e; struct dm_snapshot *s = ti->private; @@ -1586,6 +1575,8 @@ static int snapshot_map(struct dm_target *ti, struct bio *bio, chunk_t chunk; struct dm_snap_pending_exception *pe = NULL; + init_tracked_chunk(bio); + if (bio->bi_rw & REQ_FLUSH) { bio->bi_bdev = s->cow->bdev; return DM_MAPIO_REMAPPED; @@ -1670,7 +1661,7 @@ static int snapshot_map(struct dm_target *ti, struct bio *bio, } } else { bio->bi_bdev = s->origin->bdev; - map_context->ptr = track_chunk(s, chunk); + track_chunk(s, bio, chunk); } out_unlock: @@ -1691,20 +1682,20 @@ out: * If merging is currently taking place on the chunk in question, the * I/O is deferred by adding it to s->bios_queued_during_merge. 
*/ -static int snapshot_merge_map(struct dm_target *ti, struct bio *bio, - union map_info *map_context) +static int snapshot_merge_map(struct dm_target *ti, struct bio *bio) { struct dm_exception *e; struct dm_snapshot *s = ti->private; int r = DM_MAPIO_REMAPPED; chunk_t chunk; + init_tracked_chunk(bio); + if (bio->bi_rw & REQ_FLUSH) { - if (!map_context->target_request_nr) + if (!dm_bio_get_target_request_nr(bio)) bio->bi_bdev = s->origin->bdev; else bio->bi_bdev = s->cow->bdev; - map_context->ptr = NULL; return DM_MAPIO_REMAPPED; } @@ -1733,7 +1724,7 @@ static int snapshot_merge_map(struct dm_target *ti, struct bio *bio, remap_exception(s, e, bio, chunk); if (bio_rw(bio) == WRITE) - map_context->ptr = track_chunk(s, chunk); + track_chunk(s, bio, chunk); goto out_unlock; } @@ -1751,14 +1742,12 @@ out_unlock: return r; } -static int snapshot_end_io(struct dm_target *ti, struct bio *bio, - int error, union map_info *map_context) +static int snapshot_end_io(struct dm_target *ti, struct bio *bio, int error) { struct dm_snapshot *s = ti->private; - struct dm_snap_tracked_chunk *c = map_context->ptr; - if (c) - stop_tracking_chunk(s, c); + if (is_bio_tracked(bio)) + stop_tracking_chunk(s, bio); return 0; } @@ -2127,8 +2116,7 @@ static void origin_dtr(struct dm_target *ti) dm_put_device(ti, dev); } -static int origin_map(struct dm_target *ti, struct bio *bio, - union map_info *map_context) +static int origin_map(struct dm_target *ti, struct bio *bio) { struct dm_dev *dev = ti->private; bio->bi_bdev = dev->bdev; @@ -2193,7 +2181,7 @@ static int origin_iterate_devices(struct dm_target *ti, static struct target_type origin_target = { .name = "snapshot-origin", - .version = {1, 7, 1}, + .version = {1, 8, 0}, .module = THIS_MODULE, .ctr = origin_ctr, .dtr = origin_dtr, @@ -2206,7 +2194,7 @@ static struct target_type origin_target = { static struct target_type snapshot_target = { .name = "snapshot", - .version = {1, 10, 0}, + .version = {1, 11, 0}, .module = THIS_MODULE, .ctr 
= snapshot_ctr, .dtr = snapshot_dtr, @@ -2220,7 +2208,7 @@ static struct target_type snapshot_target = { static struct target_type merge_target = { .name = dm_snapshot_merge_target_name, - .version = {1, 1, 0}, + .version = {1, 2, 0}, .module = THIS_MODULE, .ctr = snapshot_ctr, .dtr = snapshot_dtr, @@ -2281,17 +2269,8 @@ static int __init dm_snapshot_init(void) goto bad_pending_cache; } - tracked_chunk_cache = KMEM_CACHE(dm_snap_tracked_chunk, 0); - if (!tracked_chunk_cache) { - DMERR("Couldn't create cache to track chunks in use."); - r = -ENOMEM; - goto bad_tracked_chunk_cache; - } - return 0; -bad_tracked_chunk_cache: - kmem_cache_destroy(pending_cache); bad_pending_cache: kmem_cache_destroy(exception_cache); bad_exception_cache: @@ -2317,7 +2296,6 @@ static void __exit dm_snapshot_exit(void) exit_origin_hash(); kmem_cache_destroy(pending_cache); kmem_cache_destroy(exception_cache); - kmem_cache_destroy(tracked_chunk_cache); dm_exception_store_exit(); } diff --git a/drivers/md/dm-stripe.c b/drivers/md/dm-stripe.c index e2f87653..c89cde8 100644 --- a/drivers/md/dm-stripe.c +++ b/drivers/md/dm-stripe.c @@ -162,6 +162,7 @@ static int stripe_ctr(struct dm_target *ti, unsigned int argc, char **argv) ti->num_flush_requests = stripes; ti->num_discard_requests = stripes; + ti->num_write_same_requests = stripes; sc->chunk_size = chunk_size; if (chunk_size & (chunk_size - 1)) @@ -251,8 +252,8 @@ static void stripe_map_range_sector(struct stripe_c *sc, sector_t sector, *result += sc->chunk_size; /* next chunk */ } -static int stripe_map_discard(struct stripe_c *sc, struct bio *bio, - uint32_t target_stripe) +static int stripe_map_range(struct stripe_c *sc, struct bio *bio, + uint32_t target_stripe) { sector_t begin, end; @@ -271,23 +272,23 @@ static int stripe_map_discard(struct stripe_c *sc, struct bio *bio, } } -static int stripe_map(struct dm_target *ti, struct bio *bio, - union map_info *map_context) +static int stripe_map(struct dm_target *ti, struct bio *bio) { 
struct stripe_c *sc = ti->private; uint32_t stripe; unsigned target_request_nr; if (bio->bi_rw & REQ_FLUSH) { - target_request_nr = map_context->target_request_nr; + target_request_nr = dm_bio_get_target_request_nr(bio); BUG_ON(target_request_nr >= sc->stripes); bio->bi_bdev = sc->stripe[target_request_nr].dev->bdev; return DM_MAPIO_REMAPPED; } - if (unlikely(bio->bi_rw & REQ_DISCARD)) { - target_request_nr = map_context->target_request_nr; + if (unlikely(bio->bi_rw & REQ_DISCARD) || + unlikely(bio->bi_rw & REQ_WRITE_SAME)) { + target_request_nr = dm_bio_get_target_request_nr(bio); BUG_ON(target_request_nr >= sc->stripes); - return stripe_map_discard(sc, bio, target_request_nr); + return stripe_map_range(sc, bio, target_request_nr); } stripe_map_sector(sc, bio->bi_sector, &stripe, &bio->bi_sector); @@ -342,8 +343,7 @@ static int stripe_status(struct dm_target *ti, status_type_t type, return 0; } -static int stripe_end_io(struct dm_target *ti, struct bio *bio, - int error, union map_info *map_context) +static int stripe_end_io(struct dm_target *ti, struct bio *bio, int error) { unsigned i; char major_minor[16]; diff --git a/drivers/md/dm-table.c b/drivers/md/dm-table.c index 100368e..daf25d0 100644 --- a/drivers/md/dm-table.c +++ b/drivers/md/dm-table.c @@ -967,13 +967,22 @@ bool dm_table_request_based(struct dm_table *t) int dm_table_alloc_md_mempools(struct dm_table *t) { unsigned type = dm_table_get_type(t); + unsigned per_bio_data_size = 0; + struct dm_target *tgt; + unsigned i; if (unlikely(type == DM_TYPE_NONE)) { DMWARN("no table type is set, can't allocate mempools"); return -EINVAL; } - t->mempools = dm_alloc_md_mempools(type, t->integrity_supported); + if (type == DM_TYPE_BIO_BASED) + for (i = 0; i < t->num_targets; i++) { + tgt = t->targets + i; + per_bio_data_size = max(per_bio_data_size, tgt->per_bio_data_size); + } + + t->mempools = dm_alloc_md_mempools(type, t->integrity_supported, per_bio_data_size); if (!t->mempools) return -ENOMEM; @@ -1414,6 
+1423,33 @@ static bool dm_table_all_devices_attribute(struct dm_table *t, return 1; } +static int device_not_write_same_capable(struct dm_target *ti, struct dm_dev *dev, + sector_t start, sector_t len, void *data) +{ + struct request_queue *q = bdev_get_queue(dev->bdev); + + return q && !q->limits.max_write_same_sectors; +} + +static bool dm_table_supports_write_same(struct dm_table *t) +{ + struct dm_target *ti; + unsigned i = 0; + + while (i < dm_table_get_num_targets(t)) { + ti = dm_table_get_target(t, i++); + + if (!ti->num_write_same_requests) + return false; + + if (!ti->type->iterate_devices || + !ti->type->iterate_devices(ti, device_not_write_same_capable, NULL)) + return false; + } + + return true; +} + void dm_table_set_restrictions(struct dm_table *t, struct request_queue *q, struct queue_limits *limits) { @@ -1445,6 +1481,9 @@ void dm_table_set_restrictions(struct dm_table *t, struct request_queue *q, else queue_flag_clear_unlocked(QUEUE_FLAG_NONROT, q); + if (!dm_table_supports_write_same(t)) + q->limits.max_write_same_sectors = 0; + dm_table_set_integrity(t); /* diff --git a/drivers/md/dm-target.c b/drivers/md/dm-target.c index 8da366c..617d21a 100644 --- a/drivers/md/dm-target.c +++ b/drivers/md/dm-target.c @@ -126,15 +126,14 @@ static void io_err_dtr(struct dm_target *tt) /* empty */ } -static int io_err_map(struct dm_target *tt, struct bio *bio, - union map_info *map_context) +static int io_err_map(struct dm_target *tt, struct bio *bio) { return -EIO; } static struct target_type error_target = { .name = "error", - .version = {1, 0, 1}, + .version = {1, 1, 0}, .ctr = io_err_ctr, .dtr = io_err_dtr, .map = io_err_map, diff --git a/drivers/md/dm-thin-metadata.c b/drivers/md/dm-thin-metadata.c index 693e149..4d6e853 100644 --- a/drivers/md/dm-thin-metadata.c +++ b/drivers/md/dm-thin-metadata.c @@ -408,7 +408,7 @@ static void __setup_btree_details(struct dm_pool_metadata *pmd) pmd->tl_info.tm = pmd->tm; pmd->tl_info.levels = 1; - 
pmd->tl_info.value_type.context = &pmd->info; + pmd->tl_info.value_type.context = &pmd->bl_info; pmd->tl_info.value_type.size = sizeof(__le64); pmd->tl_info.value_type.inc = subtree_inc; pmd->tl_info.value_type.dec = subtree_dec; diff --git a/drivers/md/dm-thin.c b/drivers/md/dm-thin.c index 058acf3..675ae52 100644 --- a/drivers/md/dm-thin.c +++ b/drivers/md/dm-thin.c @@ -186,7 +186,6 @@ struct pool { struct dm_thin_new_mapping *next_mapping; mempool_t *mapping_pool; - mempool_t *endio_hook_pool; process_bio_fn process_bio; process_bio_fn process_discard; @@ -304,7 +303,7 @@ static void __requeue_bio_list(struct thin_c *tc, struct bio_list *master) bio_list_init(master); while ((bio = bio_list_pop(&bios))) { - struct dm_thin_endio_hook *h = dm_get_mapinfo(bio)->ptr; + struct dm_thin_endio_hook *h = dm_per_bio_data(bio, sizeof(struct dm_thin_endio_hook)); if (h->tc == tc) bio_endio(bio, DM_ENDIO_REQUEUE); @@ -368,6 +367,17 @@ static int bio_triggers_commit(struct thin_c *tc, struct bio *bio) dm_thin_changed_this_transaction(tc->td); } +static void inc_all_io_entry(struct pool *pool, struct bio *bio) +{ + struct dm_thin_endio_hook *h; + + if (bio->bi_rw & REQ_DISCARD) + return; + + h = dm_per_bio_data(bio, sizeof(struct dm_thin_endio_hook)); + h->all_io_entry = dm_deferred_entry_inc(pool->all_io_ds); +} + static void issue(struct thin_c *tc, struct bio *bio) { struct pool *pool = tc->pool; @@ -474,7 +484,7 @@ static void copy_complete(int read_err, unsigned long write_err, void *context) static void overwrite_endio(struct bio *bio, int err) { unsigned long flags; - struct dm_thin_endio_hook *h = dm_get_mapinfo(bio)->ptr; + struct dm_thin_endio_hook *h = dm_per_bio_data(bio, sizeof(struct dm_thin_endio_hook)); struct dm_thin_new_mapping *m = h->overwrite_mapping; struct pool *pool = m->tc->pool; @@ -499,8 +509,7 @@ static void overwrite_endio(struct bio *bio, int err) /* * This sends the bios in the cell back to the deferred_bios list. 
*/ -static void cell_defer(struct thin_c *tc, struct dm_bio_prison_cell *cell, - dm_block_t data_block) +static void cell_defer(struct thin_c *tc, struct dm_bio_prison_cell *cell) { struct pool *pool = tc->pool; unsigned long flags; @@ -513,17 +522,13 @@ static void cell_defer(struct thin_c *tc, struct dm_bio_prison_cell *cell, } /* - * Same as cell_defer above, except it omits one particular detainee, - * a write bio that covers the block and has already been processed. + * Same as cell_defer except it omits the original holder of the cell. */ -static void cell_defer_except(struct thin_c *tc, struct dm_bio_prison_cell *cell) +static void cell_defer_no_holder(struct thin_c *tc, struct dm_bio_prison_cell *cell) { - struct bio_list bios; struct pool *pool = tc->pool; unsigned long flags; - bio_list_init(&bios); - spin_lock_irqsave(&pool->lock, flags); dm_cell_release_no_holder(cell, &pool->deferred_bios); spin_unlock_irqrestore(&pool->lock, flags); @@ -561,7 +566,7 @@ static void process_prepared_mapping(struct dm_thin_new_mapping *m) */ r = dm_thin_insert_block(tc->td, m->virt_block, m->data_block); if (r) { - DMERR("dm_thin_insert_block() failed"); + DMERR_LIMIT("dm_thin_insert_block() failed"); dm_cell_error(m->cell); goto out; } @@ -573,10 +578,10 @@ static void process_prepared_mapping(struct dm_thin_new_mapping *m) * the bios in the cell. 
*/ if (bio) { - cell_defer_except(tc, m->cell); + cell_defer_no_holder(tc, m->cell); bio_endio(bio, 0); } else - cell_defer(tc, m->cell, m->data_block); + cell_defer(tc, m->cell); out: list_del(&m->list); @@ -588,8 +593,8 @@ static void process_prepared_discard_fail(struct dm_thin_new_mapping *m) struct thin_c *tc = m->tc; bio_io_error(m->bio); - cell_defer_except(tc, m->cell); - cell_defer_except(tc, m->cell2); + cell_defer_no_holder(tc, m->cell); + cell_defer_no_holder(tc, m->cell2); mempool_free(m, tc->pool->mapping_pool); } @@ -597,13 +602,15 @@ static void process_prepared_discard_passdown(struct dm_thin_new_mapping *m) { struct thin_c *tc = m->tc; + inc_all_io_entry(tc->pool, m->bio); + cell_defer_no_holder(tc, m->cell); + cell_defer_no_holder(tc, m->cell2); + if (m->pass_discard) remap_and_issue(tc, m->bio, m->data_block); else bio_endio(m->bio, 0); - cell_defer_except(tc, m->cell); - cell_defer_except(tc, m->cell2); mempool_free(m, tc->pool->mapping_pool); } @@ -614,7 +621,7 @@ static void process_prepared_discard(struct dm_thin_new_mapping *m) r = dm_thin_remove_block(tc->td, m->virt_block); if (r) - DMERR("dm_thin_remove_block() failed"); + DMERR_LIMIT("dm_thin_remove_block() failed"); process_prepared_discard_passdown(m); } @@ -706,11 +713,12 @@ static void schedule_copy(struct thin_c *tc, dm_block_t virt_block, * bio immediately. Otherwise we use kcopyd to clone the data first. 
*/ if (io_overwrites_block(pool, bio)) { - struct dm_thin_endio_hook *h = dm_get_mapinfo(bio)->ptr; + struct dm_thin_endio_hook *h = dm_per_bio_data(bio, sizeof(struct dm_thin_endio_hook)); h->overwrite_mapping = m; m->bio = bio; save_and_set_endio(bio, &m->saved_bi_end_io, overwrite_endio); + inc_all_io_entry(pool, bio); remap_and_issue(tc, bio, data_dest); } else { struct dm_io_region from, to; @@ -727,7 +735,7 @@ static void schedule_copy(struct thin_c *tc, dm_block_t virt_block, 0, copy_complete, m); if (r < 0) { mempool_free(m, pool->mapping_pool); - DMERR("dm_kcopyd_copy() failed"); + DMERR_LIMIT("dm_kcopyd_copy() failed"); dm_cell_error(cell); } } @@ -775,11 +783,12 @@ static void schedule_zero(struct thin_c *tc, dm_block_t virt_block, process_prepared_mapping(m); else if (io_overwrites_block(pool, bio)) { - struct dm_thin_endio_hook *h = dm_get_mapinfo(bio)->ptr; + struct dm_thin_endio_hook *h = dm_per_bio_data(bio, sizeof(struct dm_thin_endio_hook)); h->overwrite_mapping = m; m->bio = bio; save_and_set_endio(bio, &m->saved_bi_end_io, overwrite_endio); + inc_all_io_entry(pool, bio); remap_and_issue(tc, bio, data_block); } else { int r; @@ -792,7 +801,7 @@ static void schedule_zero(struct thin_c *tc, dm_block_t virt_block, r = dm_kcopyd_zero(pool->copier, 1, &to, 0, copy_complete, m); if (r < 0) { mempool_free(m, pool->mapping_pool); - DMERR("dm_kcopyd_zero() failed"); + DMERR_LIMIT("dm_kcopyd_zero() failed"); dm_cell_error(cell); } } @@ -804,7 +813,7 @@ static int commit(struct pool *pool) r = dm_pool_commit_metadata(pool->pmd); if (r) - DMERR("commit failed, error = %d", r); + DMERR_LIMIT("commit failed: error = %d", r); return r; } @@ -889,7 +898,7 @@ static int alloc_data_block(struct thin_c *tc, dm_block_t *result) */ static void retry_on_resume(struct bio *bio) { - struct dm_thin_endio_hook *h = dm_get_mapinfo(bio)->ptr; + struct dm_thin_endio_hook *h = dm_per_bio_data(bio, sizeof(struct dm_thin_endio_hook)); struct thin_c *tc = h->tc; struct pool 
*pool = tc->pool; unsigned long flags; @@ -936,7 +945,7 @@ static void process_discard(struct thin_c *tc, struct bio *bio) */ build_data_key(tc->td, lookup_result.block, &key2); if (dm_bio_detain(tc->pool->prison, &key2, bio, &cell2)) { - dm_cell_release_singleton(cell, bio); + cell_defer_no_holder(tc, cell); break; } @@ -962,13 +971,15 @@ static void process_discard(struct thin_c *tc, struct bio *bio) wake_worker(pool); } } else { + inc_all_io_entry(pool, bio); + cell_defer_no_holder(tc, cell); + cell_defer_no_holder(tc, cell2); + /* * The DM core makes sure that the discard doesn't span * a block boundary. So we submit the discard of a * partial block appropriately. */ - dm_cell_release_singleton(cell, bio); - dm_cell_release_singleton(cell2, bio); if ((!lookup_result.shared) && pool->pf.discard_passdown) remap_and_issue(tc, bio, lookup_result.block); else @@ -980,13 +991,14 @@ static void process_discard(struct thin_c *tc, struct bio *bio) /* * It isn't provisioned, just forget it. */ - dm_cell_release_singleton(cell, bio); + cell_defer_no_holder(tc, cell); bio_endio(bio, 0); break; default: - DMERR("discard: find block unexpectedly returned %d", r); - dm_cell_release_singleton(cell, bio); + DMERR_LIMIT("%s: dm_thin_find_block() failed: error = %d", + __func__, r); + cell_defer_no_holder(tc, cell); bio_io_error(bio); break; } @@ -1012,7 +1024,8 @@ static void break_sharing(struct thin_c *tc, struct bio *bio, dm_block_t block, break; default: - DMERR("%s: alloc_data_block() failed, error = %d", __func__, r); + DMERR_LIMIT("%s: alloc_data_block() failed: error = %d", + __func__, r); dm_cell_error(cell); break; } @@ -1037,11 +1050,12 @@ static void process_shared_bio(struct thin_c *tc, struct bio *bio, if (bio_data_dir(bio) == WRITE && bio->bi_size) break_sharing(tc, bio, block, &key, lookup_result, cell); else { - struct dm_thin_endio_hook *h = dm_get_mapinfo(bio)->ptr; + struct dm_thin_endio_hook *h = dm_per_bio_data(bio, sizeof(struct dm_thin_endio_hook)); 
h->shared_read_entry = dm_deferred_entry_inc(pool->shared_read_ds); + inc_all_io_entry(pool, bio); + cell_defer_no_holder(tc, cell); - dm_cell_release_singleton(cell, bio); remap_and_issue(tc, bio, lookup_result->block); } } @@ -1056,7 +1070,9 @@ static void provision_block(struct thin_c *tc, struct bio *bio, dm_block_t block * Remap empty bios (flushes) immediately, without provisioning. */ if (!bio->bi_size) { - dm_cell_release_singleton(cell, bio); + inc_all_io_entry(tc->pool, bio); + cell_defer_no_holder(tc, cell); + remap_and_issue(tc, bio, 0); return; } @@ -1066,7 +1082,7 @@ static void provision_block(struct thin_c *tc, struct bio *bio, dm_block_t block */ if (bio_data_dir(bio) == READ) { zero_fill_bio(bio); - dm_cell_release_singleton(cell, bio); + cell_defer_no_holder(tc, cell); bio_endio(bio, 0); return; } @@ -1085,7 +1101,8 @@ static void provision_block(struct thin_c *tc, struct bio *bio, dm_block_t block break; default: - DMERR("%s: alloc_data_block() failed, error = %d", __func__, r); + DMERR_LIMIT("%s: alloc_data_block() failed: error = %d", + __func__, r); set_pool_mode(tc->pool, PM_READ_ONLY); dm_cell_error(cell); break; @@ -1111,34 +1128,31 @@ static void process_bio(struct thin_c *tc, struct bio *bio) r = dm_thin_find_block(tc->td, block, 1, &lookup_result); switch (r) { case 0: - /* - * We can release this cell now. This thread is the only - * one that puts bios into a cell, and we know there were - * no preceding bios. - */ - /* - * TODO: this will probably have to change when discard goes - * back in. 
- */ - dm_cell_release_singleton(cell, bio); - - if (lookup_result.shared) + if (lookup_result.shared) { process_shared_bio(tc, bio, block, &lookup_result); - else + cell_defer_no_holder(tc, cell); + } else { + inc_all_io_entry(tc->pool, bio); + cell_defer_no_holder(tc, cell); + remap_and_issue(tc, bio, lookup_result.block); + } break; case -ENODATA: if (bio_data_dir(bio) == READ && tc->origin_dev) { - dm_cell_release_singleton(cell, bio); + inc_all_io_entry(tc->pool, bio); + cell_defer_no_holder(tc, cell); + remap_to_origin_and_issue(tc, bio); } else provision_block(tc, bio, block, cell); break; default: - DMERR("dm_thin_find_block() failed, error = %d", r); - dm_cell_release_singleton(cell, bio); + DMERR_LIMIT("%s: dm_thin_find_block() failed: error = %d", + __func__, r); + cell_defer_no_holder(tc, cell); bio_io_error(bio); break; } @@ -1156,8 +1170,10 @@ static void process_bio_read_only(struct thin_c *tc, struct bio *bio) case 0: if (lookup_result.shared && (rw == WRITE) && bio->bi_size) bio_io_error(bio); - else + else { + inc_all_io_entry(tc->pool, bio); remap_and_issue(tc, bio, lookup_result.block); + } break; case -ENODATA: @@ -1167,6 +1183,7 @@ static void process_bio_read_only(struct thin_c *tc, struct bio *bio) } if (tc->origin_dev) { + inc_all_io_entry(tc->pool, bio); remap_to_origin_and_issue(tc, bio); break; } @@ -1176,7 +1193,8 @@ static void process_bio_read_only(struct thin_c *tc, struct bio *bio) break; default: - DMERR("dm_thin_find_block() failed, error = %d", r); + DMERR_LIMIT("%s: dm_thin_find_block() failed: error = %d", + __func__, r); bio_io_error(bio); break; } @@ -1207,7 +1225,7 @@ static void process_deferred_bios(struct pool *pool) spin_unlock_irqrestore(&pool->lock, flags); while ((bio = bio_list_pop(&bios))) { - struct dm_thin_endio_hook *h = dm_get_mapinfo(bio)->ptr; + struct dm_thin_endio_hook *h = dm_per_bio_data(bio, sizeof(struct dm_thin_endio_hook)); struct thin_c *tc = h->tc; /* @@ -1340,32 +1358,30 @@ static void 
thin_defer_bio(struct thin_c *tc, struct bio *bio) wake_worker(pool); } -static struct dm_thin_endio_hook *thin_hook_bio(struct thin_c *tc, struct bio *bio) +static void thin_hook_bio(struct thin_c *tc, struct bio *bio) { - struct pool *pool = tc->pool; - struct dm_thin_endio_hook *h = mempool_alloc(pool->endio_hook_pool, GFP_NOIO); + struct dm_thin_endio_hook *h = dm_per_bio_data(bio, sizeof(struct dm_thin_endio_hook)); h->tc = tc; h->shared_read_entry = NULL; - h->all_io_entry = bio->bi_rw & REQ_DISCARD ? NULL : dm_deferred_entry_inc(pool->all_io_ds); + h->all_io_entry = NULL; h->overwrite_mapping = NULL; - - return h; } /* * Non-blocking function called from the thin target's map function. */ -static int thin_bio_map(struct dm_target *ti, struct bio *bio, - union map_info *map_context) +static int thin_bio_map(struct dm_target *ti, struct bio *bio) { int r; struct thin_c *tc = ti->private; dm_block_t block = get_bio_block(tc, bio); struct dm_thin_device *td = tc->td; struct dm_thin_lookup_result result; + struct dm_bio_prison_cell *cell1, *cell2; + struct dm_cell_key key; - map_context->ptr = thin_hook_bio(tc, bio); + thin_hook_bio(tc, bio); if (get_pool_mode(tc->pool) == PM_FAIL) { bio_io_error(bio); @@ -1400,12 +1416,25 @@ static int thin_bio_map(struct dm_target *ti, struct bio *bio, * shared flag will be set in their case. 
*/ thin_defer_bio(tc, bio); - r = DM_MAPIO_SUBMITTED; - } else { - remap(tc, bio, result.block); - r = DM_MAPIO_REMAPPED; + return DM_MAPIO_SUBMITTED; } - break; + + build_virtual_key(tc->td, block, &key); + if (dm_bio_detain(tc->pool->prison, &key, bio, &cell1)) + return DM_MAPIO_SUBMITTED; + + build_data_key(tc->td, result.block, &key); + if (dm_bio_detain(tc->pool->prison, &key, bio, &cell2)) { + cell_defer_no_holder(tc, cell1); + return DM_MAPIO_SUBMITTED; + } + + inc_all_io_entry(tc->pool, bio); + cell_defer_no_holder(tc, cell2); + cell_defer_no_holder(tc, cell1); + + remap(tc, bio, result.block); + return DM_MAPIO_REMAPPED; case -ENODATA: if (get_pool_mode(tc->pool) == PM_READ_ONLY) { @@ -1414,8 +1443,7 @@ static int thin_bio_map(struct dm_target *ti, struct bio *bio, * of doing so. Just error it. */ bio_io_error(bio); - r = DM_MAPIO_SUBMITTED; - break; + return DM_MAPIO_SUBMITTED; } /* fall through */ @@ -1425,8 +1453,7 @@ static int thin_bio_map(struct dm_target *ti, struct bio *bio, * provide the hint to load the metadata into cache. */ thin_defer_bio(tc, bio); - r = DM_MAPIO_SUBMITTED; - break; + return DM_MAPIO_SUBMITTED; default: /* @@ -1435,11 +1462,8 @@ static int thin_bio_map(struct dm_target *ti, struct bio *bio, * pool is switched to fail-io mode. 
*/ bio_io_error(bio); - r = DM_MAPIO_SUBMITTED; - break; + return DM_MAPIO_SUBMITTED; } - - return r; } static int pool_is_congested(struct dm_target_callbacks *cb, int bdi_bits) @@ -1566,14 +1590,12 @@ static void __pool_destroy(struct pool *pool) if (pool->next_mapping) mempool_free(pool->next_mapping, pool->mapping_pool); mempool_destroy(pool->mapping_pool); - mempool_destroy(pool->endio_hook_pool); dm_deferred_set_destroy(pool->shared_read_ds); dm_deferred_set_destroy(pool->all_io_ds); kfree(pool); } static struct kmem_cache *_new_mapping_cache; -static struct kmem_cache *_endio_hook_cache; static struct pool *pool_create(struct mapped_device *pool_md, struct block_device *metadata_dev, @@ -1667,13 +1689,6 @@ static struct pool *pool_create(struct mapped_device *pool_md, goto bad_mapping_pool; } - pool->endio_hook_pool = mempool_create_slab_pool(ENDIO_HOOK_POOL_SIZE, - _endio_hook_cache); - if (!pool->endio_hook_pool) { - *error = "Error creating pool's endio_hook mempool"; - err_p = ERR_PTR(-ENOMEM); - goto bad_endio_hook_pool; - } pool->ref_count = 1; pool->last_commit_jiffies = jiffies; pool->pool_md = pool_md; @@ -1682,8 +1697,6 @@ static struct pool *pool_create(struct mapped_device *pool_md, return pool; -bad_endio_hook_pool: - mempool_destroy(pool->mapping_pool); bad_mapping_pool: dm_deferred_set_destroy(pool->all_io_ds); bad_all_io_ds: @@ -1966,8 +1979,7 @@ out_unlock: return r; } -static int pool_map(struct dm_target *ti, struct bio *bio, - union map_info *map_context) +static int pool_map(struct dm_target *ti, struct bio *bio) { int r; struct pool_c *pt = ti->private; @@ -2358,7 +2370,9 @@ static int pool_status(struct dm_target *ti, status_type_t type, else DMEMIT("rw "); - if (pool->pf.discard_enabled && pool->pf.discard_passdown) + if (!pool->pf.discard_enabled) + DMEMIT("ignore_discard"); + else if (pool->pf.discard_passdown) DMEMIT("discard_passdown"); else DMEMIT("no_discard_passdown"); @@ -2454,7 +2468,7 @@ static struct target_type pool_target 
= { .name = "thin-pool", .features = DM_TARGET_SINGLETON | DM_TARGET_ALWAYS_WRITEABLE | DM_TARGET_IMMUTABLE, - .version = {1, 5, 0}, + .version = {1, 6, 0}, .module = THIS_MODULE, .ctr = pool_ctr, .dtr = pool_dtr, @@ -2576,6 +2590,7 @@ static int thin_ctr(struct dm_target *ti, unsigned argc, char **argv) ti->num_flush_requests = 1; ti->flush_supported = true; + ti->per_bio_data_size = sizeof(struct dm_thin_endio_hook); /* In case the pool supports discards, pass them on. */ if (tc->pool->pf.discard_enabled) { @@ -2609,20 +2624,17 @@ out_unlock: return r; } -static int thin_map(struct dm_target *ti, struct bio *bio, - union map_info *map_context) +static int thin_map(struct dm_target *ti, struct bio *bio) { bio->bi_sector = dm_target_offset(ti, bio->bi_sector); - return thin_bio_map(ti, bio, map_context); + return thin_bio_map(ti, bio); } -static int thin_endio(struct dm_target *ti, - struct bio *bio, int err, - union map_info *map_context) +static int thin_endio(struct dm_target *ti, struct bio *bio, int err) { unsigned long flags; - struct dm_thin_endio_hook *h = map_context->ptr; + struct dm_thin_endio_hook *h = dm_per_bio_data(bio, sizeof(struct dm_thin_endio_hook)); struct list_head work; struct dm_thin_new_mapping *m, *tmp; struct pool *pool = h->tc->pool; @@ -2643,14 +2655,15 @@ static int thin_endio(struct dm_target *ti, if (h->all_io_entry) { INIT_LIST_HEAD(&work); dm_deferred_entry_dec(h->all_io_entry, &work); - spin_lock_irqsave(&pool->lock, flags); - list_for_each_entry_safe(m, tmp, &work, list) - list_add(&m->list, &pool->prepared_discards); - spin_unlock_irqrestore(&pool->lock, flags); + if (!list_empty(&work)) { + spin_lock_irqsave(&pool->lock, flags); + list_for_each_entry_safe(m, tmp, &work, list) + list_add(&m->list, &pool->prepared_discards); + spin_unlock_irqrestore(&pool->lock, flags); + wake_worker(pool); + } } - mempool_free(h, pool->endio_hook_pool); - return 0; } @@ -2745,7 +2758,7 @@ static void thin_io_hints(struct dm_target *ti, struct 
queue_limits *limits) static struct target_type thin_target = { .name = "thin", - .version = {1, 5, 0}, + .version = {1, 6, 0}, .module = THIS_MODULE, .ctr = thin_ctr, .dtr = thin_dtr, @@ -2779,14 +2792,8 @@ static int __init dm_thin_init(void) if (!_new_mapping_cache) goto bad_new_mapping_cache; - _endio_hook_cache = KMEM_CACHE(dm_thin_endio_hook, 0); - if (!_endio_hook_cache) - goto bad_endio_hook_cache; - return 0; -bad_endio_hook_cache: - kmem_cache_destroy(_new_mapping_cache); bad_new_mapping_cache: dm_unregister_target(&pool_target); bad_pool_target: @@ -2801,7 +2808,6 @@ static void dm_thin_exit(void) dm_unregister_target(&pool_target); kmem_cache_destroy(_new_mapping_cache); - kmem_cache_destroy(_endio_hook_cache); } module_init(dm_thin_init); diff --git a/drivers/md/dm-verity.c b/drivers/md/dm-verity.c index 9e7328b..52cde982 100644 --- a/drivers/md/dm-verity.c +++ b/drivers/md/dm-verity.c @@ -55,7 +55,6 @@ struct dm_verity { unsigned shash_descsize;/* the size of temporary space for crypto */ int hash_failed; /* set to 1 if hash of any block failed */ - mempool_t *io_mempool; /* mempool of struct dm_verity_io */ mempool_t *vec_mempool; /* mempool of bio vector */ struct workqueue_struct *verify_wq; @@ -66,7 +65,6 @@ struct dm_verity { struct dm_verity_io { struct dm_verity *v; - struct bio *bio; /* original values of bio->bi_end_io and bio->bi_private */ bio_end_io_t *orig_bi_end_io; @@ -389,8 +387,8 @@ test_block_hash: */ static void verity_finish_io(struct dm_verity_io *io, int error) { - struct bio *bio = io->bio; struct dm_verity *v = io->v; + struct bio *bio = dm_bio_from_per_bio_data(io, v->ti->per_bio_data_size); bio->bi_end_io = io->orig_bi_end_io; bio->bi_private = io->orig_bi_private; @@ -398,8 +396,6 @@ static void verity_finish_io(struct dm_verity_io *io, int error) if (io->io_vec != io->io_vec_inline) mempool_free(io->io_vec, v->vec_mempool); - mempool_free(io, v->io_mempool); - bio_endio(bio, error); } @@ -462,8 +458,7 @@ 
no_prefetch_cluster: * Bio map function. It allocates dm_verity_io structure and bio vector and * fills them. Then it issues prefetches and the I/O. */ -static int verity_map(struct dm_target *ti, struct bio *bio, - union map_info *map_context) +static int verity_map(struct dm_target *ti, struct bio *bio) { struct dm_verity *v = ti->private; struct dm_verity_io *io; @@ -486,9 +481,8 @@ static int verity_map(struct dm_target *ti, struct bio *bio, if (bio_data_dir(bio) == WRITE) return -EIO; - io = mempool_alloc(v->io_mempool, GFP_NOIO); + io = dm_per_bio_data(bio, ti->per_bio_data_size); io->v = v; - io->bio = bio; io->orig_bi_end_io = bio->bi_end_io; io->orig_bi_private = bio->bi_private; io->block = bio->bi_sector >> (v->data_dev_block_bits - SECTOR_SHIFT); @@ -610,9 +604,6 @@ static void verity_dtr(struct dm_target *ti) if (v->vec_mempool) mempool_destroy(v->vec_mempool); - if (v->io_mempool) - mempool_destroy(v->io_mempool); - if (v->bufio) dm_bufio_client_destroy(v->bufio); @@ -841,13 +832,7 @@ static int verity_ctr(struct dm_target *ti, unsigned argc, char **argv) goto bad; } - v->io_mempool = mempool_create_kmalloc_pool(DM_VERITY_MEMPOOL_SIZE, - sizeof(struct dm_verity_io) + v->shash_descsize + v->digest_size * 2); - if (!v->io_mempool) { - ti->error = "Cannot allocate io mempool"; - r = -ENOMEM; - goto bad; - } + ti->per_bio_data_size = roundup(sizeof(struct dm_verity_io) + v->shash_descsize + v->digest_size * 2, __alignof__(struct dm_verity_io)); v->vec_mempool = mempool_create_kmalloc_pool(DM_VERITY_MEMPOOL_SIZE, BIO_MAX_PAGES * sizeof(struct bio_vec)); @@ -875,7 +860,7 @@ bad: static struct target_type verity_target = { .name = "verity", - .version = {1, 0, 0}, + .version = {1, 1, 0}, .module = THIS_MODULE, .ctr = verity_ctr, .dtr = verity_dtr, diff --git a/drivers/md/dm-zero.c b/drivers/md/dm-zero.c index cc2b3cb..69a5c3b 100644 --- a/drivers/md/dm-zero.c +++ b/drivers/md/dm-zero.c @@ -33,8 +33,7 @@ static int zero_ctr(struct dm_target *ti, unsigned int 
argc, char **argv) /* * Return zeros only on reads */ -static int zero_map(struct dm_target *ti, struct bio *bio, - union map_info *map_context) +static int zero_map(struct dm_target *ti, struct bio *bio) { switch(bio_rw(bio)) { case READ: @@ -56,7 +55,7 @@ static int zero_map(struct dm_target *ti, struct bio *bio, static struct target_type zero_target = { .name = "zero", - .version = {1, 0, 0}, + .version = {1, 1, 0}, .module = THIS_MODULE, .ctr = zero_ctr, .map = zero_map, diff --git a/drivers/md/dm.c b/drivers/md/dm.c index 77e6eff..c72e4d5 100644 --- a/drivers/md/dm.c +++ b/drivers/md/dm.c @@ -63,18 +63,6 @@ struct dm_io { }; /* - * For bio-based dm. - * One of these is allocated per target within a bio. Hopefully - * this will be simplified out one day. - */ -struct dm_target_io { - struct dm_io *io; - struct dm_target *ti; - union map_info info; - struct bio clone; -}; - -/* * For request-based dm. * One of these is allocated per request. */ @@ -657,7 +645,7 @@ static void clone_endio(struct bio *bio, int error) error = -EIO; if (endio) { - r = endio(tio->ti, bio, error, &tio->info); + r = endio(tio->ti, bio, error); if (r < 0 || r == DM_ENDIO_REQUEUE) /* * error and requeue request are handled @@ -1016,7 +1004,7 @@ static void __map_bio(struct dm_target *ti, struct dm_target_io *tio) */ atomic_inc(&tio->io->io_count); sector = clone->bi_sector; - r = ti->type->map(ti, clone, &tio->info); + r = ti->type->map(ti, clone); if (r == DM_MAPIO_REMAPPED) { /* the bio has been remapped so dispatch it */ @@ -1111,6 +1099,7 @@ static struct dm_target_io *alloc_tio(struct clone_info *ci, tio->io = ci->io; tio->ti = ti; memset(&tio->info, 0, sizeof(tio->info)); + tio->target_request_nr = 0; return tio; } @@ -1121,7 +1110,7 @@ static void __issue_target_request(struct clone_info *ci, struct dm_target *ti, struct dm_target_io *tio = alloc_tio(ci, ti, ci->bio->bi_max_vecs); struct bio *clone = &tio->clone; - tio->info.target_request_nr = request_nr; + tio->target_request_nr 
= request_nr; /* * Discard requests require the bio's inline iovecs be initialized. @@ -1174,7 +1163,28 @@ static void __clone_and_map_simple(struct clone_info *ci, struct dm_target *ti) ci->sector_count = 0; } -static int __clone_and_map_discard(struct clone_info *ci) +typedef unsigned (*get_num_requests_fn)(struct dm_target *ti); + +static unsigned get_num_discard_requests(struct dm_target *ti) +{ + return ti->num_discard_requests; +} + +static unsigned get_num_write_same_requests(struct dm_target *ti) +{ + return ti->num_write_same_requests; +} + +typedef bool (*is_split_required_fn)(struct dm_target *ti); + +static bool is_split_required_for_discard(struct dm_target *ti) +{ + return ti->split_discard_requests; +} + +static int __clone_and_map_changing_extent_only(struct clone_info *ci, + get_num_requests_fn get_num_requests, + is_split_required_fn is_split_required) { struct dm_target *ti; sector_t len; @@ -1185,15 +1195,15 @@ static int __clone_and_map_discard(struct clone_info *ci) return -EIO; /* - * Even though the device advertised discard support, - * that does not mean every target supports it, and + * Even though the device advertised support for this type of + * request, that does not mean every target supports it, and * reconfiguration might also have changed that since the * check was performed. 
*/ - if (!ti->num_discard_requests) + if (!get_num_requests || !get_num_requests(ti)) return -EOPNOTSUPP; - if (!ti->split_discard_requests) + if (is_split_required && !is_split_required(ti)) len = min(ci->sector_count, max_io_len_target_boundary(ci->sector, ti)); else len = min(ci->sector_count, max_io_len(ci->sector, ti)); @@ -1206,6 +1216,17 @@ static int __clone_and_map_discard(struct clone_info *ci) return 0; } +static int __clone_and_map_discard(struct clone_info *ci) +{ + return __clone_and_map_changing_extent_only(ci, get_num_discard_requests, + is_split_required_for_discard); +} + +static int __clone_and_map_write_same(struct clone_info *ci) +{ + return __clone_and_map_changing_extent_only(ci, get_num_write_same_requests, NULL); +} + static int __clone_and_map(struct clone_info *ci) { struct bio *bio = ci->bio; @@ -1215,6 +1236,8 @@ static int __clone_and_map(struct clone_info *ci) if (unlikely(bio->bi_rw & REQ_DISCARD)) return __clone_and_map_discard(ci); + else if (unlikely(bio->bi_rw & REQ_WRITE_SAME)) + return __clone_and_map_write_same(ci); ti = dm_table_find_target(ci->map, ci->sector); if (!dm_target_is_valid(ti)) @@ -1946,13 +1969,20 @@ static void free_dev(struct mapped_device *md) static void __bind_mempools(struct mapped_device *md, struct dm_table *t) { - struct dm_md_mempools *p; + struct dm_md_mempools *p = dm_table_get_md_mempools(t); - if (md->io_pool && (md->tio_pool || dm_table_get_type(t) == DM_TYPE_BIO_BASED) && md->bs) - /* the md already has necessary mempools */ + if (md->io_pool && (md->tio_pool || dm_table_get_type(t) == DM_TYPE_BIO_BASED) && md->bs) { + /* + * The md already has necessary mempools. Reload just the + * bioset because front_pad may have changed because + * a different table was loaded. 
+ */ + bioset_free(md->bs); + md->bs = p->bs; + p->bs = NULL; goto out; + } - p = dm_table_get_md_mempools(t); BUG_ON(!p || md->io_pool || md->tio_pool || md->bs); md->io_pool = p->io_pool; @@ -2711,7 +2741,7 @@ int dm_noflush_suspending(struct dm_target *ti) } EXPORT_SYMBOL_GPL(dm_noflush_suspending); -struct dm_md_mempools *dm_alloc_md_mempools(unsigned type, unsigned integrity) +struct dm_md_mempools *dm_alloc_md_mempools(unsigned type, unsigned integrity, unsigned per_bio_data_size) { struct dm_md_mempools *pools = kmalloc(sizeof(*pools), GFP_KERNEL); unsigned int pool_size = (type == DM_TYPE_BIO_BASED) ? 16 : MIN_IOS; @@ -2719,6 +2749,8 @@ struct dm_md_mempools *dm_alloc_md_mempools(unsigned type, unsigned integrity) if (!pools) return NULL; + per_bio_data_size = roundup(per_bio_data_size, __alignof__(struct dm_target_io)); + pools->io_pool = (type == DM_TYPE_BIO_BASED) ? mempool_create_slab_pool(MIN_IOS, _io_cache) : mempool_create_slab_pool(MIN_IOS, _rq_bio_info_cache); @@ -2734,7 +2766,7 @@ struct dm_md_mempools *dm_alloc_md_mempools(unsigned type, unsigned integrity) pools->bs = (type == DM_TYPE_BIO_BASED) ? 
bioset_create(pool_size, - offsetof(struct dm_target_io, clone)) : + per_bio_data_size + offsetof(struct dm_target_io, clone)) : bioset_create(pool_size, offsetof(struct dm_rq_clone_bio_info, clone)); if (!pools->bs) diff --git a/drivers/md/dm.h b/drivers/md/dm.h index 6a99fef..45b97da 100644 --- a/drivers/md/dm.h +++ b/drivers/md/dm.h @@ -159,7 +159,7 @@ void dm_kcopyd_exit(void); /* * Mempool operations */ -struct dm_md_mempools *dm_alloc_md_mempools(unsigned type, unsigned integrity); +struct dm_md_mempools *dm_alloc_md_mempools(unsigned type, unsigned integrity, unsigned per_bio_data_size); void dm_free_md_mempools(struct dm_md_mempools *pools); #endif diff --git a/drivers/md/persistent-data/dm-block-manager.c b/drivers/md/persistent-data/dm-block-manager.c index a3ae091..28c3ed0 100644 --- a/drivers/md/persistent-data/dm-block-manager.c +++ b/drivers/md/persistent-data/dm-block-manager.c @@ -428,15 +428,17 @@ static int dm_bm_validate_buffer(struct dm_block_manager *bm, if (!v) return 0; r = v->check(v, (struct dm_block *) buf, dm_bufio_get_block_size(bm->bufio)); - if (unlikely(r)) + if (unlikely(r)) { + DMERR_LIMIT("%s validator check failed for block %llu", v->name, + (unsigned long long) dm_bufio_get_block_number(buf)); return r; + } aux->validator = v; } else { if (unlikely(aux->validator != v)) { - DMERR("validator mismatch (old=%s vs new=%s) for block %llu", - aux->validator->name, v ? v->name : "NULL", - (unsigned long long) - dm_bufio_get_block_number(buf)); + DMERR_LIMIT("validator mismatch (old=%s vs new=%s) for block %llu", + aux->validator->name, v ? 
v->name : "NULL", + (unsigned long long) dm_bufio_get_block_number(buf)); return -EINVAL; } } diff --git a/drivers/md/persistent-data/dm-btree-internal.h b/drivers/md/persistent-data/dm-btree-internal.h index 5709bfe..accbb05 100644 --- a/drivers/md/persistent-data/dm-btree-internal.h +++ b/drivers/md/persistent-data/dm-btree-internal.h @@ -36,13 +36,13 @@ struct node_header { __le32 padding; } __packed; -struct node { +struct btree_node { struct node_header header; __le64 keys[0]; } __packed; -void inc_children(struct dm_transaction_manager *tm, struct node *n, +void inc_children(struct dm_transaction_manager *tm, struct btree_node *n, struct dm_btree_value_type *vt); int new_block(struct dm_btree_info *info, struct dm_block **result); @@ -64,7 +64,7 @@ struct ro_spine { void init_ro_spine(struct ro_spine *s, struct dm_btree_info *info); int exit_ro_spine(struct ro_spine *s); int ro_step(struct ro_spine *s, dm_block_t new_child); -struct node *ro_node(struct ro_spine *s); +struct btree_node *ro_node(struct ro_spine *s); struct shadow_spine { struct dm_btree_info *info; @@ -98,17 +98,17 @@ int shadow_root(struct shadow_spine *s); /* * Some inlines. */ -static inline __le64 *key_ptr(struct node *n, uint32_t index) +static inline __le64 *key_ptr(struct btree_node *n, uint32_t index) { return n->keys + index; } -static inline void *value_base(struct node *n) +static inline void *value_base(struct btree_node *n) { return &n->keys[le32_to_cpu(n->header.max_entries)]; } -static inline void *value_ptr(struct node *n, uint32_t index) +static inline void *value_ptr(struct btree_node *n, uint32_t index) { uint32_t value_size = le32_to_cpu(n->header.value_size); return value_base(n) + (value_size * index); @@ -117,7 +117,7 @@ static inline void *value_ptr(struct node *n, uint32_t index) /* * Assumes the values are suitably-aligned and converts to core format. 
*/ -static inline uint64_t value64(struct node *n, uint32_t index) +static inline uint64_t value64(struct btree_node *n, uint32_t index) { __le64 *values_le = value_base(n); @@ -127,7 +127,7 @@ static inline uint64_t value64(struct node *n, uint32_t index) /* * Searching for a key within a single node. */ -int lower_bound(struct node *n, uint64_t key); +int lower_bound(struct btree_node *n, uint64_t key); extern struct dm_block_validator btree_node_validator; diff --git a/drivers/md/persistent-data/dm-btree-remove.c b/drivers/md/persistent-data/dm-btree-remove.c index aa71e23..c4f2813 100644 --- a/drivers/md/persistent-data/dm-btree-remove.c +++ b/drivers/md/persistent-data/dm-btree-remove.c @@ -53,7 +53,7 @@ /* * Some little utilities for moving node data around. */ -static void node_shift(struct node *n, int shift) +static void node_shift(struct btree_node *n, int shift) { uint32_t nr_entries = le32_to_cpu(n->header.nr_entries); uint32_t value_size = le32_to_cpu(n->header.value_size); @@ -79,7 +79,7 @@ static void node_shift(struct node *n, int shift) } } -static void node_copy(struct node *left, struct node *right, int shift) +static void node_copy(struct btree_node *left, struct btree_node *right, int shift) { uint32_t nr_left = le32_to_cpu(left->header.nr_entries); uint32_t value_size = le32_to_cpu(left->header.value_size); @@ -108,7 +108,7 @@ static void node_copy(struct node *left, struct node *right, int shift) /* * Delete a specific entry from a leaf node. 
*/ -static void delete_at(struct node *n, unsigned index) +static void delete_at(struct btree_node *n, unsigned index) { unsigned nr_entries = le32_to_cpu(n->header.nr_entries); unsigned nr_to_copy = nr_entries - (index + 1); @@ -128,7 +128,7 @@ static void delete_at(struct node *n, unsigned index) n->header.nr_entries = cpu_to_le32(nr_entries - 1); } -static unsigned merge_threshold(struct node *n) +static unsigned merge_threshold(struct btree_node *n) { return le32_to_cpu(n->header.max_entries) / 3; } @@ -136,7 +136,7 @@ static unsigned merge_threshold(struct node *n) struct child { unsigned index; struct dm_block *block; - struct node *n; + struct btree_node *n; }; static struct dm_btree_value_type le64_type = { @@ -147,7 +147,7 @@ static struct dm_btree_value_type le64_type = { .equal = NULL }; -static int init_child(struct dm_btree_info *info, struct node *parent, +static int init_child(struct dm_btree_info *info, struct btree_node *parent, unsigned index, struct child *result) { int r, inc; @@ -177,7 +177,7 @@ static int exit_child(struct dm_btree_info *info, struct child *c) return dm_tm_unlock(info->tm, c->block); } -static void shift(struct node *left, struct node *right, int count) +static void shift(struct btree_node *left, struct btree_node *right, int count) { uint32_t nr_left = le32_to_cpu(left->header.nr_entries); uint32_t nr_right = le32_to_cpu(right->header.nr_entries); @@ -203,11 +203,11 @@ static void shift(struct node *left, struct node *right, int count) right->header.nr_entries = cpu_to_le32(nr_right + count); } -static void __rebalance2(struct dm_btree_info *info, struct node *parent, +static void __rebalance2(struct dm_btree_info *info, struct btree_node *parent, struct child *l, struct child *r) { - struct node *left = l->n; - struct node *right = r->n; + struct btree_node *left = l->n; + struct btree_node *right = r->n; uint32_t nr_left = le32_to_cpu(left->header.nr_entries); uint32_t nr_right = le32_to_cpu(right->header.nr_entries); 
unsigned threshold = 2 * merge_threshold(left) + 1; @@ -239,7 +239,7 @@ static int rebalance2(struct shadow_spine *s, struct dm_btree_info *info, unsigned left_index) { int r; - struct node *parent; + struct btree_node *parent; struct child left, right; parent = dm_block_data(shadow_current(s)); @@ -270,9 +270,9 @@ static int rebalance2(struct shadow_spine *s, struct dm_btree_info *info, * in right, then rebalance2. This wastes some cpu, but I want something * simple atm. */ -static void delete_center_node(struct dm_btree_info *info, struct node *parent, +static void delete_center_node(struct dm_btree_info *info, struct btree_node *parent, struct child *l, struct child *c, struct child *r, - struct node *left, struct node *center, struct node *right, + struct btree_node *left, struct btree_node *center, struct btree_node *right, uint32_t nr_left, uint32_t nr_center, uint32_t nr_right) { uint32_t max_entries = le32_to_cpu(left->header.max_entries); @@ -301,9 +301,9 @@ static void delete_center_node(struct dm_btree_info *info, struct node *parent, /* * Redistributes entries among 3 sibling nodes. 
*/ -static void redistribute3(struct dm_btree_info *info, struct node *parent, +static void redistribute3(struct dm_btree_info *info, struct btree_node *parent, struct child *l, struct child *c, struct child *r, - struct node *left, struct node *center, struct node *right, + struct btree_node *left, struct btree_node *center, struct btree_node *right, uint32_t nr_left, uint32_t nr_center, uint32_t nr_right) { int s; @@ -343,12 +343,12 @@ static void redistribute3(struct dm_btree_info *info, struct node *parent, *key_ptr(parent, r->index) = right->keys[0]; } -static void __rebalance3(struct dm_btree_info *info, struct node *parent, +static void __rebalance3(struct dm_btree_info *info, struct btree_node *parent, struct child *l, struct child *c, struct child *r) { - struct node *left = l->n; - struct node *center = c->n; - struct node *right = r->n; + struct btree_node *left = l->n; + struct btree_node *center = c->n; + struct btree_node *right = r->n; uint32_t nr_left = le32_to_cpu(left->header.nr_entries); uint32_t nr_center = le32_to_cpu(center->header.nr_entries); @@ -371,7 +371,7 @@ static int rebalance3(struct shadow_spine *s, struct dm_btree_info *info, unsigned left_index) { int r; - struct node *parent = dm_block_data(shadow_current(s)); + struct btree_node *parent = dm_block_data(shadow_current(s)); struct child left, center, right; /* @@ -421,7 +421,7 @@ static int get_nr_entries(struct dm_transaction_manager *tm, { int r; struct dm_block *block; - struct node *n; + struct btree_node *n; r = dm_tm_read_lock(tm, b, &btree_node_validator, &block); if (r) @@ -438,7 +438,7 @@ static int rebalance_children(struct shadow_spine *s, { int i, r, has_left_sibling, has_right_sibling; uint32_t child_entries; - struct node *n; + struct btree_node *n; n = dm_block_data(shadow_current(s)); @@ -483,7 +483,7 @@ static int rebalance_children(struct shadow_spine *s, return r; } -static int do_leaf(struct node *n, uint64_t key, unsigned *index) +static int do_leaf(struct 
btree_node *n, uint64_t key, unsigned *index) { int i = lower_bound(n, key); @@ -506,7 +506,7 @@ static int remove_raw(struct shadow_spine *s, struct dm_btree_info *info, uint64_t key, unsigned *index) { int i = *index, r; - struct node *n; + struct btree_node *n; for (;;) { r = shadow_step(s, root, vt); @@ -556,7 +556,7 @@ int dm_btree_remove(struct dm_btree_info *info, dm_block_t root, unsigned level, last_level = info->levels - 1; int index = 0, r = 0; struct shadow_spine spine; - struct node *n; + struct btree_node *n; init_shadow_spine(&spine, info); for (level = 0; level < info->levels; level++) { diff --git a/drivers/md/persistent-data/dm-btree-spine.c b/drivers/md/persistent-data/dm-btree-spine.c index d9a7912..f199a0c 100644 --- a/drivers/md/persistent-data/dm-btree-spine.c +++ b/drivers/md/persistent-data/dm-btree-spine.c @@ -23,7 +23,7 @@ static void node_prepare_for_write(struct dm_block_validator *v, struct dm_block *b, size_t block_size) { - struct node *n = dm_block_data(b); + struct btree_node *n = dm_block_data(b); struct node_header *h = &n->header; h->blocknr = cpu_to_le64(dm_block_location(b)); @@ -38,15 +38,15 @@ static int node_check(struct dm_block_validator *v, struct dm_block *b, size_t block_size) { - struct node *n = dm_block_data(b); + struct btree_node *n = dm_block_data(b); struct node_header *h = &n->header; size_t value_size; __le32 csum_disk; uint32_t flags; if (dm_block_location(b) != le64_to_cpu(h->blocknr)) { - DMERR("node_check failed blocknr %llu wanted %llu", - le64_to_cpu(h->blocknr), dm_block_location(b)); + DMERR_LIMIT("node_check failed: blocknr %llu != wanted %llu", + le64_to_cpu(h->blocknr), dm_block_location(b)); return -ENOTBLK; } @@ -54,8 +54,8 @@ static int node_check(struct dm_block_validator *v, block_size - sizeof(__le32), BTREE_CSUM_XOR)); if (csum_disk != h->csum) { - DMERR("node_check failed csum %u wanted %u", - le32_to_cpu(csum_disk), le32_to_cpu(h->csum)); + DMERR_LIMIT("node_check failed: csum %u != wanted 
%u", + le32_to_cpu(csum_disk), le32_to_cpu(h->csum)); return -EILSEQ; } @@ -63,12 +63,12 @@ static int node_check(struct dm_block_validator *v, if (sizeof(struct node_header) + (sizeof(__le64) + value_size) * le32_to_cpu(h->max_entries) > block_size) { - DMERR("node_check failed: max_entries too large"); + DMERR_LIMIT("node_check failed: max_entries too large"); return -EILSEQ; } if (le32_to_cpu(h->nr_entries) > le32_to_cpu(h->max_entries)) { - DMERR("node_check failed, too many entries"); + DMERR_LIMIT("node_check failed: too many entries"); return -EILSEQ; } @@ -77,7 +77,7 @@ static int node_check(struct dm_block_validator *v, */ flags = le32_to_cpu(h->flags); if (!(flags & INTERNAL_NODE) && !(flags & LEAF_NODE)) { - DMERR("node_check failed, node is neither INTERNAL or LEAF"); + DMERR_LIMIT("node_check failed: node is neither INTERNAL or LEAF"); return -EILSEQ; } @@ -164,7 +164,7 @@ int ro_step(struct ro_spine *s, dm_block_t new_child) return r; } -struct node *ro_node(struct ro_spine *s) +struct btree_node *ro_node(struct ro_spine *s) { struct dm_block *block; diff --git a/drivers/md/persistent-data/dm-btree.c b/drivers/md/persistent-data/dm-btree.c index d12b2cc..4caf669 100644 --- a/drivers/md/persistent-data/dm-btree.c +++ b/drivers/md/persistent-data/dm-btree.c @@ -38,7 +38,7 @@ static void array_insert(void *base, size_t elt_size, unsigned nr_elts, /*----------------------------------------------------------------*/ /* makes the assumption that no two keys are the same. */ -static int bsearch(struct node *n, uint64_t key, int want_hi) +static int bsearch(struct btree_node *n, uint64_t key, int want_hi) { int lo = -1, hi = le32_to_cpu(n->header.nr_entries); @@ -58,12 +58,12 @@ static int bsearch(struct node *n, uint64_t key, int want_hi) return want_hi ? 
hi : lo; } -int lower_bound(struct node *n, uint64_t key) +int lower_bound(struct btree_node *n, uint64_t key) { return bsearch(n, key, 0); } -void inc_children(struct dm_transaction_manager *tm, struct node *n, +void inc_children(struct dm_transaction_manager *tm, struct btree_node *n, struct dm_btree_value_type *vt) { unsigned i; @@ -77,7 +77,7 @@ void inc_children(struct dm_transaction_manager *tm, struct node *n, vt->inc(vt->context, value_ptr(n, i)); } -static int insert_at(size_t value_size, struct node *node, unsigned index, +static int insert_at(size_t value_size, struct btree_node *node, unsigned index, uint64_t key, void *value) __dm_written_to_disk(value) { @@ -122,7 +122,7 @@ int dm_btree_empty(struct dm_btree_info *info, dm_block_t *root) { int r; struct dm_block *b; - struct node *n; + struct btree_node *n; size_t block_size; uint32_t max_entries; @@ -154,7 +154,7 @@ EXPORT_SYMBOL_GPL(dm_btree_empty); #define MAX_SPINE_DEPTH 64 struct frame { struct dm_block *b; - struct node *n; + struct btree_node *n; unsigned level; unsigned nr_children; unsigned current_child; @@ -230,6 +230,11 @@ static void pop_frame(struct del_stack *s) dm_tm_unlock(s->tm, f->b); } +static bool is_internal_level(struct dm_btree_info *info, struct frame *f) +{ + return f->level < (info->levels - 1); +} + int dm_btree_del(struct dm_btree_info *info, dm_block_t root) { int r; @@ -241,7 +246,7 @@ int dm_btree_del(struct dm_btree_info *info, dm_block_t root) s->tm = info->tm; s->top = -1; - r = push_frame(s, root, 1); + r = push_frame(s, root, 0); if (r) goto out; @@ -267,7 +272,7 @@ int dm_btree_del(struct dm_btree_info *info, dm_block_t root) if (r) goto out; - } else if (f->level != (info->levels - 1)) { + } else if (is_internal_level(info, f)) { b = value64(f->n, f->current_child); f->current_child++; r = push_frame(s, b, f->level + 1); @@ -295,7 +300,7 @@ EXPORT_SYMBOL_GPL(dm_btree_del); /*----------------------------------------------------------------*/ static int 
btree_lookup_raw(struct ro_spine *s, dm_block_t block, uint64_t key, - int (*search_fn)(struct node *, uint64_t), + int (*search_fn)(struct btree_node *, uint64_t), uint64_t *result_key, void *v, size_t value_size) { int i, r; @@ -406,7 +411,7 @@ static int btree_split_sibling(struct shadow_spine *s, dm_block_t root, size_t size; unsigned nr_left, nr_right; struct dm_block *left, *right, *parent; - struct node *ln, *rn, *pn; + struct btree_node *ln, *rn, *pn; __le64 location; left = shadow_current(s); @@ -491,7 +496,7 @@ static int btree_split_beneath(struct shadow_spine *s, uint64_t key) size_t size; unsigned nr_left, nr_right; struct dm_block *left, *right, *new_parent; - struct node *pn, *ln, *rn; + struct btree_node *pn, *ln, *rn; __le64 val; new_parent = shadow_current(s); @@ -576,7 +581,7 @@ static int btree_insert_raw(struct shadow_spine *s, dm_block_t root, uint64_t key, unsigned *index) { int r, i = *index, top = 1; - struct node *node; + struct btree_node *node; for (;;) { r = shadow_step(s, root, vt); @@ -643,7 +648,7 @@ static int insert(struct dm_btree_info *info, dm_block_t root, unsigned level, index = -1, last_level = info->levels - 1; dm_block_t block = root; struct shadow_spine spine; - struct node *n; + struct btree_node *n; struct dm_btree_value_type le64_type; le64_type.context = NULL; diff --git a/drivers/md/persistent-data/dm-space-map-common.c b/drivers/md/persistent-data/dm-space-map-common.c index f3a9af8..3e7a88d 100644 --- a/drivers/md/persistent-data/dm-space-map-common.c +++ b/drivers/md/persistent-data/dm-space-map-common.c @@ -39,8 +39,8 @@ static int index_check(struct dm_block_validator *v, __le32 csum_disk; if (dm_block_location(b) != le64_to_cpu(mi_le->blocknr)) { - DMERR("index_check failed blocknr %llu wanted %llu", - le64_to_cpu(mi_le->blocknr), dm_block_location(b)); + DMERR_LIMIT("index_check failed: blocknr %llu != wanted %llu", + le64_to_cpu(mi_le->blocknr), dm_block_location(b)); return -ENOTBLK; } @@ -48,8 +48,8 @@ 
static int index_check(struct dm_block_validator *v, block_size - sizeof(__le32), INDEX_CSUM_XOR)); if (csum_disk != mi_le->csum) { - DMERR("index_check failed csum %u wanted %u", - le32_to_cpu(csum_disk), le32_to_cpu(mi_le->csum)); + DMERR_LIMIT("index_check failed: csum %u != wanted %u", + le32_to_cpu(csum_disk), le32_to_cpu(mi_le->csum)); return -EILSEQ; } @@ -89,8 +89,8 @@ static int bitmap_check(struct dm_block_validator *v, __le32 csum_disk; if (dm_block_location(b) != le64_to_cpu(disk_header->blocknr)) { - DMERR("bitmap check failed blocknr %llu wanted %llu", - le64_to_cpu(disk_header->blocknr), dm_block_location(b)); + DMERR_LIMIT("bitmap check failed: blocknr %llu != wanted %llu", + le64_to_cpu(disk_header->blocknr), dm_block_location(b)); return -ENOTBLK; } @@ -98,8 +98,8 @@ static int bitmap_check(struct dm_block_validator *v, block_size - sizeof(__le32), BITMAP_CSUM_XOR)); if (csum_disk != disk_header->csum) { - DMERR("bitmap check failed csum %u wanted %u", - le32_to_cpu(csum_disk), le32_to_cpu(disk_header->csum)); + DMERR_LIMIT("bitmap check failed: csum %u != wanted %u", + le32_to_cpu(csum_disk), le32_to_cpu(disk_header->csum)); return -EILSEQ; } diff --git a/drivers/md/persistent-data/dm-space-map-metadata.c b/drivers/md/persistent-data/dm-space-map-metadata.c index e89ae5e..906cf3d 100644 --- a/drivers/md/persistent-data/dm-space-map-metadata.c +++ b/drivers/md/persistent-data/dm-space-map-metadata.c @@ -337,7 +337,7 @@ static int sm_metadata_new_block(struct dm_space_map *sm, dm_block_t *b) { int r = sm_metadata_new_block_(sm, b); if (r) - DMERR("out of metadata space"); + DMERR("unable to allocate new metadata block"); return r; } |