diff options
Diffstat (limited to 'drivers/md/dm-cache-target.c')
-rw-r--r-- | drivers/md/dm-cache-target.c | 378 |
1 files changed, 301 insertions, 77 deletions
diff --git a/drivers/md/dm-cache-target.c b/drivers/md/dm-cache-target.c index 7130505..1e96d78 100644 --- a/drivers/md/dm-cache-target.c +++ b/drivers/md/dm-cache-target.c @@ -95,7 +95,6 @@ static void dm_unhook_bio(struct dm_hook_info *h, struct bio *bio) /*----------------------------------------------------------------*/ -#define PRISON_CELLS 1024 #define MIGRATION_POOL_SIZE 128 #define COMMIT_PERIOD HZ #define MIGRATION_COUNT_WINDOW 10 @@ -237,8 +236,9 @@ struct cache { /* * origin_blocks entries, discarded if set. */ - dm_oblock_t discard_nr_blocks; + dm_dblock_t discard_nr_blocks; unsigned long *discard_bitset; + uint32_t discard_block_size; /* a power of 2 times sectors per block */ /* * Rather than reconstructing the table line for the status we just @@ -310,6 +310,7 @@ struct dm_cache_migration { dm_cblock_t cblock; bool err:1; + bool discard:1; bool writeback:1; bool demote:1; bool promote:1; @@ -433,11 +434,12 @@ static void prealloc_put_cell(struct prealloc *p, struct dm_bio_prison_cell *cel /*----------------------------------------------------------------*/ -static void build_key(dm_oblock_t oblock, struct dm_cell_key *key) +static void build_key(dm_oblock_t begin, dm_oblock_t end, struct dm_cell_key *key) { key->virtual = 0; key->dev = 0; - key->block = from_oblock(oblock); + key->block_begin = from_oblock(begin); + key->block_end = from_oblock(end); } /* @@ -447,15 +449,15 @@ static void build_key(dm_oblock_t oblock, struct dm_cell_key *key) */ typedef void (*cell_free_fn)(void *context, struct dm_bio_prison_cell *cell); -static int bio_detain(struct cache *cache, dm_oblock_t oblock, - struct bio *bio, struct dm_bio_prison_cell *cell_prealloc, - cell_free_fn free_fn, void *free_context, - struct dm_bio_prison_cell **cell_result) +static int bio_detain_range(struct cache *cache, dm_oblock_t oblock_begin, dm_oblock_t oblock_end, + struct bio *bio, struct dm_bio_prison_cell *cell_prealloc, + cell_free_fn free_fn, void *free_context, + struct dm_bio_prison_cell **cell_result) { int r; struct dm_cell_key key; - build_key(oblock, &key); + build_key(oblock_begin, oblock_end, &key); r = dm_bio_detain(cache->prison, &key, bio, cell_prealloc, cell_result); if (r) free_fn(free_context, cell_prealloc); @@ -463,6 +465,16 @@ static int bio_detain(struct cache *cache, dm_oblock_t oblock, return r; } +static int bio_detain(struct cache *cache, dm_oblock_t oblock, + struct bio *bio, struct dm_bio_prison_cell *cell_prealloc, + cell_free_fn free_fn, void *free_context, + struct dm_bio_prison_cell **cell_result) +{ + dm_oblock_t end = to_oblock(from_oblock(oblock) + 1ULL); + return bio_detain_range(cache, oblock, end, bio, + cell_prealloc, free_fn, free_context, cell_result); +} + static int get_cell(struct cache *cache, dm_oblock_t oblock, struct prealloc *structs, @@ -474,7 +486,7 @@ static int get_cell(struct cache *cache, cell_prealloc = prealloc_get_cell(structs); - build_key(oblock, &key); + build_key(oblock, to_oblock(from_oblock(oblock) + 1ULL), &key); r = dm_get_cell(cache->prison, &key, cell_prealloc, cell_result); if (r) prealloc_put_cell(structs, cell_prealloc); @@ -524,33 +536,57 @@ static dm_block_t block_div(dm_block_t b, uint32_t n) return b; } -static void set_discard(struct cache *cache, dm_oblock_t b) +static dm_block_t oblocks_per_dblock(struct cache *cache) +{ + dm_block_t oblocks = cache->discard_block_size; + + if (block_size_is_power_of_two(cache)) + oblocks >>= cache->sectors_per_block_shift; + else + oblocks = block_div(oblocks, cache->sectors_per_block); + + return oblocks; +} + +static dm_dblock_t oblock_to_dblock(struct cache *cache, dm_oblock_t oblock) +{ + return to_dblock(block_div(from_oblock(oblock), + oblocks_per_dblock(cache))); +} + +static dm_oblock_t dblock_to_oblock(struct cache *cache, dm_dblock_t dblock) +{ + return to_oblock(from_dblock(dblock) * oblocks_per_dblock(cache)); +} + +static void set_discard(struct cache *cache, dm_dblock_t b) { unsigned long flags; + BUG_ON(from_dblock(b) >= from_dblock(cache->discard_nr_blocks)); atomic_inc(&cache->stats.discard_count); spin_lock_irqsave(&cache->lock, flags); - set_bit(from_oblock(b), cache->discard_bitset); + set_bit(from_dblock(b), cache->discard_bitset); spin_unlock_irqrestore(&cache->lock, flags); } -static void clear_discard(struct cache *cache, dm_oblock_t b) +static void clear_discard(struct cache *cache, dm_dblock_t b) { unsigned long flags; spin_lock_irqsave(&cache->lock, flags); - clear_bit(from_oblock(b), cache->discard_bitset); + clear_bit(from_dblock(b), cache->discard_bitset); spin_unlock_irqrestore(&cache->lock, flags); } -static bool is_discarded(struct cache *cache, dm_oblock_t b) +static bool is_discarded(struct cache *cache, dm_dblock_t b) { int r; unsigned long flags; spin_lock_irqsave(&cache->lock, flags); - r = test_bit(from_oblock(b), cache->discard_bitset); + r = test_bit(from_dblock(b), cache->discard_bitset); spin_unlock_irqrestore(&cache->lock, flags); return r; @@ -562,7 +598,8 @@ static bool is_discarded_oblock(struct cache *cache, dm_oblock_t b) unsigned long flags; spin_lock_irqsave(&cache->lock, flags); - r = test_bit(from_oblock(b), cache->discard_bitset); + r = test_bit(from_dblock(oblock_to_dblock(cache, b)), + cache->discard_bitset); spin_unlock_irqrestore(&cache->lock, flags); return r; @@ -687,7 +724,7 @@ static void remap_to_origin_clear_discard(struct cache *cache, struct bio *bio, check_if_tick_bio_needed(cache, bio); remap_to_origin(cache, bio); if (bio_data_dir(bio) == WRITE) - clear_discard(cache, oblock); + clear_discard(cache, oblock_to_dblock(cache, oblock)); } static void remap_to_cache_dirty(struct cache *cache, struct bio *bio, @@ -697,7 +734,7 @@ static void remap_to_cache_dirty(struct cache *cache, struct bio *bio, remap_to_cache(cache, bio, cblock); if (bio_data_dir(bio) == WRITE) { set_dirty(cache, oblock, cblock); - clear_discard(cache, oblock); + clear_discard(cache, oblock_to_dblock(cache, oblock)); } } @@ -951,10 +988,14 @@ static void migration_success_post_commit(struct dm_cache_migration *mg) } } else { - clear_dirty(cache, mg->new_oblock, mg->cblock); - if (mg->requeue_holder) + if (mg->requeue_holder) { + clear_dirty(cache, mg->new_oblock, mg->cblock); cell_defer(cache, mg->new_ocell, true); - else { + } else { + /* + * The block was promoted via an overwrite, so it's dirty. + */ + set_dirty(cache, mg->new_oblock, mg->cblock); bio_endio(mg->new_ocell->holder, 0); cell_defer(cache, mg->new_ocell, false); } @@ -978,7 +1019,7 @@ static void copy_complete(int read_err, unsigned long write_err, void *context) wake_worker(cache); } -static void issue_copy_real(struct dm_cache_migration *mg) +static void issue_copy(struct dm_cache_migration *mg) { int r; struct dm_io_region o_region, c_region; @@ -1057,11 +1098,46 @@ static void avoid_copy(struct dm_cache_migration *mg) migration_success_pre_commit(mg); } -static void issue_copy(struct dm_cache_migration *mg) +static void calc_discard_block_range(struct cache *cache, struct bio *bio, + dm_dblock_t *b, dm_dblock_t *e) +{ + sector_t sb = bio->bi_iter.bi_sector; + sector_t se = bio_end_sector(bio); + + *b = to_dblock(dm_sector_div_up(sb, cache->discard_block_size)); + + if (se - sb < cache->discard_block_size) + *e = *b; + else + *e = to_dblock(block_div(se, cache->discard_block_size)); +} + +static void issue_discard(struct dm_cache_migration *mg) +{ + dm_dblock_t b, e; + struct bio *bio = mg->new_ocell->holder; + + calc_discard_block_range(mg->cache, bio, &b, &e); + while (b != e) { + set_discard(mg->cache, b); + b = to_dblock(from_dblock(b) + 1); + } + + bio_endio(bio, 0); + cell_defer(mg->cache, mg->new_ocell, false); + free_migration(mg); +} + +static void issue_copy_or_discard(struct dm_cache_migration *mg) { bool avoid; struct cache *cache = mg->cache; + if (mg->discard) { + issue_discard(mg); + return; + } + if (mg->writeback || mg->demote) avoid = !is_dirty(cache, mg->cblock) || is_discarded_oblock(cache, mg->old_oblock); @@ -1070,13 +1146,14 @@ static void issue_copy(struct dm_cache_migration *mg) avoid = is_discarded_oblock(cache, mg->new_oblock); - if (!avoid && bio_writes_complete_block(cache, bio)) { + if (writeback_mode(&cache->features) && + !avoid && bio_writes_complete_block(cache, bio)) { issue_overwrite(mg, bio); return; } } - avoid ? avoid_copy(mg) : issue_copy_real(mg); + avoid ? avoid_copy(mg) : issue_copy(mg); } static void complete_migration(struct dm_cache_migration *mg) @@ -1161,6 +1238,7 @@ static void promote(struct cache *cache, struct prealloc *structs, struct dm_cache_migration *mg = prealloc_get_migration(structs); mg->err = false; + mg->discard = false; mg->writeback = false; mg->demote = false; mg->promote = true; @@ -1184,6 +1262,7 @@ static void writeback(struct cache *cache, struct prealloc *structs, struct dm_cache_migration *mg = prealloc_get_migration(structs); mg->err = false; + mg->discard = false; mg->writeback = true; mg->demote = false; mg->promote = false; @@ -1209,6 +1288,7 @@ static void demote_then_promote(struct cache *cache, struct prealloc *structs, struct dm_cache_migration *mg = prealloc_get_migration(structs); mg->err = false; + mg->discard = false; mg->writeback = false; mg->demote = true; mg->promote = true; @@ -1237,6 +1317,7 @@ static void invalidate(struct cache *cache, struct prealloc *structs, struct dm_cache_migration *mg = prealloc_get_migration(structs); mg->err = false; + mg->discard = false; mg->writeback = false; mg->demote = true; mg->promote = false; @@ -1253,6 +1334,26 @@ static void invalidate(struct cache *cache, struct prealloc *structs, quiesce_migration(mg); } +static void discard(struct cache *cache, struct prealloc *structs, + struct dm_bio_prison_cell *cell) +{ + struct dm_cache_migration *mg = prealloc_get_migration(structs); + + mg->err = false; + mg->discard = true; + mg->writeback = false; + mg->demote = false; + mg->promote = false; + mg->requeue_holder = false; + mg->invalidate = false; + mg->cache = cache; + mg->old_ocell = NULL; + mg->new_ocell = cell; + mg->start_jiffies = jiffies; + + quiesce_migration(mg); +} + /*---------------------------------------------------------------- * bio processing *--------------------------------------------------------------*/ @@ -1286,31 +1387,27 @@ static void process_flush_bio(struct cache *cache, struct bio *bio) issue(cache, bio); } -/* - * People generally discard large parts of a device, eg, the whole device - * when formatting. Splitting these large discards up into cache block - * sized ios and then quiescing (always neccessary for discard) takes too - * long. - * - * We keep it simple, and allow any size of discard to come in, and just - * mark off blocks on the discard bitset. No passdown occurs! - * - * To implement passdown we need to change the bio_prison such that a cell - * can have a key that spans many blocks. - */ -static void process_discard_bio(struct cache *cache, struct bio *bio) +static void process_discard_bio(struct cache *cache, struct prealloc *structs, + struct bio *bio) { - dm_block_t start_block = dm_sector_div_up(bio->bi_iter.bi_sector, - cache->sectors_per_block); - dm_block_t end_block = bio_end_sector(bio); - dm_block_t b; + int r; + dm_dblock_t b, e; + struct dm_bio_prison_cell *cell_prealloc, *new_ocell; - end_block = block_div(end_block, cache->sectors_per_block); + calc_discard_block_range(cache, bio, &b, &e); + if (b == e) { + bio_endio(bio, 0); + return; + } - for (b = start_block; b < end_block; b++) - set_discard(cache, to_oblock(b)); + cell_prealloc = prealloc_get_cell(structs); + r = bio_detain_range(cache, dblock_to_oblock(cache, b), dblock_to_oblock(cache, e), bio, cell_prealloc, + (cell_free_fn) prealloc_put_cell, + structs, &new_ocell); + if (r > 0) + return; - bio_endio(bio, 0); + discard(cache, structs, new_ocell); } static bool spare_migration_bandwidth(struct cache *cache) @@ -1340,9 +1437,8 @@ static void process_bio(struct cache *cache, struct prealloc *structs, dm_oblock_t block = get_bio_block(cache, bio); struct dm_bio_prison_cell *cell_prealloc, *old_ocell, *new_ocell; struct policy_result lookup_result; - bool discarded_block = is_discarded_oblock(cache, block); bool passthrough = passthrough_mode(&cache->features); - bool can_migrate = !passthrough && (discarded_block || spare_migration_bandwidth(cache)); + bool discarded_block, can_migrate; /* * Check to see if that block is currently migrating. @@ -1354,6 +1450,9 @@ static void process_bio(struct cache *cache, struct prealloc *structs, if (r > 0) return; + discarded_block = is_discarded_oblock(cache, block); + can_migrate = !passthrough && (discarded_block || spare_migration_bandwidth(cache)); + r = policy_map(cache->policy, block, true, can_migrate, discarded_block, bio, &lookup_result); @@ -1500,7 +1599,7 @@ static void process_deferred_bios(struct cache *cache) if (bio->bi_rw & REQ_FLUSH) process_flush_bio(cache, bio); else if (bio->bi_rw & REQ_DISCARD) - process_discard_bio(cache, bio); + process_discard_bio(cache, &structs, bio); else process_bio(cache, &structs, bio); } @@ -1715,7 +1814,7 @@ static void do_worker(struct work_struct *ws) process_invalidation_requests(cache); } - process_migrations(cache, &cache->quiesced_migrations, issue_copy); + process_migrations(cache, &cache->quiesced_migrations, issue_copy_or_discard); process_migrations(cache, &cache->completed_migrations, complete_migration); if (commit_if_needed(cache)) { @@ -2180,6 +2279,45 @@ static int create_cache_policy(struct cache *cache, struct cache_args *ca, return 0; } +/* + * We want the discard block size to be at least the size of the cache + * block size and have no more than 2^14 discard blocks across the origin. + */ +#define MAX_DISCARD_BLOCKS (1 << 14) + +static bool too_many_discard_blocks(sector_t discard_block_size, + sector_t origin_size) +{ + (void) sector_div(origin_size, discard_block_size); + + return origin_size > MAX_DISCARD_BLOCKS; +} + +static sector_t calculate_discard_block_size(sector_t cache_block_size, + sector_t origin_size) +{ + sector_t discard_block_size = cache_block_size; + + if (origin_size) + while (too_many_discard_blocks(discard_block_size, origin_size)) + discard_block_size *= 2; + + return discard_block_size; +} + +static void set_cache_size(struct cache *cache, dm_cblock_t size) +{ + dm_block_t nr_blocks = from_cblock(size); + + if (nr_blocks > (1 << 20) && cache->cache_size != size) + DMWARN_LIMIT("You have created a cache device with a lot of individual cache blocks (%llu)\n" + "All these mappings can consume a lot of kernel memory, and take some time to read/write.\n" + "Please consider increasing the cache block size to reduce the overall cache block count.", + (unsigned long long) nr_blocks); + + cache->cache_size = size; +} + #define DEFAULT_MIGRATION_THRESHOLD 2048 static int cache_create(struct cache_args *ca, struct cache **result) @@ -2204,8 +2342,7 @@ static int cache_create(struct cache_args *ca, struct cache **result) ti->num_discard_bios = 1; ti->discards_supported = true; ti->discard_zeroes_data_unsupported = true; - /* Discard bios must be split on a block boundary */ - ti->split_discard_bios = true; + ti->split_discard_bios = false; cache->features = ca->features; ti->per_bio_data_size = get_per_bio_data_size(cache); @@ -2235,10 +2372,10 @@ static int cache_create(struct cache_args *ca, struct cache **result) cache->sectors_per_block_shift = -1; cache_size = block_div(cache_size, ca->block_size); - cache->cache_size = to_cblock(cache_size); + set_cache_size(cache, to_cblock(cache_size)); } else { cache->sectors_per_block_shift = __ffs(ca->block_size); - cache->cache_size = to_cblock(ca->cache_sectors >> cache->sectors_per_block_shift); + set_cache_size(cache, to_cblock(ca->cache_sectors >> cache->sectors_per_block_shift)); } r = create_cache_policy(cache, ca, error); @@ -2303,13 +2440,17 @@ static int cache_create(struct cache_args *ca, struct cache **result) } clear_bitset(cache->dirty_bitset, from_cblock(cache->cache_size)); - cache->discard_nr_blocks = cache->origin_blocks; - cache->discard_bitset = alloc_bitset(from_oblock(cache->discard_nr_blocks)); + cache->discard_block_size = + calculate_discard_block_size(cache->sectors_per_block, + cache->origin_sectors); + cache->discard_nr_blocks = to_dblock(dm_sector_div_up(cache->origin_sectors, + cache->discard_block_size)); + cache->discard_bitset = alloc_bitset(from_dblock(cache->discard_nr_blocks)); if (!cache->discard_bitset) { *error = "could not allocate discard bitset"; goto bad; } - clear_bitset(cache->discard_bitset, from_oblock(cache->discard_nr_blocks)); + clear_bitset(cache->discard_bitset, from_dblock(cache->discard_nr_blocks)); cache->copier = dm_kcopyd_client_create(&dm_kcopyd_throttle); if (IS_ERR(cache->copier)) { @@ -2327,7 +2468,7 @@ static int cache_create(struct cache_args *ca, struct cache **result) INIT_DELAYED_WORK(&cache->waker, do_waker); cache->last_commit_jiffies = jiffies; - cache->prison = dm_bio_prison_create(PRISON_CELLS); + cache->prison = dm_bio_prison_create(); if (!cache->prison) { *error = "could not create bio prison"; goto bad; @@ -2549,11 +2690,11 @@ static int __cache_map(struct cache *cache, struct bio *bio, struct dm_bio_priso static int cache_map(struct dm_target *ti, struct bio *bio) { int r; - struct dm_bio_prison_cell *cell; + struct dm_bio_prison_cell *cell = NULL; struct cache *cache = ti->private; r = __cache_map(cache, bio, &cell); - if (r == DM_MAPIO_REMAPPED) { + if (r == DM_MAPIO_REMAPPED && cell) { inc_ds(cache, bio, cell); cell_defer(cache, cell, false); } @@ -2599,16 +2740,16 @@ static int write_discard_bitset(struct cache *cache) { unsigned i, r; - r = dm_cache_discard_bitset_resize(cache->cmd, cache->sectors_per_block, - cache->origin_blocks); + r = dm_cache_discard_bitset_resize(cache->cmd, cache->discard_block_size, + cache->discard_nr_blocks); if (r) { DMERR("could not resize on-disk discard bitset"); return r; } - for (i = 0; i < from_oblock(cache->discard_nr_blocks); i++) { - r = dm_cache_set_discard(cache->cmd, to_oblock(i), - is_discarded(cache, to_oblock(i))); + for (i = 0; i < from_dblock(cache->discard_nr_blocks); i++) { + r = dm_cache_set_discard(cache->cmd, to_dblock(i), + is_discarded(cache, to_dblock(i))); if (r) return r; } @@ -2680,15 +2821,86 @@ static int load_mapping(void *context, dm_oblock_t oblock, dm_cblock_t cblock, return 0; } +/* + * The discard block size in the on disk metadata is not + * neccessarily the same as we're currently using. So we have to + * be careful to only set the discarded attribute if we know it + * covers a complete block of the new size. + */ +struct discard_load_info { + struct cache *cache; + + /* + * These blocks are sized using the on disk dblock size, rather + * than the current one. + */ + dm_block_t block_size; + dm_block_t discard_begin, discard_end; +}; + +static void discard_load_info_init(struct cache *cache, + struct discard_load_info *li) +{ + li->cache = cache; + li->discard_begin = li->discard_end = 0; +} + +static void set_discard_range(struct discard_load_info *li) +{ + sector_t b, e; + + if (li->discard_begin == li->discard_end) + return; + + /* + * Convert to sectors. + */ + b = li->discard_begin * li->block_size; + e = li->discard_end * li->block_size; + + /* + * Then convert back to the current dblock size. + */ + b = dm_sector_div_up(b, li->cache->discard_block_size); + sector_div(e, li->cache->discard_block_size); + + /* + * The origin may have shrunk, so we need to check we're still in + * bounds. + */ + if (e > from_dblock(li->cache->discard_nr_blocks)) + e = from_dblock(li->cache->discard_nr_blocks); + + for (; b < e; b++) + set_discard(li->cache, to_dblock(b)); +} + static int load_discard(void *context, sector_t discard_block_size, - dm_oblock_t oblock, bool discard) + dm_dblock_t dblock, bool discard) { - struct cache *cache = context; + struct discard_load_info *li = context; - if (discard) - set_discard(cache, oblock); - else - clear_discard(cache, oblock); + li->block_size = discard_block_size; + + if (discard) { + if (from_dblock(dblock) == li->discard_end) + /* + * We're already in a discard range, just extend it. + */ + li->discard_end = li->discard_end + 1ULL; + + else { + /* + * Emit the old range and start a new one. + */ + set_discard_range(li); + li->discard_begin = from_dblock(dblock); + li->discard_end = li->discard_begin + 1ULL; + } + } else { + set_discard_range(li); + li->discard_begin = li->discard_end = 0; + } return 0; } @@ -2730,7 +2942,7 @@ static int resize_cache_dev(struct cache *cache, dm_cblock_t new_size) return r; } - cache->cache_size = new_size; + set_cache_size(cache, new_size); return 0; } @@ -2772,11 +2984,22 @@ static int cache_preresume(struct dm_target *ti) } if (!cache->loaded_discards) { - r = dm_cache_load_discards(cache->cmd, load_discard, cache); + struct discard_load_info li; + + /* + * The discard bitset could have been resized, or the + * discard block size changed. To be safe we start by + * setting every dblock to not discarded. + */ + clear_bitset(cache->discard_bitset, from_dblock(cache->discard_nr_blocks)); + + discard_load_info_init(cache, &li); + r = dm_cache_load_discards(cache->cmd, load_discard, &li); if (r) { DMERR("could not load origin discards"); return r; } + set_discard_range(&li); cache->loaded_discards = true; } @@ -3079,8 +3302,9 @@ static void set_discard_limits(struct cache *cache, struct queue_limits *limits) /* * FIXME: these limits may be incompatible with the cache device */ - limits->max_discard_sectors = cache->sectors_per_block; - limits->discard_granularity = cache->sectors_per_block << SECTOR_SHIFT; + limits->max_discard_sectors = min_t(sector_t, cache->discard_block_size * 1024, + cache->origin_sectors); + limits->discard_granularity = cache->discard_block_size << SECTOR_SHIFT; } static void cache_io_hints(struct dm_target *ti, struct queue_limits *limits) @@ -3104,7 +3328,7 @@ static void cache_io_hints(struct dm_target *ti, struct queue_limits *limits) static struct target_type cache_target = { .name = "cache", - .version = {1, 5, 0}, + .version = {1, 6, 0}, .module = THIS_MODULE, .ctr = cache_ctr, .dtr = cache_dtr, |