diff options
Diffstat (limited to 'drivers/md')
-rw-r--r-- | drivers/md/bcache/closure.h | 3 | ||||
-rw-r--r-- | drivers/md/bcache/io.c | 1 | ||||
-rw-r--r-- | drivers/md/bcache/journal.c | 2 | ||||
-rw-r--r-- | drivers/md/bcache/request.c | 14 | ||||
-rw-r--r-- | drivers/md/bitmap.c | 28 | ||||
-rw-r--r-- | drivers/md/dm-cache-target.c | 38 | ||||
-rw-r--r-- | drivers/md/dm-thin.c | 44 | ||||
-rw-r--r-- | drivers/md/dm.c | 12 | ||||
-rw-r--r-- | drivers/md/md-cluster.c | 12 | ||||
-rw-r--r-- | drivers/md/md-cluster.h | 2 | ||||
-rw-r--r-- | drivers/md/md.c | 4 | ||||
-rw-r--r-- | drivers/md/persistent-data/dm-btree-remove.c | 6 | ||||
-rw-r--r-- | drivers/md/persistent-data/dm-btree.c | 2 | ||||
-rw-r--r-- | drivers/md/raid1.c | 9 | ||||
-rw-r--r-- | drivers/md/raid10.c | 5 | ||||
-rw-r--r-- | drivers/md/raid5.c | 35 | ||||
-rw-r--r-- | drivers/md/raid5.h | 3 |
17 files changed, 153 insertions, 67 deletions
diff --git a/drivers/md/bcache/closure.h b/drivers/md/bcache/closure.h index a08e3ee..79a6d63 100644 --- a/drivers/md/bcache/closure.h +++ b/drivers/md/bcache/closure.h @@ -320,7 +320,6 @@ static inline void closure_wake_up(struct closure_waitlist *list) do { \ set_closure_fn(_cl, _fn, _wq); \ closure_sub(_cl, CLOSURE_RUNNING + 1); \ - return; \ } while (0) /** @@ -349,7 +348,6 @@ do { \ do { \ set_closure_fn(_cl, _fn, _wq); \ closure_queue(_cl); \ - return; \ } while (0) /** @@ -365,7 +363,6 @@ do { \ do { \ set_closure_fn(_cl, _destructor, NULL); \ closure_sub(_cl, CLOSURE_RUNNING - CLOSURE_DESTRUCTOR + 1); \ - return; \ } while (0) /** diff --git a/drivers/md/bcache/io.c b/drivers/md/bcache/io.c index cb64e64a..bf6a9ca 100644 --- a/drivers/md/bcache/io.c +++ b/drivers/md/bcache/io.c @@ -105,6 +105,7 @@ void bch_generic_make_request(struct bio *bio, struct bio_split_pool *p) } while (n != bio); continue_at(&s->cl, bch_bio_submit_split_done, NULL); + return; submit: generic_make_request(bio); } diff --git a/drivers/md/bcache/journal.c b/drivers/md/bcache/journal.c index ce64fc8..418607a 100644 --- a/drivers/md/bcache/journal.c +++ b/drivers/md/bcache/journal.c @@ -592,12 +592,14 @@ static void journal_write_unlocked(struct closure *cl) if (!w->need_write) { closure_return_with_destructor(cl, journal_write_unlock); + return; } else if (journal_full(&c->journal)) { journal_reclaim(c); spin_unlock(&c->journal.lock); btree_flush_write(c); continue_at(cl, journal_write, system_wq); + return; } c->journal.blocks_free -= set_blocks(w->data, block_bytes(c)); diff --git a/drivers/md/bcache/request.c b/drivers/md/bcache/request.c index 4afb2d2..f292790 100644 --- a/drivers/md/bcache/request.c +++ b/drivers/md/bcache/request.c @@ -88,8 +88,10 @@ static void bch_data_insert_keys(struct closure *cl) if (journal_ref) atomic_dec_bug(journal_ref); - if (!op->insert_data_done) + if (!op->insert_data_done) { continue_at(cl, bch_data_insert_start, op->wq); + return; + } bch_keylist_free(&op->insert_keys); closure_return(cl); @@ -216,8 +218,10 @@ static void bch_data_insert_start(struct closure *cl) /* 1 for the device pointer and 1 for the chksum */ if (bch_keylist_realloc(&op->insert_keys, 3 + (op->csum ? 1 : 0), - op->c)) + op->c)) { continue_at(cl, bch_data_insert_keys, op->wq); + return; + } k = op->insert_keys.top; bkey_init(k); @@ -255,6 +259,7 @@ static void bch_data_insert_start(struct closure *cl) op->insert_data_done = true; continue_at(cl, bch_data_insert_keys, op->wq); + return; err: /* bch_alloc_sectors() blocks if s->writeback = true */ BUG_ON(op->writeback); @@ -576,8 +581,10 @@ static void cache_lookup(struct closure *cl) ret = bch_btree_map_keys(&s->op, s->iop.c, &KEY(s->iop.inode, bio->bi_iter.bi_sector, 0), cache_lookup_fn, MAP_END_KEY); - if (ret == -EAGAIN) + if (ret == -EAGAIN) { continue_at(cl, cache_lookup, bcache_wq); + return; + } closure_return(cl); } @@ -1085,6 +1092,7 @@ static void flash_dev_make_request(struct request_queue *q, struct bio *bio) continue_at_nobarrier(&s->cl, flash_dev_nodata, bcache_wq); + return; } else if (rw) { bch_keybuf_check_overlapping(&s->iop.c->moving_gc_keys, &KEY(d->id, bio->bi_iter.bi_sector, 0), diff --git a/drivers/md/bitmap.c b/drivers/md/bitmap.c index ed2346d..e51de52 100644 --- a/drivers/md/bitmap.c +++ b/drivers/md/bitmap.c @@ -494,7 +494,7 @@ static int bitmap_new_disk_sb(struct bitmap *bitmap) bitmap_super_t *sb; unsigned long chunksize, daemon_sleep, write_behind; - bitmap->storage.sb_page = alloc_page(GFP_KERNEL); + bitmap->storage.sb_page = alloc_page(GFP_KERNEL | __GFP_ZERO); if (bitmap->storage.sb_page == NULL) return -ENOMEM; bitmap->storage.sb_page->index = 0; @@ -541,6 +541,7 @@ static int bitmap_new_disk_sb(struct bitmap *bitmap) sb->state = cpu_to_le32(bitmap->flags); bitmap->events_cleared = bitmap->mddev->events; sb->events_cleared = cpu_to_le64(bitmap->mddev->events); + bitmap->mddev->bitmap_info.nodes = 0; kunmap_atomic(sb); @@ -558,6 +559,7 @@ static int bitmap_read_sb(struct bitmap *bitmap) unsigned long sectors_reserved = 0; int err = -EINVAL; struct page *sb_page; + loff_t offset = bitmap->mddev->bitmap_info.offset; if (!bitmap->storage.file && !bitmap->mddev->bitmap_info.offset) { chunksize = 128 * 1024 * 1024; @@ -584,9 +586,9 @@ re_read: bm_blocks = ((bm_blocks+7) >> 3) + sizeof(bitmap_super_t); /* to 4k blocks */ bm_blocks = DIV_ROUND_UP_SECTOR_T(bm_blocks, 4096); - bitmap->mddev->bitmap_info.offset += bitmap->cluster_slot * (bm_blocks << 3); + offset = bitmap->mddev->bitmap_info.offset + (bitmap->cluster_slot * (bm_blocks << 3)); pr_info("%s:%d bm slot: %d offset: %llu\n", __func__, __LINE__, - bitmap->cluster_slot, (unsigned long long)bitmap->mddev->bitmap_info.offset); + bitmap->cluster_slot, offset); } if (bitmap->storage.file) { @@ -597,7 +599,7 @@ re_read: bitmap, bytes, sb_page); } else { err = read_sb_page(bitmap->mddev, - bitmap->mddev->bitmap_info.offset, + offset, sb_page, 0, sizeof(bitmap_super_t)); } @@ -611,8 +613,16 @@ re_read: daemon_sleep = le32_to_cpu(sb->daemon_sleep) * HZ; write_behind = le32_to_cpu(sb->write_behind); sectors_reserved = le32_to_cpu(sb->sectors_reserved); - nodes = le32_to_cpu(sb->nodes); - strlcpy(bitmap->mddev->bitmap_info.cluster_name, sb->cluster_name, 64); + /* XXX: This is a hack to ensure that we don't use clustering + * in case: + * - dm-raid is in use and + * - the nodes written in bitmap_sb is erroneous. + */ + if (!bitmap->mddev->sync_super) { + nodes = le32_to_cpu(sb->nodes); + strlcpy(bitmap->mddev->bitmap_info.cluster_name, + sb->cluster_name, 64); + } /* verify that the bitmap-specific fields are valid */ if (sb->magic != cpu_to_le32(BITMAP_MAGIC)) @@ -671,7 +681,7 @@ out: kunmap_atomic(sb); /* Assiging chunksize is required for "re_read" */ bitmap->mddev->bitmap_info.chunksize = chunksize; - if (nodes && (bitmap->cluster_slot < 0)) { + if (err == 0 && nodes && (bitmap->cluster_slot < 0)) { err = md_setup_cluster(bitmap->mddev, nodes); if (err) { pr_err("%s: Could not setup cluster service (%d)\n", @@ -1866,10 +1876,6 @@ int bitmap_copy_from_slot(struct mddev *mddev, int slot, if (IS_ERR(bitmap)) return PTR_ERR(bitmap); - rv = bitmap_read_sb(bitmap); - if (rv) - goto err; - rv = bitmap_init_from_disk(bitmap, 0); if (rv) goto err; diff --git a/drivers/md/dm-cache-target.c b/drivers/md/dm-cache-target.c index 1b4e175..b680da5 100644 --- a/drivers/md/dm-cache-target.c +++ b/drivers/md/dm-cache-target.c @@ -424,7 +424,6 @@ static void free_migration(struct dm_cache_migration *mg) wake_up(&cache->migration_wait); mempool_free(mg, cache->migration_pool); - wake_worker(cache); } static int prealloc_data_structs(struct cache *cache, struct prealloc *p) @@ -1947,6 +1946,7 @@ static int commit_if_needed(struct cache *cache) static void process_deferred_bios(struct cache *cache) { + bool prealloc_used = false; unsigned long flags; struct bio_list bios; struct bio *bio; @@ -1981,13 +1981,16 @@ static void process_deferred_bios(struct cache *cache) process_discard_bio(cache, &structs, bio); else process_bio(cache, &structs, bio); + prealloc_used = true; } - prealloc_free_structs(cache, &structs); + if (prealloc_used) + prealloc_free_structs(cache, &structs); } static void process_deferred_cells(struct cache *cache) { + bool prealloc_used = false; unsigned long flags; struct dm_bio_prison_cell *cell, *tmp; struct list_head cells; @@ -2015,9 +2018,11 @@ static void process_deferred_cells(struct cache *cache) } process_cell(cache, &structs, cell); + prealloc_used = true; } - prealloc_free_structs(cache, &structs); + if (prealloc_used) + prealloc_free_structs(cache, &structs); } static void process_deferred_flush_bios(struct cache *cache, bool submit_bios) @@ -2062,7 +2067,7 @@ static void process_deferred_writethrough_bios(struct cache *cache) static void writeback_some_dirty_blocks(struct cache *cache) { - int r = 0; + bool prealloc_used = false; dm_oblock_t oblock; dm_cblock_t cblock; struct prealloc structs; @@ -2072,23 +2077,21 @@ static void writeback_some_dirty_blocks(struct cache *cache) memset(&structs, 0, sizeof(structs)); while (spare_migration_bandwidth(cache)) { - if (prealloc_data_structs(cache, &structs)) - break; + if (policy_writeback_work(cache->policy, &oblock, &cblock, busy)) + break; /* no work to do */ - r = policy_writeback_work(cache->policy, &oblock, &cblock, busy); - if (r) - break; - - r = get_cell(cache, oblock, &structs, &old_ocell); - if (r) { + if (prealloc_data_structs(cache, &structs) || + get_cell(cache, oblock, &structs, &old_ocell)) { policy_set_dirty(cache->policy, oblock); break; } writeback(cache, &structs, oblock, cblock, old_ocell); + prealloc_used = true; } - prealloc_free_structs(cache, &structs); + if (prealloc_used) + prealloc_free_structs(cache, &structs); } /*---------------------------------------------------------------- @@ -3496,7 +3499,7 @@ static void cache_resume(struct dm_target *ti) * <#demotions> <#promotions> <#dirty> * <#features> <features>* * <#core args> <core args> - * <policy name> <#policy args> <policy args>* <cache metadata mode> + * <policy name> <#policy args> <policy args>* <cache metadata mode> <needs_check> */ static void cache_status(struct dm_target *ti, status_type_t type, unsigned status_flags, char *result, unsigned maxlen) @@ -3582,6 +3585,11 @@ static void cache_status(struct dm_target *ti, status_type_t type, else DMEMIT("rw "); + if (dm_cache_metadata_needs_check(cache->cmd)) + DMEMIT("needs_check "); + else + DMEMIT("- "); + break; case STATUSTYPE_TABLE: @@ -3820,7 +3828,7 @@ static void cache_io_hints(struct dm_target *ti, struct queue_limits *limits) static struct target_type cache_target = { .name = "cache", - .version = {1, 7, 0}, + .version = {1, 8, 0}, .module = THIS_MODULE, .ctr = cache_ctr, .dtr = cache_dtr, diff --git a/drivers/md/dm-thin.c b/drivers/md/dm-thin.c index c33f61a4..1c50c58 100644 --- a/drivers/md/dm-thin.c +++ b/drivers/md/dm-thin.c @@ -18,6 +18,7 @@ #include <linux/init.h> #include <linux/module.h> #include <linux/slab.h> +#include <linux/vmalloc.h> #include <linux/sort.h> #include <linux/rbtree.h> @@ -268,7 +269,7 @@ struct pool { process_mapping_fn process_prepared_mapping; process_mapping_fn process_prepared_discard; - struct dm_bio_prison_cell *cell_sort_array[CELL_SORT_ARRAY_SIZE]; + struct dm_bio_prison_cell **cell_sort_array; }; static enum pool_mode get_pool_mode(struct pool *pool); @@ -2281,18 +2282,23 @@ static void do_waker(struct work_struct *ws) queue_delayed_work(pool->wq, &pool->waker, COMMIT_PERIOD); } +static void notify_of_pool_mode_change_to_oods(struct pool *pool); + /* * We're holding onto IO to allow userland time to react. After the * timeout either the pool will have been resized (and thus back in - * PM_WRITE mode), or we degrade to PM_READ_ONLY and start erroring IO. + * PM_WRITE mode), or we degrade to PM_OUT_OF_DATA_SPACE w/ error_if_no_space. */ static void do_no_space_timeout(struct work_struct *ws) { struct pool *pool = container_of(to_delayed_work(ws), struct pool, no_space_timeout); - if (get_pool_mode(pool) == PM_OUT_OF_DATA_SPACE && !pool->pf.error_if_no_space) - set_pool_mode(pool, PM_READ_ONLY); + if (get_pool_mode(pool) == PM_OUT_OF_DATA_SPACE && !pool->pf.error_if_no_space) { + pool->pf.error_if_no_space = true; + notify_of_pool_mode_change_to_oods(pool); + error_retry_list(pool); + } } /*----------------------------------------------------------------*/ @@ -2370,6 +2376,14 @@ static void notify_of_pool_mode_change(struct pool *pool, const char *new_mode) dm_device_name(pool->pool_md), new_mode); } +static void notify_of_pool_mode_change_to_oods(struct pool *pool) +{ + if (!pool->pf.error_if_no_space) + notify_of_pool_mode_change(pool, "out-of-data-space (queue IO)"); + else + notify_of_pool_mode_change(pool, "out-of-data-space (error IO)"); +} + static bool passdown_enabled(struct pool_c *pt) { return pt->adjusted_pf.discard_passdown; @@ -2454,7 +2468,7 @@ static void set_pool_mode(struct pool *pool, enum pool_mode new_mode) * frequently seeing this mode. */ if (old_mode != new_mode) - notify_of_pool_mode_change(pool, "out-of-data-space"); + notify_of_pool_mode_change_to_oods(pool); pool->process_bio = process_bio_read_only; pool->process_discard = process_discard_bio; pool->process_cell = process_cell_read_only; @@ -2777,6 +2791,7 @@ static void __pool_destroy(struct pool *pool) { __pool_table_remove(pool); + vfree(pool->cell_sort_array); if (dm_pool_metadata_close(pool->pmd) < 0) DMWARN("%s: dm_pool_metadata_close() failed.", __func__); @@ -2889,6 +2904,13 @@ static struct pool *pool_create(struct mapped_device *pool_md, goto bad_mapping_pool; } + pool->cell_sort_array = vmalloc(sizeof(*pool->cell_sort_array) * CELL_SORT_ARRAY_SIZE); + if (!pool->cell_sort_array) { + *error = "Error allocating cell sort array"; + err_p = ERR_PTR(-ENOMEM); + goto bad_sort_array; + } + pool->ref_count = 1; pool->last_commit_jiffies = jiffies; pool->pool_md = pool_md; @@ -2897,6 +2919,8 @@ static struct pool *pool_create(struct mapped_device *pool_md, return pool; +bad_sort_array: + mempool_destroy(pool->mapping_pool); bad_mapping_pool: dm_deferred_set_destroy(pool->all_io_ds); bad_all_io_ds: @@ -3714,6 +3738,7 @@ static void emit_flags(struct pool_features *pf, char *result, * Status line is: * <transaction id> <used metadata sectors>/<total metadata sectors> * <used data sectors>/<total data sectors> <held metadata root> + * <pool mode> <discard config> <no space config> <needs_check> */ static void pool_status(struct dm_target *ti, status_type_t type, unsigned status_flags, char *result, unsigned maxlen) @@ -3815,6 +3840,11 @@ static void pool_status(struct dm_target *ti, status_type_t type, else DMEMIT("queue_if_no_space "); + if (dm_pool_metadata_needs_check(pool->pmd)) + DMEMIT("needs_check "); + else + DMEMIT("- "); + break; case STATUSTYPE_TABLE: @@ -3918,7 +3948,7 @@ static struct target_type pool_target = { .name = "thin-pool", .features = DM_TARGET_SINGLETON | DM_TARGET_ALWAYS_WRITEABLE | DM_TARGET_IMMUTABLE, - .version = {1, 15, 0}, + .version = {1, 16, 0}, .module = THIS_MODULE, .ctr = pool_ctr, .dtr = pool_dtr, @@ -4305,7 +4335,7 @@ static void thin_io_hints(struct dm_target *ti, struct queue_limits *limits) static struct target_type thin_target = { .name = "thin", - .version = {1, 15, 0}, + .version = {1, 16, 0}, .module = THIS_MODULE, .ctr = thin_ctr, .dtr = thin_dtr, diff --git a/drivers/md/dm.c b/drivers/md/dm.c index f331d88..ab37ae1 100644 --- a/drivers/md/dm.c +++ b/drivers/md/dm.c @@ -1067,13 +1067,10 @@ static void rq_end_stats(struct mapped_device *md, struct request *orig) */ static void rq_completed(struct mapped_device *md, int rw, bool run_queue) { - int nr_requests_pending; - atomic_dec(&md->pending[rw]); /* nudge anyone waiting on suspend queue */ - nr_requests_pending = md_in_flight(md); - if (!nr_requests_pending) + if (!md_in_flight(md)) wake_up(&md->wait); /* @@ -1085,8 +1082,7 @@ static void rq_completed(struct mapped_device *md, int rw, bool run_queue) if (run_queue) { if (md->queue->mq_ops) blk_mq_run_hw_queues(md->queue, true); - else if (!nr_requests_pending || - (nr_requests_pending >= md->queue->nr_congestion_on)) + else blk_run_queue_async(md->queue); } @@ -2281,8 +2277,6 @@ static void dm_init_old_md_queue(struct mapped_device *md) static void cleanup_mapped_device(struct mapped_device *md) { - cleanup_srcu_struct(&md->io_barrier); - if (md->wq) destroy_workqueue(md->wq); if (md->kworker_task) @@ -2294,6 +2288,8 @@ static void cleanup_mapped_device(struct mapped_device *md) if (md->bs) bioset_free(md->bs); + cleanup_srcu_struct(&md->io_barrier); + if (md->disk) { spin_lock(&_minor_lock); md->disk->private_data = NULL; diff --git a/drivers/md/md-cluster.c b/drivers/md/md-cluster.c index fcfc4b9..0072190 100644 --- a/drivers/md/md-cluster.c +++ b/drivers/md/md-cluster.c @@ -44,6 +44,7 @@ struct resync_info { /* md_cluster_info flags */ #define MD_CLUSTER_WAITING_FOR_NEWDISK 1 +#define MD_CLUSTER_SUSPEND_READ_BALANCING 2 struct md_cluster_info { @@ -275,6 +276,9 @@ clear_bit: static void recover_prep(void *arg) { + struct mddev *mddev = arg; + struct md_cluster_info *cinfo = mddev->cluster_info; + set_bit(MD_CLUSTER_SUSPEND_READ_BALANCING, &cinfo->state); } static void recover_slot(void *arg, struct dlm_slot *slot) @@ -307,6 +311,7 @@ static void recover_done(void *arg, struct dlm_slot *slots, cinfo->slot_number = our_slot; complete(&cinfo->completion); + clear_bit(MD_CLUSTER_SUSPEND_READ_BALANCING, &cinfo->state); } static const struct dlm_lockspace_ops md_ls_ops = { @@ -816,12 +821,17 @@ static void resync_finish(struct mddev *mddev) resync_send(mddev, RESYNCING, 0, 0); } -static int area_resyncing(struct mddev *mddev, sector_t lo, sector_t hi) +static int area_resyncing(struct mddev *mddev, int direction, + sector_t lo, sector_t hi) { struct md_cluster_info *cinfo = mddev->cluster_info; int ret = 0; struct suspend_info *s; + if ((direction == READ) && + test_bit(MD_CLUSTER_SUSPEND_READ_BALANCING, &cinfo->state)) + return 1; + spin_lock_irq(&cinfo->suspend_lock); if (list_empty(&cinfo->suspend_list)) goto out; diff --git a/drivers/md/md-cluster.h b/drivers/md/md-cluster.h index 6817ee0..00defe2 100644 --- a/drivers/md/md-cluster.h +++ b/drivers/md/md-cluster.h @@ -18,7 +18,7 @@ struct md_cluster_operations { int (*metadata_update_start)(struct mddev *mddev); int (*metadata_update_finish)(struct mddev *mddev); int (*metadata_update_cancel)(struct mddev *mddev); - int (*area_resyncing)(struct mddev *mddev, sector_t lo, sector_t hi); + int (*area_resyncing)(struct mddev *mddev, int direction, sector_t lo, sector_t hi); int (*add_new_disk_start)(struct mddev *mddev, struct md_rdev *rdev); int (*add_new_disk_finish)(struct mddev *mddev); int (*new_disk_ack)(struct mddev *mddev, bool ack); diff --git a/drivers/md/md.c b/drivers/md/md.c index d429c30..0c2a4e8 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -5382,6 +5382,8 @@ static void __md_stop(struct mddev *mddev) { struct md_personality *pers = mddev->pers; mddev_detach(mddev); + /* Ensure ->event_work is done */ + flush_workqueue(md_misc_wq); spin_lock(&mddev->lock); mddev->ready = 0; mddev->pers = NULL; @@ -7437,7 +7439,7 @@ int md_setup_cluster(struct mddev *mddev, int nodes) err = request_module("md-cluster"); if (err) { pr_err("md-cluster module not found.\n"); - return err; + return -ENOENT; } spin_lock(&pers_lock); diff --git a/drivers/md/persistent-data/dm-btree-remove.c b/drivers/md/persistent-data/dm-btree-remove.c index e04cfd2..9836c0a 100644 --- a/drivers/md/persistent-data/dm-btree-remove.c +++ b/drivers/md/persistent-data/dm-btree-remove.c @@ -309,8 +309,8 @@ static void redistribute3(struct dm_btree_info *info, struct btree_node *parent, if (s < 0 && nr_center < -s) { /* not enough in central node */ - shift(left, center, nr_center); - s = nr_center - target; + shift(left, center, -nr_center); + s += nr_center; shift(left, right, s); nr_right += s; } else @@ -323,7 +323,7 @@ static void redistribute3(struct dm_btree_info *info, struct btree_node *parent, if (s > 0 && nr_center < s) { /* not enough in central node */ shift(center, right, nr_center); - s = target - nr_center; + s -= nr_center; shift(left, right, s); nr_left -= s; } else diff --git a/drivers/md/persistent-data/dm-btree.c b/drivers/md/persistent-data/dm-btree.c index 200ac12..fdd3793 100644 --- a/drivers/md/persistent-data/dm-btree.c +++ b/drivers/md/persistent-data/dm-btree.c @@ -255,7 +255,7 @@ int dm_btree_del(struct dm_btree_info *info, dm_block_t root) int r; struct del_stack *s; - s = kmalloc(sizeof(*s), GFP_KERNEL); + s = kmalloc(sizeof(*s), GFP_NOIO); if (!s) return -ENOMEM; s->info = info; diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c index f80f1af..94f5b55 100644 --- a/drivers/md/raid1.c +++ b/drivers/md/raid1.c @@ -336,7 +336,7 @@ static void raid1_end_read_request(struct bio *bio, int error) spin_lock_irqsave(&conf->device_lock, flags); if (r1_bio->mddev->degraded == conf->raid_disks || (r1_bio->mddev->degraded == conf->raid_disks-1 && - !test_bit(Faulty, &conf->mirrors[mirror].rdev->flags))) + test_bit(In_sync, &conf->mirrors[mirror].rdev->flags))) uptodate = 1; spin_unlock_irqrestore(&conf->device_lock, flags); } @@ -541,7 +541,7 @@ static int read_balance(struct r1conf *conf, struct r1bio *r1_bio, int *max_sect if ((conf->mddev->recovery_cp < this_sector + sectors) || (mddev_is_clustered(conf->mddev) && - md_cluster_ops->area_resyncing(conf->mddev, this_sector, + md_cluster_ops->area_resyncing(conf->mddev, READ, this_sector, this_sector + sectors))) choose_first = 1; else @@ -1111,7 +1111,8 @@ static void make_request(struct mddev *mddev, struct bio * bio) ((bio_end_sector(bio) > mddev->suspend_lo && bio->bi_iter.bi_sector < mddev->suspend_hi) || (mddev_is_clustered(mddev) && - md_cluster_ops->area_resyncing(mddev, bio->bi_iter.bi_sector, bio_end_sector(bio))))) { + md_cluster_ops->area_resyncing(mddev, WRITE, + bio->bi_iter.bi_sector, bio_end_sector(bio))))) { /* As the suspend_* range is controlled by * userspace, we want an interruptible * wait. @@ -1124,7 +1125,7 @@ static void make_request(struct mddev *mddev, struct bio * bio) if (bio_end_sector(bio) <= mddev->suspend_lo || bio->bi_iter.bi_sector >= mddev->suspend_hi || (mddev_is_clustered(mddev) && - !md_cluster_ops->area_resyncing(mddev, + !md_cluster_ops->area_resyncing(mddev, WRITE, bio->bi_iter.bi_sector, bio_end_sector(bio)))) break; schedule(); diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c index 940f2f3..38c58e1 100644 --- a/drivers/md/raid10.c +++ b/drivers/md/raid10.c @@ -3556,6 +3556,7 @@ static struct r10conf *setup_conf(struct mddev *mddev) /* far_copies must be 1 */ conf->prev.stride = conf->dev_sectors; } + conf->reshape_safe = conf->reshape_progress; spin_lock_init(&conf->device_lock); INIT_LIST_HEAD(&conf->retry_list); @@ -3760,7 +3761,6 @@ static int run(struct mddev *mddev) } conf->offset_diff = min_offset_diff; - conf->reshape_safe = conf->reshape_progress; clear_bit(MD_RECOVERY_SYNC, &mddev->recovery); clear_bit(MD_RECOVERY_CHECK, &mddev->recovery); set_bit(MD_RECOVERY_RESHAPE, &mddev->recovery); @@ -4103,6 +4103,7 @@ static int raid10_start_reshape(struct mddev *mddev) conf->reshape_progress = size; } else conf->reshape_progress = 0; + conf->reshape_safe = conf->reshape_progress; spin_unlock_irq(&conf->device_lock); if (mddev->delta_disks && mddev->bitmap) { @@ -4170,6 +4171,7 @@ abort: rdev->new_data_offset = rdev->data_offset; smp_wmb(); conf->reshape_progress = MaxSector; + conf->reshape_safe = MaxSector; mddev->reshape_position = MaxSector; spin_unlock_irq(&conf->device_lock); return ret; @@ -4524,6 +4526,7 @@ static void end_reshape(struct r10conf *conf) md_finish_reshape(conf->mddev); smp_wmb(); conf->reshape_progress = MaxSector; + conf->reshape_safe = MaxSector; spin_unlock_irq(&conf->device_lock); /* read-ahead size must cover two whole stripes, which is diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c index 59e44e9..643d217 100644 --- a/drivers/md/raid5.c +++ b/drivers/md/raid5.c @@ -2162,6 +2162,9 @@ static int resize_stripes(struct r5conf *conf, int newsize) if (!sc) return -ENOMEM; + /* Need to ensure auto-resizing doesn't interfere */ + mutex_lock(&conf->cache_size_mutex); + for (i = conf->max_nr_stripes; i; i--) { nsh = alloc_stripe(sc, GFP_KERNEL); if (!nsh) @@ -2178,6 +2181,7 @@ static int resize_stripes(struct r5conf *conf, int newsize) kmem_cache_free(sc, nsh); } kmem_cache_destroy(sc); + mutex_unlock(&conf->cache_size_mutex); return -ENOMEM; } /* Step 2 - Must use GFP_NOIO now. @@ -2224,6 +2228,7 @@ static int resize_stripes(struct r5conf *conf, int newsize) } else err = -ENOMEM; + mutex_unlock(&conf->cache_size_mutex); /* Step 4, return new stripes to service */ while(!list_empty(&newstripes)) { nsh = list_entry(newstripes.next, struct stripe_head, lru); @@ -4061,8 +4066,10 @@ static void analyse_stripe(struct stripe_head *sh, struct stripe_head_state *s) &first_bad, &bad_sectors)) set_bit(R5_ReadRepl, &dev->flags); else { - if (rdev) + if (rdev && !test_bit(Faulty, &rdev->flags)) set_bit(R5_NeedReplace, &dev->flags); + else + clear_bit(R5_NeedReplace, &dev->flags); rdev = rcu_dereference(conf->disks[i].rdev); clear_bit(R5_ReadRepl, &dev->flags); } @@ -5857,12 +5864,14 @@ static void raid5d(struct md_thread *thread) pr_debug("%d stripes handled\n", handled); spin_unlock_irq(&conf->device_lock); - if (test_and_clear_bit(R5_ALLOC_MORE, &conf->cache_state)) { + if (test_and_clear_bit(R5_ALLOC_MORE, &conf->cache_state) && + mutex_trylock(&conf->cache_size_mutex)) { grow_one_stripe(conf, __GFP_NOWARN); /* Set flag even if allocation failed. This helps * slow down allocation requests when mem is short */ set_bit(R5_DID_ALLOC, &conf->cache_state); + mutex_unlock(&conf->cache_size_mutex); } async_tx_issue_pending_all(); @@ -5894,18 +5903,22 @@ raid5_set_cache_size(struct mddev *mddev, int size) return -EINVAL; conf->min_nr_stripes = size; + mutex_lock(&conf->cache_size_mutex); while (size < conf->max_nr_stripes && drop_one_stripe(conf)) ; + mutex_unlock(&conf->cache_size_mutex); err = md_allow_write(mddev); if (err) return err; + mutex_lock(&conf->cache_size_mutex); while (size > conf->max_nr_stripes) if (!grow_one_stripe(conf, GFP_KERNEL)) break; + mutex_unlock(&conf->cache_size_mutex); return 0; } @@ -6371,11 +6384,18 @@ static unsigned long raid5_cache_scan(struct shrinker *shrink, struct shrink_control *sc) { struct r5conf *conf = container_of(shrink, struct r5conf, shrinker); - int ret = 0; - while (ret < sc->nr_to_scan) { - if (drop_one_stripe(conf) == 0) - return SHRINK_STOP; - ret++; + unsigned long ret = SHRINK_STOP; + + if (mutex_trylock(&conf->cache_size_mutex)) { + ret= 0; + while (ret < sc->nr_to_scan) { + if (drop_one_stripe(conf) == 0) { + ret = SHRINK_STOP; + break; + } + ret++; + } + mutex_unlock(&conf->cache_size_mutex); } return ret; } @@ -6444,6 +6464,7 @@ static struct r5conf *setup_conf(struct mddev *mddev) goto abort; spin_lock_init(&conf->device_lock); seqcount_init(&conf->gen_lock); + mutex_init(&conf->cache_size_mutex); init_waitqueue_head(&conf->wait_for_quiescent); for (i = 0; i < NR_STRIPE_HASH_LOCKS; i++) { init_waitqueue_head(&conf->wait_for_stripe[i]); diff --git a/drivers/md/raid5.h b/drivers/md/raid5.h index 02c3bf8..d051442 100644 --- a/drivers/md/raid5.h +++ b/drivers/md/raid5.h @@ -482,7 +482,8 @@ struct r5conf { */ int active_name; char cache_name[2][32]; - struct kmem_cache *slab_cache; /* for allocating stripes */ + struct kmem_cache *slab_cache; /* for allocating stripes */ + struct mutex cache_size_mutex; /* Protect changes to cache size */ int seq_flush, seq_write; int quiesce; |