diff options
Diffstat (limited to 'drivers/md')
-rw-r--r-- | drivers/md/md.c | 12 | ||||
-rw-r--r-- | drivers/md/raid5-cache.c | 46 | ||||
-rw-r--r-- | drivers/md/raid5.c | 98 | ||||
-rw-r--r-- | drivers/md/raid5.h | 4 |
4 files changed, 61 insertions, 99 deletions
diff --git a/drivers/md/md.c b/drivers/md/md.c index 67642ba..915e84d 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -7610,16 +7610,12 @@ EXPORT_SYMBOL(unregister_md_cluster_operations); int md_setup_cluster(struct mddev *mddev, int nodes) { - int err; - - err = request_module("md-cluster"); - if (err) { - pr_err("md-cluster module not found.\n"); - return -ENOENT; - } - + if (!md_cluster_ops) + request_module("md-cluster"); spin_lock(&pers_lock); + /* ensure module won't be unloaded */ if (!md_cluster_ops || !try_module_get(md_cluster_mod)) { + pr_err("can't find md-cluster module or get it's reference.\n"); spin_unlock(&pers_lock); return -ENOENT; } diff --git a/drivers/md/raid5-cache.c b/drivers/md/raid5-cache.c index 51f76dd..1b1ab4a 100644 --- a/drivers/md/raid5-cache.c +++ b/drivers/md/raid5-cache.c @@ -96,7 +96,6 @@ struct r5l_log { spinlock_t no_space_stripes_lock; bool need_cache_flush; - bool in_teardown; }; /* @@ -704,31 +703,22 @@ static void r5l_write_super_and_discard_space(struct r5l_log *log, mddev = log->rdev->mddev; /* - * This is to avoid a deadlock. r5l_quiesce holds reconfig_mutex and - * wait for this thread to finish. This thread waits for - * MD_CHANGE_PENDING clear, which is supposed to be done in - * md_check_recovery(). md_check_recovery() tries to get - * reconfig_mutex. Since r5l_quiesce already holds the mutex, - * md_check_recovery() fails, so the PENDING never get cleared. The - * in_teardown check workaround this issue. + * Discard could zero data, so before discard we must make sure + * superblock is updated to new log tail. Updating superblock (either + * directly call md_update_sb() or depend on md thread) must hold + * reconfig mutex. On the other hand, raid5_quiesce is called with + * reconfig_mutex hold. The first step of raid5_quiesce() is waitting + * for all IO finish, hence waitting for reclaim thread, while reclaim + * thread is calling this function and waitting for reconfig mutex. So + * there is a deadlock. We workaround this issue with a trylock. + * FIXME: we could miss discard if we can't take reconfig mutex */ - if (!log->in_teardown) { - set_mask_bits(&mddev->flags, 0, - BIT(MD_CHANGE_DEVS) | BIT(MD_CHANGE_PENDING)); - md_wakeup_thread(mddev->thread); - wait_event(mddev->sb_wait, - !test_bit(MD_CHANGE_PENDING, &mddev->flags) || - log->in_teardown); - /* - * r5l_quiesce could run after in_teardown check and hold - * mutex first. Superblock might get updated twice. - */ - if (log->in_teardown) - md_update_sb(mddev, 1); - } else { - WARN_ON(!mddev_is_locked(mddev)); - md_update_sb(mddev, 1); - } + set_mask_bits(&mddev->flags, 0, + BIT(MD_CHANGE_DEVS) | BIT(MD_CHANGE_PENDING)); + if (!mddev_trylock(mddev)) + return; + md_update_sb(mddev, 1); + mddev_unlock(mddev); /* discard IO error really doesn't matter, ignore it */ if (log->last_checkpoint < end) { @@ -827,7 +817,6 @@ void r5l_quiesce(struct r5l_log *log, int state) if (!log || state == 2) return; if (state == 0) { - log->in_teardown = 0; /* * This is a special case for hotadd. In suspend, the array has * no journal. In resume, journal is initialized as well as the @@ -838,11 +827,6 @@ void r5l_quiesce(struct r5l_log *log, int state) log->reclaim_thread = md_register_thread(r5l_reclaim_thread, log->rdev->mddev, "reclaim"); } else if (state == 1) { - /* - * at this point all stripes are finished, so io_unit is at - * least in STRIPE_END state - */ - log->in_teardown = 1; /* make sure r5l_write_super_and_discard_space exits */ mddev = log->rdev->mddev; wake_up(&mddev->sb_wait); diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c index da583bb..5287e79 100644 --- a/drivers/md/raid5.c +++ b/drivers/md/raid5.c @@ -2423,10 +2423,10 @@ static void raid5_end_read_request(struct bio * bi) } } rdev_dec_pending(rdev, conf->mddev); + bio_reset(bi); clear_bit(R5_LOCKED, &sh->dev[i].flags); set_bit(STRIPE_HANDLE, &sh->state); raid5_release_stripe(sh); - bio_reset(bi); } static void raid5_end_write_request(struct bio *bi) @@ -2498,6 +2498,7 @@ static void raid5_end_write_request(struct bio *bi) if (sh->batch_head && bi->bi_error && !replacement) set_bit(STRIPE_BATCH_ERR, &sh->batch_head->state); + bio_reset(bi); if (!test_and_clear_bit(R5_DOUBLE_LOCKED, &sh->dev[i].flags)) clear_bit(R5_LOCKED, &sh->dev[i].flags); set_bit(STRIPE_HANDLE, &sh->state); @@ -2505,7 +2506,6 @@ static void raid5_end_write_request(struct bio *bi) if (sh->batch_head && sh != sh->batch_head) raid5_release_stripe(sh->batch_head); - bio_reset(bi); } static void raid5_build_block(struct stripe_head *sh, int i, int previous) @@ -6349,22 +6349,20 @@ static int alloc_scratch_buffer(struct r5conf *conf, struct raid5_percpu *percpu return 0; } -static void raid5_free_percpu(struct r5conf *conf) +static int raid456_cpu_dead(unsigned int cpu, struct hlist_node *node) { - unsigned long cpu; + struct r5conf *conf = hlist_entry_safe(node, struct r5conf, node); + free_scratch_buffer(conf, per_cpu_ptr(conf->percpu, cpu)); + return 0; +} + +static void raid5_free_percpu(struct r5conf *conf) +{ if (!conf->percpu) return; -#ifdef CONFIG_HOTPLUG_CPU - unregister_cpu_notifier(&conf->cpu_notify); -#endif - - get_online_cpus(); - for_each_possible_cpu(cpu) - free_scratch_buffer(conf, per_cpu_ptr(conf->percpu, cpu)); - put_online_cpus(); - + cpuhp_state_remove_instance(CPUHP_MD_RAID5_PREPARE, &conf->node); free_percpu(conf->percpu); } @@ -6383,64 +6381,28 @@ static void free_conf(struct r5conf *conf) kfree(conf); } -#ifdef CONFIG_HOTPLUG_CPU -static int raid456_cpu_notify(struct notifier_block *nfb, unsigned long action, - void *hcpu) +static int raid456_cpu_up_prepare(unsigned int cpu, struct hlist_node *node) { - struct r5conf *conf = container_of(nfb, struct r5conf, cpu_notify); - long cpu = (long)hcpu; + struct r5conf *conf = hlist_entry_safe(node, struct r5conf, node); struct raid5_percpu *percpu = per_cpu_ptr(conf->percpu, cpu); - switch (action) { - case CPU_UP_PREPARE: - case CPU_UP_PREPARE_FROZEN: - if (alloc_scratch_buffer(conf, percpu)) { - pr_err("%s: failed memory allocation for cpu%ld\n", - __func__, cpu); - return notifier_from_errno(-ENOMEM); - } - break; - case CPU_DEAD: - case CPU_DEAD_FROZEN: - case CPU_UP_CANCELED: - case CPU_UP_CANCELED_FROZEN: - free_scratch_buffer(conf, per_cpu_ptr(conf->percpu, cpu)); - break; - default: - break; + if (alloc_scratch_buffer(conf, percpu)) { + pr_err("%s: failed memory allocation for cpu%u\n", + __func__, cpu); + return -ENOMEM; } - return NOTIFY_OK; + return 0; } -#endif static int raid5_alloc_percpu(struct r5conf *conf) { - unsigned long cpu; int err = 0; conf->percpu = alloc_percpu(struct raid5_percpu); if (!conf->percpu) return -ENOMEM; -#ifdef CONFIG_HOTPLUG_CPU - conf->cpu_notify.notifier_call = raid456_cpu_notify; - conf->cpu_notify.priority = 0; - err = register_cpu_notifier(&conf->cpu_notify); - if (err) - return err; -#endif - - get_online_cpus(); - for_each_present_cpu(cpu) { - err = alloc_scratch_buffer(conf, per_cpu_ptr(conf->percpu, cpu)); - if (err) { - pr_err("%s: failed memory allocation for cpu%ld\n", - __func__, cpu); - break; - } - } - put_online_cpus(); - + err = cpuhp_state_add_instance(CPUHP_MD_RAID5_PREPARE, &conf->node); if (!err) { conf->scribble_disks = max(conf->raid_disks, conf->previous_raid_disks); @@ -6639,6 +6601,16 @@ static struct r5conf *setup_conf(struct mddev *mddev) } conf->min_nr_stripes = NR_STRIPES; + if (mddev->reshape_position != MaxSector) { + int stripes = max_t(int, + ((mddev->chunk_sectors << 9) / STRIPE_SIZE) * 4, + ((mddev->new_chunk_sectors << 9) / STRIPE_SIZE) * 4); + conf->min_nr_stripes = max(NR_STRIPES, stripes); + if (conf->min_nr_stripes != NR_STRIPES) + printk(KERN_INFO + "md/raid:%s: force stripe size %d for reshape\n", + mdname(mddev), conf->min_nr_stripes); + } memory = conf->min_nr_stripes * (sizeof(struct stripe_head) + max_disks * ((sizeof(struct bio) + PAGE_SIZE))) / 1024; atomic_set(&conf->empty_inactive_list_nr, NR_STRIPE_HASH_LOCKS); @@ -7975,10 +7947,21 @@ static struct md_personality raid4_personality = static int __init raid5_init(void) { + int ret; + raid5_wq = alloc_workqueue("raid5wq", WQ_UNBOUND|WQ_MEM_RECLAIM|WQ_CPU_INTENSIVE|WQ_SYSFS, 0); if (!raid5_wq) return -ENOMEM; + + ret = cpuhp_setup_state_multi(CPUHP_MD_RAID5_PREPARE, + "md/raid5:prepare", + raid456_cpu_up_prepare, + raid456_cpu_dead); + if (ret) { + destroy_workqueue(raid5_wq); + return ret; + } register_md_personality(&raid6_personality); register_md_personality(&raid5_personality); register_md_personality(&raid4_personality); @@ -7990,6 +7973,7 @@ static void raid5_exit(void) unregister_md_personality(&raid6_personality); unregister_md_personality(&raid5_personality); unregister_md_personality(&raid4_personality); + cpuhp_remove_multi_state(CPUHP_MD_RAID5_PREPARE); destroy_workqueue(raid5_wq); } diff --git a/drivers/md/raid5.h b/drivers/md/raid5.h index 517d4b6..57ec49f 100644 --- a/drivers/md/raid5.h +++ b/drivers/md/raid5.h @@ -512,9 +512,7 @@ struct r5conf { } __percpu *percpu; int scribble_disks; int scribble_sectors; -#ifdef CONFIG_HOTPLUG_CPU - struct notifier_block cpu_notify; -#endif + struct hlist_node node; /* * Free stripes pool |