From f7bee80945155ad0326916486dabc38428c6cdef Mon Sep 17 00:00:00 2001 From: Krzysztof Wojcik Date: Mon, 14 Feb 2011 10:01:41 +1100 Subject: md: Fix raid1->raid0 takeover Takeover raid1->raid0 not succeded. Kernel message is shown: "md/raid0:md126: too few disks (1 of 2) - aborting!" Problem was that we weren't updating ->raid_disks for that takeover, unlike all the others. Signed-off-by: Krzysztof Wojcik Signed-off-by: NeilBrown --- drivers/md/raid0.c | 1 + 1 file changed, 1 insertion(+) (limited to 'drivers/md') diff --git a/drivers/md/raid0.c b/drivers/md/raid0.c index 637a968..75671df 100644 --- a/drivers/md/raid0.c +++ b/drivers/md/raid0.c @@ -670,6 +670,7 @@ static void *raid0_takeover_raid1(mddev_t *mddev) mddev->new_layout = 0; mddev->new_chunk_sectors = 128; /* by default set chunk size to 64k */ mddev->delta_disks = 1 - mddev->raid_disks; + mddev->raid_disks = 1; /* make sure it will be not marked as dirty */ mddev->recovery_cp = MaxSector; -- cgit v1.1 From cbe6ef1d2622e08e272600b3cb6040bed60f0450 Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Wed, 16 Feb 2011 13:58:38 +1100 Subject: md: don't set_capacity before array is active. If the desired size of an array is set (via sysfs) before the array is active (which is the normal sequence), we currrently call set_capacity immediately. This means that a subsequent 'open' (as can be caused by some udev-triggers program) will notice the new size and try to probe for partitions. However as the array isn't quite ready yet the read will fail. Then when the array is read, as the size doesn't change again we don't try to re-probe. So when setting array size via sysfs, only call set_capacity if the array is already active. Signed-off-by: NeilBrown --- drivers/md/md.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'drivers/md') diff --git a/drivers/md/md.c b/drivers/md/md.c index 0cc30ec..6818ff4 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -4138,10 +4138,10 @@ array_size_store(mddev_t *mddev, const char *buf, size_t len) } mddev->array_sectors = sectors; - set_capacity(mddev->gendisk, mddev->array_sectors); - if (mddev->pers) + if (mddev->pers) { + set_capacity(mddev->gendisk, mddev->array_sectors); revalidate_disk(mddev->gendisk); - + } return len; } -- cgit v1.1 From 8f5f02c460b7ca74ce55ce126ce0c1e58a3f923d Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Wed, 16 Feb 2011 13:58:51 +1100 Subject: md: correctly handle probe of an 'mdp' device. 'mdp' devices are md devices with preallocated device numbers for partitions. As such it is possible to mknod and open a partition before opening the whole device. this causes md_probe() to be called with a device number of a partition, which in-turn calls mddev_find with such a number. However mddev_find expects the number of a 'whole device' and does the wrong thing with partition numbers. So add code to mddev_find to remove the 'partition' part of a device number and just work with the 'whole device'. This patch addresses https://bugzilla.kernel.org/show_bug.cgi?id=28652 Reported-by: hkmaly@bigfoot.com Signed-off-by: NeilBrown Cc: --- drivers/md/md.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'drivers/md') diff --git a/drivers/md/md.c b/drivers/md/md.c index 6818ff4..330addf 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -553,6 +553,9 @@ static mddev_t * mddev_find(dev_t unit) { mddev_t *mddev, *new = NULL; + if (unit && MAJOR(unit) != MD_MAJOR) + unit &= ~((1< Date: Mon, 21 Feb 2011 18:25:57 +1100 Subject: md: avoid spinlock problem in blk_throtl_exit blk_throtl_exit assumes that ->queue_lock still exists, so make sure that it does. To do this, we stop redirecting ->queue_lock to conf->device_lock and leave it pointing where it is initialised - __queue_lock. As the blk_plug functions check the ->queue_lock is held, we now take that spin_lock explicitly around the plug functions. We don't need the locking, just the warning removal. This is needed for any kernel with the blk_throtl code, which is which is 2.6.37 and later. Cc: stable@kernel.org Signed-off-by: NeilBrown --- drivers/md/linear.c | 1 - drivers/md/multipath.c | 1 - drivers/md/raid0.c | 1 - drivers/md/raid1.c | 6 ++++-- drivers/md/raid10.c | 7 ++++--- drivers/md/raid5.c | 1 - 6 files changed, 8 insertions(+), 9 deletions(-) (limited to 'drivers/md') diff --git a/drivers/md/linear.c b/drivers/md/linear.c index 8a2f767..0ed7f6b 100644 --- a/drivers/md/linear.c +++ b/drivers/md/linear.c @@ -216,7 +216,6 @@ static int linear_run (mddev_t *mddev) if (md_check_no_bitmap(mddev)) return -EINVAL; - mddev->queue->queue_lock = &mddev->queue->__queue_lock; conf = linear_conf(mddev, mddev->raid_disks); if (!conf) diff --git a/drivers/md/multipath.c b/drivers/md/multipath.c index 6d7ddf3..3a62d44 100644 --- a/drivers/md/multipath.c +++ b/drivers/md/multipath.c @@ -435,7 +435,6 @@ static int multipath_run (mddev_t *mddev) * bookkeeping area. [whatever we allocate in multipath_run(), * should be freed in multipath_stop()] */ - mddev->queue->queue_lock = &mddev->queue->__queue_lock; conf = kzalloc(sizeof(multipath_conf_t), GFP_KERNEL); mddev->private = conf; diff --git a/drivers/md/raid0.c b/drivers/md/raid0.c index 75671df..c0ac457 100644 --- a/drivers/md/raid0.c +++ b/drivers/md/raid0.c @@ -361,7 +361,6 @@ static int raid0_run(mddev_t *mddev) if (md_check_no_bitmap(mddev)) return -EINVAL; blk_queue_max_hw_sectors(mddev->queue, mddev->chunk_sectors); - mddev->queue->queue_lock = &mddev->queue->__queue_lock; /* if private is not null, we are here after takeover */ if (mddev->private == NULL) { diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c index a23ffa3..06cd712 100644 --- a/drivers/md/raid1.c +++ b/drivers/md/raid1.c @@ -593,7 +593,10 @@ static int flush_pending_writes(conf_t *conf) if (conf->pending_bio_list.head) { struct bio *bio; bio = bio_list_get(&conf->pending_bio_list); + /* Only take the spinlock to quiet a warning */ + spin_lock(conf->mddev->queue->queue_lock); blk_remove_plug(conf->mddev->queue); + spin_unlock(conf->mddev->queue->queue_lock); spin_unlock_irq(&conf->device_lock); /* flush any pending bitmap writes to * disk before proceeding w/ I/O */ @@ -959,7 +962,7 @@ static int make_request(mddev_t *mddev, struct bio * bio) atomic_inc(&r1_bio->remaining); spin_lock_irqsave(&conf->device_lock, flags); bio_list_add(&conf->pending_bio_list, mbio); - blk_plug_device(mddev->queue); + blk_plug_device_unlocked(mddev->queue); spin_unlock_irqrestore(&conf->device_lock, flags); } r1_bio_write_done(r1_bio, bio->bi_vcnt, behind_pages, behind_pages != NULL); @@ -2021,7 +2024,6 @@ static int run(mddev_t *mddev) if (IS_ERR(conf)) return PTR_ERR(conf); - mddev->queue->queue_lock = &conf->device_lock; list_for_each_entry(rdev, &mddev->disks, same_set) { disk_stack_limits(mddev->gendisk, rdev->bdev, rdev->data_offset << 9); diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c index 3b607b2..747d061 100644 --- a/drivers/md/raid10.c +++ b/drivers/md/raid10.c @@ -662,7 +662,10 @@ static int flush_pending_writes(conf_t *conf) if (conf->pending_bio_list.head) { struct bio *bio; bio = bio_list_get(&conf->pending_bio_list); + /* Spinlock only taken to quiet a warning */ + spin_lock(conf->mddev->queue->queue_lock); blk_remove_plug(conf->mddev->queue); + spin_unlock(conf->mddev->queue->queue_lock); spin_unlock_irq(&conf->device_lock); /* flush any pending bitmap writes to disk * before proceeding w/ I/O */ @@ -971,7 +974,7 @@ static int make_request(mddev_t *mddev, struct bio * bio) atomic_inc(&r10_bio->remaining); spin_lock_irqsave(&conf->device_lock, flags); bio_list_add(&conf->pending_bio_list, mbio); - blk_plug_device(mddev->queue); + blk_plug_device_unlocked(mddev->queue); spin_unlock_irqrestore(&conf->device_lock, flags); } @@ -2304,8 +2307,6 @@ static int run(mddev_t *mddev) if (!conf) goto out; - mddev->queue->queue_lock = &conf->device_lock; - mddev->thread = conf->thread; conf->thread = NULL; diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c index 7028128..78536fd 100644 --- a/drivers/md/raid5.c +++ b/drivers/md/raid5.c @@ -5204,7 +5204,6 @@ static int run(mddev_t *mddev) mddev->queue->backing_dev_info.congested_data = mddev; mddev->queue->backing_dev_info.congested_fn = raid5_congested; - mddev->queue->queue_lock = &conf->device_lock; mddev->queue->unplug_fn = raid5_unplug_queue; chunk_size = mddev->chunk_sectors << 9; -- cgit v1.1 From f0b4f7e2f29af678bd9af43422c537dcb6008603 Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Thu, 24 Feb 2011 17:26:41 +1100 Subject: md: Fix - again - partition detection when array becomes active Revert b821eaa572fd737faaf6928ba046e571526c36c6 and f3b99be19ded511a1bf05a148276239d9f13eefa When I wrote the first of these I had a wrong idea about the lifetime of 'struct block_device'. It can disappear at any time that the block device is not open if it falls out of the inode cache. So relying on the 'size' recorded with it to detect when the device size has changed and so we need to revalidate, is wrong. Rather, we really do need the 'changed' attribute stored directly in the mddev and set/tested as appropriate. Without this patch, a sequence of: mknod / open / close / unlink (which can cause a block_device to be created and then destroyed) will result in a rescan of the partition table and consequence removal and addition of partitions. Several of these in a row can get udev racing to create and unlink and other code can get confused. With the patch, the rescan is only performed when needed and so there are no races. This is suitable for any stable kernel from 2.6.35. Reported-by: "Wojcik, Krzysztof" Signed-off-by: NeilBrown Cc: stable@kernel.org --- drivers/md/md.c | 22 +++++++++++++++++++++- drivers/md/md.h | 2 ++ 2 files changed, 23 insertions(+), 1 deletion(-) (limited to 'drivers/md') diff --git a/drivers/md/md.c b/drivers/md/md.c index 330addf..818313e 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -4627,6 +4627,7 @@ static int do_md_run(mddev_t *mddev) } set_capacity(mddev->gendisk, mddev->array_sectors); revalidate_disk(mddev->gendisk); + mddev->changed = 1; kobject_uevent(&disk_to_dev(mddev->gendisk)->kobj, KOBJ_CHANGE); out: return err; @@ -4715,6 +4716,7 @@ static void md_clean(mddev_t *mddev) mddev->sync_speed_min = mddev->sync_speed_max = 0; mddev->recovery = 0; mddev->in_sync = 0; + mddev->changed = 0; mddev->degraded = 0; mddev->safemode = 0; mddev->bitmap_info.offset = 0; @@ -4830,6 +4832,7 @@ static int do_md_stop(mddev_t * mddev, int mode, int is_open) set_capacity(disk, 0); mutex_unlock(&mddev->open_mutex); + mddev->changed = 1; revalidate_disk(disk); if (mddev->ro) @@ -6014,7 +6017,7 @@ static int md_open(struct block_device *bdev, fmode_t mode) atomic_inc(&mddev->openers); mutex_unlock(&mddev->open_mutex); - check_disk_size_change(mddev->gendisk, bdev); + check_disk_change(bdev); out: return err; } @@ -6029,6 +6032,21 @@ static int md_release(struct gendisk *disk, fmode_t mode) return 0; } + +static int md_media_changed(struct gendisk *disk) +{ + mddev_t *mddev = disk->private_data; + + return mddev->changed; +} + +static int md_revalidate(struct gendisk *disk) +{ + mddev_t *mddev = disk->private_data; + + mddev->changed = 0; + return 0; +} static const struct block_device_operations md_fops = { .owner = THIS_MODULE, @@ -6039,6 +6057,8 @@ static const struct block_device_operations md_fops = .compat_ioctl = md_compat_ioctl, #endif .getgeo = md_getgeo, + .media_changed = md_media_changed, + .revalidate_disk= md_revalidate, }; static int md_thread(void * arg) diff --git a/drivers/md/md.h b/drivers/md/md.h index 7e90b85..12215d4 100644 --- a/drivers/md/md.h +++ b/drivers/md/md.h @@ -274,6 +274,8 @@ struct mddev_s atomic_t active; /* general refcount */ atomic_t openers; /* number of active opens */ + int changed; /* True if we might need to + * reread partition info */ int degraded; /* whether md should consider * adding a spare */ -- cgit v1.1