summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2014-01-24 17:41:50 -0800
committerLinus Torvalds <torvalds@linux-foundation.org>2014-01-24 17:41:50 -0800
commit5c85121bf618aece49155f6eea0d0b2c14c1a121 (patch)
tree2991bd0bf74f9e5a3ad4186d64514eee1ceb90cb
parent4d8880a0ee5b3cdf7927c6cf59a164f352e4f436 (diff)
parent7da9d450ab2843bf1db378c156acc6304dbc1c2b (diff)
downloadop-kernel-dev-5c85121bf618aece49155f6eea0d0b2c14c1a121.zip
op-kernel-dev-5c85121bf618aece49155f6eea0d0b2c14c1a121.tar.gz
Merge tag 'md/3.14' of git://neil.brown.name/md
Pull md updates from Neil Brown:
 "All bug fixes, two tagged for -stable"

* tag 'md/3.14' of git://neil.brown.name/md:
  md/raid5: close recently introduced race in stripe_head management.
  md/raid5: fix long-standing problem with bitmap handling on write failure.
  md: check command validity early in md_ioctl().
  md: ensure metadata is writen after raid level change.
  md/raid10: avoid fullsync when not necessary.
  md: allow a partially recovered device to be hot-added to an array.
  md: Change handling of save_raid_disk and metadata update during recovery.
-rw-r--r--drivers/md/md.c76
-rw-r--r--drivers/md/raid10.c3
-rw-r--r--drivers/md/raid5.c9
-rw-r--r--include/uapi/linux/raid/md_p.h6
4 files changed, 68 insertions, 26 deletions
diff --git a/drivers/md/md.c b/drivers/md/md.c
index 369d919..40c5313 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -1173,6 +1173,7 @@ static int super_90_validate(struct mddev *mddev, struct md_rdev *rdev)
desc->raid_disk < mddev->raid_disks */) {
set_bit(In_sync, &rdev->flags);
rdev->raid_disk = desc->raid_disk;
+ rdev->saved_raid_disk = desc->raid_disk;
} else if (desc->state & (1<<MD_DISK_ACTIVE)) {
/* active but not in sync implies recovery up to
* reshape position. We don't know exactly where
@@ -1671,10 +1672,14 @@ static int super_1_validate(struct mddev *mddev, struct md_rdev *rdev)
set_bit(Faulty, &rdev->flags);
break;
default:
+ rdev->saved_raid_disk = role;
if ((le32_to_cpu(sb->feature_map) &
- MD_FEATURE_RECOVERY_OFFSET))
+ MD_FEATURE_RECOVERY_OFFSET)) {
rdev->recovery_offset = le64_to_cpu(sb->recovery_offset);
- else
+ if (!(le32_to_cpu(sb->feature_map) &
+ MD_FEATURE_RECOVERY_BITMAP))
+ rdev->saved_raid_disk = -1;
+ } else
set_bit(In_sync, &rdev->flags);
rdev->raid_disk = role;
break;
@@ -1736,6 +1741,9 @@ static void super_1_sync(struct mddev *mddev, struct md_rdev *rdev)
cpu_to_le32(MD_FEATURE_RECOVERY_OFFSET);
sb->recovery_offset =
cpu_to_le64(rdev->recovery_offset);
+ if (rdev->saved_raid_disk >= 0 && mddev->bitmap)
+ sb->feature_map |=
+ cpu_to_le32(MD_FEATURE_RECOVERY_BITMAP);
}
if (test_bit(Replacement, &rdev->flags))
sb->feature_map |=
@@ -2477,8 +2485,7 @@ repeat:
if (rdev->sb_loaded != 1)
continue; /* no noise on spare devices */
- if (!test_bit(Faulty, &rdev->flags) &&
- rdev->saved_raid_disk == -1) {
+ if (!test_bit(Faulty, &rdev->flags)) {
md_super_write(mddev,rdev,
rdev->sb_start, rdev->sb_size,
rdev->sb_page);
@@ -2494,11 +2501,9 @@ repeat:
rdev->badblocks.size = 0;
}
- } else if (test_bit(Faulty, &rdev->flags))
+ } else
pr_debug("md: %s (skipping faulty)\n",
bdevname(rdev->bdev, b));
- else
- pr_debug("(skipping incremental s/r ");
if (mddev->level == LEVEL_MULTIPATH)
/* only need to write one superblock... */
@@ -2614,6 +2619,8 @@ state_store(struct md_rdev *rdev, const char *buf, size_t len)
* blocked - sets the Blocked flags
* -blocked - clears the Blocked and possibly simulates an error
* insync - sets Insync providing device isn't active
+ * -insync - clear Insync for a device with a slot assigned,
+ * so that it gets rebuilt based on bitmap
* write_error - sets WriteErrorSeen
* -write_error - clears WriteErrorSeen
*/
@@ -2662,6 +2669,11 @@ state_store(struct md_rdev *rdev, const char *buf, size_t len)
} else if (cmd_match(buf, "insync") && rdev->raid_disk == -1) {
set_bit(In_sync, &rdev->flags);
err = 0;
+ } else if (cmd_match(buf, "-insync") && rdev->raid_disk >= 0) {
+ clear_bit(In_sync, &rdev->flags);
+ rdev->saved_raid_disk = rdev->raid_disk;
+ rdev->raid_disk = -1;
+ err = 0;
} else if (cmd_match(buf, "write_error")) {
set_bit(WriteErrorSeen, &rdev->flags);
err = 0;
@@ -3589,6 +3601,8 @@ level_store(struct mddev *mddev, const char *buf, size_t len)
pers->run(mddev);
set_bit(MD_CHANGE_DEVS, &mddev->flags);
mddev_resume(mddev);
+ if (!mddev->thread)
+ md_update_sb(mddev, 1);
sysfs_notify(&mddev->kobj, NULL, "level");
md_new_event(mddev);
return rv;
@@ -5770,6 +5784,7 @@ static int add_new_disk(struct mddev * mddev, mdu_disk_info_t *info)
clear_bit(Bitmap_sync, &rdev->flags);
} else
rdev->raid_disk = -1;
+ rdev->saved_raid_disk = rdev->raid_disk;
} else
super_types[mddev->major_version].
validate_super(mddev, rdev);
@@ -5782,11 +5797,6 @@ static int add_new_disk(struct mddev * mddev, mdu_disk_info_t *info)
return -EINVAL;
}
- if (test_bit(In_sync, &rdev->flags))
- rdev->saved_raid_disk = rdev->raid_disk;
- else
- rdev->saved_raid_disk = -1;
-
clear_bit(In_sync, &rdev->flags); /* just to be sure */
if (info->state & (1<<MD_DISK_WRITEMOSTLY))
set_bit(WriteMostly, &rdev->flags);
@@ -6336,6 +6346,32 @@ static int md_getgeo(struct block_device *bdev, struct hd_geometry *geo)
return 0;
}
+static inline bool md_ioctl_valid(unsigned int cmd) /* true iff cmd is one of the ioctl commands listed in the switch below */
+{
+ switch (cmd) {
+ case ADD_NEW_DISK:
+ case BLKROSET:
+ case GET_ARRAY_INFO:
+ case GET_BITMAP_FILE:
+ case GET_DISK_INFO:
+ case HOT_ADD_DISK:
+ case HOT_REMOVE_DISK:
+ case PRINT_RAID_DEBUG:
+ case RAID_AUTORUN:
+ case RAID_VERSION:
+ case RESTART_ARRAY_RW:
+ case RUN_ARRAY:
+ case SET_ARRAY_INFO:
+ case SET_BITMAP_FILE:
+ case SET_DISK_FAULTY:
+ case STOP_ARRAY:
+ case STOP_ARRAY_RO:
+ return true; /* every case label above falls through to this single return */
+ default:
+ return false; /* any command not listed above yields false */
+ }
+}
+
static int md_ioctl(struct block_device *bdev, fmode_t mode,
unsigned int cmd, unsigned long arg)
{
@@ -6344,6 +6380,9 @@ static int md_ioctl(struct block_device *bdev, fmode_t mode,
struct mddev *mddev = NULL;
int ro;
+ if (!md_ioctl_valid(cmd))
+ return -ENOTTY;
+
switch (cmd) {
case RAID_VERSION:
case GET_ARRAY_INFO:
@@ -7718,7 +7757,8 @@ static int remove_and_add_spares(struct mddev *mddev,
!test_bit(Bitmap_sync, &rdev->flags)))
continue;
- rdev->recovery_offset = 0;
+ if (rdev->saved_raid_disk < 0)
+ rdev->recovery_offset = 0;
if (mddev->pers->
hot_add_disk(mddev, rdev) == 0) {
if (sysfs_link_rdev(mddev, rdev))
@@ -7938,14 +7978,10 @@ void md_reap_sync_thread(struct mddev *mddev)
mddev->pers->finish_reshape(mddev);
/* If array is no-longer degraded, then any saved_raid_disk
- * information must be scrapped. Also if any device is now
- * In_sync we must scrape the saved_raid_disk for that device
- * do the superblock for an incrementally recovered device
- * written out.
+ * information must be scrapped.
*/
- rdev_for_each(rdev, mddev)
- if (!mddev->degraded ||
- test_bit(In_sync, &rdev->flags))
+ if (!mddev->degraded)
+ rdev_for_each(rdev, mddev)
rdev->saved_raid_disk = -1;
md_update_sb(mddev, 1);
diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c
index 06eeb99..8d39d63 100644
--- a/drivers/md/raid10.c
+++ b/drivers/md/raid10.c
@@ -3747,7 +3747,8 @@ static int run(struct mddev *mddev)
!test_bit(In_sync, &disk->rdev->flags)) {
disk->head_position = 0;
mddev->degraded++;
- if (disk->rdev)
+ if (disk->rdev &&
+ disk->rdev->saved_raid_disk < 0)
conf->fullsync = 1;
}
disk->recovery_disabled = mddev->recovery_disabled - 1;
diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c
index cbb1571..03f82ab 100644
--- a/drivers/md/raid5.c
+++ b/drivers/md/raid5.c
@@ -675,8 +675,10 @@ get_active_stripe(struct r5conf *conf, sector_t sector,
|| !conf->inactive_blocked),
*(conf->hash_locks + hash));
conf->inactive_blocked = 0;
- } else
+ } else {
init_stripe(sh, sector, previous);
+ atomic_inc(&sh->count);
+ }
} else {
spin_lock(&conf->device_lock);
if (atomic_read(&sh->count)) {
@@ -695,13 +697,11 @@ get_active_stripe(struct r5conf *conf, sector_t sector,
sh->group = NULL;
}
}
+ atomic_inc(&sh->count);
spin_unlock(&conf->device_lock);
}
} while (sh == NULL);
- if (sh)
- atomic_inc(&sh->count);
-
spin_unlock_irq(conf->hash_locks + hash);
return sh;
}
@@ -2111,6 +2111,7 @@ static void raid5_end_write_request(struct bio *bi, int error)
set_bit(R5_MadeGoodRepl, &sh->dev[i].flags);
} else {
if (!uptodate) {
+ set_bit(STRIPE_DEGRADED, &sh->state);
set_bit(WriteErrorSeen, &rdev->flags);
set_bit(R5_WriteError, &sh->dev[i].flags);
if (!test_and_set_bit(WantReplacement, &rdev->flags))
diff --git a/include/uapi/linux/raid/md_p.h b/include/uapi/linux/raid/md_p.h
index f7cf7f3..49f4210 100644
--- a/include/uapi/linux/raid/md_p.h
+++ b/include/uapi/linux/raid/md_p.h
@@ -292,6 +292,9 @@ struct mdp_superblock_1 {
* backwards anyway.
*/
#define MD_FEATURE_NEW_OFFSET 64 /* new_offset must be honoured */
+#define MD_FEATURE_RECOVERY_BITMAP 128 /* recovery that is happening
+ * is guided by bitmap.
+ */
#define MD_FEATURE_ALL (MD_FEATURE_BITMAP_OFFSET \
|MD_FEATURE_RECOVERY_OFFSET \
|MD_FEATURE_RESHAPE_ACTIVE \
@@ -299,6 +302,7 @@ struct mdp_superblock_1 {
|MD_FEATURE_REPLACEMENT \
|MD_FEATURE_RESHAPE_BACKWARDS \
|MD_FEATURE_NEW_OFFSET \
+ |MD_FEATURE_RECOVERY_BITMAP \
)
-#endif
+#endif
OpenPOWER on IntegriCloud