From 70bcecdb1534a7dcd82503b705c27a048d568c9d Mon Sep 17 00:00:00 2001 From: Goldwyn Rodrigues Date: Fri, 21 Aug 2015 10:33:39 -0500 Subject: md-cluster: Improve md_reload_sb to be less error prone md_reload_sb is too simplistic and it explicitly needs to determine the changes made by the writing node. However, there are multiple areas where a simple reload could fail. Instead, read the superblock of one of the "good" rdevs and update the necessary information: - read the superblock into a newly allocated page, by temporarily swapping out rdev->sb_page and calling ->load_super. - if that fails, return - if it succeeds, call check_sb_changes, which: 1. iterates over the list of active devices and checks the matching dev_roles[] value. If that is 'faulty', the device must be marked as faulty - call md_error to mark the device as faulty. Make sure not to set CHANGE_DEVS or wake up mddev->thread, or else it would initiate a resync process, which is the responsibility of the "primary" node. - clear the Blocked bit - Call remove_and_add_spares() to hot-remove the device. If the device is 'spare': - call remove_and_add_spares() to get the number of spares added in this operation. - Reduce mddev->degraded to mark the array as not degraded. 2. resets recovery_cp - read the rest of the rdevs to update recovery_offset. If recovery_offset is equal to MaxSector, call spare_active() to set it In_sync. This required that recovery_offset be initialized to MaxSector, as opposed to zero, so as to communicate the end of sync for an rdev. 
Signed-off-by: Goldwyn Rodrigues --- drivers/md/md-cluster.c | 27 ++++++++++++++++----------- 1 file changed, 16 insertions(+), 11 deletions(-) (limited to 'drivers/md/md-cluster.c') diff --git a/drivers/md/md-cluster.c b/drivers/md/md-cluster.c index 58eadc0..2eb3a50 100644 --- a/drivers/md/md-cluster.c +++ b/drivers/md/md-cluster.c @@ -427,8 +427,7 @@ static void process_add_new_disk(struct mddev *mddev, struct cluster_msg *cmsg) static void process_metadata_update(struct mddev *mddev, struct cluster_msg *msg) { struct md_cluster_info *cinfo = mddev->cluster_info; - - md_reload_sb(mddev); + md_reload_sb(mddev, le32_to_cpu(msg->raid_slot)); dlm_lock_sync(cinfo->no_new_dev_lockres, DLM_LOCK_CR); } @@ -834,11 +833,23 @@ static int metadata_update_finish(struct mddev *mddev) { struct md_cluster_info *cinfo = mddev->cluster_info; struct cluster_msg cmsg; - int ret; + struct md_rdev *rdev; + int ret = 0; memset(&cmsg, 0, sizeof(cmsg)); cmsg.type = cpu_to_le32(METADATA_UPDATED); - ret = __sendmsg(cinfo, &cmsg); + cmsg.raid_slot = -1; + /* Pick up a good active device number to send. + */ + rdev_for_each(rdev, mddev) + if (rdev->raid_disk > -1 && !test_bit(Faulty, &rdev->flags)) { + cmsg.raid_slot = cpu_to_le32(rdev->desc_nr); + break; + } + if (cmsg.raid_slot >= 0) + ret = __sendmsg(cinfo, &cmsg); + else + pr_warn("md-cluster: No good device id found to send\n"); unlock_comm(cinfo); return ret; } @@ -922,15 +933,9 @@ static int add_new_disk_start(struct mddev *mddev, struct md_rdev *rdev) static int add_new_disk_finish(struct mddev *mddev) { - struct cluster_msg cmsg; - struct md_cluster_info *cinfo = mddev->cluster_info; - int ret; /* Write sb and inform others */ md_update_sb(mddev, 1); - cmsg.type = METADATA_UPDATED; - ret = __sendmsg(cinfo, &cmsg); - unlock_comm(cinfo); - return ret; + return metadata_update_finish(mddev); } static int new_disk_ack(struct mddev *mddev, bool ack) -- cgit v1.1