summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorGuoqing Jiang <gqjiang@suse.com>2016-05-02 11:33:08 -0400
committerShaohua Li <shli@fb.com>2016-05-04 12:39:35 -0700
commit41a9a0dcf8954654467f979838938e39ef4da590 (patch)
treee36b1195c3ca80d7b795a5d4211a23c4f9540c2e
parent4810d9682971e8eee659f96e4f9d9154e3c6c0b4 (diff)
downloadop-kernel-dev-41a9a0dcf8954654467f979838938e39ef4da590.zip
op-kernel-dev-41a9a0dcf8954654467f979838938e39ef4da590.tar.gz
md-cluster: change resync lock from asynchronous to synchronous
If multiple nodes choose to attempt do resync at the same time they need to be serialized so they don't duplicate effort. This serialization is done by locking the 'resync' DLM lock. Currently if a node cannot get the lock immediately it doesn't request notification when the lock becomes available (i.e. DLM_LKF_NOQUEUE is set), so it may not reliably find out when it is safe to try again. Rather than trying to arrange an async wake-up when the lock becomes available, switch to using synchronous locking - this is a lot easier to think about. As it is not permitted to block in the 'raid1d' thread, move the locking to the resync thread. So the rsync thread is forked immediately, but it blocks until the resync lock is available. Once the lock is locked it checks again if any resync action is needed. A particular symptom of the current problem is that a node can get stuck with "resync=pending" indefinitely. Reviewed-by: NeilBrown <neilb@suse.com> Signed-off-by: Guoqing Jiang <gqjiang@suse.com> Signed-off-by: Shaohua Li <shli@fb.com>
-rw-r--r--drivers/md/md-cluster.c2
-rw-r--r--drivers/md/md.c23
2 files changed, 14 insertions, 11 deletions
diff --git a/drivers/md/md-cluster.c b/drivers/md/md-cluster.c
index dd97d42..12fbfec 100644
--- a/drivers/md/md-cluster.c
+++ b/drivers/md/md-cluster.c
@@ -937,7 +937,6 @@ static void metadata_update_cancel(struct mddev *mddev)
static int resync_start(struct mddev *mddev)
{
struct md_cluster_info *cinfo = mddev->cluster_info;
- cinfo->resync_lockres->flags |= DLM_LKF_NOQUEUE;
return dlm_lock_sync(cinfo->resync_lockres, DLM_LOCK_EX);
}
@@ -967,7 +966,6 @@ static int resync_info_update(struct mddev *mddev, sector_t lo, sector_t hi)
static int resync_finish(struct mddev *mddev)
{
struct md_cluster_info *cinfo = mddev->cluster_info;
- cinfo->resync_lockres->flags &= ~DLM_LKF_NOQUEUE;
dlm_unlock_sync(cinfo->resync_lockres);
return resync_info_update(mddev, 0, 0);
}
diff --git a/drivers/md/md.c b/drivers/md/md.c
index 14d3b37..4fd7d77 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -7786,6 +7786,7 @@ void md_do_sync(struct md_thread *thread)
char *desc, *action = NULL;
struct blk_plug plug;
bool cluster_resync_finished = false;
+ int ret;
/* just incase thread restarts... */
if (test_bit(MD_RECOVERY_DONE, &mddev->recovery))
@@ -7795,6 +7796,19 @@ void md_do_sync(struct md_thread *thread)
return;
}
+ if (mddev_is_clustered(mddev)) {
+ ret = md_cluster_ops->resync_start(mddev);
+ if (ret)
+ goto skip;
+
+ if (!(test_bit(MD_RECOVERY_SYNC, &mddev->recovery) ||
+ test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery) ||
+ test_bit(MD_RECOVERY_RECOVER, &mddev->recovery))
+ && ((unsigned long long)mddev->curr_resync_completed
+ < (unsigned long long)mddev->resync_max_sectors))
+ goto skip;
+ }
+
if (test_bit(MD_RECOVERY_SYNC, &mddev->recovery)) {
if (test_bit(MD_RECOVERY_CHECK, &mddev->recovery)) {
desc = "data-check";
@@ -8226,18 +8240,9 @@ static void md_start_sync(struct work_struct *ws)
struct mddev *mddev = container_of(ws, struct mddev, del_work);
int ret = 0;
- if (mddev_is_clustered(mddev)) {
- ret = md_cluster_ops->resync_start(mddev);
- if (ret) {
- mddev->sync_thread = NULL;
- goto out;
- }
- }
-
mddev->sync_thread = md_register_thread(md_do_sync,
mddev,
"resync");
-out:
if (!mddev->sync_thread) {
if (!(mddev_is_clustered(mddev) && ret == -EAGAIN))
printk(KERN_ERR "%s: could not start resync"
OpenPOWER on IntegriCloud