diff options
author | Stefan Behrens <sbehrens@giantdisaster.de> | 2012-11-06 13:15:27 +0100 |
---|---|---|
committer | Josef Bacik <jbacik@fusionio.com> | 2012-12-12 17:15:42 -0500 |
commit | 8dabb7420f014ab0f9f04afae8ae046c0f48b270 (patch) | |
tree | 6342f353ac71003d749a776dd6dc6a18b1bd959b | |
parent | e93c89c1aaaaaec3487c4c18dd02360371790722 (diff) | |
download | op-kernel-dev-8dabb7420f014ab0f9f04afae8ae046c0f48b270.zip op-kernel-dev-8dabb7420f014ab0f9f04afae8ae046c0f48b270.tar.gz |
Btrfs: change core code of btrfs to support the device replace operations
This commit contains all the essential changes to the core code
of Btrfs that are needed to support the device replace procedure.
Signed-off-by: Stefan Behrens <sbehrens@giantdisaster.de>
Signed-off-by: Chris Mason <chris.mason@fusionio.com>
-rw-r--r-- | fs/btrfs/disk-io.c | 24 | ||||
-rw-r--r-- | fs/btrfs/reada.c | 17 | ||||
-rw-r--r-- | fs/btrfs/scrub.c | 7 | ||||
-rw-r--r-- | fs/btrfs/super.c | 13 | ||||
-rw-r--r-- | fs/btrfs/transaction.c | 7 | ||||
-rw-r--r-- | fs/btrfs/volumes.c | 54 | ||||
-rw-r--r-- | fs/btrfs/volumes.h | 3 |
7 files changed, 111 insertions, 14 deletions
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 0e41047..76b8250 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -45,6 +45,7 @@ #include "inode-map.h" #include "check-integrity.h" #include "rcu-string.h" +#include "dev-replace.h" #ifdef CONFIG_X86 #include <asm/cpufeature.h> @@ -2438,7 +2439,11 @@ int open_ctree(struct super_block *sb, goto fail_tree_roots; } - btrfs_close_extra_devices(fs_devices); + /* + * keep the device that is marked to be the target device for the + * dev_replace procedure + */ + btrfs_close_extra_devices(fs_info, fs_devices, 0); if (!fs_devices->latest_bdev) { printk(KERN_CRIT "btrfs: failed to read devices on %s\n", @@ -2510,6 +2515,14 @@ retry_root_backup: goto fail_block_groups; } + ret = btrfs_init_dev_replace(fs_info); + if (ret) { + pr_err("btrfs: failed to init dev_replace: %d\n", ret); + goto fail_block_groups; + } + + btrfs_close_extra_devices(fs_info, fs_devices, 1); + ret = btrfs_init_space_info(fs_info); if (ret) { printk(KERN_ERR "Failed to initial space info: %d\n", ret); @@ -2658,6 +2671,13 @@ retry_root_backup: return ret; } + ret = btrfs_resume_dev_replace_async(fs_info); + if (ret) { + pr_warn("btrfs: failed to resume dev_replace\n"); + close_ctree(tree_root); + return ret; + } + return 0; fail_qgroup: @@ -3300,6 +3320,8 @@ int close_ctree(struct btrfs_root *root) /* pause restriper - we want to resume on mount */ btrfs_pause_balance(fs_info); + btrfs_dev_replace_suspend_for_unmount(fs_info); + btrfs_scrub_cancel(fs_info); /* wait for any defraggers to finish */ diff --git a/fs/btrfs/reada.c b/fs/btrfs/reada.c index 9f363e1..c705a48 100644 --- a/fs/btrfs/reada.c +++ b/fs/btrfs/reada.c @@ -27,6 +27,7 @@ #include "volumes.h" #include "disk-io.h" #include "transaction.h" +#include "dev-replace.h" #undef DEBUG @@ -331,6 +332,7 @@ static struct reada_extent *reada_find_extent(struct btrfs_root *root, int nzones = 0; int i; unsigned long index = logical >> PAGE_CACHE_SHIFT; + int dev_replace_is_ongoing; 
spin_lock(&fs_info->reada_lock); re = radix_tree_lookup(&fs_info->reada_tree, index); @@ -392,6 +394,7 @@ static struct reada_extent *reada_find_extent(struct btrfs_root *root, } /* insert extent in reada_tree + all per-device trees, all or nothing */ + btrfs_dev_replace_lock(&fs_info->dev_replace); spin_lock(&fs_info->reada_lock); ret = radix_tree_insert(&fs_info->reada_tree, index, re); if (ret == -EEXIST) { @@ -399,13 +402,17 @@ static struct reada_extent *reada_find_extent(struct btrfs_root *root, BUG_ON(!re_exist); re_exist->refcnt++; spin_unlock(&fs_info->reada_lock); + btrfs_dev_replace_unlock(&fs_info->dev_replace); goto error; } if (ret) { spin_unlock(&fs_info->reada_lock); + btrfs_dev_replace_unlock(&fs_info->dev_replace); goto error; } prev_dev = NULL; + dev_replace_is_ongoing = btrfs_dev_replace_is_ongoing( + &fs_info->dev_replace); for (i = 0; i < nzones; ++i) { dev = bbio->stripes[i].dev; if (dev == prev_dev) { @@ -422,6 +429,14 @@ static struct reada_extent *reada_find_extent(struct btrfs_root *root, /* cannot read ahead on missing device */ continue; } + if (dev_replace_is_ongoing && + dev == fs_info->dev_replace.tgtdev) { + /* + * as this device is selected for reading only as + * a last resort, skip it for read ahead. 
+ */ + continue; + } prev_dev = dev; ret = radix_tree_insert(&dev->reada_extents, index, re); if (ret) { @@ -434,10 +449,12 @@ static struct reada_extent *reada_find_extent(struct btrfs_root *root, BUG_ON(fs_info == NULL); radix_tree_delete(&fs_info->reada_tree, index); spin_unlock(&fs_info->reada_lock); + btrfs_dev_replace_unlock(&fs_info->dev_replace); goto error; } } spin_unlock(&fs_info->reada_lock); + btrfs_dev_replace_unlock(&fs_info->dev_replace); kfree(bbio); return re; diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c index 61157a2..30cbf69 100644 --- a/fs/btrfs/scrub.c +++ b/fs/btrfs/scrub.c @@ -2843,12 +2843,17 @@ int btrfs_scrub_dev(struct btrfs_fs_info *fs_info, u64 devid, u64 start, return -EIO; } - if (dev->scrub_device) { + btrfs_dev_replace_lock(&fs_info->dev_replace); + if (dev->scrub_device || + (!is_dev_replace && + btrfs_dev_replace_is_ongoing(&fs_info->dev_replace))) { + btrfs_dev_replace_unlock(&fs_info->dev_replace); mutex_unlock(&fs_info->scrub_lock); mutex_unlock(&fs_info->fs_devices->device_list_mutex); scrub_workers_put(fs_info); return -EINPROGRESS; } + btrfs_dev_replace_unlock(&fs_info->dev_replace); sctx = scrub_setup_ctx(dev, is_dev_replace); if (IS_ERR(sctx)) { mutex_unlock(&fs_info->scrub_lock); diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index ad43806..def4f24 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -55,6 +55,7 @@ #include "export.h" #include "compression.h" #include "rcu-string.h" +#include "dev-replace.h" #define CREATE_TRACE_POINTS #include <trace/events/btrfs.h> @@ -1225,8 +1226,15 @@ static int btrfs_remount(struct super_block *sb, int *flags, char *data) return 0; if (*flags & MS_RDONLY) { + /* + * this also happens on 'umount -rf' or on shutdown, when + * the filesystem is busy. 
+ */ sb->s_flags |= MS_RDONLY; + btrfs_dev_replace_suspend_for_unmount(fs_info); + btrfs_scrub_cancel(fs_info); + ret = btrfs_commit_super(root); if (ret) goto restore; @@ -1263,6 +1271,11 @@ static int btrfs_remount(struct super_block *sb, int *flags, char *data) if (ret) goto restore; + ret = btrfs_resume_dev_replace_async(fs_info); + if (ret) { + pr_warn("btrfs: failed to resume dev_replace\n"); + goto restore; + } sb->s_flags &= ~MS_RDONLY; } diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index 7b29735..bcc6b65 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c @@ -30,6 +30,7 @@ #include "tree-log.h" #include "inode-map.h" #include "volumes.h" +#include "dev-replace.h" #define BTRFS_ROOT_TRANS_TAG 0 @@ -845,7 +846,9 @@ static noinline int commit_cowonly_roots(struct btrfs_trans_handle *trans, return ret; ret = btrfs_run_dev_stats(trans, root->fs_info); - BUG_ON(ret); + WARN_ON(ret); + ret = btrfs_run_dev_replace(trans, root->fs_info); + WARN_ON(ret); ret = btrfs_run_qgroups(trans, root->fs_info); BUG_ON(ret); @@ -868,6 +871,8 @@ static noinline int commit_cowonly_roots(struct btrfs_trans_handle *trans, switch_commit_root(fs_info->extent_root); up_write(&fs_info->extent_commit_sem); + btrfs_after_dev_replace_commit(fs_info); + return 0; } diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index 5777e6a..a4e0963 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -36,6 +36,7 @@ #include "check-integrity.h" #include "rcu-string.h" #include "math.h" +#include "dev-replace.h" static int init_first_rw_device(struct btrfs_trans_handle *trans, struct btrfs_root *root, @@ -505,7 +506,8 @@ error: return ERR_PTR(-ENOMEM); } -void btrfs_close_extra_devices(struct btrfs_fs_devices *fs_devices) +void btrfs_close_extra_devices(struct btrfs_fs_info *fs_info, + struct btrfs_fs_devices *fs_devices, int step) { struct btrfs_device *device, *next; @@ -528,6 +530,21 @@ again: continue; } + if (device->devid == BTRFS_DEV_REPLACE_DEVID) { + /* + 
* In the first step, keep the device which has + * the correct fsid and the devid that is used + * for the dev_replace procedure. + * In the second step, the dev_replace state is + * read from the device tree and it is known + * whether the procedure is really active or + * not, which means whether this device is + * used or whether it should be removed. + */ + if (step == 0 || device->is_tgtdev_for_dev_replace) { + continue; + } + } if (device->bdev) { blkdev_put(device->bdev, device->mode); device->bdev = NULL; @@ -536,7 +553,8 @@ again: if (device->writeable) { list_del_init(&device->dev_alloc_list); device->writeable = 0; - fs_devices->rw_devices--; + if (!device->is_tgtdev_for_dev_replace) + fs_devices->rw_devices--; } list_del_init(&device->dev_list); fs_devices->num_devices--; @@ -594,7 +612,7 @@ static int __btrfs_close_devices(struct btrfs_fs_devices *fs_devices) if (device->bdev) fs_devices->open_devices--; - if (device->writeable) { + if (device->writeable && !device->is_tgtdev_for_dev_replace) { list_del_init(&device->dev_alloc_list); fs_devices->rw_devices--; } @@ -718,7 +736,7 @@ static int __btrfs_open_devices(struct btrfs_fs_devices *fs_devices, fs_devices->rotating = 1; fs_devices->open_devices++; - if (device->writeable) { + if (device->writeable && !device->is_tgtdev_for_dev_replace) { fs_devices->rw_devices++; list_add(&device->dev_alloc_list, &fs_devices->alloc_list); @@ -1350,16 +1368,22 @@ int btrfs_rm_device(struct btrfs_root *root, char *device_path) root->fs_info->avail_system_alloc_bits | root->fs_info->avail_metadata_alloc_bits; - if ((all_avail & BTRFS_BLOCK_GROUP_RAID10) && - root->fs_info->fs_devices->num_devices <= 4) { + num_devices = root->fs_info->fs_devices->num_devices; + btrfs_dev_replace_lock(&root->fs_info->dev_replace); + if (btrfs_dev_replace_is_ongoing(&root->fs_info->dev_replace)) { + WARN_ON(num_devices < 1); + num_devices--; + } + btrfs_dev_replace_unlock(&root->fs_info->dev_replace); + + if ((all_avail & 
BTRFS_BLOCK_GROUP_RAID10) && num_devices <= 4) { printk(KERN_ERR "btrfs: unable to go below four devices " "on raid10\n"); ret = -EINVAL; goto out; } - if ((all_avail & BTRFS_BLOCK_GROUP_RAID1) && - root->fs_info->fs_devices->num_devices <= 2) { + if ((all_avail & BTRFS_BLOCK_GROUP_RAID1) && num_devices <= 2) { printk(KERN_ERR "btrfs: unable to go below two " "devices on raid1\n"); ret = -EINVAL; @@ -2935,6 +2959,7 @@ int btrfs_balance(struct btrfs_balance_control *bctl, u64 allowed; int mixed = 0; int ret; + u64 num_devices; if (btrfs_fs_closing(fs_info) || atomic_read(&fs_info->balance_pause_req) || @@ -2963,10 +2988,17 @@ int btrfs_balance(struct btrfs_balance_control *bctl, } } + num_devices = fs_info->fs_devices->num_devices; + btrfs_dev_replace_lock(&fs_info->dev_replace); + if (btrfs_dev_replace_is_ongoing(&fs_info->dev_replace)) { + BUG_ON(num_devices < 1); + num_devices--; + } + btrfs_dev_replace_unlock(&fs_info->dev_replace); allowed = BTRFS_AVAIL_ALLOC_BIT_SINGLE; - if (fs_info->fs_devices->num_devices == 1) + if (num_devices == 1) allowed |= BTRFS_BLOCK_GROUP_DUP; - else if (fs_info->fs_devices->num_devices < 4) + else if (num_devices < 4) allowed |= (BTRFS_BLOCK_GROUP_RAID0 | BTRFS_BLOCK_GROUP_RAID1); else allowed |= (BTRFS_BLOCK_GROUP_RAID0 | BTRFS_BLOCK_GROUP_RAID1 | @@ -3591,6 +3623,7 @@ static int __btrfs_alloc_chunk(struct btrfs_trans_handle *trans, devices_info[ndevs].total_avail = total_avail; devices_info[ndevs].dev = device; ++ndevs; + WARN_ON(ndevs > fs_devices->rw_devices); } /* @@ -4773,6 +4806,7 @@ static void fill_device_from_item(struct extent_buffer *leaf, device->io_align = btrfs_device_io_align(leaf, dev_item); device->io_width = btrfs_device_io_width(leaf, dev_item); device->sector_size = btrfs_device_sector_size(leaf, dev_item); + WARN_ON(device->devid == BTRFS_DEV_REPLACE_DEVID); device->is_tgtdev_for_dev_replace = 0; ptr = (unsigned long)btrfs_device_uuid(dev_item); diff --git a/fs/btrfs/volumes.h b/fs/btrfs/volumes.h index 
58d7937..37d0157 100644 --- a/fs/btrfs/volumes.h +++ b/fs/btrfs/volumes.h @@ -268,7 +268,8 @@ int btrfs_open_devices(struct btrfs_fs_devices *fs_devices, int btrfs_scan_one_device(const char *path, fmode_t flags, void *holder, struct btrfs_fs_devices **fs_devices_ret); int btrfs_close_devices(struct btrfs_fs_devices *fs_devices); -void btrfs_close_extra_devices(struct btrfs_fs_devices *fs_devices); +void btrfs_close_extra_devices(struct btrfs_fs_info *fs_info, + struct btrfs_fs_devices *fs_devices, int step); int btrfs_find_device_missing_or_by_path(struct btrfs_root *root, char *device_path, struct btrfs_device **device); |