diff options
Diffstat (limited to 'fs/btrfs/volumes.c')
-rw-r--r-- | fs/btrfs/volumes.c | 579 |
1 files changed, 484 insertions, 95 deletions
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index ef41f28..7782020 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -23,6 +23,7 @@ #include <linux/random.h> #include <linux/iocontext.h> #include <linux/capability.h> +#include <linux/ratelimit.h> #include <linux/kthread.h> #include <asm/div64.h> #include "compat.h" @@ -39,6 +40,8 @@ static int init_first_rw_device(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct btrfs_device *device); static int btrfs_relocate_sys_chunks(struct btrfs_root *root); +static void __btrfs_reset_dev_stats(struct btrfs_device *dev); +static void btrfs_dev_stat_print_on_load(struct btrfs_device *device); static DEFINE_MUTEX(uuid_mutex); static LIST_HEAD(fs_uuids); @@ -67,7 +70,7 @@ static void free_fs_devices(struct btrfs_fs_devices *fs_devices) kfree(fs_devices); } -int btrfs_cleanup_fs_uuids(void) +void btrfs_cleanup_fs_uuids(void) { struct btrfs_fs_devices *fs_devices; @@ -77,7 +80,6 @@ int btrfs_cleanup_fs_uuids(void) list_del(&fs_devices->list); free_fs_devices(fs_devices); } - return 0; } static noinline struct btrfs_device *__find_device(struct list_head *head, @@ -130,7 +132,7 @@ static void requeue_list(struct btrfs_pending_bios *pending_bios, * the list if the block device is congested. This way, multiple devices * can make progress from a single worker thread. */ -static noinline int run_scheduled_bios(struct btrfs_device *device) +static noinline void run_scheduled_bios(struct btrfs_device *device) { struct bio *pending; struct backing_dev_info *bdi; @@ -316,7 +318,6 @@ loop_lock: done: blk_finish_plug(&plug); - return 0; } static void pending_bios_fn(struct btrfs_work *work) @@ -363,6 +364,7 @@ static noinline int device_list_add(const char *path, return -ENOMEM; } device->devid = devid; + device->dev_stats_valid = 0; device->work.func = pending_bios_fn; memcpy(device->uuid, disk_super->dev_item.uuid, BTRFS_UUID_SIZE); @@ -455,7 +457,7 @@ error: return ERR_PTR(-ENOMEM); } -int btrfs_close_extra_devices(struct btrfs_fs_devices *fs_devices) +void btrfs_close_extra_devices(struct btrfs_fs_devices *fs_devices) { struct btrfs_device *device, *next; @@ -503,7 +505,6 @@ again: fs_devices->latest_trans = latest_transid; mutex_unlock(&uuid_mutex); - return 0; } static void __free_device(struct work_struct *work) @@ -552,10 +553,10 @@ static int __btrfs_close_devices(struct btrfs_fs_devices *fs_devices) fs_devices->num_can_discard--; new_device = kmalloc(sizeof(*new_device), GFP_NOFS); - BUG_ON(!new_device); + BUG_ON(!new_device); /* -ENOMEM */ memcpy(new_device, device, sizeof(*new_device)); new_device->name = kstrdup(device->name, GFP_NOFS); - BUG_ON(device->name && !new_device->name); + BUG_ON(device->name && !new_device->name); /* -ENOMEM */ new_device->bdev = NULL; new_device->writeable = 0; new_device->in_fs_metadata = 0; @@ -625,6 +626,8 @@ static int __btrfs_open_devices(struct btrfs_fs_devices *fs_devices, printk(KERN_INFO "open %s failed\n", device->name); goto error; } + filemap_write_and_wait(bdev->bd_inode->i_mapping); + invalidate_bdev(bdev); set_blocksize(bdev, 4096); bh = btrfs_read_dev_super(bdev); @@ -1039,8 +1042,10 @@ again: leaf = path->nodes[0]; extent = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_dev_extent); + } else { + btrfs_error(root->fs_info, ret, "Slot search failed"); + goto out; } - BUG_ON(ret); if (device->bytes_used > 0) { u64 len = btrfs_dev_extent_length(leaf, extent); @@ -1050,7 +1055,10 @@ again: spin_unlock(&root->fs_info->free_chunk_lock); } ret = btrfs_del_item(trans, root, path); - + if (ret) { + btrfs_error(root->fs_info, ret, + "Failed to remove dev extent item"); + } out: btrfs_free_path(path); return ret; @@ -1078,7 +1086,8 @@ int btrfs_alloc_dev_extent(struct btrfs_trans_handle *trans, key.type = BTRFS_DEV_EXTENT_KEY; ret = btrfs_insert_empty_item(trans, root, path, &key, sizeof(*extent)); - BUG_ON(ret); + if (ret) + goto out; leaf = path->nodes[0]; extent = btrfs_item_ptr(leaf, path->slots[0], @@ -1093,6 +1102,7 @@ int btrfs_alloc_dev_extent(struct btrfs_trans_handle *trans, btrfs_set_dev_extent_length(leaf, extent, num_bytes); btrfs_mark_buffer_dirty(leaf); +out: btrfs_free_path(path); return ret; } @@ -1118,7 +1128,7 @@ static noinline int find_next_chunk(struct btrfs_root *root, if (ret < 0) goto error; - BUG_ON(ret == 0); + BUG_ON(ret == 0); /* Corruption */ ret = btrfs_previous_item(root, path, 0, BTRFS_CHUNK_ITEM_KEY); if (ret) { @@ -1162,7 +1172,7 @@ static noinline int find_next_devid(struct btrfs_root *root, u64 *objectid) if (ret < 0) goto error; - BUG_ON(ret == 0); + BUG_ON(ret == 0); /* Corruption */ ret = btrfs_previous_item(root, path, BTRFS_DEV_ITEMS_OBJECTID, BTRFS_DEV_ITEM_KEY); @@ -1350,6 +1360,7 @@ int btrfs_rm_device(struct btrfs_root *root, char *device_path) } set_blocksize(bdev, 4096); + invalidate_bdev(bdev); bh = btrfs_read_dev_super(bdev); if (!bh) { ret = -EINVAL; @@ -1596,7 +1607,7 @@ next_slot: (unsigned long)btrfs_device_fsid(dev_item), BTRFS_UUID_SIZE); device = btrfs_find_device(root, devid, dev_uuid, fs_uuid); - BUG_ON(!device); + BUG_ON(!device); /* Logic error */ if (device->fs_devices->seeding) { btrfs_set_device_generation(leaf, dev_item, @@ -1626,7 +1637,7 @@ int btrfs_init_new_device(struct btrfs_root *root, char *device_path) int ret = 0; if ((sb->s_flags & MS_RDONLY) && !root->fs_info->fs_devices->seeding) - return -EINVAL; + return -EROFS; bdev = blkdev_get_by_path(device_path, FMODE_WRITE | FMODE_EXCL, root->fs_info->bdev_holder); @@ -1706,7 +1717,7 @@ int btrfs_init_new_device(struct btrfs_root *root, char *device_path) if (seeding_dev) { sb->s_flags &= ~MS_RDONLY; ret = btrfs_prepare_sprout(root); - BUG_ON(ret); + BUG_ON(ret); /* -ENOMEM */ } device->fs_devices = root->fs_info->fs_devices; @@ -1744,11 +1755,15 @@ int btrfs_init_new_device(struct btrfs_root *root, char *device_path) if (seeding_dev) { ret = init_first_rw_device(trans, root, device); - BUG_ON(ret); + if (ret) + goto error_trans; ret = btrfs_finish_sprout(trans, root); - BUG_ON(ret); + if (ret) + goto error_trans; } else { ret = btrfs_add_device(trans, root, device); + if (ret) + goto error_trans; } /* @@ -1758,17 +1773,31 @@ int btrfs_init_new_device(struct btrfs_root *root, char *device_path) btrfs_clear_space_info_full(root->fs_info); unlock_chunks(root); - btrfs_commit_transaction(trans, root); + ret = btrfs_commit_transaction(trans, root); if (seeding_dev) { mutex_unlock(&uuid_mutex); up_write(&sb->s_umount); + if (ret) /* transaction commit */ + return ret; + ret = btrfs_relocate_sys_chunks(root); - BUG_ON(ret); + if (ret < 0) + btrfs_error(root->fs_info, ret, + "Failed to relocate sys chunks after " + "device initialization. This can be fixed " + "using the \"btrfs balance\" command."); } return ret; + +error_trans: + unlock_chunks(root); + btrfs_abort_transaction(trans, root, ret); + btrfs_end_transaction(trans, root); + kfree(device->name); + kfree(device); error: blkdev_put(bdev, FMODE_EXCL); if (seeding_dev) { @@ -1876,10 +1905,20 @@ static int btrfs_free_chunk(struct btrfs_trans_handle *trans, key.type = BTRFS_CHUNK_ITEM_KEY; ret = btrfs_search_slot(trans, root, &key, path, -1, 1); - BUG_ON(ret); + if (ret < 0) + goto out; + else if (ret > 0) { /* Logic error or corruption */ + btrfs_error(root->fs_info, -ENOENT, + "Failed lookup while freeing chunk."); + ret = -ENOENT; + goto out; + } ret = btrfs_del_item(trans, root, path); - + if (ret < 0) + btrfs_error(root->fs_info, ret, + "Failed to delete chunk item."); +out: btrfs_free_path(path); return ret; } @@ -2041,7 +2080,7 @@ again: ret = btrfs_search_slot(NULL, chunk_root, &key, path, 0, 0); if (ret < 0) goto error; - BUG_ON(ret == 0); + BUG_ON(ret == 0); /* Corruption */ ret = btrfs_previous_item(chunk_root, path, key.objectid, key.type); @@ -2250,15 +2289,13 @@ static void unset_balance_control(struct btrfs_fs_info *fs_info) * Balance filters. Return 1 if chunk should be filtered out * (should not be balanced). */ -static int chunk_profiles_filter(u64 chunk_profile, +static int chunk_profiles_filter(u64 chunk_type, struct btrfs_balance_args *bargs) { - chunk_profile &= BTRFS_BLOCK_GROUP_PROFILE_MASK; - - if (chunk_profile == 0) - chunk_profile = BTRFS_AVAIL_ALLOC_BIT_SINGLE; + chunk_type = chunk_to_extended(chunk_type) & + BTRFS_EXTENDED_PROFILE_MASK; - if (bargs->profiles & chunk_profile) + if (bargs->profiles & chunk_type) return 0; return 1; @@ -2365,18 +2402,16 @@ static int chunk_vrange_filter(struct extent_buffer *leaf, return 1; } -static int chunk_soft_convert_filter(u64 chunk_profile, +static int chunk_soft_convert_filter(u64 chunk_type, struct btrfs_balance_args *bargs) { if (!(bargs->flags & BTRFS_BALANCE_ARGS_CONVERT)) return 0; - chunk_profile &= BTRFS_BLOCK_GROUP_PROFILE_MASK; + chunk_type = chunk_to_extended(chunk_type) & + BTRFS_EXTENDED_PROFILE_MASK; - if (chunk_profile == 0) - chunk_profile = BTRFS_AVAIL_ALLOC_BIT_SINGLE; - - if (bargs->target & chunk_profile) + if (bargs->target == chunk_type) return 1; return 0; @@ -2602,6 +2637,30 @@ error: return ret; } +/** + * alloc_profile_is_valid - see if a given profile is valid and reduced + * @flags: profile to validate + * @extended: if true @flags is treated as an extended profile + */ +static int alloc_profile_is_valid(u64 flags, int extended) +{ + u64 mask = (extended ? BTRFS_EXTENDED_PROFILE_MASK : + BTRFS_BLOCK_GROUP_PROFILE_MASK); + + flags &= ~BTRFS_BLOCK_GROUP_TYPE_MASK; + + /* 1) check that all other bits are zeroed */ + if (flags & ~mask) + return 0; + + /* 2) see if profile is reduced */ + if (flags == 0) + return !extended; /* "0" is valid for usual profiles */ + + /* true if exactly one bit set */ + return (flags & (flags - 1)) == 0; +} + static inline int balance_need_close(struct btrfs_fs_info *fs_info) { /* cancel requested || normal exit path */ @@ -2630,6 +2689,7 @@ int btrfs_balance(struct btrfs_balance_control *bctl, { struct btrfs_fs_info *fs_info = bctl->fs_info; u64 allowed; + int mixed = 0; int ret; if (btrfs_fs_closing(fs_info) || @@ -2639,13 +2699,16 @@ int btrfs_balance(struct btrfs_balance_control *bctl, goto out; } + allowed = btrfs_super_incompat_flags(fs_info->super_copy); + if (allowed & BTRFS_FEATURE_INCOMPAT_MIXED_GROUPS) + mixed = 1; + /* * In case of mixed groups both data and meta should be picked, * and identical options should be given for both of them. */ - allowed = btrfs_super_incompat_flags(fs_info->super_copy); - if ((allowed & BTRFS_FEATURE_INCOMPAT_MIXED_GROUPS) && - (bctl->flags & (BTRFS_BALANCE_DATA | BTRFS_BALANCE_METADATA))) { + allowed = BTRFS_BALANCE_DATA | BTRFS_BALANCE_METADATA; + if (mixed && (bctl->flags & allowed)) { if (!(bctl->flags & BTRFS_BALANCE_DATA) || !(bctl->flags & BTRFS_BALANCE_METADATA) || memcmp(&bctl->data, &bctl->meta, sizeof(bctl->data))) { @@ -2656,14 +2719,6 @@ int btrfs_balance(struct btrfs_balance_control *bctl, } } - /* - * Profile changing sanity checks. Skip them if a simple - * balance is requested. - */ - if (!((bctl->data.flags | bctl->sys.flags | bctl->meta.flags) & - BTRFS_BALANCE_ARGS_CONVERT)) - goto do_balance; - allowed = BTRFS_AVAIL_ALLOC_BIT_SINGLE; if (fs_info->fs_devices->num_devices == 1) allowed |= BTRFS_BLOCK_GROUP_DUP; @@ -2673,24 +2728,27 @@ int btrfs_balance(struct btrfs_balance_control *bctl, allowed |= (BTRFS_BLOCK_GROUP_RAID0 | BTRFS_BLOCK_GROUP_RAID1 | BTRFS_BLOCK_GROUP_RAID10); - if (!profile_is_valid(bctl->data.target, 1) || - bctl->data.target & ~allowed) { + if ((bctl->data.flags & BTRFS_BALANCE_ARGS_CONVERT) && + (!alloc_profile_is_valid(bctl->data.target, 1) || + (bctl->data.target & ~allowed))) { printk(KERN_ERR "btrfs: unable to start balance with target " "data profile %llu\n", (unsigned long long)bctl->data.target); ret = -EINVAL; goto out; } - if (!profile_is_valid(bctl->meta.target, 1) || - bctl->meta.target & ~allowed) { + if ((bctl->meta.flags & BTRFS_BALANCE_ARGS_CONVERT) && + (!alloc_profile_is_valid(bctl->meta.target, 1) || + (bctl->meta.target & ~allowed))) { printk(KERN_ERR "btrfs: unable to start balance with target " "metadata profile %llu\n", (unsigned long long)bctl->meta.target); ret = -EINVAL; goto out; } - if (!profile_is_valid(bctl->sys.target, 1) || - bctl->sys.target & ~allowed) { + if ((bctl->sys.flags & BTRFS_BALANCE_ARGS_CONVERT) && + (!alloc_profile_is_valid(bctl->sys.target, 1) || + (bctl->sys.target & ~allowed))) { printk(KERN_ERR "btrfs: unable to start balance with target " "system profile %llu\n", (unsigned long long)bctl->sys.target); @@ -2698,7 +2756,9 @@ int btrfs_balance(struct btrfs_balance_control *bctl, goto out; } - if (bctl->data.target & BTRFS_BLOCK_GROUP_DUP) { + /* allow dup'ed data chunks only in mixed mode */ + if (!mixed && (bctl->data.flags & BTRFS_BALANCE_ARGS_CONVERT) && + (bctl->data.target & BTRFS_BLOCK_GROUP_DUP)) { printk(KERN_ERR "btrfs: dup for data is not allowed\n"); ret = -EINVAL; goto out; @@ -2724,7 +2784,6 @@ int btrfs_balance(struct btrfs_balance_control *bctl, } } -do_balance: ret = insert_balance_item(fs_info->tree_root, bctl); if (ret && ret != -EEXIST) goto out; @@ -2967,7 +3026,7 @@ again: key.offset = (u64)-1; key.type = BTRFS_DEV_EXTENT_KEY; - while (1) { + do { ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); if (ret < 0) goto done; @@ -3009,8 +3068,7 @@ again: goto done; if (ret == -ENOSPC) failed++; - key.offset -= 1; - } + } while (key.offset-- > 0); if (failed && !retried) { failed = 0; @@ -3128,11 +3186,7 @@ static int __btrfs_alloc_chunk(struct btrfs_trans_handle *trans, int i; int j; - if ((type & BTRFS_BLOCK_GROUP_RAID1) && - (type & BTRFS_BLOCK_GROUP_DUP)) { - WARN_ON(1); - type &= ~BTRFS_BLOCK_GROUP_DUP; - } + BUG_ON(!alloc_profile_is_valid(type, 0)); if (list_empty(&fs_devices->alloc_list)) return -ENOSPC; @@ -3274,12 +3328,14 @@ static int __btrfs_alloc_chunk(struct btrfs_trans_handle *trans, stripe_size = devices_info[ndevs-1].max_avail; num_stripes = ndevs * dev_stripes; - if (stripe_size * num_stripes > max_chunk_size * ncopies) { + if (stripe_size * ndevs > max_chunk_size * ncopies) { stripe_size = max_chunk_size * ncopies; - do_div(stripe_size, num_stripes); + do_div(stripe_size, ndevs); } do_div(stripe_size, dev_stripes); + + /* align to BTRFS_STRIPE_LEN */ do_div(stripe_size, BTRFS_STRIPE_LEN); stripe_size *= BTRFS_STRIPE_LEN; @@ -3328,13 +3384,15 @@ static int __btrfs_alloc_chunk(struct btrfs_trans_handle *trans, write_lock(&em_tree->lock); ret = add_extent_mapping(em_tree, em); write_unlock(&em_tree->lock); - BUG_ON(ret); free_extent_map(em); + if (ret) + goto error; ret = btrfs_make_block_group(trans, extent_root, 0, type, BTRFS_FIRST_CHUNK_TREE_OBJECTID, start, num_bytes); - BUG_ON(ret); + if (ret) + goto error; for (i = 0; i < map->num_stripes; ++i) { struct btrfs_device *device; @@ -3347,7 +3405,10 @@ static int __btrfs_alloc_chunk(struct btrfs_trans_handle *trans, info->chunk_root->root_key.objectid, BTRFS_FIRST_CHUNK_TREE_OBJECTID, start, dev_offset, stripe_size); - BUG_ON(ret); + if (ret) { + btrfs_abort_transaction(trans, extent_root, ret); + goto error; + } } kfree(devices_info); @@ -3383,7 +3444,8 @@ static int __finish_chunk_alloc(struct btrfs_trans_handle *trans, device = map->stripes[index].dev; device->bytes_used += stripe_size; ret = btrfs_update_device(trans, device); - BUG_ON(ret); + if (ret) + goto out_free; index++; } @@ -3420,16 +3482,19 @@ static int __finish_chunk_alloc(struct btrfs_trans_handle *trans, key.offset = chunk_offset; ret = btrfs_insert_item(trans, chunk_root, &key, chunk, item_size); - BUG_ON(ret); - if (map->type & BTRFS_BLOCK_GROUP_SYSTEM) { + if (ret == 0 && map->type & BTRFS_BLOCK_GROUP_SYSTEM) { + /* + * TODO: Cleanup of inserted chunk root in case of + * failure. + */ ret = btrfs_add_system_chunk(chunk_root, &key, chunk, item_size); - BUG_ON(ret); } +out_free: kfree(chunk); - return 0; + return ret; } /* @@ -3461,7 +3526,8 @@ int btrfs_alloc_chunk(struct btrfs_trans_handle *trans, ret = __finish_chunk_alloc(trans, extent_root, map, chunk_offset, chunk_size, stripe_size); - BUG_ON(ret); + if (ret) + return ret; return 0; } @@ -3493,7 +3559,8 @@ static noinline int init_first_rw_device(struct btrfs_trans_handle *trans, ret = __btrfs_alloc_chunk(trans, extent_root, &map, &chunk_size, &stripe_size, chunk_offset, alloc_profile); - BUG_ON(ret); + if (ret) + return ret; sys_chunk_offset = chunk_offset + chunk_size; @@ -3504,10 +3571,12 @@ static noinline int init_first_rw_device(struct btrfs_trans_handle *trans, ret = __btrfs_alloc_chunk(trans, extent_root, &sys_map, &sys_chunk_size, &sys_stripe_size, sys_chunk_offset, alloc_profile); - BUG_ON(ret); + if (ret) + goto abort; ret = btrfs_add_device(trans, fs_info->chunk_root, device); - BUG_ON(ret); + if (ret) + goto abort; /* * Modifying chunk tree needs allocating new blocks from both @@ -3517,13 +3586,20 @@ static noinline int init_first_rw_device(struct btrfs_trans_handle *trans, */ ret = __finish_chunk_alloc(trans, extent_root, map, chunk_offset, chunk_size, stripe_size); - BUG_ON(ret); + if (ret) + goto abort; ret = __finish_chunk_alloc(trans, extent_root, sys_map, sys_chunk_offset, sys_chunk_size, sys_stripe_size); - BUG_ON(ret); + if (ret) + goto abort; + return 0; + +abort: + btrfs_abort_transaction(trans, root, ret); + return ret; } int btrfs_chunk_readonly(struct btrfs_root *root, u64 chunk_offset) @@ -3735,10 +3811,11 @@ static int __btrfs_map_block(struct btrfs_mapping_tree *map_tree, int rw, else if (mirror_num) stripe_index += mirror_num - 1; else { + int old_stripe_index = stripe_index; stripe_index = find_live_mirror(map, stripe_index, map->sub_stripes, stripe_index + current->pid % map->sub_stripes); - mirror_num = stripe_index + 1; + mirror_num = stripe_index - old_stripe_index + 1; } } else { /* @@ -3763,6 +3840,7 @@ static int __btrfs_map_block(struct btrfs_mapping_tree *map_tree, int rw, int sub_stripes = 0; u64 stripes_per_dev = 0; u32 remaining_stripes = 0; + u32 last_stripe = 0; if (map->type & (BTRFS_BLOCK_GROUP_RAID0 | BTRFS_BLOCK_GROUP_RAID10)) { @@ -3776,6 +3854,8 @@ static int __btrfs_map_block(struct btrfs_mapping_tree *map_tree, int rw, stripe_nr_orig, factor, &remaining_stripes); + div_u64_rem(stripe_nr_end - 1, factor, &last_stripe); + last_stripe *= sub_stripes; } for (i = 0; i < num_stripes; i++) { @@ -3788,16 +3868,29 @@ static int __btrfs_map_block(struct btrfs_mapping_tree *map_tree, int rw, BTRFS_BLOCK_GROUP_RAID10)) { bbio->stripes[i].length = stripes_per_dev * map->stripe_len; + if (i / sub_stripes < remaining_stripes) bbio->stripes[i].length += map->stripe_len; + + /* + * Special for the first stripe and + * the last stripe: + * + * |-------|...|-------| + * |----------| + * off end_off + */ if (i < sub_stripes) bbio->stripes[i].length -= stripe_offset; - if ((i / sub_stripes + 1) % - sub_stripes == remaining_stripes) + + if (stripe_index >= last_stripe && + stripe_index <= (last_stripe + + sub_stripes - 1)) bbio->stripes[i].length -= stripe_end_offset; + if (i == sub_stripes - 1) stripe_offset = 0; } else @@ -3874,7 +3967,7 @@ int btrfs_rmap_block(struct btrfs_mapping_tree *map_tree, do_div(length, map->num_stripes); buf = kzalloc(sizeof(u64) * map->num_stripes, GFP_NOFS); - BUG_ON(!buf); + BUG_ON(!buf); /* -ENOMEM */ for (i = 0; i < map->num_stripes; i++) { if (devid && map->stripes[i].dev->devid != devid) @@ -3912,13 +4005,58 @@ int btrfs_rmap_block(struct btrfs_mapping_tree *map_tree, return 0; } +static void *merge_stripe_index_into_bio_private(void *bi_private, + unsigned int stripe_index) +{ + /* + * with single, dup, RAID0, RAID1 and RAID10, stripe_index is + * at most 1. + * The alternative solution (instead of stealing bits from the + * pointer) would be to allocate an intermediate structure + * that contains the old private pointer plus the stripe_index. + */ + BUG_ON((((uintptr_t)bi_private) & 3) != 0); + BUG_ON(stripe_index > 3); + return (void *)(((uintptr_t)bi_private) | stripe_index); +} + +static struct btrfs_bio *extract_bbio_from_bio_private(void *bi_private) +{ + return (struct btrfs_bio *)(((uintptr_t)bi_private) & ~((uintptr_t)3)); +} + +static unsigned int extract_stripe_index_from_bio_private(void *bi_private) +{ + return (unsigned int)((uintptr_t)bi_private) & 3; +} + static void btrfs_end_bio(struct bio *bio, int err) { - struct btrfs_bio *bbio = bio->bi_private; + struct btrfs_bio *bbio = extract_bbio_from_bio_private(bio->bi_private); int is_orig_bio = 0; - if (err) + if (err) { atomic_inc(&bbio->error); + if (err == -EIO || err == -EREMOTEIO) { + unsigned int stripe_index = + extract_stripe_index_from_bio_private( + bio->bi_private); + struct btrfs_device *dev; + + BUG_ON(stripe_index >= bbio->num_stripes); + dev = bbio->stripes[stripe_index].dev; + if (bio->bi_rw & WRITE) + btrfs_dev_stat_inc(dev, + BTRFS_DEV_STAT_WRITE_ERRS); + else + btrfs_dev_stat_inc(dev, + BTRFS_DEV_STAT_READ_ERRS); + if ((bio->bi_rw & WRITE_FLUSH) == WRITE_FLUSH) + btrfs_dev_stat_inc(dev, + BTRFS_DEV_STAT_FLUSH_ERRS); + btrfs_dev_stat_print_on_error(dev); + } + } if (bio == bbio->orig_bio) is_orig_bio = 1; @@ -3967,7 +4105,7 @@ struct async_sched { * This will add one bio to the pending list for a device and make sure * the work struct is scheduled. */ -static noinline int schedule_bio(struct btrfs_root *root, +static noinline void schedule_bio(struct btrfs_root *root, struct btrfs_device *device, int rw, struct bio *bio) { @@ -3979,7 +4117,7 @@ static noinline int schedule_bio(struct btrfs_root *root, bio_get(bio); btrfsic_submit_bio(rw, bio); bio_put(bio); - return 0; + return; } /* @@ -4013,7 +4151,6 @@ static noinline int schedule_bio(struct btrfs_root *root, if (should_queue) btrfs_queue_worker(&root->fs_info->submit_workers, &device->work); - return 0; } int btrfs_map_bio(struct btrfs_root *root, int rw, struct bio *bio, @@ -4036,7 +4173,8 @@ int btrfs_map_bio(struct btrfs_root *root, int rw, struct bio *bio, ret = btrfs_map_block(map_tree, rw, logical, &map_length, &bbio, mirror_num); - BUG_ON(ret); + if (ret) /* -ENOMEM */ + return ret; total_devs = bbio->num_stripes; if (map_length < length) { @@ -4055,11 +4193,13 @@ int btrfs_map_bio(struct btrfs_root *root, int rw, struct bio *bio, while (dev_nr < total_devs) { if (dev_nr < total_devs - 1) { bio = bio_clone(first_bio, GFP_NOFS); - BUG_ON(!bio); + BUG_ON(!bio); /* -ENOMEM */ } else { bio = first_bio; } bio->bi_private = bbio; + bio->bi_private = merge_stripe_index_into_bio_private( + bio->bi_private, (unsigned int)dev_nr); bio->bi_end_io = btrfs_end_bio; bio->bi_sector = bbio->stripes[dev_nr].physical >> 9; dev = bbio->stripes[dev_nr].dev; @@ -4209,13 +4349,13 @@ static int read_one_chunk(struct btrfs_root *root, struct btrfs_key *key, write_lock(&map_tree->map_tree.lock); ret = add_extent_mapping(&map_tree->map_tree, em); write_unlock(&map_tree->map_tree.lock); - BUG_ON(ret); + BUG_ON(ret); /* Tree corruption */ free_extent_map(em); return 0; } -static int fill_device_from_item(struct extent_buffer *leaf, +static void fill_device_from_item(struct extent_buffer *leaf, struct btrfs_dev_item *dev_item, struct btrfs_device *device) { @@ -4232,8 +4372,6 @@ static int fill_device_from_item(struct extent_buffer *leaf, ptr = (unsigned long)btrfs_device_uuid(dev_item); read_extent_buffer(leaf, device->uuid, ptr, BTRFS_UUID_SIZE); - - return 0; } static int open_seed_devices(struct btrfs_root *root, u8 *fsid) @@ -4266,8 +4404,10 @@ static int open_seed_devices(struct btrfs_root *root, u8 *fsid) ret = __btrfs_open_devices(fs_devices, FMODE_READ, root->fs_info->bdev_holder); - if (ret) + if (ret) { + free_fs_devices(fs_devices); goto out; + } if (!fs_devices->seeding) { __btrfs_close_devices(fs_devices); @@ -4384,7 +4524,7 @@ int btrfs_read_sys_array(struct btrfs_root *root) * to silence the warning eg. on PowerPC 64. */ if (PAGE_CACHE_SIZE > BTRFS_SUPER_INFO_SIZE) - SetPageUptodate(sb->first_page); + SetPageUptodate(sb->pages[0]); write_extent_buffer(sb, super_copy, 0, BTRFS_SUPER_INFO_SIZE); array_size = btrfs_super_sys_array_size(super_copy); @@ -4420,6 +4560,28 @@ int btrfs_read_sys_array(struct btrfs_root *root) return ret; } +struct btrfs_device *btrfs_find_device_for_logical(struct btrfs_root *root, + u64 logical, int mirror_num) +{ + struct btrfs_mapping_tree *map_tree = &root->fs_info->mapping_tree; + int ret; + u64 map_length = 0; + struct btrfs_bio *bbio = NULL; + struct btrfs_device *device; + + BUG_ON(mirror_num == 0); + ret = btrfs_map_block(map_tree, WRITE, logical, &map_length, &bbio, + mirror_num); + if (ret) { + BUG_ON(bbio != NULL); + return NULL; + } + BUG_ON(mirror_num != bbio->mirror_num); + device = bbio->stripes[mirror_num - 1].dev; + kfree(bbio); + return device; +} + int btrfs_read_chunk_tree(struct btrfs_root *root) { struct btrfs_path *path; @@ -4494,3 +4656,230 @@ error: btrfs_free_path(path); return ret; } + +static void __btrfs_reset_dev_stats(struct btrfs_device *dev) +{ + int i; + + for (i = 0; i < BTRFS_DEV_STAT_VALUES_MAX; i++) + btrfs_dev_stat_reset(dev, i); +} + +int btrfs_init_dev_stats(struct btrfs_fs_info *fs_info) +{ + struct btrfs_key key; + struct btrfs_key found_key; + struct btrfs_root *dev_root = fs_info->dev_root; + struct btrfs_fs_devices *fs_devices = fs_info->fs_devices; + struct extent_buffer *eb; + int slot; + int ret = 0; + struct btrfs_device *device; + struct btrfs_path *path = NULL; + int i; + + path = btrfs_alloc_path(); + if (!path) { + ret = -ENOMEM; + goto out; + } + + mutex_lock(&fs_devices->device_list_mutex); + list_for_each_entry(device, &fs_devices->devices, dev_list) { + int item_size; + struct btrfs_dev_stats_item *ptr; + + key.objectid = 0; + key.type = BTRFS_DEV_STATS_KEY; + key.offset = device->devid; + ret = btrfs_search_slot(NULL, dev_root, &key, path, 0, 0); + if (ret) { + printk(KERN_WARNING "btrfs: no dev_stats entry found for device %s (devid %llu) (OK on first mount after mkfs)\n", + device->name, (unsigned long long)device->devid); + __btrfs_reset_dev_stats(device); + device->dev_stats_valid = 1; + btrfs_release_path(path); + continue; + } + slot = path->slots[0]; + eb = path->nodes[0]; + btrfs_item_key_to_cpu(eb, &found_key, slot); + item_size = btrfs_item_size_nr(eb, slot); + + ptr = btrfs_item_ptr(eb, slot, + struct btrfs_dev_stats_item); + + for (i = 0; i < BTRFS_DEV_STAT_VALUES_MAX; i++) { + if (item_size >= (1 + i) * sizeof(__le64)) + btrfs_dev_stat_set(device, i, + btrfs_dev_stats_value(eb, ptr, i)); + else + btrfs_dev_stat_reset(device, i); + } + + device->dev_stats_valid = 1; + btrfs_dev_stat_print_on_load(device); + btrfs_release_path(path); + } + mutex_unlock(&fs_devices->device_list_mutex); + +out: + btrfs_free_path(path); + return ret < 0 ? ret : 0; +} + +static int update_dev_stat_item(struct btrfs_trans_handle *trans, + struct btrfs_root *dev_root, + struct btrfs_device *device) +{ + struct btrfs_path *path; + struct btrfs_key key; + struct extent_buffer *eb; + struct btrfs_dev_stats_item *ptr; + int ret; + int i; + + key.objectid = 0; + key.type = BTRFS_DEV_STATS_KEY; + key.offset = device->devid; + + path = btrfs_alloc_path(); + BUG_ON(!path); + ret = btrfs_search_slot(trans, dev_root, &key, path, -1, 1); + if (ret < 0) { + printk(KERN_WARNING "btrfs: error %d while searching for dev_stats item for device %s!\n", + ret, device->name); + goto out; + } + + if (ret == 0 && + btrfs_item_size_nr(path->nodes[0], path->slots[0]) < sizeof(*ptr)) { + /* need to delete old one and insert a new one */ + ret = btrfs_del_item(trans, dev_root, path); + if (ret != 0) { + printk(KERN_WARNING "btrfs: delete too small dev_stats item for device %s failed %d!\n", + device->name, ret); + goto out; + } + ret = 1; + } + + if (ret == 1) { + /* need to insert a new item */ + btrfs_release_path(path); + ret = btrfs_insert_empty_item(trans, dev_root, path, + &key, sizeof(*ptr)); + if (ret < 0) { + printk(KERN_WARNING "btrfs: insert dev_stats item for device %s failed %d!\n", + device->name, ret); + goto out; + } + } + + eb = path->nodes[0]; + ptr = btrfs_item_ptr(eb, path->slots[0], struct btrfs_dev_stats_item); + for (i = 0; i < BTRFS_DEV_STAT_VALUES_MAX; i++) + btrfs_set_dev_stats_value(eb, ptr, i, + btrfs_dev_stat_read(device, i)); + btrfs_mark_buffer_dirty(eb); + +out: + btrfs_free_path(path); + return ret; +} + +/* + * called from commit_transaction. Writes all changed device stats to disk. + */ +int btrfs_run_dev_stats(struct btrfs_trans_handle *trans, + struct btrfs_fs_info *fs_info) +{ + struct btrfs_root *dev_root = fs_info->dev_root; + struct btrfs_fs_devices *fs_devices = fs_info->fs_devices; + struct btrfs_device *device; + int ret = 0; + + mutex_lock(&fs_devices->device_list_mutex); + list_for_each_entry(device, &fs_devices->devices, dev_list) { + if (!device->dev_stats_valid || !device->dev_stats_dirty) + continue; + + ret = update_dev_stat_item(trans, dev_root, device); + if (!ret) + device->dev_stats_dirty = 0; + } + mutex_unlock(&fs_devices->device_list_mutex); + + return ret; +} + +void btrfs_dev_stat_inc_and_print(struct btrfs_device *dev, int index) +{ + btrfs_dev_stat_inc(dev, index); + btrfs_dev_stat_print_on_error(dev); +} + +void btrfs_dev_stat_print_on_error(struct btrfs_device *dev) +{ + if (!dev->dev_stats_valid) + return; + printk_ratelimited(KERN_ERR + "btrfs: bdev %s errs: wr %u, rd %u, flush %u, corrupt %u, gen %u\n", + dev->name, + btrfs_dev_stat_read(dev, BTRFS_DEV_STAT_WRITE_ERRS), + btrfs_dev_stat_read(dev, BTRFS_DEV_STAT_READ_ERRS), + btrfs_dev_stat_read(dev, BTRFS_DEV_STAT_FLUSH_ERRS), + btrfs_dev_stat_read(dev, + BTRFS_DEV_STAT_CORRUPTION_ERRS), + btrfs_dev_stat_read(dev, + BTRFS_DEV_STAT_GENERATION_ERRS)); +} + +static void btrfs_dev_stat_print_on_load(struct btrfs_device *dev) +{ + printk(KERN_INFO "btrfs: bdev %s errs: wr %u, rd %u, flush %u, corrupt %u, gen %u\n", + dev->name, + btrfs_dev_stat_read(dev, BTRFS_DEV_STAT_WRITE_ERRS), + btrfs_dev_stat_read(dev, BTRFS_DEV_STAT_READ_ERRS), + btrfs_dev_stat_read(dev, BTRFS_DEV_STAT_FLUSH_ERRS), + btrfs_dev_stat_read(dev, BTRFS_DEV_STAT_CORRUPTION_ERRS), + btrfs_dev_stat_read(dev, BTRFS_DEV_STAT_GENERATION_ERRS)); +} + +int btrfs_get_dev_stats(struct btrfs_root *root, + struct btrfs_ioctl_get_dev_stats *stats, + int reset_after_read) +{ + struct btrfs_device *dev; + struct btrfs_fs_devices *fs_devices = root->fs_info->fs_devices; + int i; + + mutex_lock(&fs_devices->device_list_mutex); + dev = btrfs_find_device(root, stats->devid, NULL, NULL); + mutex_unlock(&fs_devices->device_list_mutex); + + if (!dev) { + printk(KERN_WARNING + "btrfs: get dev_stats failed, device not found\n"); + return -ENODEV; + } else if (!dev->dev_stats_valid) { + printk(KERN_WARNING + "btrfs: get dev_stats failed, not yet valid\n"); + return -ENODEV; + } else if (reset_after_read) { + for (i = 0; i < BTRFS_DEV_STAT_VALUES_MAX; i++) { + if (stats->nr_items > i) + stats->values[i] = + btrfs_dev_stat_read_and_reset(dev, i); + else + btrfs_dev_stat_reset(dev, i); + } + } else { + for (i = 0; i < BTRFS_DEV_STAT_VALUES_MAX; i++) + if (stats->nr_items > i) + stats->values[i] = btrfs_dev_stat_read(dev, i); + } + if (stats->nr_items > BTRFS_DEV_STAT_VALUES_MAX) + stats->nr_items = BTRFS_DEV_STAT_VALUES_MAX; + return 0; +} |