summaryrefslogtreecommitdiffstats
path: root/fs/btrfs/extent-tree.c
diff options
context:
space:
mode:
authorWang Xiaoguang <wangxg.fnst@cn.fujitsu.com>2016-07-25 15:51:40 +0800
committerChris Mason <clm@fb.com>2016-08-25 03:58:26 -0700
commit18513091af9483ba84328d42092bd4d42a3c958f (patch)
treee5a8dfa241105b399c110cc4f79400b332136272 /fs/btrfs/extent-tree.c
parent4824f1f412f75e9f84b9cecbde828e8f4699f82d (diff)
downloadop-kernel-dev-18513091af9483ba84328d42092bd4d42a3c958f.zip
op-kernel-dev-18513091af9483ba84328d42092bd4d42a3c958f.tar.gz
btrfs: update btrfs_space_info's bytes_may_use timely
This patch can fix some false ENOSPC errors, below test script can reproduce one false ENOSPC error: #!/bin/bash dd if=/dev/zero of=fs.img bs=$((1024*1024)) count=128 dev=$(losetup --show -f fs.img) mkfs.btrfs -f -M $dev mkdir /tmp/mntpoint mount $dev /tmp/mntpoint cd /tmp/mntpoint xfs_io -f -c "falloc 0 $((64*1024*1024))" testfile Above script will fail for ENOSPC reason, but indeed fs still has free space to satisfy this request. Please see call graph: btrfs_fallocate() |-> btrfs_alloc_data_chunk_ondemand() | bytes_may_use += 64M |-> btrfs_prealloc_file_range() |-> btrfs_reserve_extent() |-> btrfs_add_reserved_bytes() | alloc_type is RESERVE_ALLOC_NO_ACCOUNT, so it does not | change bytes_may_use, and bytes_reserved += 64M. Now | bytes_may_use + bytes_reserved == 128M, which is greater | than btrfs_space_info's total_bytes, false enospc occurs. | Note, the bytes_may_use decrease operation will be done in | end of btrfs_fallocate(), which is too late. Here is another simple case for buffered write: CPU 1 | CPU 2 | |-> cow_file_range() |-> __btrfs_buffered_write() |-> btrfs_reserve_extent() | | | | | | | | | ..... | |-> btrfs_check_data_free_space() | | | | |-> extent_clear_unlock_delalloc() | In CPU 1, btrfs_reserve_extent()->find_free_extent()-> btrfs_add_reserved_bytes() do not decrease bytes_may_use, the decrease operation will be delayed to be done in extent_clear_unlock_delalloc(). Assume in this case, btrfs_reserve_extent() reserved 128MB data, CPU2's btrfs_check_data_free_space() tries to reserve 100MB data space. If 100MB > data_sinfo->total_bytes - data_sinfo->bytes_used - data_sinfo->bytes_reserved - data_sinfo->bytes_pinned - data_sinfo->bytes_readonly - data_sinfo->bytes_may_use btrfs_check_data_free_space() will try to allocate new data chunk or call btrfs_start_delalloc_roots(), or commit current transaction in order to reserve some free space, obviously a lot of work. 
But indeed it's not necessary as long as we decrease bytes_may_use in a timely manner, we still have free space, decreasing 128M from bytes_may_use. To fix this issue, this patch chooses to update bytes_may_use for both data and metadata in btrfs_add_reserved_bytes(). For compress path, real extent length may not be equal to file content length, so introduce a ram_bytes argument for btrfs_reserve_extent(), find_free_extent() and btrfs_add_reserved_bytes(), it's because bytes_may_use is increased by file content length. Then compress path can update bytes_may_use correctly. Also now we can discard RESERVE_ALLOC_NO_ACCOUNT, RESERVE_ALLOC and RESERVE_FREE. As we know, usually EXTENT_DO_ACCOUNTING is used for error path. In run_delalloc_nocow(), for inode marked as NODATACOW or extent marked as PREALLOC, we also need to update bytes_may_use, but can not pass EXTENT_DO_ACCOUNTING, because it also clears metadata reservation, so here we introduce EXTENT_CLEAR_DATA_RESV flag to indicate btrfs_clear_bit_hook() to update btrfs_space_info's bytes_may_use. Meanwhile __btrfs_prealloc_file_range() will call btrfs_free_reserved_data_space() internally for both successful and failed path, btrfs_prealloc_file_range()'s callers do not need to call btrfs_free_reserved_data_space() any more. Signed-off-by: Wang Xiaoguang <wangxg.fnst@cn.fujitsu.com> Reviewed-by: Josef Bacik <jbacik@fb.com> Signed-off-by: David Sterba <dsterba@suse.com> Signed-off-by: Chris Mason <clm@fb.com>
Diffstat (limited to 'fs/btrfs/extent-tree.c')
-rw-r--r--fs/btrfs/extent-tree.c56
1 files changed, 19 insertions, 37 deletions
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index f1121db..133c93b 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -60,21 +60,6 @@ enum {
CHUNK_ALLOC_FORCE = 2,
};
-/*
- * Control how reservations are dealt with.
- *
- * RESERVE_FREE - freeing a reservation.
- * RESERVE_ALLOC - allocating space and we need to update bytes_may_use for
- * ENOSPC accounting
- * RESERVE_ALLOC_NO_ACCOUNT - allocating space and we should not update
- * bytes_may_use as the ENOSPC accounting is done elsewhere
- */
-enum {
- RESERVE_FREE = 0,
- RESERVE_ALLOC = 1,
- RESERVE_ALLOC_NO_ACCOUNT = 2,
-};
-
static int update_block_group(struct btrfs_trans_handle *trans,
struct btrfs_root *root, u64 bytenr,
u64 num_bytes, int alloc);
@@ -105,7 +90,7 @@ static int find_next_key(struct btrfs_path *path, int level,
static void dump_space_info(struct btrfs_space_info *info, u64 bytes,
int dump_block_groups);
static int btrfs_add_reserved_bytes(struct btrfs_block_group_cache *cache,
- u64 num_bytes, int reserve, int delalloc);
+ u64 ram_bytes, u64 num_bytes, int delalloc);
static int btrfs_free_reserved_bytes(struct btrfs_block_group_cache *cache,
u64 num_bytes, int delalloc);
static int block_rsv_use_bytes(struct btrfs_block_rsv *block_rsv,
@@ -3502,7 +3487,6 @@ again:
dcs = BTRFS_DC_SETUP;
else if (ret == -ENOSPC)
set_bit(BTRFS_TRANS_CACHE_ENOSPC, &trans->transaction->flags);
- btrfs_free_reserved_data_space(inode, 0, num_pages);
out_put:
iput(inode);
@@ -6500,8 +6484,9 @@ void btrfs_wait_block_group_reservations(struct btrfs_block_group_cache *bg)
/**
* btrfs_add_reserved_bytes - update the block_group and space info counters
* @cache: The cache we are manipulating
+ * @ram_bytes: The number of bytes of file content, and will be same to
+ * @num_bytes except for the compress path.
* @num_bytes: The number of bytes in question
- * @reserve: One of the reservation enums
* @delalloc: The blocks are allocated for the delalloc write
*
* This is called by the allocator when it reserves space. Metadata
@@ -6516,7 +6501,7 @@ void btrfs_wait_block_group_reservations(struct btrfs_block_group_cache *bg)
* succeeds.
*/
static int btrfs_add_reserved_bytes(struct btrfs_block_group_cache *cache,
- u64 num_bytes, int reserve, int delalloc)
+ u64 ram_bytes, u64 num_bytes, int delalloc)
{
struct btrfs_space_info *space_info = cache->space_info;
int ret = 0;
@@ -6528,13 +6513,11 @@ static int btrfs_add_reserved_bytes(struct btrfs_block_group_cache *cache,
} else {
cache->reserved += num_bytes;
space_info->bytes_reserved += num_bytes;
- if (reserve == RESERVE_ALLOC) {
- trace_btrfs_space_reservation(cache->fs_info,
- "space_info", space_info->flags,
- num_bytes, 0);
- space_info->bytes_may_use -= num_bytes;
- }
+ trace_btrfs_space_reservation(cache->fs_info,
+ "space_info", space_info->flags,
+ ram_bytes, 0);
+ space_info->bytes_may_use -= ram_bytes;
if (delalloc)
cache->delalloc_bytes += num_bytes;
}
@@ -7433,9 +7416,9 @@ btrfs_release_block_group(struct btrfs_block_group_cache *cache,
* the free space extent currently.
*/
static noinline int find_free_extent(struct btrfs_root *orig_root,
- u64 num_bytes, u64 empty_size,
- u64 hint_byte, struct btrfs_key *ins,
- u64 flags, int delalloc)
+ u64 ram_bytes, u64 num_bytes, u64 empty_size,
+ u64 hint_byte, struct btrfs_key *ins,
+ u64 flags, int delalloc)
{
int ret = 0;
struct btrfs_root *root = orig_root->fs_info->extent_root;
@@ -7447,8 +7430,6 @@ static noinline int find_free_extent(struct btrfs_root *orig_root,
struct btrfs_space_info *space_info;
int loop = 0;
int index = __get_raid_index(flags);
- int alloc_type = (flags & BTRFS_BLOCK_GROUP_DATA) ?
- RESERVE_ALLOC_NO_ACCOUNT : RESERVE_ALLOC;
bool failed_cluster_refill = false;
bool failed_alloc = false;
bool use_cluster = true;
@@ -7780,8 +7761,8 @@ checks:
search_start - offset);
BUG_ON(offset > search_start);
- ret = btrfs_add_reserved_bytes(block_group, num_bytes,
- alloc_type, delalloc);
+ ret = btrfs_add_reserved_bytes(block_group, ram_bytes,
+ num_bytes, delalloc);
if (ret == -EAGAIN) {
btrfs_add_free_space(block_group, offset, num_bytes);
goto loop;
@@ -7953,7 +7934,7 @@ again:
up_read(&info->groups_sem);
}
-int btrfs_reserve_extent(struct btrfs_root *root,
+int btrfs_reserve_extent(struct btrfs_root *root, u64 ram_bytes,
u64 num_bytes, u64 min_alloc_size,
u64 empty_size, u64 hint_byte,
struct btrfs_key *ins, int is_data, int delalloc)
@@ -7965,8 +7946,8 @@ int btrfs_reserve_extent(struct btrfs_root *root,
flags = btrfs_get_alloc_profile(root, is_data);
again:
WARN_ON(num_bytes < root->sectorsize);
- ret = find_free_extent(root, num_bytes, empty_size, hint_byte, ins,
- flags, delalloc);
+ ret = find_free_extent(root, ram_bytes, num_bytes, empty_size,
+ hint_byte, ins, flags, delalloc);
if (!ret && !is_data) {
btrfs_dec_block_group_reservations(root->fs_info,
ins->objectid);
@@ -7975,6 +7956,7 @@ again:
num_bytes = min(num_bytes >> 1, ins->offset);
num_bytes = round_down(num_bytes, root->sectorsize);
num_bytes = max(num_bytes, min_alloc_size);
+ ram_bytes = num_bytes;
if (num_bytes == min_alloc_size)
final_tried = true;
goto again;
@@ -8241,7 +8223,7 @@ int btrfs_alloc_logged_file_extent(struct btrfs_trans_handle *trans,
return -EINVAL;
ret = btrfs_add_reserved_bytes(block_group, ins->offset,
- RESERVE_ALLOC_NO_ACCOUNT, 0);
+ ins->offset, 0);
BUG_ON(ret); /* logic error */
ret = alloc_reserved_file_extent(trans, root, 0, root_objectid,
0, owner, offset, ins, 1);
@@ -8385,7 +8367,7 @@ struct extent_buffer *btrfs_alloc_tree_block(struct btrfs_trans_handle *trans,
if (IS_ERR(block_rsv))
return ERR_CAST(block_rsv);
- ret = btrfs_reserve_extent(root, blocksize, blocksize,
+ ret = btrfs_reserve_extent(root, blocksize, blocksize, blocksize,
empty_size, hint, &ins, 0, 0);
if (ret)
goto out_unuse;
OpenPOWER on IntegriCloud