From 191eac33009e6a6d31e87cfa425a20d0e79704b4 Mon Sep 17 00:00:00 2001 From: Emoly Liu Date: Mon, 31 Jul 2017 00:40:22 -0400 Subject: ext4: error should be cleared if ea_inode isn't added to the cache For Lustre, if ea_inode fails in hash validation but passes parent inode and generation checks, it won't be added to the cache as well as the error "-EFSCORRUPTED" should be cleared, otherwise it will cause "Structure needs cleaning" when running getfattr command. Intel-bug-id: https://jira.hpdd.intel.com/browse/LU-9723 Cc: stable@vger.kernel.org Fixes: dec214d00e0d78a08b947d7dccdfdb84407a9f4d Signed-off-by: Emoly Liu Signed-off-by: Theodore Ts'o Reviewed-by: Andreas Dilger Reviewed-by: tahsin@google.com --- fs/ext4/xattr.c | 1 + 1 file changed, 1 insertion(+) (limited to 'fs/ext4/xattr.c') diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c index cff4f41..de217a0 100644 --- a/fs/ext4/xattr.c +++ b/fs/ext4/xattr.c @@ -451,6 +451,7 @@ ext4_xattr_inode_get(struct inode *inode, struct ext4_xattr_entry *entry, } /* Do not add ea_inode to the cache. */ ea_inode_cache = NULL; + err = 0; } else if (err) goto out; -- cgit v1.1 From ec00022030da5761518476096626338bd67df57a Mon Sep 17 00:00:00 2001 From: Tahsin Erdogan Date: Sat, 5 Aug 2017 22:41:42 -0400 Subject: ext4: inplace xattr block update fails to deduplicate blocks When an xattr block has a single reference, block is updated inplace and it is reinserted to the cache. Later, a cache lookup is performed to see whether an existing block has the same contents. This cache lookup will most of the time return the just inserted entry so deduplication is not achieved. Running the following test script will produce two xattr blocks which can be observed in "File ACL: " line of debugfs output: mke2fs -b 1024 -I 128 -F -O extent /dev/sdb 1G mount /dev/sdb /mnt/sdb touch /mnt/sdb/{x,y} setfattr -n user.1 -v aaa /mnt/sdb/x setfattr -n user.2 -v bbb /mnt/sdb/x setfattr -n user.1 -v aaa /mnt/sdb/y setfattr -n user.2 -v bbb /mnt/sdb/y debugfs -R 'stat x' /dev/sdb | cat debugfs -R 'stat y' /dev/sdb | cat This patch defers the reinsertion to the cache so that we can locate other blocks with the same contents. Signed-off-by: Tahsin Erdogan Signed-off-by: Theodore Ts'o Reviewed-by: Andreas Dilger --- fs/ext4/xattr.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) (limited to 'fs/ext4/xattr.c') diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c index de217a0..4025666 100644 --- a/fs/ext4/xattr.c +++ b/fs/ext4/xattr.c @@ -1816,9 +1816,6 @@ ext4_xattr_block_set(handle_t *handle, struct inode *inode, ea_bdebug(bs->bh, "modifying in-place"); error = ext4_xattr_set_entry(i, s, handle, inode, true /* is_block */); - if (!error) - ext4_xattr_block_cache_insert(ea_block_cache, - bs->bh); ext4_xattr_block_csum_set(inode, bs->bh); unlock_buffer(bs->bh); if (error == -EFSCORRUPTED) @@ -1974,6 +1971,7 @@ inserted: } else if (bs->bh && s->base == bs->bh->b_data) { /* We were modifying this block in-place. */ ea_bdebug(bs->bh, "keeping this block"); + ext4_xattr_block_cache_insert(ea_block_cache, bs->bh); new_bh = bs->bh; get_bh(new_bh); } else { -- cgit v1.1 From 9699d4f91d9bd2f70dcc37afe3c9f18145ab2dba Mon Sep 17 00:00:00 2001 From: Tahsin Erdogan Date: Sun, 6 Aug 2017 00:07:01 -0400 Subject: ext4: make xattr inode reads faster ext4_xattr_inode_read() currently reads each block sequentially while waiting for io operation to complete before moving on to the next block. This prevents request merging in block layer. Add a ext4_bread_batch() function that starts reads for all blocks then optionally waits for them to complete. A similar logic is used in ext4_find_entry(), so update that code to use the new function. Signed-off-by: Tahsin Erdogan Signed-off-by: Theodore Ts'o --- fs/ext4/xattr.c | 51 ++++++++++++++++++++++++++++++++------------------- 1 file changed, 32 insertions(+), 19 deletions(-) (limited to 'fs/ext4/xattr.c') diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c index 4025666..5fa912e 100644 --- a/fs/ext4/xattr.c +++ b/fs/ext4/xattr.c @@ -317,28 +317,41 @@ static void ext4_xattr_inode_set_hash(struct inode *ea_inode, u32 hash) */ static int ext4_xattr_inode_read(struct inode *ea_inode, void *buf, size_t size) { - unsigned long block = 0; - struct buffer_head *bh; - int blocksize = ea_inode->i_sb->s_blocksize; - size_t csize, copied = 0; - void *copy_pos = buf; - - while (copied < size) { - csize = (size - copied) > blocksize ? blocksize : size - copied; - bh = ext4_bread(NULL, ea_inode, block, 0); - if (IS_ERR(bh)) - return PTR_ERR(bh); - if (!bh) - return -EFSCORRUPTED; + int blocksize = 1 << ea_inode->i_blkbits; + int bh_count = (size + blocksize - 1) >> ea_inode->i_blkbits; + int tail_size = (size % blocksize) ?: blocksize; + struct buffer_head *bhs_inline[8]; + struct buffer_head **bhs = bhs_inline; + int i, ret; + + if (bh_count > ARRAY_SIZE(bhs_inline)) { + bhs = kmalloc_array(bh_count, sizeof(*bhs), GFP_NOFS); + if (!bhs) + return -ENOMEM; + } - memcpy(copy_pos, bh->b_data, csize); - brelse(bh); + ret = ext4_bread_batch(ea_inode, 0 /* block */, bh_count, + true /* wait */, bhs); + if (ret) + goto free_bhs; - copy_pos += csize; - block += 1; - copied += csize; + for (i = 0; i < bh_count; i++) { + /* There shouldn't be any holes in ea_inode. */ + if (!bhs[i]) { + ret = -EFSCORRUPTED; + goto put_bhs; + } + memcpy((char *)buf + blocksize * i, bhs[i]->b_data, + i < bh_count - 1 ? blocksize : tail_size); } - return 0; + ret = 0; +put_bhs: + for (i = 0; i < bh_count; i++) + brelse(bhs[i]); +free_bhs: + if (bhs != bhs_inline) + kfree(bhs); + return ret; } static int ext4_xattr_inode_iget(struct inode *parent, unsigned long ea_ino, -- cgit v1.1 From 3b10fdc6d8bd048f4fb14af5eda2051ace7b8b16 Mon Sep 17 00:00:00 2001 From: Miao Xie Date: Sun, 6 Aug 2017 00:27:38 -0400 Subject: ext4: fix forgetten xattr lock protection in ext4_expand_extra_isize We should avoid the contention between the i_extra_isize update and the inline data insertion, so move the xattr trylock in front of i_extra_isize update. Signed-off-by: Miao Xie Reviewed-by: Wang Shilong --- fs/ext4/xattr.c | 10 ---------- 1 file changed, 10 deletions(-) (limited to 'fs/ext4/xattr.c') diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c index 5fa912e..862ba38 100644 --- a/fs/ext4/xattr.c +++ b/fs/ext4/xattr.c @@ -2645,10 +2645,6 @@ int ext4_expand_extra_isize_ea(struct inode *inode, int new_extra_isize, int error = 0, tried_min_extra_isize = 0; int s_min_extra_isize = le16_to_cpu(EXT4_SB(inode->i_sb)->s_es->s_min_extra_isize); int isize_diff; /* How much do we need to grow i_extra_isize */ - int no_expand; - - if (ext4_write_trylock_xattr(inode, &no_expand) == 0) - return 0; retry: isize_diff = new_extra_isize - EXT4_I(inode)->i_extra_isize; @@ -2731,16 +2727,10 @@ shift: EXT4_I(inode)->i_extra_isize = new_extra_isize; brelse(bh); out: - ext4_write_unlock_xattr(inode, &no_expand); return 0; cleanup: brelse(bh); - /* - * Inode size expansion failed; don't try again - */ - no_expand = 1; - ext4_write_unlock_xattr(inode, &no_expand); return error; } -- cgit v1.1 From cf0a5e818fe216dbdf5da4e829e157d27ebfc8a4 Mon Sep 17 00:00:00 2001 From: Miao Xie Date: Sun, 6 Aug 2017 00:40:01 -0400 Subject: ext4: restructure ext4_expand_extra_isize Current ext4_expand_extra_isize just tries to expand extra isize, if someone is holding xattr lock or some check fails, it will give up. So rename its name to ext4_try_to_expand_extra_isize. Besides that, we clean up unnecessary check and move some relative checks into it. Signed-off-by: Miao Xie Signed-off-by: Theodore Ts'o Reviewed-by: Wang Shilong --- fs/ext4/xattr.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) (limited to 'fs/ext4/xattr.c') diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c index 862ba38..7f5f4b6 100644 --- a/fs/ext4/xattr.c +++ b/fs/ext4/xattr.c @@ -2638,12 +2638,14 @@ int ext4_expand_extra_isize_ea(struct inode *inode, int new_extra_isize, { struct ext4_xattr_ibody_header *header; struct buffer_head *bh = NULL; + struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); + static unsigned int mnt_count; size_t min_offs; size_t ifree, bfree; int total_ino; void *base, *end; int error = 0, tried_min_extra_isize = 0; - int s_min_extra_isize = le16_to_cpu(EXT4_SB(inode->i_sb)->s_es->s_min_extra_isize); + int s_min_extra_isize = le16_to_cpu(sbi->s_es->s_min_extra_isize); int isize_diff; /* How much do we need to grow i_extra_isize */ retry: @@ -2731,6 +2733,11 @@ out: cleanup: brelse(bh); + if (mnt_count != le16_to_cpu(sbi->s_es->s_mnt_count)) { + ext4_warning(inode->i_sb, "Unable to expand inode %lu. Delete some EAs or run e2fsck.", + inode->i_ino); + mnt_count = le16_to_cpu(sbi->s_es->s_mnt_count); + } return error; } -- cgit v1.1 From b640b2c51b26459fc08f2185a385495b0f509a80 Mon Sep 17 00:00:00 2001 From: Miao Xie Date: Sun, 6 Aug 2017 00:55:48 -0400 Subject: ext4: cleanup ext4_expand_extra_isize_ea() Clean up some goto statement, make ext4_expand_extra_isize_ea() clearer. Signed-off-by: Miao Xie Signed-off-by: Theodore Ts'o Reviewed-by: Wang Shilong --- fs/ext4/xattr.c | 14 +++++--------- 1 file changed, 5 insertions(+), 9 deletions(-) (limited to 'fs/ext4/xattr.c') diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c index 7f5f4b6..82a5af9 100644 --- a/fs/ext4/xattr.c +++ b/fs/ext4/xattr.c @@ -2637,7 +2637,7 @@ int ext4_expand_extra_isize_ea(struct inode *inode, int new_extra_isize, struct ext4_inode *raw_inode, handle_t *handle) { struct ext4_xattr_ibody_header *header; - struct buffer_head *bh = NULL; + struct buffer_head *bh; struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); static unsigned int mnt_count; size_t min_offs; @@ -2651,7 +2651,7 @@ int ext4_expand_extra_isize_ea(struct inode *inode, int new_extra_isize, retry: isize_diff = new_extra_isize - EXT4_I(inode)->i_extra_isize; if (EXT4_I(inode)->i_extra_isize >= new_extra_isize) - goto out; + return 0; header = IHDR(inode, raw_inode); @@ -2686,6 +2686,7 @@ retry: EXT4_ERROR_INODE(inode, "bad block %llu", EXT4_I(inode)->i_file_acl); error = -EFSCORRUPTED; + brelse(bh); goto cleanup; } base = BHDR(bh); @@ -2693,11 +2694,11 @@ retry: min_offs = end - base; bfree = ext4_xattr_free_space(BFIRST(bh), &min_offs, base, NULL); + brelse(bh); if (bfree + ifree < isize_diff) { if (!tried_min_extra_isize && s_min_extra_isize) { tried_min_extra_isize++; new_extra_isize = s_min_extra_isize; - brelse(bh); goto retry; } error = -ENOSPC; @@ -2715,7 +2716,6 @@ retry: s_min_extra_isize) { tried_min_extra_isize++; new_extra_isize = s_min_extra_isize; - brelse(bh); goto retry; } goto cleanup; @@ -2727,13 +2727,9 @@ shift: EXT4_GOOD_OLD_INODE_SIZE + new_extra_isize, (void *)header, total_ino); EXT4_I(inode)->i_extra_isize = new_extra_isize; - brelse(bh); -out: - return 0; cleanup: - brelse(bh); - if (mnt_count != le16_to_cpu(sbi->s_es->s_mnt_count)) { + if (error && (mnt_count != le16_to_cpu(sbi->s_es->s_mnt_count))) { ext4_warning(inode->i_sb, "Unable to expand inode %lu. Delete some EAs or run e2fsck.", inode->i_ino); mnt_count = le16_to_cpu(sbi->s_es->s_mnt_count); -- cgit v1.1 From 32aaf194201e98db4235b7b71ac62a22e2ac355f Mon Sep 17 00:00:00 2001 From: Tahsin Erdogan Date: Mon, 14 Aug 2017 08:30:06 -0400 Subject: ext4: add missing xattr hash update When updating an extended attribute, if the padded value sizes are the same, a shortcut is taken to avoid the bulk of the work. This was fine until the xattr hash update was moved inside ext4_xattr_set_entry(). With that change, the hash update got missed in the shortcut case. Thanks to ZhangYi (yizhang089@gmail.com) for root causing the problem. Fixes: daf8328172df ("ext4: eliminate xattr entry e_hash recalculation for removes") Reported-by: Miklos Szeredi Signed-off-by: Tahsin Erdogan Signed-off-by: Theodore Ts'o --- fs/ext4/xattr.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'fs/ext4/xattr.c') diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c index 82a5af9..3dd9701 100644 --- a/fs/ext4/xattr.c +++ b/fs/ext4/xattr.c @@ -1543,7 +1543,7 @@ static int ext4_xattr_set_entry(struct ext4_xattr_info *i, /* Clear padding bytes. */ memset(val + i->value_len, 0, new_size - i->value_len); } - return 0; + goto update_hash; } /* Compute min_offs and last. */ @@ -1707,6 +1707,7 @@ static int ext4_xattr_set_entry(struct ext4_xattr_info *i, here->e_value_size = cpu_to_le32(i->value_len); } +update_hash: if (i->value) { __le32 hash = 0; @@ -1725,7 +1726,8 @@ static int ext4_xattr_set_entry(struct ext4_xattr_info *i, here->e_name_len, &crc32c_hash, 1); } else if (is_block) { - __le32 *value = s->base + min_offs - new_size; + __le32 *value = s->base + le16_to_cpu( + here->e_value_offs); hash = ext4_xattr_hash_entry(here->e_name, here->e_name_len, value, -- cgit v1.1