summaryrefslogtreecommitdiffstats
path: root/fs/ext4
diff options
context:
space:
mode:
authorAndreas Gruenbacher <agruenba@redhat.com>2016-02-22 22:44:04 -0500
committerTheodore Ts'o <tytso@mit.edu>2016-02-22 22:44:04 -0500
commit6048c64b26097a0ffbd966866b599f990e674e9b (patch)
tree412b3e62328842563ef70e6ab68a1dd539038a90 /fs/ext4
parent3fd164629d25b04f291a79a013dcc7ce1a301269 (diff)
downloadop-kernel-dev-6048c64b26097a0ffbd966866b599f990e674e9b.zip
op-kernel-dev-6048c64b26097a0ffbd966866b599f990e674e9b.tar.gz
mbcache: add reusable flag to cache entries
To reduce amount of damage caused by single bad block, we limit number of inodes sharing an xattr block to 1024. Thus there can be more xattr blocks with the same contents when there are lots of files with the same extended attributes. These xattr blocks naturally result in hash collisions and can form long hash chains and we unnecessarily check each such block only to find out we cannot use it because it is already shared by too many inodes. Add a reusable flag to cache entries which is cleared when a cache entry has reached its maximum refcount. Cache entries which are not marked reusable are skipped by mb_cache_entry_find_{first,next}. This significantly speeds up mbcache when there are many same xattr blocks. For example for xattr-bench with 5 values and each process handling 20000 files, the run for 64 processes is 25x faster with this patch. Even for 8 processes the speedup is almost 3x. We have also verified that for situations where there is only one xattr block of each kind, the patch doesn't have a measurable cost. [JK: Remove handling of setting the same value since it is not needed anymore, check for races in e_reusable setting, improve changelog, add measurements] Signed-off-by: Andreas Gruenbacher <agruenba@redhat.com> Signed-off-by: Jan Kara <jack@suse.cz> Signed-off-by: Theodore Ts'o <tytso@mit.edu>
Diffstat (limited to 'fs/ext4')
-rw-r--r--fs/ext4/xattr.c66
1 files changed, 42 insertions, 24 deletions
diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c
index b661ae8..0441e05 100644
--- a/fs/ext4/xattr.c
+++ b/fs/ext4/xattr.c
@@ -545,6 +545,8 @@ static void
ext4_xattr_release_block(handle_t *handle, struct inode *inode,
struct buffer_head *bh)
{
+ struct mb_cache *ext4_mb_cache = EXT4_GET_MB_CACHE(inode);
+ u32 hash, ref;
int error = 0;
BUFFER_TRACE(bh, "get_write_access");
@@ -553,23 +555,34 @@ ext4_xattr_release_block(handle_t *handle, struct inode *inode,
goto out;
lock_buffer(bh);
- if (BHDR(bh)->h_refcount == cpu_to_le32(1)) {
- __u32 hash = le32_to_cpu(BHDR(bh)->h_hash);
-
+ hash = le32_to_cpu(BHDR(bh)->h_hash);
+ ref = le32_to_cpu(BHDR(bh)->h_refcount);
+ if (ref == 1) {
ea_bdebug(bh, "refcount now=0; freeing");
/*
* This must happen under buffer lock for
* ext4_xattr_block_set() to reliably detect freed block
*/
- mb_cache_entry_delete_block(EXT4_GET_MB_CACHE(inode), hash,
- bh->b_blocknr);
+ mb_cache_entry_delete_block(ext4_mb_cache, hash, bh->b_blocknr);
get_bh(bh);
unlock_buffer(bh);
ext4_free_blocks(handle, inode, bh, 0, 1,
EXT4_FREE_BLOCKS_METADATA |
EXT4_FREE_BLOCKS_FORGET);
} else {
- le32_add_cpu(&BHDR(bh)->h_refcount, -1);
+ ref--;
+ BHDR(bh)->h_refcount = cpu_to_le32(ref);
+ if (ref == EXT4_XATTR_REFCOUNT_MAX - 1) {
+ struct mb_cache_entry *ce;
+
+ ce = mb_cache_entry_get(ext4_mb_cache, hash,
+ bh->b_blocknr);
+ if (ce) {
+ ce->e_reusable = 1;
+ mb_cache_entry_put(ext4_mb_cache, ce);
+ }
+ }
+
/*
* Beware of this ugliness: Releasing of xattr block references
* from different inodes can race and so we have to protect
@@ -872,6 +885,8 @@ inserted:
if (new_bh == bs->bh)
ea_bdebug(new_bh, "keeping");
else {
+ u32 ref;
+
/* The old block is released after updating
the inode. */
error = dquot_alloc_block(inode,
@@ -886,15 +901,18 @@ inserted:
lock_buffer(new_bh);
/*
* We have to be careful about races with
- * freeing or rehashing of xattr block. Once we
- * hold buffer lock xattr block's state is
- * stable so we can check whether the block got
- * freed / rehashed or not. Since we unhash
- * mbcache entry under buffer lock when freeing
- * / rehashing xattr block, checking whether
- * entry is still hashed is reliable.
+ * freeing, rehashing or adding references to
+ * xattr block. Once we hold buffer lock xattr
+ * block's state is stable so we can check
+ * whether the block got freed / rehashed or
+ * not. Since we unhash mbcache entry under
+ * buffer lock when freeing / rehashing xattr
+ * block, checking whether entry is still
+ * hashed is reliable. Same rules hold for
+ * e_reusable handling.
*/
- if (hlist_bl_unhashed(&ce->e_hash_list)) {
+ if (hlist_bl_unhashed(&ce->e_hash_list) ||
+ !ce->e_reusable) {
/*
* Undo everything and check mbcache
* again.
@@ -909,9 +927,12 @@ inserted:
new_bh = NULL;
goto inserted;
}
- le32_add_cpu(&BHDR(new_bh)->h_refcount, 1);
+ ref = le32_to_cpu(BHDR(new_bh)->h_refcount) + 1;
+ BHDR(new_bh)->h_refcount = cpu_to_le32(ref);
+ if (ref >= EXT4_XATTR_REFCOUNT_MAX)
+ ce->e_reusable = 0;
ea_bdebug(new_bh, "reusing; refcount now=%d",
- le32_to_cpu(BHDR(new_bh)->h_refcount));
+ ref);
unlock_buffer(new_bh);
error = ext4_handle_dirty_xattr_block(handle,
inode,
@@ -1566,11 +1587,14 @@ cleanup:
static void
ext4_xattr_cache_insert(struct mb_cache *ext4_mb_cache, struct buffer_head *bh)
{
- __u32 hash = le32_to_cpu(BHDR(bh)->h_hash);
+ struct ext4_xattr_header *header = BHDR(bh);
+ __u32 hash = le32_to_cpu(header->h_hash);
+ int reusable = le32_to_cpu(header->h_refcount) <
+ EXT4_XATTR_REFCOUNT_MAX;
int error;
error = mb_cache_entry_create(ext4_mb_cache, GFP_NOFS, hash,
- bh->b_blocknr);
+ bh->b_blocknr, reusable);
if (error) {
if (error == -EBUSY)
ea_bdebug(bh, "already in cache");
@@ -1645,12 +1669,6 @@ ext4_xattr_cache_find(struct inode *inode, struct ext4_xattr_header *header,
if (!bh) {
EXT4_ERROR_INODE(inode, "block %lu read error",
(unsigned long) ce->e_block);
- } else if (le32_to_cpu(BHDR(bh)->h_refcount) >=
- EXT4_XATTR_REFCOUNT_MAX) {
- ea_idebug(inode, "block %lu refcount %d>=%d",
- (unsigned long) ce->e_block,
- le32_to_cpu(BHDR(bh)->h_refcount),
- EXT4_XATTR_REFCOUNT_MAX);
} else if (ext4_xattr_cmp(header, BHDR(bh)) == 0) {
*pce = ce;
return bh;
OpenPOWER on IntegriCloud