From 5576cd6ca555bd79e76a2d798aec44b28851d369 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Thu, 12 Jun 2014 13:25:01 +0800 Subject: f2fs: avoid unneeded SetPageUptodate in f2fs_write_end We have already set page update in ->write_begin, so we should remove redundant SetPageUptodate in ->write_end. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/data.c | 1 - 1 file changed, 1 deletion(-) (limited to 'fs') diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index f8cf619..2eb2764 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -1016,7 +1016,6 @@ static int f2fs_write_end(struct file *file, trace_f2fs_write_end(inode, pos, len, copied); - SetPageUptodate(page); set_page_dirty(page); if (pos + copied > i_size_read(inode)) { -- cgit v1.1 From ca0a81b397b5f153ac2a9880c5e85b08b0447e4e Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Thu, 12 Jun 2014 13:31:50 +0800 Subject: f2fs: avoid to truncate non-updated page partially After we call find_data_page in truncate_partial_data_page, we could not guarantee this page is updated or not as error may occurred in lower layer. We'd better check status of the page to avoid this no updated page be writebacked to device. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/file.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) (limited to 'fs') diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index 7d8b962..36fa505 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -380,13 +380,15 @@ static void truncate_partial_data_page(struct inode *inode, u64 from) return; lock_page(page); - if (unlikely(page->mapping != inode->i_mapping)) { - f2fs_put_page(page, 1); - return; - } + if (unlikely(!PageUptodate(page) || + page->mapping != inode->i_mapping)) + goto out; + f2fs_wait_on_page_writeback(page, DATA); zero_user(page, offset, PAGE_CACHE_SIZE - offset); set_page_dirty(page); + +out: f2fs_put_page(page, 1); } -- cgit v1.1 From 50732df02eefb39ab414ef655979c2c9b64ad21c Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Thu, 19 Jun 2014 16:23:19 +0800 Subject: f2fs: support ->tmpfile() Add function f2fs_tmpfile() to support O_TMPFILE file creation, and modify logic of init_inode_metadata to enable linkat temp file. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/dir.c | 10 ++++++++- fs/f2fs/namei.c | 64 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 73 insertions(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/f2fs/dir.c b/fs/f2fs/dir.c index a4addd7..3677f41 100644 --- a/fs/f2fs/dir.c +++ b/fs/f2fs/dir.c @@ -333,10 +333,12 @@ static int make_empty_dir(struct inode *inode, static struct page *init_inode_metadata(struct inode *inode, struct inode *dir, const struct qstr *name) { + struct f2fs_sb_info *sbi = F2FS_SB(dir->i_sb); struct page *page; int err; - if (is_inode_flag_set(F2FS_I(inode), FI_NEW_INODE)) { + if (is_inode_flag_set(F2FS_I(inode), FI_NEW_INODE) && + inode->i_nlink) { page = new_inode_page(inode, name); if (IS_ERR(page)) return page; @@ -370,6 +372,12 @@ static struct page *init_inode_metadata(struct inode *inode, */ if (is_inode_flag_set(F2FS_I(inode), FI_INC_LINK)) { file_lost_pino(inode); + /* + * If link the tmpfile to alias through linkat path, + * we should remove this inode from orphan list. + */ + if (inode->i_nlink == 0) + remove_orphan_inode(sbi, inode->i_ino); inc_nlink(inode); } return page; diff --git a/fs/f2fs/namei.c b/fs/f2fs/namei.c index a6bdddc..5c08c54 100644 --- a/fs/f2fs/namei.c +++ b/fs/f2fs/namei.c @@ -13,6 +13,7 @@ #include #include #include +#include #include "f2fs.h" #include "node.h" @@ -487,6 +488,68 @@ out: return err; } +static int f2fs_tmpfile(struct inode *dir, struct dentry *dentry, umode_t mode) +{ + struct super_block *sb = dir->i_sb; + struct f2fs_sb_info *sbi = F2FS_SB(sb); + struct inode *inode; + struct page *page; + int err; + + inode = f2fs_new_inode(dir, mode); + if (IS_ERR(inode)) + return PTR_ERR(inode); + + inode->i_op = &f2fs_file_inode_operations; + inode->i_fop = &f2fs_file_operations; + inode->i_mapping->a_ops = &f2fs_dblock_aops; + + f2fs_lock_op(sbi); + err = acquire_orphan_inode(sbi); + if (err) + goto out; + /* + * add this non-linked tmpfile to orphan list, in this way we could + * remove all unused data of tmpfile after abnormal power-off. + */ + add_orphan_inode(sbi, inode->i_ino); + + page = new_inode_page(inode, NULL); + if (IS_ERR(page)) { + err = PTR_ERR(page); + goto remove_out; + } + + err = f2fs_init_acl(inode, dir, page); + if (err) + goto unlock_out; + + err = f2fs_init_security(inode, dir, NULL, page); + if (err) + goto unlock_out; + + f2fs_put_page(page, 1); + f2fs_unlock_op(sbi); + + alloc_nid_done(sbi, inode->i_ino); + mark_inode_dirty(inode); + d_tmpfile(dentry, inode); + unlock_new_inode(inode); + return 0; +unlock_out: + f2fs_put_page(page, 1); +remove_out: + remove_orphan_inode(sbi, inode->i_ino); +out: + f2fs_unlock_op(sbi); + clear_nlink(inode); + unlock_new_inode(inode); + make_bad_inode(inode); + iput(inode); + alloc_nid_failed(sbi, inode->i_ino); + return err; +} + const struct inode_operations f2fs_dir_inode_operations = { .create = f2fs_create, .lookup = f2fs_lookup, @@ -497,6 +560,7 @@ const struct inode_operations f2fs_dir_inode_operations = { .rmdir = f2fs_rmdir, .mknod = f2fs_mknod, .rename = f2fs_rename, + .tmpfile = f2fs_tmpfile, .getattr = f2fs_getattr, .setattr = f2fs_setattr, .get_acl = f2fs_get_acl, -- cgit v1.1 From b97a9b5da891ab6aff5a6a19c569c9c4c5563d48 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Fri, 20 Jun 2014 21:37:02 -0700 Subject: f2fs: introduce f2fs_do_tmpfile for code consistency This patch adds f2fs_do_tmpfile to eliminate the redundant init_inode_metadata flow. Throught this, we can provide the consistent lock usage, e.g., fi->i_sem, and this will enable better debugging stuffs. Signed-off-by: Jaegeuk Kim --- fs/f2fs/dir.c | 27 ++++++++++++++++++++++++--- fs/f2fs/f2fs.h | 1 + fs/f2fs/namei.c | 30 ++++++++---------------------- 3 files changed, 33 insertions(+), 25 deletions(-) (limited to 'fs') diff --git a/fs/f2fs/dir.c b/fs/f2fs/dir.c index 3677f41..fb7e01e 100644 --- a/fs/f2fs/dir.c +++ b/fs/f2fs/dir.c @@ -337,8 +337,7 @@ static struct page *init_inode_metadata(struct inode *inode, struct page *page; int err; - if (is_inode_flag_set(F2FS_I(inode), FI_NEW_INODE) && - inode->i_nlink) { + if (is_inode_flag_set(F2FS_I(inode), FI_NEW_INODE)) { page = new_inode_page(inode, name); if (IS_ERR(page)) return page; @@ -364,7 +363,8 @@ static struct page *init_inode_metadata(struct inode *inode, set_cold_node(inode, page); } - init_dent_inode(name, page); + if (name) + init_dent_inode(name, page); /* * This file should be checkpointed during fsync. @@ -537,6 +537,27 @@ fail: return err; } +int f2fs_do_tmpfile(struct inode *inode, struct inode *dir) +{ + struct page *page; + int err = 0; + + down_write(&F2FS_I(inode)->i_sem); + page = init_inode_metadata(inode, dir, NULL); + if (IS_ERR(page)) { + err = PTR_ERR(page); + goto fail; + } + /* we don't need to mark_inode_dirty now */ + update_inode(inode, page); + f2fs_put_page(page, 1); + + clear_inode_flag(F2FS_I(inode), FI_NEW_INODE); +fail: + up_write(&F2FS_I(inode)->i_sem); + return err; +} + /* * It only removes the dentry from the dentry page,corresponding name * entry in name page does not need to be touched during deletion. diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 58df97e..ca421a8 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -1136,6 +1136,7 @@ void f2fs_set_link(struct inode *, struct f2fs_dir_entry *, int update_dent_inode(struct inode *, const struct qstr *); int __f2fs_add_link(struct inode *, const struct qstr *, struct inode *); void f2fs_delete_entry(struct f2fs_dir_entry *, struct page *, struct inode *); +int f2fs_do_tmpfile(struct inode *, struct inode *); int f2fs_make_empty(struct inode *, struct inode *); bool f2fs_empty_dir(struct inode *); diff --git a/fs/f2fs/namei.c b/fs/f2fs/namei.c index 5c08c54..1994d2e 100644 --- a/fs/f2fs/namei.c +++ b/fs/f2fs/namei.c @@ -493,7 +493,6 @@ static int f2fs_tmpfile(struct inode *dir, struct dentry *dentry, umode_t mode) struct super_block *sb = dir->i_sb; struct f2fs_sb_info *sbi = F2FS_SB(sb); struct inode *inode; - struct page *page; int err; inode = f2fs_new_inode(dir, mode); @@ -508,38 +507,25 @@ static int f2fs_tmpfile(struct inode *dir, struct dentry *dentry, umode_t mode) err = acquire_orphan_inode(sbi); if (err) goto out; + + err = f2fs_do_tmpfile(inode, dir); + if (err) + goto release_out; + /* * add this non-linked tmpfile to orphan list, in this way we could * remove all unused data of tmpfile after abnormal power-off. */ add_orphan_inode(sbi, inode->i_ino); - - page = new_inode_page(inode, NULL); - if (IS_ERR(page)) { - err = PTR_ERR(page); - goto remove_out; - } - - err = f2fs_init_acl(inode, dir, page); - if (err) - goto unlock_out; - - err = f2fs_init_security(inode, dir, NULL, page); - if (err) - goto unlock_out; - - f2fs_put_page(page, 1); f2fs_unlock_op(sbi); alloc_nid_done(sbi, inode->i_ino); - mark_inode_dirty(inode); d_tmpfile(dentry, inode); unlock_new_inode(inode); return 0; -unlock_out: - f2fs_put_page(page, 1); -remove_out: - remove_orphan_inode(sbi, inode->i_ino); + +release_out: + release_orphan_inode(sbi); out: f2fs_unlock_op(sbi); clear_nlink(inode); -- cgit v1.1 From a014e037be26b5c9ee6fb4e49e7804141cf3bb89 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Fri, 20 Jun 2014 21:44:02 -0700 Subject: f2fs: clean up an unused parameter and assignment This patch cleans up simple unnecessary codes. Signed-off-by: Jaegeuk Kim --- fs/f2fs/dir.c | 2 +- fs/f2fs/f2fs.h | 2 +- fs/f2fs/namei.c | 26 +++++++++----------------- fs/f2fs/node.c | 2 +- 4 files changed, 12 insertions(+), 20 deletions(-) (limited to 'fs') diff --git a/fs/f2fs/dir.c b/fs/f2fs/dir.c index fb7e01e..087b03d 100644 --- a/fs/f2fs/dir.c +++ b/fs/f2fs/dir.c @@ -338,7 +338,7 @@ static struct page *init_inode_metadata(struct inode *inode, int err; if (is_inode_flag_set(F2FS_I(inode), FI_NEW_INODE)) { - page = new_inode_page(inode, name); + page = new_inode_page(inode); if (IS_ERR(page)) return page; diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index ca421a8..3f0291b 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -1174,7 +1174,7 @@ int truncate_inode_blocks(struct inode *, pgoff_t); int truncate_xattr_node(struct inode *, struct page *); int wait_on_node_pages_writeback(struct f2fs_sb_info *, nid_t); void remove_inode_page(struct inode *); -struct page *new_inode_page(struct inode *, const struct qstr *); +struct page *new_inode_page(struct inode *); struct page *new_node_page(struct dnode_of_data *, unsigned int, struct page *); void ra_node_page(struct f2fs_sb_info *, nid_t); struct page *get_node_page(struct f2fs_sb_info *, pgoff_t); diff --git a/fs/f2fs/namei.c b/fs/f2fs/namei.c index 1994d2e..89b0bd9 100644 --- a/fs/f2fs/namei.c +++ b/fs/f2fs/namei.c @@ -23,14 +23,13 @@ static struct inode *f2fs_new_inode(struct inode *dir, umode_t mode) { - struct super_block *sb = dir->i_sb; - struct f2fs_sb_info *sbi = F2FS_SB(sb); + struct f2fs_sb_info *sbi = F2FS_SB(dir->i_sb); nid_t ino; struct inode *inode; bool nid_free = false; int err; - inode = new_inode(sb); + inode = new_inode(dir->i_sb); if (!inode) return ERR_PTR(-ENOMEM); @@ -103,8 +102,7 @@ static inline void set_cold_files(struct f2fs_sb_info *sbi, struct inode *inode, static int f2fs_create(struct inode *dir, struct dentry *dentry, umode_t mode, bool excl) { - struct super_block *sb = dir->i_sb; - struct f2fs_sb_info *sbi = F2FS_SB(sb); + struct f2fs_sb_info *sbi = F2FS_SB(dir->i_sb); struct inode *inode; nid_t ino = 0; int err; @@ -147,8 +145,7 @@ static int f2fs_link(struct dentry *old_dentry, struct inode *dir, struct dentry *dentry) { struct inode *inode = old_dentry->d_inode; - struct super_block *sb = dir->i_sb; - struct f2fs_sb_info *sbi = F2FS_SB(sb); + struct f2fs_sb_info *sbi = F2FS_SB(dir->i_sb); int err; f2fs_balance_fs(sbi); @@ -208,8 +205,7 @@ static struct dentry *f2fs_lookup(struct inode *dir, struct dentry *dentry, static int f2fs_unlink(struct inode *dir, struct dentry *dentry) { - struct super_block *sb = dir->i_sb; - struct f2fs_sb_info *sbi = F2FS_SB(sb); + struct f2fs_sb_info *sbi = F2FS_SB(dir->i_sb); struct inode *inode = dentry->d_inode; struct f2fs_dir_entry *de; struct page *page; @@ -243,8 +239,7 @@ fail: static int f2fs_symlink(struct inode *dir, struct dentry *dentry, const char *symname) { - struct super_block *sb = dir->i_sb; - struct f2fs_sb_info *sbi = F2FS_SB(sb); + struct f2fs_sb_info *sbi = F2FS_SB(dir->i_sb); struct inode *inode; size_t symlen = strlen(symname) + 1; int err; @@ -331,8 +326,7 @@ static int f2fs_rmdir(struct inode *dir, struct dentry *dentry) static int f2fs_mknod(struct inode *dir, struct dentry *dentry, umode_t mode, dev_t rdev) { - struct super_block *sb = dir->i_sb; - struct f2fs_sb_info *sbi = F2FS_SB(sb); + struct f2fs_sb_info *sbi = F2FS_SB(dir->i_sb); struct inode *inode; int err = 0; @@ -370,8 +364,7 @@ out: static int f2fs_rename(struct inode *old_dir, struct dentry *old_dentry, struct inode *new_dir, struct dentry *new_dentry) { - struct super_block *sb = old_dir->i_sb; - struct f2fs_sb_info *sbi = F2FS_SB(sb); + struct f2fs_sb_info *sbi = F2FS_SB(old_dir->i_sb); struct inode *old_inode = old_dentry->d_inode; struct inode *new_inode = new_dentry->d_inode; struct page *old_dir_page; @@ -490,8 +483,7 @@ out: static int f2fs_tmpfile(struct inode *dir, struct dentry *dentry, umode_t mode) { - struct super_block *sb = dir->i_sb; - struct f2fs_sb_info *sbi = F2FS_SB(sb); + struct f2fs_sb_info *sbi = F2FS_SB(dir->i_sb); struct inode *inode; int err; diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index 4b697cc..de709f0 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -845,7 +845,7 @@ void remove_inode_page(struct inode *inode) truncate_node(&dn); } -struct page *new_inode_page(struct inode *inode, const struct qstr *name) +struct page *new_inode_page(struct inode *inode) { struct dnode_of_data dn; -- cgit v1.1 From aec71382c68135261ef6efc3d8a96b7149939446 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Tue, 24 Jun 2014 09:18:20 +0800 Subject: f2fs: refactor flush_nat_entries codes for reducing NAT writes Although building NAT journal in cursum reduce the read/write work for NAT block, but previous design leave us lower performance when write checkpoint frequently for these cases: 1. if journal in cursum has already full, it's a bit of waste that we flush all nat entries to page for persistence, but not to cache any entries. 2. if journal in cursum is not full, we fill nat entries to journal util journal is full, then flush the left dirty entries to disk without merge journaled entries, so these journaled entries may be flushed to disk at next checkpoint but lost chance to flushed last time. In this patch we merge dirty entries located in same NAT block to nat entry set, and linked all set to list, sorted ascending order by entries' count of set. Later we flush entries in sparse set into journal as many as we can, and then flush merged entries to disk. In this way we can not only gain in performance, but also save lifetime of flash device. In my testing environment, it shows this patch can help to reduce NAT block writes obviously. In hard disk test case: cost time of fsstress is stablely reduced by about 5%. 1. virtual machine + hard disk: fsstress -p 20 -n 200 -l 5 node num cp count nodes/cp based 4599.6 1803.0 2.551 patched 2714.6 1829.6 1.483 2. virtual machine + 32g micro SD card: fsstress -p 20 -n 200 -l 1 -w -f chown=0 -f creat=4 -f dwrite=0 -f fdatasync=4 -f fsync=4 -f link=0 -f mkdir=4 -f mknod=4 -f rename=5 -f rmdir=5 -f symlink=0 -f truncate=4 -f unlink=5 -f write=0 -S node num cp count nodes/cp based 84.5 43.7 1.933 patched 49.2 40.0 1.23 Our latency of merging op shows not bad when handling extreme case like: merging a great number of dirty nats: latency(ns) dirty nat count 3089219 24922 5129423 27422 4000250 24523 change log from v1: o fix wrong logic in add_nat_entry when grab a new nat entry set. o swith to create slab cache in create_node_manager_caches. o use GFP_ATOMIC instead of GFP_NOFS to avoid potential long latency. change log from v2: o make comment position more appropriate suggested by Jaegeuk Kim. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/f2fs.h | 2 + fs/f2fs/node.c | 263 +++++++++++++++++++++++++++++++++++++++------------------ fs/f2fs/node.h | 7 ++ 3 files changed, 188 insertions(+), 84 deletions(-) (limited to 'fs') diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 3f0291b..ec480b1 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -256,6 +256,8 @@ struct f2fs_nm_info { unsigned int nat_cnt; /* the # of cached nat entries */ struct list_head nat_entries; /* cached nat entry list (clean) */ struct list_head dirty_nat_entries; /* cached nat entry list (dirty) */ + struct list_head nat_entry_set; /* nat entry set list */ + unsigned int dirty_nat_cnt; /* total num of nat entries in set */ /* free node ids management */ struct radix_tree_root free_nid_root;/* root of the free_nid cache */ diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index de709f0..a90f51d 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -25,6 +25,7 @@ static struct kmem_cache *nat_entry_slab; static struct kmem_cache *free_nid_slab; +static struct kmem_cache *nat_entry_set_slab; bool available_free_memory(struct f2fs_sb_info *sbi, int type) { @@ -90,12 +91,8 @@ static struct page *get_next_nat_page(struct f2fs_sb_info *sbi, nid_t nid) /* get current nat block page with lock */ src_page = get_meta_page(sbi, src_off); - - /* Dirty src_page means that it is already the new target NAT page. */ - if (PageDirty(src_page)) - return src_page; - dst_page = grab_meta_page(sbi, dst_off); + f2fs_bug_on(PageDirty(src_page)); src_addr = page_address(src_page); dst_addr = page_address(dst_page); @@ -1744,7 +1741,90 @@ skip: return err; } -static bool flush_nats_in_journal(struct f2fs_sb_info *sbi) +static struct nat_entry_set *grab_nat_entry_set(void) +{ + struct nat_entry_set *nes = + f2fs_kmem_cache_alloc(nat_entry_set_slab, GFP_ATOMIC); + + nes->entry_cnt = 0; + INIT_LIST_HEAD(&nes->set_list); + INIT_LIST_HEAD(&nes->entry_list); + return nes; +} + +static void release_nat_entry_set(struct nat_entry_set *nes, + struct f2fs_nm_info *nm_i) +{ + f2fs_bug_on(!list_empty(&nes->entry_list)); + + nm_i->dirty_nat_cnt -= nes->entry_cnt; + list_del(&nes->set_list); + kmem_cache_free(nat_entry_set_slab, nes); +} + +static void adjust_nat_entry_set(struct nat_entry_set *nes, + struct list_head *head) +{ + struct nat_entry_set *next = nes; + + if (list_is_last(&nes->set_list, head)) + return; + + list_for_each_entry_continue(next, head, set_list) + if (nes->entry_cnt <= next->entry_cnt) + break; + + list_move_tail(&nes->set_list, &next->set_list); +} + +static void add_nat_entry(struct nat_entry *ne, struct list_head *head) +{ + struct nat_entry_set *nes; + nid_t start_nid = START_NID(ne->ni.nid); + + list_for_each_entry(nes, head, set_list) { + if (nes->start_nid == start_nid) { + list_move_tail(&ne->list, &nes->entry_list); + nes->entry_cnt++; + adjust_nat_entry_set(nes, head); + return; + } + } + + nes = grab_nat_entry_set(); + + nes->start_nid = start_nid; + list_move_tail(&ne->list, &nes->entry_list); + nes->entry_cnt++; + list_add(&nes->set_list, head); +} + +static void merge_nats_in_set(struct f2fs_sb_info *sbi) +{ + struct f2fs_nm_info *nm_i = NM_I(sbi); + struct list_head *dirty_list = &nm_i->dirty_nat_entries; + struct list_head *set_list = &nm_i->nat_entry_set; + struct nat_entry *ne, *tmp; + + write_lock(&nm_i->nat_tree_lock); + list_for_each_entry_safe(ne, tmp, dirty_list, list) { + if (nat_get_blkaddr(ne) == NEW_ADDR) + continue; + add_nat_entry(ne, set_list); + nm_i->dirty_nat_cnt++; + } + write_unlock(&nm_i->nat_tree_lock); +} + +static bool __has_cursum_space(struct f2fs_summary_block *sum, int size) +{ + if (nats_in_cursum(sum) + size <= NAT_JOURNAL_ENTRIES) + return true; + else + return false; +} + +static void remove_nats_in_journal(struct f2fs_sb_info *sbi) { struct f2fs_nm_info *nm_i = NM_I(sbi); struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_HOT_DATA); @@ -1752,12 +1832,6 @@ static bool flush_nats_in_journal(struct f2fs_sb_info *sbi) int i; mutex_lock(&curseg->curseg_mutex); - - if (nats_in_cursum(sum) < NAT_JOURNAL_ENTRIES) { - mutex_unlock(&curseg->curseg_mutex); - return false; - } - for (i = 0; i < nats_in_cursum(sum); i++) { struct nat_entry *ne; struct f2fs_nat_entry raw_ne; @@ -1767,23 +1841,21 @@ static bool flush_nats_in_journal(struct f2fs_sb_info *sbi) retry: write_lock(&nm_i->nat_tree_lock); ne = __lookup_nat_cache(nm_i, nid); - if (ne) { - __set_nat_cache_dirty(nm_i, ne); - write_unlock(&nm_i->nat_tree_lock); - continue; - } + if (ne) + goto found; + ne = grab_nat_entry(nm_i, nid); if (!ne) { write_unlock(&nm_i->nat_tree_lock); goto retry; } node_info_from_raw_nat(&ne->ni, &raw_ne); +found: __set_nat_cache_dirty(nm_i, ne); write_unlock(&nm_i->nat_tree_lock); } update_nats_in_cursum(sum, -i); mutex_unlock(&curseg->curseg_mutex); - return true; } /* @@ -1794,80 +1866,91 @@ void flush_nat_entries(struct f2fs_sb_info *sbi) struct f2fs_nm_info *nm_i = NM_I(sbi); struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_HOT_DATA); struct f2fs_summary_block *sum = curseg->sum_blk; - struct nat_entry *ne, *cur; - struct page *page = NULL; - struct f2fs_nat_block *nat_blk = NULL; - nid_t start_nid = 0, end_nid = 0; - bool flushed; - - flushed = flush_nats_in_journal(sbi); + struct nat_entry_set *nes, *tmp; + struct list_head *head = &nm_i->nat_entry_set; + bool to_journal = true; - if (!flushed) - mutex_lock(&curseg->curseg_mutex); + /* merge nat entries of dirty list to nat entry set temporarily */ + merge_nats_in_set(sbi); - /* 1) flush dirty nat caches */ - list_for_each_entry_safe(ne, cur, &nm_i->dirty_nat_entries, list) { - nid_t nid; - struct f2fs_nat_entry raw_ne; - int offset = -1; - - if (nat_get_blkaddr(ne) == NEW_ADDR) - continue; - - nid = nat_get_nid(ne); + /* + * if there are no enough space in journal to store dirty nat + * entries, remove all entries from journal and merge them + * into nat entry set. + */ + if (!__has_cursum_space(sum, nm_i->dirty_nat_cnt)) { + remove_nats_in_journal(sbi); - if (flushed) - goto to_nat_page; + /* + * merge nat entries of dirty list to nat entry set temporarily + */ + merge_nats_in_set(sbi); + } - /* if there is room for nat enries in curseg->sumpage */ - offset = lookup_journal_in_cursum(sum, NAT_JOURNAL, nid, 1); - if (offset >= 0) { - raw_ne = nat_in_journal(sum, offset); - goto flush_now; - } -to_nat_page: - if (!page || (start_nid > nid || nid > end_nid)) { - if (page) { - f2fs_put_page(page, 1); - page = NULL; - } - start_nid = START_NID(nid); - end_nid = start_nid + NAT_ENTRY_PER_BLOCK - 1; + if (!nm_i->dirty_nat_cnt) + return; - /* - * get nat block with dirty flag, increased reference - * count, mapped and lock - */ + /* + * there are two steps to flush nat entries: + * #1, flush nat entries to journal in current hot data summary block. + * #2, flush nat entries to nat page. + */ + list_for_each_entry_safe(nes, tmp, head, set_list) { + struct f2fs_nat_block *nat_blk; + struct nat_entry *ne, *cur; + struct page *page; + nid_t start_nid = nes->start_nid; + + if (to_journal && !__has_cursum_space(sum, nes->entry_cnt)) + to_journal = false; + + if (to_journal) { + mutex_lock(&curseg->curseg_mutex); + } else { page = get_next_nat_page(sbi, start_nid); nat_blk = page_address(page); + f2fs_bug_on(!nat_blk); } - f2fs_bug_on(!nat_blk); - raw_ne = nat_blk->entries[nid - start_nid]; -flush_now: - raw_nat_from_node_info(&raw_ne, &ne->ni); - - if (offset < 0) { - nat_blk->entries[nid - start_nid] = raw_ne; - } else { - nat_in_journal(sum, offset) = raw_ne; - nid_in_journal(sum, offset) = cpu_to_le32(nid); - } + /* flush dirty nats in nat entry set */ + list_for_each_entry_safe(ne, cur, &nes->entry_list, list) { + struct f2fs_nat_entry *raw_ne; + nid_t nid = nat_get_nid(ne); + int offset; + + if (to_journal) { + offset = lookup_journal_in_cursum(sum, + NAT_JOURNAL, nid, 1); + f2fs_bug_on(offset < 0); + raw_ne = &nat_in_journal(sum, offset); + nid_in_journal(sum, offset) = cpu_to_le32(nid); + } else { + raw_ne = &nat_blk->entries[nid - start_nid]; + } + raw_nat_from_node_info(raw_ne, &ne->ni); - if (nat_get_blkaddr(ne) == NULL_ADDR && + if (nat_get_blkaddr(ne) == NULL_ADDR && add_free_nid(sbi, nid, false) <= 0) { - write_lock(&nm_i->nat_tree_lock); - __del_from_nat_cache(nm_i, ne); - write_unlock(&nm_i->nat_tree_lock); - } else { - write_lock(&nm_i->nat_tree_lock); - __clear_nat_cache_dirty(nm_i, ne); - write_unlock(&nm_i->nat_tree_lock); + write_lock(&nm_i->nat_tree_lock); + __del_from_nat_cache(nm_i, ne); + write_unlock(&nm_i->nat_tree_lock); + } else { + write_lock(&nm_i->nat_tree_lock); + __clear_nat_cache_dirty(nm_i, ne); + write_unlock(&nm_i->nat_tree_lock); + } } + + if (to_journal) + mutex_unlock(&curseg->curseg_mutex); + else + f2fs_put_page(page, 1); + + release_nat_entry_set(nes, nm_i); } - if (!flushed) - mutex_unlock(&curseg->curseg_mutex); - f2fs_put_page(page, 1); + + f2fs_bug_on(!list_empty(head)); + f2fs_bug_on(nm_i->dirty_nat_cnt); } static int init_node_manager(struct f2fs_sb_info *sbi) @@ -1896,6 +1979,7 @@ static int init_node_manager(struct f2fs_sb_info *sbi) INIT_RADIX_TREE(&nm_i->nat_root, GFP_ATOMIC); INIT_LIST_HEAD(&nm_i->nat_entries); INIT_LIST_HEAD(&nm_i->dirty_nat_entries); + INIT_LIST_HEAD(&nm_i->nat_entry_set); mutex_init(&nm_i->build_lock); spin_lock_init(&nm_i->free_nid_list_lock); @@ -1976,19 +2060,30 @@ int __init create_node_manager_caches(void) nat_entry_slab = f2fs_kmem_cache_create("nat_entry", sizeof(struct nat_entry)); if (!nat_entry_slab) - return -ENOMEM; + goto fail; free_nid_slab = f2fs_kmem_cache_create("free_nid", sizeof(struct free_nid)); - if (!free_nid_slab) { - kmem_cache_destroy(nat_entry_slab); - return -ENOMEM; - } + if (!free_nid_slab) + goto destory_nat_entry; + + nat_entry_set_slab = f2fs_kmem_cache_create("nat_entry_set", + sizeof(struct nat_entry_set)); + if (!nat_entry_set_slab) + goto destory_free_nid; return 0; + +destory_free_nid: + kmem_cache_destroy(free_nid_slab); +destory_nat_entry: + kmem_cache_destroy(nat_entry_slab); +fail: + return -ENOMEM; } void destroy_node_manager_caches(void) { + kmem_cache_destroy(nat_entry_set_slab); kmem_cache_destroy(free_nid_slab); kmem_cache_destroy(nat_entry_slab); } diff --git a/fs/f2fs/node.h b/fs/f2fs/node.h index 7281112..8a116a4 100644 --- a/fs/f2fs/node.h +++ b/fs/f2fs/node.h @@ -89,6 +89,13 @@ enum mem_type { DIRTY_DENTS /* indicates dirty dentry pages */ }; +struct nat_entry_set { + struct list_head set_list; /* link with all nat sets */ + struct list_head entry_list; /* link with dirty nat entries */ + nid_t start_nid; /* start nid of nats in set */ + unsigned int entry_cnt; /* the # of nat entries in set */ +}; + /* * For free nid mangement */ -- cgit v1.1 From b434babf854eab82397759e98425423cd9bc4786 Mon Sep 17 00:00:00 2001 From: Fabian Frederick Date: Mon, 23 Jun 2014 18:39:15 +0200 Subject: f2fs: replace count*size kzalloc by kcalloc kcalloc manages count*sizeof overflow. Cc: Jaegeuk Kim Cc: linux-f2fs-devel@lists.sourceforge.net Signed-off-by: Fabian Frederick Signed-off-by: Jaegeuk Kim --- fs/f2fs/segment.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index d04613d..a4f8375 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -1703,7 +1703,7 @@ static int build_curseg(struct f2fs_sb_info *sbi) struct curseg_info *array; int i; - array = kzalloc(sizeof(*array) * NR_CURSEG_TYPE, GFP_KERNEL); + array = kcalloc(NR_CURSEG_TYPE, sizeof(*array), GFP_KERNEL); if (!array) return -ENOMEM; -- cgit v1.1 From 1256010ab1a372c8ffc01fc37ff767d8bc15378b Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Tue, 24 Jun 2014 14:16:24 +0800 Subject: f2fs: reduce region of f2fs_lock_op covered for better concurrency In our rename process, region of f2fs_lock_op covered is too big as some of the code like f2fs_empty_dir/f2fs_find_entry are not needed to protect by this lock. So in the extreme case like doing checkpoint when we rename old inode to exist inode in a large directory could cause lower concurrency. Let's reduce the region of f2fs_lock_op to fix this. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/namei.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) (limited to 'fs') diff --git a/fs/f2fs/namei.c b/fs/f2fs/namei.c index 89b0bd9..1b3cae0 100644 --- a/fs/f2fs/namei.c +++ b/fs/f2fs/namei.c @@ -387,8 +387,6 @@ static int f2fs_rename(struct inode *old_dir, struct dentry *old_dentry, goto out_old; } - f2fs_lock_op(sbi); - if (new_inode) { err = -ENOTEMPTY; @@ -401,6 +399,8 @@ static int f2fs_rename(struct inode *old_dir, struct dentry *old_dentry, if (!new_entry) goto out_dir; + f2fs_lock_op(sbi); + err = acquire_orphan_inode(sbi); if (err) goto put_out_dir; @@ -429,9 +429,13 @@ static int f2fs_rename(struct inode *old_dir, struct dentry *old_dentry, update_inode_page(old_inode); update_inode_page(new_inode); } else { + f2fs_lock_op(sbi); + err = f2fs_add_link(new_dentry, old_inode); - if (err) + if (err) { + f2fs_unlock_op(sbi); goto out_dir; + } if (old_dir_entry) { inc_nlink(new_dir); @@ -466,6 +470,7 @@ static int f2fs_rename(struct inode *old_dir, struct dentry *old_dentry, return 0; put_out_dir: + f2fs_unlock_op(sbi); kunmap(new_page); f2fs_put_page(new_page, 0); out_dir: @@ -473,7 +478,6 @@ out_dir: kunmap(old_dir_page); f2fs_put_page(old_dir_page, 0); } - f2fs_unlock_op(sbi); out_old: kunmap(old_page); f2fs_put_page(old_page, 0); -- cgit v1.1 From 34e6d456da4a61f655655a6d431657da5753913f Mon Sep 17 00:00:00 2001 From: Gu Zheng Date: Tue, 24 Jun 2014 18:18:14 +0800 Subject: f2fs: remove the redundant validation check of acl kernel side(xx_init_acl), the acl is get/cloned from the parent dir's, which is credible. So remove the redundant validation check of acl here. Signed-off-by: Gu Zheng Signed-off-by: Jaegeuk Kim --- fs/f2fs/acl.c | 6 ------ 1 file changed, 6 deletions(-) (limited to 'fs') diff --git a/fs/f2fs/acl.c b/fs/f2fs/acl.c index dbe2141..83b9b5a 100644 --- a/fs/f2fs/acl.c +++ b/fs/f2fs/acl.c @@ -203,12 +203,6 @@ static int __f2fs_set_acl(struct inode *inode, int type, size_t size = 0; int error; - if (acl) { - error = posix_acl_valid(acl); - if (error < 0) - return error; - } - switch (type) { case ACL_TYPE_ACCESS: name_index = F2FS_XATTR_INDEX_POSIX_ACL_ACCESS; -- cgit v1.1 From 1c3bb97899c91d420562c51882ee81a9d7c35306 Mon Sep 17 00:00:00 2001 From: Gu Zheng Date: Fri, 27 Jun 2014 17:57:04 +0800 Subject: f2fs: remove the needless point-cast Signed-off-by: Gu Zheng Signed-off-by: Jaegeuk Kim --- fs/f2fs/dir.c | 19 ++++++++----------- 1 file changed, 8 insertions(+), 11 deletions(-) (limited to 'fs') diff --git a/fs/f2fs/dir.c b/fs/f2fs/dir.c index 087b03d..bcbfbc4 100644 --- a/fs/f2fs/dir.c +++ b/fs/f2fs/dir.c @@ -298,14 +298,13 @@ static int make_empty_dir(struct inode *inode, struct page *dentry_page; struct f2fs_dentry_block *dentry_blk; struct f2fs_dir_entry *de; - void *kaddr; dentry_page = get_new_data_page(inode, page, 0, true); if (IS_ERR(dentry_page)) return PTR_ERR(dentry_page); - kaddr = kmap_atomic(dentry_page); - dentry_blk = (struct f2fs_dentry_block *)kaddr; + + dentry_blk = kmap_atomic(dentry_page); de = &dentry_blk->dentry[0]; de->name_len = cpu_to_le16(1); @@ -323,7 +322,7 @@ static int make_empty_dir(struct inode *inode, test_and_set_bit_le(0, &dentry_blk->dentry_bitmap); test_and_set_bit_le(1, &dentry_blk->dentry_bitmap); - kunmap_atomic(kaddr); + kunmap_atomic(dentry_blk); set_page_dirty(dentry_page); f2fs_put_page(dentry_page, 1); @@ -570,14 +569,13 @@ void f2fs_delete_entry(struct f2fs_dir_entry *dentry, struct page *page, struct address_space *mapping = page->mapping; struct inode *dir = mapping->host; int slots = GET_DENTRY_SLOTS(le16_to_cpu(dentry->name_len)); - void *kaddr = page_address(page); int i; lock_page(page); f2fs_wait_on_page_writeback(page, DATA); - dentry_blk = (struct f2fs_dentry_block *)kaddr; - bit_pos = dentry - (struct f2fs_dir_entry *)dentry_blk->dentry; + dentry_blk = page_address(page); + bit_pos = dentry - dentry_blk->dentry; for (i = 0; i < slots; i++) test_and_clear_bit_le(bit_pos + i, &dentry_blk->dentry_bitmap); @@ -632,7 +630,6 @@ bool f2fs_empty_dir(struct inode *dir) unsigned long nblock = dir_blocks(dir); for (bidx = 0; bidx < nblock; bidx++) { - void *kaddr; dentry_page = get_lock_data_page(dir, bidx); if (IS_ERR(dentry_page)) { if (PTR_ERR(dentry_page) == -ENOENT) @@ -641,8 +638,8 @@ bool f2fs_empty_dir(struct inode *dir) return false; } - kaddr = kmap_atomic(dentry_page); - dentry_blk = (struct f2fs_dentry_block *)kaddr; + + dentry_blk = kmap_atomic(dentry_page); if (bidx == 0) bit_pos = 2; else @@ -650,7 +647,7 @@ bool f2fs_empty_dir(struct inode *dir) bit_pos = find_next_bit_le(&dentry_blk->dentry_bitmap, NR_DENTRY_IN_BLOCK, bit_pos); - kunmap_atomic(kaddr); + kunmap_atomic(dentry_blk); f2fs_put_page(dentry_page, 1); -- cgit v1.1 From eee6160f2e324f0f6a626159fa1120caad31a6be Mon Sep 17 00:00:00 2001 From: Gu Zheng Date: Tue, 24 Jun 2014 18:21:23 +0800 Subject: f2fs: arguments cleanup of finding file flow functions Signed-off-by: Gu Zheng Signed-off-by: Jaegeuk Kim --- fs/f2fs/dir.c | 28 +++++++++++++--------------- fs/f2fs/f2fs.h | 2 +- fs/f2fs/hash.c | 4 +++- 3 files changed, 17 insertions(+), 17 deletions(-) (limited to 'fs') diff --git a/fs/f2fs/dir.c b/fs/f2fs/dir.c index bcbfbc4..e84e880 100644 --- a/fs/f2fs/dir.c +++ b/fs/f2fs/dir.c @@ -77,8 +77,8 @@ static unsigned long dir_block_index(unsigned int level, return bidx; } -static bool early_match_name(const char *name, size_t namelen, - f2fs_hash_t namehash, struct f2fs_dir_entry *de) +static bool early_match_name(size_t namelen, f2fs_hash_t namehash, + struct f2fs_dir_entry *de) { if (le16_to_cpu(de->name_len) != namelen) return false; @@ -90,7 +90,7 @@ static bool early_match_name(const char *name, size_t namelen, } static struct f2fs_dir_entry *find_in_block(struct page *dentry_page, - const char *name, size_t namelen, int *max_slots, + struct qstr *name, int *max_slots, f2fs_hash_t namehash, struct page **res_page) { struct f2fs_dir_entry *de; @@ -109,9 +109,10 @@ static struct f2fs_dir_entry *find_in_block(struct page *dentry_page, continue; } de = &dentry_blk->dentry[bit_pos]; - if (early_match_name(name, namelen, namehash, de)) { + if (early_match_name(name->len, namehash, de)) { if (!memcmp(dentry_blk->filename[bit_pos], - name, namelen)) { + name->name, + name->len)) { *res_page = dentry_page; goto found; } @@ -132,10 +133,10 @@ found: } static struct f2fs_dir_entry *find_in_level(struct inode *dir, - unsigned int level, const char *name, size_t namelen, + unsigned int level, struct qstr *name, f2fs_hash_t namehash, struct page **res_page) { - int s = GET_DENTRY_SLOTS(namelen); + int s = GET_DENTRY_SLOTS(name->len); unsigned int nbucket, nblock; unsigned int bidx, end_block; struct page *dentry_page; @@ -160,8 +161,8 @@ static struct f2fs_dir_entry *find_in_level(struct inode *dir, continue; } - de = find_in_block(dentry_page, name, namelen, - &max_slots, namehash, res_page); + de = find_in_block(dentry_page, name, &max_slots, + namehash, res_page); if (de) break; @@ -187,8 +188,6 @@ static struct f2fs_dir_entry *find_in_level(struct inode *dir, struct f2fs_dir_entry *f2fs_find_entry(struct inode *dir, struct qstr *child, struct page **res_page) { - const char *name = child->name; - size_t namelen = child->len; unsigned long npages = dir_blocks(dir); struct f2fs_dir_entry *de = NULL; f2fs_hash_t name_hash; @@ -200,12 +199,11 @@ struct f2fs_dir_entry *f2fs_find_entry(struct inode *dir, *res_page = NULL; - name_hash = f2fs_dentry_hash(name, namelen); + name_hash = f2fs_dentry_hash(child); max_depth = F2FS_I(dir)->i_current_depth; for (level = 0; level < max_depth; level++) { - de = find_in_level(dir, level, name, - namelen, name_hash, res_page); + de = find_in_level(dir, level, child, name_hash, res_page); if (de) break; } @@ -460,7 +458,7 @@ int __f2fs_add_link(struct inode *dir, const struct qstr *name, int err = 0; int i; - dentry_hash = f2fs_dentry_hash(name->name, name->len); + dentry_hash = f2fs_dentry_hash(name); level = 0; current_depth = F2FS_I(dir)->i_current_depth; if (F2FS_I(dir)->chash == dentry_hash) { diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index ec480b1..ae3b4ac 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -1158,7 +1158,7 @@ void f2fs_msg(struct super_block *, const char *, const char *, ...); /* * hash.c */ -f2fs_hash_t f2fs_dentry_hash(const char *, size_t); +f2fs_hash_t f2fs_dentry_hash(const struct qstr *); /* * node.c diff --git a/fs/f2fs/hash.c b/fs/f2fs/hash.c index 6eb8d26..948d17bf 100644 --- a/fs/f2fs/hash.c +++ b/fs/f2fs/hash.c @@ -69,12 +69,14 @@ static void str2hashbuf(const char *msg, size_t len, unsigned int *buf, int num) *buf++ = pad; } -f2fs_hash_t f2fs_dentry_hash(const char *name, size_t len) +f2fs_hash_t f2fs_dentry_hash(const struct qstr *name_info) { __u32 hash; f2fs_hash_t f2fs_hash; const char *p; __u32 in[8], buf[4]; + const char *name = name_info->name; + size_t len = name_info->len; if ((len <= 2) && (name[0] == '.') && (name[1] == '.' || name[1] == '\0')) -- cgit v1.1 From 3aab8f828ef358ae92545294a14cd52d510cc585 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Wed, 2 Jul 2014 13:25:04 +0800 Subject: f2fs: introduce f2fs_write_failed to handle error case when write When we fail in ->write_begin()/->direct_IO(), our allocated node block in disk and page cache are still kept, despite these may not be used again. This patch introduce f2fs_write_failed() to handle the error case of these two interfaces, it will truncate page cache and blocks of this file according to i_size. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/data.c | 42 +++++++++++++++++++++++++++++++----------- 1 file changed, 31 insertions(+), 11 deletions(-) (limited to 'fs') diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index 2eb2764..05154d6d 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -914,6 +914,16 @@ skip_write: return 0; } +static void f2fs_write_failed(struct address_space *mapping, loff_t to) +{ + struct inode *inode = mapping->host; + + if (to > inode->i_size) { + truncate_pagecache(inode, inode->i_size); + truncate_blocks(inode, inode->i_size); + } +} + static int f2fs_write_begin(struct file *file, struct address_space *mapping, loff_t pos, unsigned len, unsigned flags, struct page **pagep, void **fsdata) @@ -931,11 +941,13 @@ static int f2fs_write_begin(struct file *file, struct address_space *mapping, repeat: err = f2fs_convert_inline_data(inode, pos + len); if (err) - return err; + goto fail; page = grab_cache_page_write_begin(mapping, index, flags); - if (!page) - return -ENOMEM; + if (!page) { + err = -ENOMEM; + goto fail; + } /* to avoid latency during memory pressure */ unlock_page(page); @@ -949,10 +961,9 @@ repeat: set_new_dnode(&dn, inode, NULL, NULL, 0); err = f2fs_reserve_block(&dn, index); f2fs_unlock_op(sbi); - if (err) { f2fs_put_page(page, 0); - return err; + goto fail; } inline_data: lock_page(page); @@ -982,19 +993,20 @@ inline_data: err = f2fs_read_inline_data(inode, page); if (err) { page_cache_release(page); - return err; + goto fail; } } else { err = f2fs_submit_page_bio(sbi, page, dn.data_blkaddr, READ_SYNC); if (err) - return err; + goto fail; } lock_page(page); if (unlikely(!PageUptodate(page))) { f2fs_put_page(page, 1); - return -EIO; + err = -EIO; + goto fail; } if (unlikely(page->mapping != mapping)) { f2fs_put_page(page, 1); @@ -1005,6 +1017,9 @@ out: SetPageUptodate(page); clear_cold_data(page); return 0; +fail: + f2fs_write_failed(mapping, pos + len); + return err; } static int f2fs_write_end(struct file *file, @@ -1049,7 +1064,10 @@ static ssize_t f2fs_direct_IO(int rw, struct kiocb *iocb, struct iov_iter *iter, loff_t offset) { struct file *file = iocb->ki_filp; - struct inode *inode = file->f_mapping->host; + struct address_space *mapping = file->f_mapping; + struct inode *inode = mapping->host; + size_t count = iov_iter_count(iter); + int err; /* Let buffer I/O handle the inline data case. */ if (f2fs_has_inline_data(inode)) @@ -1061,8 +1079,10 @@ static ssize_t f2fs_direct_IO(int rw, struct kiocb *iocb, /* clear fsync mark to recover these blocks */ fsync_mark_clear(F2FS_SB(inode->i_sb), inode->i_ino); - return blockdev_direct_IO(rw, iocb, inode, iter, offset, - get_data_block); + err = blockdev_direct_IO(rw, iocb, inode, iter, offset, get_data_block); + if (err < 0 && (rw & WRITE)) + f2fs_write_failed(mapping, offset + count); + return err; } static void f2fs_invalidate_data_page(struct page *page, unsigned int offset, -- cgit v1.1 From 6b2920a513ec9972f7b80d219fcf6f59130a1f31 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Mon, 7 Jul 2014 11:21:59 +0800 Subject: f2fs: use inner macro and function to clean up codes In this patch we use below inner macro and function to clean up codes. 1. ADDRS_PER_PAGE 2. SM_I 3. f2fs_readonly Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/file.c | 3 +-- fs/f2fs/segment.c | 9 ++++----- fs/f2fs/super.c | 7 +++---- 3 files changed, 8 insertions(+), 11 deletions(-) (limited to 'fs') diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index 36fa505..7c652b3 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -272,8 +272,7 @@ static loff_t f2fs_seek_block(struct file *file, loff_t offset, int whence) } } - end_offset = IS_INODE(dn.node_page) ? - ADDRS_PER_INODE(F2FS_I(inode)) : ADDRS_PER_BLOCK; + end_offset = ADDRS_PER_PAGE(dn.node_page, F2FS_I(inode)); /* find data/hole in dnode block */ for (; dn.ofs_in_node < end_offset; diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index a4f8375..8a6e57d 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -272,13 +272,13 @@ int create_flush_cmd_control(struct f2fs_sb_info *sbi) return -ENOMEM; spin_lock_init(&fcc->issue_lock); init_waitqueue_head(&fcc->flush_wait_queue); - sbi->sm_info->cmd_control_info = fcc; + SM_I(sbi)->cmd_control_info = fcc; fcc->f2fs_issue_flush = kthread_run(issue_flush_thread, sbi, "f2fs_flush-%u:%u", MAJOR(dev), MINOR(dev)); if (IS_ERR(fcc->f2fs_issue_flush)) { err = PTR_ERR(fcc->f2fs_issue_flush); kfree(fcc); - sbi->sm_info->cmd_control_info = NULL; + SM_I(sbi)->cmd_control_info = NULL; return err; } @@ -287,13 +287,12 @@ int create_flush_cmd_control(struct f2fs_sb_info *sbi) void destroy_flush_cmd_control(struct f2fs_sb_info *sbi) { - struct flush_cmd_control *fcc = - sbi->sm_info->cmd_control_info; + struct flush_cmd_control *fcc = SM_I(sbi)->cmd_control_info; if (fcc && fcc->f2fs_issue_flush) kthread_stop(fcc->f2fs_issue_flush); kfree(fcc); - sbi->sm_info->cmd_control_info = NULL; + SM_I(sbi)->cmd_control_info = NULL; } static void __locate_dirty_segment(struct f2fs_sb_info *sbi, unsigned int segno, diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index 8f96d93..870fe19 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -615,7 +615,7 @@ static int f2fs_remount(struct super_block *sb, int *flags, char *data) * Previous and new state of filesystem is RO, * so skip checking GC and FLUSH_MERGE conditions. */ - if ((sb->s_flags & MS_RDONLY) && (*flags & MS_RDONLY)) + if (f2fs_readonly(sb) && (*flags & MS_RDONLY)) goto skip; /* @@ -642,8 +642,7 @@ static int f2fs_remount(struct super_block *sb, int *flags, char *data) */ if ((*flags & MS_RDONLY) || !test_opt(sbi, FLUSH_MERGE)) { destroy_flush_cmd_control(sbi); - } else if (test_opt(sbi, FLUSH_MERGE) && - !sbi->sm_info->cmd_control_info) { + } else if (test_opt(sbi, FLUSH_MERGE) && !SM_I(sbi)->cmd_control_info) { err = create_flush_cmd_control(sbi); if (err) goto restore_gc; @@ -1082,7 +1081,7 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent) * If filesystem is not mounted as read-only then * do start the gc_thread. */ - if (!(sb->s_flags & MS_RDONLY)) { + if (!f2fs_readonly(sb)) { /* After POR, we can run background GC thread.*/ err = start_gc_thread(sbi); if (err) -- cgit v1.1 From 81e366f87f52c62671fb1d8050572e68dbcf1d22 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Thu, 10 Jul 2014 12:37:46 +0800 Subject: f2fs: check name_len of dir entry to prevent from deadloop We assume that modification of some special application could result in zeroed name_len, or it is consciously made by somebody. We will deadloop in find_in_block when name_len of dir entry is zero. This patch is added for preventing deadloop in above scenario. change log from v1: o use f2fs_bug_on rather than break out from searching dir entry suggested by Jaegeuk Kim. Jaegeuk describe: "Well, IMO, it would be good to add f2fs_bug_on() here with a specific comment. In the current phase of f2fs, it is more important to investigate the file system bugs, rather than workarounds for any corrupted images. And, definitely it needs to stop the kernel if any corrupted image was mounted, so that we can figure out where the bugs are occurred." Suggested-by: Jaegeuk Kim Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/dir.c | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'fs') diff --git a/fs/f2fs/dir.c b/fs/f2fs/dir.c index e84e880..bcf893c 100644 --- a/fs/f2fs/dir.c +++ b/fs/f2fs/dir.c @@ -121,6 +121,13 @@ static struct f2fs_dir_entry *find_in_block(struct page *dentry_page, *max_slots = max_len; max_len = 0; } + + /* + * For the most part, it should be a bug when name_len is zero. + * We stop here for figuring out where the bugs are occurred. + */ + f2fs_bug_on(!de->name_len); + bit_pos += GET_DENTRY_SLOTS(le16_to_cpu(de->name_len)); } -- cgit v1.1 From 7a6c76b1b2f2c9555571647f07fc7ccada0a1b0a Mon Sep 17 00:00:00 2001 From: Gu Zheng Date: Fri, 11 Jul 2014 18:35:43 +0800 Subject: f2fs: cleanup the needless return of f2fs_create_root_stats Signed-off-by: Gu Zheng Signed-off-by: Jaegeuk Kim --- fs/f2fs/debug.c | 17 +++++------------ 1 file changed, 5 insertions(+), 12 deletions(-) (limited to 'fs') diff --git a/fs/f2fs/debug.c b/fs/f2fs/debug.c index b52c12c..3f99266 100644 --- a/fs/f2fs/debug.c +++ b/fs/f2fs/debug.c @@ -345,21 +345,14 @@ void __init f2fs_create_root_stats(void) f2fs_debugfs_root = debugfs_create_dir("f2fs", NULL); if (!f2fs_debugfs_root) - goto bail; + return; file = debugfs_create_file("status", S_IRUGO, f2fs_debugfs_root, NULL, &stat_fops); - if (!file) - goto free_debugfs_dir; - - return; - -free_debugfs_dir: - debugfs_remove(f2fs_debugfs_root); - -bail: - f2fs_debugfs_root = NULL; - return; + if (!file) { + debugfs_remove(f2fs_debugfs_root); + f2fs_debugfs_root = NULL; + } } void f2fs_destroy_root_stats(void) -- cgit v1.1 From 4b2868aa4f07eeb64e96c2e0823387c426eca356 Mon Sep 17 00:00:00 2001 From: Gu Zheng Date: Fri, 11 Jul 2014 18:35:44 +0800 Subject: f2fs: remove the unused stat_lock Signed-off-by: Gu Zheng Signed-off-by: Jaegeuk Kim --- fs/f2fs/f2fs.h | 1 - 1 file changed, 1 deletion(-) (limited to 'fs') diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index ae3b4ac..8f507d4 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -1298,7 +1298,6 @@ bool space_for_roll_forward(struct f2fs_sb_info *); struct f2fs_stat_info { struct list_head stat_list; struct f2fs_sb_info *sbi; - struct mutex stat_lock; int all_area_segs, sit_area_segs, nat_area_segs, ssa_area_segs; int main_area_segs, main_area_sections, main_area_zones; int hit_ext, total_ext; -- cgit v1.1 From f1121ab0ba9ab9c1592049533b511877600f409e Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Mon, 14 Jul 2014 16:45:15 +0800 Subject: f2fs: reduce searching region of segmap when free section In __set_test_and_free we will check whether all segment are free in one section When free one segment, in order to set section to free status. But the searching region of segmap is from start segno to last segno of f2fs, it's not necessary. So let's just only check all segment bitmap of target section. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/segment.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'fs') diff --git a/fs/f2fs/segment.h b/fs/f2fs/segment.h index 7091204..ee5c75e 100644 --- a/fs/f2fs/segment.h +++ b/fs/f2fs/segment.h @@ -347,8 +347,8 @@ static inline void __set_test_and_free(struct f2fs_sb_info *sbi, if (test_and_clear_bit(segno, free_i->free_segmap)) { free_i->free_segments++; - next = find_next_bit(free_i->free_segmap, TOTAL_SEGS(sbi), - start_segno); + next = find_next_bit(free_i->free_segmap, + start_segno + sbi->segs_per_sec, start_segno); if (next >= start_segno + sbi->segs_per_sec) { if (test_and_clear_bit(secno, free_i->free_secmap)) free_i->free_sections++; -- cgit v1.1 From 79e35dc3c23dd2ac9f8681361026c82b71a0b006 Mon Sep 17 00:00:00 2001 From: Huang Ying Date: Sat, 12 Jul 2014 20:10:00 +0800 Subject: f2fs: add f2fs_balance_fs for direct IO Otherwise, if a large amount of direct IO writes were done, the segment allocation may be failed because no enough segments are gced. Changes: v2: add f2fs_balance_fs into __get_data_block instead of f2fs_direct_IO. Signed-off-by: Huang, Ying Signed-off-by: Jaegeuk Kim --- fs/f2fs/data.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index 05154d6d..c77c667 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -626,8 +626,10 @@ static int __get_data_block(struct inode *inode, sector_t iblock, if (check_extent_cache(inode, pgofs, bh_result)) goto out; - if (create) + if (create) { + f2fs_balance_fs(sbi); f2fs_lock_op(sbi); + } /* When reading holes, we need its node page */ set_new_dnode(&dn, inode, NULL, NULL, 0); -- cgit v1.1 From 32f9bc25cbda00410e2379c58ae027e88bf24770 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Sat, 12 Jul 2014 19:13:54 +0800 Subject: f2fs: support ->rename2() Now new interface ->rename2() is added to VFS, here are related description: https://lkml.org/lkml/2014/2/7/873 https://lkml.org/lkml/2014/2/7/758 This patch adds function f2fs_rename2() to support ->rename2() including handling both RENAME_EXCHANGE and RENAME_NOREPLACE flag. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/namei.c | 164 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 164 insertions(+) (limited to 'fs') diff --git a/fs/f2fs/namei.c b/fs/f2fs/namei.c index 1b3cae0..27b0377 100644 --- a/fs/f2fs/namei.c +++ b/fs/f2fs/namei.c @@ -485,6 +485,169 @@ out: return err; } +static int f2fs_cross_rename(struct inode *old_dir, struct dentry *old_dentry, + struct inode *new_dir, struct dentry *new_dentry) +{ + struct super_block *sb = old_dir->i_sb; + struct f2fs_sb_info *sbi = F2FS_SB(sb); + struct inode *old_inode = old_dentry->d_inode; + struct inode *new_inode = new_dentry->d_inode; + struct page *old_dir_page, *new_dir_page; + struct page *old_page, *new_page; + struct f2fs_dir_entry *old_dir_entry = NULL, *new_dir_entry = NULL; + struct f2fs_dir_entry *old_entry, *new_entry; + int old_nlink = 0, new_nlink = 0; + int err = -ENOENT; + + f2fs_balance_fs(sbi); + + old_entry = f2fs_find_entry(old_dir, &old_dentry->d_name, &old_page); + if (!old_entry) + goto out; + + new_entry = f2fs_find_entry(new_dir, &new_dentry->d_name, &new_page); + if (!new_entry) + goto out_old; + + /* prepare for updating ".." directory entry info later */ + if (old_dir != new_dir) { + if (S_ISDIR(old_inode->i_mode)) { + err = -EIO; + old_dir_entry = f2fs_parent_dir(old_inode, + &old_dir_page); + if (!old_dir_entry) + goto out_new; + } + + if (S_ISDIR(new_inode->i_mode)) { + err = -EIO; + new_dir_entry = f2fs_parent_dir(new_inode, + &new_dir_page); + if (!new_dir_entry) + goto out_old_dir; + } + } + + /* + * If cross rename between file and directory those are not + * in the same directory, we will inc nlink of file's parent + * later, so we should check upper boundary of its nlink. + */ + if ((!old_dir_entry || !new_dir_entry) && + old_dir_entry != new_dir_entry) { + old_nlink = old_dir_entry ? -1 : 1; + new_nlink = -old_nlink; + err = -EMLINK; + if ((old_nlink > 0 && old_inode->i_nlink >= F2FS_LINK_MAX) || + (new_nlink > 0 && new_inode->i_nlink >= F2FS_LINK_MAX)) + goto out_new_dir; + } + + f2fs_lock_op(sbi); + + err = update_dent_inode(old_inode, &new_dentry->d_name); + if (err) + goto out_unlock; + + err = update_dent_inode(new_inode, &old_dentry->d_name); + if (err) + goto out_undo; + + /* update ".." directory entry info of old dentry */ + if (old_dir_entry) + f2fs_set_link(old_inode, old_dir_entry, old_dir_page, new_dir); + + /* update ".." directory entry info of new dentry */ + if (new_dir_entry) + f2fs_set_link(new_inode, new_dir_entry, new_dir_page, old_dir); + + /* update directory entry info of old dir inode */ + f2fs_set_link(old_dir, old_entry, old_page, new_inode); + + down_write(&F2FS_I(old_inode)->i_sem); + file_lost_pino(old_inode); + up_write(&F2FS_I(old_inode)->i_sem); + + update_inode_page(old_inode); + + old_dir->i_ctime = CURRENT_TIME; + if (old_nlink) { + down_write(&F2FS_I(old_dir)->i_sem); + if (old_nlink < 0) + drop_nlink(old_dir); + else + inc_nlink(old_dir); + up_write(&F2FS_I(old_dir)->i_sem); + } + mark_inode_dirty(old_dir); + update_inode_page(old_dir); + + /* update directory entry info of new dir inode */ + f2fs_set_link(new_dir, new_entry, new_page, old_inode); + + down_write(&F2FS_I(new_inode)->i_sem); + file_lost_pino(new_inode); + up_write(&F2FS_I(new_inode)->i_sem); + + update_inode_page(new_inode); + + new_dir->i_ctime = CURRENT_TIME; + if (new_nlink) { + down_write(&F2FS_I(new_dir)->i_sem); + if (new_nlink < 0) + drop_nlink(new_dir); + else + inc_nlink(new_dir); + up_write(&F2FS_I(new_dir)->i_sem); + } + mark_inode_dirty(new_dir); + update_inode_page(new_dir); + + f2fs_unlock_op(sbi); + return 0; +out_undo: + /* Still we may fail to recover name info of f2fs_inode here */ + update_dent_inode(old_inode, &old_dentry->d_name); +out_unlock: + f2fs_unlock_op(sbi); +out_new_dir: + if (new_dir_entry) { + kunmap(new_dir_page); + f2fs_put_page(new_dir_page, 0); + } +out_old_dir: + if (old_dir_entry) { + kunmap(old_dir_page); + f2fs_put_page(old_dir_page, 0); + } +out_new: + kunmap(new_page); + f2fs_put_page(new_page, 0); +out_old: + kunmap(old_page); + f2fs_put_page(old_page, 0); +out: + return err; +} + +static int f2fs_rename2(struct inode *old_dir, struct dentry *old_dentry, + struct inode *new_dir, struct dentry *new_dentry, + unsigned int flags) +{ + if (flags & ~(RENAME_NOREPLACE | RENAME_EXCHANGE)) + return -EINVAL; + + if (flags & RENAME_EXCHANGE) { + return f2fs_cross_rename(old_dir, old_dentry, + new_dir, new_dentry); + } + /* + * VFS has already handled the new dentry existence case, + * here, we just deal with "RENAME_NOREPLACE" as regular rename. + */ + return f2fs_rename(old_dir, old_dentry, new_dir, new_dentry); +} + static int f2fs_tmpfile(struct inode *dir, struct dentry *dentry, umode_t mode) { struct f2fs_sb_info *sbi = F2FS_SB(dir->i_sb); @@ -542,6 +705,7 @@ const struct inode_operations f2fs_dir_inode_operations = { .rmdir = f2fs_rmdir, .mknod = f2fs_mknod, .rename = f2fs_rename, + .rename2 = f2fs_rename2, .tmpfile = f2fs_tmpfile, .getattr = f2fs_getattr, .setattr = f2fs_setattr, -- cgit v1.1 From dbf20cb259e879e2d939fd3fd5c792732d845195 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Fri, 25 Jul 2014 12:00:57 +0800 Subject: f2fs: avoid use invalid mapping of node_inode when evict meta inode Andrey Tsyvarev reported: "Using memory error detector reveals the following use-after-free error in 3.15.0: AddressSanitizer: heap-use-after-free in f2fs_evict_inode Read of size 8 by thread T22279: [] f2fs_evict_inode+0x102/0x2e0 [f2fs] [] evict+0x15f/0x290 [< inlined >] iput+0x196/0x280 iput_final [] iput+0x196/0x280 [] f2fs_put_super+0xd6/0x170 [f2fs] [] generic_shutdown_super+0xc5/0x1b0 [] kill_block_super+0x4d/0xb0 [] deactivate_locked_super+0x66/0x80 [] deactivate_super+0x68/0x80 [] mntput_no_expire+0x198/0x250 [< inlined >] SyS_umount+0xe9/0x1a0 SYSC_umount [] SyS_umount+0xe9/0x1a0 [] system_call_fastpath+0x16/0x1b Freed by thread T3: [] f2fs_i_callback+0x27/0x30 [f2fs] [< inlined >] rcu_process_callbacks+0x2d6/0x930 __rcu_reclaim [< inlined >] rcu_process_callbacks+0x2d6/0x930 rcu_do_batch [< inlined >] rcu_process_callbacks+0x2d6/0x930 invoke_rcu_callbacks [< inlined >] rcu_process_callbacks+0x2d6/0x930 __rcu_process_callbacks [] rcu_process_callbacks+0x2d6/0x930 [] __do_softirq+0x142/0x380 [] run_ksoftirqd+0x30/0x50 [] smpboot_thread_fn+0x197/0x280 [] kthread+0x148/0x160 [] ret_from_fork+0x7c/0xb0 Allocated by thread T22276: [] f2fs_alloc_inode+0x2d/0x170 [f2fs] [] iget_locked+0x10a/0x230 [] f2fs_iget+0x35/0xa80 [f2fs] [] f2fs_fill_super+0xb53/0xff0 [f2fs] [] mount_bdev+0x1de/0x240 [] f2fs_mount+0x10/0x20 [f2fs] [] mount_fs+0x55/0x220 [] vfs_kern_mount+0x66/0x200 [< inlined >] do_mount+0x2b4/0x1120 do_new_mount [] do_mount+0x2b4/0x1120 [< inlined >] SyS_mount+0xb2/0x110 SYSC_mount [] SyS_mount+0xb2/0x110 [] system_call_fastpath+0x16/0x1b The buggy address ffff8800587866c8 is located 48 bytes inside of 680-byte region [ffff880058786698, ffff880058786940) Memory state around the buggy address: ffff880058786100: ffffffff ffffffff ffffffff ffffffff ffff880058786200: ffffffff ffffffff ffffffrr rrrrrrrr ffff880058786300: rrrrrrrr rrffffff ffffffff ffffffff ffff880058786400: ffffffff ffffffff ffffffff ffffffff ffff880058786500: ffffffff ffffffff ffffffff fffffffr >ffff880058786600: rrrrrrrr rrrrrrrr rrrfffff ffffffff ^ ffff880058786700: ffffffff ffffffff ffffffff ffffffff ffff880058786800: ffffffff ffffffff ffffffff ffffffff ffff880058786900: ffffffff rrrrrrrr rrrrrrrr rrrr.... ffff880058786a00: ........ ........ ........ ........ ffff880058786b00: ........ ........ ........ ........ Legend: f - 8 freed bytes r - 8 redzone bytes . - 8 allocated bytes x=1..7 - x allocated bytes + (8-x) redzone bytes Investigation shows, that f2fs_evict_inode, when called for 'meta_inode', uses invalidate_mapping_pages() for 'node_inode'. But 'node_inode' is deleted before 'meta_inode' in f2fs_put_super via iput(). It seems that in common usage scenario this use-after-free is benign, because 'node_inode' remains partially valid data even after kmem_cache_free(). But things may change if, while 'meta_inode' is evicted in one f2fs filesystem, another (mounted) f2fs filesystem requests inode from cache, and formely 'node_inode' of the first filesystem is returned." Nids for both meta_inode and node_inode are reservation, so it's not necessary for us to invalidate pages which will never be allocated. To fix this issue, let's skipping needlessly invalidating pages for {meta,node}_inode in f2fs_evict_inode. Reported-by: Andrey Tsyvarev Tested-by: Andrey Tsyvarev Signed-off-by: Gu Zheng Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/inode.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'fs') diff --git a/fs/f2fs/inode.c b/fs/f2fs/inode.c index 2cf6962..cafba3c 100644 --- a/fs/f2fs/inode.c +++ b/fs/f2fs/inode.c @@ -273,7 +273,7 @@ void f2fs_evict_inode(struct inode *inode) if (inode->i_ino == F2FS_NODE_INO(sbi) || inode->i_ino == F2FS_META_INO(sbi)) - goto no_delete; + goto out_clear; f2fs_bug_on(get_dirty_dents(inode)); remove_dirty_dir_inode(inode); @@ -295,6 +295,7 @@ void f2fs_evict_inode(struct inode *inode) sb_end_intwrite(inode->i_sb); no_delete: - clear_inode(inode); invalidate_mapping_pages(NODE_MAPPING(sbi), inode->i_ino, inode->i_ino); +out_clear: + clear_inode(inode); } -- cgit v1.1 From 9d847950770da7102d4efc02d0939ce28e3a7dd0 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Fri, 25 Jul 2014 12:55:09 +0800 Subject: f2fs: fix to put root inode in error path of fill_super We should put root inode correctly in error path of fill_super, otherwise we may encounter a leak case of inode resource. Signed-off-by: Chao Yu Reviewed-by: Gu Zheng Signed-off-by: Jaegeuk Kim --- fs/f2fs/super.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index 870fe19..34649aa 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -1033,8 +1033,9 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent) goto free_node_inode; } if (!S_ISDIR(root->i_mode) || !root->i_blocks || !root->i_size) { + iput(root); err = -EINVAL; - goto free_root_inode; + goto free_node_inode; } sb->s_root = d_make_root(root); /* allocate root dentry */ -- cgit v1.1 From 0f7b2abd188089a44f60e2bf8521d1363ada9e12 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Wed, 23 Jul 2014 09:57:31 -0700 Subject: f2fs: add nobarrier mount option This patch adds a mount option, nobarrier, in f2fs. The assumption in here is that file system keeps the IO ordering, but doesn't care about cache flushes inside the storages. Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/data.c | 5 ++++- fs/f2fs/f2fs.h | 1 + fs/f2fs/segment.c | 3 +++ fs/f2fs/super.c | 7 +++++++ 4 files changed, 15 insertions(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index c77c667..482313d 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -139,7 +139,10 @@ void f2fs_submit_merged_bio(struct f2fs_sb_info *sbi, /* change META to META_FLUSH in the checkpoint procedure */ if (type >= META_FLUSH) { io->fio.type = META_FLUSH; - io->fio.rw = WRITE_FLUSH_FUA | REQ_META | REQ_PRIO; + if (test_opt(sbi, NOBARRIER)) + io->fio.rw = WRITE_FLUSH | REQ_META | REQ_PRIO; + else + io->fio.rw = WRITE_FLUSH_FUA | REQ_META | REQ_PRIO; } __submit_merged_bio(io); up_write(&io->io_rwsem); diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 8f507d4..e999eec 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -41,6 +41,7 @@ #define F2FS_MOUNT_INLINE_XATTR 0x00000080 #define F2FS_MOUNT_INLINE_DATA 0x00000100 #define F2FS_MOUNT_FLUSH_MERGE 0x00000200 +#define F2FS_MOUNT_NOBARRIER 0x00000400 #define clear_opt(sbi, option) (sbi->mount_opt.opt &= ~F2FS_MOUNT_##option) #define set_opt(sbi, option) (sbi->mount_opt.opt |= F2FS_MOUNT_##option) diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 8a6e57d..9fce0f47 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -239,6 +239,9 @@ int f2fs_issue_flush(struct f2fs_sb_info *sbi) struct flush_cmd_control *fcc = SM_I(sbi)->cmd_control_info; struct flush_cmd cmd; + if (test_opt(sbi, NOBARRIER)) + return 0; + if (!test_opt(sbi, FLUSH_MERGE)) return blkdev_issue_flush(sbi->sb->s_bdev, GFP_KERNEL, NULL); diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index 34649aa..93593ce 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -52,6 +52,7 @@ enum { Opt_inline_xattr, Opt_inline_data, Opt_flush_merge, + Opt_nobarrier, Opt_err, }; @@ -69,6 +70,7 @@ static match_table_t f2fs_tokens = { {Opt_inline_xattr, "inline_xattr"}, {Opt_inline_data, "inline_data"}, {Opt_flush_merge, "flush_merge"}, + {Opt_nobarrier, "nobarrier"}, {Opt_err, NULL}, }; @@ -339,6 +341,9 @@ static int parse_options(struct super_block *sb, char *options) case Opt_flush_merge: set_opt(sbi, FLUSH_MERGE); break; + case Opt_nobarrier: + set_opt(sbi, NOBARRIER); + break; default: f2fs_msg(sb, KERN_ERR, "Unrecognized mount option \"%s\" or missing value", @@ -544,6 +549,8 @@ static int f2fs_show_options(struct seq_file *seq, struct dentry *root) seq_puts(seq, ",inline_data"); if (!f2fs_readonly(sbi->sb) && test_opt(sbi, FLUSH_MERGE)) seq_puts(seq, ",flush_merge"); + if (test_opt(sbi, NOBARRIER)) + seq_puts(seq, ",nobarrier"); seq_printf(seq, ",active_logs=%u", sbi->active_logs); return 0; -- cgit v1.1 From 953e6cc6bcb615dfa373320ffa62b574c6be608a Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Fri, 25 Jul 2014 15:47:16 -0700 Subject: f2fs: punch the core function for inode management This patch punches out the core functions to manage the inode numbers. Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/checkpoint.c | 81 ++++++++++++++++++++++++++++------------------------ 1 file changed, 44 insertions(+), 37 deletions(-) (limited to 'fs') diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c index 0b4710c..1c6eb6b 100644 --- a/fs/f2fs/checkpoint.c +++ b/fs/f2fs/checkpoint.c @@ -282,6 +282,47 @@ const struct address_space_operations f2fs_meta_aops = { .set_page_dirty = f2fs_set_meta_page_dirty, }; +static void __add_ino_entry(struct f2fs_sb_info *sbi, nid_t ino) +{ + struct list_head *head; + struct orphan_inode_entry *new, *e; + + new = f2fs_kmem_cache_alloc(orphan_entry_slab, GFP_ATOMIC); + new->ino = ino; + + spin_lock(&sbi->orphan_inode_lock); + list_for_each_entry(e, &sbi->orphan_inode_list, list) { + if (e->ino == ino) { + spin_unlock(&sbi->orphan_inode_lock); + kmem_cache_free(orphan_entry_slab, new); + return; + } + if (e->ino > ino) + break; + } + + /* add new entry into list which is sorted by inode number */ + list_add_tail(&new->list, &e->list); + spin_unlock(&sbi->orphan_inode_lock); +} + +static void __remove_ino_entry(struct f2fs_sb_info *sbi, nid_t ino) +{ + struct orphan_inode_entry *e; + + spin_lock(&sbi->orphan_inode_lock); + list_for_each_entry(e, &sbi->orphan_inode_list, list) { + if (e->ino == ino) { + list_del(&e->list); + sbi->n_orphans--; + spin_unlock(&sbi->orphan_inode_lock); + kmem_cache_free(orphan_entry_slab, e); + return; + } + } + spin_unlock(&sbi->orphan_inode_lock); +} + int acquire_orphan_inode(struct f2fs_sb_info *sbi) { int err = 0; @@ -306,48 +347,14 @@ void release_orphan_inode(struct f2fs_sb_info *sbi) void add_orphan_inode(struct f2fs_sb_info *sbi, nid_t ino) { - struct list_head *head; - struct orphan_inode_entry *new, *orphan; - - new = f2fs_kmem_cache_alloc(orphan_entry_slab, GFP_ATOMIC); - new->ino = ino; - - spin_lock(&sbi->orphan_inode_lock); - head = &sbi->orphan_inode_list; - list_for_each_entry(orphan, head, list) { - if (orphan->ino == ino) { - spin_unlock(&sbi->orphan_inode_lock); - kmem_cache_free(orphan_entry_slab, new); - return; - } - - if (orphan->ino > ino) - break; - } - /* add new orphan entry into list which is sorted by inode number */ - list_add_tail(&new->list, &orphan->list); - spin_unlock(&sbi->orphan_inode_lock); + __add_ino_entry(sbi, ino); } void remove_orphan_inode(struct f2fs_sb_info *sbi, nid_t ino) { - struct list_head *head; - struct orphan_inode_entry *orphan; - - spin_lock(&sbi->orphan_inode_lock); - head = &sbi->orphan_inode_list; - list_for_each_entry(orphan, head, list) { - if (orphan->ino == ino) { - list_del(&orphan->list); - f2fs_bug_on(sbi->n_orphans == 0); - sbi->n_orphans--; - spin_unlock(&sbi->orphan_inode_lock); - kmem_cache_free(orphan_entry_slab, orphan); - return; - } - } - spin_unlock(&sbi->orphan_inode_lock); + /* remove orphan entry from orphan list */ + __remove_ino_entry(sbi, ino); } static void recover_orphan_inode(struct f2fs_sb_info *sbi, nid_t ino) -- cgit v1.1 From 6451e041c8d39daf39c71eefe839641c2093713e Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Fri, 25 Jul 2014 15:47:17 -0700 Subject: f2fs: add infra for ino management This patch changes the naming of orphan-related data structures to use as inode numbers managed globally. Later, we can use this facility for managing any inode number lists. Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/checkpoint.c | 76 +++++++++++++++++++++++++++------------------------- fs/f2fs/debug.c | 2 +- fs/f2fs/f2fs.h | 19 ++++++++----- fs/f2fs/super.c | 2 +- 4 files changed, 55 insertions(+), 44 deletions(-) (limited to 'fs') diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c index 1c6eb6b..f93d154 100644 --- a/fs/f2fs/checkpoint.c +++ b/fs/f2fs/checkpoint.c @@ -22,7 +22,7 @@ #include "segment.h" #include -static struct kmem_cache *orphan_entry_slab; +static struct kmem_cache *ino_entry_slab; static struct kmem_cache *inode_entry_slab; /* @@ -282,19 +282,18 @@ const struct address_space_operations f2fs_meta_aops = { .set_page_dirty = f2fs_set_meta_page_dirty, }; -static void __add_ino_entry(struct f2fs_sb_info *sbi, nid_t ino) +static void __add_ino_entry(struct f2fs_sb_info *sbi, nid_t ino, int type) { - struct list_head *head; - struct orphan_inode_entry *new, *e; + struct ino_entry *new, *e; - new = f2fs_kmem_cache_alloc(orphan_entry_slab, GFP_ATOMIC); + new = f2fs_kmem_cache_alloc(ino_entry_slab, GFP_ATOMIC); new->ino = ino; - spin_lock(&sbi->orphan_inode_lock); - list_for_each_entry(e, &sbi->orphan_inode_list, list) { + spin_lock(&sbi->ino_lock[type]); + list_for_each_entry(e, &sbi->ino_list[type], list) { if (e->ino == ino) { - spin_unlock(&sbi->orphan_inode_lock); - kmem_cache_free(orphan_entry_slab, new); + spin_unlock(&sbi->ino_lock[type]); + kmem_cache_free(ino_entry_slab, new); return; } if (e->ino > ino) @@ -303,58 +302,58 @@ static void __add_ino_entry(struct f2fs_sb_info *sbi, nid_t ino) /* add new entry into list which is sorted by inode number */ list_add_tail(&new->list, &e->list); - spin_unlock(&sbi->orphan_inode_lock); + spin_unlock(&sbi->ino_lock[type]); } -static void __remove_ino_entry(struct f2fs_sb_info *sbi, nid_t ino) +static void __remove_ino_entry(struct f2fs_sb_info *sbi, nid_t ino, int type) { - struct orphan_inode_entry *e; + struct ino_entry *e; - spin_lock(&sbi->orphan_inode_lock); - list_for_each_entry(e, &sbi->orphan_inode_list, list) { + spin_lock(&sbi->ino_lock[type]); + list_for_each_entry(e, &sbi->ino_list[type], list) { if (e->ino == ino) { list_del(&e->list); sbi->n_orphans--; - spin_unlock(&sbi->orphan_inode_lock); - kmem_cache_free(orphan_entry_slab, e); + spin_unlock(&sbi->ino_lock[type]); + kmem_cache_free(ino_entry_slab, e); return; } } - spin_unlock(&sbi->orphan_inode_lock); + spin_unlock(&sbi->ino_lock[type]); } int acquire_orphan_inode(struct f2fs_sb_info *sbi) { int err = 0; - spin_lock(&sbi->orphan_inode_lock); + spin_lock(&sbi->ino_lock[ORPHAN_INO]); if (unlikely(sbi->n_orphans >= sbi->max_orphans)) err = -ENOSPC; else sbi->n_orphans++; - spin_unlock(&sbi->orphan_inode_lock); + spin_unlock(&sbi->ino_lock[ORPHAN_INO]); return err; } void release_orphan_inode(struct f2fs_sb_info *sbi) { - spin_lock(&sbi->orphan_inode_lock); + spin_lock(&sbi->ino_lock[ORPHAN_INO]); f2fs_bug_on(sbi->n_orphans == 0); sbi->n_orphans--; - spin_unlock(&sbi->orphan_inode_lock); + spin_unlock(&sbi->ino_lock[ORPHAN_INO]); } void add_orphan_inode(struct f2fs_sb_info *sbi, nid_t ino) { /* add new orphan entry into list which is sorted by inode number */ - __add_ino_entry(sbi, ino); + __add_ino_entry(sbi, ino, ORPHAN_INO); } void remove_orphan_inode(struct f2fs_sb_info *sbi, nid_t ino) { /* remove orphan entry from orphan list */ - __remove_ino_entry(sbi, ino); + __remove_ino_entry(sbi, ino, ORPHAN_INO); } static void recover_orphan_inode(struct f2fs_sb_info *sbi, nid_t ino) @@ -408,14 +407,14 @@ static void write_orphan_inodes(struct f2fs_sb_info *sbi, block_t start_blk) unsigned short orphan_blocks = (unsigned short)((sbi->n_orphans + (F2FS_ORPHANS_PER_BLOCK - 1)) / F2FS_ORPHANS_PER_BLOCK); struct page *page = NULL; - struct orphan_inode_entry *orphan = NULL; + struct ino_entry *orphan = NULL; for (index = 0; index < orphan_blocks; index++) grab_meta_page(sbi, start_blk + index); index = 1; - spin_lock(&sbi->orphan_inode_lock); - head = &sbi->orphan_inode_list; + spin_lock(&sbi->ino_lock[ORPHAN_INO]); + head = &sbi->ino_list[ORPHAN_INO]; /* loop for each orphan inode entry and write them in Jornal block */ list_for_each_entry(orphan, head, list) { @@ -455,7 +454,7 @@ static void write_orphan_inodes(struct f2fs_sb_info *sbi, block_t start_blk) f2fs_put_page(page, 1); } - spin_unlock(&sbi->orphan_inode_lock); + spin_unlock(&sbi->ino_lock[ORPHAN_INO]); } static struct page *validate_checkpoint(struct f2fs_sb_info *sbi, @@ -939,31 +938,36 @@ void write_checkpoint(struct f2fs_sb_info *sbi, bool is_umount) trace_f2fs_write_checkpoint(sbi->sb, is_umount, "finish checkpoint"); } -void init_orphan_info(struct f2fs_sb_info *sbi) +void init_ino_entry_info(struct f2fs_sb_info *sbi) { - spin_lock_init(&sbi->orphan_inode_lock); - INIT_LIST_HEAD(&sbi->orphan_inode_list); - sbi->n_orphans = 0; + int i; + + for (i = 0; i < MAX_INO_ENTRY; i++) { + spin_lock_init(&sbi->ino_lock[i]); + INIT_LIST_HEAD(&sbi->ino_list[i]); + } + /* * considering 512 blocks in a segment 8 blocks are needed for cp * and log segment summaries. Remaining blocks are used to keep * orphan entries with the limitation one reserved segment * for cp pack we can have max 1020*504 orphan entries */ + sbi->n_orphans = 0; sbi->max_orphans = (sbi->blocks_per_seg - 2 - NR_CURSEG_TYPE) * F2FS_ORPHANS_PER_BLOCK; } int __init create_checkpoint_caches(void) { - orphan_entry_slab = f2fs_kmem_cache_create("f2fs_orphan_entry", - sizeof(struct orphan_inode_entry)); - if (!orphan_entry_slab) + ino_entry_slab = f2fs_kmem_cache_create("f2fs_ino_entry", + sizeof(struct ino_entry)); + if (!ino_entry_slab) return -ENOMEM; inode_entry_slab = f2fs_kmem_cache_create("f2fs_dirty_dir_entry", sizeof(struct dir_inode_entry)); if (!inode_entry_slab) { - kmem_cache_destroy(orphan_entry_slab); + kmem_cache_destroy(ino_entry_slab); return -ENOMEM; } return 0; @@ -971,6 +975,6 @@ int __init create_checkpoint_caches(void) void destroy_checkpoint_caches(void) { - kmem_cache_destroy(orphan_entry_slab); + kmem_cache_destroy(ino_entry_slab); kmem_cache_destroy(inode_entry_slab); } diff --git a/fs/f2fs/debug.c b/fs/f2fs/debug.c index 3f99266..a441ba3 100644 --- a/fs/f2fs/debug.c +++ b/fs/f2fs/debug.c @@ -167,7 +167,7 @@ get_cache: si->cache_mem += npages << PAGE_CACHE_SHIFT; npages = META_MAPPING(sbi)->nrpages; si->cache_mem += npages << PAGE_CACHE_SHIFT; - si->cache_mem += sbi->n_orphans * sizeof(struct orphan_inode_entry); + si->cache_mem += sbi->n_orphans * sizeof(struct ino_entry); si->cache_mem += sbi->n_dirty_dirs * sizeof(struct dir_inode_entry); } diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index e999eec..b6fa6ec 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -100,8 +100,13 @@ enum { META_SSA }; -/* for the list of orphan inodes */ -struct orphan_inode_entry { +/* for the list of ino */ +enum { + ORPHAN_INO, /* for orphan ino list */ + MAX_INO_ENTRY, /* max. list */ +}; + +struct ino_entry { struct list_head list; /* list head */ nid_t ino; /* inode number */ }; @@ -450,9 +455,11 @@ struct f2fs_sb_info { bool por_doing; /* recovery is doing or not */ wait_queue_head_t cp_wait; - /* for orphan inode management */ - struct list_head orphan_inode_list; /* orphan inode list */ - spinlock_t orphan_inode_lock; /* for orphan inode list */ + /* for inode management */ + spinlock_t ino_lock[MAX_INO_ENTRY]; /* for ino entry lock */ + struct list_head ino_list[MAX_INO_ENTRY]; /* inode list head */ + + /* for orphan inode, use 0'th array */ unsigned int n_orphans; /* # of orphan inodes */ unsigned int max_orphans; /* max orphan inodes */ @@ -1255,7 +1262,7 @@ void add_dirty_dir_inode(struct inode *); void remove_dirty_dir_inode(struct inode *); void sync_dirty_dir_inodes(struct f2fs_sb_info *); void write_checkpoint(struct f2fs_sb_info *, bool); -void init_orphan_info(struct f2fs_sb_info *); +void init_ino_entry_info(struct f2fs_sb_info *); int __init create_checkpoint_caches(void); void destroy_checkpoint_caches(void); diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index 93593ce..f253e22 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -1003,7 +1003,7 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent) INIT_LIST_HEAD(&sbi->dir_inode_list); spin_lock_init(&sbi->dir_inode_lock); - init_orphan_info(sbi); + init_ino_entry_info(sbi); /* setup f2fs internal modules */ err = build_segment_manager(sbi); -- cgit v1.1 From 39efac41fbe44343cac29472320a1d502fcff66b Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Thu, 24 Jul 2014 18:15:17 -0700 Subject: f2fs: use radix_tree for ino management For better ino management, this patch replaces the data structure from list to radix tree. Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/checkpoint.c | 49 +++++++++++++++++++++++++++---------------------- fs/f2fs/f2fs.h | 1 + 2 files changed, 28 insertions(+), 22 deletions(-) (limited to 'fs') diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c index f93d154..4bf2037 100644 --- a/fs/f2fs/checkpoint.c +++ b/fs/f2fs/checkpoint.c @@ -284,24 +284,27 @@ const struct address_space_operations f2fs_meta_aops = { static void __add_ino_entry(struct f2fs_sb_info *sbi, nid_t ino, int type) { - struct ino_entry *new, *e; - - new = f2fs_kmem_cache_alloc(ino_entry_slab, GFP_ATOMIC); - new->ino = ino; - + struct ino_entry *e; +retry: spin_lock(&sbi->ino_lock[type]); - list_for_each_entry(e, &sbi->ino_list[type], list) { - if (e->ino == ino) { + + e = radix_tree_lookup(&sbi->ino_root[type], ino); + if (!e) { + e = kmem_cache_alloc(ino_entry_slab, GFP_ATOMIC); + if (!e) { spin_unlock(&sbi->ino_lock[type]); - kmem_cache_free(ino_entry_slab, new); - return; + goto retry; } - if (e->ino > ino) - break; - } + if (radix_tree_insert(&sbi->ino_root[type], ino, e)) { + spin_unlock(&sbi->ino_lock[type]); + kmem_cache_free(ino_entry_slab, e); + goto retry; + } + memset(e, 0, sizeof(struct ino_entry)); + e->ino = ino; - /* add new entry into list which is sorted by inode number */ - list_add_tail(&new->list, &e->list); + list_add_tail(&e->list, &sbi->ino_list[type]); + } spin_unlock(&sbi->ino_lock[type]); } @@ -310,14 +313,15 @@ static void __remove_ino_entry(struct f2fs_sb_info *sbi, nid_t ino, int type) struct ino_entry *e; spin_lock(&sbi->ino_lock[type]); - list_for_each_entry(e, &sbi->ino_list[type], list) { - if (e->ino == ino) { - list_del(&e->list); + e = radix_tree_lookup(&sbi->ino_root[type], ino); + if (e) { + list_del(&e->list); + radix_tree_delete(&sbi->ino_root[type], ino); + if (type == ORPHAN_INO) sbi->n_orphans--; - spin_unlock(&sbi->ino_lock[type]); - kmem_cache_free(ino_entry_slab, e); - return; - } + spin_unlock(&sbi->ino_lock[type]); + kmem_cache_free(ino_entry_slab, e); + return; } spin_unlock(&sbi->ino_lock[type]); } @@ -346,7 +350,7 @@ void release_orphan_inode(struct f2fs_sb_info *sbi) void add_orphan_inode(struct f2fs_sb_info *sbi, nid_t ino) { - /* add new orphan entry into list which is sorted by inode number */ + /* add new orphan ino entry into list */ __add_ino_entry(sbi, ino, ORPHAN_INO); } @@ -943,6 +947,7 @@ void init_ino_entry_info(struct f2fs_sb_info *sbi) int i; for (i = 0; i < MAX_INO_ENTRY; i++) { + INIT_RADIX_TREE(&sbi->ino_root[i], GFP_ATOMIC); spin_lock_init(&sbi->ino_lock[i]); INIT_LIST_HEAD(&sbi->ino_list[i]); } diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index b6fa6ec..4454caa 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -456,6 +456,7 @@ struct f2fs_sb_info { wait_queue_head_t cp_wait; /* for inode management */ + struct radix_tree_root ino_root[MAX_INO_ENTRY]; /* ino entry array */ spinlock_t ino_lock[MAX_INO_ENTRY]; /* for ino entry lock */ struct list_head ino_list[MAX_INO_ENTRY]; /* inode list head */ -- cgit v1.1 From fff04f90c1b9f91b9c513a89702a4b9ffe5dc1c5 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Fri, 25 Jul 2014 07:40:59 -0700 Subject: f2fs: add info of appended or updated data writes This patch introduces a inode number list in which represents inodes having appended data writes or updated data writes after last checkpoint. This will be used at fsync to determine whether the recovery information should be written or not. Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/checkpoint.c | 39 +++++++++++++++++++++++++++++++++++++++ fs/f2fs/data.c | 2 ++ fs/f2fs/f2fs.h | 7 +++++++ fs/f2fs/inline.c | 1 + fs/f2fs/inode.c | 4 ++++ 5 files changed, 53 insertions(+) (limited to 'fs') diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c index 4bf2037..430163d 100644 --- a/fs/f2fs/checkpoint.c +++ b/fs/f2fs/checkpoint.c @@ -326,6 +326,44 @@ static void __remove_ino_entry(struct f2fs_sb_info *sbi, nid_t ino, int type) spin_unlock(&sbi->ino_lock[type]); } +void add_dirty_inode(struct f2fs_sb_info *sbi, nid_t ino, int type) +{ + /* add new dirty ino entry into list */ + __add_ino_entry(sbi, ino, type); +} + +void remove_dirty_inode(struct f2fs_sb_info *sbi, nid_t ino, int type) +{ + /* remove dirty ino entry from list */ + __remove_ino_entry(sbi, ino, type); +} + +/* mode should be APPEND_INO or UPDATE_INO */ +bool exist_written_data(struct f2fs_sb_info *sbi, nid_t ino, int mode) +{ + struct ino_entry *e; + spin_lock(&sbi->ino_lock[mode]); + e = radix_tree_lookup(&sbi->ino_root[mode], ino); + spin_unlock(&sbi->ino_lock[mode]); + return e ? true : false; +} + +static void release_dirty_inode(struct f2fs_sb_info *sbi) +{ + struct ino_entry *e, *tmp; + int i; + + for (i = APPEND_INO; i <= UPDATE_INO; i++) { + spin_lock(&sbi->ino_lock[i]); + list_for_each_entry_safe(e, tmp, &sbi->ino_list[i], list) { + list_del(&e->list); + radix_tree_delete(&sbi->ino_root[i], e->ino); + kmem_cache_free(ino_entry_slab, e); + } + spin_unlock(&sbi->ino_lock[i]); + } +} + int acquire_orphan_inode(struct f2fs_sb_info *sbi) { int err = 0; @@ -897,6 +935,7 @@ static void do_checkpoint(struct f2fs_sb_info *sbi, bool is_umount) if (unlikely(!is_set_ckpt_flags(ckpt, CP_ERROR_FLAG))) { clear_prefree_segments(sbi); + release_dirty_inode(sbi); F2FS_RESET_SB_DIRT(sbi); } } diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index 482313d..ec3c886 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -789,9 +789,11 @@ int do_write_data_page(struct page *page, struct f2fs_io_info *fio) !is_cold_data(page) && need_inplace_update(inode))) { rewrite_data_page(page, old_blkaddr, fio); + set_inode_flag(F2FS_I(inode), FI_UPDATE_WRITE); } else { write_data_page(page, &dn, &new_blkaddr, fio); update_extent_cache(new_blkaddr, &dn); + set_inode_flag(F2FS_I(inode), FI_APPEND_WRITE); } out_writepage: f2fs_put_dnode(&dn); diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 4454caa..ab36025 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -103,6 +103,8 @@ enum { /* for the list of ino */ enum { ORPHAN_INO, /* for orphan ino list */ + APPEND_INO, /* for append ino list */ + UPDATE_INO, /* for update ino list */ MAX_INO_ENTRY, /* max. list */ }; @@ -994,6 +996,8 @@ enum { FI_NO_EXTENT, /* not to use the extent cache */ FI_INLINE_XATTR, /* used for inline xattr */ FI_INLINE_DATA, /* used for inline data*/ + FI_APPEND_WRITE, /* inode has appended data */ + FI_UPDATE_WRITE, /* inode has in-place-update data */ }; static inline void set_inode_flag(struct f2fs_inode_info *fi, int flag) @@ -1252,6 +1256,9 @@ struct page *grab_meta_page(struct f2fs_sb_info *, pgoff_t); struct page *get_meta_page(struct f2fs_sb_info *, pgoff_t); int ra_meta_pages(struct f2fs_sb_info *, int, int, int); long sync_meta_pages(struct f2fs_sb_info *, enum page_type, long); +void add_dirty_inode(struct f2fs_sb_info *, nid_t, int type); +void remove_dirty_inode(struct f2fs_sb_info *, nid_t, int type); +bool exist_written_data(struct f2fs_sb_info *, nid_t, int); int acquire_orphan_inode(struct f2fs_sb_info *); void release_orphan_inode(struct f2fs_sb_info *); void add_orphan_inode(struct f2fs_sb_info *, nid_t); diff --git a/fs/f2fs/inline.c b/fs/f2fs/inline.c index 1bba522..5beecce 100644 --- a/fs/f2fs/inline.c +++ b/fs/f2fs/inline.c @@ -172,6 +172,7 @@ int f2fs_write_inline_data(struct inode *inode, stat_inc_inline_inode(inode); } + set_inode_flag(F2FS_I(inode), FI_APPEND_WRITE); sync_inode_page(&dn); f2fs_put_dnode(&dn); diff --git a/fs/f2fs/inode.c b/fs/f2fs/inode.c index cafba3c..0e69aa9 100644 --- a/fs/f2fs/inode.c +++ b/fs/f2fs/inode.c @@ -296,6 +296,10 @@ void f2fs_evict_inode(struct inode *inode) sb_end_intwrite(inode->i_sb); no_delete: invalidate_mapping_pages(NODE_MAPPING(sbi), inode->i_ino, inode->i_ino); + if (is_inode_flag_set(F2FS_I(inode), FI_APPEND_WRITE)) + add_dirty_inode(sbi, inode->i_ino, APPEND_INO); + if (is_inode_flag_set(F2FS_I(inode), FI_UPDATE_WRITE)) + add_dirty_inode(sbi, inode->i_ino, UPDATE_INO); out_clear: clear_inode(inode); } -- cgit v1.1 From 6d99ba41a72c1213f2029e6fc66aa7943339b5db Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Thu, 24 Jul 2014 19:08:02 -0700 Subject: f2fs: skip unnecessary data writes during fsync This patch intends to improve the fsync performance by skipping remaining the recovery information, only when there is no data that we should recover. Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/file.c | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) (limited to 'fs') diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index 7c652b3..9b888fb 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -133,6 +133,17 @@ int f2fs_sync_file(struct file *file, loff_t start, loff_t end, int datasync) return ret; } + /* + * if there is no written data, don't waste time to write recovery info. + */ + if (!is_inode_flag_set(fi, FI_APPEND_WRITE) && + !exist_written_data(sbi, inode->i_ino, APPEND_INO)) { + if (is_inode_flag_set(fi, FI_UPDATE_WRITE) || + exist_written_data(sbi, inode->i_ino, UPDATE_INO)) + goto flush_out; + goto out; + } + /* guarantee free sections for fsync */ f2fs_balance_fs(sbi); @@ -188,6 +199,13 @@ int f2fs_sync_file(struct file *file, loff_t start, loff_t end, int datasync) ret = wait_on_node_pages_writeback(sbi, inode->i_ino); if (ret) goto out; + + /* once recovery info is written, don't need to tack this */ + remove_dirty_inode(sbi, inode->i_ino, APPEND_INO); + clear_inode_flag(fi, FI_APPEND_WRITE); +flush_out: + remove_dirty_inode(sbi, inode->i_ino, UPDATE_INO); + clear_inode_flag(fi, FI_UPDATE_WRITE); ret = f2fs_issue_flush(F2FS_SB(inode->i_sb)); } out: -- cgit v1.1 From ea1aa12ca237149227ef68af50c9a1acf027b625 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Thu, 24 Jul 2014 19:11:43 -0700 Subject: f2fs: enable in-place-update for fdatasync This patch enforces in-place-updates only when fdatasync is requested. If we adopt this in-place-updates for the fdatasync, we can skip to write the recovery information. Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/f2fs.h | 1 + fs/f2fs/file.c | 7 +++++++ fs/f2fs/segment.h | 4 ++++ 3 files changed, 12 insertions(+) (limited to 'fs') diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index ab36025..8f8685e 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -998,6 +998,7 @@ enum { FI_INLINE_DATA, /* used for inline data*/ FI_APPEND_WRITE, /* inode has appended data */ FI_UPDATE_WRITE, /* inode has in-place-update data */ + FI_NEED_IPU, /* used fo ipu for fdatasync */ }; static inline void set_inode_flag(struct f2fs_inode_info *fi, int flag) diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index 9b888fb..9550135 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -127,7 +127,14 @@ int f2fs_sync_file(struct file *file, loff_t start, loff_t end, int datasync) return 0; trace_f2fs_sync_file_enter(inode); + + /* if fdatasync is triggered, let's do in-place-update */ + if (datasync) + set_inode_flag(fi, FI_NEED_IPU); + ret = filemap_write_and_wait_range(inode->i_mapping, start, end); + if (datasync) + clear_inode_flag(fi, FI_NEED_IPU); if (ret) { trace_f2fs_sync_file_exit(inode, need_cp, datasync, ret); return ret; diff --git a/fs/f2fs/segment.h b/fs/f2fs/segment.h index ee5c75e..55973f7 100644 --- a/fs/f2fs/segment.h +++ b/fs/f2fs/segment.h @@ -486,6 +486,10 @@ static inline bool need_inplace_update(struct inode *inode) if (S_ISDIR(inode->i_mode)) return false; + /* this is only set during fdatasync */ + if (is_inode_flag_set(F2FS_I(inode), FI_NEED_IPU)) + return true; + switch (SM_I(sbi)->ipu_policy) { case F2FS_IPU_FORCE: return true; -- cgit v1.1 From 01229f5e1b21b378863c91f8c653bbd8e593858c Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Fri, 25 Jul 2014 07:41:43 -0700 Subject: f2fs: fix wrong condition for unlikely This patch fixes the wrongly used unlikely condition. Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/checkpoint.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c index 430163d..26b94bb 100644 --- a/fs/f2fs/checkpoint.c +++ b/fs/f2fs/checkpoint.c @@ -933,7 +933,7 @@ static void do_checkpoint(struct f2fs_sb_info *sbi, bool is_umount) /* Here, we only have one bio having CP pack */ sync_meta_pages(sbi, META_FLUSH, LONG_MAX); - if (unlikely(!is_set_ckpt_flags(ckpt, CP_ERROR_FLAG))) { + if (!is_set_ckpt_flags(ckpt, CP_ERROR_FLAG)) { clear_prefree_segments(sbi); release_dirty_inode(sbi); F2FS_RESET_SB_DIRT(sbi); -- cgit v1.1 From 61e0f2d0a5f2cddf7cd96fa8cb7fe53a1e5e325d Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Fri, 25 Jul 2014 15:47:23 -0700 Subject: f2fs: test before set/clear bits If the bit is already set, we don't need to reset it, and vice versa. Because we don't need to make the caches dirty for that. Signed-off-by: Jaegeuk Kim --- fs/f2fs/f2fs.h | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'fs') diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 8f8685e..475f97c 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -1003,7 +1003,8 @@ enum { static inline void set_inode_flag(struct f2fs_inode_info *fi, int flag) { - set_bit(flag, &fi->flags); + if (!test_bit(flag, &fi->flags)) + set_bit(flag, &fi->flags); } static inline int is_inode_flag_set(struct f2fs_inode_info *fi, int flag) @@ -1013,7 +1014,8 @@ static inline int is_inode_flag_set(struct f2fs_inode_info *fi, int flag) static inline void clear_inode_flag(struct f2fs_inode_info *fi, int flag) { - clear_bit(flag, &fi->flags); + if (test_bit(flag, &fi->flags)) + clear_bit(flag, &fi->flags); } static inline void set_acl_inode(struct f2fs_inode_info *fi, umode_t mode) -- cgit v1.1 From cf2271e781cb16e1ca22be920010c2b64d90c338 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Fri, 25 Jul 2014 15:47:25 -0700 Subject: f2fs: avoid retrying wrong recovery routine when error was occurred This patch eliminates the propagation of recovery errors to the next mount. Signed-off-by: Jaegeuk Kim --- fs/f2fs/checkpoint.c | 3 ++- fs/f2fs/f2fs.h | 2 +- fs/f2fs/recovery.c | 20 +++++++++++++++++++- fs/f2fs/segment.c | 5 +---- 4 files changed, 23 insertions(+), 7 deletions(-) (limited to 'fs') diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c index 26b94bb..cea20b8 100644 --- a/fs/f2fs/checkpoint.c +++ b/fs/f2fs/checkpoint.c @@ -796,6 +796,7 @@ static void wait_on_all_pages_writeback(struct f2fs_sb_info *sbi) static void do_checkpoint(struct f2fs_sb_info *sbi, bool is_umount) { struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi); + struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_WARM_NODE); nid_t last_nid = 0; block_t start_blk; struct page *cp_page; @@ -809,7 +810,7 @@ static void do_checkpoint(struct f2fs_sb_info *sbi, bool is_umount) * This avoids to conduct wrong roll-forward operations and uses * metapages, so should be called prior to sync_meta_pages below. */ - discard_next_dnode(sbi); + discard_next_dnode(sbi, NEXT_FREE_BLKADDR(sbi, curseg)); /* Flush all the NAT/SIT pages */ while (get_pages(sbi, F2FS_DIRTY_META)) diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 475f97c..14b9f74 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -1225,7 +1225,7 @@ void destroy_flush_cmd_control(struct f2fs_sb_info *); void invalidate_blocks(struct f2fs_sb_info *, block_t); void refresh_sit_entry(struct f2fs_sb_info *, block_t, block_t); void clear_prefree_segments(struct f2fs_sb_info *); -void discard_next_dnode(struct f2fs_sb_info *); +void discard_next_dnode(struct f2fs_sb_info *, block_t); int npages_for_summary_flush(struct f2fs_sb_info *); void allocate_new_segments(struct f2fs_sb_info *); struct page *get_sum_page(struct f2fs_sb_info *, unsigned int); diff --git a/fs/f2fs/recovery.c b/fs/f2fs/recovery.c index a112368..b2aa53b 100644 --- a/fs/f2fs/recovery.c +++ b/fs/f2fs/recovery.c @@ -434,7 +434,9 @@ next: int recover_fsync_data(struct f2fs_sb_info *sbi) { + struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_WARM_NODE); struct list_head inode_list; + block_t blkaddr; int err; bool need_writecp = false; @@ -447,6 +449,9 @@ int recover_fsync_data(struct f2fs_sb_info *sbi) /* step #1: find fsynced inode numbers */ sbi->por_doing = true; + + blkaddr = NEXT_FREE_BLKADDR(sbi, curseg); + err = find_fsync_dnodes(sbi, &inode_list); if (err) goto out; @@ -462,8 +467,21 @@ int recover_fsync_data(struct f2fs_sb_info *sbi) out: destroy_fsync_dnodes(&inode_list); kmem_cache_destroy(fsync_entry_slab); + + if (err) { + truncate_inode_pages_final(NODE_MAPPING(sbi)); + truncate_inode_pages_final(META_MAPPING(sbi)); + } + sbi->por_doing = false; - if (!err && need_writecp) + if (err) { + discard_next_dnode(sbi, blkaddr); + + /* Flush all the NAT/SIT pages */ + while (get_pages(sbi, F2FS_DIRTY_META)) + sync_meta_pages(sbi, META, LONG_MAX); + } else if (need_writecp) { write_checkpoint(sbi, false); + } return err; } diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 9fce0f47..e016b97 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -379,11 +379,8 @@ static int f2fs_issue_discard(struct f2fs_sb_info *sbi, return blkdev_issue_discard(sbi->sb->s_bdev, start, len, GFP_NOFS, 0); } -void discard_next_dnode(struct f2fs_sb_info *sbi) +void discard_next_dnode(struct f2fs_sb_info *sbi, block_t blkaddr) { - struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_WARM_NODE); - block_t blkaddr = NEXT_FREE_BLKADDR(sbi, curseg); - if (f2fs_issue_discard(sbi, blkaddr, 1)) { struct page *page = grab_meta_page(sbi, blkaddr); /* zero-filled page */ -- cgit v1.1 From 24a9ee0fa3d40415765d2d9f6064930d72ad8b5a Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Fri, 25 Jul 2014 17:46:10 -0700 Subject: f2fs: add tracepoint for f2fs_issue_flush This patch adds a tracepoint for f2fs_issue_flush. Signed-off-by: Jaegeuk Kim --- fs/f2fs/segment.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'fs') diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index e016b97..5a53a0a 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -239,6 +239,9 @@ int f2fs_issue_flush(struct f2fs_sb_info *sbi) struct flush_cmd_control *fcc = SM_I(sbi)->cmd_control_info; struct flush_cmd cmd; + trace_f2fs_issue_flush(sbi->sb, test_opt(sbi, NOBARRIER), + test_opt(sbi, FLUSH_MERGE)); + if (test_opt(sbi, NOBARRIER)) return 0; -- cgit v1.1 From 33be828ada7274ebcade2001f16e5b4e33a4636e Mon Sep 17 00:00:00 2001 From: Dongho Sim Date: Wed, 30 Jul 2014 06:52:41 +0000 Subject: f2fs: remove redundant lines in allocate_data_block There are redundant lines in allocate_data_block. In this function, we call refresh_sit_entry with old seg and old curseg. After that, we call locate_dirty_segment with old curseg. But, the new address is always allocated from old curseg and we call locate_dirty_segment with old curseg in refresh_sit_entry. So, we do not need to call locate_dirty_segment with old curseg again. We've discussed like below: Jaegeuk said: "When considering SSR, we need to take care of the following scenario. - old segno : X - new address : Z - old curseg : Y This means, a new block is supposed to be written to Z from X. And Z is newly allocated in the same path from Y. In that case, we should trigger locate_dirty_segment for Y, since it was a current_segment and can be dirty owing to SSR. But that was not included in the dirty list." Changman said: "We already choosed old curseg(Y) and then we allocate new address(Z) from old curseg(Y). After that we call refresh_sit_entry(old address, new address). In the funcation, we call locate_dirty_segment with old seg and old curseg. So calling locate_dirty_segment after refresh_sit_entry again is redundant." Jaegeuk said: "Right. The new address is always allocated from old_curseg." Reviewed-by: Chao Yu Signed-off-by: Dongho Sim Signed-off-by: Jaegeuk Kim --- fs/f2fs/segment.c | 3 --- 1 file changed, 3 deletions(-) (limited to 'fs') diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 5a53a0a..c3b76d0 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -976,14 +976,12 @@ void allocate_data_block(struct f2fs_sb_info *sbi, struct page *page, { struct sit_info *sit_i = SIT_I(sbi); struct curseg_info *curseg; - unsigned int old_cursegno; curseg = CURSEG_I(sbi, type); mutex_lock(&curseg->curseg_mutex); *new_blkaddr = NEXT_FREE_BLKADDR(sbi, curseg); - old_cursegno = curseg->segno; /* * __add_sum_entry should be resided under the curseg_mutex @@ -1004,7 +1002,6 @@ void allocate_data_block(struct f2fs_sb_info *sbi, struct page *page, * since SSR needs latest valid block information. */ refresh_sit_entry(sbi, old_blkaddr, *new_blkaddr); - locate_dirty_segment(sbi, old_cursegno); mutex_unlock(&sit_i->sentry_lock); -- cgit v1.1 From 65b85ccce03f17b3098273192b20885f3fead820 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Wed, 30 Jul 2014 17:25:54 -0700 Subject: f2fs: fix coding style This patch fixes wrong coding style. Signed-off-by: Jaegeuk Kim --- fs/f2fs/f2fs.h | 2 +- fs/f2fs/file.c | 5 +++-- 2 files changed, 4 insertions(+), 3 deletions(-) (limited to 'fs') diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 14b9f74..80c7869 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -781,7 +781,7 @@ static inline void *__bitmap_ptr(struct f2fs_sb_info *sbi, int flag) if (flag == NAT_BITMAP) return &ckpt->sit_nat_version_bitmap; else - return ((unsigned char *)ckpt + F2FS_BLKSIZE); + return (unsigned char *)ckpt + F2FS_BLKSIZE; } else { offset = (flag == NAT_BITMAP) ? le32_to_cpu(ckpt->sit_ver_bitmap_bytesize) : 0; diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index 9550135..2a91a7d 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -231,8 +231,9 @@ static pgoff_t __get_first_dirty_index(struct address_space *mapping, /* find first dirty page index */ pagevec_init(&pvec, 0); - nr_pages = pagevec_lookup_tag(&pvec, mapping, &pgofs, PAGECACHE_TAG_DIRTY, 1); - pgofs = nr_pages ? pvec.pages[0]->index: LONG_MAX; + nr_pages = pagevec_lookup_tag(&pvec, mapping, &pgofs, + PAGECACHE_TAG_DIRTY, 1); + pgofs = nr_pages ? pvec.pages[0]->index : LONG_MAX; pagevec_release(&pvec); return pgofs; } -- cgit v1.1 From b3582c68920105e29d219714d8a6fbde25a43379 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Thu, 3 Jul 2014 18:58:39 +0800 Subject: f2fs: reduce competition among node page writes We do not need to block on ->node_write among different node page writers e.g. fsync/flush, unless we have a node page writer from write_checkpoint. So it's better use rw_semaphore instead of mutex type for ->node_write to promote performance. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/checkpoint.c | 6 +++--- fs/f2fs/f2fs.h | 2 +- fs/f2fs/node.c | 4 ++-- fs/f2fs/super.c | 2 +- 4 files changed, 7 insertions(+), 7 deletions(-) (limited to 'fs') diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c index cea20b8..6aeed5b 100644 --- a/fs/f2fs/checkpoint.c +++ b/fs/f2fs/checkpoint.c @@ -762,10 +762,10 @@ retry_flush_dents: * until finishing nat/sit flush. */ retry_flush_nodes: - mutex_lock(&sbi->node_write); + down_write(&sbi->node_write); if (get_pages(sbi, F2FS_DIRTY_NODES)) { - mutex_unlock(&sbi->node_write); + up_write(&sbi->node_write); sync_node_pages(sbi, 0, &wbc); goto retry_flush_nodes; } @@ -774,7 +774,7 @@ retry_flush_nodes: static void unblock_operations(struct f2fs_sb_info *sbi) { - mutex_unlock(&sbi->node_write); + up_write(&sbi->node_write); f2fs_unlock_all(sbi); } diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 80c7869..9942567 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -452,7 +452,7 @@ struct f2fs_sb_info { struct inode *meta_inode; /* cache meta blocks */ struct mutex cp_mutex; /* checkpoint procedure lock */ struct rw_semaphore cp_rwsem; /* blocking FS operations */ - struct mutex node_write; /* locking node writes */ + struct rw_semaphore node_write; /* locking node writes */ struct mutex writepages; /* mutex for writepages() */ bool por_doing; /* recovery is doing or not */ wait_queue_head_t cp_wait; diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index a90f51d..7b5b5de 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -1231,12 +1231,12 @@ static int f2fs_write_node_page(struct page *page, if (wbc->for_reclaim) goto redirty_out; - mutex_lock(&sbi->node_write); + down_read(&sbi->node_write); set_page_writeback(page); write_node_page(sbi, page, &fio, nid, ni.blk_addr, &new_addr); set_node_addr(sbi, &ni, new_addr, is_fsync_dnode(page)); dec_page_count(sbi, F2FS_DIRTY_NODES); - mutex_unlock(&sbi->node_write); + up_read(&sbi->node_write); unlock_page(page); return 0; diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index f253e22..657582f 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -953,7 +953,7 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent) mutex_init(&sbi->gc_mutex); mutex_init(&sbi->writepages); mutex_init(&sbi->cp_mutex); - mutex_init(&sbi->node_write); + init_rwsem(&sbi->node_write); sbi->por_doing = false; spin_lock_init(&sbi->stat_lock); -- cgit v1.1 From 70407fad85f2ec87a0cf56057c3267cd3aa22768 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Thu, 31 Jul 2014 21:11:22 +0800 Subject: f2fs: add tracepoint for f2fs_direct_IO This patch adds a tracepoint for f2fs_direct_IO. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/data.c | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'fs') diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index ec3c886..03313099 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -1086,9 +1086,14 @@ static ssize_t f2fs_direct_IO(int rw, struct kiocb *iocb, /* clear fsync mark to recover these blocks */ fsync_mark_clear(F2FS_SB(inode->i_sb), inode->i_ino); + trace_f2fs_direct_IO_enter(inode, offset, count, rw); + err = blockdev_direct_IO(rw, iocb, inode, iter, offset, get_data_block); if (err < 0 && (rw & WRITE)) f2fs_write_failed(mapping, offset + count); + + trace_f2fs_direct_IO_exit(inode, offset, count, rw, err); + return err; } -- cgit v1.1 From 70cfed88efa760fd165fc413cfd1801b5cc8acd2 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Sat, 2 Aug 2014 15:26:04 +0800 Subject: f2fs: avoid skipping recover_inline_xattr after recover_inline_data When we recover data of inode in roll-forward procedure, and the inode has both inline data and inline xattr. We may skip recovering inline xattr if we recover inline data form node page first. This patch will fix the problem that we lost inline xattr data in above scenario. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/f2fs.h | 1 + fs/f2fs/node.c | 4 +--- fs/f2fs/recovery.c | 2 ++ 3 files changed, 4 insertions(+), 3 deletions(-) (limited to 'fs') diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 9942567..4dab533 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -1204,6 +1204,7 @@ void alloc_nid_done(struct f2fs_sb_info *, nid_t); void alloc_nid_failed(struct f2fs_sb_info *, nid_t); void recover_node_page(struct f2fs_sb_info *, struct page *, struct f2fs_summary *, struct node_info *, block_t); +void recover_inline_xattr(struct inode *, struct page *); bool recover_xattr_data(struct inode *, struct page *, block_t); int recover_inode_page(struct f2fs_sb_info *, struct page *); int restore_node_summary(struct f2fs_sb_info *, unsigned int, diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index 7b5b5de..d3d90d2 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -1549,7 +1549,7 @@ void recover_node_page(struct f2fs_sb_info *sbi, struct page *page, clear_node_page_dirty(page); } -static void recover_inline_xattr(struct inode *inode, struct page *page) +void recover_inline_xattr(struct inode *inode, struct page *page) { struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); void *src_addr, *dst_addr; @@ -1588,8 +1588,6 @@ bool recover_xattr_data(struct inode *inode, struct page *page, block_t blkaddr) nid_t new_xnid = nid_of_node(page); struct node_info ni; - recover_inline_xattr(inode, page); - if (!f2fs_has_xattr_block(ofs_of_node(page))) return false; diff --git a/fs/f2fs/recovery.c b/fs/f2fs/recovery.c index b2aa53b..fe1c6d9 100644 --- a/fs/f2fs/recovery.c +++ b/fs/f2fs/recovery.c @@ -300,6 +300,8 @@ static int do_recover_data(struct f2fs_sb_info *sbi, struct inode *inode, struct node_info ni; int err = 0, recovered = 0; + recover_inline_xattr(inode, page); + if (recover_inline_data(inode, page)) goto out; -- cgit v1.1 From 002a41cabb5829d59c0337dcb5fa3893e0bb15fd Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Mon, 4 Aug 2014 09:54:58 +0800 Subject: f2fs: invalidate xattr node page when evict inode When inode is evicted, all the page cache belong to this inode should be released including the xattr node page. But previously we didn't do this, this patch fixed this issue. v2: o reposition invalidate_mapping_pages() to the right place suggested by Jaegeuk Kim. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/inode.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'fs') diff --git a/fs/f2fs/inode.c b/fs/f2fs/inode.c index 0e69aa9..2c39999 100644 --- a/fs/f2fs/inode.c +++ b/fs/f2fs/inode.c @@ -267,6 +267,7 @@ int f2fs_write_inode(struct inode *inode, struct writeback_control *wbc) void f2fs_evict_inode(struct inode *inode) { struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); + nid_t xnid = F2FS_I(inode)->i_xattr_nid; trace_f2fs_evict_inode(inode); truncate_inode_pages_final(&inode->i_data); @@ -296,6 +297,8 @@ void f2fs_evict_inode(struct inode *inode) sb_end_intwrite(inode->i_sb); no_delete: invalidate_mapping_pages(NODE_MAPPING(sbi), inode->i_ino, inode->i_ino); + if (xnid) + invalidate_mapping_pages(NODE_MAPPING(sbi), xnid, xnid); if (is_inode_flag_set(F2FS_I(inode), FI_APPEND_WRITE)) add_dirty_inode(sbi, inode->i_ino, APPEND_INO); if (is_inode_flag_set(F2FS_I(inode), FI_UPDATE_WRITE)) -- cgit v1.1 From 497a0930bb4bd148451e1af239a082e3916b681b Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Mon, 4 Aug 2014 10:11:17 +0800 Subject: f2fs: add f2fs_balance_fs for expand_inode_data This patch adds f2fs_balance_fs in expand_inode_data to avoid allocation failure with segment. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/file.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'fs') diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index 2a91a7d..208f1a9 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -672,6 +672,8 @@ static int expand_inode_data(struct inode *inode, loff_t offset, loff_t off_start, off_end; int ret = 0; + f2fs_balance_fs(sbi); + ret = inode_newsize_ok(inode, (len + offset)); if (ret) return ret; -- cgit v1.1 From b65ee14818e67127aa242fe1dbd3711b9c095cc0 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Mon, 4 Aug 2014 10:10:07 +0800 Subject: f2fs: use for_each_set_bit to simplify the code This patch uses for_each_set_bit to simplify some codes in f2fs. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/gc.c | 7 ++----- fs/f2fs/segment.c | 13 ++++--------- 2 files changed, 6 insertions(+), 14 deletions(-) (limited to 'fs') diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c index b90dbe5..d7947d9 100644 --- a/fs/f2fs/gc.c +++ b/fs/f2fs/gc.c @@ -186,7 +186,6 @@ static unsigned int get_max_cost(struct f2fs_sb_info *sbi, static unsigned int check_bg_victims(struct f2fs_sb_info *sbi) { struct dirty_seglist_info *dirty_i = DIRTY_I(sbi); - unsigned int hint = 0; unsigned int secno; /* @@ -194,11 +193,9 @@ static unsigned int check_bg_victims(struct f2fs_sb_info *sbi) * selected by background GC before. * Those segments guarantee they have small valid blocks. */ -next: - secno = find_next_bit(dirty_i->victim_secmap, TOTAL_SECS(sbi), hint++); - if (secno < TOTAL_SECS(sbi)) { + for_each_set_bit(secno, dirty_i->victim_secmap, TOTAL_SECS(sbi)) { if (sec_usage_check(sbi, secno)) - goto next; + continue; clear_bit(secno, dirty_i->victim_secmap); return secno * sbi->segs_per_sec; } diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index c3b76d0..0dfeeba 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -439,17 +439,12 @@ static void add_discard_addrs(struct f2fs_sb_info *sbi, static void set_prefree_as_free_segments(struct f2fs_sb_info *sbi) { struct dirty_seglist_info *dirty_i = DIRTY_I(sbi); - unsigned int segno = -1; + unsigned int segno; unsigned int total_segs = TOTAL_SEGS(sbi); mutex_lock(&dirty_i->seglist_lock); - while (1) { - segno = find_next_bit(dirty_i->dirty_segmap[PRE], total_segs, - segno + 1); - if (segno >= total_segs) - break; + for_each_set_bit(segno, dirty_i->dirty_segmap[PRE], total_segs) __set_test_and_free(sbi, segno); - } mutex_unlock(&dirty_i->seglist_lock); } @@ -1531,7 +1526,7 @@ void flush_sit_entries(struct f2fs_sb_info *sbi) struct page *page = NULL; struct f2fs_sit_block *raw_sit = NULL; unsigned int start = 0, end = 0; - unsigned int segno = -1; + unsigned int segno; bool flushed; mutex_lock(&curseg->curseg_mutex); @@ -1543,7 +1538,7 @@ void flush_sit_entries(struct f2fs_sb_info *sbi) */ flushed = flush_sits_in_journal(sbi); - while ((segno = find_next_bit(bitmap, nsegs, segno + 1)) < nsegs) { + for_each_set_bit(segno, bitmap, nsegs) { struct seg_entry *se = get_seg_entry(sbi, segno); int sit_offset, offset; -- cgit v1.1