diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2013-07-02 09:42:38 -0700 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2013-07-02 09:42:38 -0700 |
commit | 3f490f7f99053288bd85563f8d9b5032b810e177 (patch) | |
tree | a921f0fba44c8aac602608ff7349c25e4129b132 | |
parent | c4eb1b07303ad9e00aba842aa90d5521293ac857 (diff) | |
parent | a1dd3c13ce65b726fddfe72b9d2f1009db983ce6 (diff) | |
download | op-kernel-dev-3f490f7f99053288bd85563f8d9b5032b810e177.zip op-kernel-dev-3f490f7f99053288bd85563f8d9b5032b810e177.tar.gz |
Merge tag 'for-f2fs-3.11' of git://git.kernel.org/pub/scm/linux/kernel/git/jaegeuk/f2fs
Pull f2fs updates from Jaegeuk Kim:
"This patch-set includes the following major enhancement patches:
- remount_fs callback function
- restore parent inode number to enhance the fsync performance
- xattr security labels
- reduce the number of redundant lock/unlock data pages
- avoid frequent write_inode calls
The other minor bug fixes are as follows.
- endian conversion bugs
- various bugs in the roll-forward recovery routine"
* tag 'for-f2fs-3.11' of git://git.kernel.org/pub/scm/linux/kernel/git/jaegeuk/f2fs: (56 commits)
f2fs: fix to recover i_size from roll-forward
f2fs: remove the unused argument "sbi" of func destroy_fsync_dnodes()
f2fs: remove reusing any prefree segments
f2fs: code cleanup and simplify in func {find/add}_gc_inode
f2fs: optimize the init_dirty_segmap function
f2fs: fix an endian conversion bug detected by sparse
f2fs: fix crc endian conversion
f2fs: add remount_fs callback support
f2fs: recover wrong pino after checkpoint during fsync
f2fs: optimize do_write_data_page()
f2fs: make locate_dirty_segment() as static
f2fs: remove unnecessary parameter "offset" from __add_sum_entry()
f2fs: avoid freqeunt write_inode calls
f2fs: optimise the truncate_data_blocks_range() range
f2fs: use the F2FS specific flags in f2fs_ioctl()
f2fs: sync dir->i_size with its block allocation
f2fs: fix i_blocks translation on various types of files
f2fs: set sb->s_fs_info before calling parse_options()
f2fs: support xattr security labels
f2fs: fix iget/iput of dir during recovery
...
-rw-r--r-- | Documentation/filesystems/f2fs.txt | 9 | ||||
-rw-r--r-- | fs/f2fs/Kconfig | 12 | ||||
-rw-r--r-- | fs/f2fs/acl.c | 2 | ||||
-rw-r--r-- | fs/f2fs/checkpoint.c | 99 | ||||
-rw-r--r-- | fs/f2fs/data.c | 68 | ||||
-rw-r--r-- | fs/f2fs/debug.c | 4 | ||||
-rw-r--r-- | fs/f2fs/dir.c | 109 | ||||
-rw-r--r-- | fs/f2fs/f2fs.h | 66 | ||||
-rw-r--r-- | fs/f2fs/file.c | 58 | ||||
-rw-r--r-- | fs/f2fs/gc.c | 42 | ||||
-rw-r--r-- | fs/f2fs/inode.c | 13 | ||||
-rw-r--r-- | fs/f2fs/namei.c | 17 | ||||
-rw-r--r-- | fs/f2fs/node.c | 34 | ||||
-rw-r--r-- | fs/f2fs/node.h | 68 | ||||
-rw-r--r-- | fs/f2fs/recovery.c | 150 | ||||
-rw-r--r-- | fs/f2fs/segment.c | 101 | ||||
-rw-r--r-- | fs/f2fs/super.c | 253 | ||||
-rw-r--r-- | fs/f2fs/xattr.c | 68 | ||||
-rw-r--r-- | fs/f2fs/xattr.h | 24 | ||||
-rw-r--r-- | include/linux/f2fs_fs.h | 4 |
20 files changed, 750 insertions, 451 deletions
diff --git a/Documentation/filesystems/f2fs.txt b/Documentation/filesystems/f2fs.txt index bd3c56c..b91e2f2 100644 --- a/Documentation/filesystems/f2fs.txt +++ b/Documentation/filesystems/f2fs.txt @@ -98,8 +98,13 @@ Cleaning Overhead MOUNT OPTIONS ================================================================================ -background_gc_off Turn off cleaning operations, namely garbage collection, - triggered in background when I/O subsystem is idle. +background_gc=%s Turn on/off cleaning operations, namely garbage + collection, triggered in background when I/O subsystem is + idle. If background_gc=on, it will turn on the garbage + collection and if background_gc=off, garbage collection + will be truned off. + Default value for this option is on. So garbage + collection is on by default. disable_roll_forward Disable the roll-forward recovery routine discard Issue discard/TRIM commands when a segment is cleaned. no_heap Disable heap-style segment allocation which finds free diff --git a/fs/f2fs/Kconfig b/fs/f2fs/Kconfig index fd27e7e..e06e099 100644 --- a/fs/f2fs/Kconfig +++ b/fs/f2fs/Kconfig @@ -51,3 +51,15 @@ config F2FS_FS_POSIX_ACL Linux website <http://acl.bestbits.at/>. If you don't know what Access Control Lists are, say N + +config F2FS_FS_SECURITY + bool "F2FS Security Labels" + depends on F2FS_FS_XATTR + help + Security labels provide an access control facility to support Linux + Security Models (LSMs) accepted by AppArmor, SELinux, Smack and TOMOYO + Linux. This option enables an extended attribute handler for file + security labels in the f2fs filesystem, so that it requires enabling + the extended attribute support in advance. + + If you are not using a security module, say N. diff --git a/fs/f2fs/acl.c b/fs/f2fs/acl.c index 44abc2f..b7826ec 100644 --- a/fs/f2fs/acl.c +++ b/fs/f2fs/acl.c @@ -250,7 +250,7 @@ static int f2fs_set_acl(struct inode *inode, int type, struct posix_acl *acl) } } - error = f2fs_setxattr(inode, name_index, "", value, size); + error = f2fs_setxattr(inode, name_index, "", value, size, NULL); kfree(value); if (!error) diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c index b1de01d..66a6b85 100644 --- a/fs/f2fs/checkpoint.c +++ b/fs/f2fs/checkpoint.c @@ -357,8 +357,8 @@ static struct page *validate_checkpoint(struct f2fs_sb_info *sbi, unsigned long blk_size = sbi->blocksize; struct f2fs_checkpoint *cp_block; unsigned long long cur_version = 0, pre_version = 0; - unsigned int crc = 0; size_t crc_offset; + __u32 crc = 0; /* Read the 1st cp block in this CP pack */ cp_page_1 = get_meta_page(sbi, cp_addr); @@ -369,7 +369,7 @@ static struct page *validate_checkpoint(struct f2fs_sb_info *sbi, if (crc_offset >= blk_size) goto invalid_cp1; - crc = *(unsigned int *)((unsigned char *)cp_block + crc_offset); + crc = le32_to_cpu(*((__u32 *)((unsigned char *)cp_block + crc_offset))); if (!f2fs_crc_valid(crc, cp_block, crc_offset)) goto invalid_cp1; @@ -384,7 +384,7 @@ static struct page *validate_checkpoint(struct f2fs_sb_info *sbi, if (crc_offset >= blk_size) goto invalid_cp2; - crc = *(unsigned int *)((unsigned char *)cp_block + crc_offset); + crc = le32_to_cpu(*((__u32 *)((unsigned char *)cp_block + crc_offset))); if (!f2fs_crc_valid(crc, cp_block, crc_offset)) goto invalid_cp2; @@ -450,13 +450,30 @@ fail_no_cp: return -EINVAL; } -void set_dirty_dir_page(struct inode *inode, struct page *page) +static int __add_dirty_inode(struct inode *inode, struct dir_inode_entry *new) { struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); struct list_head *head = &sbi->dir_inode_list; - struct dir_inode_entry *new; struct list_head *this; + list_for_each(this, head) { + struct dir_inode_entry *entry; + entry = list_entry(this, struct dir_inode_entry, list); + if (entry->inode == inode) + return -EEXIST; + } + list_add_tail(&new->list, head); +#ifdef CONFIG_F2FS_STAT_FS + sbi->n_dirty_dirs++; +#endif + return 0; +} + +void set_dirty_dir_page(struct inode *inode, struct page *page) +{ + struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); + struct dir_inode_entry *new; + if (!S_ISDIR(inode->i_mode)) return; retry: @@ -469,23 +486,31 @@ retry: INIT_LIST_HEAD(&new->list); spin_lock(&sbi->dir_inode_lock); - list_for_each(this, head) { - struct dir_inode_entry *entry; - entry = list_entry(this, struct dir_inode_entry, list); - if (entry->inode == inode) { - kmem_cache_free(inode_entry_slab, new); - goto out; - } - } - list_add_tail(&new->list, head); - sbi->n_dirty_dirs++; + if (__add_dirty_inode(inode, new)) + kmem_cache_free(inode_entry_slab, new); - BUG_ON(!S_ISDIR(inode->i_mode)); -out: inc_page_count(sbi, F2FS_DIRTY_DENTS); inode_inc_dirty_dents(inode); SetPagePrivate(page); + spin_unlock(&sbi->dir_inode_lock); +} +void add_dirty_dir_inode(struct inode *inode) +{ + struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); + struct dir_inode_entry *new; +retry: + new = kmem_cache_alloc(inode_entry_slab, GFP_NOFS); + if (!new) { + cond_resched(); + goto retry; + } + new->inode = inode; + INIT_LIST_HEAD(&new->list); + + spin_lock(&sbi->dir_inode_lock); + if (__add_dirty_inode(inode, new)) + kmem_cache_free(inode_entry_slab, new); spin_unlock(&sbi->dir_inode_lock); } @@ -499,8 +524,10 @@ void remove_dirty_dir_inode(struct inode *inode) return; spin_lock(&sbi->dir_inode_lock); - if (atomic_read(&F2FS_I(inode)->dirty_dents)) - goto out; + if (atomic_read(&F2FS_I(inode)->dirty_dents)) { + spin_unlock(&sbi->dir_inode_lock); + return; + } list_for_each(this, head) { struct dir_inode_entry *entry; @@ -508,12 +535,38 @@ void remove_dirty_dir_inode(struct inode *inode) if (entry->inode == inode) { list_del(&entry->list); kmem_cache_free(inode_entry_slab, entry); +#ifdef CONFIG_F2FS_STAT_FS sbi->n_dirty_dirs--; +#endif + break; + } + } + spin_unlock(&sbi->dir_inode_lock); + + /* Only from the recovery routine */ + if (is_inode_flag_set(F2FS_I(inode), FI_DELAY_IPUT)) { + clear_inode_flag(F2FS_I(inode), FI_DELAY_IPUT); + iput(inode); + } +} + +struct inode *check_dirty_dir_inode(struct f2fs_sb_info *sbi, nid_t ino) +{ + struct list_head *head = &sbi->dir_inode_list; + struct list_head *this; + struct inode *inode = NULL; + + spin_lock(&sbi->dir_inode_lock); + list_for_each(this, head) { + struct dir_inode_entry *entry; + entry = list_entry(this, struct dir_inode_entry, list); + if (entry->inode->i_ino == ino) { + inode = entry->inode; break; } } -out: spin_unlock(&sbi->dir_inode_lock); + return inode; } void sync_dirty_dir_inodes(struct f2fs_sb_info *sbi) @@ -595,7 +648,7 @@ static void do_checkpoint(struct f2fs_sb_info *sbi, bool is_umount) block_t start_blk; struct page *cp_page; unsigned int data_sum_blocks, orphan_blocks; - unsigned int crc32 = 0; + __u32 crc32 = 0; void *kaddr; int i; @@ -664,8 +717,8 @@ static void do_checkpoint(struct f2fs_sb_info *sbi, bool is_umount) get_nat_bitmap(sbi, __bitmap_ptr(sbi, NAT_BITMAP)); crc32 = f2fs_crc32(ckpt, le32_to_cpu(ckpt->checksum_offset)); - *(__le32 *)((unsigned char *)ckpt + - le32_to_cpu(ckpt->checksum_offset)) + *((__le32 *)((unsigned char *)ckpt + + le32_to_cpu(ckpt->checksum_offset))) = cpu_to_le32(crc32); start_blk = __start_cp_addr(sbi); diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index ce11d9a..035f9a3 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -68,7 +68,9 @@ static int check_extent_cache(struct inode *inode, pgoff_t pgofs, struct buffer_head *bh_result) { struct f2fs_inode_info *fi = F2FS_I(inode); +#ifdef CONFIG_F2FS_STAT_FS struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); +#endif pgoff_t start_fofs, end_fofs; block_t start_blkaddr; @@ -78,7 +80,9 @@ static int check_extent_cache(struct inode *inode, pgoff_t pgofs, return 0; } +#ifdef CONFIG_F2FS_STAT_FS sbi->total_hit_ext++; +#endif start_fofs = fi->ext.fofs; end_fofs = fi->ext.fofs + fi->ext.len - 1; start_blkaddr = fi->ext.blk_addr; @@ -96,7 +100,9 @@ static int check_extent_cache(struct inode *inode, pgoff_t pgofs, else bh_result->b_size = UINT_MAX; +#ifdef CONFIG_F2FS_STAT_FS sbi->read_hit_ext++; +#endif read_unlock(&fi->ext.ext_lock); return 1; } @@ -199,7 +205,7 @@ struct page *find_data_page(struct inode *inode, pgoff_t index, bool sync) if (dn.data_blkaddr == NEW_ADDR) return ERR_PTR(-EINVAL); - page = grab_cache_page(mapping, index); + page = grab_cache_page_write_begin(mapping, index, AOP_FLAG_NOFS); if (!page) return ERR_PTR(-ENOMEM); @@ -233,18 +239,23 @@ struct page *get_lock_data_page(struct inode *inode, pgoff_t index) struct page *page; int err; +repeat: + page = grab_cache_page_write_begin(mapping, index, AOP_FLAG_NOFS); + if (!page) + return ERR_PTR(-ENOMEM); + set_new_dnode(&dn, inode, NULL, NULL, 0); err = get_dnode_of_data(&dn, index, LOOKUP_NODE); - if (err) + if (err) { + f2fs_put_page(page, 1); return ERR_PTR(err); + } f2fs_put_dnode(&dn); - if (dn.data_blkaddr == NULL_ADDR) + if (dn.data_blkaddr == NULL_ADDR) { + f2fs_put_page(page, 1); return ERR_PTR(-ENOENT); -repeat: - page = grab_cache_page(mapping, index); - if (!page) - return ERR_PTR(-ENOMEM); + } if (PageUptodate(page)) return page; @@ -274,9 +285,10 @@ repeat: * * Also, caller should grab and release a mutex by calling mutex_lock_op() and * mutex_unlock_op(). + * Note that, npage is set only by make_empty_dir. */ -struct page *get_new_data_page(struct inode *inode, pgoff_t index, - bool new_i_size) +struct page *get_new_data_page(struct inode *inode, + struct page *npage, pgoff_t index, bool new_i_size) { struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); struct address_space *mapping = inode->i_mapping; @@ -284,18 +296,20 @@ struct page *get_new_data_page(struct inode *inode, pgoff_t index, struct dnode_of_data dn; int err; - set_new_dnode(&dn, inode, NULL, NULL, 0); + set_new_dnode(&dn, inode, npage, npage, 0); err = get_dnode_of_data(&dn, index, ALLOC_NODE); if (err) return ERR_PTR(err); if (dn.data_blkaddr == NULL_ADDR) { if (reserve_new_block(&dn)) { - f2fs_put_dnode(&dn); + if (!npage) + f2fs_put_dnode(&dn); return ERR_PTR(-ENOSPC); } } - f2fs_put_dnode(&dn); + if (!npage) + f2fs_put_dnode(&dn); repeat: page = grab_cache_page(mapping, index); if (!page) @@ -325,6 +339,8 @@ repeat: if (new_i_size && i_size_read(inode) < ((index + 1) << PAGE_CACHE_SHIFT)) { i_size_write(inode, ((index + 1) << PAGE_CACHE_SHIFT)); + /* Only the directory inode sets new_i_size */ + set_inode_flag(F2FS_I(inode), FI_UPDATE_DIR); mark_inode_dirty_sync(inode); } return page; @@ -481,8 +497,9 @@ int do_write_data_page(struct page *page) * If current allocation needs SSR, * it had better in-place writes for updated data. */ - if (old_blk_addr != NEW_ADDR && !is_cold_data(page) && - need_inplace_update(inode)) { + if (unlikely(old_blk_addr != NEW_ADDR && + !is_cold_data(page) && + need_inplace_update(inode))) { rewrite_data_page(F2FS_SB(inode->i_sb), page, old_blk_addr); } else { @@ -684,6 +701,27 @@ err: return err; } +static int f2fs_write_end(struct file *file, + struct address_space *mapping, + loff_t pos, unsigned len, unsigned copied, + struct page *page, void *fsdata) +{ + struct inode *inode = page->mapping->host; + + SetPageUptodate(page); + set_page_dirty(page); + + if (pos + copied > i_size_read(inode)) { + i_size_write(inode, pos + copied); + mark_inode_dirty(inode); + update_inode_page(inode); + } + + unlock_page(page); + page_cache_release(page); + return copied; +} + static ssize_t f2fs_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov, loff_t offset, unsigned long nr_segs) { @@ -741,7 +779,7 @@ const struct address_space_operations f2fs_dblock_aops = { .writepage = f2fs_write_data_page, .writepages = f2fs_write_data_pages, .write_begin = f2fs_write_begin, - .write_end = nobh_write_end, + .write_end = f2fs_write_end, .set_page_dirty = f2fs_set_data_page_dirty, .invalidatepage = f2fs_invalidate_data_page, .releasepage = f2fs_release_data_page, diff --git a/fs/f2fs/debug.c b/fs/f2fs/debug.c index 8d99437..0d6c6aa 100644 --- a/fs/f2fs/debug.c +++ b/fs/f2fs/debug.c @@ -175,12 +175,12 @@ get_cache: static int stat_show(struct seq_file *s, void *v) { - struct f2fs_stat_info *si, *next; + struct f2fs_stat_info *si; int i = 0; int j; mutex_lock(&f2fs_stat_mutex); - list_for_each_entry_safe(si, next, &f2fs_stat_list, stat_list) { + list_for_each_entry(si, &f2fs_stat_list, stat_list) { char devname[BDEVNAME_SIZE]; update_general_status(si->sbi); diff --git a/fs/f2fs/dir.c b/fs/f2fs/dir.c index 600bb5e..9d1cd42 100644 --- a/fs/f2fs/dir.c +++ b/fs/f2fs/dir.c @@ -13,6 +13,7 @@ #include "f2fs.h" #include "node.h" #include "acl.h" +#include "xattr.h" static unsigned long dir_blocks(struct inode *inode) { @@ -215,9 +216,9 @@ struct f2fs_dir_entry *f2fs_find_entry(struct inode *dir, struct f2fs_dir_entry *f2fs_parent_dir(struct inode *dir, struct page **p) { - struct page *page = NULL; - struct f2fs_dir_entry *de = NULL; - struct f2fs_dentry_block *dentry_blk = NULL; + struct page *page; + struct f2fs_dir_entry *de; + struct f2fs_dentry_block *dentry_blk; page = get_lock_data_page(dir, 0); if (IS_ERR(page)) @@ -264,15 +265,10 @@ void f2fs_set_link(struct inode *dir, struct f2fs_dir_entry *de, f2fs_put_page(page, 1); } -void init_dent_inode(const struct qstr *name, struct page *ipage) +static void init_dent_inode(const struct qstr *name, struct page *ipage) { struct f2fs_node *rn; - if (IS_ERR(ipage)) - return; - - wait_on_page_writeback(ipage); - /* copy name info. to this inode page */ rn = (struct f2fs_node *)page_address(ipage); rn->i.i_namelen = cpu_to_le32(name->len); @@ -280,14 +276,15 @@ void init_dent_inode(const struct qstr *name, struct page *ipage) set_page_dirty(ipage); } -static int make_empty_dir(struct inode *inode, struct inode *parent) +static int make_empty_dir(struct inode *inode, + struct inode *parent, struct page *page) { struct page *dentry_page; struct f2fs_dentry_block *dentry_blk; struct f2fs_dir_entry *de; void *kaddr; - dentry_page = get_new_data_page(inode, 0, true); + dentry_page = get_new_data_page(inode, page, 0, true); if (IS_ERR(dentry_page)) return PTR_ERR(dentry_page); @@ -317,63 +314,76 @@ static int make_empty_dir(struct inode *inode, struct inode *parent) return 0; } -static int init_inode_metadata(struct inode *inode, +static struct page *init_inode_metadata(struct inode *inode, struct inode *dir, const struct qstr *name) { + struct page *page; + int err; + if (is_inode_flag_set(F2FS_I(inode), FI_NEW_INODE)) { - int err; - err = new_inode_page(inode, name); - if (err) - return err; + page = new_inode_page(inode, name); + if (IS_ERR(page)) + return page; if (S_ISDIR(inode->i_mode)) { - err = make_empty_dir(inode, dir); - if (err) { - remove_inode_page(inode); - return err; - } + err = make_empty_dir(inode, dir, page); + if (err) + goto error; } err = f2fs_init_acl(inode, dir); - if (err) { - remove_inode_page(inode); - return err; - } + if (err) + goto error; + + err = f2fs_init_security(inode, dir, name, page); + if (err) + goto error; + + wait_on_page_writeback(page); } else { - struct page *ipage; - ipage = get_node_page(F2FS_SB(dir->i_sb), inode->i_ino); - if (IS_ERR(ipage)) - return PTR_ERR(ipage); - set_cold_node(inode, ipage); - init_dent_inode(name, ipage); - f2fs_put_page(ipage, 1); + page = get_node_page(F2FS_SB(dir->i_sb), inode->i_ino); + if (IS_ERR(page)) + return page; + + wait_on_page_writeback(page); + set_cold_node(inode, page); } + + init_dent_inode(name, page); + + /* + * This file should be checkpointed during fsync. + * We lost i_pino from now on. + */ if (is_inode_flag_set(F2FS_I(inode), FI_INC_LINK)) { + file_lost_pino(inode); inc_nlink(inode); - update_inode_page(inode); } - return 0; + return page; + +error: + f2fs_put_page(page, 1); + remove_inode_page(inode); + return ERR_PTR(err); } static void update_parent_metadata(struct inode *dir, struct inode *inode, unsigned int current_depth) { - bool need_dir_update = false; - if (is_inode_flag_set(F2FS_I(inode), FI_NEW_INODE)) { if (S_ISDIR(inode->i_mode)) { inc_nlink(dir); - need_dir_update = true; + set_inode_flag(F2FS_I(dir), FI_UPDATE_DIR); } clear_inode_flag(F2FS_I(inode), FI_NEW_INODE); } dir->i_mtime = dir->i_ctime = CURRENT_TIME; if (F2FS_I(dir)->i_current_depth != current_depth) { F2FS_I(dir)->i_current_depth = current_depth; - need_dir_update = true; + set_inode_flag(F2FS_I(dir), FI_UPDATE_DIR); } - if (need_dir_update) + if (is_inode_flag_set(F2FS_I(dir), FI_UPDATE_DIR)) update_inode_page(dir); else mark_inode_dirty(dir); @@ -423,6 +433,7 @@ int __f2fs_add_link(struct inode *dir, const struct qstr *name, struct inode *in struct page *dentry_page = NULL; struct f2fs_dentry_block *dentry_blk = NULL; int slots = GET_DENTRY_SLOTS(namelen); + struct page *page; int err = 0; int i; @@ -448,7 +459,7 @@ start: bidx = dir_block_index(level, (le32_to_cpu(dentry_hash) % nbucket)); for (block = bidx; block <= (bidx + nblock - 1); block++) { - dentry_page = get_new_data_page(dir, block, true); + dentry_page = get_new_data_page(dir, NULL, block, true); if (IS_ERR(dentry_page)) return PTR_ERR(dentry_page); @@ -465,12 +476,13 @@ start: ++level; goto start; add_dentry: - err = init_inode_metadata(inode, dir, name); - if (err) - goto fail; - wait_on_page_writeback(dentry_page); + page = init_inode_metadata(inode, dir, name); + if (IS_ERR(page)) { + err = PTR_ERR(page); + goto fail; + } de = &dentry_blk->dentry[bit_pos]; de->hash_code = dentry_hash; de->name_len = cpu_to_le16(namelen); @@ -481,11 +493,14 @@ add_dentry: test_and_set_bit_le(bit_pos + i, &dentry_blk->dentry_bitmap); set_page_dirty(dentry_page); - update_parent_metadata(dir, inode, current_depth); - - /* update parent inode number before releasing dentry page */ + /* we don't need to mark_inode_dirty now */ F2FS_I(inode)->i_pino = dir->i_ino; + update_inode(inode, page); + f2fs_put_page(page, 1); + + update_parent_metadata(dir, inode, current_depth); fail: + clear_inode_flag(F2FS_I(dir), FI_UPDATE_DIR); kunmap(dentry_page); f2fs_put_page(dentry_page, 1); return err; diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 20aab02..467d42d 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -37,21 +37,35 @@ typecheck(unsigned long long, b) && \ ((long long)((a) - (b)) > 0)) -typedef u64 block_t; +typedef u32 block_t; /* + * should not change u32, since it is the on-disk block + * address format, __le32. + */ typedef u32 nid_t; struct f2fs_mount_info { unsigned int opt; }; -static inline __u32 f2fs_crc32(void *buff, size_t len) +#define CRCPOLY_LE 0xedb88320 + +static inline __u32 f2fs_crc32(void *buf, size_t len) { - return crc32_le(F2FS_SUPER_MAGIC, buff, len); + unsigned char *p = (unsigned char *)buf; + __u32 crc = F2FS_SUPER_MAGIC; + int i; + + while (len--) { + crc ^= *p++; + for (i = 0; i < 8; i++) + crc = (crc >> 1) ^ ((crc & 1) ? CRCPOLY_LE : 0); + } + return crc; } -static inline bool f2fs_crc_valid(__u32 blk_crc, void *buff, size_t buff_size) +static inline bool f2fs_crc_valid(__u32 blk_crc, void *buf, size_t buf_size) { - return f2fs_crc32(buff, buff_size) == blk_crc; + return f2fs_crc32(buf, buf_size) == blk_crc; } /* @@ -148,7 +162,7 @@ struct extent_info { * i_advise uses FADVISE_XXX_BIT. We can add additional hints later. */ #define FADVISE_COLD_BIT 0x01 -#define FADVISE_CP_BIT 0x02 +#define FADVISE_LOST_PINO_BIT 0x02 struct f2fs_inode_info { struct inode vfs_inode; /* serve a vfs inode */ @@ -369,7 +383,6 @@ struct f2fs_sb_info { /* for directory inode management */ struct list_head dir_inode_list; /* dir inode list */ spinlock_t dir_inode_lock; /* for dir inode list lock */ - unsigned int n_dirty_dirs; /* # of dir inodes */ /* basic file system units */ unsigned int log_sectors_per_block; /* log2 sectors per block */ @@ -406,12 +419,15 @@ struct f2fs_sb_info { * for stat information. * one is for the LFS mode, and the other is for the SSR mode. */ +#ifdef CONFIG_F2FS_STAT_FS struct f2fs_stat_info *stat_info; /* FS status information */ unsigned int segment_count[2]; /* # of allocated segments */ unsigned int block_count[2]; /* # of allocated blocks */ - unsigned int last_victim[2]; /* last victim segment # */ int total_hit_ext, read_hit_ext; /* extent cache hit ratio */ int bg_gc; /* background gc calls */ + unsigned int n_dirty_dirs; /* # of dir inodes */ +#endif + unsigned int last_victim[2]; /* last victim segment # */ spinlock_t stat_lock; /* lock for stat operations */ }; @@ -495,9 +511,17 @@ static inline void clear_ckpt_flags(struct f2fs_checkpoint *cp, unsigned int f) static inline void mutex_lock_all(struct f2fs_sb_info *sbi) { - int i = 0; - for (; i < NR_GLOBAL_LOCKS; i++) - mutex_lock(&sbi->fs_lock[i]); + int i; + + for (i = 0; i < NR_GLOBAL_LOCKS; i++) { + /* + * This is the only time we take multiple fs_lock[] + * instances; the order is immaterial since we + * always hold cp_mutex, which serializes multiple + * such operations. + */ + mutex_lock_nest_lock(&sbi->fs_lock[i], &sbi->cp_mutex); + } } static inline void mutex_unlock_all(struct f2fs_sb_info *sbi) @@ -843,9 +867,12 @@ static inline int f2fs_clear_bit(unsigned int nr, char *addr) /* used for f2fs_inode_info->flags */ enum { FI_NEW_INODE, /* indicate newly allocated inode */ + FI_DIRTY_INODE, /* indicate inode is dirty or not */ FI_INC_LINK, /* need to increment i_nlink */ FI_ACL_MODE, /* indicate acl mode */ FI_NO_ALLOC, /* should not allocate any blocks */ + FI_UPDATE_DIR, /* should update inode block for consistency */ + FI_DELAY_IPUT, /* used for the recovery */ }; static inline void set_inode_flag(struct f2fs_inode_info *fi, int flag) @@ -878,14 +905,21 @@ static inline int cond_clear_inode_flag(struct f2fs_inode_info *fi, int flag) return 0; } +static inline int f2fs_readonly(struct super_block *sb) +{ + return sb->s_flags & MS_RDONLY; +} + /* * file.c */ int f2fs_sync_file(struct file *, loff_t, loff_t, int); void truncate_data_blocks(struct dnode_of_data *); void f2fs_truncate(struct inode *); +int f2fs_getattr(struct vfsmount *, struct dentry *, struct kstat *); int f2fs_setattr(struct dentry *, struct iattr *); int truncate_hole(struct inode *, pgoff_t, pgoff_t); +int truncate_data_blocks_range(struct dnode_of_data *, int); long f2fs_ioctl(struct file *, unsigned int, unsigned long); long f2fs_compat_ioctl(struct file *, unsigned int, unsigned long); @@ -913,7 +947,6 @@ struct f2fs_dir_entry *f2fs_parent_dir(struct inode *, struct page **); ino_t f2fs_inode_by_name(struct inode *, struct qstr *); void f2fs_set_link(struct inode *, struct f2fs_dir_entry *, struct page *, struct inode *); -void init_dent_inode(const struct qstr *, struct page *); int __f2fs_add_link(struct inode *, const struct qstr *, struct inode *); void f2fs_delete_entry(struct f2fs_dir_entry *, struct page *, struct inode *); int f2fs_make_empty(struct inode *, struct inode *); @@ -948,8 +981,8 @@ void get_node_info(struct f2fs_sb_info *, nid_t, struct node_info *); int get_dnode_of_data(struct dnode_of_data *, pgoff_t, int); int truncate_inode_blocks(struct inode *, pgoff_t); int remove_inode_page(struct inode *); -int new_inode_page(struct inode *, const struct qstr *); -struct page *new_node_page(struct dnode_of_data *, unsigned int); +struct page *new_inode_page(struct inode *, const struct qstr *); +struct page *new_node_page(struct dnode_of_data *, unsigned int, struct page *); void ra_node_page(struct f2fs_sb_info *, nid_t); struct page *get_node_page(struct f2fs_sb_info *, pgoff_t); struct page *get_node_page_ra(struct page *, int); @@ -974,7 +1007,6 @@ void destroy_node_manager_caches(void); */ void f2fs_balance_fs(struct f2fs_sb_info *); void invalidate_blocks(struct f2fs_sb_info *, block_t); -void locate_dirty_segment(struct f2fs_sb_info *, unsigned int); void clear_prefree_segments(struct f2fs_sb_info *); int npages_for_summary_flush(struct f2fs_sb_info *); void allocate_new_segments(struct f2fs_sb_info *); @@ -1011,7 +1043,9 @@ void remove_orphan_inode(struct f2fs_sb_info *, nid_t); int recover_orphan_inodes(struct f2fs_sb_info *); int get_valid_checkpoint(struct f2fs_sb_info *); void set_dirty_dir_page(struct inode *, struct page *); +void add_dirty_dir_inode(struct inode *); void remove_dirty_dir_inode(struct inode *); +struct inode *check_dirty_dir_inode(struct f2fs_sb_info *, nid_t); void sync_dirty_dir_inodes(struct f2fs_sb_info *); void write_checkpoint(struct f2fs_sb_info *, bool); void init_orphan_info(struct f2fs_sb_info *); @@ -1025,7 +1059,7 @@ int reserve_new_block(struct dnode_of_data *); void update_extent_cache(block_t, struct dnode_of_data *); struct page *find_data_page(struct inode *, pgoff_t, bool); struct page *get_lock_data_page(struct inode *, pgoff_t); -struct page *get_new_data_page(struct inode *, pgoff_t, bool); +struct page *get_new_data_page(struct inode *, struct page *, pgoff_t, bool); int f2fs_readpage(struct f2fs_sb_info *, struct page *, block_t, int); int do_write_data_page(struct page *); diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index 1cae864..d2d2b7d 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -63,9 +63,10 @@ static int f2fs_vm_page_mkwrite(struct vm_area_struct *vma, f2fs_put_dnode(&dn); mutex_unlock_op(sbi, ilock); + file_update_time(vma->vm_file); lock_page(page); if (page->mapping != inode->i_mapping || - page_offset(page) >= i_size_read(inode) || + page_offset(page) > i_size_read(inode) || !PageUptodate(page)) { unlock_page(page); err = -EFAULT; @@ -76,10 +77,7 @@ static int f2fs_vm_page_mkwrite(struct vm_area_struct *vma, * check to see if the page is mapped already (no holes) */ if (PageMappedToDisk(page)) - goto out; - - /* fill the page */ - wait_on_page_writeback(page); + goto mapped; /* page is wholly or partially inside EOF */ if (((page->index + 1) << PAGE_CACHE_SHIFT) > i_size_read(inode)) { @@ -90,7 +88,9 @@ static int f2fs_vm_page_mkwrite(struct vm_area_struct *vma, set_page_dirty(page); SetPageUptodate(page); - file_update_time(vma->vm_file); +mapped: + /* fill the page */ + wait_on_page_writeback(page); out: sb_end_pagefault(inode->i_sb); return block_page_mkwrite_return(err); @@ -102,6 +102,24 @@ static const struct vm_operations_struct f2fs_file_vm_ops = { .remap_pages = generic_file_remap_pages, }; +static int get_parent_ino(struct inode *inode, nid_t *pino) +{ + struct dentry *dentry; + + inode = igrab(inode); + dentry = d_find_any_alias(inode); + iput(inode); + if (!dentry) + return 0; + + inode = igrab(dentry->d_parent->d_inode); + dput(dentry); + + *pino = inode->i_ino; + iput(inode); + return 1; +} + int f2fs_sync_file(struct file *file, loff_t start, loff_t end, int datasync) { struct inode *inode = file->f_mapping->host; @@ -114,7 +132,7 @@ int f2fs_sync_file(struct file *file, loff_t start, loff_t end, int datasync) .for_reclaim = 0, }; - if (inode->i_sb->s_flags & MS_RDONLY) + if (f2fs_readonly(inode->i_sb)) return 0; trace_f2fs_sync_file_enter(inode); @@ -134,7 +152,7 @@ int f2fs_sync_file(struct file *file, loff_t start, loff_t end, int datasync) if (!S_ISREG(inode->i_mode) || inode->i_nlink != 1) need_cp = true; - else if (is_cp_file(inode)) + else if (file_wrong_pino(inode)) need_cp = true; else if (!space_for_roll_forward(sbi)) need_cp = true; @@ -142,11 +160,23 @@ int f2fs_sync_file(struct file *file, loff_t start, loff_t end, int datasync) need_cp = true; if (need_cp) { + nid_t pino; + /* all the dirty node pages should be flushed for POR */ ret = f2fs_sync_fs(inode->i_sb, 1); + if (file_wrong_pino(inode) && inode->i_nlink == 1 && + get_parent_ino(inode, &pino)) { + F2FS_I(inode)->i_pino = pino; + file_got_pino(inode); + mark_inode_dirty_sync(inode); + ret = f2fs_write_inode(inode, NULL); + if (ret) + goto out; + } } else { /* if there is no written node page, write its inode page */ while (!sync_node_pages(sbi, inode->i_ino, &wbc)) { + mark_inode_dirty_sync(inode); ret = f2fs_write_inode(inode, NULL); if (ret) goto out; @@ -168,7 +198,7 @@ static int f2fs_file_mmap(struct file *file, struct vm_area_struct *vma) return 0; } -static int truncate_data_blocks_range(struct dnode_of_data *dn, int count) +int truncate_data_blocks_range(struct dnode_of_data *dn, int count) { int nr_free = 0, ofs = dn->ofs_in_node; struct f2fs_sb_info *sbi = F2FS_SB(dn->inode->i_sb); @@ -185,10 +215,10 @@ static int truncate_data_blocks_range(struct dnode_of_data *dn, int count) update_extent_cache(NULL_ADDR, dn); invalidate_blocks(sbi, blkaddr); - dec_valid_block_count(sbi, dn->inode, 1); nr_free++; } if (nr_free) { + dec_valid_block_count(sbi, dn->inode, nr_free); set_page_dirty(dn->node_page); sync_inode_page(dn); } @@ -291,7 +321,7 @@ void f2fs_truncate(struct inode *inode) } } -static int f2fs_getattr(struct vfsmount *mnt, +int f2fs_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat) { struct inode *inode = dentry->d_inode; @@ -387,7 +417,7 @@ static void fill_zero(struct inode *inode, pgoff_t index, f2fs_balance_fs(sbi); ilock = mutex_lock_op(sbi); - page = get_new_data_page(inode, index, false); + page = get_new_data_page(inode, NULL, index, false); mutex_unlock_op(sbi, ilock); if (!IS_ERR(page)) { @@ -575,10 +605,10 @@ long f2fs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) int ret; switch (cmd) { - case FS_IOC_GETFLAGS: + case F2FS_IOC_GETFLAGS: flags = fi->i_flags & FS_FL_USER_VISIBLE; return put_user(flags, (int __user *) arg); - case FS_IOC_SETFLAGS: + case F2FS_IOC_SETFLAGS: { unsigned int oldflags; diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c index 1496159..35f9b1a 100644 --- a/fs/f2fs/gc.c +++ b/fs/f2fs/gc.c @@ -76,7 +76,9 @@ static int gc_thread_func(void *data) else wait_ms = increase_sleep_time(wait_ms); +#ifdef CONFIG_F2FS_STAT_FS sbi->bg_gc++; +#endif /* if return value is not zero, no victim was selected */ if (f2fs_gc(sbi)) @@ -89,23 +91,28 @@ int start_gc_thread(struct f2fs_sb_info *sbi) { struct f2fs_gc_kthread *gc_th; dev_t dev = sbi->sb->s_bdev->bd_dev; + int err = 0; if (!test_opt(sbi, BG_GC)) - return 0; + goto out; gc_th = kmalloc(sizeof(struct f2fs_gc_kthread), GFP_KERNEL); - if (!gc_th) - return -ENOMEM; + if (!gc_th) { + err = -ENOMEM; + goto out; + } sbi->gc_thread = gc_th; init_waitqueue_head(&sbi->gc_thread->gc_wait_queue_head); sbi->gc_thread->f2fs_gc_task = kthread_run(gc_thread_func, sbi, "f2fs_gc-%u:%u", MAJOR(dev), MINOR(dev)); if (IS_ERR(gc_th->f2fs_gc_task)) { + err = PTR_ERR(gc_th->f2fs_gc_task); kfree(gc_th); sbi->gc_thread = NULL; - return -ENOMEM; } - return 0; + +out: + return err; } void stop_gc_thread(struct f2fs_sb_info *sbi) @@ -234,14 +241,14 @@ static int get_victim_by_default(struct f2fs_sb_info *sbi, { struct dirty_seglist_info *dirty_i = DIRTY_I(sbi); struct victim_sel_policy p; - unsigned int secno; + unsigned int secno, max_cost; int nsearched = 0; p.alloc_mode = alloc_mode; select_policy(sbi, gc_type, type, &p); p.min_segno = NULL_SEGNO; - p.min_cost = get_max_cost(sbi, &p); + p.min_cost = max_cost = get_max_cost(sbi, &p); mutex_lock(&dirty_i->seglist_lock); @@ -280,7 +287,7 @@ static int get_victim_by_default(struct f2fs_sb_info *sbi, p.min_cost = cost; } - if (cost == get_max_cost(sbi, &p)) + if (cost == max_cost) continue; if (nsearched++ >= MAX_VICTIM_SEARCH) { @@ -288,8 +295,8 @@ static int get_victim_by_default(struct f2fs_sb_info *sbi, break; } } -got_it: if (p.min_segno != NULL_SEGNO) { +got_it: if (p.alloc_mode == LFS) { secno = GET_SECNO(sbi, p.min_segno); if (gc_type == FG_GC) @@ -314,28 +321,21 @@ static const struct victim_selection default_v_ops = { static struct inode *find_gc_inode(nid_t ino, struct list_head *ilist) { - struct list_head *this; struct inode_entry *ie; - list_for_each(this, ilist) { - ie = list_entry(this, struct inode_entry, list); + list_for_each_entry(ie, ilist, list) if (ie->inode->i_ino == ino) return ie->inode; - } return NULL; } static void add_gc_inode(struct inode *inode, struct list_head *ilist) { - struct list_head *this; - struct inode_entry *new_ie, *ie; + struct inode_entry *new_ie; - list_for_each(this, ilist) { - ie = list_entry(this, struct inode_entry, list); - if (ie->inode == inode) { - iput(inode); - return; - } + if (inode == find_gc_inode(inode->i_ino, ilist)) { + iput(inode); + return; } repeat: new_ie = kmem_cache_alloc(winode_slab, GFP_NOFS); diff --git a/fs/f2fs/inode.c b/fs/f2fs/inode.c index 91ac7f9..2b2d45d1 100644 --- a/fs/f2fs/inode.c +++ b/fs/f2fs/inode.c @@ -109,12 +109,6 @@ struct inode *f2fs_iget(struct super_block *sb, unsigned long ino) ret = do_read_inode(inode); if (ret) goto bad_inode; - - if (!sbi->por_doing && inode->i_nlink == 0) { - ret = -ENOENT; - goto bad_inode; - } - make_now: if (ino == F2FS_NODE_INO(sbi)) { inode->i_mapping->a_ops = &f2fs_node_aops; @@ -130,8 +124,7 @@ make_now: inode->i_op = &f2fs_dir_inode_operations; inode->i_fop = &f2fs_dir_operations; inode->i_mapping->a_ops = &f2fs_dblock_aops; - mapping_set_gfp_mask(inode->i_mapping, GFP_HIGHUSER_MOVABLE | - __GFP_ZERO); + mapping_set_gfp_mask(inode->i_mapping, GFP_F2FS_ZERO); } else if (S_ISLNK(inode->i_mode)) { inode->i_op = &f2fs_symlink_inode_operations; inode->i_mapping->a_ops = &f2fs_dblock_aops; @@ -199,6 +192,7 @@ void update_inode(struct inode *inode, struct page *node_page) set_cold_node(inode, node_page); set_page_dirty(node_page); + clear_inode_flag(F2FS_I(inode), FI_DIRTY_INODE); } int update_inode_page(struct inode *inode) @@ -224,6 +218,9 @@ int f2fs_write_inode(struct inode *inode, struct writeback_control *wbc) inode->i_ino == F2FS_META_INO(sbi)) return 0; + if (!is_inode_flag_set(F2FS_I(inode), FI_DIRTY_INODE)) + return 0; + if (wbc) f2fs_balance_fs(sbi); diff --git a/fs/f2fs/namei.c b/fs/f2fs/namei.c index 47abc97..64c0716 100644 --- a/fs/f2fs/namei.c +++ b/fs/f2fs/namei.c @@ -112,7 +112,7 @@ static inline void set_cold_files(struct f2fs_sb_info *sbi, struct inode *inode, int count = le32_to_cpu(sbi->raw_super->extension_count); for (i = 0; i < count; i++) { if (is_multimedia_file(name, extlist[i])) { - set_cold_file(inode); + file_set_cold(inode); break; } } @@ -149,8 +149,7 @@ static int f2fs_create(struct inode *dir, struct dentry *dentry, umode_t mode, alloc_nid_done(sbi, ino); - if (!sbi->por_doing) - d_instantiate(dentry, inode); + d_instantiate(dentry, inode); unlock_new_inode(inode); return 0; out: @@ -173,7 +172,7 @@ static int f2fs_link(struct dentry *old_dentry, struct inode *dir, f2fs_balance_fs(sbi); inode->i_ctime = CURRENT_TIME; - atomic_inc(&inode->i_count); + ihold(inode); set_inode_flag(F2FS_I(inode), FI_INC_LINK); ilock = mutex_lock_op(sbi); @@ -182,17 +181,10 @@ static int f2fs_link(struct dentry *old_dentry, struct inode *dir, if (err) goto out; - /* - * This file should be checkpointed during fsync. - * We lost i_pino from now on. - */ - set_cp_file(inode); - d_instantiate(dentry, inode); return 0; out: clear_inode_flag(F2FS_I(inode), FI_INC_LINK); - make_bad_inode(inode); iput(inode); return err; } @@ -498,6 +490,7 @@ const struct inode_operations f2fs_dir_inode_operations = { .rmdir = f2fs_rmdir, .mknod = f2fs_mknod, .rename = f2fs_rename, + .getattr = f2fs_getattr, .setattr = f2fs_setattr, .get_acl = f2fs_get_acl, #ifdef CONFIG_F2FS_FS_XATTR @@ -512,6 +505,7 @@ const struct inode_operations f2fs_symlink_inode_operations = { .readlink = generic_readlink, .follow_link = page_follow_link_light, .put_link = page_put_link, + .getattr = f2fs_getattr, .setattr = f2fs_setattr, #ifdef CONFIG_F2FS_FS_XATTR .setxattr = generic_setxattr, @@ -522,6 +516,7 @@ const struct inode_operations f2fs_symlink_inode_operations = { }; const struct inode_operations f2fs_special_inode_operations = { + .getattr = f2fs_getattr, .setattr = f2fs_setattr, .get_acl = f2fs_get_acl, #ifdef CONFIG_F2FS_FS_XATTR diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index 74f3c7b..b418aee 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -408,10 +408,13 @@ int get_dnode_of_data(struct dnode_of_data *dn, pgoff_t index, int mode) level = get_node_path(index, offset, noffset); nids[0] = dn->inode->i_ino; - npage[0] = get_node_page(sbi, nids[0]); - if (IS_ERR(npage[0])) - return PTR_ERR(npage[0]); + npage[0] = dn->inode_page; + if (!npage[0]) { + npage[0] = get_node_page(sbi, nids[0]); + if (IS_ERR(npage[0])) + return PTR_ERR(npage[0]); + } parent = npage[0]; if (level != 0) nids[1] = get_nid(parent, offset[0], true); @@ -430,7 +433,7 @@ int get_dnode_of_data(struct dnode_of_data *dn, pgoff_t index, int mode) } dn->nid = nids[i]; - npage[i] = new_node_page(dn, noffset[i]); + npage[i] = new_node_page(dn, noffset[i], NULL); if (IS_ERR(npage[i])) { alloc_nid_failed(sbi, nids[i]); err = PTR_ERR(npage[i]); @@ -803,22 +806,19 @@ int remove_inode_page(struct inode *inode) return 0; } -int new_inode_page(struct inode *inode, const struct qstr *name) +struct page *new_inode_page(struct inode *inode, const struct qstr *name) { - struct page *page; struct dnode_of_data dn; /* allocate inode page for new inode */ set_new_dnode(&dn, inode, NULL, NULL, inode->i_ino); - page = new_node_page(&dn, 0); - init_dent_inode(name, page); - if (IS_ERR(page)) - return PTR_ERR(page); - f2fs_put_page(page, 1); - return 0; + + /* caller should f2fs_put_page(page, 1); */ + return new_node_page(&dn, 0, NULL); } -struct page *new_node_page(struct dnode_of_data *dn, unsigned int ofs) +struct page *new_node_page(struct dnode_of_data *dn, + unsigned int ofs, struct page *ipage) { struct f2fs_sb_info *sbi = F2FS_SB(dn->inode->i_sb); struct address_space *mapping = sbi->node_inode->i_mapping; @@ -851,7 +851,10 @@ struct page *new_node_page(struct dnode_of_data *dn, unsigned int ofs) set_cold_node(dn->inode, page); dn->node_page = page; - sync_inode_page(dn); + if (ipage) + update_inode(dn->inode, ipage); + else + sync_inode_page(dn); set_page_dirty(page); if (ofs == 0) inc_valid_inode_count(sbi); @@ -1493,9 +1496,10 @@ int recover_inode_page(struct f2fs_sb_info *sbi, struct page *page) new_ni = old_ni; new_ni.ino = ino; + if (!inc_valid_node_count(sbi, NULL, 1)) + WARN_ON(1); set_node_addr(sbi, &new_ni, NEW_ADDR); inc_valid_inode_count(sbi); - f2fs_put_page(ipage, 1); return 0; } diff --git a/fs/f2fs/node.h b/fs/f2fs/node.h index 0a2d72f..c65fb4f 100644 --- a/fs/f2fs/node.h +++ b/fs/f2fs/node.h @@ -275,25 +275,27 @@ static inline nid_t get_nid(struct page *p, int off, bool i) * - Mark cold node blocks in their node footer * - Mark cold data pages in page cache */ -static inline int is_cold_file(struct inode *inode) +static inline int is_file(struct inode *inode, int type) { - return F2FS_I(inode)->i_advise & FADVISE_COLD_BIT; + return F2FS_I(inode)->i_advise & type; } -static inline void set_cold_file(struct inode *inode) +static inline void set_file(struct inode *inode, int type) { - F2FS_I(inode)->i_advise |= FADVISE_COLD_BIT; + F2FS_I(inode)->i_advise |= type; } -static inline int is_cp_file(struct inode *inode) +static inline void clear_file(struct inode *inode, int type) { - return F2FS_I(inode)->i_advise & FADVISE_CP_BIT; + F2FS_I(inode)->i_advise &= ~type; } -static inline void set_cp_file(struct inode *inode) -{ - F2FS_I(inode)->i_advise |= FADVISE_CP_BIT; -} +#define file_is_cold(inode) is_file(inode, FADVISE_COLD_BIT) +#define file_wrong_pino(inode) is_file(inode, FADVISE_LOST_PINO_BIT) +#define file_set_cold(inode) set_file(inode, FADVISE_COLD_BIT) +#define file_lost_pino(inode) set_file(inode, FADVISE_LOST_PINO_BIT) +#define file_clear_cold(inode) clear_file(inode, FADVISE_COLD_BIT) +#define file_got_pino(inode) clear_file(inode, FADVISE_LOST_PINO_BIT) static inline int is_cold_data(struct page *page) { @@ -310,29 +312,16 @@ static inline void clear_cold_data(struct page *page) ClearPageChecked(page); } -static inline int is_cold_node(struct page *page) +static inline int is_node(struct page *page, int type) { void *kaddr = page_address(page); struct f2fs_node *rn = (struct f2fs_node *)kaddr; - unsigned int flag = le32_to_cpu(rn->footer.flag); - return flag & (0x1 << COLD_BIT_SHIFT); + return le32_to_cpu(rn->footer.flag) & (1 << type); } -static inline unsigned char is_fsync_dnode(struct page *page) -{ - void *kaddr = page_address(page); - struct f2fs_node *rn = (struct f2fs_node *)kaddr; - unsigned int flag = le32_to_cpu(rn->footer.flag); - return flag & (0x1 << FSYNC_BIT_SHIFT); -} - -static inline unsigned char is_dent_dnode(struct page *page) -{ - void *kaddr = page_address(page); - struct f2fs_node *rn = (struct f2fs_node *)kaddr; - unsigned int flag = le32_to_cpu(rn->footer.flag); - return flag & (0x1 << DENT_BIT_SHIFT); -} +#define is_cold_node(page) is_node(page, COLD_BIT_SHIFT) +#define is_fsync_dnode(page) is_node(page, FSYNC_BIT_SHIFT) +#define is_dent_dnode(page) is_node(page, DENT_BIT_SHIFT) static inline void set_cold_node(struct inode *inode, struct page *page) { @@ -346,26 +335,15 @@ static inline void set_cold_node(struct inode *inode, struct page *page) rn->footer.flag = cpu_to_le32(flag); } -static inline void set_fsync_mark(struct page *page, int mark) +static inline void set_mark(struct page *page, int mark, int type) { - void *kaddr = page_address(page); - struct f2fs_node *rn = (struct f2fs_node *)kaddr; - unsigned int flag = le32_to_cpu(rn->footer.flag); - if (mark) - flag |= (0x1 << FSYNC_BIT_SHIFT); - else - flag &= ~(0x1 << FSYNC_BIT_SHIFT); - rn->footer.flag = cpu_to_le32(flag); -} - -static inline void set_dentry_mark(struct page *page, int mark) -{ - void *kaddr = page_address(page); - struct f2fs_node *rn = (struct f2fs_node *)kaddr; + struct f2fs_node *rn = (struct f2fs_node *)page_address(page); unsigned int flag = le32_to_cpu(rn->footer.flag); if (mark) - flag |= (0x1 << DENT_BIT_SHIFT); + flag |= (0x1 << type); else - flag &= ~(0x1 << DENT_BIT_SHIFT); + flag &= ~(0x1 << type); rn->footer.flag = cpu_to_le32(flag); } +#define set_dentry_mark(page, mark) set_mark(page, mark, DENT_BIT_SHIFT) +#define set_fsync_mark(page, mark) set_mark(page, mark, FSYNC_BIT_SHIFT) diff --git a/fs/f2fs/recovery.c b/fs/f2fs/recovery.c index 60c8a50..d56d951 100644 --- a/fs/f2fs/recovery.c +++ b/fs/f2fs/recovery.c @@ -40,36 +40,54 @@ static struct fsync_inode_entry *get_fsync_inode(struct list_head *head, static int recover_dentry(struct page *ipage, struct inode *inode) { - struct f2fs_node *raw_node = (struct f2fs_node *)kmap(ipage); + void *kaddr = page_address(ipage); + struct f2fs_node *raw_node = (struct f2fs_node *)kaddr; struct f2fs_inode *raw_inode = &(raw_node->i); - struct qstr name; + nid_t pino = le32_to_cpu(raw_inode->i_pino); struct f2fs_dir_entry *de; + struct qstr name; struct page *page; - struct inode *dir; + struct inode *dir, *einode; int err = 0; - if (!is_dent_dnode(ipage)) - goto out; - - dir = f2fs_iget(inode->i_sb, le32_to_cpu(raw_inode->i_pino)); - if (IS_ERR(dir)) { - err = PTR_ERR(dir); - goto out; + dir = check_dirty_dir_inode(F2FS_SB(inode->i_sb), pino); + if (!dir) { + dir = f2fs_iget(inode->i_sb, pino); + if (IS_ERR(dir)) { + err = PTR_ERR(dir); + goto out; + } + set_inode_flag(F2FS_I(dir), FI_DELAY_IPUT); + add_dirty_dir_inode(dir); } name.len = le32_to_cpu(raw_inode->i_namelen); name.name = raw_inode->i_name; - +retry: de = f2fs_find_entry(dir, &name, &page); - if (de) { + if (de && inode->i_ino == le32_to_cpu(de->ino)) { kunmap(page); f2fs_put_page(page, 0); - } else { - err = __f2fs_add_link(dir, &name, inode); + goto out; + } + if (de) { + einode = f2fs_iget(inode->i_sb, le32_to_cpu(de->ino)); + if (IS_ERR(einode)) { + WARN_ON(1); + if (PTR_ERR(einode) == -ENOENT) + err = -EEXIST; + goto out; + } + f2fs_delete_entry(de, page, einode); + iput(einode); + goto retry; } - iput(dir); + err = __f2fs_add_link(dir, &name, inode); out: - kunmap(ipage); + f2fs_msg(inode->i_sb, KERN_NOTICE, "recover_inode and its dentry: " + "ino = %x, name = %s, dir = %lx, err = %d", + ino_of_node(ipage), raw_inode->i_name, + IS_ERR(dir) ? 0 : dir->i_ino, err); return err; } @@ -79,6 +97,9 @@ static int recover_inode(struct inode *inode, struct page *node_page) struct f2fs_node *raw_node = (struct f2fs_node *)kaddr; struct f2fs_inode *raw_inode = &(raw_node->i); + if (!IS_INODE(node_page)) + return 0; + inode->i_mode = le16_to_cpu(raw_inode->i_mode); i_size_write(inode, le64_to_cpu(raw_inode->i_size)); inode->i_atime.tv_sec = le64_to_cpu(raw_inode->i_mtime); @@ -88,7 +109,12 @@ static int recover_inode(struct inode *inode, struct page *node_page) inode->i_ctime.tv_nsec = le32_to_cpu(raw_inode->i_ctime_nsec); inode->i_mtime.tv_nsec = le32_to_cpu(raw_inode->i_mtime_nsec); - return recover_dentry(node_page, inode); + if (is_dent_dnode(node_page)) + return recover_dentry(node_page, inode); + + f2fs_msg(inode->i_sb, KERN_NOTICE, "recover_inode: ino = %x, name = %s", + ino_of_node(node_page), raw_inode->i_name); + return 0; } static int find_fsync_dnodes(struct f2fs_sb_info *sbi, struct list_head *head) @@ -119,14 +145,13 @@ static int find_fsync_dnodes(struct f2fs_sb_info *sbi, struct list_head *head) lock_page(page); if (cp_ver != cpver_of_node(page)) - goto unlock_out; + break; if (!is_fsync_dnode(page)) goto next; entry = get_fsync_inode(head, ino_of_node(page)); if (entry) { - entry->blkaddr = blkaddr; if (IS_INODE(page) && is_dent_dnode(page)) set_inode_flag(F2FS_I(entry->inode), FI_INC_LINK); @@ -134,48 +159,40 @@ static int find_fsync_dnodes(struct f2fs_sb_info *sbi, struct list_head *head) if (IS_INODE(page) && is_dent_dnode(page)) { err = recover_inode_page(sbi, page); if (err) - goto unlock_out; + break; } /* add this fsync inode to the list */ entry = kmem_cache_alloc(fsync_entry_slab, GFP_NOFS); if (!entry) { err = -ENOMEM; - goto unlock_out; + break; } entry->inode = f2fs_iget(sbi->sb, ino_of_node(page)); if (IS_ERR(entry->inode)) { err = PTR_ERR(entry->inode); kmem_cache_free(fsync_entry_slab, entry); - goto unlock_out; + break; } - list_add_tail(&entry->list, head); - entry->blkaddr = blkaddr; - } - if (IS_INODE(page)) { - err = recover_inode(entry->inode, page); - if (err == -ENOENT) { - goto next; - } else if (err) { - err = -EINVAL; - goto unlock_out; - } } + entry->blkaddr = blkaddr; + + err = recover_inode(entry->inode, page); + if (err && err != -ENOENT) + break; next: /* check next segment */ blkaddr = next_blkaddr_of_node(page); } -unlock_out: unlock_page(page); out: __free_pages(page, 0); return err; } -static void destroy_fsync_dnodes(struct f2fs_sb_info *sbi, - struct list_head *head) +static void destroy_fsync_dnodes(struct list_head *head) { struct fsync_inode_entry *entry, *tmp; @@ -186,15 +203,15 @@ static void destroy_fsync_dnodes(struct f2fs_sb_info *sbi, } } -static void check_index_in_prev_nodes(struct f2fs_sb_info *sbi, - block_t blkaddr) +static int check_index_in_prev_nodes(struct f2fs_sb_info *sbi, + block_t blkaddr, struct dnode_of_data *dn) { struct seg_entry *sentry; unsigned int segno = GET_SEGNO(sbi, blkaddr); unsigned short blkoff = GET_SEGOFF_FROM_SEG0(sbi, blkaddr) & (sbi->blocks_per_seg - 1); struct f2fs_summary sum; - nid_t ino; + nid_t ino, nid; void *kaddr; struct inode *inode; struct page *node_page; @@ -203,7 +220,7 @@ static void check_index_in_prev_nodes(struct f2fs_sb_info *sbi, sentry = get_seg_entry(sbi, segno); if (!f2fs_test_bit(blkoff, sentry->cur_valid_map)) - return; + return 0; /* Get the previous summary */ for (i = CURSEG_WARM_DATA; i <= CURSEG_COLD_DATA; i++) { @@ -222,20 +239,39 @@ static void check_index_in_prev_nodes(struct f2fs_sb_info *sbi, f2fs_put_page(sum_page, 1); } + /* Use the locked dnode page and inode */ + nid = le32_to_cpu(sum.nid); + if (dn->inode->i_ino == nid) { + struct dnode_of_data tdn = *dn; + tdn.nid = nid; + tdn.node_page = dn->inode_page; + tdn.ofs_in_node = le16_to_cpu(sum.ofs_in_node); + truncate_data_blocks_range(&tdn, 1); + return 0; + } else if (dn->nid == nid) { + struct dnode_of_data tdn = *dn; + tdn.ofs_in_node = le16_to_cpu(sum.ofs_in_node); + truncate_data_blocks_range(&tdn, 1); + return 0; + } + /* Get the node page */ - node_page = get_node_page(sbi, le32_to_cpu(sum.nid)); + node_page = get_node_page(sbi, nid); + if (IS_ERR(node_page)) + return PTR_ERR(node_page); bidx = start_bidx_of_node(ofs_of_node(node_page)) + - le16_to_cpu(sum.ofs_in_node); + le16_to_cpu(sum.ofs_in_node); ino = ino_of_node(node_page); f2fs_put_page(node_page, 1); /* Deallocate previous index in the node page */ inode = f2fs_iget(sbi->sb, ino); if (IS_ERR(inode)) - return; + return PTR_ERR(inode); truncate_hole(inode, bidx, bidx + 1); iput(inode); + return 0; } static int do_recover_data(struct f2fs_sb_info *sbi, struct inode *inode, @@ -245,7 +281,7 @@ static int do_recover_data(struct f2fs_sb_info *sbi, struct inode *inode, struct dnode_of_data dn; struct f2fs_summary sum; struct node_info ni; - int err = 0; + int err = 0, recovered = 0; int ilock; start = start_bidx_of_node(ofs_of_node(page)); @@ -283,13 +319,16 @@ static int do_recover_data(struct f2fs_sb_info *sbi, struct inode *inode, } /* Check the previous node page having this index */ - check_index_in_prev_nodes(sbi, dest); + err = check_index_in_prev_nodes(sbi, dest, &dn); + if (err) + goto err; set_summary(&sum, dn.nid, dn.ofs_in_node, ni.version); /* write dummy data page */ recover_data_page(sbi, NULL, &sum, src, dest); update_extent_cache(dest, &dn); + recovered++; } dn.ofs_in_node++; } @@ -305,9 +344,14 @@ static int do_recover_data(struct f2fs_sb_info *sbi, struct inode *inode, set_page_dirty(dn.node_page); recover_node_page(sbi, dn.node_page, &sum, &ni, blkaddr); +err: f2fs_put_dnode(&dn); mutex_unlock_op(sbi, ilock); - return 0; + + f2fs_msg(sbi->sb, KERN_NOTICE, "recover_data: ino = %lx, " + "recovered_data = %d blocks, err = %d", + inode->i_ino, recovered, err); + return err; } static int recover_data(struct f2fs_sb_info *sbi, @@ -340,7 +384,7 @@ static int recover_data(struct f2fs_sb_info *sbi, lock_page(page); if (cp_ver != cpver_of_node(page)) - goto unlock_out; + break; entry = get_fsync_inode(head, ino_of_node(page)); if (!entry) @@ -348,7 +392,7 @@ static int recover_data(struct f2fs_sb_info *sbi, err = do_recover_data(sbi, entry->inode, page, blkaddr); if (err) - goto out; + break; if (entry->blkaddr == blkaddr) { iput(entry->inode); @@ -359,7 +403,6 @@ next: /* check next segment */ blkaddr = next_blkaddr_of_node(page); } -unlock_out: unlock_page(page); out: __free_pages(page, 0); @@ -382,6 +425,7 @@ int recover_fsync_data(struct f2fs_sb_info *sbi) INIT_LIST_HEAD(&inode_list); /* step #1: find fsynced inode numbers */ + sbi->por_doing = 1; err = find_fsync_dnodes(sbi, &inode_list); if (err) goto out; @@ -390,13 +434,13 @@ int recover_fsync_data(struct f2fs_sb_info *sbi) goto out; /* step #2: recover data */ - sbi->por_doing = 1; err = recover_data(sbi, &inode_list, CURSEG_WARM_NODE); - sbi->por_doing = 0; BUG_ON(!list_empty(&inode_list)); out: - destroy_fsync_dnodes(sbi, &inode_list); + destroy_fsync_dnodes(&inode_list); kmem_cache_destroy(fsync_entry_slab); - write_checkpoint(sbi, false); + sbi->por_doing = 0; + if (!err) + write_checkpoint(sbi, false); return err; } diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index d8e84e4..a86d125 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -94,7 +94,7 @@ static void __remove_dirty_segment(struct f2fs_sb_info *sbi, unsigned int segno, * Adding dirty entry into seglist is not critical operation. * If a given segment is one of current working segments, it won't be added. */ -void locate_dirty_segment(struct f2fs_sb_info *sbi, unsigned int segno) +static void locate_dirty_segment(struct f2fs_sb_info *sbi, unsigned int segno) { struct dirty_seglist_info *dirty_i = DIRTY_I(sbi); unsigned short valid_blocks; @@ -126,17 +126,16 @@ void locate_dirty_segment(struct f2fs_sb_info *sbi, unsigned int segno) static void set_prefree_as_free_segments(struct f2fs_sb_info *sbi) { struct dirty_seglist_info *dirty_i = DIRTY_I(sbi); - unsigned int segno, offset = 0; + unsigned int segno = -1; unsigned int total_segs = TOTAL_SEGS(sbi); mutex_lock(&dirty_i->seglist_lock); while (1) { segno = find_next_bit(dirty_i->dirty_segmap[PRE], total_segs, - offset); + segno + 1); if (segno >= total_segs) break; __set_test_and_free(sbi, segno); - offset = segno + 1; } mutex_unlock(&dirty_i->seglist_lock); } @@ -144,17 +143,16 @@ static void set_prefree_as_free_segments(struct f2fs_sb_info *sbi) void clear_prefree_segments(struct f2fs_sb_info *sbi) { struct dirty_seglist_info *dirty_i = DIRTY_I(sbi); - unsigned int segno, offset = 0; + unsigned int segno = -1; unsigned int total_segs = TOTAL_SEGS(sbi); mutex_lock(&dirty_i->seglist_lock); while (1) { segno = find_next_bit(dirty_i->dirty_segmap[PRE], total_segs, - offset); + segno + 1); if (segno >= total_segs) break; - offset = segno + 1; if (test_and_clear_bit(segno, dirty_i->dirty_segmap[PRE])) dirty_i->nr_dirty[PRE]--; @@ -257,11 +255,11 @@ void invalidate_blocks(struct f2fs_sb_info *sbi, block_t addr) * This function should be resided under the curseg_mutex lock */ static void __add_sum_entry(struct f2fs_sb_info *sbi, int type, - struct f2fs_summary *sum, unsigned short offset) + struct f2fs_summary *sum) { struct curseg_info *curseg = CURSEG_I(sbi, type); void *addr = curseg->sum_blk; - addr += offset * sizeof(struct f2fs_summary); + addr += curseg->next_blkoff * sizeof(struct f2fs_summary); memcpy(addr, sum, sizeof(struct f2fs_summary)); return; } @@ -311,64 +309,14 @@ static void write_sum_page(struct f2fs_sb_info *sbi, f2fs_put_page(page, 1); } -static unsigned int check_prefree_segments(struct f2fs_sb_info *sbi, int type) -{ - struct dirty_seglist_info *dirty_i = DIRTY_I(sbi); - unsigned long *prefree_segmap = dirty_i->dirty_segmap[PRE]; - unsigned int segno; - unsigned int ofs = 0; - - /* - * If there is not enough reserved sections, - * we should not reuse prefree segments. - */ - if (has_not_enough_free_secs(sbi, 0)) - return NULL_SEGNO; - - /* - * NODE page should not reuse prefree segment, - * since those information is used for SPOR. - */ - if (IS_NODESEG(type)) - return NULL_SEGNO; -next: - segno = find_next_bit(prefree_segmap, TOTAL_SEGS(sbi), ofs); - ofs += sbi->segs_per_sec; - - if (segno < TOTAL_SEGS(sbi)) { - int i; - - /* skip intermediate segments in a section */ - if (segno % sbi->segs_per_sec) - goto next; - - /* skip if the section is currently used */ - if (sec_usage_check(sbi, GET_SECNO(sbi, segno))) - goto next; - - /* skip if whole section is not prefree */ - for (i = 1; i < sbi->segs_per_sec; i++) - if (!test_bit(segno + i, prefree_segmap)) - goto next; - - /* skip if whole section was not free at the last checkpoint */ - for (i = 0; i < sbi->segs_per_sec; i++) - if (get_seg_entry(sbi, segno + i)->ckpt_valid_blocks) - goto next; - - return segno; - } - return NULL_SEGNO; -} - static int is_next_segment_free(struct f2fs_sb_info *sbi, int type) { struct curseg_info *curseg = CURSEG_I(sbi, type); - unsigned int segno = curseg->segno; + unsigned int segno = curseg->segno + 1; struct free_segmap_info *free_i = FREE_I(sbi); - if (segno + 1 < TOTAL_SEGS(sbi) && (segno + 1) % sbi->segs_per_sec) - return !test_bit(segno + 1, free_i->free_segmap); + if (segno < TOTAL_SEGS(sbi) && segno % sbi->segs_per_sec) + return !test_bit(segno, free_i->free_segmap); return 0; } @@ -495,7 +443,7 @@ static void new_curseg(struct f2fs_sb_info *sbi, int type, bool new_sec) int dir = ALLOC_LEFT; write_sum_page(sbi, curseg->sum_blk, - GET_SUM_BLOCK(sbi, curseg->segno)); + GET_SUM_BLOCK(sbi, segno)); if (type == CURSEG_WARM_DATA || type == CURSEG_COLD_DATA) dir = ALLOC_RIGHT; @@ -599,11 +547,7 @@ static void allocate_segment_by_default(struct f2fs_sb_info *sbi, goto out; } - curseg->next_segno = check_prefree_segments(sbi, type); - - if (curseg->next_segno != NULL_SEGNO) - change_curseg(sbi, type, false); - else if (type == CURSEG_WARM_NODE) + if (type == CURSEG_WARM_NODE) new_curseg(sbi, type, false); else if (curseg->alloc_type == LFS && is_next_segment_free(sbi, type)) new_curseg(sbi, type, false); @@ -612,7 +556,10 @@ static void allocate_segment_by_default(struct f2fs_sb_info *sbi, else new_curseg(sbi, type, false); out: +#ifdef CONFIG_F2FS_STAT_FS sbi->segment_count[curseg->alloc_type]++; +#endif + return; } void allocate_new_segments(struct f2fs_sb_info *sbi) @@ -795,7 +742,7 @@ static int __get_segment_type_6(struct page *page, enum page_type p_type) if (S_ISDIR(inode->i_mode)) return CURSEG_HOT_DATA; - else if (is_cold_data(page) || is_cold_file(inode)) + else if (is_cold_data(page) || file_is_cold(inode)) return CURSEG_COLD_DATA; else return CURSEG_WARM_DATA; @@ -844,11 +791,13 @@ static void do_write_page(struct f2fs_sb_info *sbi, struct page *page, * because, this function updates a summary entry in the * current summary block. */ - __add_sum_entry(sbi, type, sum, curseg->next_blkoff); + __add_sum_entry(sbi, type, sum); mutex_lock(&sit_i->sentry_lock); __refresh_next_blkoff(sbi, curseg); +#ifdef CONFIG_F2FS_STAT_FS sbi->block_count[curseg->alloc_type]++; +#endif /* * SIT information should be updated before segment allocation, @@ -943,7 +892,7 @@ void recover_data_page(struct f2fs_sb_info *sbi, curseg->next_blkoff = GET_SEGOFF_FROM_SEG0(sbi, new_blkaddr) & (sbi->blocks_per_seg - 1); - __add_sum_entry(sbi, type, sum, curseg->next_blkoff); + __add_sum_entry(sbi, type, sum); refresh_sit_entry(sbi, old_blkaddr, new_blkaddr); @@ -980,7 +929,7 @@ void rewrite_node_page(struct f2fs_sb_info *sbi, } curseg->next_blkoff = GET_SEGOFF_FROM_SEG0(sbi, new_blkaddr) & (sbi->blocks_per_seg - 1); - __add_sum_entry(sbi, type, sum, curseg->next_blkoff); + __add_sum_entry(sbi, type, sum); /* change the current log to the next block addr in advance */ if (next_segno != segno) { @@ -1579,13 +1528,13 @@ static void init_dirty_segmap(struct f2fs_sb_info *sbi) { struct dirty_seglist_info *dirty_i = DIRTY_I(sbi); struct free_segmap_info *free_i = FREE_I(sbi); - unsigned int segno = 0, offset = 0; + unsigned int segno = 0, offset = 0, total_segs = TOTAL_SEGS(sbi); unsigned short valid_blocks; - while (segno < TOTAL_SEGS(sbi)) { + while (1) { /* find dirty segment based on free segmap */ - segno = find_next_inuse(free_i, TOTAL_SEGS(sbi), offset); - if (segno >= TOTAL_SEGS(sbi)) + segno = find_next_inuse(free_i, total_segs, offset); + if (segno >= total_segs) break; offset = segno + 1; valid_blocks = get_valid_blocks(sbi, segno, 0); diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index 8555f7d..75c7dc3 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -34,7 +34,7 @@ static struct kmem_cache *f2fs_inode_cachep; enum { - Opt_gc_background_off, + Opt_gc_background, Opt_disable_roll_forward, Opt_discard, Opt_noheap, @@ -46,7 +46,7 @@ enum { }; static match_table_t f2fs_tokens = { - {Opt_gc_background_off, "background_gc_off"}, + {Opt_gc_background, "background_gc=%s"}, {Opt_disable_roll_forward, "disable_roll_forward"}, {Opt_discard, "discard"}, {Opt_noheap, "no_heap"}, @@ -76,6 +76,91 @@ static void init_once(void *foo) inode_init_once(&fi->vfs_inode); } +static int parse_options(struct super_block *sb, char *options) +{ + struct f2fs_sb_info *sbi = F2FS_SB(sb); + substring_t args[MAX_OPT_ARGS]; + char *p, *name; + int arg = 0; + + if (!options) + return 0; + + while ((p = strsep(&options, ",")) != NULL) { + int token; + if (!*p) + continue; + /* + * Initialize args struct so we know whether arg was + * found; some options take optional arguments. + */ + args[0].to = args[0].from = NULL; + token = match_token(p, f2fs_tokens, args); + + switch (token) { + case Opt_gc_background: + name = match_strdup(&args[0]); + + if (!name) + return -ENOMEM; + if (!strncmp(name, "on", 2)) + set_opt(sbi, BG_GC); + else if (!strncmp(name, "off", 3)) + clear_opt(sbi, BG_GC); + else { + kfree(name); + return -EINVAL; + } + kfree(name); + break; + case Opt_disable_roll_forward: + set_opt(sbi, DISABLE_ROLL_FORWARD); + break; + case Opt_discard: + set_opt(sbi, DISCARD); + break; + case Opt_noheap: + set_opt(sbi, NOHEAP); + break; +#ifdef CONFIG_F2FS_FS_XATTR + case Opt_nouser_xattr: + clear_opt(sbi, XATTR_USER); + break; +#else + case Opt_nouser_xattr: + f2fs_msg(sb, KERN_INFO, + "nouser_xattr options not supported"); + break; +#endif +#ifdef CONFIG_F2FS_FS_POSIX_ACL + case Opt_noacl: + clear_opt(sbi, POSIX_ACL); + break; +#else + case Opt_noacl: + f2fs_msg(sb, KERN_INFO, "noacl options not supported"); + break; +#endif + case Opt_active_logs: + if (args->from && match_int(args, &arg)) + return -EINVAL; + if (arg != 2 && arg != 4 && arg != NR_CURSEG_TYPE) + return -EINVAL; + sbi->active_logs = arg; + break; + case Opt_disable_ext_identify: + set_opt(sbi, DISABLE_EXT_IDENTIFY); + break; + default: + f2fs_msg(sb, KERN_ERR, + "Unrecognized mount option \"%s\" or missing value", + p); + return -EINVAL; + } + } + return 0; +} + static struct inode *f2fs_alloc_inode(struct super_block *sb) { struct f2fs_inode_info *fi; @@ -112,6 +197,17 @@ static int f2fs_drop_inode(struct inode *inode) return generic_drop_inode(inode); } +/* + * f2fs_dirty_inode() is called from __mark_inode_dirty() + * + * We should call set_dirty_inode to write the dirty inode through write_inode. + */ +static void f2fs_dirty_inode(struct inode *inode, int flags) +{ + set_inode_flag(F2FS_I(inode), FI_DIRTY_INODE); + return; +} + static void f2fs_i_callback(struct rcu_head *head) { struct inode *inode = container_of(head, struct inode, i_rcu); @@ -170,7 +266,7 @@ static int f2fs_freeze(struct super_block *sb) { int err; - if (sb->s_flags & MS_RDONLY) + if (f2fs_readonly(sb)) return 0; err = f2fs_sync_fs(sb, 1); @@ -214,10 +310,10 @@ static int f2fs_show_options(struct seq_file *seq, struct dentry *root) { struct f2fs_sb_info *sbi = F2FS_SB(root->d_sb); - if (test_opt(sbi, BG_GC)) - seq_puts(seq, ",background_gc_on"); + if (!(root->d_sb->s_flags & MS_RDONLY) && test_opt(sbi, BG_GC)) + seq_printf(seq, ",background_gc=%s", "on"); else - seq_puts(seq, ",background_gc_off"); + seq_printf(seq, ",background_gc=%s", "off"); if (test_opt(sbi, DISABLE_ROLL_FORWARD)) seq_puts(seq, ",disable_roll_forward"); if (test_opt(sbi, DISCARD)) @@ -244,11 +340,64 @@ static int f2fs_show_options(struct seq_file *seq, struct dentry *root) return 0; } +static int f2fs_remount(struct super_block *sb, int *flags, char *data) +{ + struct f2fs_sb_info *sbi = F2FS_SB(sb); + struct f2fs_mount_info org_mount_opt; + int err, active_logs; + + /* + * Save the old mount options in case we + * need to restore them. + */ + org_mount_opt = sbi->mount_opt; + active_logs = sbi->active_logs; + + /* parse mount options */ + err = parse_options(sb, data); + if (err) + goto restore_opts; + + /* + * Previous and new state of filesystem is RO, + * so no point in checking GC conditions. + */ + if ((sb->s_flags & MS_RDONLY) && (*flags & MS_RDONLY)) + goto skip; + + /* + * We stop the GC thread if FS is mounted as RO + * or if background_gc = off is passed in mount + * option. Also sync the filesystem. + */ + if ((*flags & MS_RDONLY) || !test_opt(sbi, BG_GC)) { + if (sbi->gc_thread) { + stop_gc_thread(sbi); + f2fs_sync_fs(sb, 1); + } + } else if (test_opt(sbi, BG_GC) && !sbi->gc_thread) { + err = start_gc_thread(sbi); + if (err) + goto restore_opts; + } +skip: + /* Update the POSIXACL Flag */ + sb->s_flags = (sb->s_flags & ~MS_POSIXACL) | + (test_opt(sbi, POSIX_ACL) ? MS_POSIXACL : 0); + return 0; + +restore_opts: + sbi->mount_opt = org_mount_opt; + sbi->active_logs = active_logs; + return err; +} + static struct super_operations f2fs_sops = { .alloc_inode = f2fs_alloc_inode, .drop_inode = f2fs_drop_inode, .destroy_inode = f2fs_destroy_inode, .write_inode = f2fs_write_inode, + .dirty_inode = f2fs_dirty_inode, .show_options = f2fs_show_options, .evict_inode = f2fs_evict_inode, .put_super = f2fs_put_super, @@ -256,6 +405,7 @@ static struct super_operations f2fs_sops = { .freeze_fs = f2fs_freeze, .unfreeze_fs = f2fs_unfreeze, .statfs = f2fs_statfs, + .remount_fs = f2fs_remount, }; static struct inode *f2fs_nfs_get_inode(struct super_block *sb, @@ -303,79 +453,6 @@ static const struct export_operations f2fs_export_ops = { .get_parent = f2fs_get_parent, }; -static int parse_options(struct super_block *sb, struct f2fs_sb_info *sbi, - char *options) -{ - substring_t args[MAX_OPT_ARGS]; - char *p; - int arg = 0; - - if (!options) - return 0; - - while ((p = strsep(&options, ",")) != NULL) { - int token; - if (!*p) - continue; - /* - * Initialize args struct so we know whether arg was - * found; some options take optional arguments. - */ - args[0].to = args[0].from = NULL; - token = match_token(p, f2fs_tokens, args); - - switch (token) { - case Opt_gc_background_off: - clear_opt(sbi, BG_GC); - break; - case Opt_disable_roll_forward: - set_opt(sbi, DISABLE_ROLL_FORWARD); - break; - case Opt_discard: - set_opt(sbi, DISCARD); - break; - case Opt_noheap: - set_opt(sbi, NOHEAP); - break; -#ifdef CONFIG_F2FS_FS_XATTR - case Opt_nouser_xattr: - clear_opt(sbi, XATTR_USER); - break; -#else - case Opt_nouser_xattr: - f2fs_msg(sb, KERN_INFO, - "nouser_xattr options not supported"); - break; -#endif -#ifdef CONFIG_F2FS_FS_POSIX_ACL - case Opt_noacl: - clear_opt(sbi, POSIX_ACL); - break; -#else - case Opt_noacl: - f2fs_msg(sb, KERN_INFO, "noacl options not supported"); - break; -#endif - case Opt_active_logs: - if (args->from && match_int(args, &arg)) - return -EINVAL; - if (arg != 2 && arg != 4 && arg != NR_CURSEG_TYPE) - return -EINVAL; - sbi->active_logs = arg; - break; - case Opt_disable_ext_identify: - set_opt(sbi, DISABLE_EXT_IDENTIFY); - break; - default: - f2fs_msg(sb, KERN_ERR, - "Unrecognized mount option \"%s\" or missing value", - p); - return -EINVAL; - } - } - return 0; -} - static loff_t max_file_size(unsigned bits) { loff_t result = ADDRS_PER_INODE; @@ -541,6 +618,7 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent) if (err) goto free_sb_buf; } + sb->s_fs_info = sbi; /* init some FS parameters */ sbi->active_logs = NR_CURSEG_TYPE; @@ -553,7 +631,7 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent) set_opt(sbi, POSIX_ACL); #endif /* parse mount options */ - err = parse_options(sb, sbi, (char *)data); + err = parse_options(sb, (char *)data); if (err) goto free_sb_buf; @@ -565,7 +643,6 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent) sb->s_xattr = f2fs_xattr_handlers; sb->s_export_op = &f2fs_export_ops; sb->s_magic = F2FS_SUPER_MAGIC; - sb->s_fs_info = sbi; sb->s_time_gran = 1; sb->s_flags = (sb->s_flags & ~MS_POSIXACL) | (test_opt(sbi, POSIX_ACL) ? MS_POSIXACL : 0); @@ -674,10 +751,16 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent) "Cannot recover all fsync data errno=%ld", err); } - /* After POR, we can run background GC thread */ - err = start_gc_thread(sbi); - if (err) - goto fail; + /* + * If filesystem is not mounted as read-only then + * do start the gc_thread. + */ + if (!(sb->s_flags & MS_RDONLY)) { + /* After POR, we can run background GC thread.*/ + err = start_gc_thread(sbi); + if (err) + goto fail; + } err = f2fs_build_stats(sbi); if (err) diff --git a/fs/f2fs/xattr.c b/fs/f2fs/xattr.c index 0b02dce..3ab07ec 100644 --- a/fs/f2fs/xattr.c +++ b/fs/f2fs/xattr.c @@ -20,6 +20,7 @@ */ #include <linux/rwsem.h> #include <linux/f2fs_fs.h> +#include <linux/security.h> #include "f2fs.h" #include "xattr.h" @@ -43,6 +44,10 @@ static size_t f2fs_xattr_generic_list(struct dentry *dentry, char *list, prefix = XATTR_TRUSTED_PREFIX; prefix_len = XATTR_TRUSTED_PREFIX_LEN; break; + case F2FS_XATTR_INDEX_SECURITY: + prefix = XATTR_SECURITY_PREFIX; + prefix_len = XATTR_SECURITY_PREFIX_LEN; + break; default: return -EINVAL; } @@ -50,7 +55,7 @@ static size_t f2fs_xattr_generic_list(struct dentry *dentry, char *list, total_len = prefix_len + name_len + 1; if (list && total_len <= list_size) { memcpy(list, prefix, prefix_len); - memcpy(list+prefix_len, name, name_len); + memcpy(list + prefix_len, name, name_len); list[prefix_len + name_len] = '\0'; } return total_len; @@ -70,13 +75,14 @@ static int f2fs_xattr_generic_get(struct dentry *dentry, const char *name, if (!capable(CAP_SYS_ADMIN)) return -EPERM; break; + case F2FS_XATTR_INDEX_SECURITY: + break; default: return -EINVAL; } if (strcmp(name, "") == 0) return -EINVAL; - return f2fs_getxattr(dentry->d_inode, type, name, - buffer, size); + return f2fs_getxattr(dentry->d_inode, type, name, buffer, size); } static int f2fs_xattr_generic_set(struct dentry *dentry, const char *name, @@ -93,13 +99,15 @@ static int f2fs_xattr_generic_set(struct dentry *dentry, const char *name, if (!capable(CAP_SYS_ADMIN)) return -EPERM; break; + case F2FS_XATTR_INDEX_SECURITY: + break; default: return -EINVAL; } if (strcmp(name, "") == 0) return -EINVAL; - return f2fs_setxattr(dentry->d_inode, type, name, value, size); + return f2fs_setxattr(dentry->d_inode, type, name, value, size, NULL); } static size_t f2fs_xattr_advise_list(struct dentry *dentry, char *list, @@ -145,6 +153,31 @@ static int f2fs_xattr_advise_set(struct dentry *dentry, const char *name, return 0; } +#ifdef CONFIG_F2FS_FS_SECURITY +static int f2fs_initxattrs(struct inode *inode, const struct xattr *xattr_array, + void *page) +{ + const struct xattr *xattr; + int err = 0; + + for (xattr = xattr_array; xattr->name != NULL; xattr++) { + err = f2fs_setxattr(inode, F2FS_XATTR_INDEX_SECURITY, + xattr->name, xattr->value, + xattr->value_len, (struct page *)page); + if (err < 0) + break; + } + return err; +} + +int f2fs_init_security(struct inode *inode, struct inode *dir, + const struct qstr *qstr, struct page *ipage) +{ + return security_inode_init_security(inode, dir, qstr, + &f2fs_initxattrs, ipage); +} +#endif + const struct xattr_handler f2fs_xattr_user_handler = { .prefix = XATTR_USER_PREFIX, .flags = F2FS_XATTR_INDEX_USER, @@ -169,6 +202,14 @@ const struct xattr_handler f2fs_xattr_advise_handler = { .set = f2fs_xattr_advise_set, }; +const struct xattr_handler f2fs_xattr_security_handler = { + .prefix = XATTR_SECURITY_PREFIX, + .flags = F2FS_XATTR_INDEX_SECURITY, + .list = f2fs_xattr_generic_list, + .get = f2fs_xattr_generic_get, + .set = f2fs_xattr_generic_set, +}; + static const struct xattr_handler *f2fs_xattr_handler_map[] = { [F2FS_XATTR_INDEX_USER] = &f2fs_xattr_user_handler, #ifdef CONFIG_F2FS_FS_POSIX_ACL @@ -176,6 +217,9 @@ static const struct xattr_handler *f2fs_xattr_handler_map[] = { [F2FS_XATTR_INDEX_POSIX_ACL_DEFAULT] = &f2fs_xattr_acl_default_handler, #endif [F2FS_XATTR_INDEX_TRUSTED] = &f2fs_xattr_trusted_handler, +#ifdef CONFIG_F2FS_FS_SECURITY + [F2FS_XATTR_INDEX_SECURITY] = &f2fs_xattr_security_handler, +#endif [F2FS_XATTR_INDEX_ADVISE] = &f2fs_xattr_advise_handler, }; @@ -186,6 +230,9 @@ const struct xattr_handler *f2fs_xattr_handlers[] = { &f2fs_xattr_acl_default_handler, #endif &f2fs_xattr_trusted_handler, +#ifdef CONFIG_F2FS_FS_SECURITY + &f2fs_xattr_security_handler, +#endif &f2fs_xattr_advise_handler, NULL, }; @@ -218,6 +265,8 @@ int f2fs_getxattr(struct inode *inode, int name_index, const char *name, return -ENODATA; page = get_node_page(sbi, fi->i_xattr_nid); + if (IS_ERR(page)) + return PTR_ERR(page); base_addr = page_address(page); list_for_each_xattr(entry, base_addr) { @@ -268,6 +317,8 @@ ssize_t f2fs_listxattr(struct dentry *dentry, char *buffer, size_t buffer_size) return 0; page = get_node_page(sbi, fi->i_xattr_nid); + if (IS_ERR(page)) + return PTR_ERR(page); base_addr = page_address(page); list_for_each_xattr(entry, base_addr) { @@ -296,7 +347,7 @@ cleanup: } int f2fs_setxattr(struct inode *inode, int name_index, const char *name, - const void *value, size_t value_len) + const void *value, size_t value_len, struct page *ipage) { struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); struct f2fs_inode_info *fi = F2FS_I(inode); @@ -335,7 +386,7 @@ int f2fs_setxattr(struct inode *inode, int name_index, const char *name, set_new_dnode(&dn, inode, NULL, NULL, fi->i_xattr_nid); mark_inode_dirty(inode); - page = new_node_page(&dn, XATTR_NODE_OFFSET); + page = new_node_page(&dn, XATTR_NODE_OFFSET, ipage); if (IS_ERR(page)) { alloc_nid_failed(sbi, fi->i_xattr_nid); fi->i_xattr_nid = 0; @@ -435,7 +486,10 @@ int f2fs_setxattr(struct inode *inode, int name_index, const char *name, inode->i_ctime = CURRENT_TIME; clear_inode_flag(fi, FI_ACL_MODE); } - update_inode_page(inode); + if (ipage) + update_inode(inode, ipage); + else + update_inode_page(inode); mutex_unlock_op(sbi, ilock); return 0; diff --git a/fs/f2fs/xattr.h b/fs/f2fs/xattr.h index 49c9558..3c0817b 100644 --- a/fs/f2fs/xattr.h +++ b/fs/f2fs/xattr.h @@ -112,21 +112,19 @@ extern const struct xattr_handler f2fs_xattr_trusted_handler; extern const struct xattr_handler f2fs_xattr_acl_access_handler; extern const struct xattr_handler f2fs_xattr_acl_default_handler; extern const struct xattr_handler f2fs_xattr_advise_handler; +extern const struct xattr_handler f2fs_xattr_security_handler; extern const struct xattr_handler *f2fs_xattr_handlers[]; -extern int f2fs_setxattr(struct inode *inode, int name_index, const char *name, - const void *value, size_t value_len); -extern int f2fs_getxattr(struct inode *inode, int name_index, const char *name, - void *buffer, size_t buffer_size); -extern ssize_t f2fs_listxattr(struct dentry *dentry, char *buffer, - size_t buffer_size); - +extern int f2fs_setxattr(struct inode *, int, const char *, + const void *, size_t, struct page *); +extern int f2fs_getxattr(struct inode *, int, const char *, void *, size_t); +extern ssize_t f2fs_listxattr(struct dentry *, char *, size_t); #else #define f2fs_xattr_handlers NULL static inline int f2fs_setxattr(struct inode *inode, int name_index, - const char *name, const void *value, size_t value_len) + const char *name, const void *value, size_t value_len) { return -EOPNOTSUPP; } @@ -142,4 +140,14 @@ static inline ssize_t f2fs_listxattr(struct dentry *dentry, char *buffer, } #endif +#ifdef CONFIG_F2FS_FS_SECURITY +extern int f2fs_init_security(struct inode *, struct inode *, + const struct qstr *, struct page *); +#else +static inline int f2fs_init_security(struct inode *inode, struct inode *dir, + const struct qstr *qstr, struct page *ipage) +{ + return 0; +} +#endif #endif /* __F2FS_XATTR_H__ */ diff --git a/include/linux/f2fs_fs.h b/include/linux/f2fs_fs.h index df6fab8..383d5e3 100644 --- a/include/linux/f2fs_fs.h +++ b/include/linux/f2fs_fs.h @@ -20,8 +20,8 @@ #define F2FS_BLKSIZE 4096 /* support only 4KB block */ #define F2FS_MAX_EXTENSION 64 /* # of extension entries */ -#define NULL_ADDR 0x0U -#define NEW_ADDR -1U +#define NULL_ADDR ((block_t)0) /* used as block_t addresses */ +#define NEW_ADDR ((block_t)-1) /* used as block_t addresses */ #define F2FS_ROOT_INO(sbi) (sbi->root_ino_num) #define F2FS_NODE_INO(sbi) (sbi->node_ino_num) |