diff options
Diffstat (limited to 'fs')
67 files changed, 979 insertions, 750 deletions
@@ -793,6 +793,8 @@ void exit_aio(struct mm_struct *mm) for (i = 0; i < table->nr; ++i) { struct kioctx *ctx = table->table[i]; + struct completion requests_done = + COMPLETION_INITIALIZER_ONSTACK(requests_done); if (!ctx) continue; @@ -804,7 +806,10 @@ void exit_aio(struct mm_struct *mm) * that it needs to unmap the area, just set it to 0. */ ctx->mmap_size = 0; - kill_ioctx(mm, ctx, NULL); + kill_ioctx(mm, ctx, &requests_done); + + /* Wait until all IO for the context are done. */ + wait_for_completion(&requests_done); } RCU_INIT_POINTER(mm->ioctx_table, NULL); @@ -1111,6 +1116,12 @@ static long aio_read_events_ring(struct kioctx *ctx, tail = ring->tail; kunmap_atomic(ring); + /* + * Ensure that once we've read the current tail pointer, that + * we also see the events that were stored up to the tail. + */ + smp_rmb(); + pr_debug("h%u t%u m%u\n", head, tail, ctx->nr_events); if (head == tail) diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index 36861b7..ff1cc03 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -1966,7 +1966,7 @@ int btrfs_sync_file(struct file *file, loff_t start, loff_t end, int datasync) btrfs_init_log_ctx(&ctx); - ret = btrfs_log_dentry_safe(trans, root, dentry, &ctx); + ret = btrfs_log_dentry_safe(trans, root, dentry, start, end, &ctx); if (ret < 0) { /* Fallthrough and commit/free transaction. */ ret = 1; diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 9c194bd..016c403 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -778,8 +778,12 @@ retry: ins.offset, BTRFS_ORDERED_COMPRESSED, async_extent->compress_type); - if (ret) + if (ret) { + btrfs_drop_extent_cache(inode, async_extent->start, + async_extent->start + + async_extent->ram_size - 1, 0); goto out_free_reserve; + } /* * clear dirty, set writeback and unlock the pages. @@ -971,14 +975,14 @@ static noinline int cow_file_range(struct inode *inode, ret = btrfs_add_ordered_extent(inode, start, ins.objectid, ram_size, cur_alloc_size, 0); if (ret) - goto out_reserve; + goto out_drop_extent_cache; if (root->root_key.objectid == BTRFS_DATA_RELOC_TREE_OBJECTID) { ret = btrfs_reloc_clone_csums(inode, start, cur_alloc_size); if (ret) - goto out_reserve; + goto out_drop_extent_cache; } if (disk_num_bytes < cur_alloc_size) @@ -1006,6 +1010,8 @@ static noinline int cow_file_range(struct inode *inode, out: return ret; +out_drop_extent_cache: + btrfs_drop_extent_cache(inode, start, start + ram_size - 1, 0); out_reserve: btrfs_free_reserved_extent(root, ins.objectid, ins.offset, 1); out_unlock: @@ -4242,7 +4248,8 @@ out: btrfs_abort_transaction(trans, root, ret); } error: - if (last_size != (u64)-1) + if (last_size != (u64)-1 && + root->root_key.objectid != BTRFS_TREE_LOG_OBJECTID) btrfs_ordered_update_i_size(inode, last_size, NULL); btrfs_free_path(path); return err; @@ -5627,6 +5634,17 @@ int btrfs_set_inode_index(struct inode *dir, u64 *index) return ret; } +static int btrfs_insert_inode_locked(struct inode *inode) +{ + struct btrfs_iget_args args; + args.location = &BTRFS_I(inode)->location; + args.root = BTRFS_I(inode)->root; + + return insert_inode_locked4(inode, + btrfs_inode_hash(inode->i_ino, BTRFS_I(inode)->root), + btrfs_find_actor, &args); +} + static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct inode *dir, @@ -5719,10 +5737,19 @@ static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans, sizes[1] = name_len + sizeof(*ref); } + location = &BTRFS_I(inode)->location; + location->objectid = objectid; + location->offset = 0; + btrfs_set_key_type(location, BTRFS_INODE_ITEM_KEY); + + ret = btrfs_insert_inode_locked(inode); + if (ret < 0) + goto fail; + path->leave_spinning = 1; ret = btrfs_insert_empty_items(trans, root, path, key, sizes, nitems); if (ret != 0) - goto fail; + goto fail_unlock; inode_init_owner(inode, dir, mode); inode_set_bytes(inode, 0); @@ -5745,11 +5772,6 @@ static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans, btrfs_mark_buffer_dirty(path->nodes[0]); btrfs_free_path(path); - location = &BTRFS_I(inode)->location; - location->objectid = objectid; - location->offset = 0; - btrfs_set_key_type(location, BTRFS_INODE_ITEM_KEY); - btrfs_inherit_iflags(inode, dir); if (S_ISREG(mode)) { @@ -5760,7 +5782,6 @@ static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans, BTRFS_INODE_NODATASUM; } - btrfs_insert_inode_hash(inode); inode_tree_add(inode); trace_btrfs_inode_new(inode); @@ -5775,6 +5796,9 @@ static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans, btrfs_ino(inode), root->root_key.objectid, ret); return inode; + +fail_unlock: + unlock_new_inode(inode); fail: if (dir && name) BTRFS_I(dir)->index_cnt--; @@ -5909,28 +5933,28 @@ static int btrfs_mknod(struct inode *dir, struct dentry *dentry, goto out_unlock; } - err = btrfs_init_inode_security(trans, inode, dir, &dentry->d_name); - if (err) { - drop_inode = 1; - goto out_unlock; - } - /* * If the active LSM wants to access the inode during * d_instantiate it needs these. Smack checks to see * if the filesystem supports xattrs by looking at the * ops vector. */ - inode->i_op = &btrfs_special_inode_operations; - err = btrfs_add_nondir(trans, dir, dentry, inode, 0, index); + init_special_inode(inode, inode->i_mode, rdev); + + err = btrfs_init_inode_security(trans, inode, dir, &dentry->d_name); if (err) - drop_inode = 1; - else { - init_special_inode(inode, inode->i_mode, rdev); + goto out_unlock_inode; + + err = btrfs_add_nondir(trans, dir, dentry, inode, 0, index); + if (err) { + goto out_unlock_inode; + } else { btrfs_update_inode(trans, root, inode); + unlock_new_inode(inode); d_instantiate(dentry, inode); } + out_unlock: btrfs_end_transaction(trans, root); btrfs_balance_delayed_items(root); @@ -5940,6 +5964,12 @@ out_unlock: iput(inode); } return err; + +out_unlock_inode: + drop_inode = 1; + unlock_new_inode(inode); + goto out_unlock; + } static int btrfs_create(struct inode *dir, struct dentry *dentry, @@ -5974,15 +6004,6 @@ static int btrfs_create(struct inode *dir, struct dentry *dentry, goto out_unlock; } drop_inode_on_err = 1; - - err = btrfs_init_inode_security(trans, inode, dir, &dentry->d_name); - if (err) - goto out_unlock; - - err = btrfs_update_inode(trans, root, inode); - if (err) - goto out_unlock; - /* * If the active LSM wants to access the inode during * d_instantiate it needs these. Smack checks to see @@ -5991,14 +6012,23 @@ static int btrfs_create(struct inode *dir, struct dentry *dentry, */ inode->i_fop = &btrfs_file_operations; inode->i_op = &btrfs_file_inode_operations; + inode->i_mapping->a_ops = &btrfs_aops; + inode->i_mapping->backing_dev_info = &root->fs_info->bdi; + + err = btrfs_init_inode_security(trans, inode, dir, &dentry->d_name); + if (err) + goto out_unlock_inode; + + err = btrfs_update_inode(trans, root, inode); + if (err) + goto out_unlock_inode; err = btrfs_add_nondir(trans, dir, dentry, inode, 0, index); if (err) - goto out_unlock; + goto out_unlock_inode; - inode->i_mapping->a_ops = &btrfs_aops; - inode->i_mapping->backing_dev_info = &root->fs_info->bdi; BTRFS_I(inode)->io_tree.ops = &btrfs_extent_io_ops; + unlock_new_inode(inode); d_instantiate(dentry, inode); out_unlock: @@ -6010,6 +6040,11 @@ out_unlock: btrfs_balance_delayed_items(root); btrfs_btree_balance_dirty(root); return err; + +out_unlock_inode: + unlock_new_inode(inode); + goto out_unlock; + } static int btrfs_link(struct dentry *old_dentry, struct inode *dir, @@ -6117,25 +6152,30 @@ static int btrfs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode) } drop_on_err = 1; + /* these must be set before we unlock the inode */ + inode->i_op = &btrfs_dir_inode_operations; + inode->i_fop = &btrfs_dir_file_operations; err = btrfs_init_inode_security(trans, inode, dir, &dentry->d_name); if (err) - goto out_fail; - - inode->i_op = &btrfs_dir_inode_operations; - inode->i_fop = &btrfs_dir_file_operations; + goto out_fail_inode; btrfs_i_size_write(inode, 0); err = btrfs_update_inode(trans, root, inode); if (err) - goto out_fail; + goto out_fail_inode; err = btrfs_add_link(trans, dir, inode, dentry->d_name.name, dentry->d_name.len, 0, index); if (err) - goto out_fail; + goto out_fail_inode; d_instantiate(dentry, inode); + /* + * mkdir is special. We're unlocking after we call d_instantiate + * to avoid a race with nfsd calling d_instantiate. + */ + unlock_new_inode(inode); drop_on_err = 0; out_fail: @@ -6145,6 +6185,10 @@ out_fail: btrfs_balance_delayed_items(root); btrfs_btree_balance_dirty(root); return err; + +out_fail_inode: + unlock_new_inode(inode); + goto out_fail; } /* helper for btfs_get_extent. Given an existing extent in the tree, @@ -8100,6 +8144,7 @@ int btrfs_create_subvol_root(struct btrfs_trans_handle *trans, set_nlink(inode, 1); btrfs_i_size_write(inode, 0); + unlock_new_inode(inode); err = btrfs_subvol_inherit_props(trans, new_root, parent_root); if (err) @@ -8760,12 +8805,6 @@ static int btrfs_symlink(struct inode *dir, struct dentry *dentry, goto out_unlock; } - err = btrfs_init_inode_security(trans, inode, dir, &dentry->d_name); - if (err) { - drop_inode = 1; - goto out_unlock; - } - /* * If the active LSM wants to access the inode during * d_instantiate it needs these. Smack checks to see @@ -8774,23 +8813,22 @@ static int btrfs_symlink(struct inode *dir, struct dentry *dentry, */ inode->i_fop = &btrfs_file_operations; inode->i_op = &btrfs_file_inode_operations; + inode->i_mapping->a_ops = &btrfs_aops; + inode->i_mapping->backing_dev_info = &root->fs_info->bdi; + BTRFS_I(inode)->io_tree.ops = &btrfs_extent_io_ops; + + err = btrfs_init_inode_security(trans, inode, dir, &dentry->d_name); + if (err) + goto out_unlock_inode; err = btrfs_add_nondir(trans, dir, dentry, inode, 0, index); if (err) - drop_inode = 1; - else { - inode->i_mapping->a_ops = &btrfs_aops; - inode->i_mapping->backing_dev_info = &root->fs_info->bdi; - BTRFS_I(inode)->io_tree.ops = &btrfs_extent_io_ops; - } - if (drop_inode) - goto out_unlock; + goto out_unlock_inode; path = btrfs_alloc_path(); if (!path) { err = -ENOMEM; - drop_inode = 1; - goto out_unlock; + goto out_unlock_inode; } key.objectid = btrfs_ino(inode); key.offset = 0; @@ -8799,9 +8837,8 @@ static int btrfs_symlink(struct inode *dir, struct dentry *dentry, err = btrfs_insert_empty_item(trans, root, path, &key, datasize); if (err) { - drop_inode = 1; btrfs_free_path(path); - goto out_unlock; + goto out_unlock_inode; } leaf = path->nodes[0]; ei = btrfs_item_ptr(leaf, path->slots[0], @@ -8825,12 +8862,15 @@ static int btrfs_symlink(struct inode *dir, struct dentry *dentry, inode_set_bytes(inode, name_len); btrfs_i_size_write(inode, name_len); err = btrfs_update_inode(trans, root, inode); - if (err) + if (err) { drop_inode = 1; + goto out_unlock_inode; + } + + unlock_new_inode(inode); + d_instantiate(dentry, inode); out_unlock: - if (!err) - d_instantiate(dentry, inode); btrfs_end_transaction(trans, root); if (drop_inode) { inode_dec_link_count(inode); @@ -8838,6 +8878,11 @@ out_unlock: } btrfs_btree_balance_dirty(root); return err; + +out_unlock_inode: + drop_inode = 1; + unlock_new_inode(inode); + goto out_unlock; } static int __btrfs_prealloc_file_range(struct inode *inode, int mode, @@ -9021,14 +9066,6 @@ static int btrfs_tmpfile(struct inode *dir, struct dentry *dentry, umode_t mode) goto out; } - ret = btrfs_init_inode_security(trans, inode, dir, NULL); - if (ret) - goto out; - - ret = btrfs_update_inode(trans, root, inode); - if (ret) - goto out; - inode->i_fop = &btrfs_file_operations; inode->i_op = &btrfs_file_inode_operations; @@ -9036,9 +9073,16 @@ static int btrfs_tmpfile(struct inode *dir, struct dentry *dentry, umode_t mode) inode->i_mapping->backing_dev_info = &root->fs_info->bdi; BTRFS_I(inode)->io_tree.ops = &btrfs_extent_io_ops; + ret = btrfs_init_inode_security(trans, inode, dir, NULL); + if (ret) + goto out_inode; + + ret = btrfs_update_inode(trans, root, inode); + if (ret) + goto out_inode; ret = btrfs_orphan_add(trans, inode); if (ret) - goto out; + goto out_inode; /* * We set number of links to 0 in btrfs_new_inode(), and here we set @@ -9048,6 +9092,7 @@ static int btrfs_tmpfile(struct inode *dir, struct dentry *dentry, umode_t mode) * d_tmpfile() -> inode_dec_link_count() -> drop_nlink() */ set_nlink(inode, 1); + unlock_new_inode(inode); d_tmpfile(dentry, inode); mark_inode_dirty(inode); @@ -9057,8 +9102,12 @@ out: iput(inode); btrfs_balance_delayed_items(root); btrfs_btree_balance_dirty(root); - return ret; + +out_inode: + unlock_new_inode(inode); + goto out; + } static const struct inode_operations btrfs_dir_inode_operations = { diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index fce6fd0e..8a8e298 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -1019,8 +1019,10 @@ static bool defrag_check_next_extent(struct inode *inode, struct extent_map *em) return false; next = defrag_lookup_extent(inode, em->start + em->len); - if (!next || next->block_start >= EXTENT_MAP_LAST_BYTE || - (em->block_start + em->block_len == next->block_start)) + if (!next || next->block_start >= EXTENT_MAP_LAST_BYTE) + ret = false; + else if ((em->block_start + em->block_len == next->block_start) && + (em->block_len > 128 * 1024 && next->block_len > 128 * 1024)) ret = false; free_extent_map(next); @@ -1055,7 +1057,6 @@ static int should_defrag_range(struct inode *inode, u64 start, int thresh, } next_mergeable = defrag_check_next_extent(inode, em); - /* * we hit a real extent, if it is big or the next extent is not a * real extent, don't bother defragging it @@ -1702,7 +1703,7 @@ static noinline int btrfs_ioctl_snap_create_v2(struct file *file, ~(BTRFS_SUBVOL_CREATE_ASYNC | BTRFS_SUBVOL_RDONLY | BTRFS_SUBVOL_QGROUP_INHERIT)) { ret = -EOPNOTSUPP; - goto out; + goto free_args; } if (vol_args->flags & BTRFS_SUBVOL_CREATE_ASYNC) @@ -1712,27 +1713,31 @@ static noinline int btrfs_ioctl_snap_create_v2(struct file *file, if (vol_args->flags & BTRFS_SUBVOL_QGROUP_INHERIT) { if (vol_args->size > PAGE_CACHE_SIZE) { ret = -EINVAL; - goto out; + goto free_args; } inherit = memdup_user(vol_args->qgroup_inherit, vol_args->size); if (IS_ERR(inherit)) { ret = PTR_ERR(inherit); - goto out; + goto free_args; } } ret = btrfs_ioctl_snap_create_transid(file, vol_args->name, vol_args->fd, subvol, ptr, readonly, inherit); + if (ret) + goto free_inherit; - if (ret == 0 && ptr && - copy_to_user(arg + - offsetof(struct btrfs_ioctl_vol_args_v2, - transid), ptr, sizeof(*ptr))) + if (ptr && copy_to_user(arg + + offsetof(struct btrfs_ioctl_vol_args_v2, + transid), + ptr, sizeof(*ptr))) ret = -EFAULT; -out: - kfree(vol_args); + +free_inherit: kfree(inherit); +free_args: + kfree(vol_args); return ret; } @@ -2652,7 +2657,7 @@ static long btrfs_ioctl_rm_dev(struct file *file, void __user *arg) vol_args = memdup_user(arg, sizeof(*vol_args)); if (IS_ERR(vol_args)) { ret = PTR_ERR(vol_args); - goto out; + goto err_drop; } vol_args->name[BTRFS_PATH_NAME_MAX] = '\0'; @@ -2670,6 +2675,7 @@ static long btrfs_ioctl_rm_dev(struct file *file, void __user *arg) out: kfree(vol_args); +err_drop: mnt_drop_write_file(file); return ret; } diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c index 7e0e6e3..d296efe 100644 --- a/fs/btrfs/tree-log.c +++ b/fs/btrfs/tree-log.c @@ -94,8 +94,10 @@ #define LOG_WALK_REPLAY_ALL 3 static int btrfs_log_inode(struct btrfs_trans_handle *trans, - struct btrfs_root *root, struct inode *inode, - int inode_only); + struct btrfs_root *root, struct inode *inode, + int inode_only, + const loff_t start, + const loff_t end); static int link_to_fixup_dir(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct btrfs_path *path, u64 objectid); @@ -3858,8 +3860,10 @@ process: * This handles both files and directories. */ static int btrfs_log_inode(struct btrfs_trans_handle *trans, - struct btrfs_root *root, struct inode *inode, - int inode_only) + struct btrfs_root *root, struct inode *inode, + int inode_only, + const loff_t start, + const loff_t end) { struct btrfs_path *path; struct btrfs_path *dst_path; @@ -3876,6 +3880,7 @@ static int btrfs_log_inode(struct btrfs_trans_handle *trans, int ins_nr; bool fast_search = false; u64 ino = btrfs_ino(inode); + struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree; path = btrfs_alloc_path(); if (!path) @@ -4049,13 +4054,35 @@ log_extents: goto out_unlock; } } else if (inode_only == LOG_INODE_ALL) { - struct extent_map_tree *tree = &BTRFS_I(inode)->extent_tree; struct extent_map *em, *n; - write_lock(&tree->lock); - list_for_each_entry_safe(em, n, &tree->modified_extents, list) - list_del_init(&em->list); - write_unlock(&tree->lock); + write_lock(&em_tree->lock); + /* + * We can't just remove every em if we're called for a ranged + * fsync - that is, one that doesn't cover the whole possible + * file range (0 to LLONG_MAX). This is because we can have + * em's that fall outside the range we're logging and therefore + * their ordered operations haven't completed yet + * (btrfs_finish_ordered_io() not invoked yet). This means we + * didn't get their respective file extent item in the fs/subvol + * tree yet, and need to let the next fast fsync (one which + * consults the list of modified extent maps) find the em so + * that it logs a matching file extent item and waits for the + * respective ordered operation to complete (if it's still + * running). + * + * Removing every em outside the range we're logging would make + * the next fast fsync not log their matching file extent items, + * therefore making us lose data after a log replay. + */ + list_for_each_entry_safe(em, n, &em_tree->modified_extents, + list) { + const u64 mod_end = em->mod_start + em->mod_len - 1; + + if (em->mod_start >= start && mod_end <= end) + list_del_init(&em->list); + } + write_unlock(&em_tree->lock); } if (inode_only == LOG_INODE_ALL && S_ISDIR(inode->i_mode)) { @@ -4065,8 +4092,19 @@ log_extents: goto out_unlock; } } - BTRFS_I(inode)->logged_trans = trans->transid; - BTRFS_I(inode)->last_log_commit = BTRFS_I(inode)->last_sub_trans; + + write_lock(&em_tree->lock); + /* + * If we're doing a ranged fsync and there are still modified extents + * in the list, we must run on the next fsync call as it might cover + * those extents (a full fsync or an fsync for other range). + */ + if (list_empty(&em_tree->modified_extents)) { + BTRFS_I(inode)->logged_trans = trans->transid; + BTRFS_I(inode)->last_log_commit = + BTRFS_I(inode)->last_sub_trans; + } + write_unlock(&em_tree->lock); out_unlock: if (unlikely(err)) btrfs_put_logged_extents(&logged_list); @@ -4161,7 +4199,10 @@ out: */ static int btrfs_log_inode_parent(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct inode *inode, - struct dentry *parent, int exists_only, + struct dentry *parent, + const loff_t start, + const loff_t end, + int exists_only, struct btrfs_log_ctx *ctx) { int inode_only = exists_only ? LOG_INODE_EXISTS : LOG_INODE_ALL; @@ -4207,7 +4248,7 @@ static int btrfs_log_inode_parent(struct btrfs_trans_handle *trans, if (ret) goto end_no_trans; - ret = btrfs_log_inode(trans, root, inode, inode_only); + ret = btrfs_log_inode(trans, root, inode, inode_only, start, end); if (ret) goto end_trans; @@ -4235,7 +4276,8 @@ static int btrfs_log_inode_parent(struct btrfs_trans_handle *trans, if (BTRFS_I(inode)->generation > root->fs_info->last_trans_committed) { - ret = btrfs_log_inode(trans, root, inode, inode_only); + ret = btrfs_log_inode(trans, root, inode, inode_only, + 0, LLONG_MAX); if (ret) goto end_trans; } @@ -4269,13 +4311,15 @@ end_no_trans: */ int btrfs_log_dentry_safe(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct dentry *dentry, + const loff_t start, + const loff_t end, struct btrfs_log_ctx *ctx) { struct dentry *parent = dget_parent(dentry); int ret; ret = btrfs_log_inode_parent(trans, root, dentry->d_inode, parent, - 0, ctx); + start, end, 0, ctx); dput(parent); return ret; @@ -4512,6 +4556,7 @@ int btrfs_log_new_name(struct btrfs_trans_handle *trans, root->fs_info->last_trans_committed)) return 0; - return btrfs_log_inode_parent(trans, root, inode, parent, 1, NULL); + return btrfs_log_inode_parent(trans, root, inode, parent, 0, + LLONG_MAX, 1, NULL); } diff --git a/fs/btrfs/tree-log.h b/fs/btrfs/tree-log.h index 7f5b41b..e2e798a 100644 --- a/fs/btrfs/tree-log.h +++ b/fs/btrfs/tree-log.h @@ -59,6 +59,8 @@ int btrfs_free_log_root_tree(struct btrfs_trans_handle *trans, int btrfs_recover_log_trees(struct btrfs_root *tree_root); int btrfs_log_dentry_safe(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct dentry *dentry, + const loff_t start, + const loff_t end, struct btrfs_log_ctx *ctx); int btrfs_del_dir_entries_in_log(struct btrfs_trans_handle *trans, struct btrfs_root *root, diff --git a/fs/cifs/Kconfig b/fs/cifs/Kconfig index 603f18a..a2172f3 100644 --- a/fs/cifs/Kconfig +++ b/fs/cifs/Kconfig @@ -22,6 +22,11 @@ config CIFS support for OS/2 and Windows ME and similar servers is provided as well. + The module also provides optional support for the followon + protocols for CIFS including SMB3, which enables + useful performance and security features (see the description + of CONFIG_CIFS_SMB2). + The cifs module provides an advanced network file system client for mounting to CIFS compliant servers. It includes support for DFS (hierarchical name space), secure per-user @@ -121,7 +126,8 @@ config CIFS_ACL depends on CIFS_XATTR && KEYS help Allows fetching CIFS/NTFS ACL from the server. The DACL blob - is handed over to the application/caller. + is handed over to the application/caller. See the man + page for getcifsacl for more information. config CIFS_DEBUG bool "Enable CIFS debugging routines" @@ -162,7 +168,7 @@ config CIFS_NFSD_EXPORT Allows NFS server to export a CIFS mounted share (nfsd over cifs) config CIFS_SMB2 - bool "SMB2 network file system support" + bool "SMB2 and SMB3 network file system support" depends on CIFS && INET select NLS select KEYS @@ -170,16 +176,21 @@ config CIFS_SMB2 select DNS_RESOLVER help - This enables experimental support for the SMB2 (Server Message Block - version 2) protocol. The SMB2 protocol is the successor to the - popular CIFS and SMB network file sharing protocols. SMB2 is the - native file sharing mechanism for recent versions of Windows - operating systems (since Vista). SMB2 enablement will eventually - allow users better performance, security and features, than would be - possible with cifs. Note that smb2 mount options also are simpler - (compared to cifs) due to protocol improvements. - - Unless you are a developer or tester, say N. + This enables support for the Server Message Block version 2 + family of protocols, including SMB3. SMB3 support is + enabled on mount by specifying "vers=3.0" in the mount + options. These protocols are the successors to the popular + CIFS and SMB network file sharing protocols. SMB3 is the + native file sharing mechanism for the more recent + versions of Windows (Windows 8 and Windows 2012 and + later) and Samba server and many others support SMB3 well. + In general SMB3 enables better performance, security + and features, than would be possible with CIFS (Note that + when mounting to Samba, due to the CIFS POSIX extensions, + CIFS mounts can provide slightly better POSIX compatibility + than SMB3 mounts do though). Note that SMB2/SMB3 mount + options are also slightly simpler (compared to CIFS) due + to protocol improvements. config CIFS_FSCACHE bool "Provide CIFS client caching support" diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h index dfc731b..25b8392 100644 --- a/fs/cifs/cifsglob.h +++ b/fs/cifs/cifsglob.h @@ -70,11 +70,6 @@ #define SERVER_NAME_LENGTH 40 #define SERVER_NAME_LEN_WITH_NULL (SERVER_NAME_LENGTH + 1) -/* used to define string lengths for reversing unicode strings */ -/* (256+1)*2 = 514 */ -/* (max path length + 1 for null) * 2 for unicode */ -#define MAX_NAME 514 - /* SMB echo "timeout" -- FIXME: tunable? */ #define SMB_ECHO_INTERVAL (60 * HZ) diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c index 03ed8a0..8a9fded 100644 --- a/fs/cifs/connect.c +++ b/fs/cifs/connect.c @@ -837,7 +837,6 @@ cifs_demultiplex_thread(void *p) struct TCP_Server_Info *server = p; unsigned int pdu_length; char *buf = NULL; - struct task_struct *task_to_wake = NULL; struct mid_q_entry *mid_entry; current->flags |= PF_MEMALLOC; @@ -928,19 +927,7 @@ cifs_demultiplex_thread(void *p) if (server->smallbuf) /* no sense logging a debug message if NULL */ cifs_small_buf_release(server->smallbuf); - task_to_wake = xchg(&server->tsk, NULL); clean_demultiplex_info(server); - - /* if server->tsk was NULL then wait for a signal before exiting */ - if (!task_to_wake) { - set_current_state(TASK_INTERRUPTIBLE); - while (!signal_pending(current)) { - schedule(); - set_current_state(TASK_INTERRUPTIBLE); - } - set_current_state(TASK_RUNNING); - } - module_put_and_exit(0); } @@ -1600,6 +1587,7 @@ cifs_parse_mount_options(const char *mountdata, const char *devname, tmp_end++; if (!(tmp_end < end && tmp_end[1] == delim)) { /* No it is not. Set the password to NULL */ + kfree(vol->password); vol->password = NULL; break; } @@ -1637,6 +1625,7 @@ cifs_parse_mount_options(const char *mountdata, const char *devname, options = end; } + kfree(vol->password); /* Now build new password string */ temp_len = strlen(value); vol->password = kzalloc(temp_len+1, GFP_KERNEL); @@ -2061,8 +2050,6 @@ cifs_find_tcp_session(struct smb_vol *vol) static void cifs_put_tcp_session(struct TCP_Server_Info *server) { - struct task_struct *task; - spin_lock(&cifs_tcp_ses_lock); if (--server->srv_count > 0) { spin_unlock(&cifs_tcp_ses_lock); @@ -2086,10 +2073,6 @@ cifs_put_tcp_session(struct TCP_Server_Info *server) kfree(server->session_key.response); server->session_key.response = NULL; server->session_key.len = 0; - - task = xchg(&server->tsk, NULL); - if (task) - force_sig(SIGKILL, task); } static struct TCP_Server_Info * diff --git a/fs/cifs/dir.c b/fs/cifs/dir.c index 3db0c5f..6cbd9c6 100644 --- a/fs/cifs/dir.c +++ b/fs/cifs/dir.c @@ -497,6 +497,14 @@ cifs_atomic_open(struct inode *inode, struct dentry *direntry, goto out; } + if (file->f_flags & O_DIRECT && + CIFS_SB(inode->i_sb)->mnt_cifs_flags & CIFS_MOUNT_STRICT_IO) { + if (CIFS_SB(inode->i_sb)->mnt_cifs_flags & CIFS_MOUNT_NO_BRL) + file->f_op = &cifs_file_direct_nobrl_ops; + else + file->f_op = &cifs_file_direct_ops; + } + file_info = cifs_new_fileinfo(&fid, file, tlink, oplock); if (file_info == NULL) { if (server->ops->close) diff --git a/fs/cifs/file.c b/fs/cifs/file.c index d5fec92..7c018a1 100644 --- a/fs/cifs/file.c +++ b/fs/cifs/file.c @@ -467,6 +467,14 @@ int cifs_open(struct inode *inode, struct file *file) cifs_dbg(FYI, "inode = 0x%p file flags are 0x%x for %s\n", inode, file->f_flags, full_path); + if (file->f_flags & O_DIRECT && + cifs_sb->mnt_cifs_flags & CIFS_MOUNT_STRICT_IO) { + if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NO_BRL) + file->f_op = &cifs_file_direct_nobrl_ops; + else + file->f_op = &cifs_file_direct_ops; + } + if (server->oplocks) oplock = REQ_OPLOCK; else diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c index 949ec90..7899a40 100644 --- a/fs/cifs/inode.c +++ b/fs/cifs/inode.c @@ -1720,7 +1720,10 @@ cifs_rename2(struct inode *source_dir, struct dentry *source_dentry, unlink_target: /* Try unlinking the target dentry if it's not negative */ if (target_dentry->d_inode && (rc == -EACCES || rc == -EEXIST)) { - tmprc = cifs_unlink(target_dir, target_dentry); + if (d_is_dir(target_dentry)) + tmprc = cifs_rmdir(target_dir, target_dentry); + else + tmprc = cifs_unlink(target_dir, target_dentry); if (tmprc) goto cifs_rename_exit; rc = cifs_do_rename(xid, source_dentry, from_name, diff --git a/fs/cifs/readdir.c b/fs/cifs/readdir.c index 798c80a..b334a89 100644 --- a/fs/cifs/readdir.c +++ b/fs/cifs/readdir.c @@ -596,8 +596,8 @@ find_cifs_entry(const unsigned int xid, struct cifs_tcon *tcon, loff_t pos, if (server->ops->dir_needs_close(cfile)) { cfile->invalidHandle = true; spin_unlock(&cifs_file_list_lock); - if (server->ops->close) - server->ops->close(xid, tcon, &cfile->fid); + if (server->ops->close_dir) + server->ops->close_dir(xid, tcon, &cfile->fid); } else spin_unlock(&cifs_file_list_lock); if (cfile->srch_inf.ntwrk_buf_start) { diff --git a/fs/cifs/sess.c b/fs/cifs/sess.c index 39ee326..3a5e833 100644 --- a/fs/cifs/sess.c +++ b/fs/cifs/sess.c @@ -243,10 +243,11 @@ static void decode_ascii_ssetup(char **pbcc_area, __u16 bleft, kfree(ses->serverOS); ses->serverOS = kzalloc(len + 1, GFP_KERNEL); - if (ses->serverOS) + if (ses->serverOS) { strncpy(ses->serverOS, bcc_ptr, len); - if (strncmp(ses->serverOS, "OS/2", 4) == 0) - cifs_dbg(FYI, "OS/2 server\n"); + if (strncmp(ses->serverOS, "OS/2", 4) == 0) + cifs_dbg(FYI, "OS/2 server\n"); + } bcc_ptr += len + 1; bleft -= len + 1; diff --git a/fs/cifs/smb2file.c b/fs/cifs/smb2file.c index 3f17b45..4599294 100644 --- a/fs/cifs/smb2file.c +++ b/fs/cifs/smb2file.c @@ -50,7 +50,7 @@ smb2_open_file(const unsigned int xid, struct cifs_open_parms *oparms, goto out; } - smb2_data = kzalloc(sizeof(struct smb2_file_all_info) + MAX_NAME * 2, + smb2_data = kzalloc(sizeof(struct smb2_file_all_info) + PATH_MAX * 2, GFP_KERNEL); if (smb2_data == NULL) { rc = -ENOMEM; diff --git a/fs/cifs/smb2inode.c b/fs/cifs/smb2inode.c index 0150182..899bbc8 100644 --- a/fs/cifs/smb2inode.c +++ b/fs/cifs/smb2inode.c @@ -131,7 +131,7 @@ smb2_query_path_info(const unsigned int xid, struct cifs_tcon *tcon, *adjust_tz = false; *symlink = false; - smb2_data = kzalloc(sizeof(struct smb2_file_all_info) + MAX_NAME * 2, + smb2_data = kzalloc(sizeof(struct smb2_file_all_info) + PATH_MAX * 2, GFP_KERNEL); if (smb2_data == NULL) return -ENOMEM; diff --git a/fs/cifs/smb2ops.c b/fs/cifs/smb2ops.c index 5a48aa2..f522193 100644 --- a/fs/cifs/smb2ops.c +++ b/fs/cifs/smb2ops.c @@ -389,7 +389,7 @@ smb2_query_file_info(const unsigned int xid, struct cifs_tcon *tcon, int rc; struct smb2_file_all_info *smb2_data; - smb2_data = kzalloc(sizeof(struct smb2_file_all_info) + MAX_NAME * 2, + smb2_data = kzalloc(sizeof(struct smb2_file_all_info) + PATH_MAX * 2, GFP_KERNEL); if (smb2_data == NULL) return -ENOMEM; @@ -1035,7 +1035,7 @@ static long smb3_zero_range(struct file *file, struct cifs_tcon *tcon, if (keep_size == false) return -EOPNOTSUPP; - /* + /* * Must check if file sparse since fallocate -z (zero range) assumes * non-sparse allocation */ diff --git a/fs/cifs/smb2pdu.c b/fs/cifs/smb2pdu.c index fa0dd04..74b3a66 100644 --- a/fs/cifs/smb2pdu.c +++ b/fs/cifs/smb2pdu.c @@ -530,7 +530,7 @@ SMB2_sess_setup(const unsigned int xid, struct cifs_ses *ses, struct smb2_sess_setup_rsp *rsp = NULL; struct kvec iov[2]; int rc = 0; - int resp_buftype; + int resp_buftype = CIFS_NO_BUFFER; __le32 phase = NtLmNegotiate; /* NTLMSSP, if needed, is multistage */ struct TCP_Server_Info *server = ses->server; u16 blob_length = 0; @@ -1403,8 +1403,7 @@ SMB2_close(const unsigned int xid, struct cifs_tcon *tcon, rsp = (struct smb2_close_rsp *)iov[0].iov_base; if (rc != 0) { - if (tcon) - cifs_stats_fail_inc(tcon, SMB2_CLOSE_HE); + cifs_stats_fail_inc(tcon, SMB2_CLOSE_HE); goto close_exit; } @@ -1533,7 +1532,7 @@ SMB2_query_info(const unsigned int xid, struct cifs_tcon *tcon, { return query_info(xid, tcon, persistent_fid, volatile_fid, FILE_ALL_INFORMATION, - sizeof(struct smb2_file_all_info) + MAX_NAME * 2, + sizeof(struct smb2_file_all_info) + PATH_MAX * 2, sizeof(struct smb2_file_all_info), data); } diff --git a/fs/dcache.c b/fs/dcache.c index d30ce69..7a5b514 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -106,8 +106,7 @@ static inline struct hlist_bl_head *d_hash(const struct dentry *parent, unsigned int hash) { hash += (unsigned long) parent / L1_CACHE_BYTES; - hash = hash + (hash >> d_hash_shift); - return dentry_hashtable + (hash & d_hash_mask); + return dentry_hashtable + hash_32(hash, d_hash_shift); } /* Statistics gathering. */ @@ -2656,6 +2655,12 @@ static void __d_materialise_dentry(struct dentry *dentry, struct dentry *anon) dentry->d_parent = dentry; list_del_init(&dentry->d_u.d_child); anon->d_parent = dparent; + if (likely(!d_unhashed(anon))) { + hlist_bl_lock(&anon->d_sb->s_anon); + __hlist_bl_del(&anon->d_hash); + anon->d_hash.pprev = NULL; + hlist_bl_unlock(&anon->d_sb->s_anon); + } list_move(&anon->d_u.d_child, &dparent->d_subdirs); write_seqcount_end(&dentry->d_seq); @@ -2714,7 +2719,6 @@ struct dentry *d_splice_alias(struct inode *inode, struct dentry *dentry) write_seqlock(&rename_lock); __d_materialise_dentry(dentry, new); write_sequnlock(&rename_lock); - __d_drop(new); _d_rehash(new); spin_unlock(&new->d_lock); spin_unlock(&inode->i_lock); @@ -2778,7 +2782,6 @@ struct dentry *d_materialise_unique(struct dentry *dentry, struct inode *inode) * could splice into our tree? */ __d_materialise_dentry(dentry, alias); write_sequnlock(&rename_lock); - __d_drop(alias); goto found; } else { /* Nope, but we must(!) avoid directory diff --git a/fs/eventpoll.c b/fs/eventpoll.c index b10b48c..7bcfff9 100644 --- a/fs/eventpoll.c +++ b/fs/eventpoll.c @@ -1852,7 +1852,8 @@ SYSCALL_DEFINE4(epoll_ctl, int, epfd, int, op, int, fd, goto error_tgt_fput; /* Check if EPOLLWAKEUP is allowed */ - ep_take_care_of_epollwakeup(&epds); + if (ep_op_has_event(op)) + ep_take_care_of_epollwakeup(&epds); /* * We have to check that the file structure underneath the file descriptor diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c index 90a3cdc..603e4eb 100644 --- a/fs/ext4/namei.c +++ b/fs/ext4/namei.c @@ -3240,6 +3240,7 @@ static int ext4_rename(struct inode *old_dir, struct dentry *old_dentry, &new.de, &new.inlined); if (IS_ERR(new.bh)) { retval = PTR_ERR(new.bh); + new.bh = NULL; goto end_rename; } if (new.bh) { @@ -3386,6 +3387,7 @@ static int ext4_cross_rename(struct inode *old_dir, struct dentry *old_dentry, &new.de, &new.inlined); if (IS_ERR(new.bh)) { retval = PTR_ERR(new.bh); + new.bh = NULL; goto end_rename; } diff --git a/fs/ext4/resize.c b/fs/ext4/resize.c index bb0e80f..1e43b90 100644 --- a/fs/ext4/resize.c +++ b/fs/ext4/resize.c @@ -575,6 +575,7 @@ handle_bb: bh = bclean(handle, sb, block); if (IS_ERR(bh)) { err = PTR_ERR(bh); + bh = NULL; goto out; } overhead = ext4_group_overhead_blocks(sb, group); @@ -603,6 +604,7 @@ handle_ib: bh = bclean(handle, sb, block); if (IS_ERR(bh)) { err = PTR_ERR(bh); + bh = NULL; goto out; } diff --git a/fs/f2fs/Kconfig b/fs/f2fs/Kconfig index 214fe10..736a348 100644 --- a/fs/f2fs/Kconfig +++ b/fs/f2fs/Kconfig @@ -23,7 +23,7 @@ config F2FS_STAT_FS mounted as f2fs. Each file shows the whole f2fs information. /sys/kernel/debug/f2fs/status includes: - - major file system information managed by f2fs currently + - major filesystem information managed by f2fs currently - average SIT information about whole segments - current memory footprint consumed by f2fs. @@ -68,6 +68,6 @@ config F2FS_CHECK_FS bool "F2FS consistency checking feature" depends on F2FS_FS help - Enables BUG_ONs which check the file system consistency in runtime. + Enables BUG_ONs which check the filesystem consistency in runtime. If you want to improve the performance, say N. diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c index 6aeed5b..ec3b7a5 100644 --- a/fs/f2fs/checkpoint.c +++ b/fs/f2fs/checkpoint.c @@ -160,14 +160,11 @@ static int f2fs_write_meta_page(struct page *page, goto redirty_out; if (wbc->for_reclaim) goto redirty_out; - - /* Should not write any meta pages, if any IO error was occurred */ - if (unlikely(is_set_ckpt_flags(F2FS_CKPT(sbi), CP_ERROR_FLAG))) - goto no_write; + if (unlikely(f2fs_cp_error(sbi))) + goto redirty_out; f2fs_wait_on_page_writeback(page, META); write_meta_page(sbi, page); -no_write: dec_page_count(sbi, F2FS_DIRTY_META); unlock_page(page); return 0; @@ -348,7 +345,7 @@ bool exist_written_data(struct f2fs_sb_info *sbi, nid_t ino, int mode) return e ? true : false; } -static void release_dirty_inode(struct f2fs_sb_info *sbi) +void release_dirty_inode(struct f2fs_sb_info *sbi) { struct ino_entry *e, *tmp; int i; @@ -446,8 +443,8 @@ static void write_orphan_inodes(struct f2fs_sb_info *sbi, block_t start_blk) struct f2fs_orphan_block *orphan_blk = NULL; unsigned int nentries = 0; unsigned short index; - unsigned short orphan_blocks = (unsigned short)((sbi->n_orphans + - (F2FS_ORPHANS_PER_BLOCK - 1)) / F2FS_ORPHANS_PER_BLOCK); + unsigned short orphan_blocks = + (unsigned short)GET_ORPHAN_BLOCKS(sbi->n_orphans); struct page *page = NULL; struct ino_entry *orphan = NULL; @@ -737,7 +734,7 @@ retry: /* * Freeze all the FS-operations for checkpoint. */ -static void block_operations(struct f2fs_sb_info *sbi) +static int block_operations(struct f2fs_sb_info *sbi) { struct writeback_control wbc = { .sync_mode = WB_SYNC_ALL, @@ -745,6 +742,7 @@ static void block_operations(struct f2fs_sb_info *sbi) .for_reclaim = 0, }; struct blk_plug plug; + int err = 0; blk_start_plug(&plug); @@ -754,11 +752,15 @@ retry_flush_dents: if (get_pages(sbi, F2FS_DIRTY_DENTS)) { f2fs_unlock_all(sbi); sync_dirty_dir_inodes(sbi); + if (unlikely(f2fs_cp_error(sbi))) { + err = -EIO; + goto out; + } goto retry_flush_dents; } /* - * POR: we should ensure that there is no dirty node pages + * POR: we should ensure that there are no dirty node pages * until finishing nat/sit flush. */ retry_flush_nodes: @@ -767,9 +769,16 @@ retry_flush_nodes: if (get_pages(sbi, F2FS_DIRTY_NODES)) { up_write(&sbi->node_write); sync_node_pages(sbi, 0, &wbc); + if (unlikely(f2fs_cp_error(sbi))) { + f2fs_unlock_all(sbi); + err = -EIO; + goto out; + } goto retry_flush_nodes; } +out: blk_finish_plug(&plug); + return err; } static void unblock_operations(struct f2fs_sb_info *sbi) @@ -813,8 +822,11 @@ static void do_checkpoint(struct f2fs_sb_info *sbi, bool is_umount) discard_next_dnode(sbi, NEXT_FREE_BLKADDR(sbi, curseg)); /* Flush all the NAT/SIT pages */ - while (get_pages(sbi, F2FS_DIRTY_META)) + while (get_pages(sbi, F2FS_DIRTY_META)) { sync_meta_pages(sbi, META, LONG_MAX); + if (unlikely(f2fs_cp_error(sbi))) + return; + } next_free_nid(sbi, &last_nid); @@ -825,7 +837,7 @@ static void do_checkpoint(struct f2fs_sb_info *sbi, bool is_umount) ckpt->elapsed_time = cpu_to_le64(get_mtime(sbi)); ckpt->valid_block_count = cpu_to_le64(valid_user_blocks(sbi)); ckpt->free_segment_count = cpu_to_le32(free_segments(sbi)); - for (i = 0; i < 3; i++) { + for (i = 0; i < NR_CURSEG_NODE_TYPE; i++) { ckpt->cur_node_segno[i] = cpu_to_le32(curseg_segno(sbi, i + CURSEG_HOT_NODE)); ckpt->cur_node_blkoff[i] = @@ -833,7 +845,7 @@ static void do_checkpoint(struct f2fs_sb_info *sbi, bool is_umount) ckpt->alloc_type[i + CURSEG_HOT_NODE] = curseg_alloc_type(sbi, i + CURSEG_HOT_NODE); } - for (i = 0; i < 3; i++) { + for (i = 0; i < NR_CURSEG_DATA_TYPE; i++) { ckpt->cur_data_segno[i] = cpu_to_le32(curseg_segno(sbi, i + CURSEG_HOT_DATA)); ckpt->cur_data_blkoff[i] = @@ -848,24 +860,23 @@ static void do_checkpoint(struct f2fs_sb_info *sbi, bool is_umount) /* 2 cp + n data seg summary + orphan inode blocks */ data_sum_blocks = npages_for_summary_flush(sbi); - if (data_sum_blocks < 3) + if (data_sum_blocks < NR_CURSEG_DATA_TYPE) set_ckpt_flags(ckpt, CP_COMPACT_SUM_FLAG); else clear_ckpt_flags(ckpt, CP_COMPACT_SUM_FLAG); - orphan_blocks = (sbi->n_orphans + F2FS_ORPHANS_PER_BLOCK - 1) - / F2FS_ORPHANS_PER_BLOCK; + orphan_blocks = GET_ORPHAN_BLOCKS(sbi->n_orphans); ckpt->cp_pack_start_sum = cpu_to_le32(1 + cp_payload_blks + orphan_blocks); if (is_umount) { set_ckpt_flags(ckpt, CP_UMOUNT_FLAG); - ckpt->cp_pack_total_block_count = cpu_to_le32(2 + + ckpt->cp_pack_total_block_count = cpu_to_le32(F2FS_CP_PACKS+ cp_payload_blks + data_sum_blocks + orphan_blocks + NR_CURSEG_NODE_TYPE); } else { clear_ckpt_flags(ckpt, CP_UMOUNT_FLAG); - ckpt->cp_pack_total_block_count = cpu_to_le32(2 + + ckpt->cp_pack_total_block_count = cpu_to_le32(F2FS_CP_PACKS + cp_payload_blks + data_sum_blocks + orphan_blocks); } @@ -924,6 +935,9 @@ static void do_checkpoint(struct f2fs_sb_info *sbi, bool is_umount) /* wait for previous submitted node/meta pages writeback */ wait_on_all_pages_writeback(sbi); + if (unlikely(f2fs_cp_error(sbi))) + return; + filemap_fdatawait_range(NODE_MAPPING(sbi), 0, LONG_MAX); filemap_fdatawait_range(META_MAPPING(sbi), 0, LONG_MAX); @@ -934,15 +948,17 @@ static void do_checkpoint(struct f2fs_sb_info *sbi, bool is_umount) /* Here, we only have one bio having CP pack */ sync_meta_pages(sbi, META_FLUSH, LONG_MAX); - if (!is_set_ckpt_flags(ckpt, CP_ERROR_FLAG)) { - clear_prefree_segments(sbi); - release_dirty_inode(sbi); - F2FS_RESET_SB_DIRT(sbi); - } + release_dirty_inode(sbi); + + if (unlikely(f2fs_cp_error(sbi))) + return; + + clear_prefree_segments(sbi); + F2FS_RESET_SB_DIRT(sbi); } /* - * We guarantee that this checkpoint procedure should not fail. + * We guarantee that this checkpoint procedure will not fail. */ void write_checkpoint(struct f2fs_sb_info *sbi, bool is_umount) { @@ -952,7 +968,13 @@ void write_checkpoint(struct f2fs_sb_info *sbi, bool is_umount) trace_f2fs_write_checkpoint(sbi->sb, is_umount, "start block_ops"); mutex_lock(&sbi->cp_mutex); - block_operations(sbi); + + if (!sbi->s_dirty) + goto out; + if (unlikely(f2fs_cp_error(sbi))) + goto out; + if (block_operations(sbi)) + goto out; trace_f2fs_write_checkpoint(sbi->sb, is_umount, "finish block_ops"); @@ -976,9 +998,9 @@ void write_checkpoint(struct f2fs_sb_info *sbi, bool is_umount) do_checkpoint(sbi, is_umount); unblock_operations(sbi); - mutex_unlock(&sbi->cp_mutex); - stat_inc_cp_count(sbi->stat_info); +out: + mutex_unlock(&sbi->cp_mutex); trace_f2fs_write_checkpoint(sbi->sb, is_umount, "finish checkpoint"); } @@ -999,8 +1021,8 @@ void init_ino_entry_info(struct f2fs_sb_info *sbi) * for cp pack we can have max 1020*504 orphan entries */ sbi->n_orphans = 0; - sbi->max_orphans = (sbi->blocks_per_seg - 2 - NR_CURSEG_TYPE) - * F2FS_ORPHANS_PER_BLOCK; + sbi->max_orphans = (sbi->blocks_per_seg - F2FS_CP_PACKS - + NR_CURSEG_TYPE) * F2FS_ORPHANS_PER_BLOCK; } int __init create_checkpoint_caches(void) diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index 03313099..76de83e 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -53,7 +53,7 @@ static void f2fs_write_end_io(struct bio *bio, int err) struct page *page = bvec->bv_page; if (unlikely(err)) { - SetPageError(page); + set_page_dirty(page); set_bit(AS_EIO, &page->mapping->flags); f2fs_stop_checkpoint(sbi); } @@ -691,7 +691,7 @@ get_next: allocated = true; blkaddr = dn.data_blkaddr; } - /* Give more consecutive addresses for the read ahead */ + /* Give more consecutive addresses for the readahead */ if (blkaddr == (bh_result->b_blocknr + ofs)) { ofs++; dn.ofs_in_node++; @@ -739,7 +739,7 @@ static int f2fs_read_data_page(struct file *file, struct page *page) trace_f2fs_readpage(page, DATA); - /* If the file has inline data, try to read it directlly */ + /* If the file has inline data, try to read it directly */ if (f2fs_has_inline_data(inode)) ret = f2fs_read_inline_data(inode, page); else @@ -836,10 +836,19 @@ write: /* Dentry blocks are controlled by checkpoint */ if (S_ISDIR(inode->i_mode)) { + if (unlikely(f2fs_cp_error(sbi))) + goto redirty_out; err = do_write_data_page(page, &fio); goto done; } + /* we should bypass data pages to proceed the kworkder jobs */ + if (unlikely(f2fs_cp_error(sbi))) { + SetPageError(page); + unlock_page(page); + return 0; + } + if (!wbc->for_reclaim) need_balance_fs = true; else if (has_not_enough_free_secs(sbi, 0)) @@ -927,7 +936,7 @@ static void f2fs_write_failed(struct address_space *mapping, loff_t to) if (to > inode->i_size) { truncate_pagecache(inode, inode->i_size); - truncate_blocks(inode, inode->i_size); + truncate_blocks(inode, inode->i_size, true); } } @@ -946,7 +955,7 @@ static int f2fs_write_begin(struct file *file, struct address_space *mapping, f2fs_balance_fs(sbi); repeat: - err = f2fs_convert_inline_data(inode, pos + len); + err = f2fs_convert_inline_data(inode, pos + len, NULL); if (err) goto fail; diff --git a/fs/f2fs/debug.c b/fs/f2fs/debug.c index a441ba3..fecebdb 100644 --- a/fs/f2fs/debug.c +++ b/fs/f2fs/debug.c @@ -32,7 +32,7 @@ static void update_general_status(struct f2fs_sb_info *sbi) struct f2fs_stat_info *si = F2FS_STAT(sbi); int i; - /* valid check of the segment numbers */ + /* validation check of the segment numbers */ si->hit_ext = sbi->read_hit_ext; si->total_ext = sbi->total_hit_ext; si->ndirty_node = get_pages(sbi, F2FS_DIRTY_NODES); @@ -152,7 +152,7 @@ static void update_mem_info(struct f2fs_sb_info *sbi) si->base_mem += NR_DIRTY_TYPE * f2fs_bitmap_size(TOTAL_SEGS(sbi)); si->base_mem += f2fs_bitmap_size(TOTAL_SECS(sbi)); - /* buld nm */ + /* build nm */ si->base_mem += sizeof(struct f2fs_nm_info); si->base_mem += __bitmap_size(sbi, NAT_BITMAP); diff --git a/fs/f2fs/dir.c b/fs/f2fs/dir.c index bcf893c..155fb05 100644 --- a/fs/f2fs/dir.c +++ b/fs/f2fs/dir.c @@ -124,7 +124,7 @@ static struct f2fs_dir_entry *find_in_block(struct page *dentry_page, /* * For the most part, it should be a bug when name_len is zero. - * We stop here for figuring out where the bugs are occurred. + * We stop here for figuring out where the bugs has occurred. */ f2fs_bug_on(!de->name_len); @@ -391,7 +391,7 @@ put_error: error: /* once the failed inode becomes a bad inode, i_mode is S_IFREG */ truncate_inode_pages(&inode->i_data, 0); - truncate_blocks(inode, 0); + truncate_blocks(inode, 0, false); remove_dirty_dir_inode(inode); remove_inode_page(inode); return ERR_PTR(err); @@ -563,7 +563,7 @@ fail: } /* - * It only removes the dentry from the dentry page,corresponding name + * It only removes the dentry from the dentry page, corresponding name * entry in name page does not need to be touched during deletion. */ void f2fs_delete_entry(struct f2fs_dir_entry *dentry, struct page *page, diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 4dab533..e921242 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -24,7 +24,7 @@ #define f2fs_bug_on(condition) BUG_ON(condition) #define f2fs_down_write(x, y) down_write_nest_lock(x, y) #else -#define f2fs_bug_on(condition) +#define f2fs_bug_on(condition) WARN_ON(condition) #define f2fs_down_write(x, y) down_write(x) #endif @@ -395,7 +395,7 @@ enum count_type { }; /* - * The below are the page types of bios used in submti_bio(). + * The below are the page types of bios used in submit_bio(). * The available types are: * DATA User data pages. It operates as async mode. * NODE Node pages. It operates as async mode. @@ -470,7 +470,7 @@ struct f2fs_sb_info { struct list_head dir_inode_list; /* dir inode list */ spinlock_t dir_inode_lock; /* for dir inode list lock */ - /* basic file system units */ + /* basic filesystem units */ unsigned int log_sectors_per_block; /* log2 sectors per block */ unsigned int log_blocksize; /* log2 block size */ unsigned int blocksize; /* block size */ @@ -799,7 +799,7 @@ static inline block_t __start_cp_addr(struct f2fs_sb_info *sbi) /* * odd numbered checkpoint should at cp segment 0 - * and even segent must be at cp segment 1 + * and even segment must be at cp segment 1 */ if (!(ckpt_version & 1)) start_addr += sbi->blocks_per_seg; @@ -1096,6 +1096,11 @@ static inline int f2fs_readonly(struct super_block *sb) return sb->s_flags & MS_RDONLY; } +static inline bool f2fs_cp_error(struct f2fs_sb_info *sbi) +{ + return is_set_ckpt_flags(sbi->ckpt, CP_ERROR_FLAG); +} + static inline void f2fs_stop_checkpoint(struct f2fs_sb_info *sbi) { set_ckpt_flags(sbi->ckpt, CP_ERROR_FLAG); @@ -1117,7 +1122,7 @@ static inline void f2fs_stop_checkpoint(struct f2fs_sb_info *sbi) */ int f2fs_sync_file(struct file *, loff_t, loff_t, int); void truncate_data_blocks(struct dnode_of_data *); -int truncate_blocks(struct inode *, u64); +int truncate_blocks(struct inode *, u64, bool); void f2fs_truncate(struct inode *); int f2fs_getattr(struct vfsmount *, struct dentry *, struct kstat *); int f2fs_setattr(struct dentry *, struct iattr *); @@ -1202,10 +1207,8 @@ int sync_node_pages(struct f2fs_sb_info *, nid_t, struct writeback_control *); bool alloc_nid(struct f2fs_sb_info *, nid_t *); void alloc_nid_done(struct f2fs_sb_info *, nid_t); void alloc_nid_failed(struct f2fs_sb_info *, nid_t); -void recover_node_page(struct f2fs_sb_info *, struct page *, - struct f2fs_summary *, struct node_info *, block_t); void recover_inline_xattr(struct inode *, struct page *); -bool recover_xattr_data(struct inode *, struct page *, block_t); +void recover_xattr_data(struct inode *, struct page *, block_t); int recover_inode_page(struct f2fs_sb_info *, struct page *); int restore_node_summary(struct f2fs_sb_info *, unsigned int, struct f2fs_summary_block *); @@ -1238,8 +1241,6 @@ void write_data_page(struct page *, struct dnode_of_data *, block_t *, void rewrite_data_page(struct page *, block_t, struct f2fs_io_info *); void recover_data_page(struct f2fs_sb_info *, struct page *, struct f2fs_summary *, block_t, block_t); -void rewrite_node_page(struct f2fs_sb_info *, struct page *, - struct f2fs_summary *, block_t, block_t); void allocate_data_block(struct f2fs_sb_info *, struct page *, block_t, block_t *, struct f2fs_summary *, int); void f2fs_wait_on_page_writeback(struct page *, enum page_type); @@ -1262,6 +1263,7 @@ int ra_meta_pages(struct f2fs_sb_info *, int, int, int); long sync_meta_pages(struct f2fs_sb_info *, enum page_type, long); void add_dirty_inode(struct f2fs_sb_info *, nid_t, int type); void remove_dirty_inode(struct f2fs_sb_info *, nid_t, int type); +void release_dirty_inode(struct f2fs_sb_info *); bool exist_written_data(struct f2fs_sb_info *, nid_t, int); int acquire_orphan_inode(struct f2fs_sb_info *); void release_orphan_inode(struct f2fs_sb_info *); @@ -1439,8 +1441,8 @@ extern const struct inode_operations f2fs_special_inode_operations; */ bool f2fs_may_inline(struct inode *); int f2fs_read_inline_data(struct inode *, struct page *); -int f2fs_convert_inline_data(struct inode *, pgoff_t); +int f2fs_convert_inline_data(struct inode *, pgoff_t, struct page *); int f2fs_write_inline_data(struct inode *, struct page *, unsigned int); void truncate_inline_data(struct inode *, u64); -int recover_inline_data(struct inode *, struct page *); +bool recover_inline_data(struct inode *, struct page *); #endif diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index 208f1a9..060aee6 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -41,6 +41,11 @@ static int f2fs_vm_page_mkwrite(struct vm_area_struct *vma, sb_start_pagefault(inode->i_sb); + /* force to convert with normal data indices */ + err = f2fs_convert_inline_data(inode, MAX_INLINE_DATA + 1, page); + if (err) + goto out; + /* block allocation */ f2fs_lock_op(sbi); set_new_dnode(&dn, inode, NULL, NULL, 0); @@ -110,6 +115,25 @@ static int get_parent_ino(struct inode *inode, nid_t *pino) return 1; } +static inline bool need_do_checkpoint(struct inode *inode) +{ + struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); + bool need_cp = false; + + if (!S_ISREG(inode->i_mode) || inode->i_nlink != 1) + need_cp = true; + else if (file_wrong_pino(inode)) + need_cp = true; + else if (!space_for_roll_forward(sbi)) + need_cp = true; + else if (!is_checkpointed_node(sbi, F2FS_I(inode)->i_pino)) + need_cp = true; + else if (F2FS_I(inode)->xattr_ver == cur_cp_version(F2FS_CKPT(sbi))) + need_cp = true; + + return need_cp; +} + int f2fs_sync_file(struct file *file, loff_t start, loff_t end, int datasync) { struct inode *inode = file->f_mapping->host; @@ -154,23 +178,12 @@ int f2fs_sync_file(struct file *file, loff_t start, loff_t end, int datasync) /* guarantee free sections for fsync */ f2fs_balance_fs(sbi); - down_read(&fi->i_sem); - /* * Both of fdatasync() and fsync() are able to be recovered from * sudden-power-off. */ - if (!S_ISREG(inode->i_mode) || inode->i_nlink != 1) - need_cp = true; - else if (file_wrong_pino(inode)) - need_cp = true; - else if (!space_for_roll_forward(sbi)) - need_cp = true; - else if (!is_checkpointed_node(sbi, F2FS_I(inode)->i_pino)) - need_cp = true; - else if (F2FS_I(inode)->xattr_ver == cur_cp_version(F2FS_CKPT(sbi))) - need_cp = true; - + down_read(&fi->i_sem); + need_cp = need_do_checkpoint(inode); up_read(&fi->i_sem); if (need_cp) { @@ -288,7 +301,7 @@ static loff_t f2fs_seek_block(struct file *file, loff_t offset, int whence) if (err && err != -ENOENT) { goto fail; } else if (err == -ENOENT) { - /* direct node is not exist */ + /* direct node does not exists */ if (whence == SEEK_DATA) { pgofs = PGOFS_OF_NEXT_DNODE(pgofs, F2FS_I(inode)); @@ -417,7 +430,7 @@ out: f2fs_put_page(page, 1); } -int truncate_blocks(struct inode *inode, u64 from) +int truncate_blocks(struct inode *inode, u64 from, bool lock) { struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); unsigned int blocksize = inode->i_sb->s_blocksize; @@ -433,14 +446,16 @@ int truncate_blocks(struct inode *inode, u64 from) free_from = (pgoff_t) ((from + blocksize - 1) >> (sbi->log_blocksize)); - f2fs_lock_op(sbi); + if (lock) + f2fs_lock_op(sbi); set_new_dnode(&dn, inode, NULL, NULL, 0); err = get_dnode_of_data(&dn, free_from, LOOKUP_NODE); if (err) { if (err == -ENOENT) goto free_next; - f2fs_unlock_op(sbi); + if (lock) + f2fs_unlock_op(sbi); trace_f2fs_truncate_blocks_exit(inode, err); return err; } @@ -458,7 +473,8 @@ int truncate_blocks(struct inode *inode, u64 from) f2fs_put_dnode(&dn); free_next: err = truncate_inode_blocks(inode, free_from); - f2fs_unlock_op(sbi); + if (lock) + f2fs_unlock_op(sbi); done: /* lastly zero out the first data page */ truncate_partial_data_page(inode, from); @@ -475,7 +491,7 @@ void f2fs_truncate(struct inode *inode) trace_f2fs_truncate(inode); - if (!truncate_blocks(inode, i_size_read(inode))) { + if (!truncate_blocks(inode, i_size_read(inode), true)) { inode->i_mtime = inode->i_ctime = CURRENT_TIME; mark_inode_dirty(inode); } @@ -533,7 +549,7 @@ int f2fs_setattr(struct dentry *dentry, struct iattr *attr) if ((attr->ia_valid & ATTR_SIZE) && attr->ia_size != i_size_read(inode)) { - err = f2fs_convert_inline_data(inode, attr->ia_size); + err = f2fs_convert_inline_data(inode, attr->ia_size, NULL); if (err) return err; @@ -622,7 +638,7 @@ static int punch_hole(struct inode *inode, loff_t offset, loff_t len) loff_t off_start, off_end; int ret = 0; - ret = f2fs_convert_inline_data(inode, MAX_INLINE_DATA + 1); + ret = f2fs_convert_inline_data(inode, MAX_INLINE_DATA + 1, NULL); if (ret) return ret; @@ -678,7 +694,7 @@ static int expand_inode_data(struct inode *inode, loff_t offset, if (ret) return ret; - ret = f2fs_convert_inline_data(inode, offset + len); + ret = f2fs_convert_inline_data(inode, offset + len, NULL); if (ret) return ret; diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c index d7947d9..943a31d 100644 --- a/fs/f2fs/gc.c +++ b/fs/f2fs/gc.c @@ -58,7 +58,7 @@ static int gc_thread_func(void *data) * 3. IO subsystem is idle by checking the # of requests in * bdev's request list. * - * Note) We have to avoid triggering GCs too much frequently. + * Note) We have to avoid triggering GCs frequently. * Because it is possible that some segments can be * invalidated soon after by user update or deletion. * So, I'd like to wait some time to collect dirty segments. @@ -222,7 +222,7 @@ static unsigned int get_cb_cost(struct f2fs_sb_info *sbi, unsigned int segno) u = (vblocks * 100) >> sbi->log_blocks_per_seg; - /* Handle if the system time is changed by user */ + /* Handle if the system time has changed by the user */ if (mtime < sit_i->min_mtime) sit_i->min_mtime = mtime; if (mtime > sit_i->max_mtime) @@ -593,7 +593,7 @@ next_step: if (phase == 2) { inode = f2fs_iget(sb, dni.ino); - if (IS_ERR(inode)) + if (IS_ERR(inode) || is_bad_inode(inode)) continue; start_bidx = start_bidx_of_node(nofs, F2FS_I(inode)); @@ -693,7 +693,7 @@ int f2fs_gc(struct f2fs_sb_info *sbi) gc_more: if (unlikely(!(sbi->sb->s_flags & MS_ACTIVE))) goto stop; - if (unlikely(is_set_ckpt_flags(F2FS_CKPT(sbi), CP_ERROR_FLAG))) + if (unlikely(f2fs_cp_error(sbi))) goto stop; if (gc_type == BG_GC && has_not_enough_free_secs(sbi, nfree)) { diff --git a/fs/f2fs/gc.h b/fs/f2fs/gc.h index 5d5eb60..16f0b2b 100644 --- a/fs/f2fs/gc.h +++ b/fs/f2fs/gc.h @@ -91,7 +91,7 @@ static inline bool has_enough_invalid_blocks(struct f2fs_sb_info *sbi) block_t invalid_user_blocks = sbi->user_block_count - written_block_count(sbi); /* - * Background GC is triggered with the following condition. + * Background GC is triggered with the following conditions. * 1. There are a number of invalid blocks. * 2. There is not enough free space. */ diff --git a/fs/f2fs/hash.c b/fs/f2fs/hash.c index 948d17bf..a844fcf 100644 --- a/fs/f2fs/hash.c +++ b/fs/f2fs/hash.c @@ -42,7 +42,8 @@ static void TEA_transform(unsigned int buf[4], unsigned int const in[]) buf[1] += b1; } -static void str2hashbuf(const char *msg, size_t len, unsigned int *buf, int num) +static void str2hashbuf(const unsigned char *msg, size_t len, + unsigned int *buf, int num) { unsigned pad, val; int i; @@ -73,9 +74,9 @@ f2fs_hash_t f2fs_dentry_hash(const struct qstr *name_info) { __u32 hash; f2fs_hash_t f2fs_hash; - const char *p; + const unsigned char *p; __u32 in[8], buf[4]; - const char *name = name_info->name; + const unsigned char *name = name_info->name; size_t len = name_info->len; if ((len <= 2) && (name[0] == '.') && diff --git a/fs/f2fs/inline.c b/fs/f2fs/inline.c index 5beecce..3e8ecdf 100644 --- a/fs/f2fs/inline.c +++ b/fs/f2fs/inline.c @@ -68,7 +68,7 @@ out: static int __f2fs_convert_inline_data(struct inode *inode, struct page *page) { - int err; + int err = 0; struct page *ipage; struct dnode_of_data dn; void *src_addr, *dst_addr; @@ -86,6 +86,10 @@ static int __f2fs_convert_inline_data(struct inode *inode, struct page *page) goto out; } + /* someone else converted inline_data already */ + if (!f2fs_has_inline_data(inode)) + goto out; + /* * i_addr[0] is not used for inline data, * so reserving new block will not destroy inline data @@ -124,9 +128,10 @@ out: return err; } -int f2fs_convert_inline_data(struct inode *inode, pgoff_t to_size) +int f2fs_convert_inline_data(struct inode *inode, pgoff_t to_size, + struct page *page) { - struct page *page; + struct page *new_page = page; int err; if (!f2fs_has_inline_data(inode)) @@ -134,17 +139,20 @@ int f2fs_convert_inline_data(struct inode *inode, pgoff_t to_size) else if (to_size <= MAX_INLINE_DATA) return 0; - page = grab_cache_page(inode->i_mapping, 0); - if (!page) - return -ENOMEM; + if (!page || page->index != 0) { + new_page = grab_cache_page(inode->i_mapping, 0); + if (!new_page) + return -ENOMEM; + } - err = __f2fs_convert_inline_data(inode, page); - f2fs_put_page(page, 1); + err = __f2fs_convert_inline_data(inode, new_page); + if (!page || page->index != 0) + f2fs_put_page(new_page, 1); return err; } int f2fs_write_inline_data(struct inode *inode, - struct page *page, unsigned size) + struct page *page, unsigned size) { void *src_addr, *dst_addr; struct page *ipage; @@ -199,7 +207,7 @@ void truncate_inline_data(struct inode *inode, u64 from) f2fs_put_page(ipage, 1); } -int recover_inline_data(struct inode *inode, struct page *npage) +bool recover_inline_data(struct inode *inode, struct page *npage) { struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); struct f2fs_inode *ri = NULL; @@ -218,7 +226,7 @@ int recover_inline_data(struct inode *inode, struct page *npage) ri = F2FS_INODE(npage); if (f2fs_has_inline_data(inode) && - ri && ri->i_inline & F2FS_INLINE_DATA) { + ri && (ri->i_inline & F2FS_INLINE_DATA)) { process_inline: ipage = get_node_page(sbi, inode->i_ino); f2fs_bug_on(IS_ERR(ipage)); @@ -230,7 +238,7 @@ process_inline: memcpy(dst_addr, src_addr, MAX_INLINE_DATA); update_inode(inode, ipage); f2fs_put_page(ipage, 1); - return -1; + return true; } if (f2fs_has_inline_data(inode)) { @@ -242,10 +250,10 @@ process_inline: clear_inode_flag(F2FS_I(inode), FI_INLINE_DATA); update_inode(inode, ipage); f2fs_put_page(ipage, 1); - } else if (ri && ri->i_inline & F2FS_INLINE_DATA) { - truncate_blocks(inode, 0); + } else if (ri && (ri->i_inline & F2FS_INLINE_DATA)) { + truncate_blocks(inode, 0, false); set_inode_flag(F2FS_I(inode), FI_INLINE_DATA); goto process_inline; } - return 0; + return false; } diff --git a/fs/f2fs/namei.c b/fs/f2fs/namei.c index 27b0377..ee103fd 100644 --- a/fs/f2fs/namei.c +++ b/fs/f2fs/namei.c @@ -134,9 +134,7 @@ static int f2fs_create(struct inode *dir, struct dentry *dentry, umode_t mode, return 0; out: clear_nlink(inode); - unlock_new_inode(inode); - make_bad_inode(inode); - iput(inode); + iget_failed(inode); alloc_nid_failed(sbi, ino); return err; } @@ -229,7 +227,7 @@ static int f2fs_unlink(struct inode *dir, struct dentry *dentry) f2fs_delete_entry(de, page, inode); f2fs_unlock_op(sbi); - /* In order to evict this inode, we set it dirty */ + /* In order to evict this inode, we set it dirty */ mark_inode_dirty(inode); fail: trace_f2fs_unlink_exit(inode, err); @@ -267,9 +265,7 @@ static int f2fs_symlink(struct inode *dir, struct dentry *dentry, return err; out: clear_nlink(inode); - unlock_new_inode(inode); - make_bad_inode(inode); - iput(inode); + iget_failed(inode); alloc_nid_failed(sbi, inode->i_ino); return err; } @@ -308,9 +304,7 @@ static int f2fs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode) out_fail: clear_inode_flag(F2FS_I(inode), FI_INC_LINK); clear_nlink(inode); - unlock_new_inode(inode); - make_bad_inode(inode); - iput(inode); + iget_failed(inode); alloc_nid_failed(sbi, inode->i_ino); return err; } @@ -354,9 +348,7 @@ static int f2fs_mknod(struct inode *dir, struct dentry *dentry, return 0; out: clear_nlink(inode); - unlock_new_inode(inode); - make_bad_inode(inode); - iput(inode); + iget_failed(inode); alloc_nid_failed(sbi, inode->i_ino); return err; } @@ -688,9 +680,7 @@ release_out: out: f2fs_unlock_op(sbi); clear_nlink(inode); - unlock_new_inode(inode); - make_bad_inode(inode); - iput(inode); + iget_failed(inode); alloc_nid_failed(sbi, inode->i_ino); return err; } @@ -704,7 +694,6 @@ const struct inode_operations f2fs_dir_inode_operations = { .mkdir = f2fs_mkdir, .rmdir = f2fs_rmdir, .mknod = f2fs_mknod, - .rename = f2fs_rename, .rename2 = f2fs_rename2, .tmpfile = f2fs_tmpfile, .getattr = f2fs_getattr, diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index d3d90d2..4537819 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -237,7 +237,7 @@ retry: nat_get_blkaddr(e) != NULL_ADDR && new_blkaddr == NEW_ADDR); - /* increament version no as node is removed */ + /* increment version no as node is removed */ if (nat_get_blkaddr(e) != NEW_ADDR && new_blkaddr == NULL_ADDR) { unsigned char version = nat_get_version(e); nat_set_version(e, inc_node_version(version)); @@ -274,7 +274,7 @@ int try_to_free_nats(struct f2fs_sb_info *sbi, int nr_shrink) } /* - * This function returns always success + * This function always returns success */ void get_node_info(struct f2fs_sb_info *sbi, nid_t nid, struct node_info *ni) { @@ -650,7 +650,7 @@ static int truncate_partial_nodes(struct dnode_of_data *dn, /* get indirect nodes in the path */ for (i = 0; i < idx + 1; i++) { - /* refernece count'll be increased */ + /* reference count'll be increased */ pages[i] = get_node_page(sbi, nid[i]); if (IS_ERR(pages[i])) { err = PTR_ERR(pages[i]); @@ -823,22 +823,26 @@ int truncate_xattr_node(struct inode *inode, struct page *page) */ void remove_inode_page(struct inode *inode) { - struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); - struct page *page; - nid_t ino = inode->i_ino; struct dnode_of_data dn; - page = get_node_page(sbi, ino); - if (IS_ERR(page)) + set_new_dnode(&dn, inode, NULL, NULL, inode->i_ino); + if (get_dnode_of_data(&dn, 0, LOOKUP_NODE)) return; - if (truncate_xattr_node(inode, page)) { - f2fs_put_page(page, 1); + if (truncate_xattr_node(inode, dn.inode_page)) { + f2fs_put_dnode(&dn); return; } - /* 0 is possible, after f2fs_new_inode() is failed */ + + /* remove potential inline_data blocks */ + if (S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) || + S_ISLNK(inode->i_mode)) + truncate_data_blocks_range(&dn, 1); + + /* 0 is possible, after f2fs_new_inode() has failed */ f2fs_bug_on(inode->i_blocks != 0 && inode->i_blocks != 1); - set_new_dnode(&dn, inode, page, page, ino); + + /* will put inode & node pages */ truncate_node(&dn); } @@ -1129,8 +1133,11 @@ continue_unlock: set_fsync_mark(page, 0); set_dentry_mark(page, 0); } - NODE_MAPPING(sbi)->a_ops->writepage(page, wbc); - wrote++; + + if (NODE_MAPPING(sbi)->a_ops->writepage(page, wbc)) + unlock_page(page); + else + wrote++; if (--wbc->nr_to_write == 0) break; @@ -1212,6 +1219,8 @@ static int f2fs_write_node_page(struct page *page, if (unlikely(sbi->por_doing)) goto redirty_out; + if (unlikely(f2fs_cp_error(sbi))) + goto redirty_out; f2fs_wait_on_page_writeback(page, NODE); @@ -1540,15 +1549,6 @@ void alloc_nid_failed(struct f2fs_sb_info *sbi, nid_t nid) kmem_cache_free(free_nid_slab, i); } -void recover_node_page(struct f2fs_sb_info *sbi, struct page *page, - struct f2fs_summary *sum, struct node_info *ni, - block_t new_blkaddr) -{ - rewrite_node_page(sbi, page, sum, ni->blk_addr, new_blkaddr); - set_node_addr(sbi, ni, new_blkaddr, false); - clear_node_page_dirty(page); -} - void recover_inline_xattr(struct inode *inode, struct page *page) { struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); @@ -1557,40 +1557,33 @@ void recover_inline_xattr(struct inode *inode, struct page *page) struct page *ipage; struct f2fs_inode *ri; - if (!f2fs_has_inline_xattr(inode)) - return; - - if (!IS_INODE(page)) - return; - - ri = F2FS_INODE(page); - if (!(ri->i_inline & F2FS_INLINE_XATTR)) - return; - ipage = get_node_page(sbi, inode->i_ino); f2fs_bug_on(IS_ERR(ipage)); + ri = F2FS_INODE(page); + if (!(ri->i_inline & F2FS_INLINE_XATTR)) { + clear_inode_flag(F2FS_I(inode), FI_INLINE_XATTR); + goto update_inode; + } + dst_addr = inline_xattr_addr(ipage); src_addr = inline_xattr_addr(page); inline_size = inline_xattr_size(inode); f2fs_wait_on_page_writeback(ipage, NODE); memcpy(dst_addr, src_addr, inline_size); - +update_inode: update_inode(inode, ipage); f2fs_put_page(ipage, 1); } -bool recover_xattr_data(struct inode *inode, struct page *page, block_t blkaddr) +void recover_xattr_data(struct inode *inode, struct page *page, block_t blkaddr) { struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); nid_t prev_xnid = F2FS_I(inode)->i_xattr_nid; nid_t new_xnid = nid_of_node(page); struct node_info ni; - if (!f2fs_has_xattr_block(ofs_of_node(page))) - return false; - /* 1: invalidate the previous xattr nid */ if (!prev_xnid) goto recover_xnid; @@ -1618,7 +1611,6 @@ recover_xnid: set_node_addr(sbi, &ni, blkaddr, false); update_inode_page(inode); - return true; } int recover_inode_page(struct f2fs_sb_info *sbi, struct page *page) @@ -1637,7 +1629,7 @@ int recover_inode_page(struct f2fs_sb_info *sbi, struct page *page) if (!ipage) return -ENOMEM; - /* Should not use this inode from free nid list */ + /* Should not use this inode from free nid list */ remove_free_nid(NM_I(sbi), ino); SetPageUptodate(ipage); @@ -1651,6 +1643,7 @@ int recover_inode_page(struct f2fs_sb_info *sbi, struct page *page) dst->i_blocks = cpu_to_le64(1); dst->i_links = cpu_to_le32(1); dst->i_xattr_nid = 0; + dst->i_inline = src->i_inline & F2FS_INLINE_XATTR; new_ni = old_ni; new_ni.ino = ino; @@ -1659,13 +1652,14 @@ int recover_inode_page(struct f2fs_sb_info *sbi, struct page *page) WARN_ON(1); set_node_addr(sbi, &new_ni, NEW_ADDR, false); inc_valid_inode_count(sbi); + set_page_dirty(ipage); f2fs_put_page(ipage, 1); return 0; } /* * ra_sum_pages() merge contiguous pages into one bio and submit. - * these pre-readed pages are alloced in bd_inode's mapping tree. + * these pre-read pages are allocated in bd_inode's mapping tree. */ static int ra_sum_pages(struct f2fs_sb_info *sbi, struct page **pages, int start, int nrpages) @@ -1709,7 +1703,7 @@ int restore_node_summary(struct f2fs_sb_info *sbi, for (i = 0; !err && i < last_offset; i += nrpages, addr += nrpages) { nrpages = min(last_offset - i, bio_blocks); - /* read ahead node pages */ + /* readahead node pages */ nrpages = ra_sum_pages(sbi, pages, addr, nrpages); if (!nrpages) return -ENOMEM; @@ -1967,7 +1961,7 @@ static int init_node_manager(struct f2fs_sb_info *sbi) nm_i->max_nid = NAT_ENTRY_PER_BLOCK * nat_blocks; /* not used nids: 0, node, meta, (and root counted as valid node) */ - nm_i->available_nids = nm_i->max_nid - 3; + nm_i->available_nids = nm_i->max_nid - F2FS_RESERVED_NODE_NUM; nm_i->fcnt = 0; nm_i->nat_cnt = 0; nm_i->ram_thresh = DEF_RAM_THRESHOLD; diff --git a/fs/f2fs/recovery.c b/fs/f2fs/recovery.c index fe1c6d9..756c41c 100644 --- a/fs/f2fs/recovery.c +++ b/fs/f2fs/recovery.c @@ -62,8 +62,10 @@ static int recover_dentry(struct page *ipage, struct inode *inode) } retry: de = f2fs_find_entry(dir, &name, &page); - if (de && inode->i_ino == le32_to_cpu(de->ino)) + if (de && inode->i_ino == le32_to_cpu(de->ino)) { + clear_inode_flag(F2FS_I(inode), FI_INC_LINK); goto out_unmap_put; + } if (de) { einode = f2fs_iget(inode->i_sb, le32_to_cpu(de->ino)); if (IS_ERR(einode)) { @@ -300,14 +302,19 @@ static int do_recover_data(struct f2fs_sb_info *sbi, struct inode *inode, struct node_info ni; int err = 0, recovered = 0; - recover_inline_xattr(inode, page); - - if (recover_inline_data(inode, page)) + /* step 1: recover xattr */ + if (IS_INODE(page)) { + recover_inline_xattr(inode, page); + } else if (f2fs_has_xattr_block(ofs_of_node(page))) { + recover_xattr_data(inode, page, blkaddr); goto out; + } - if (recover_xattr_data(inode, page, blkaddr)) + /* step 2: recover inline data */ + if (recover_inline_data(inode, page)) goto out; + /* step 3: recover data indices */ start = start_bidx_of_node(ofs_of_node(page), fi); end = start + ADDRS_PER_PAGE(page, fi); @@ -364,8 +371,6 @@ static int do_recover_data(struct f2fs_sb_info *sbi, struct inode *inode, fill_node_footer(dn.node_page, dn.nid, ni.ino, ofs_of_node(page), false); set_page_dirty(dn.node_page); - - recover_node_page(sbi, dn.node_page, &sum, &ni, blkaddr); err: f2fs_put_dnode(&dn); f2fs_unlock_op(sbi); @@ -452,6 +457,9 @@ int recover_fsync_data(struct f2fs_sb_info *sbi) /* step #1: find fsynced inode numbers */ sbi->por_doing = true; + /* prevent checkpoint */ + mutex_lock(&sbi->cp_mutex); + blkaddr = NEXT_FREE_BLKADDR(sbi, curseg); err = find_fsync_dnodes(sbi, &inode_list); @@ -465,7 +473,8 @@ int recover_fsync_data(struct f2fs_sb_info *sbi) /* step #2: recover data */ err = recover_data(sbi, &inode_list, CURSEG_WARM_NODE); - f2fs_bug_on(!list_empty(&inode_list)); + if (!err) + f2fs_bug_on(!list_empty(&inode_list)); out: destroy_fsync_dnodes(&inode_list); kmem_cache_destroy(fsync_entry_slab); @@ -482,8 +491,13 @@ out: /* Flush all the NAT/SIT pages */ while (get_pages(sbi, F2FS_DIRTY_META)) sync_meta_pages(sbi, META, LONG_MAX); + set_ckpt_flags(sbi->ckpt, CP_ERROR_FLAG); + mutex_unlock(&sbi->cp_mutex); } else if (need_writecp) { + mutex_unlock(&sbi->cp_mutex); write_checkpoint(sbi, false); + } else { + mutex_unlock(&sbi->cp_mutex); } return err; } diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 0dfeeba..0aa337c 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -62,7 +62,7 @@ static inline unsigned long __reverse_ffs(unsigned long word) } /* - * __find_rev_next(_zero)_bit is copied from lib/find_next_bit.c becasue + * __find_rev_next(_zero)_bit is copied from lib/find_next_bit.c because * f2fs_set_bit makes MSB and LSB reversed in a byte. * Example: * LSB <--> MSB @@ -808,7 +808,7 @@ static void __refresh_next_blkoff(struct f2fs_sb_info *sbi, } /* - * This function always allocates a used segment (from dirty seglist) by SSR + * This function always allocates a used segment(from dirty seglist) by SSR * manner, so it should recover the existing segment information of valid blocks */ static void change_curseg(struct f2fs_sb_info *sbi, int type, bool reuse) @@ -1103,55 +1103,6 @@ void recover_data_page(struct f2fs_sb_info *sbi, mutex_unlock(&curseg->curseg_mutex); } -void rewrite_node_page(struct f2fs_sb_info *sbi, - struct page *page, struct f2fs_summary *sum, - block_t old_blkaddr, block_t new_blkaddr) -{ - struct sit_info *sit_i = SIT_I(sbi); - int type = CURSEG_WARM_NODE; - struct curseg_info *curseg; - unsigned int segno, old_cursegno; - block_t next_blkaddr = next_blkaddr_of_node(page); - unsigned int next_segno = GET_SEGNO(sbi, next_blkaddr); - struct f2fs_io_info fio = { - .type = NODE, - .rw = WRITE_SYNC, - }; - - curseg = CURSEG_I(sbi, type); - - mutex_lock(&curseg->curseg_mutex); - mutex_lock(&sit_i->sentry_lock); - - segno = GET_SEGNO(sbi, new_blkaddr); - old_cursegno = curseg->segno; - - /* change the current segment */ - if (segno != curseg->segno) { - curseg->next_segno = segno; - change_curseg(sbi, type, true); - } - curseg->next_blkoff = GET_BLKOFF_FROM_SEG0(sbi, new_blkaddr); - __add_sum_entry(sbi, type, sum); - - /* change the current log to the next block addr in advance */ - if (next_segno != segno) { - curseg->next_segno = next_segno; - change_curseg(sbi, type, true); - } - curseg->next_blkoff = GET_BLKOFF_FROM_SEG0(sbi, next_blkaddr); - - /* rewrite node page */ - set_page_writeback(page); - f2fs_submit_page_mbio(sbi, page, new_blkaddr, &fio); - f2fs_submit_merged_bio(sbi, NODE, WRITE); - refresh_sit_entry(sbi, old_blkaddr, new_blkaddr); - locate_dirty_segment(sbi, old_cursegno); - - mutex_unlock(&sit_i->sentry_lock); - mutex_unlock(&curseg->curseg_mutex); -} - static inline bool is_merged_page(struct f2fs_sb_info *sbi, struct page *page, enum page_type type) { diff --git a/fs/f2fs/segment.h b/fs/f2fs/segment.h index 55973f7..ff48325 100644 --- a/fs/f2fs/segment.h +++ b/fs/f2fs/segment.h @@ -549,7 +549,7 @@ static inline void verify_block_addr(struct f2fs_sb_info *sbi, block_t blk_addr) } /* - * Summary block is always treated as invalid block + * Summary block is always treated as an invalid block */ static inline void check_block_count(struct f2fs_sb_info *sbi, int segno, struct f2fs_sit_entry *raw_sit) diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index 657582f..41bdf51 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -432,9 +432,15 @@ static void f2fs_put_super(struct super_block *sb) stop_gc_thread(sbi); /* We don't need to do checkpoint when it's clean */ - if (sbi->s_dirty && get_pages(sbi, F2FS_DIRTY_NODES)) + if (sbi->s_dirty) write_checkpoint(sbi, true); + /* + * normally superblock is clean, so we need to release this. + * In addition, EIO will skip do checkpoint, we need this as well. + */ + release_dirty_inode(sbi); + iput(sbi->node_inode); iput(sbi->meta_inode); @@ -457,9 +463,6 @@ int f2fs_sync_fs(struct super_block *sb, int sync) trace_f2fs_sync_fs(sb, sync); - if (!sbi->s_dirty && !get_pages(sbi, F2FS_DIRTY_NODES)) - return 0; - if (sync) { mutex_lock(&sbi->gc_mutex); write_checkpoint(sbi, false); @@ -505,8 +508,8 @@ static int f2fs_statfs(struct dentry *dentry, struct kstatfs *buf) buf->f_bfree = buf->f_blocks - valid_user_blocks(sbi) - ovp_count; buf->f_bavail = user_block_count - valid_user_blocks(sbi); - buf->f_files = sbi->total_node_count; - buf->f_ffree = sbi->total_node_count - valid_inode_count(sbi); + buf->f_files = sbi->total_node_count - F2FS_RESERVED_NODE_NUM; + buf->f_ffree = buf->f_files - valid_inode_count(sbi); buf->f_namelen = F2FS_NAME_LEN; buf->f_fsid.val[0] = (u32)id; @@ -663,7 +666,7 @@ restore_gc: if (need_restart_gc) { if (start_gc_thread(sbi)) f2fs_msg(sbi->sb, KERN_WARNING, - "background gc thread is stop"); + "background gc thread has stopped"); } else if (need_stop_gc) { stop_gc_thread(sbi); } @@ -812,7 +815,7 @@ static int sanity_check_ckpt(struct f2fs_sb_info *sbi) if (unlikely(fsmeta >= total)) return 1; - if (unlikely(is_set_ckpt_flags(ckpt, CP_ERROR_FLAG))) { + if (unlikely(f2fs_cp_error(sbi))) { f2fs_msg(sbi->sb, KERN_ERR, "A bug case: need to run fsck"); return 1; } @@ -899,8 +902,10 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent) struct buffer_head *raw_super_buf; struct inode *root; long err = -EINVAL; + bool retry = true; int i; +try_onemore: /* allocate memory for f2fs-specific super block info */ sbi = kzalloc(sizeof(struct f2fs_sb_info), GFP_KERNEL); if (!sbi) @@ -1080,9 +1085,11 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent) /* recover fsynced data */ if (!test_opt(sbi, DISABLE_ROLL_FORWARD)) { err = recover_fsync_data(sbi); - if (err) + if (err) { f2fs_msg(sb, KERN_ERR, "Cannot recover all fsync data errno=%ld", err); + goto free_kobj; + } } /* @@ -1123,6 +1130,13 @@ free_sb_buf: brelse(raw_super_buf); free_sbi: kfree(sbi); + + /* give only one another chance */ + if (retry) { + retry = 0; + shrink_dcache_sb(sb); + goto try_onemore; + } return err; } diff --git a/fs/f2fs/xattr.c b/fs/f2fs/xattr.c index 8bea941..728a5dc 100644 --- a/fs/f2fs/xattr.c +++ b/fs/f2fs/xattr.c @@ -528,7 +528,7 @@ static int __f2fs_setxattr(struct inode *inode, int index, int free; /* * If value is NULL, it is remove operation. - * In case of update operation, we caculate free. + * In case of update operation, we calculate free. */ free = MIN_OFFSET(inode) - ((char *)last - (char *)base_addr); if (found) diff --git a/fs/gfs2/bmap.c b/fs/gfs2/bmap.c index e6ee5b6..f0b945a 100644 --- a/fs/gfs2/bmap.c +++ b/fs/gfs2/bmap.c @@ -359,7 +359,7 @@ static inline void release_metapath(struct metapath *mp) * Returns: The length of the extent (minimum of one block) */ -static inline unsigned int gfs2_extent_length(void *start, unsigned int len, __be64 *ptr, unsigned limit, int *eob) +static inline unsigned int gfs2_extent_length(void *start, unsigned int len, __be64 *ptr, size_t limit, int *eob) { const __be64 *end = (start + len); const __be64 *first = ptr; @@ -449,7 +449,7 @@ static int gfs2_bmap_alloc(struct inode *inode, const sector_t lblock, struct buffer_head *bh_map, struct metapath *mp, const unsigned int sheight, const unsigned int height, - const unsigned int maxlen) + const size_t maxlen) { struct gfs2_inode *ip = GFS2_I(inode); struct gfs2_sbd *sdp = GFS2_SB(inode); @@ -483,7 +483,8 @@ static int gfs2_bmap_alloc(struct inode *inode, const sector_t lblock, } else { /* Need to allocate indirect blocks */ ptrs_per_blk = height > 1 ? sdp->sd_inptrs : sdp->sd_diptrs; - dblks = min(maxlen, ptrs_per_blk - mp->mp_list[end_of_metadata]); + dblks = min(maxlen, (size_t)(ptrs_per_blk - + mp->mp_list[end_of_metadata])); if (height == ip->i_height) { /* Writing into existing tree, extend tree down */ iblks = height - sheight; @@ -605,7 +606,7 @@ int gfs2_block_map(struct inode *inode, sector_t lblock, struct gfs2_inode *ip = GFS2_I(inode); struct gfs2_sbd *sdp = GFS2_SB(inode); unsigned int bsize = sdp->sd_sb.sb_bsize; - const unsigned int maxlen = bh_map->b_size >> inode->i_blkbits; + const size_t maxlen = bh_map->b_size >> inode->i_blkbits; const u64 *arr = sdp->sd_heightsize; __be64 *ptr; u64 size; diff --git a/fs/gfs2/file.c b/fs/gfs2/file.c index 26b3f95..7f4ed3d 100644 --- a/fs/gfs2/file.c +++ b/fs/gfs2/file.c @@ -26,6 +26,7 @@ #include <linux/dlm.h> #include <linux/dlm_plock.h> #include <linux/aio.h> +#include <linux/delay.h> #include "gfs2.h" #include "incore.h" @@ -979,9 +980,10 @@ static int do_flock(struct file *file, int cmd, struct file_lock *fl) unsigned int state; int flags; int error = 0; + int sleeptime; state = (fl->fl_type == F_WRLCK) ? LM_ST_EXCLUSIVE : LM_ST_SHARED; - flags = (IS_SETLKW(cmd) ? 0 : LM_FLAG_TRY) | GL_EXACT; + flags = (IS_SETLKW(cmd) ? 0 : LM_FLAG_TRY_1CB) | GL_EXACT; mutex_lock(&fp->f_fl_mutex); @@ -1001,7 +1003,14 @@ static int do_flock(struct file *file, int cmd, struct file_lock *fl) gfs2_holder_init(gl, state, flags, fl_gh); gfs2_glock_put(gl); } - error = gfs2_glock_nq(fl_gh); + for (sleeptime = 1; sleeptime <= 4; sleeptime <<= 1) { + error = gfs2_glock_nq(fl_gh); + if (error != GLR_TRYFAILED) + break; + fl_gh->gh_flags = LM_FLAG_TRY | GL_EXACT; + fl_gh->gh_error = 0; + msleep(sleeptime); + } if (error) { gfs2_holder_uninit(fl_gh); if (error == GLR_TRYFAILED) @@ -1024,7 +1033,7 @@ static void do_unflock(struct file *file, struct file_lock *fl) mutex_lock(&fp->f_fl_mutex); flock_lock_file_wait(file, fl); if (fl_gh->gh_gl) { - gfs2_glock_dq_wait(fl_gh); + gfs2_glock_dq(fl_gh); gfs2_holder_uninit(fl_gh); } mutex_unlock(&fp->f_fl_mutex); diff --git a/fs/gfs2/incore.h b/fs/gfs2/incore.h index 67d310c..39e7e99 100644 --- a/fs/gfs2/incore.h +++ b/fs/gfs2/incore.h @@ -262,6 +262,9 @@ struct gfs2_holder { unsigned long gh_ip; }; +/* Number of quota types we support */ +#define GFS2_MAXQUOTAS 2 + /* Resource group multi-block reservation, in order of appearance: Step 1. Function prepares to write, allocates a mb, sets the size hint. @@ -282,8 +285,8 @@ struct gfs2_blkreserv { u64 rs_inum; /* Inode number for reservation */ /* ancillary quota stuff */ - struct gfs2_quota_data *rs_qa_qd[2 * MAXQUOTAS]; - struct gfs2_holder rs_qa_qd_ghs[2 * MAXQUOTAS]; + struct gfs2_quota_data *rs_qa_qd[2 * GFS2_MAXQUOTAS]; + struct gfs2_holder rs_qa_qd_ghs[2 * GFS2_MAXQUOTAS]; unsigned int rs_qa_qd_num; }; diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c index e62e594..fc8ac2e 100644 --- a/fs/gfs2/inode.c +++ b/fs/gfs2/inode.c @@ -626,8 +626,10 @@ static int gfs2_create_inode(struct inode *dir, struct dentry *dentry, if (!IS_ERR(inode)) { d = d_splice_alias(inode, dentry); error = PTR_ERR(d); - if (IS_ERR(d)) + if (IS_ERR(d)) { + inode = ERR_CAST(d); goto fail_gunlock; + } error = 0; if (file) { if (S_ISREG(inode->i_mode)) { @@ -840,8 +842,10 @@ static struct dentry *__gfs2_lookup(struct inode *dir, struct dentry *dentry, int error; inode = gfs2_lookupi(dir, &dentry->d_name, 0); - if (!inode) + if (inode == NULL) { + d_add(dentry, NULL); return NULL; + } if (IS_ERR(inode)) return ERR_CAST(inode); @@ -854,7 +858,6 @@ static struct dentry *__gfs2_lookup(struct inode *dir, struct dentry *dentry, d = d_splice_alias(inode, dentry); if (IS_ERR(d)) { - iput(inode); gfs2_glock_dq_uninit(&gh); return d; } diff --git a/fs/gfs2/super.c b/fs/gfs2/super.c index 2607ff1..a346f56 100644 --- a/fs/gfs2/super.c +++ b/fs/gfs2/super.c @@ -1294,7 +1294,7 @@ static int gfs2_show_options(struct seq_file *s, struct dentry *root) int val; if (is_ancestor(root, sdp->sd_master_dir)) - seq_printf(s, ",meta"); + seq_puts(s, ",meta"); if (args->ar_lockproto[0]) seq_printf(s, ",lockproto=%s", args->ar_lockproto); if (args->ar_locktable[0]) @@ -1302,13 +1302,13 @@ static int gfs2_show_options(struct seq_file *s, struct dentry *root) if (args->ar_hostdata[0]) seq_printf(s, ",hostdata=%s", args->ar_hostdata); if (args->ar_spectator) - seq_printf(s, ",spectator"); + seq_puts(s, ",spectator"); if (args->ar_localflocks) - seq_printf(s, ",localflocks"); + seq_puts(s, ",localflocks"); if (args->ar_debug) - seq_printf(s, ",debug"); + seq_puts(s, ",debug"); if (args->ar_posix_acl) - seq_printf(s, ",acl"); + seq_puts(s, ",acl"); if (args->ar_quota != GFS2_QUOTA_DEFAULT) { char *state; switch (args->ar_quota) { @@ -1328,7 +1328,7 @@ static int gfs2_show_options(struct seq_file *s, struct dentry *root) seq_printf(s, ",quota=%s", state); } if (args->ar_suiddir) - seq_printf(s, ",suiddir"); + seq_puts(s, ",suiddir"); if (args->ar_data != GFS2_DATA_DEFAULT) { char *state; switch (args->ar_data) { @@ -1345,7 +1345,7 @@ static int gfs2_show_options(struct seq_file *s, struct dentry *root) seq_printf(s, ",data=%s", state); } if (args->ar_discard) - seq_printf(s, ",discard"); + seq_puts(s, ",discard"); val = sdp->sd_tune.gt_logd_secs; if (val != 30) seq_printf(s, ",commit=%d", val); @@ -1376,11 +1376,11 @@ static int gfs2_show_options(struct seq_file *s, struct dentry *root) seq_printf(s, ",errors=%s", state); } if (test_bit(SDF_NOBARRIERS, &sdp->sd_flags)) - seq_printf(s, ",nobarrier"); + seq_puts(s, ",nobarrier"); if (test_bit(SDF_DEMOTE, &sdp->sd_flags)) - seq_printf(s, ",demote_interface_used"); + seq_puts(s, ",demote_interface_used"); if (args->ar_rgrplvb) - seq_printf(s, ",rgrplvb"); + seq_puts(s, ",rgrplvb"); return 0; } diff --git a/fs/lockd/svc.c b/fs/lockd/svc.c index 8f27c93..ec9e082f 100644 --- a/fs/lockd/svc.c +++ b/fs/lockd/svc.c @@ -253,13 +253,11 @@ static int lockd_up_net(struct svc_serv *serv, struct net *net) error = make_socks(serv, net); if (error < 0) - goto err_socks; + goto err_bind; set_grace_period(net); dprintk("lockd_up_net: per-net data created; net=%p\n", net); return 0; -err_socks: - svc_rpcb_cleanup(serv, net); err_bind: ln->nlmsvc_users--; return error; @@ -34,6 +34,7 @@ #include <linux/device_cgroup.h> #include <linux/fs_struct.h> #include <linux/posix_acl.h> +#include <linux/hash.h> #include <asm/uaccess.h> #include "internal.h" @@ -643,24 +644,22 @@ static int complete_walk(struct nameidata *nd) static __always_inline void set_root(struct nameidata *nd) { - if (!nd->root.mnt) - get_fs_root(current->fs, &nd->root); + get_fs_root(current->fs, &nd->root); } static int link_path_walk(const char *, struct nameidata *); -static __always_inline void set_root_rcu(struct nameidata *nd) +static __always_inline unsigned set_root_rcu(struct nameidata *nd) { - if (!nd->root.mnt) { - struct fs_struct *fs = current->fs; - unsigned seq; + struct fs_struct *fs = current->fs; + unsigned seq, res; - do { - seq = read_seqcount_begin(&fs->seq); - nd->root = fs->root; - nd->seq = __read_seqcount_begin(&nd->root.dentry->d_seq); - } while (read_seqcount_retry(&fs->seq, seq)); - } + do { + seq = read_seqcount_begin(&fs->seq); + nd->root = fs->root; + res = __read_seqcount_begin(&nd->root.dentry->d_seq); + } while (read_seqcount_retry(&fs->seq, seq)); + return res; } static void path_put_conditional(struct path *path, struct nameidata *nd) @@ -860,7 +859,8 @@ follow_link(struct path *link, struct nameidata *nd, void **p) return PTR_ERR(s); } if (*s == '/') { - set_root(nd); + if (!nd->root.mnt) + set_root(nd); path_put(&nd->path); nd->path = nd->root; path_get(&nd->root); @@ -1137,13 +1137,15 @@ static bool __follow_mount_rcu(struct nameidata *nd, struct path *path, */ *inode = path->dentry->d_inode; } - return read_seqretry(&mount_lock, nd->m_seq) && + return !read_seqretry(&mount_lock, nd->m_seq) && !(path->dentry->d_flags & DCACHE_NEED_AUTOMOUNT); } static int follow_dotdot_rcu(struct nameidata *nd) { - set_root_rcu(nd); + struct inode *inode = nd->inode; + if (!nd->root.mnt) + set_root_rcu(nd); while (1) { if (nd->path.dentry == nd->root.dentry && @@ -1155,6 +1157,7 @@ static int follow_dotdot_rcu(struct nameidata *nd) struct dentry *parent = old->d_parent; unsigned seq; + inode = parent->d_inode; seq = read_seqcount_begin(&parent->d_seq); if (read_seqcount_retry(&old->d_seq, nd->seq)) goto failed; @@ -1164,6 +1167,7 @@ static int follow_dotdot_rcu(struct nameidata *nd) } if (!follow_up_rcu(&nd->path)) break; + inode = nd->path.dentry->d_inode; nd->seq = read_seqcount_begin(&nd->path.dentry->d_seq); } while (d_mountpoint(nd->path.dentry)) { @@ -1173,11 +1177,12 @@ static int follow_dotdot_rcu(struct nameidata *nd) break; nd->path.mnt = &mounted->mnt; nd->path.dentry = mounted->mnt.mnt_root; + inode = nd->path.dentry->d_inode; nd->seq = read_seqcount_begin(&nd->path.dentry->d_seq); - if (!read_seqretry(&mount_lock, nd->m_seq)) + if (read_seqretry(&mount_lock, nd->m_seq)) goto failed; } - nd->inode = nd->path.dentry->d_inode; + nd->inode = inode; return 0; failed: @@ -1256,7 +1261,8 @@ static void follow_mount(struct path *path) static void follow_dotdot(struct nameidata *nd) { - set_root(nd); + if (!nd->root.mnt) + set_root(nd); while(1) { struct dentry *old = nd->path.dentry; @@ -1634,8 +1640,7 @@ static inline int nested_symlink(struct path *path, struct nameidata *nd) static inline unsigned int fold_hash(unsigned long hash) { - hash += hash >> (8*sizeof(int)); - return hash; + return hash_64(hash, 32); } #else /* 32-bit case */ @@ -1669,9 +1674,9 @@ EXPORT_SYMBOL(full_name_hash); /* * Calculate the length and hash of the path component, and - * return the length of the component; + * return the "hash_len" as the result. */ -static inline unsigned long hash_name(const char *name, unsigned int *hashp) +static inline u64 hash_name(const char *name) { unsigned long a, b, adata, bdata, mask, hash, len; const struct word_at_a_time constants = WORD_AT_A_TIME_CONSTANTS; @@ -1691,9 +1696,8 @@ static inline unsigned long hash_name(const char *name, unsigned int *hashp) mask = create_zero_mask(adata | bdata); hash += a & zero_bytemask(mask); - *hashp = fold_hash(hash); - - return len + find_zero(mask); + len += find_zero(mask); + return hashlen_create(fold_hash(hash), len); } #else @@ -1711,7 +1715,7 @@ EXPORT_SYMBOL(full_name_hash); * We know there's a real path component here of at least * one character. */ -static inline unsigned long hash_name(const char *name, unsigned int *hashp) +static inline u64 hash_name(const char *name) { unsigned long hash = init_name_hash(); unsigned long len = 0, c; @@ -1722,8 +1726,7 @@ static inline unsigned long hash_name(const char *name, unsigned int *hashp) hash = partial_name_hash(c, hash); c = (unsigned char)name[len]; } while (c && c != '/'); - *hashp = end_name_hash(hash); - return len; + return hashlen_create(end_name_hash(hash), len); } #endif @@ -1748,20 +1751,17 @@ static int link_path_walk(const char *name, struct nameidata *nd) /* At this point we know we have a real path component. */ for(;;) { - struct qstr this; - long len; + u64 hash_len; int type; err = may_lookup(nd); if (err) break; - len = hash_name(name, &this.hash); - this.name = name; - this.len = len; + hash_len = hash_name(name); type = LAST_NORM; - if (name[0] == '.') switch (len) { + if (name[0] == '.') switch (hashlen_len(hash_len)) { case 2: if (name[1] == '.') { type = LAST_DOTDOT; @@ -1775,29 +1775,32 @@ static int link_path_walk(const char *name, struct nameidata *nd) struct dentry *parent = nd->path.dentry; nd->flags &= ~LOOKUP_JUMPED; if (unlikely(parent->d_flags & DCACHE_OP_HASH)) { + struct qstr this = { { .hash_len = hash_len }, .name = name }; err = parent->d_op->d_hash(parent, &this); if (err < 0) break; + hash_len = this.hash_len; + name = this.name; } } - nd->last = this; + nd->last.hash_len = hash_len; + nd->last.name = name; nd->last_type = type; - if (!name[len]) + name += hashlen_len(hash_len); + if (!*name) return 0; /* * If it wasn't NUL, we know it was '/'. Skip that * slash, and continue until no more slashes. */ do { - len++; - } while (unlikely(name[len] == '/')); - if (!name[len]) + name++; + } while (unlikely(*name == '/')); + if (!*name) return 0; - name += len; - err = walk_component(nd, &next, LOOKUP_FOLLOW); if (err < 0) return err; @@ -1852,7 +1855,7 @@ static int path_init(int dfd, const char *name, unsigned int flags, if (*name=='/') { if (flags & LOOKUP_RCU) { rcu_read_lock(); - set_root_rcu(nd); + nd->seq = set_root_rcu(nd); } else { set_root(nd); path_get(&nd->root); @@ -1903,7 +1906,14 @@ static int path_init(int dfd, const char *name, unsigned int flags, } nd->inode = nd->path.dentry->d_inode; - return 0; + if (!(flags & LOOKUP_RCU)) + return 0; + if (likely(!read_seqcount_retry(&nd->path.dentry->d_seq, nd->seq))) + return 0; + if (!(nd->flags & LOOKUP_ROOT)) + nd->root.mnt = NULL; + rcu_read_unlock(); + return -ECHILD; } static inline int lookup_last(struct nameidata *nd, struct path *path) diff --git a/fs/namespace.c b/fs/namespace.c index a01c773..ef42d9b 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -1217,6 +1217,11 @@ static void namespace_unlock(void) head.first->pprev = &head.first; INIT_HLIST_HEAD(&unmounted); + /* undo decrements we'd done in umount_tree() */ + hlist_for_each_entry(mnt, &head, mnt_hash) + if (mnt->mnt_ex_mountpoint.mnt) + mntget(mnt->mnt_ex_mountpoint.mnt); + up_write(&namespace_sem); synchronize_rcu(); @@ -1253,6 +1258,9 @@ void umount_tree(struct mount *mnt, int how) hlist_add_head(&p->mnt_hash, &tmp_list); } + hlist_for_each_entry(p, &tmp_list, mnt_hash) + list_del_init(&p->mnt_child); + if (how) propagate_umount(&tmp_list); @@ -1263,9 +1271,9 @@ void umount_tree(struct mount *mnt, int how) p->mnt_ns = NULL; if (how < 2) p->mnt.mnt_flags |= MNT_SYNC_UMOUNT; - list_del_init(&p->mnt_child); if (mnt_has_parent(p)) { put_mountpoint(p->mnt_mp); + mnt_add_count(p->mnt_parent, -1); /* move the reference to mountpoint into ->mnt_ex_mountpoint */ p->mnt_ex_mountpoint.dentry = p->mnt_mountpoint; p->mnt_ex_mountpoint.mnt = &p->mnt_parent->mnt; diff --git a/fs/nfs/client.c b/fs/nfs/client.c index 1c5ff6d..6a4f366 100644 --- a/fs/nfs/client.c +++ b/fs/nfs/client.c @@ -1412,24 +1412,18 @@ int nfs_fs_proc_net_init(struct net *net) p = proc_create("volumes", S_IFREG|S_IRUGO, nn->proc_nfsfs, &nfs_volume_list_fops); if (!p) - goto error_2; + goto error_1; return 0; -error_2: - remove_proc_entry("servers", nn->proc_nfsfs); error_1: - remove_proc_entry("fs/nfsfs", NULL); + remove_proc_subtree("nfsfs", net->proc_net); error_0: return -ENOMEM; } void nfs_fs_proc_net_exit(struct net *net) { - struct nfs_net *nn = net_generic(net, nfs_net_id); - - remove_proc_entry("volumes", nn->proc_nfsfs); - remove_proc_entry("servers", nn->proc_nfsfs); - remove_proc_entry("fs/nfsfs", NULL); + remove_proc_subtree("nfsfs", net->proc_net); } /* diff --git a/fs/nfs/filelayout/filelayout.c b/fs/nfs/filelayout/filelayout.c index 1359c4a..9097807 100644 --- a/fs/nfs/filelayout/filelayout.c +++ b/fs/nfs/filelayout/filelayout.c @@ -1269,11 +1269,12 @@ filelayout_search_commit_reqs(struct nfs_commit_info *cinfo, struct page *page) static void filelayout_retry_commit(struct nfs_commit_info *cinfo, int idx) { struct pnfs_ds_commit_info *fl_cinfo = cinfo->ds; - struct pnfs_commit_bucket *bucket = fl_cinfo->buckets; + struct pnfs_commit_bucket *bucket; struct pnfs_layout_segment *freeme; int i; - for (i = idx; i < fl_cinfo->nbuckets; i++, bucket++) { + for (i = idx; i < fl_cinfo->nbuckets; i++) { + bucket = &fl_cinfo->buckets[i]; if (list_empty(&bucket->committing)) continue; nfs_retry_commit(&bucket->committing, bucket->clseg, cinfo); diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h index 92193ed..a8b855a 100644 --- a/fs/nfs/nfs4_fs.h +++ b/fs/nfs/nfs4_fs.h @@ -130,16 +130,15 @@ enum { */ struct nfs4_lock_state { - struct list_head ls_locks; /* Other lock stateids */ - struct nfs4_state * ls_state; /* Pointer to open state */ + struct list_head ls_locks; /* Other lock stateids */ + struct nfs4_state * ls_state; /* Pointer to open state */ #define NFS_LOCK_INITIALIZED 0 #define NFS_LOCK_LOST 1 - unsigned long ls_flags; + unsigned long ls_flags; struct nfs_seqid_counter ls_seqid; - nfs4_stateid ls_stateid; - atomic_t ls_count; - fl_owner_t ls_owner; - struct work_struct ls_release; + nfs4_stateid ls_stateid; + atomic_t ls_count; + fl_owner_t ls_owner; }; /* bits for nfs4_state->flags */ diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c index a043f61..22fe351 100644 --- a/fs/nfs/nfs4state.c +++ b/fs/nfs/nfs4state.c @@ -799,18 +799,6 @@ __nfs4_find_lock_state(struct nfs4_state *state, fl_owner_t fl_owner) return NULL; } -static void -free_lock_state_work(struct work_struct *work) -{ - struct nfs4_lock_state *lsp = container_of(work, - struct nfs4_lock_state, ls_release); - struct nfs4_state *state = lsp->ls_state; - struct nfs_server *server = state->owner->so_server; - struct nfs_client *clp = server->nfs_client; - - clp->cl_mvops->free_lock_state(server, lsp); -} - /* * Return a compatible lock_state. If no initialized lock_state structure * exists, return an uninitialized one. @@ -832,7 +820,6 @@ static struct nfs4_lock_state *nfs4_alloc_lock_state(struct nfs4_state *state, f if (lsp->ls_seqid.owner_id < 0) goto out_free; INIT_LIST_HEAD(&lsp->ls_locks); - INIT_WORK(&lsp->ls_release, free_lock_state_work); return lsp; out_free: kfree(lsp); @@ -896,12 +883,13 @@ void nfs4_put_lock_state(struct nfs4_lock_state *lsp) if (list_empty(&state->lock_states)) clear_bit(LK_STATE_IN_USE, &state->flags); spin_unlock(&state->state_lock); - if (test_bit(NFS_LOCK_INITIALIZED, &lsp->ls_flags)) - queue_work(nfsiod_workqueue, &lsp->ls_release); - else { - server = state->owner->so_server; + server = state->owner->so_server; + if (test_bit(NFS_LOCK_INITIALIZED, &lsp->ls_flags)) { + struct nfs_client *clp = server->nfs_client; + + clp->cl_mvops->free_lock_state(server, lsp); + } else nfs4_free_lock_state(server, lsp); - } } static void nfs4_fl_copy_lock(struct file_lock *dst, struct file_lock *src) diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c index f9821ce..e94457c 100644 --- a/fs/nfsd/nfs4xdr.c +++ b/fs/nfsd/nfs4xdr.c @@ -2657,6 +2657,7 @@ nfsd4_encode_dirent(void *ccdv, const char *name, int namlen, struct xdr_stream *xdr = cd->xdr; int start_offset = xdr->buf->len; int cookie_offset; + u32 name_and_cookie; int entry_bytes; __be32 nfserr = nfserr_toosmall; __be64 wire_offset; @@ -2718,7 +2719,14 @@ nfsd4_encode_dirent(void *ccdv, const char *name, int namlen, cd->rd_maxcount -= entry_bytes; if (!cd->rd_dircount) goto fail; - cd->rd_dircount--; + /* + * RFC 3530 14.2.24 describes rd_dircount as only a "hint", so + * let's always let through the first entry, at least: + */ + name_and_cookie = 4 * XDR_QUADLEN(namlen) + 8; + if (name_and_cookie > cd->rd_dircount && cd->cookie_offset) + goto fail; + cd->rd_dircount -= min(cd->rd_dircount, name_and_cookie); cd->cookie_offset = cookie_offset; skip_entry: cd->common.err = nfs_ok; @@ -3321,6 +3329,10 @@ nfsd4_encode_readdir(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4 } maxcount = min_t(int, maxcount-16, bytes_left); + /* RFC 3530 14.2.24 allows us to ignore dircount when it's 0: */ + if (!readdir->rd_dircount) + readdir->rd_dircount = INT_MAX; + readdir->xdr = xdr; readdir->rd_maxcount = maxcount; readdir->common.err = 0; diff --git a/fs/notify/fdinfo.c b/fs/notify/fdinfo.c index 238a593..9d7e2b9 100644 --- a/fs/notify/fdinfo.c +++ b/fs/notify/fdinfo.c @@ -42,7 +42,7 @@ static int show_mark_fhandle(struct seq_file *m, struct inode *inode) { struct { struct file_handle handle; - u8 pad[64]; + u8 pad[MAX_HANDLE_SZ]; } f; int size, ret, i; @@ -50,7 +50,7 @@ static int show_mark_fhandle(struct seq_file *m, struct inode *inode) size = f.handle.handle_bytes >> 2; ret = exportfs_encode_inode_fh(inode, (struct fid *)f.handle.f_handle, &size, 0); - if ((ret == 255) || (ret == -ENOSPC)) { + if ((ret == FILEID_INVALID) || (ret < 0)) { WARN_ONCE(1, "Can't encode file handler for inotify: %d\n", ret); return 0; } @@ -381,6 +381,7 @@ static void __propagate_umount(struct mount *mnt) * other children */ if (child && list_empty(&child->mnt_mounts)) { + list_del_init(&child->mnt_child); hlist_del_init_rcu(&child->mnt_hash); hlist_add_before_rcu(&child->mnt_hash, &mnt->mnt_hash); } @@ -65,7 +65,7 @@ int sync_filesystem(struct super_block *sb) return ret; return __sync_filesystem(sb, 1); } -EXPORT_SYMBOL_GPL(sync_filesystem); +EXPORT_SYMBOL(sync_filesystem); static void sync_inodes_one_sb(struct super_block *sb, void *arg) { diff --git a/fs/udf/ialloc.c b/fs/udf/ialloc.c index 6eaf5ed..e77db62 100644 --- a/fs/udf/ialloc.c +++ b/fs/udf/ialloc.c @@ -45,7 +45,7 @@ void udf_free_inode(struct inode *inode) udf_free_blocks(sb, NULL, &UDF_I(inode)->i_location, 0, 1); } -struct inode *udf_new_inode(struct inode *dir, umode_t mode, int *err) +struct inode *udf_new_inode(struct inode *dir, umode_t mode) { struct super_block *sb = dir->i_sb; struct udf_sb_info *sbi = UDF_SB(sb); @@ -55,14 +55,12 @@ struct inode *udf_new_inode(struct inode *dir, umode_t mode, int *err) struct udf_inode_info *iinfo; struct udf_inode_info *dinfo = UDF_I(dir); struct logicalVolIntegrityDescImpUse *lvidiu; + int err; inode = new_inode(sb); - if (!inode) { - *err = -ENOMEM; - return NULL; - } - *err = -ENOSPC; + if (!inode) + return ERR_PTR(-ENOMEM); iinfo = UDF_I(inode); if (UDF_QUERY_FLAG(inode->i_sb, UDF_FLAG_USE_EXTENDED_FE)) { @@ -80,21 +78,22 @@ struct inode *udf_new_inode(struct inode *dir, umode_t mode, int *err) } if (!iinfo->i_ext.i_data) { iput(inode); - *err = -ENOMEM; - return NULL; + return ERR_PTR(-ENOMEM); } + err = -ENOSPC; block = udf_new_block(dir->i_sb, NULL, dinfo->i_location.partitionReferenceNum, - start, err); - if (*err) { + start, &err); + if (err) { iput(inode); - return NULL; + return ERR_PTR(err); } lvidiu = udf_sb_lvidiu(sb); if (lvidiu) { iinfo->i_unique = lvid_get_unique_id(sb); + inode->i_generation = iinfo->i_unique; mutex_lock(&sbi->s_alloc_mutex); if (S_ISDIR(mode)) le32_add_cpu(&lvidiu->numDirs, 1); @@ -123,9 +122,12 @@ struct inode *udf_new_inode(struct inode *dir, umode_t mode, int *err) iinfo->i_alloc_type = ICBTAG_FLAG_AD_LONG; inode->i_mtime = inode->i_atime = inode->i_ctime = iinfo->i_crtime = current_fs_time(inode->i_sb); - insert_inode_hash(inode); + if (unlikely(insert_inode_locked(inode) < 0)) { + make_bad_inode(inode); + iput(inode); + return ERR_PTR(-EIO); + } mark_inode_dirty(inode); - *err = 0; return inode; } diff --git a/fs/udf/inode.c b/fs/udf/inode.c index 236cd48..0859884 100644 --- a/fs/udf/inode.c +++ b/fs/udf/inode.c @@ -51,7 +51,6 @@ MODULE_LICENSE("GPL"); static umode_t udf_convert_permissions(struct fileEntry *); static int udf_update_inode(struct inode *, int); -static void udf_fill_inode(struct inode *, struct buffer_head *); static int udf_sync_inode(struct inode *inode); static int udf_alloc_i_data(struct inode *inode, size_t size); static sector_t inode_getblk(struct inode *, sector_t, int *, int *); @@ -1271,12 +1270,33 @@ update_time: return 0; } -static void __udf_read_inode(struct inode *inode) +/* + * Maximum length of linked list formed by ICB hierarchy. The chosen number is + * arbitrary - just that we hopefully don't limit any real use of rewritten + * inode on write-once media but avoid looping for too long on corrupted media. + */ +#define UDF_MAX_ICB_NESTING 1024 + +static int udf_read_inode(struct inode *inode) { struct buffer_head *bh = NULL; struct fileEntry *fe; + struct extendedFileEntry *efe; uint16_t ident; struct udf_inode_info *iinfo = UDF_I(inode); + struct udf_sb_info *sbi = UDF_SB(inode->i_sb); + struct kernel_lb_addr *iloc = &iinfo->i_location; + unsigned int link_count; + unsigned int indirections = 0; + int ret = -EIO; + +reread: + if (iloc->logicalBlockNum >= + sbi->s_partmaps[iloc->partitionReferenceNum].s_partition_len) { + udf_debug("block=%d, partition=%d out of range\n", + iloc->logicalBlockNum, iloc->partitionReferenceNum); + return -EIO; + } /* * Set defaults, but the inode is still incomplete! @@ -1290,78 +1310,54 @@ static void __udf_read_inode(struct inode *inode) * i_nlink = 1 * i_op = NULL; */ - bh = udf_read_ptagged(inode->i_sb, &iinfo->i_location, 0, &ident); + bh = udf_read_ptagged(inode->i_sb, iloc, 0, &ident); if (!bh) { udf_err(inode->i_sb, "(ino %ld) failed !bh\n", inode->i_ino); - make_bad_inode(inode); - return; + return -EIO; } if (ident != TAG_IDENT_FE && ident != TAG_IDENT_EFE && ident != TAG_IDENT_USE) { udf_err(inode->i_sb, "(ino %ld) failed ident=%d\n", inode->i_ino, ident); - brelse(bh); - make_bad_inode(inode); - return; + goto out; } fe = (struct fileEntry *)bh->b_data; + efe = (struct extendedFileEntry *)bh->b_data; if (fe->icbTag.strategyType == cpu_to_le16(4096)) { struct buffer_head *ibh; - ibh = udf_read_ptagged(inode->i_sb, &iinfo->i_location, 1, - &ident); + ibh = udf_read_ptagged(inode->i_sb, iloc, 1, &ident); if (ident == TAG_IDENT_IE && ibh) { - struct buffer_head *nbh = NULL; struct kernel_lb_addr loc; struct indirectEntry *ie; ie = (struct indirectEntry *)ibh->b_data; loc = lelb_to_cpu(ie->indirectICB.extLocation); - if (ie->indirectICB.extLength && - (nbh = udf_read_ptagged(inode->i_sb, &loc, 0, - &ident))) { - if (ident == TAG_IDENT_FE || - ident == TAG_IDENT_EFE) { - memcpy(&iinfo->i_location, - &loc, - sizeof(struct kernel_lb_addr)); - brelse(bh); - brelse(ibh); - brelse(nbh); - __udf_read_inode(inode); - return; + if (ie->indirectICB.extLength) { + brelse(ibh); + memcpy(&iinfo->i_location, &loc, + sizeof(struct kernel_lb_addr)); + if (++indirections > UDF_MAX_ICB_NESTING) { + udf_err(inode->i_sb, + "too many ICBs in ICB hierarchy" + " (max %d supported)\n", + UDF_MAX_ICB_NESTING); + goto out; } - brelse(nbh); + brelse(bh); + goto reread; } } brelse(ibh); } else if (fe->icbTag.strategyType != cpu_to_le16(4)) { udf_err(inode->i_sb, "unsupported strategy type: %d\n", le16_to_cpu(fe->icbTag.strategyType)); - brelse(bh); - make_bad_inode(inode); - return; + goto out; } - udf_fill_inode(inode, bh); - - brelse(bh); -} - -static void udf_fill_inode(struct inode *inode, struct buffer_head *bh) -{ - struct fileEntry *fe; - struct extendedFileEntry *efe; - struct udf_sb_info *sbi = UDF_SB(inode->i_sb); - struct udf_inode_info *iinfo = UDF_I(inode); - unsigned int link_count; - - fe = (struct fileEntry *)bh->b_data; - efe = (struct extendedFileEntry *)bh->b_data; - if (fe->icbTag.strategyType == cpu_to_le16(4)) iinfo->i_strat4096 = 0; else /* if (fe->icbTag.strategyType == cpu_to_le16(4096)) */ @@ -1378,11 +1374,10 @@ static void udf_fill_inode(struct inode *inode, struct buffer_head *bh) if (fe->descTag.tagIdent == cpu_to_le16(TAG_IDENT_EFE)) { iinfo->i_efe = 1; iinfo->i_use = 0; - if (udf_alloc_i_data(inode, inode->i_sb->s_blocksize - - sizeof(struct extendedFileEntry))) { - make_bad_inode(inode); - return; - } + ret = udf_alloc_i_data(inode, inode->i_sb->s_blocksize - + sizeof(struct extendedFileEntry)); + if (ret) + goto out; memcpy(iinfo->i_ext.i_data, bh->b_data + sizeof(struct extendedFileEntry), inode->i_sb->s_blocksize - @@ -1390,11 +1385,10 @@ static void udf_fill_inode(struct inode *inode, struct buffer_head *bh) } else if (fe->descTag.tagIdent == cpu_to_le16(TAG_IDENT_FE)) { iinfo->i_efe = 0; iinfo->i_use = 0; - if (udf_alloc_i_data(inode, inode->i_sb->s_blocksize - - sizeof(struct fileEntry))) { - make_bad_inode(inode); - return; - } + ret = udf_alloc_i_data(inode, inode->i_sb->s_blocksize - + sizeof(struct fileEntry)); + if (ret) + goto out; memcpy(iinfo->i_ext.i_data, bh->b_data + sizeof(struct fileEntry), inode->i_sb->s_blocksize - sizeof(struct fileEntry)); @@ -1404,18 +1398,18 @@ static void udf_fill_inode(struct inode *inode, struct buffer_head *bh) iinfo->i_lenAlloc = le32_to_cpu( ((struct unallocSpaceEntry *)bh->b_data)-> lengthAllocDescs); - if (udf_alloc_i_data(inode, inode->i_sb->s_blocksize - - sizeof(struct unallocSpaceEntry))) { - make_bad_inode(inode); - return; - } + ret = udf_alloc_i_data(inode, inode->i_sb->s_blocksize - + sizeof(struct unallocSpaceEntry)); + if (ret) + goto out; memcpy(iinfo->i_ext.i_data, bh->b_data + sizeof(struct unallocSpaceEntry), inode->i_sb->s_blocksize - sizeof(struct unallocSpaceEntry)); - return; + return 0; } + ret = -EIO; read_lock(&sbi->s_cred_lock); i_uid_write(inode, le32_to_cpu(fe->uid)); if (!uid_valid(inode->i_uid) || @@ -1441,8 +1435,10 @@ static void udf_fill_inode(struct inode *inode, struct buffer_head *bh) read_unlock(&sbi->s_cred_lock); link_count = le16_to_cpu(fe->fileLinkCount); - if (!link_count) - link_count = 1; + if (!link_count) { + ret = -ESTALE; + goto out; + } set_nlink(inode, link_count); inode->i_size = le64_to_cpu(fe->informationLength); @@ -1488,6 +1484,7 @@ static void udf_fill_inode(struct inode *inode, struct buffer_head *bh) iinfo->i_lenAlloc = le32_to_cpu(efe->lengthAllocDescs); iinfo->i_checkpoint = le32_to_cpu(efe->checkpoint); } + inode->i_generation = iinfo->i_unique; switch (fe->icbTag.fileType) { case ICBTAG_FILE_TYPE_DIRECTORY: @@ -1537,8 +1534,7 @@ static void udf_fill_inode(struct inode *inode, struct buffer_head *bh) default: udf_err(inode->i_sb, "(ino %ld) failed unknown file type=%d\n", inode->i_ino, fe->icbTag.fileType); - make_bad_inode(inode); - return; + goto out; } if (S_ISCHR(inode->i_mode) || S_ISBLK(inode->i_mode)) { struct deviceSpec *dsea = @@ -1549,8 +1545,12 @@ static void udf_fill_inode(struct inode *inode, struct buffer_head *bh) le32_to_cpu(dsea->minorDeviceIdent))); /* Developer ID ??? */ } else - make_bad_inode(inode); + goto out; } + ret = 0; +out: + brelse(bh); + return ret; } static int udf_alloc_i_data(struct inode *inode, size_t size) @@ -1664,7 +1664,7 @@ static int udf_update_inode(struct inode *inode, int do_sync) FE_PERM_U_DELETE | FE_PERM_U_CHATTR)); fe->permissions = cpu_to_le32(udfperms); - if (S_ISDIR(inode->i_mode)) + if (S_ISDIR(inode->i_mode) && inode->i_nlink > 0) fe->fileLinkCount = cpu_to_le16(inode->i_nlink - 1); else fe->fileLinkCount = cpu_to_le16(inode->i_nlink); @@ -1830,32 +1830,23 @@ struct inode *udf_iget(struct super_block *sb, struct kernel_lb_addr *ino) { unsigned long block = udf_get_lb_pblock(sb, ino, 0); struct inode *inode = iget_locked(sb, block); + int err; if (!inode) - return NULL; - - if (inode->i_state & I_NEW) { - memcpy(&UDF_I(inode)->i_location, ino, sizeof(struct kernel_lb_addr)); - __udf_read_inode(inode); - unlock_new_inode(inode); - } + return ERR_PTR(-ENOMEM); - if (is_bad_inode(inode)) - goto out_iput; + if (!(inode->i_state & I_NEW)) + return inode; - if (ino->logicalBlockNum >= UDF_SB(sb)-> - s_partmaps[ino->partitionReferenceNum].s_partition_len) { - udf_debug("block=%d, partition=%d out of range\n", - ino->logicalBlockNum, ino->partitionReferenceNum); - make_bad_inode(inode); - goto out_iput; + memcpy(&UDF_I(inode)->i_location, ino, sizeof(struct kernel_lb_addr)); + err = udf_read_inode(inode); + if (err < 0) { + iget_failed(inode); + return ERR_PTR(err); } + unlock_new_inode(inode); return inode; - - out_iput: - iput(inode); - return NULL; } int udf_add_aext(struct inode *inode, struct extent_position *epos, diff --git a/fs/udf/namei.c b/fs/udf/namei.c index 83a0600..c12e260 100644 --- a/fs/udf/namei.c +++ b/fs/udf/namei.c @@ -270,9 +270,8 @@ static struct dentry *udf_lookup(struct inode *dir, struct dentry *dentry, NULL, 0), }; inode = udf_iget(dir->i_sb, lb); - if (!inode) { - return ERR_PTR(-EACCES); - } + if (IS_ERR(inode)) + return inode; } else #endif /* UDF_RECOVERY */ @@ -285,9 +284,8 @@ static struct dentry *udf_lookup(struct inode *dir, struct dentry *dentry, loc = lelb_to_cpu(cfi.icb.extLocation); inode = udf_iget(dir->i_sb, &loc); - if (!inode) { - return ERR_PTR(-EACCES); - } + if (IS_ERR(inode)) + return ERR_CAST(inode); } return d_splice_alias(inode, dentry); @@ -550,32 +548,18 @@ static int udf_delete_entry(struct inode *inode, struct fileIdentDesc *fi, return udf_write_fi(inode, cfi, fi, fibh, NULL, NULL); } -static int udf_create(struct inode *dir, struct dentry *dentry, umode_t mode, - bool excl) +static int udf_add_nondir(struct dentry *dentry, struct inode *inode) { + struct udf_inode_info *iinfo = UDF_I(inode); + struct inode *dir = dentry->d_parent->d_inode; struct udf_fileident_bh fibh; - struct inode *inode; struct fileIdentDesc cfi, *fi; int err; - struct udf_inode_info *iinfo; - - inode = udf_new_inode(dir, mode, &err); - if (!inode) { - return err; - } - - iinfo = UDF_I(inode); - if (iinfo->i_alloc_type == ICBTAG_FLAG_AD_IN_ICB) - inode->i_data.a_ops = &udf_adinicb_aops; - else - inode->i_data.a_ops = &udf_aops; - inode->i_op = &udf_file_inode_operations; - inode->i_fop = &udf_file_operations; - mark_inode_dirty(inode); fi = udf_add_entry(dir, dentry, &fibh, &cfi, &err); - if (!fi) { + if (unlikely(!fi)) { inode_dec_link_count(inode); + unlock_new_inode(inode); iput(inode); return err; } @@ -589,23 +573,21 @@ static int udf_create(struct inode *dir, struct dentry *dentry, umode_t mode, if (fibh.sbh != fibh.ebh) brelse(fibh.ebh); brelse(fibh.sbh); + unlock_new_inode(inode); d_instantiate(dentry, inode); return 0; } -static int udf_tmpfile(struct inode *dir, struct dentry *dentry, umode_t mode) +static int udf_create(struct inode *dir, struct dentry *dentry, umode_t mode, + bool excl) { - struct inode *inode; - struct udf_inode_info *iinfo; - int err; + struct inode *inode = udf_new_inode(dir, mode); - inode = udf_new_inode(dir, mode, &err); - if (!inode) - return err; + if (IS_ERR(inode)) + return PTR_ERR(inode); - iinfo = UDF_I(inode); - if (iinfo->i_alloc_type == ICBTAG_FLAG_AD_IN_ICB) + if (UDF_I(inode)->i_alloc_type == ICBTAG_FLAG_AD_IN_ICB) inode->i_data.a_ops = &udf_adinicb_aops; else inode->i_data.a_ops = &udf_aops; @@ -613,7 +595,25 @@ static int udf_tmpfile(struct inode *dir, struct dentry *dentry, umode_t mode) inode->i_fop = &udf_file_operations; mark_inode_dirty(inode); + return udf_add_nondir(dentry, inode); +} + +static int udf_tmpfile(struct inode *dir, struct dentry *dentry, umode_t mode) +{ + struct inode *inode = udf_new_inode(dir, mode); + + if (IS_ERR(inode)) + return PTR_ERR(inode); + + if (UDF_I(inode)->i_alloc_type == ICBTAG_FLAG_AD_IN_ICB) + inode->i_data.a_ops = &udf_adinicb_aops; + else + inode->i_data.a_ops = &udf_aops; + inode->i_op = &udf_file_inode_operations; + inode->i_fop = &udf_file_operations; + mark_inode_dirty(inode); d_tmpfile(dentry, inode); + unlock_new_inode(inode); return 0; } @@ -621,44 +621,16 @@ static int udf_mknod(struct inode *dir, struct dentry *dentry, umode_t mode, dev_t rdev) { struct inode *inode; - struct udf_fileident_bh fibh; - struct fileIdentDesc cfi, *fi; - int err; - struct udf_inode_info *iinfo; if (!old_valid_dev(rdev)) return -EINVAL; - err = -EIO; - inode = udf_new_inode(dir, mode, &err); - if (!inode) - goto out; + inode = udf_new_inode(dir, mode); + if (IS_ERR(inode)) + return PTR_ERR(inode); - iinfo = UDF_I(inode); init_special_inode(inode, mode, rdev); - fi = udf_add_entry(dir, dentry, &fibh, &cfi, &err); - if (!fi) { - inode_dec_link_count(inode); - iput(inode); - return err; - } - cfi.icb.extLength = cpu_to_le32(inode->i_sb->s_blocksize); - cfi.icb.extLocation = cpu_to_lelb(iinfo->i_location); - *(__le32 *)((struct allocDescImpUse *)cfi.icb.impUse)->impUse = - cpu_to_le32(iinfo->i_unique & 0x00000000FFFFFFFFUL); - udf_write_fi(dir, &cfi, fi, &fibh, NULL, NULL); - if (UDF_I(dir)->i_alloc_type == ICBTAG_FLAG_AD_IN_ICB) - mark_inode_dirty(dir); - mark_inode_dirty(inode); - - if (fibh.sbh != fibh.ebh) - brelse(fibh.ebh); - brelse(fibh.sbh); - d_instantiate(dentry, inode); - err = 0; - -out: - return err; + return udf_add_nondir(dentry, inode); } static int udf_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode) @@ -670,10 +642,9 @@ static int udf_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode) struct udf_inode_info *dinfo = UDF_I(dir); struct udf_inode_info *iinfo; - err = -EIO; - inode = udf_new_inode(dir, S_IFDIR | mode, &err); - if (!inode) - goto out; + inode = udf_new_inode(dir, S_IFDIR | mode); + if (IS_ERR(inode)) + return PTR_ERR(inode); iinfo = UDF_I(inode); inode->i_op = &udf_dir_inode_operations; @@ -681,6 +652,7 @@ static int udf_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode) fi = udf_add_entry(inode, NULL, &fibh, &cfi, &err); if (!fi) { inode_dec_link_count(inode); + unlock_new_inode(inode); iput(inode); goto out; } @@ -699,6 +671,7 @@ static int udf_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode) if (!fi) { clear_nlink(inode); mark_inode_dirty(inode); + unlock_new_inode(inode); iput(inode); goto out; } @@ -710,6 +683,7 @@ static int udf_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode) udf_write_fi(dir, &cfi, fi, &fibh, NULL, NULL); inc_nlink(dir); mark_inode_dirty(dir); + unlock_new_inode(inode); d_instantiate(dentry, inode); if (fibh.sbh != fibh.ebh) brelse(fibh.ebh); @@ -876,14 +850,11 @@ out: static int udf_symlink(struct inode *dir, struct dentry *dentry, const char *symname) { - struct inode *inode; + struct inode *inode = udf_new_inode(dir, S_IFLNK | S_IRWXUGO); struct pathComponent *pc; const char *compstart; - struct udf_fileident_bh fibh; struct extent_position epos = {}; int eoffset, elen = 0; - struct fileIdentDesc *fi; - struct fileIdentDesc cfi; uint8_t *ea; int err; int block; @@ -892,9 +863,8 @@ static int udf_symlink(struct inode *dir, struct dentry *dentry, struct udf_inode_info *iinfo; struct super_block *sb = dir->i_sb; - inode = udf_new_inode(dir, S_IFLNK | S_IRWXUGO, &err); - if (!inode) - goto out; + if (IS_ERR(inode)) + return PTR_ERR(inode); iinfo = UDF_I(inode); down_write(&iinfo->i_data_sem); @@ -1012,32 +982,15 @@ static int udf_symlink(struct inode *dir, struct dentry *dentry, mark_inode_dirty(inode); up_write(&iinfo->i_data_sem); - fi = udf_add_entry(dir, dentry, &fibh, &cfi, &err); - if (!fi) - goto out_fail; - cfi.icb.extLength = cpu_to_le32(sb->s_blocksize); - cfi.icb.extLocation = cpu_to_lelb(iinfo->i_location); - if (UDF_SB(inode->i_sb)->s_lvid_bh) { - *(__le32 *)((struct allocDescImpUse *)cfi.icb.impUse)->impUse = - cpu_to_le32(lvid_get_unique_id(sb)); - } - udf_write_fi(dir, &cfi, fi, &fibh, NULL, NULL); - if (UDF_I(dir)->i_alloc_type == ICBTAG_FLAG_AD_IN_ICB) - mark_inode_dirty(dir); - if (fibh.sbh != fibh.ebh) - brelse(fibh.ebh); - brelse(fibh.sbh); - d_instantiate(dentry, inode); - err = 0; - + err = udf_add_nondir(dentry, inode); out: kfree(name); return err; out_no_entry: up_write(&iinfo->i_data_sem); -out_fail: inode_dec_link_count(inode); + unlock_new_inode(inode); iput(inode); goto out; } @@ -1222,7 +1175,7 @@ static struct dentry *udf_get_parent(struct dentry *child) struct udf_fileident_bh fibh; if (!udf_find_entry(child->d_inode, &dotdot, &fibh, &cfi)) - goto out_unlock; + return ERR_PTR(-EACCES); if (fibh.sbh != fibh.ebh) brelse(fibh.ebh); @@ -1230,12 +1183,10 @@ static struct dentry *udf_get_parent(struct dentry *child) tloc = lelb_to_cpu(cfi.icb.extLocation); inode = udf_iget(child->d_inode->i_sb, &tloc); - if (!inode) - goto out_unlock; + if (IS_ERR(inode)) + return ERR_CAST(inode); return d_obtain_alias(inode); -out_unlock: - return ERR_PTR(-EACCES); } @@ -1252,8 +1203,8 @@ static struct dentry *udf_nfs_get_inode(struct super_block *sb, u32 block, loc.partitionReferenceNum = partref; inode = udf_iget(sb, &loc); - if (inode == NULL) - return ERR_PTR(-ENOMEM); + if (IS_ERR(inode)) + return ERR_CAST(inode); if (generation && inode->i_generation != generation) { iput(inode); diff --git a/fs/udf/super.c b/fs/udf/super.c index 813da94..5401fc3 100644 --- a/fs/udf/super.c +++ b/fs/udf/super.c @@ -961,12 +961,14 @@ struct inode *udf_find_metadata_inode_efe(struct super_block *sb, metadata_fe = udf_iget(sb, &addr); - if (metadata_fe == NULL) + if (IS_ERR(metadata_fe)) { udf_warn(sb, "metadata inode efe not found\n"); - else if (UDF_I(metadata_fe)->i_alloc_type != ICBTAG_FLAG_AD_SHORT) { + return metadata_fe; + } + if (UDF_I(metadata_fe)->i_alloc_type != ICBTAG_FLAG_AD_SHORT) { udf_warn(sb, "metadata inode efe does not have short allocation descriptors!\n"); iput(metadata_fe); - metadata_fe = NULL; + return ERR_PTR(-EIO); } return metadata_fe; @@ -978,6 +980,7 @@ static int udf_load_metadata_files(struct super_block *sb, int partition) struct udf_part_map *map; struct udf_meta_data *mdata; struct kernel_lb_addr addr; + struct inode *fe; map = &sbi->s_partmaps[partition]; mdata = &map->s_type_specific.s_metadata; @@ -986,22 +989,24 @@ static int udf_load_metadata_files(struct super_block *sb, int partition) udf_debug("Metadata file location: block = %d part = %d\n", mdata->s_meta_file_loc, map->s_partition_num); - mdata->s_metadata_fe = udf_find_metadata_inode_efe(sb, - mdata->s_meta_file_loc, map->s_partition_num); - - if (mdata->s_metadata_fe == NULL) { + fe = udf_find_metadata_inode_efe(sb, mdata->s_meta_file_loc, + map->s_partition_num); + if (IS_ERR(fe)) { /* mirror file entry */ udf_debug("Mirror metadata file location: block = %d part = %d\n", mdata->s_mirror_file_loc, map->s_partition_num); - mdata->s_mirror_fe = udf_find_metadata_inode_efe(sb, - mdata->s_mirror_file_loc, map->s_partition_num); + fe = udf_find_metadata_inode_efe(sb, mdata->s_mirror_file_loc, + map->s_partition_num); - if (mdata->s_mirror_fe == NULL) { + if (IS_ERR(fe)) { udf_err(sb, "Both metadata and mirror metadata inode efe can not found\n"); - return -EIO; + return PTR_ERR(fe); } - } + mdata->s_mirror_fe = fe; + } else + mdata->s_metadata_fe = fe; + /* * bitmap file entry @@ -1015,15 +1020,16 @@ static int udf_load_metadata_files(struct super_block *sb, int partition) udf_debug("Bitmap file location: block = %d part = %d\n", addr.logicalBlockNum, addr.partitionReferenceNum); - mdata->s_bitmap_fe = udf_iget(sb, &addr); - if (mdata->s_bitmap_fe == NULL) { + fe = udf_iget(sb, &addr); + if (IS_ERR(fe)) { if (sb->s_flags & MS_RDONLY) udf_warn(sb, "bitmap inode efe not found but it's ok since the disc is mounted read-only\n"); else { udf_err(sb, "bitmap inode efe not found and attempted read-write mount\n"); - return -EIO; + return PTR_ERR(fe); } - } + } else + mdata->s_bitmap_fe = fe; } udf_debug("udf_load_metadata_files Ok\n"); @@ -1111,13 +1117,15 @@ static int udf_fill_partdesc_info(struct super_block *sb, phd->unallocSpaceTable.extPosition), .partitionReferenceNum = p_index, }; + struct inode *inode; - map->s_uspace.s_table = udf_iget(sb, &loc); - if (!map->s_uspace.s_table) { + inode = udf_iget(sb, &loc); + if (IS_ERR(inode)) { udf_debug("cannot load unallocSpaceTable (part %d)\n", p_index); - return -EIO; + return PTR_ERR(inode); } + map->s_uspace.s_table = inode; map->s_partition_flags |= UDF_PART_FLAG_UNALLOC_TABLE; udf_debug("unallocSpaceTable (part %d) @ %ld\n", p_index, map->s_uspace.s_table->i_ino); @@ -1144,14 +1152,15 @@ static int udf_fill_partdesc_info(struct super_block *sb, phd->freedSpaceTable.extPosition), .partitionReferenceNum = p_index, }; + struct inode *inode; - map->s_fspace.s_table = udf_iget(sb, &loc); - if (!map->s_fspace.s_table) { + inode = udf_iget(sb, &loc); + if (IS_ERR(inode)) { udf_debug("cannot load freedSpaceTable (part %d)\n", p_index); - return -EIO; + return PTR_ERR(inode); } - + map->s_fspace.s_table = inode; map->s_partition_flags |= UDF_PART_FLAG_FREED_TABLE; udf_debug("freedSpaceTable (part %d) @ %ld\n", p_index, map->s_fspace.s_table->i_ino); @@ -1178,6 +1187,7 @@ static void udf_find_vat_block(struct super_block *sb, int p_index, struct udf_part_map *map = &sbi->s_partmaps[p_index]; sector_t vat_block; struct kernel_lb_addr ino; + struct inode *inode; /* * VAT file entry is in the last recorded block. Some broken disks have @@ -1186,10 +1196,13 @@ static void udf_find_vat_block(struct super_block *sb, int p_index, ino.partitionReferenceNum = type1_index; for (vat_block = start_block; vat_block >= map->s_partition_root && - vat_block >= start_block - 3 && - !sbi->s_vat_inode; vat_block--) { + vat_block >= start_block - 3; vat_block--) { ino.logicalBlockNum = vat_block - map->s_partition_root; - sbi->s_vat_inode = udf_iget(sb, &ino); + inode = udf_iget(sb, &ino); + if (!IS_ERR(inode)) { + sbi->s_vat_inode = inode; + break; + } } } @@ -2205,10 +2218,10 @@ static int udf_fill_super(struct super_block *sb, void *options, int silent) /* assign inodes by physical block number */ /* perhaps it's not extensible enough, but for now ... */ inode = udf_iget(sb, &rootdir); - if (!inode) { + if (IS_ERR(inode)) { udf_err(sb, "Error in udf_iget, block=%d, partition=%d\n", rootdir.logicalBlockNum, rootdir.partitionReferenceNum); - ret = -EIO; + ret = PTR_ERR(inode); goto error_out; } diff --git a/fs/udf/udfdecl.h b/fs/udf/udfdecl.h index be7dabb..742557b 100644 --- a/fs/udf/udfdecl.h +++ b/fs/udf/udfdecl.h @@ -143,7 +143,6 @@ extern int udf_expand_file_adinicb(struct inode *); extern struct buffer_head *udf_expand_dir_adinicb(struct inode *, int *, int *); extern struct buffer_head *udf_bread(struct inode *, int, int, int *); extern int udf_setsize(struct inode *, loff_t); -extern void udf_read_inode(struct inode *); extern void udf_evict_inode(struct inode *); extern int udf_write_inode(struct inode *, struct writeback_control *wbc); extern long udf_block_map(struct inode *, sector_t); @@ -209,7 +208,7 @@ extern int udf_CS0toUTF8(struct ustr *, const struct ustr *); /* ialloc.c */ extern void udf_free_inode(struct inode *); -extern struct inode *udf_new_inode(struct inode *, umode_t, int *); +extern struct inode *udf_new_inode(struct inode *, umode_t); /* truncate.c */ extern void udf_truncate_tail_extent(struct inode *); diff --git a/fs/ufs/inode.c b/fs/ufs/inode.c index 7c580c9..be7d42c 100644 --- a/fs/ufs/inode.c +++ b/fs/ufs/inode.c @@ -902,9 +902,6 @@ void ufs_evict_inode(struct inode * inode) invalidate_inode_buffers(inode); clear_inode(inode); - if (want_delete) { - lock_ufs(inode->i_sb); - ufs_free_inode (inode); - unlock_ufs(inode->i_sb); - } + if (want_delete) + ufs_free_inode(inode); } diff --git a/fs/ufs/namei.c b/fs/ufs/namei.c index 90d74b8..2df62a7 100644 --- a/fs/ufs/namei.c +++ b/fs/ufs/namei.c @@ -126,12 +126,12 @@ static int ufs_symlink (struct inode * dir, struct dentry * dentry, if (l > sb->s_blocksize) goto out_notlocked; - lock_ufs(dir->i_sb); inode = ufs_new_inode(dir, S_IFLNK | S_IRWXUGO); err = PTR_ERR(inode); if (IS_ERR(inode)) - goto out; + goto out_notlocked; + lock_ufs(dir->i_sb); if (l > UFS_SB(sb)->s_uspi->s_maxsymlinklen) { /* slow symlink */ inode->i_op = &ufs_symlink_inode_operations; @@ -181,13 +181,9 @@ static int ufs_mkdir(struct inode * dir, struct dentry * dentry, umode_t mode) struct inode * inode; int err; - lock_ufs(dir->i_sb); - inode_inc_link_count(dir); - inode = ufs_new_inode(dir, S_IFDIR|mode); - err = PTR_ERR(inode); if (IS_ERR(inode)) - goto out_dir; + return PTR_ERR(inode); inode->i_op = &ufs_dir_inode_operations; inode->i_fop = &ufs_dir_operations; @@ -195,6 +191,9 @@ static int ufs_mkdir(struct inode * dir, struct dentry * dentry, umode_t mode) inode_inc_link_count(inode); + lock_ufs(dir->i_sb); + inode_inc_link_count(dir); + err = ufs_make_empty(inode, dir); if (err) goto out_fail; @@ -212,7 +211,6 @@ out_fail: inode_dec_link_count(inode); inode_dec_link_count(inode); iput (inode); -out_dir: inode_dec_link_count(dir); unlock_ufs(dir->i_sb); goto out; diff --git a/fs/xfs/libxfs/xfs_bmap.c b/fs/xfs/libxfs/xfs_bmap.c index de2d26d..86df952 100644 --- a/fs/xfs/libxfs/xfs_bmap.c +++ b/fs/xfs/libxfs/xfs_bmap.c @@ -5424,7 +5424,7 @@ xfs_bmap_shift_extents( struct xfs_bmap_free *flist, int num_exts) { - struct xfs_btree_cur *cur; + struct xfs_btree_cur *cur = NULL; struct xfs_bmbt_rec_host *gotp; struct xfs_bmbt_irec got; struct xfs_bmbt_irec left; @@ -5435,7 +5435,7 @@ xfs_bmap_shift_extents( int error = 0; int i; int whichfork = XFS_DATA_FORK; - int logflags; + int logflags = 0; xfs_filblks_t blockcount = 0; int total_extents; @@ -5478,16 +5478,11 @@ xfs_bmap_shift_extents( } } - /* We are going to change core inode */ - logflags = XFS_ILOG_CORE; if (ifp->if_flags & XFS_IFBROOT) { cur = xfs_bmbt_init_cursor(mp, tp, ip, whichfork); cur->bc_private.b.firstblock = *firstblock; cur->bc_private.b.flist = flist; cur->bc_private.b.flags = 0; - } else { - cur = NULL; - logflags |= XFS_ILOG_DEXT; } /* @@ -5545,11 +5540,14 @@ xfs_bmap_shift_extents( blockcount = left.br_blockcount + got.br_blockcount; xfs_iext_remove(ip, *current_ext, 1, 0); + logflags |= XFS_ILOG_CORE; if (cur) { error = xfs_btree_delete(cur, &i); if (error) goto del_cursor; XFS_WANT_CORRUPTED_GOTO(i == 1, del_cursor); + } else { + logflags |= XFS_ILOG_DEXT; } XFS_IFORK_NEXT_SET(ip, whichfork, XFS_IFORK_NEXTENTS(ip, whichfork) - 1); @@ -5575,6 +5573,7 @@ xfs_bmap_shift_extents( got.br_startoff = startoff; } + logflags |= XFS_ILOG_CORE; if (cur) { error = xfs_bmbt_update(cur, got.br_startoff, got.br_startblock, @@ -5582,6 +5581,8 @@ xfs_bmap_shift_extents( got.br_state); if (error) goto del_cursor; + } else { + logflags |= XFS_ILOG_DEXT; } (*current_ext)++; @@ -5597,6 +5598,7 @@ del_cursor: xfs_btree_del_cursor(cur, error ? XFS_BTREE_ERROR : XFS_BTREE_NOERROR); - xfs_trans_log_inode(tp, ip, logflags); + if (logflags) + xfs_trans_log_inode(tp, ip, logflags); return error; } diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c index 11e9b4c..b984647 100644 --- a/fs/xfs/xfs_aops.c +++ b/fs/xfs/xfs_aops.c @@ -1753,11 +1753,72 @@ xfs_vm_readpages( return mpage_readpages(mapping, pages, nr_pages, xfs_get_blocks); } +/* + * This is basically a copy of __set_page_dirty_buffers() with one + * small tweak: buffers beyond EOF do not get marked dirty. If we mark them + * dirty, we'll never be able to clean them because we don't write buffers + * beyond EOF, and that means we can't invalidate pages that span EOF + * that have been marked dirty. Further, the dirty state can leak into + * the file interior if the file is extended, resulting in all sorts of + * bad things happening as the state does not match the underlying data. + * + * XXX: this really indicates that bufferheads in XFS need to die. Warts like + * this only exist because of bufferheads and how the generic code manages them. + */ +STATIC int +xfs_vm_set_page_dirty( + struct page *page) +{ + struct address_space *mapping = page->mapping; + struct inode *inode = mapping->host; + loff_t end_offset; + loff_t offset; + int newly_dirty; + + if (unlikely(!mapping)) + return !TestSetPageDirty(page); + + end_offset = i_size_read(inode); + offset = page_offset(page); + + spin_lock(&mapping->private_lock); + if (page_has_buffers(page)) { + struct buffer_head *head = page_buffers(page); + struct buffer_head *bh = head; + + do { + if (offset < end_offset) + set_buffer_dirty(bh); + bh = bh->b_this_page; + offset += 1 << inode->i_blkbits; + } while (bh != head); + } + newly_dirty = !TestSetPageDirty(page); + spin_unlock(&mapping->private_lock); + + if (newly_dirty) { + /* sigh - __set_page_dirty() is static, so copy it here, too */ + unsigned long flags; + + spin_lock_irqsave(&mapping->tree_lock, flags); + if (page->mapping) { /* Race with truncate? */ + WARN_ON_ONCE(!PageUptodate(page)); + account_page_dirtied(page, mapping); + radix_tree_tag_set(&mapping->page_tree, + page_index(page), PAGECACHE_TAG_DIRTY); + } + spin_unlock_irqrestore(&mapping->tree_lock, flags); + __mark_inode_dirty(mapping->host, I_DIRTY_PAGES); + } + return newly_dirty; +} + const struct address_space_operations xfs_address_space_operations = { .readpage = xfs_vm_readpage, .readpages = xfs_vm_readpages, .writepage = xfs_vm_writepage, .writepages = xfs_vm_writepages, + .set_page_dirty = xfs_vm_set_page_dirty, .releasepage = xfs_vm_releasepage, .invalidatepage = xfs_vm_invalidatepage, .write_begin = xfs_vm_write_begin, diff --git a/fs/xfs/xfs_bmap_util.c b/fs/xfs/xfs_bmap_util.c index 2f1e30d..1707980 100644 --- a/fs/xfs/xfs_bmap_util.c +++ b/fs/xfs/xfs_bmap_util.c @@ -1470,6 +1470,26 @@ xfs_collapse_file_space( start_fsb = XFS_B_TO_FSB(mp, offset + len); shift_fsb = XFS_B_TO_FSB(mp, len); + /* + * Writeback the entire file and force remove any post-eof blocks. The + * writeback prevents changes to the extent list via concurrent + * writeback and the eofblocks trim prevents the extent shift algorithm + * from running into a post-eof delalloc extent. + * + * XXX: This is a temporary fix until the extent shift loop below is + * converted to use offsets and lookups within the ILOCK rather than + * carrying around the index into the extent list for the next + * iteration. + */ + error = filemap_write_and_wait(VFS_I(ip)->i_mapping); + if (error) + return error; + if (xfs_can_free_eofblocks(ip, true)) { + error = xfs_free_eofblocks(mp, ip, false); + if (error) + return error; + } + error = xfs_free_file_space(ip, offset, len); if (error) return error; diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c index 076b170..de5368c 100644 --- a/fs/xfs/xfs_file.c +++ b/fs/xfs/xfs_file.c @@ -291,12 +291,22 @@ xfs_file_read_iter( if (inode->i_mapping->nrpages) { ret = filemap_write_and_wait_range( VFS_I(ip)->i_mapping, - pos, -1); + pos, pos + size - 1); if (ret) { xfs_rw_iunlock(ip, XFS_IOLOCK_EXCL); return ret; } - truncate_pagecache_range(VFS_I(ip), pos, -1); + + /* + * Invalidate whole pages. This can return an error if + * we fail to invalidate a page, but this should never + * happen on XFS. Warn if it does fail. + */ + ret = invalidate_inode_pages2_range(VFS_I(ip)->i_mapping, + pos >> PAGE_CACHE_SHIFT, + (pos + size - 1) >> PAGE_CACHE_SHIFT); + WARN_ON_ONCE(ret); + ret = 0; } xfs_rw_ilock_demote(ip, XFS_IOLOCK_EXCL); } @@ -632,10 +642,19 @@ xfs_file_dio_aio_write( if (mapping->nrpages) { ret = filemap_write_and_wait_range(VFS_I(ip)->i_mapping, - pos, -1); + pos, pos + count - 1); if (ret) goto out; - truncate_pagecache_range(VFS_I(ip), pos, -1); + /* + * Invalidate whole pages. This can return an error if + * we fail to invalidate a page, but this should never + * happen on XFS. Warn if it does fail. + */ + ret = invalidate_inode_pages2_range(VFS_I(ip)->i_mapping, + pos >> PAGE_CACHE_SHIFT, + (pos + count - 1) >> PAGE_CACHE_SHIFT); + WARN_ON_ONCE(ret); + ret = 0; } /* |