diff options
Diffstat (limited to 'fs')
54 files changed, 641 insertions, 609 deletions
diff --git a/fs/affs/super.c b/fs/affs/super.c index 6d589f2..895ac7d 100644 --- a/fs/affs/super.c +++ b/fs/affs/super.c @@ -340,8 +340,6 @@ static int affs_fill_super(struct super_block *sb, void *data, int silent) &blocksize,&sbi->s_prefix, sbi->s_volume, &mount_flags)) { printk(KERN_ERR "AFFS: Error parsing options\n"); - kfree(sbi->s_prefix); - kfree(sbi); return -EINVAL; } /* N.B. after this point s_prefix must be released */ @@ -112,6 +112,11 @@ struct kioctx { struct work_struct free_work; + /* + * signals when all in-flight requests are done + */ + struct completion *requests_done; + struct { /* * This counts the number of available slots in the ringbuffer, @@ -508,6 +513,10 @@ static void free_ioctx_reqs(struct percpu_ref *ref) { struct kioctx *ctx = container_of(ref, struct kioctx, reqs); + /* At this point we know that there are no any in-flight requests */ + if (ctx->requests_done) + complete(ctx->requests_done); + INIT_WORK(&ctx->free_work, free_ioctx); schedule_work(&ctx->free_work); } @@ -718,7 +727,8 @@ err: * when the processes owning a context have all exited to encourage * the rapid destruction of the kioctx. */ -static void kill_ioctx(struct mm_struct *mm, struct kioctx *ctx) +static void kill_ioctx(struct mm_struct *mm, struct kioctx *ctx, + struct completion *requests_done) { if (!atomic_xchg(&ctx->dead, 1)) { struct kioctx_table *table; @@ -747,7 +757,11 @@ static void kill_ioctx(struct mm_struct *mm, struct kioctx *ctx) if (ctx->mmap_size) vm_munmap(ctx->mmap_base, ctx->mmap_size); + ctx->requests_done = requests_done; percpu_ref_kill(&ctx->users); + } else { + if (requests_done) + complete(requests_done); } } @@ -809,7 +823,7 @@ void exit_aio(struct mm_struct *mm) */ ctx->mmap_size = 0; - kill_ioctx(mm, ctx); + kill_ioctx(mm, ctx, NULL); } } @@ -1185,7 +1199,7 @@ SYSCALL_DEFINE2(io_setup, unsigned, nr_events, aio_context_t __user *, ctxp) if (!IS_ERR(ioctx)) { ret = put_user(ioctx->user_id, ctxp); if (ret) - kill_ioctx(current->mm, ioctx); + kill_ioctx(current->mm, ioctx, NULL); percpu_ref_put(&ioctx->users); } @@ -1203,8 +1217,22 @@ SYSCALL_DEFINE1(io_destroy, aio_context_t, ctx) { struct kioctx *ioctx = lookup_ioctx(ctx); if (likely(NULL != ioctx)) { - kill_ioctx(current->mm, ioctx); + struct completion requests_done = + COMPLETION_INITIALIZER_ONSTACK(requests_done); + + /* Pass requests_done to kill_ioctx() where it can be set + * in a thread-safe way. If we try to set it here then we have + * a race condition if two io_destroy() called simultaneously. + */ + kill_ioctx(current->mm, ioctx, &requests_done); percpu_ref_put(&ioctx->users); + + /* Wait until all IO for the context are done. Otherwise kernel + * keep using user-space buffers even if user thinks the context + * is destroyed. + */ + wait_for_completion(&requests_done); + return 0; } pr_debug("EINVAL: io_destroy: invalid context id\n"); @@ -1299,10 +1327,8 @@ rw_common: &iovec, compat) : aio_setup_single_vector(req, rw, buf, &nr_segs, iovec); - if (ret) - return ret; - - ret = rw_verify_area(rw, file, &req->ki_pos, req->ki_nbytes); + if (!ret) + ret = rw_verify_area(rw, file, &req->ki_pos, req->ki_nbytes); if (ret < 0) { if (iovec != &inline_vec) kfree(iovec); diff --git a/fs/autofs4/root.c b/fs/autofs4/root.c index 2caf36a..cc87c1a 100644 --- a/fs/autofs4/root.c +++ b/fs/autofs4/root.c @@ -179,7 +179,7 @@ static struct dentry *autofs4_lookup_active(struct dentry *dentry) spin_lock(&active->d_lock); /* Already gone? */ - if (!d_count(active)) + if ((int) d_count(active) <= 0) goto next; qstr = &active->d_name; @@ -230,7 +230,7 @@ static struct dentry *autofs4_lookup_expiring(struct dentry *dentry) spin_lock(&expiring->d_lock); - /* Bad luck, we've already been dentry_iput */ + /* We've already been dentry_iput or unlinked */ if (!expiring->d_inode) goto next; diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 4c48df5..ba6b885 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -2058,6 +2058,20 @@ struct btrfs_ioctl_defrag_range_args { #define btrfs_raw_test_opt(o, opt) ((o) & BTRFS_MOUNT_##opt) #define btrfs_test_opt(root, opt) ((root)->fs_info->mount_opt & \ BTRFS_MOUNT_##opt) +#define btrfs_set_and_info(root, opt, fmt, args...) \ +{ \ + if (!btrfs_test_opt(root, opt)) \ + btrfs_info(root->fs_info, fmt, ##args); \ + btrfs_set_opt(root->fs_info->mount_opt, opt); \ +} + +#define btrfs_clear_and_info(root, opt, fmt, args...) \ +{ \ + if (btrfs_test_opt(root, opt)) \ + btrfs_info(root->fs_info, fmt, ##args); \ + btrfs_clear_opt(root->fs_info->mount_opt, opt); \ +} + /* * Inode flags */ diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 029d46c..9833149 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -2861,7 +2861,7 @@ retry_root_backup: printk(KERN_ERR "BTRFS: failed to read log tree\n"); free_extent_buffer(log_tree_root->node); kfree(log_tree_root); - goto fail_trans_kthread; + goto fail_qgroup; } /* returns with log_tree_root freed on success */ ret = btrfs_recover_log_trees(log_tree_root); @@ -2870,24 +2870,24 @@ retry_root_backup: "Failed to recover log tree"); free_extent_buffer(log_tree_root->node); kfree(log_tree_root); - goto fail_trans_kthread; + goto fail_qgroup; } if (sb->s_flags & MS_RDONLY) { ret = btrfs_commit_super(tree_root); if (ret) - goto fail_trans_kthread; + goto fail_qgroup; } } ret = btrfs_find_orphan_roots(tree_root); if (ret) - goto fail_trans_kthread; + goto fail_qgroup; if (!(sb->s_flags & MS_RDONLY)) { ret = btrfs_cleanup_fs_roots(fs_info); if (ret) - goto fail_trans_kthread; + goto fail_qgroup; ret = btrfs_recover_relocation(tree_root); if (ret < 0) { diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 1306487..5590af9 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -1542,6 +1542,7 @@ again: ret = 0; } if (ret) { + key.objectid = bytenr; key.type = BTRFS_EXTENT_ITEM_KEY; key.offset = num_bytes; btrfs_release_path(path); @@ -3542,11 +3543,13 @@ static u64 btrfs_reduce_alloc_profile(struct btrfs_root *root, u64 flags) return extended_to_chunk(flags | tmp); } -static u64 get_alloc_profile(struct btrfs_root *root, u64 flags) +static u64 get_alloc_profile(struct btrfs_root *root, u64 orig_flags) { unsigned seq; + u64 flags; do { + flags = orig_flags; seq = read_seqbegin(&root->fs_info->profiles_lock); if (flags & BTRFS_BLOCK_GROUP_DATA) @@ -5719,6 +5722,7 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans, if (ret > 0 && skinny_metadata) { skinny_metadata = false; + key.objectid = bytenr; key.type = BTRFS_EXTENT_ITEM_KEY; key.offset = num_bytes; btrfs_release_path(path); diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index eb742c0..ae6af07 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -800,7 +800,7 @@ next_slot: if (start > key.offset && end < extent_end) { BUG_ON(del_nr > 0); if (extent_type == BTRFS_FILE_EXTENT_INLINE) { - ret = -EINVAL; + ret = -EOPNOTSUPP; break; } @@ -846,7 +846,7 @@ next_slot: */ if (start <= key.offset && end < extent_end) { if (extent_type == BTRFS_FILE_EXTENT_INLINE) { - ret = -EINVAL; + ret = -EOPNOTSUPP; break; } @@ -872,7 +872,7 @@ next_slot: if (start > key.offset && end >= extent_end) { BUG_ON(del_nr > 0); if (extent_type == BTRFS_FILE_EXTENT_INLINE) { - ret = -EINVAL; + ret = -EOPNOTSUPP; break; } @@ -1777,7 +1777,7 @@ static ssize_t btrfs_file_aio_write(struct kiocb *iocb, start_pos = round_down(pos, root->sectorsize); if (start_pos > i_size_read(inode)) { /* Expand hole size to cover write data, preventing empty gap */ - end_pos = round_up(pos + iov->iov_len, root->sectorsize); + end_pos = round_up(pos + count, root->sectorsize); err = btrfs_cont_expand(inode, i_size_read(inode), end_pos); if (err) { mutex_unlock(&inode->i_mutex); diff --git a/fs/btrfs/inode-map.c b/fs/btrfs/inode-map.c index cc8ca193..86935f5 100644 --- a/fs/btrfs/inode-map.c +++ b/fs/btrfs/inode-map.c @@ -176,7 +176,11 @@ static void start_caching(struct btrfs_root *root) tsk = kthread_run(caching_kthread, root, "btrfs-ino-cache-%llu\n", root->root_key.objectid); - BUG_ON(IS_ERR(tsk)); /* -ENOMEM */ + if (IS_ERR(tsk)) { + btrfs_warn(root->fs_info, "failed to start inode caching task"); + btrfs_clear_and_info(root, CHANGE_INODE_CACHE, + "disabling inode map caching"); + } } int btrfs_find_free_ino(struct btrfs_root *root, u64 *objectid) @@ -205,24 +209,14 @@ again: void btrfs_return_ino(struct btrfs_root *root, u64 objectid) { - struct btrfs_free_space_ctl *ctl = root->free_ino_ctl; struct btrfs_free_space_ctl *pinned = root->free_ino_pinned; if (!btrfs_test_opt(root, INODE_MAP_CACHE)) return; - again: if (root->cached == BTRFS_CACHE_FINISHED) { - __btrfs_add_free_space(ctl, objectid, 1); + __btrfs_add_free_space(pinned, objectid, 1); } else { - /* - * If we are in the process of caching free ino chunks, - * to avoid adding the same inode number to the free_ino - * tree twice due to cross transaction, we'll leave it - * in the pinned tree until a transaction is committed - * or the caching work is done. - */ - down_write(&root->fs_info->commit_root_sem); spin_lock(&root->cache_lock); if (root->cached == BTRFS_CACHE_FINISHED) { @@ -234,11 +228,7 @@ again: start_caching(root); - if (objectid <= root->cache_progress || - objectid >= root->highest_objectid) - __btrfs_add_free_space(ctl, objectid, 1); - else - __btrfs_add_free_space(pinned, objectid, 1); + __btrfs_add_free_space(pinned, objectid, 1); up_write(&root->fs_info->commit_root_sem); } diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index e79ff6b..2f6d7b1 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -3066,7 +3066,7 @@ process_slot: new_key.offset + datal, 1); if (ret) { - if (ret != -EINVAL) + if (ret != -EOPNOTSUPP) btrfs_abort_transaction(trans, root, ret); btrfs_end_transaction(trans, root); @@ -3120,6 +3120,8 @@ process_slot: } else if (type == BTRFS_FILE_EXTENT_INLINE) { u64 skip = 0; u64 trim = 0; + u64 aligned_end = 0; + if (off > key.offset) { skip = off - key.offset; new_key.offset += skip; @@ -3136,12 +3138,14 @@ process_slot: size -= skip + trim; datal -= skip + trim; + aligned_end = ALIGN(new_key.offset + datal, + root->sectorsize); ret = btrfs_drop_extents(trans, root, inode, new_key.offset, - new_key.offset + datal, + aligned_end, 1); if (ret) { - if (ret != -EINVAL) + if (ret != -EOPNOTSUPP) btrfs_abort_transaction(trans, root, ret); btrfs_end_transaction(trans, root); diff --git a/fs/btrfs/send.c b/fs/btrfs/send.c index 1ac3ca9..fd38b50 100644 --- a/fs/btrfs/send.c +++ b/fs/btrfs/send.c @@ -349,6 +349,11 @@ static int fs_path_ensure_buf(struct fs_path *p, int len) if (p->buf_len >= len) return 0; + if (len > PATH_MAX) { + WARN_ON(1); + return -ENOMEM; + } + path_len = p->end - p->start; old_buf_len = p->buf_len; @@ -1663,7 +1668,7 @@ static int get_first_ref(struct btrfs_root *root, u64 ino, goto out; } - if (key.type == BTRFS_INODE_REF_KEY) { + if (found_key.type == BTRFS_INODE_REF_KEY) { struct btrfs_inode_ref *iref; iref = btrfs_item_ptr(path->nodes[0], path->slots[0], struct btrfs_inode_ref); diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index 5011aad..9601d25 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -385,20 +385,6 @@ static match_table_t tokens = { {Opt_err, NULL}, }; -#define btrfs_set_and_info(root, opt, fmt, args...) \ -{ \ - if (!btrfs_test_opt(root, opt)) \ - btrfs_info(root->fs_info, fmt, ##args); \ - btrfs_set_opt(root->fs_info->mount_opt, opt); \ -} - -#define btrfs_clear_and_info(root, opt, fmt, args...) \ -{ \ - if (btrfs_test_opt(root, opt)) \ - btrfs_info(root->fs_info, fmt, ##args); \ - btrfs_clear_opt(root->fs_info->mount_opt, opt); \ -} - /* * Regular mount options parser. Everything that is needed only when * reading in a new superblock is parsed here. @@ -1186,7 +1172,6 @@ static struct dentry *mount_subvol(const char *subvol_name, int flags, return ERR_PTR(-ENOMEM); mnt = vfs_kern_mount(&btrfs_fs_type, flags, device_name, newargs); - kfree(newargs); if (PTR_RET(mnt) == -EBUSY) { if (flags & MS_RDONLY) { @@ -1196,17 +1181,22 @@ static struct dentry *mount_subvol(const char *subvol_name, int flags, int r; mnt = vfs_kern_mount(&btrfs_fs_type, flags | MS_RDONLY, device_name, newargs); - if (IS_ERR(mnt)) + if (IS_ERR(mnt)) { + kfree(newargs); return ERR_CAST(mnt); + } r = btrfs_remount(mnt->mnt_sb, &flags, NULL); if (r < 0) { /* FIXME: release vfsmount mnt ??*/ + kfree(newargs); return ERR_PTR(r); } } } + kfree(newargs); + if (IS_ERR(mnt)) return ERR_CAST(mnt); diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c index 2e5e648..c561b62 100644 --- a/fs/ceph/caps.c +++ b/fs/ceph/caps.c @@ -3261,7 +3261,7 @@ int ceph_encode_inode_release(void **p, struct inode *inode, rel->seq = cpu_to_le32(cap->seq); rel->issue_seq = cpu_to_le32(cap->issue_seq), rel->mseq = cpu_to_le32(cap->mseq); - rel->caps = cpu_to_le32(cap->issued); + rel->caps = cpu_to_le32(cap->implemented); rel->wanted = cpu_to_le32(cap->mds_wanted); rel->dname_len = 0; rel->dname_seq = 0; diff --git a/fs/ceph/dir.c b/fs/ceph/dir.c index 766410a..c29d6ae 100644 --- a/fs/ceph/dir.c +++ b/fs/ceph/dir.c @@ -141,7 +141,7 @@ static int __dcache_readdir(struct file *file, struct dir_context *ctx, /* start at beginning? */ if (ctx->pos == 2 || last == NULL || - ctx->pos < ceph_dentry(last)->offset) { + fpos_cmp(ctx->pos, ceph_dentry(last)->offset) < 0) { if (list_empty(&parent->d_subdirs)) goto out_unlock; p = parent->d_subdirs.prev; @@ -182,9 +182,16 @@ more: spin_unlock(&dentry->d_lock); spin_unlock(&parent->d_lock); + /* make sure a dentry wasn't dropped while we didn't have parent lock */ + if (!ceph_dir_is_complete(dir)) { + dout(" lost dir complete on %p; falling back to mds\n", dir); + dput(dentry); + err = -EAGAIN; + goto out; + } + dout(" %llu (%llu) dentry %p %.*s %p\n", di->offset, ctx->pos, dentry, dentry->d_name.len, dentry->d_name.name, dentry->d_inode); - ctx->pos = di->offset; if (!dir_emit(ctx, dentry->d_name.name, dentry->d_name.len, ceph_translate_ino(dentry->d_sb, dentry->d_inode->i_ino), @@ -198,19 +205,12 @@ more: return 0; } + ctx->pos = di->offset + 1; + if (last) dput(last); last = dentry; - ctx->pos++; - - /* make sure a dentry wasn't dropped while we didn't have parent lock */ - if (!ceph_dir_is_complete(dir)) { - dout(" lost dir complete on %p; falling back to mds\n", dir); - err = -EAGAIN; - goto out; - } - spin_lock(&parent->d_lock); p = p->prev; /* advance to next dentry */ goto more; @@ -296,6 +296,8 @@ static int ceph_readdir(struct file *file, struct dir_context *ctx) err = __dcache_readdir(file, ctx, shared_gen); if (err != -EAGAIN) return err; + frag = fpos_frag(ctx->pos); + off = fpos_off(ctx->pos); } else { spin_unlock(&ci->i_ceph_lock); } @@ -446,7 +448,6 @@ more: if (atomic_read(&ci->i_release_count) == fi->dir_release_count) { dout(" marking %p complete\n", inode); __ceph_dir_set_complete(ci, fi->dir_release_count); - ci->i_max_offset = ctx->pos; } spin_unlock(&ci->i_ceph_lock); @@ -935,14 +936,16 @@ static int ceph_rename(struct inode *old_dir, struct dentry *old_dentry, * to do it here. */ - /* d_move screws up d_subdirs order */ - ceph_dir_clear_complete(new_dir); - d_move(old_dentry, new_dentry); /* ensure target dentry is invalidated, despite rehashing bug in vfs_rename_dir */ ceph_invalidate_dentry_lease(new_dentry); + + /* d_move screws up sibling dentries' offsets */ + ceph_dir_clear_complete(old_dir); + ceph_dir_clear_complete(new_dir); + } ceph_mdsc_put_request(req); return err; diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c index 0b0728e..233c6f9 100644 --- a/fs/ceph/inode.c +++ b/fs/ceph/inode.c @@ -744,7 +744,6 @@ static int fill_inode(struct inode *inode, !__ceph_dir_is_complete(ci)) { dout(" marking %p complete (empty)\n", inode); __ceph_dir_set_complete(ci, atomic_read(&ci->i_release_count)); - ci->i_max_offset = 2; } no_change: /* only update max_size on auth cap */ @@ -890,41 +889,6 @@ out_unlock: } /* - * Set dentry's directory position based on the current dir's max, and - * order it in d_subdirs, so that dcache_readdir behaves. - * - * Always called under directory's i_mutex. - */ -static void ceph_set_dentry_offset(struct dentry *dn) -{ - struct dentry *dir = dn->d_parent; - struct inode *inode = dir->d_inode; - struct ceph_inode_info *ci; - struct ceph_dentry_info *di; - - BUG_ON(!inode); - - ci = ceph_inode(inode); - di = ceph_dentry(dn); - - spin_lock(&ci->i_ceph_lock); - if (!__ceph_dir_is_complete(ci)) { - spin_unlock(&ci->i_ceph_lock); - return; - } - di->offset = ceph_inode(inode)->i_max_offset++; - spin_unlock(&ci->i_ceph_lock); - - spin_lock(&dir->d_lock); - spin_lock_nested(&dn->d_lock, DENTRY_D_LOCK_NESTED); - list_move(&dn->d_u.d_child, &dir->d_subdirs); - dout("set_dentry_offset %p %lld (%p %p)\n", dn, di->offset, - dn->d_u.d_child.prev, dn->d_u.d_child.next); - spin_unlock(&dn->d_lock); - spin_unlock(&dir->d_lock); -} - -/* * splice a dentry to an inode. * caller must hold directory i_mutex for this to be safe. * @@ -933,7 +897,7 @@ static void ceph_set_dentry_offset(struct dentry *dn) * the caller) if we fail. */ static struct dentry *splice_dentry(struct dentry *dn, struct inode *in, - bool *prehash, bool set_offset) + bool *prehash) { struct dentry *realdn; @@ -965,8 +929,6 @@ static struct dentry *splice_dentry(struct dentry *dn, struct inode *in, } if ((!prehash || *prehash) && d_unhashed(dn)) d_rehash(dn); - if (set_offset) - ceph_set_dentry_offset(dn); out: return dn; } @@ -987,7 +949,6 @@ int ceph_fill_trace(struct super_block *sb, struct ceph_mds_request *req, { struct ceph_mds_reply_info_parsed *rinfo = &req->r_reply_info; struct inode *in = NULL; - struct ceph_mds_reply_inode *ininfo; struct ceph_vino vino; struct ceph_fs_client *fsc = ceph_sb_to_client(sb); int err = 0; @@ -1161,6 +1122,9 @@ retry_lookup: /* rename? */ if (req->r_old_dentry && req->r_op == CEPH_MDS_OP_RENAME) { + struct inode *olddir = req->r_old_dentry_dir; + BUG_ON(!olddir); + dout(" src %p '%.*s' dst %p '%.*s'\n", req->r_old_dentry, req->r_old_dentry->d_name.len, @@ -1180,13 +1144,10 @@ retry_lookup: rehashing bug in vfs_rename_dir */ ceph_invalidate_dentry_lease(dn); - /* - * d_move() puts the renamed dentry at the end of - * d_subdirs. We need to assign it an appropriate - * directory offset so we can behave when dir is - * complete. - */ - ceph_set_dentry_offset(req->r_old_dentry); + /* d_move screws up sibling dentries' offsets */ + ceph_dir_clear_complete(dir); + ceph_dir_clear_complete(olddir); + dout("dn %p gets new offset %lld\n", req->r_old_dentry, ceph_dentry(req->r_old_dentry)->offset); @@ -1213,8 +1174,9 @@ retry_lookup: /* attach proper inode */ if (!dn->d_inode) { + ceph_dir_clear_complete(dir); ihold(in); - dn = splice_dentry(dn, in, &have_lease, true); + dn = splice_dentry(dn, in, &have_lease); if (IS_ERR(dn)) { err = PTR_ERR(dn); goto done; @@ -1235,17 +1197,16 @@ retry_lookup: (req->r_op == CEPH_MDS_OP_LOOKUPSNAP || req->r_op == CEPH_MDS_OP_MKSNAP)) { struct dentry *dn = req->r_dentry; + struct inode *dir = req->r_locked_dir; /* fill out a snapdir LOOKUPSNAP dentry */ BUG_ON(!dn); - BUG_ON(!req->r_locked_dir); - BUG_ON(ceph_snap(req->r_locked_dir) != CEPH_SNAPDIR); - ininfo = rinfo->targeti.in; - vino.ino = le64_to_cpu(ininfo->ino); - vino.snap = le64_to_cpu(ininfo->snapid); + BUG_ON(!dir); + BUG_ON(ceph_snap(dir) != CEPH_SNAPDIR); dout(" linking snapped dir %p to dn %p\n", in, dn); + ceph_dir_clear_complete(dir); ihold(in); - dn = splice_dentry(dn, in, NULL, true); + dn = splice_dentry(dn, in, NULL); if (IS_ERR(dn)) { err = PTR_ERR(dn); goto done; @@ -1407,7 +1368,7 @@ retry_lookup: } if (!dn->d_inode) { - dn = splice_dentry(dn, in, NULL, false); + dn = splice_dentry(dn, in, NULL); if (IS_ERR(dn)) { err = PTR_ERR(dn); dn = NULL; diff --git a/fs/ceph/ioctl.c b/fs/ceph/ioctl.c index fdf941b..a822a6e 100644 --- a/fs/ceph/ioctl.c +++ b/fs/ceph/ioctl.c @@ -109,6 +109,8 @@ static long ceph_ioctl_set_layout(struct file *file, void __user *arg) return PTR_ERR(req); req->r_inode = inode; ihold(inode); + req->r_num_caps = 1; + req->r_inode_drop = CEPH_CAP_FILE_SHARED | CEPH_CAP_FILE_EXCL; req->r_args.setlayout.layout.fl_stripe_unit = @@ -153,6 +155,7 @@ static long ceph_ioctl_set_layout_policy (struct file *file, void __user *arg) return PTR_ERR(req); req->r_inode = inode; ihold(inode); + req->r_num_caps = 1; req->r_args.setlayout.layout.fl_stripe_unit = cpu_to_le32(l.stripe_unit); diff --git a/fs/ceph/locks.c b/fs/ceph/locks.c index d94ba0d..1913988 100644 --- a/fs/ceph/locks.c +++ b/fs/ceph/locks.c @@ -45,6 +45,7 @@ static int ceph_lock_message(u8 lock_type, u16 operation, struct file *file, return PTR_ERR(req); req->r_inode = inode; ihold(inode); + req->r_num_caps = 1; /* mds requires start and length rather than start and end */ if (LLONG_MAX == fl->fl_end) diff --git a/fs/ceph/super.h b/fs/ceph/super.h index 7866cd0..ead05cc 100644 --- a/fs/ceph/super.h +++ b/fs/ceph/super.h @@ -266,7 +266,6 @@ struct ceph_inode_info { struct timespec i_rctime; u64 i_rbytes, i_rfiles, i_rsubdirs; u64 i_files, i_subdirs; - u64 i_max_offset; /* largest readdir offset, set with complete dir */ struct rb_root i_fragtree; struct mutex i_fragtree_mutex; diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c index aadc2b6..a22d667 100644 --- a/fs/cifs/inode.c +++ b/fs/cifs/inode.c @@ -1737,6 +1737,9 @@ cifs_inode_needs_reval(struct inode *inode) if (cifs_i->time == 0) return true; + if (!cifs_sb->actimeo) + return true; + if (!time_in_range(jiffies, cifs_i->time, cifs_i->time + cifs_sb->actimeo)) return true; diff --git a/fs/compat.c b/fs/compat.c index ca926ad..66d3d3c 100644 --- a/fs/compat.c +++ b/fs/compat.c @@ -457,9 +457,9 @@ COMPAT_SYSCALL_DEFINE3(fcntl64, unsigned int, fd, unsigned int, cmd, case F_GETLK64: case F_SETLK64: case F_SETLKW64: - case F_GETLKP: - case F_SETLKP: - case F_SETLKPW: + case F_OFD_GETLK: + case F_OFD_SETLK: + case F_OFD_SETLKW: ret = get_compat_flock64(&f, compat_ptr(arg)); if (ret != 0) break; @@ -468,7 +468,7 @@ COMPAT_SYSCALL_DEFINE3(fcntl64, unsigned int, fd, unsigned int, cmd, conv_cmd = convert_fcntl_cmd(cmd); ret = sys_fcntl(fd, conv_cmd, (unsigned long)&f); set_fs(old_fs); - if ((conv_cmd == F_GETLK || conv_cmd == F_GETLKP) && ret == 0) { + if ((conv_cmd == F_GETLK || conv_cmd == F_OFD_GETLK) && ret == 0) { /* need to return lock information - see above for commentary */ if (f.l_start > COMPAT_LOFF_T_MAX) ret = -EOVERFLOW; @@ -493,9 +493,9 @@ COMPAT_SYSCALL_DEFINE3(fcntl, unsigned int, fd, unsigned int, cmd, case F_GETLK64: case F_SETLK64: case F_SETLKW64: - case F_GETLKP: - case F_SETLKP: - case F_SETLKPW: + case F_OFD_GETLK: + case F_OFD_SETLK: + case F_OFD_SETLKW: return -EINVAL; } return compat_sys_fcntl64(fd, cmd, arg); diff --git a/fs/dcache.c b/fs/dcache.c index 40707d8..42ae01e 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -246,16 +246,8 @@ static void __d_free(struct rcu_head *head) kmem_cache_free(dentry_cache, dentry); } -/* - * no locks, please. - */ -static void d_free(struct dentry *dentry) +static void dentry_free(struct dentry *dentry) { - BUG_ON((int)dentry->d_lockref.count > 0); - this_cpu_dec(nr_dentry); - if (dentry->d_op && dentry->d_op->d_release) - dentry->d_op->d_release(dentry); - /* if dentry was never visible to RCU, immediate free is OK */ if (!(dentry->d_flags & DCACHE_RCUACCESS)) __d_free(&dentry->d_u.d_rcu); @@ -403,56 +395,6 @@ static void dentry_lru_add(struct dentry *dentry) d_lru_add(dentry); } -/* - * Remove a dentry with references from the LRU. - * - * If we are on the shrink list, then we can get to try_prune_one_dentry() and - * lose our last reference through the parent walk. In this case, we need to - * remove ourselves from the shrink list, not the LRU. - */ -static void dentry_lru_del(struct dentry *dentry) -{ - if (dentry->d_flags & DCACHE_LRU_LIST) { - if (dentry->d_flags & DCACHE_SHRINK_LIST) - return d_shrink_del(dentry); - d_lru_del(dentry); - } -} - -/** - * d_kill - kill dentry and return parent - * @dentry: dentry to kill - * @parent: parent dentry - * - * The dentry must already be unhashed and removed from the LRU. - * - * If this is the root of the dentry tree, return NULL. - * - * dentry->d_lock and parent->d_lock must be held by caller, and are dropped by - * d_kill. - */ -static struct dentry *d_kill(struct dentry *dentry, struct dentry *parent) - __releases(dentry->d_lock) - __releases(parent->d_lock) - __releases(dentry->d_inode->i_lock) -{ - list_del(&dentry->d_u.d_child); - /* - * Inform d_walk() that we are no longer attached to the - * dentry tree - */ - dentry->d_flags |= DCACHE_DENTRY_KILLED; - if (parent) - spin_unlock(&parent->d_lock); - dentry_iput(dentry); - /* - * dentry_iput drops the locks, at which point nobody (except - * transient RCU lookups) can reach this dentry. - */ - d_free(dentry); - return parent; -} - /** * d_drop - drop a dentry * @dentry: dentry to drop @@ -510,7 +452,14 @@ dentry_kill(struct dentry *dentry, int unlock_on_failure) __releases(dentry->d_lock) { struct inode *inode; - struct dentry *parent; + struct dentry *parent = NULL; + bool can_free = true; + + if (unlikely(dentry->d_flags & DCACHE_DENTRY_KILLED)) { + can_free = dentry->d_flags & DCACHE_MAY_FREE; + spin_unlock(&dentry->d_lock); + goto out; + } inode = dentry->d_inode; if (inode && !spin_trylock(&inode->i_lock)) { @@ -521,9 +470,7 @@ relock: } return dentry; /* try again with same dentry */ } - if (IS_ROOT(dentry)) - parent = NULL; - else + if (!IS_ROOT(dentry)) parent = dentry->d_parent; if (parent && !spin_trylock(&parent->d_lock)) { if (inode) @@ -543,10 +490,40 @@ relock: if ((dentry->d_flags & DCACHE_OP_PRUNE) && !d_unhashed(dentry)) dentry->d_op->d_prune(dentry); - dentry_lru_del(dentry); + if (dentry->d_flags & DCACHE_LRU_LIST) { + if (!(dentry->d_flags & DCACHE_SHRINK_LIST)) + d_lru_del(dentry); + } /* if it was on the hash then remove it */ __d_drop(dentry); - return d_kill(dentry, parent); + list_del(&dentry->d_u.d_child); + /* + * Inform d_walk() that we are no longer attached to the + * dentry tree + */ + dentry->d_flags |= DCACHE_DENTRY_KILLED; + if (parent) + spin_unlock(&parent->d_lock); + dentry_iput(dentry); + /* + * dentry_iput drops the locks, at which point nobody (except + * transient RCU lookups) can reach this dentry. + */ + BUG_ON((int)dentry->d_lockref.count > 0); + this_cpu_dec(nr_dentry); + if (dentry->d_op && dentry->d_op->d_release) + dentry->d_op->d_release(dentry); + + spin_lock(&dentry->d_lock); + if (dentry->d_flags & DCACHE_SHRINK_LIST) { + dentry->d_flags |= DCACHE_MAY_FREE; + can_free = false; + } + spin_unlock(&dentry->d_lock); +out: + if (likely(can_free)) + dentry_free(dentry); + return parent; } /* @@ -815,65 +792,13 @@ restart: } EXPORT_SYMBOL(d_prune_aliases); -/* - * Try to throw away a dentry - free the inode, dput the parent. - * Requires dentry->d_lock is held, and dentry->d_count == 0. - * Releases dentry->d_lock. - * - * This may fail if locks cannot be acquired no problem, just try again. - */ -static struct dentry * try_prune_one_dentry(struct dentry *dentry) - __releases(dentry->d_lock) -{ - struct dentry *parent; - - parent = dentry_kill(dentry, 0); - /* - * If dentry_kill returns NULL, we have nothing more to do. - * if it returns the same dentry, trylocks failed. In either - * case, just loop again. - * - * Otherwise, we need to prune ancestors too. This is necessary - * to prevent quadratic behavior of shrink_dcache_parent(), but - * is also expected to be beneficial in reducing dentry cache - * fragmentation. - */ - if (!parent) - return NULL; - if (parent == dentry) - return dentry; - - /* Prune ancestors. */ - dentry = parent; - while (dentry) { - if (lockref_put_or_lock(&dentry->d_lockref)) - return NULL; - dentry = dentry_kill(dentry, 1); - } - return NULL; -} - static void shrink_dentry_list(struct list_head *list) { - struct dentry *dentry; + struct dentry *dentry, *parent; - rcu_read_lock(); - for (;;) { - dentry = list_entry_rcu(list->prev, struct dentry, d_lru); - if (&dentry->d_lru == list) - break; /* empty */ - - /* - * Get the dentry lock, and re-verify that the dentry is - * this on the shrinking list. If it is, we know that - * DCACHE_SHRINK_LIST and DCACHE_LRU_LIST are set. - */ + while (!list_empty(list)) { + dentry = list_entry(list->prev, struct dentry, d_lru); spin_lock(&dentry->d_lock); - if (dentry != list_entry(list->prev, struct dentry, d_lru)) { - spin_unlock(&dentry->d_lock); - continue; - } - /* * The dispose list is isolated and dentries are not accounted * to the LRU here, so we can simply remove it from the list @@ -885,30 +810,38 @@ static void shrink_dentry_list(struct list_head *list) * We found an inuse dentry which was not removed from * the LRU because of laziness during lookup. Do not free it. */ - if (dentry->d_lockref.count) { + if ((int)dentry->d_lockref.count > 0) { spin_unlock(&dentry->d_lock); continue; } - rcu_read_unlock(); + parent = dentry_kill(dentry, 0); /* - * If 'try_to_prune()' returns a dentry, it will - * be the same one we passed in, and d_lock will - * have been held the whole time, so it will not - * have been added to any other lists. We failed - * to get the inode lock. - * - * We just add it back to the shrink list. + * If dentry_kill returns NULL, we have nothing more to do. */ - dentry = try_prune_one_dentry(dentry); + if (!parent) + continue; - rcu_read_lock(); - if (dentry) { + if (unlikely(parent == dentry)) { + /* + * trylocks have failed and d_lock has been held the + * whole time, so it could not have been added to any + * other lists. Just add it back to the shrink list. + */ d_shrink_add(dentry, list); spin_unlock(&dentry->d_lock); + continue; } + /* + * We need to prune ancestors too. This is necessary to prevent + * quadratic behavior of shrink_dcache_parent(), but is also + * expected to be beneficial in reducing dentry cache + * fragmentation. + */ + dentry = parent; + while (dentry && !lockref_put_or_lock(&dentry->d_lockref)) + dentry = dentry_kill(dentry, 1); } - rcu_read_unlock(); } static enum lru_status @@ -1261,34 +1194,23 @@ static enum d_walk_ret select_collect(void *_data, struct dentry *dentry) if (data->start == dentry) goto out; - /* - * move only zero ref count dentries to the dispose list. - * - * Those which are presently on the shrink list, being processed - * by shrink_dentry_list(), shouldn't be moved. Otherwise the - * loop in shrink_dcache_parent() might not make any progress - * and loop forever. - */ - if (dentry->d_lockref.count) { - dentry_lru_del(dentry); - } else if (!(dentry->d_flags & DCACHE_SHRINK_LIST)) { - /* - * We can't use d_lru_shrink_move() because we - * need to get the global LRU lock and do the - * LRU accounting. - */ - d_lru_del(dentry); - d_shrink_add(dentry, &data->dispose); + if (dentry->d_flags & DCACHE_SHRINK_LIST) { data->found++; - ret = D_WALK_NORETRY; + } else { + if (dentry->d_flags & DCACHE_LRU_LIST) + d_lru_del(dentry); + if (!dentry->d_lockref.count) { + d_shrink_add(dentry, &data->dispose); + data->found++; + } } /* * We can return to the caller if we have found some (this * ensures forward progress). We'll be coming back to find * the rest. */ - if (data->found && need_resched()) - ret = D_WALK_QUIT; + if (!list_empty(&data->dispose)) + ret = need_resched() ? D_WALK_QUIT : D_WALK_NORETRY; out: return ret; } @@ -1318,45 +1240,35 @@ void shrink_dcache_parent(struct dentry *parent) } EXPORT_SYMBOL(shrink_dcache_parent); -static enum d_walk_ret umount_collect(void *_data, struct dentry *dentry) +static enum d_walk_ret umount_check(void *_data, struct dentry *dentry) { - struct select_data *data = _data; - enum d_walk_ret ret = D_WALK_CONTINUE; + /* it has busy descendents; complain about those instead */ + if (!list_empty(&dentry->d_subdirs)) + return D_WALK_CONTINUE; - if (dentry->d_lockref.count) { - dentry_lru_del(dentry); - if (likely(!list_empty(&dentry->d_subdirs))) - goto out; - if (dentry == data->start && dentry->d_lockref.count == 1) - goto out; - printk(KERN_ERR - "BUG: Dentry %p{i=%lx,n=%s}" - " still in use (%d)" - " [unmount of %s %s]\n", + /* root with refcount 1 is fine */ + if (dentry == _data && dentry->d_lockref.count == 1) + return D_WALK_CONTINUE; + + printk(KERN_ERR "BUG: Dentry %p{i=%lx,n=%pd} " + " still in use (%d) [unmount of %s %s]\n", dentry, dentry->d_inode ? dentry->d_inode->i_ino : 0UL, - dentry->d_name.name, + dentry, dentry->d_lockref.count, dentry->d_sb->s_type->name, dentry->d_sb->s_id); - BUG(); - } else if (!(dentry->d_flags & DCACHE_SHRINK_LIST)) { - /* - * We can't use d_lru_shrink_move() because we - * need to get the global LRU lock and do the - * LRU accounting. - */ - if (dentry->d_flags & DCACHE_LRU_LIST) - d_lru_del(dentry); - d_shrink_add(dentry, &data->dispose); - data->found++; - ret = D_WALK_NORETRY; - } -out: - if (data->found && need_resched()) - ret = D_WALK_QUIT; - return ret; + WARN_ON(1); + return D_WALK_CONTINUE; +} + +static void do_one_tree(struct dentry *dentry) +{ + shrink_dcache_parent(dentry); + d_walk(dentry, dentry, umount_check, NULL); + d_drop(dentry); + dput(dentry); } /* @@ -1366,40 +1278,15 @@ void shrink_dcache_for_umount(struct super_block *sb) { struct dentry *dentry; - if (down_read_trylock(&sb->s_umount)) - BUG(); + WARN(down_read_trylock(&sb->s_umount), "s_umount should've been locked"); dentry = sb->s_root; sb->s_root = NULL; - for (;;) { - struct select_data data; - - INIT_LIST_HEAD(&data.dispose); - data.start = dentry; - data.found = 0; - - d_walk(dentry, &data, umount_collect, NULL); - if (!data.found) - break; - - shrink_dentry_list(&data.dispose); - cond_resched(); - } - d_drop(dentry); - dput(dentry); + do_one_tree(dentry); while (!hlist_bl_empty(&sb->s_anon)) { - struct select_data data; - dentry = hlist_bl_entry(hlist_bl_first(&sb->s_anon), struct dentry, d_hash); - - INIT_LIST_HEAD(&data.dispose); - data.start = NULL; - data.found = 0; - - d_walk(dentry, &data, umount_collect, NULL); - if (data.found) - shrink_dentry_list(&data.dispose); - cond_resched(); + dentry = dget(hlist_bl_entry(hlist_bl_first(&sb->s_anon), struct dentry, d_hash)); + do_one_tree(dentry); } } @@ -1647,8 +1534,7 @@ static void __d_instantiate(struct dentry *dentry, struct inode *inode) unsigned add_flags = d_flags_for_inode(inode); spin_lock(&dentry->d_lock); - dentry->d_flags &= ~DCACHE_ENTRY_TYPE; - dentry->d_flags |= add_flags; + __d_set_type(dentry, add_flags); if (inode) hlist_add_head(&dentry->d_alias, &inode->i_dentry); dentry->d_inode = inode; @@ -657,10 +657,10 @@ int setup_arg_pages(struct linux_binprm *bprm, unsigned long rlim_stack; #ifdef CONFIG_STACK_GROWSUP - /* Limit stack size to 1GB */ + /* Limit stack size */ stack_base = rlimit_max(RLIMIT_STACK); - if (stack_base > (1 << 30)) - stack_base = 1 << 30; + if (stack_base > STACK_SIZE_MAX) + stack_base = STACK_SIZE_MAX; /* Make sure we didn't let the argument array grow too large. */ if (vma->vm_end - vma->vm_start > stack_base) @@ -274,15 +274,15 @@ static long do_fcntl(int fd, unsigned int cmd, unsigned long arg, break; #if BITS_PER_LONG != 32 /* 32-bit arches must use fcntl64() */ - case F_GETLKP: + case F_OFD_GETLK: #endif case F_GETLK: err = fcntl_getlk(filp, cmd, (struct flock __user *) arg); break; #if BITS_PER_LONG != 32 /* 32-bit arches must use fcntl64() */ - case F_SETLKP: - case F_SETLKPW: + case F_OFD_SETLK: + case F_OFD_SETLKW: #endif /* Fallthrough */ case F_SETLK: @@ -399,13 +399,13 @@ SYSCALL_DEFINE3(fcntl64, unsigned int, fd, unsigned int, cmd, switch (cmd) { case F_GETLK64: - case F_GETLKP: + case F_OFD_GETLK: err = fcntl_getlk64(f.file, cmd, (struct flock64 __user *) arg); break; case F_SETLK64: case F_SETLKW64: - case F_SETLKP: - case F_SETLKPW: + case F_OFD_SETLK: + case F_OFD_SETLKW: err = fcntl_setlk64(fd, f.file, cmd, (struct flock64 __user *) arg); break; diff --git a/fs/fuse/control.c b/fs/fuse/control.c index a0b0855..205e0d5 100644 --- a/fs/fuse/control.c +++ b/fs/fuse/control.c @@ -348,7 +348,7 @@ int __init fuse_ctl_init(void) return register_filesystem(&fuse_ctl_fs_type); } -void fuse_ctl_cleanup(void) +void __exit fuse_ctl_cleanup(void) { unregister_filesystem(&fuse_ctl_fs_type); } diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c index 5b4e035..4219835 100644 --- a/fs/fuse/dir.c +++ b/fs/fuse/dir.c @@ -679,6 +679,14 @@ static int fuse_symlink(struct inode *dir, struct dentry *entry, return create_new_entry(fc, req, dir, entry, S_IFLNK); } +static inline void fuse_update_ctime(struct inode *inode) +{ + if (!IS_NOCMTIME(inode)) { + inode->i_ctime = current_fs_time(inode->i_sb); + mark_inode_dirty_sync(inode); + } +} + static int fuse_unlink(struct inode *dir, struct dentry *entry) { int err; @@ -713,6 +721,7 @@ static int fuse_unlink(struct inode *dir, struct dentry *entry) fuse_invalidate_attr(inode); fuse_invalidate_attr(dir); fuse_invalidate_entry_cache(entry); + fuse_update_ctime(inode); } else if (err == -EINTR) fuse_invalidate_entry(entry); return err; @@ -743,23 +752,26 @@ static int fuse_rmdir(struct inode *dir, struct dentry *entry) return err; } -static int fuse_rename(struct inode *olddir, struct dentry *oldent, - struct inode *newdir, struct dentry *newent) +static int fuse_rename_common(struct inode *olddir, struct dentry *oldent, + struct inode *newdir, struct dentry *newent, + unsigned int flags, int opcode, size_t argsize) { int err; - struct fuse_rename_in inarg; + struct fuse_rename2_in inarg; struct fuse_conn *fc = get_fuse_conn(olddir); - struct fuse_req *req = fuse_get_req_nopages(fc); + struct fuse_req *req; + req = fuse_get_req_nopages(fc); if (IS_ERR(req)) return PTR_ERR(req); - memset(&inarg, 0, sizeof(inarg)); + memset(&inarg, 0, argsize); inarg.newdir = get_node_id(newdir); - req->in.h.opcode = FUSE_RENAME; + inarg.flags = flags; + req->in.h.opcode = opcode; req->in.h.nodeid = get_node_id(olddir); req->in.numargs = 3; - req->in.args[0].size = sizeof(inarg); + req->in.args[0].size = argsize; req->in.args[0].value = &inarg; req->in.args[1].size = oldent->d_name.len + 1; req->in.args[1].value = oldent->d_name.name; @@ -771,15 +783,22 @@ static int fuse_rename(struct inode *olddir, struct dentry *oldent, if (!err) { /* ctime changes */ fuse_invalidate_attr(oldent->d_inode); + fuse_update_ctime(oldent->d_inode); + + if (flags & RENAME_EXCHANGE) { + fuse_invalidate_attr(newent->d_inode); + fuse_update_ctime(newent->d_inode); + } fuse_invalidate_attr(olddir); if (olddir != newdir) fuse_invalidate_attr(newdir); /* newent will end up negative */ - if (newent->d_inode) { + if (!(flags & RENAME_EXCHANGE) && newent->d_inode) { fuse_invalidate_attr(newent->d_inode); fuse_invalidate_entry_cache(newent); + fuse_update_ctime(newent->d_inode); } } else if (err == -EINTR) { /* If request was interrupted, DEITY only knows if the @@ -795,6 +814,36 @@ static int fuse_rename(struct inode *olddir, struct dentry *oldent, return err; } +static int fuse_rename(struct inode *olddir, struct dentry *oldent, + struct inode *newdir, struct dentry *newent) +{ + return fuse_rename_common(olddir, oldent, newdir, newent, 0, + FUSE_RENAME, sizeof(struct fuse_rename_in)); +} + +static int fuse_rename2(struct inode *olddir, struct dentry *oldent, + struct inode *newdir, struct dentry *newent, + unsigned int flags) +{ + struct fuse_conn *fc = get_fuse_conn(olddir); + int err; + + if (flags & ~(RENAME_NOREPLACE | RENAME_EXCHANGE)) + return -EINVAL; + + if (fc->no_rename2 || fc->minor < 23) + return -EINVAL; + + err = fuse_rename_common(olddir, oldent, newdir, newent, flags, + FUSE_RENAME2, sizeof(struct fuse_rename2_in)); + if (err == -ENOSYS) { + fc->no_rename2 = 1; + err = -EINVAL; + } + return err; + +} + static int fuse_link(struct dentry *entry, struct inode *newdir, struct dentry *newent) { @@ -829,6 +878,7 @@ static int fuse_link(struct dentry *entry, struct inode *newdir, inc_nlink(inode); spin_unlock(&fc->lock); fuse_invalidate_attr(inode); + fuse_update_ctime(inode); } else if (err == -EINTR) { fuse_invalidate_attr(inode); } @@ -846,6 +896,8 @@ static void fuse_fillattr(struct inode *inode, struct fuse_attr *attr, attr->size = i_size_read(inode); attr->mtime = inode->i_mtime.tv_sec; attr->mtimensec = inode->i_mtime.tv_nsec; + attr->ctime = inode->i_ctime.tv_sec; + attr->ctimensec = inode->i_ctime.tv_nsec; } stat->dev = inode->i_sb->s_dev; @@ -1504,7 +1556,7 @@ static bool update_mtime(unsigned ivalid, bool trust_local_mtime) } static void iattr_to_fattr(struct iattr *iattr, struct fuse_setattr_in *arg, - bool trust_local_mtime) + bool trust_local_cmtime) { unsigned ivalid = iattr->ia_valid; @@ -1523,13 +1575,18 @@ static void iattr_to_fattr(struct iattr *iattr, struct fuse_setattr_in *arg, if (!(ivalid & ATTR_ATIME_SET)) arg->valid |= FATTR_ATIME_NOW; } - if ((ivalid & ATTR_MTIME) && update_mtime(ivalid, trust_local_mtime)) { + if ((ivalid & ATTR_MTIME) && update_mtime(ivalid, trust_local_cmtime)) { arg->valid |= FATTR_MTIME; arg->mtime = iattr->ia_mtime.tv_sec; arg->mtimensec = iattr->ia_mtime.tv_nsec; - if (!(ivalid & ATTR_MTIME_SET) && !trust_local_mtime) + if (!(ivalid & ATTR_MTIME_SET) && !trust_local_cmtime) arg->valid |= FATTR_MTIME_NOW; } + if ((ivalid & ATTR_CTIME) && trust_local_cmtime) { + arg->valid |= FATTR_CTIME; + arg->ctime = iattr->ia_ctime.tv_sec; + arg->ctimensec = iattr->ia_ctime.tv_nsec; + } } /* @@ -1597,39 +1654,38 @@ static void fuse_setattr_fill(struct fuse_conn *fc, struct fuse_req *req, /* * Flush inode->i_mtime to the server */ -int fuse_flush_mtime(struct file *file, bool nofail) +int fuse_flush_times(struct inode *inode, struct fuse_file *ff) { - struct inode *inode = file->f_mapping->host; - struct fuse_inode *fi = get_fuse_inode(inode); struct fuse_conn *fc = get_fuse_conn(inode); - struct fuse_req *req = NULL; + struct fuse_req *req; struct fuse_setattr_in inarg; struct fuse_attr_out outarg; int err; - if (nofail) { - req = fuse_get_req_nofail_nopages(fc, file); - } else { - req = fuse_get_req_nopages(fc); - if (IS_ERR(req)) - return PTR_ERR(req); - } + req = fuse_get_req_nopages(fc); + if (IS_ERR(req)) + return PTR_ERR(req); memset(&inarg, 0, sizeof(inarg)); memset(&outarg, 0, sizeof(outarg)); - inarg.valid |= FATTR_MTIME; + inarg.valid = FATTR_MTIME; inarg.mtime = inode->i_mtime.tv_sec; inarg.mtimensec = inode->i_mtime.tv_nsec; - + if (fc->minor >= 23) { + inarg.valid |= FATTR_CTIME; + inarg.ctime = inode->i_ctime.tv_sec; + inarg.ctimensec = inode->i_ctime.tv_nsec; + } + if (ff) { + inarg.valid |= FATTR_FH; + inarg.fh = ff->fh; + } fuse_setattr_fill(fc, req, inode, &inarg, &outarg); fuse_request_send(fc, req); err = req->out.h.error; fuse_put_request(fc, req); - if (!err) - clear_bit(FUSE_I_MTIME_DIRTY, &fi->state); - return err; } @@ -1653,7 +1709,7 @@ int fuse_do_setattr(struct inode *inode, struct iattr *attr, bool is_wb = fc->writeback_cache; loff_t oldsize; int err; - bool trust_local_mtime = is_wb && S_ISREG(inode->i_mode); + bool trust_local_cmtime = is_wb && S_ISREG(inode->i_mode); if (!(fc->flags & FUSE_DEFAULT_PERMISSIONS)) attr->ia_valid |= ATTR_FORCE; @@ -1678,11 +1734,13 @@ int fuse_do_setattr(struct inode *inode, struct iattr *attr, if (is_truncate) { fuse_set_nowrite(inode); set_bit(FUSE_I_SIZE_UNSTABLE, &fi->state); + if (trust_local_cmtime && attr->ia_size != inode->i_size) + attr->ia_valid |= ATTR_MTIME | ATTR_CTIME; } memset(&inarg, 0, sizeof(inarg)); memset(&outarg, 0, sizeof(outarg)); - iattr_to_fattr(attr, &inarg, trust_local_mtime); + iattr_to_fattr(attr, &inarg, trust_local_cmtime); if (file) { struct fuse_file *ff = file->private_data; inarg.valid |= FATTR_FH; @@ -1711,9 +1769,12 @@ int fuse_do_setattr(struct inode *inode, struct iattr *attr, spin_lock(&fc->lock); /* the kernel maintains i_mtime locally */ - if (trust_local_mtime && (attr->ia_valid & ATTR_MTIME)) { - inode->i_mtime = attr->ia_mtime; - clear_bit(FUSE_I_MTIME_DIRTY, &fi->state); + if (trust_local_cmtime) { + if (attr->ia_valid & ATTR_MTIME) + inode->i_mtime = attr->ia_mtime; + if (attr->ia_valid & ATTR_CTIME) + inode->i_ctime = attr->ia_ctime; + /* FIXME: clear I_DIRTY_SYNC? */ } fuse_change_attributes_common(inode, &outarg.attr, @@ -1810,8 +1871,10 @@ static int fuse_setxattr(struct dentry *entry, const char *name, fc->no_setxattr = 1; err = -EOPNOTSUPP; } - if (!err) + if (!err) { fuse_invalidate_attr(inode); + fuse_update_ctime(inode); + } return err; } @@ -1941,20 +2004,11 @@ static int fuse_removexattr(struct dentry *entry, const char *name) fc->no_removexattr = 1; err = -EOPNOTSUPP; } - if (!err) + if (!err) { fuse_invalidate_attr(inode); - return err; -} - -static int fuse_update_time(struct inode *inode, struct timespec *now, - int flags) -{ - if (flags & S_MTIME) { - inode->i_mtime = *now; - set_bit(FUSE_I_MTIME_DIRTY, &get_fuse_inode(inode)->state); - BUG_ON(!S_ISREG(inode->i_mode)); + fuse_update_ctime(inode); } - return 0; + return err; } static const struct inode_operations fuse_dir_inode_operations = { @@ -1964,6 +2018,7 @@ static const struct inode_operations fuse_dir_inode_operations = { .unlink = fuse_unlink, .rmdir = fuse_rmdir, .rename = fuse_rename, + .rename2 = fuse_rename2, .link = fuse_link, .setattr = fuse_setattr, .create = fuse_create, @@ -1996,7 +2051,6 @@ static const struct inode_operations fuse_common_inode_operations = { .getxattr = fuse_getxattr, .listxattr = fuse_listxattr, .removexattr = fuse_removexattr, - .update_time = fuse_update_time, }; static const struct inode_operations fuse_symlink_inode_operations = { diff --git a/fs/fuse/file.c b/fs/fuse/file.c index 13f8bde..96d513e 100644 --- a/fs/fuse/file.c +++ b/fs/fuse/file.c @@ -223,6 +223,8 @@ void fuse_finish_open(struct inode *inode, struct file *file) i_size_write(inode, 0); spin_unlock(&fc->lock); fuse_invalidate_attr(inode); + if (fc->writeback_cache) + file_update_time(file); } if ((file->f_mode & FMODE_WRITE) && fc->writeback_cache) fuse_link_write_file(file); @@ -232,18 +234,26 @@ int fuse_open_common(struct inode *inode, struct file *file, bool isdir) { struct fuse_conn *fc = get_fuse_conn(inode); int err; + bool lock_inode = (file->f_flags & O_TRUNC) && + fc->atomic_o_trunc && + fc->writeback_cache; err = generic_file_open(inode, file); if (err) return err; + if (lock_inode) + mutex_lock(&inode->i_mutex); + err = fuse_do_open(fc, get_node_id(inode), file, isdir); - if (err) - return err; - fuse_finish_open(inode, file); + if (!err) + fuse_finish_open(inode, file); - return 0; + if (lock_inode) + mutex_unlock(&inode->i_mutex); + + return err; } static void fuse_prepare_release(struct fuse_file *ff, int flags, int opcode) @@ -314,10 +324,7 @@ static int fuse_release(struct inode *inode, struct file *file) /* see fuse_vma_close() for !writeback_cache case */ if (fc->writeback_cache) - filemap_write_and_wait(file->f_mapping); - - if (test_bit(FUSE_I_MTIME_DIRTY, &get_fuse_inode(inode)->state)) - fuse_flush_mtime(file, true); + write_inode_now(inode, 1); fuse_release_common(file, FUSE_RELEASE); @@ -439,7 +446,7 @@ static int fuse_flush(struct file *file, fl_owner_t id) if (fc->no_flush) return 0; - err = filemap_write_and_wait(file->f_mapping); + err = write_inode_now(inode, 1); if (err) return err; @@ -480,13 +487,6 @@ int fuse_fsync_common(struct file *file, loff_t start, loff_t end, if (is_bad_inode(inode)) return -EIO; - err = filemap_write_and_wait_range(inode->i_mapping, start, end); - if (err) - return err; - - if ((!isdir && fc->no_fsync) || (isdir && fc->no_fsyncdir)) - return 0; - mutex_lock(&inode->i_mutex); /* @@ -494,17 +494,17 @@ int fuse_fsync_common(struct file *file, loff_t start, loff_t end, * wait for all outstanding writes, before sending the FSYNC * request. */ - err = write_inode_now(inode, 0); + err = filemap_write_and_wait_range(inode->i_mapping, start, end); if (err) goto out; fuse_sync_writes(inode); + err = sync_inode_metadata(inode, 1); + if (err) + goto out; - if (test_bit(FUSE_I_MTIME_DIRTY, &get_fuse_inode(inode)->state)) { - int err = fuse_flush_mtime(file, false); - if (err) - goto out; - } + if ((!isdir && fc->no_fsync) || (isdir && fc->no_fsyncdir)) + goto out; req = fuse_get_req_nopages(fc); if (IS_ERR(req)) { @@ -1659,13 +1659,13 @@ static void fuse_writepage_end(struct fuse_conn *fc, struct fuse_req *req) fuse_writepage_free(fc, req); } -static struct fuse_file *fuse_write_file_get(struct fuse_conn *fc, - struct fuse_inode *fi) +static struct fuse_file *__fuse_write_file_get(struct fuse_conn *fc, + struct fuse_inode *fi) { struct fuse_file *ff = NULL; spin_lock(&fc->lock); - if (!WARN_ON(list_empty(&fi->write_files))) { + if (!list_empty(&fi->write_files)) { ff = list_entry(fi->write_files.next, struct fuse_file, write_entry); fuse_file_get(ff); @@ -1675,6 +1675,29 @@ static struct fuse_file *fuse_write_file_get(struct fuse_conn *fc, return ff; } +static struct fuse_file *fuse_write_file_get(struct fuse_conn *fc, + struct fuse_inode *fi) +{ + struct fuse_file *ff = __fuse_write_file_get(fc, fi); + WARN_ON(!ff); + return ff; +} + +int fuse_write_inode(struct inode *inode, struct writeback_control *wbc) +{ + struct fuse_conn *fc = get_fuse_conn(inode); + struct fuse_inode *fi = get_fuse_inode(inode); + struct fuse_file *ff; + int err; + + ff = __fuse_write_file_get(fc, fi); + err = fuse_flush_times(inode, ff); + if (ff) + fuse_file_put(ff, 0); + + return err; +} + static int fuse_writepage_locked(struct page *page) { struct address_space *mapping = page->mapping; @@ -2972,6 +2995,9 @@ static long fuse_file_fallocate(struct file *file, int mode, loff_t offset, bool lock_inode = !(mode & FALLOC_FL_KEEP_SIZE) || (mode & FALLOC_FL_PUNCH_HOLE); + if (mode & ~(FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE)) + return -EOPNOTSUPP; + if (fc->no_fallocate) return -EOPNOTSUPP; @@ -3017,12 +3043,8 @@ static long fuse_file_fallocate(struct file *file, int mode, loff_t offset, if (!(mode & FALLOC_FL_KEEP_SIZE)) { bool changed = fuse_write_update_size(inode, offset + length); - if (changed && fc->writeback_cache) { - struct fuse_inode *fi = get_fuse_inode(inode); - - inode->i_mtime = current_fs_time(inode->i_sb); - set_bit(FUSE_I_MTIME_DIRTY, &fi->state); - } + if (changed && fc->writeback_cache) + file_update_time(file); } if (mode & FALLOC_FL_PUNCH_HOLE) diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h index a257ed8e..7aa5c75 100644 --- a/fs/fuse/fuse_i.h +++ b/fs/fuse/fuse_i.h @@ -119,8 +119,6 @@ enum { FUSE_I_INIT_RDPLUS, /** An operation changing file size is in progress */ FUSE_I_SIZE_UNSTABLE, - /** i_mtime has been updated locally; a flush to userspace needed */ - FUSE_I_MTIME_DIRTY, }; struct fuse_conn; @@ -544,6 +542,9 @@ struct fuse_conn { /** Is fallocate not implemented by fs? */ unsigned no_fallocate:1; + /** Is rename with flags implemented by fs? */ + unsigned no_rename2:1; + /** Use enhanced/automatic page cache invalidation. */ unsigned auto_inval_data:1; @@ -725,7 +726,7 @@ int fuse_dev_init(void); void fuse_dev_cleanup(void); int fuse_ctl_init(void); -void fuse_ctl_cleanup(void); +void __exit fuse_ctl_cleanup(void); /** * Allocate a request @@ -891,7 +892,8 @@ int fuse_dev_release(struct inode *inode, struct file *file); bool fuse_write_update_size(struct inode *inode, loff_t pos); -int fuse_flush_mtime(struct file *file, bool nofail); +int fuse_flush_times(struct inode *inode, struct fuse_file *ff); +int fuse_write_inode(struct inode *inode, struct writeback_control *wbc); int fuse_do_setattr(struct inode *inode, struct iattr *attr, struct file *file); diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c index 8d61169..754dcf2 100644 --- a/fs/fuse/inode.c +++ b/fs/fuse/inode.c @@ -175,9 +175,9 @@ void fuse_change_attributes_common(struct inode *inode, struct fuse_attr *attr, if (!fc->writeback_cache || !S_ISREG(inode->i_mode)) { inode->i_mtime.tv_sec = attr->mtime; inode->i_mtime.tv_nsec = attr->mtimensec; + inode->i_ctime.tv_sec = attr->ctime; + inode->i_ctime.tv_nsec = attr->ctimensec; } - inode->i_ctime.tv_sec = attr->ctime; - inode->i_ctime.tv_nsec = attr->ctimensec; if (attr->blksize != 0) inode->i_blkbits = ilog2(attr->blksize); @@ -256,6 +256,8 @@ static void fuse_init_inode(struct inode *inode, struct fuse_attr *attr) inode->i_size = attr->size; inode->i_mtime.tv_sec = attr->mtime; inode->i_mtime.tv_nsec = attr->mtimensec; + inode->i_ctime.tv_sec = attr->ctime; + inode->i_ctime.tv_nsec = attr->ctimensec; if (S_ISREG(inode->i_mode)) { fuse_init_common(inode); fuse_init_file_inode(inode); @@ -303,7 +305,7 @@ struct inode *fuse_iget(struct super_block *sb, u64 nodeid, if ((inode->i_state & I_NEW)) { inode->i_flags |= S_NOATIME; - if (!fc->writeback_cache || !S_ISREG(inode->i_mode)) + if (!fc->writeback_cache || !S_ISREG(attr->mode)) inode->i_flags |= S_NOCMTIME; inode->i_generation = generation; inode->i_data.backing_dev_info = &fc->bdi; @@ -788,6 +790,7 @@ static const struct super_operations fuse_super_operations = { .alloc_inode = fuse_alloc_inode, .destroy_inode = fuse_destroy_inode, .evict_inode = fuse_evict_inode, + .write_inode = fuse_write_inode, .drop_inode = generic_delete_inode, .remount_fs = fuse_remount_fs, .put_super = fuse_put_super, @@ -890,6 +893,11 @@ static void process_init_reply(struct fuse_conn *fc, struct fuse_req *req) fc->async_dio = 1; if (arg->flags & FUSE_WRITEBACK_CACHE) fc->writeback_cache = 1; + if (arg->time_gran && arg->time_gran <= 1000000000) + fc->sb->s_time_gran = arg->time_gran; + else + fc->sb->s_time_gran = 1000000000; + } else { ra_pages = fc->max_read / PAGE_CACHE_SIZE; fc->no_lock = 1; @@ -996,7 +1004,7 @@ static int fuse_fill_super(struct super_block *sb, void *data, int silent) if (sb->s_flags & MS_MANDLOCK) goto err; - sb->s_flags &= ~MS_NOSEC; + sb->s_flags &= ~(MS_NOSEC | MS_I_VERSION); if (!parse_fuse_opt((char *) data, &d, is_bdev)) goto err; diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c index 2040275..e19d4c0 100644 --- a/fs/hugetlbfs/inode.c +++ b/fs/hugetlbfs/inode.c @@ -1030,6 +1030,11 @@ static int __init init_hugetlbfs_fs(void) int error; int i; + if (!hugepages_supported()) { + pr_info("hugetlbfs: disabling because there are no supported hugepage sizes\n"); + return -ENOTSUPP; + } + error = bdi_init(&hugetlbfs_backing_dev_info); if (error) return error; diff --git a/fs/kernfs/dir.c b/fs/kernfs/dir.c index 78f3403..ac127cd 100644 --- a/fs/kernfs/dir.c +++ b/fs/kernfs/dir.c @@ -232,9 +232,6 @@ static int kernfs_link_sibling(struct kernfs_node *kn) struct rb_node **node = &kn->parent->dir.children.rb_node; struct rb_node *parent = NULL; - if (kernfs_type(kn) == KERNFS_DIR) - kn->parent->dir.subdirs++; - while (*node) { struct kernfs_node *pos; int result; @@ -249,9 +246,15 @@ static int kernfs_link_sibling(struct kernfs_node *kn) else return -EEXIST; } + /* add new node and rebalance the tree */ rb_link_node(&kn->rb, parent, node); rb_insert_color(&kn->rb, &kn->parent->dir.children); + + /* successfully added, account subdir number */ + if (kernfs_type(kn) == KERNFS_DIR) + kn->parent->dir.subdirs++; + return 0; } diff --git a/fs/kernfs/file.c b/fs/kernfs/file.c index 8034706..5e9a80c 100644 --- a/fs/kernfs/file.c +++ b/fs/kernfs/file.c @@ -484,6 +484,8 @@ static int kernfs_fop_mmap(struct file *file, struct vm_area_struct *vma) ops = kernfs_ops(of->kn); rc = ops->mmap(of, vma); + if (rc) + goto out_put; /* * PowerPC's pci_mmap of legacy_mem uses shmem_zero_setup() @@ -608,6 +610,7 @@ static void kernfs_put_open_node(struct kernfs_node *kn, static int kernfs_fop_open(struct inode *inode, struct file *file) { struct kernfs_node *kn = file->f_path.dentry->d_fsdata; + struct kernfs_root *root = kernfs_root(kn); const struct kernfs_ops *ops; struct kernfs_open_file *of; bool has_read, has_write, has_mmap; @@ -622,14 +625,16 @@ static int kernfs_fop_open(struct inode *inode, struct file *file) has_write = ops->write || ops->mmap; has_mmap = ops->mmap; - /* check perms and supported operations */ - if ((file->f_mode & FMODE_WRITE) && - (!(inode->i_mode & S_IWUGO) || !has_write)) - goto err_out; + /* see the flag definition for details */ + if (root->flags & KERNFS_ROOT_EXTRA_OPEN_PERM_CHECK) { + if ((file->f_mode & FMODE_WRITE) && + (!(inode->i_mode & S_IWUGO) || !has_write)) + goto err_out; - if ((file->f_mode & FMODE_READ) && - (!(inode->i_mode & S_IRUGO) || !has_read)) - goto err_out; + if ((file->f_mode & FMODE_READ) && + (!(inode->i_mode & S_IRUGO) || !has_read)) + goto err_out; + } /* allocate a kernfs_open_file for the file */ error = -ENOMEM; @@ -135,7 +135,7 @@ #define IS_POSIX(fl) (fl->fl_flags & FL_POSIX) #define IS_FLOCK(fl) (fl->fl_flags & FL_FLOCK) #define IS_LEASE(fl) (fl->fl_flags & (FL_LEASE|FL_DELEG)) -#define IS_FILE_PVT(fl) (fl->fl_flags & FL_FILE_PVT) +#define IS_OFDLCK(fl) (fl->fl_flags & FL_OFDLCK) static bool lease_breaking(struct file_lock *fl) { @@ -389,18 +389,6 @@ static int flock64_to_posix_lock(struct file *filp, struct file_lock *fl, fl->fl_ops = NULL; fl->fl_lmops = NULL; - /* Ensure that fl->fl_filp has compatible f_mode */ - switch (l->l_type) { - case F_RDLCK: - if (!(filp->f_mode & FMODE_READ)) - return -EBADF; - break; - case F_WRLCK: - if (!(filp->f_mode & FMODE_WRITE)) - return -EBADF; - break; - } - return assign_type(fl, l->l_type); } @@ -564,7 +552,7 @@ static void __locks_insert_block(struct file_lock *blocker, BUG_ON(!list_empty(&waiter->fl_block)); waiter->fl_next = blocker; list_add_tail(&waiter->fl_block, &blocker->fl_block); - if (IS_POSIX(blocker) && !IS_FILE_PVT(blocker)) + if (IS_POSIX(blocker) && !IS_OFDLCK(blocker)) locks_insert_global_blocked(waiter); } @@ -759,12 +747,12 @@ EXPORT_SYMBOL(posix_test_lock); * of tasks (such as posix threads) sharing the same open file table. * To handle those cases, we just bail out after a few iterations. * - * For FL_FILE_PVT locks, the owner is the filp, not the files_struct. + * For FL_OFDLCK locks, the owner is the filp, not the files_struct. * Because the owner is not even nominally tied to a thread of * execution, the deadlock detection below can't reasonably work well. Just * skip it for those. * - * In principle, we could do a more limited deadlock detection on FL_FILE_PVT + * In principle, we could do a more limited deadlock detection on FL_OFDLCK * locks that just checks for the case where two tasks are attempting to * upgrade from read to write locks on the same inode. */ @@ -791,9 +779,9 @@ static int posix_locks_deadlock(struct file_lock *caller_fl, /* * This deadlock detector can't reasonably detect deadlocks with - * FL_FILE_PVT locks, since they aren't owned by a process, per-se. + * FL_OFDLCK locks, since they aren't owned by a process, per-se. */ - if (IS_FILE_PVT(caller_fl)) + if (IS_OFDLCK(caller_fl)) return 0; while ((block_fl = what_owner_is_waiting_for(block_fl))) { @@ -1391,11 +1379,10 @@ int __break_lease(struct inode *inode, unsigned int mode, unsigned int type) restart: break_time = flock->fl_break_time; - if (break_time != 0) { + if (break_time != 0) break_time -= jiffies; - if (break_time == 0) - break_time++; - } + if (break_time == 0) + break_time++; locks_insert_block(flock, new_fl); spin_unlock(&inode->i_lock); error = wait_event_interruptible_timeout(new_fl->fl_wait, @@ -1891,7 +1878,7 @@ EXPORT_SYMBOL_GPL(vfs_test_lock); static int posix_lock_to_flock(struct flock *flock, struct file_lock *fl) { - flock->l_pid = IS_FILE_PVT(fl) ? -1 : fl->fl_pid; + flock->l_pid = IS_OFDLCK(fl) ? -1 : fl->fl_pid; #if BITS_PER_LONG == 32 /* * Make sure we can represent the posix lock via @@ -1913,7 +1900,7 @@ static int posix_lock_to_flock(struct flock *flock, struct file_lock *fl) #if BITS_PER_LONG == 32 static void posix_lock_to_flock64(struct flock64 *flock, struct file_lock *fl) { - flock->l_pid = IS_FILE_PVT(fl) ? -1 : fl->fl_pid; + flock->l_pid = IS_OFDLCK(fl) ? -1 : fl->fl_pid; flock->l_start = fl->fl_start; flock->l_len = fl->fl_end == OFFSET_MAX ? 0 : fl->fl_end - fl->fl_start + 1; @@ -1942,13 +1929,13 @@ int fcntl_getlk(struct file *filp, unsigned int cmd, struct flock __user *l) if (error) goto out; - if (cmd == F_GETLKP) { + if (cmd == F_OFD_GETLK) { error = -EINVAL; if (flock.l_pid != 0) goto out; cmd = F_GETLK; - file_lock.fl_flags |= FL_FILE_PVT; + file_lock.fl_flags |= FL_OFDLCK; file_lock.fl_owner = (fl_owner_t)filp; } @@ -2035,6 +2022,22 @@ static int do_lock_file_wait(struct file *filp, unsigned int cmd, return error; } +/* Ensure that fl->fl_filp has compatible f_mode for F_SETLK calls */ +static int +check_fmode_for_setlk(struct file_lock *fl) +{ + switch (fl->fl_type) { + case F_RDLCK: + if (!(fl->fl_file->f_mode & FMODE_READ)) + return -EBADF; + break; + case F_WRLCK: + if (!(fl->fl_file->f_mode & FMODE_WRITE)) + return -EBADF; + } + return 0; +} + /* Apply the lock described by l to an open file descriptor. * This implements both the F_SETLK and F_SETLKW commands of fcntl(). */ @@ -2072,27 +2075,31 @@ again: if (error) goto out; + error = check_fmode_for_setlk(file_lock); + if (error) + goto out; + /* * If the cmd is requesting file-private locks, then set the - * FL_FILE_PVT flag and override the owner. + * FL_OFDLCK flag and override the owner. */ switch (cmd) { - case F_SETLKP: + case F_OFD_SETLK: error = -EINVAL; if (flock.l_pid != 0) goto out; cmd = F_SETLK; - file_lock->fl_flags |= FL_FILE_PVT; + file_lock->fl_flags |= FL_OFDLCK; file_lock->fl_owner = (fl_owner_t)filp; break; - case F_SETLKPW: + case F_OFD_SETLKW: error = -EINVAL; if (flock.l_pid != 0) goto out; cmd = F_SETLKW; - file_lock->fl_flags |= FL_FILE_PVT; + file_lock->fl_flags |= FL_OFDLCK; file_lock->fl_owner = (fl_owner_t)filp; /* Fallthrough */ case F_SETLKW: @@ -2144,13 +2151,13 @@ int fcntl_getlk64(struct file *filp, unsigned int cmd, struct flock64 __user *l) if (error) goto out; - if (cmd == F_GETLKP) { + if (cmd == F_OFD_GETLK) { error = -EINVAL; if (flock.l_pid != 0) goto out; cmd = F_GETLK64; - file_lock.fl_flags |= FL_FILE_PVT; + file_lock.fl_flags |= FL_OFDLCK; file_lock.fl_owner = (fl_owner_t)filp; } @@ -2207,27 +2214,31 @@ again: if (error) goto out; + error = check_fmode_for_setlk(file_lock); + if (error) + goto out; + /* * If the cmd is requesting file-private locks, then set the - * FL_FILE_PVT flag and override the owner. + * FL_OFDLCK flag and override the owner. */ switch (cmd) { - case F_SETLKP: + case F_OFD_SETLK: error = -EINVAL; if (flock.l_pid != 0) goto out; cmd = F_SETLK64; - file_lock->fl_flags |= FL_FILE_PVT; + file_lock->fl_flags |= FL_OFDLCK; file_lock->fl_owner = (fl_owner_t)filp; break; - case F_SETLKPW: + case F_OFD_SETLKW: error = -EINVAL; if (flock.l_pid != 0) goto out; cmd = F_SETLKW64; - file_lock->fl_flags |= FL_FILE_PVT; + file_lock->fl_flags |= FL_OFDLCK; file_lock->fl_owner = (fl_owner_t)filp; /* Fallthrough */ case F_SETLKW64: @@ -2413,8 +2424,8 @@ static void lock_get_status(struct seq_file *f, struct file_lock *fl, if (IS_POSIX(fl)) { if (fl->fl_flags & FL_ACCESS) seq_printf(f, "ACCESS"); - else if (IS_FILE_PVT(fl)) - seq_printf(f, "FLPVT "); + else if (IS_OFDLCK(fl)) + seq_printf(f, "OFDLCK"); else seq_printf(f, "POSIX "); @@ -1542,7 +1542,7 @@ static inline int walk_component(struct nameidata *nd, struct path *path, inode = path->dentry->d_inode; } err = -ENOENT; - if (!inode) + if (!inode || d_is_negative(path->dentry)) goto out_path_put; if (should_follow_link(path->dentry, follow)) { @@ -2249,7 +2249,7 @@ mountpoint_last(struct nameidata *nd, struct path *path) mutex_unlock(&dir->d_inode->i_mutex); done: - if (!dentry->d_inode) { + if (!dentry->d_inode || d_is_negative(dentry)) { error = -ENOENT; dput(dentry); goto out; @@ -2994,7 +2994,7 @@ retry_lookup: finish_lookup: /* we _can_ be in RCU mode here */ error = -ENOENT; - if (d_is_negative(path->dentry)) { + if (!inode || d_is_negative(path->dentry)) { path_to_nameidata(path, nd); goto out; } diff --git a/fs/nfsd/nfs4acl.c b/fs/nfsd/nfs4acl.c index 6f3f392..b6f4601 100644 --- a/fs/nfsd/nfs4acl.c +++ b/fs/nfsd/nfs4acl.c @@ -402,8 +402,10 @@ sort_pacl(struct posix_acl *pacl) * by uid/gid. */ int i, j; - if (pacl->a_count <= 4) - return; /* no users or groups */ + /* no users or groups */ + if (!pacl || pacl->a_count <= 4) + return; + i = 1; while (pacl->a_entries[i].e_tag == ACL_USER) i++; @@ -530,13 +532,12 @@ posix_state_to_acl(struct posix_acl_state *state, unsigned int flags) /* * ACLs with no ACEs are treated differently in the inheritable - * and effective cases: when there are no inheritable ACEs, we - * set a zero-length default posix acl: + * and effective cases: when there are no inheritable ACEs, + * calls ->set_acl with a NULL ACL structure. */ - if (state->empty && (flags & NFS4_ACL_TYPE_DEFAULT)) { - pacl = posix_acl_alloc(0, GFP_KERNEL); - return pacl ? pacl : ERR_PTR(-ENOMEM); - } + if (state->empty && (flags & NFS4_ACL_TYPE_DEFAULT)) + return NULL; + /* * When there are no effective ACEs, the following will end * up setting a 3-element effective posix ACL with all diff --git a/fs/nfsd/nfs4callback.c b/fs/nfsd/nfs4callback.c index 39c8ef8..2c73cae9 100644 --- a/fs/nfsd/nfs4callback.c +++ b/fs/nfsd/nfs4callback.c @@ -654,9 +654,11 @@ static struct rpc_clnt *create_backchannel_client(struct rpc_create_args *args) static int setup_callback_client(struct nfs4_client *clp, struct nfs4_cb_conn *conn, struct nfsd4_session *ses) { + int maxtime = max_cb_time(clp->net); struct rpc_timeout timeparms = { - .to_initval = max_cb_time(clp->net), + .to_initval = maxtime, .to_retries = 0, + .to_maxval = maxtime, }; struct rpc_create_args args = { .net = clp->net, diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index 3ba6597..32b699b 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -1078,6 +1078,18 @@ static struct nfs4_client *alloc_client(struct xdr_netobj name) return NULL; } clp->cl_name.len = name.len; + INIT_LIST_HEAD(&clp->cl_sessions); + idr_init(&clp->cl_stateids); + atomic_set(&clp->cl_refcount, 0); + clp->cl_cb_state = NFSD4_CB_UNKNOWN; + INIT_LIST_HEAD(&clp->cl_idhash); + INIT_LIST_HEAD(&clp->cl_openowners); + INIT_LIST_HEAD(&clp->cl_delegations); + INIT_LIST_HEAD(&clp->cl_lru); + INIT_LIST_HEAD(&clp->cl_callbacks); + INIT_LIST_HEAD(&clp->cl_revoked); + spin_lock_init(&clp->cl_lock); + rpc_init_wait_queue(&clp->cl_cb_waitq, "Backchannel slot table"); return clp; } @@ -1095,6 +1107,7 @@ free_client(struct nfs4_client *clp) WARN_ON_ONCE(atomic_read(&ses->se_ref)); free_session(ses); } + rpc_destroy_wait_queue(&clp->cl_cb_waitq); free_svc_cred(&clp->cl_cred); kfree(clp->cl_name.data); idr_destroy(&clp->cl_stateids); @@ -1347,7 +1360,6 @@ static struct nfs4_client *create_client(struct xdr_netobj name, if (clp == NULL) return NULL; - INIT_LIST_HEAD(&clp->cl_sessions); ret = copy_cred(&clp->cl_cred, &rqstp->rq_cred); if (ret) { spin_lock(&nn->client_lock); @@ -1355,20 +1367,9 @@ static struct nfs4_client *create_client(struct xdr_netobj name, spin_unlock(&nn->client_lock); return NULL; } - idr_init(&clp->cl_stateids); - atomic_set(&clp->cl_refcount, 0); - clp->cl_cb_state = NFSD4_CB_UNKNOWN; - INIT_LIST_HEAD(&clp->cl_idhash); - INIT_LIST_HEAD(&clp->cl_openowners); - INIT_LIST_HEAD(&clp->cl_delegations); - INIT_LIST_HEAD(&clp->cl_lru); - INIT_LIST_HEAD(&clp->cl_callbacks); - INIT_LIST_HEAD(&clp->cl_revoked); - spin_lock_init(&clp->cl_lock); nfsd4_init_callback(&clp->cl_cb_null); clp->cl_time = get_seconds(); clear_bit(0, &clp->cl_cb_slot_busy); - rpc_init_wait_queue(&clp->cl_cb_waitq, "Backchannel slot table"); copy_verf(clp, verf); rpc_copy_addr((struct sockaddr *) &clp->cl_addr, sa); gen_confirm(clp); diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c index 2723c1b..18881f3 100644 --- a/fs/nfsd/nfs4xdr.c +++ b/fs/nfsd/nfs4xdr.c @@ -3627,14 +3627,6 @@ nfsd4_encode_operation(struct nfsd4_compoundres *resp, struct nfsd4_op *op) /* nfsd4_check_resp_size guarantees enough room for error status */ if (!op->status) op->status = nfsd4_check_resp_size(resp, 0); - if (op->status == nfserr_resource && nfsd4_has_session(&resp->cstate)) { - struct nfsd4_slot *slot = resp->cstate.slot; - - if (slot->sl_flags & NFSD4_SLOT_CACHETHIS) - op->status = nfserr_rep_too_big_to_cache; - else - op->status = nfserr_rep_too_big; - } if (so) { so->so_replay.rp_status = op->status; so->so_replay.rp_buflen = (char *)resp->p - (char *)(statp+1); diff --git a/fs/notify/fanotify/fanotify_user.c b/fs/notify/fanotify/fanotify_user.c index 4e565c8..732648b 100644 --- a/fs/notify/fanotify/fanotify_user.c +++ b/fs/notify/fanotify/fanotify_user.c @@ -698,6 +698,8 @@ SYSCALL_DEFINE2(fanotify_init, unsigned int, flags, unsigned int, event_f_flags) } group->overflow_event = &oevent->fse; + if (force_o_largefile()) + event_f_flags |= O_LARGEFILE; group->fanotify_data.f_flags = event_f_flags; #ifdef CONFIG_FANOTIFY_ACCESS_PERMISSIONS spin_lock_init(&group->fanotify_data.access_lock); diff --git a/fs/posix_acl.c b/fs/posix_acl.c index 9e363e4..0855f77 100644 --- a/fs/posix_acl.c +++ b/fs/posix_acl.c @@ -246,6 +246,12 @@ posix_acl_equiv_mode(const struct posix_acl *acl, umode_t *mode_p) umode_t mode = 0; int not_equiv = 0; + /* + * A null ACL can always be presented as mode bits. + */ + if (!acl) + return 0; + FOREACH_ACL_ENTRY(pa, acl, pe) { switch (pa->e_tag) { case ACL_USER_OBJ: diff --git a/fs/sysfs/file.c b/fs/sysfs/file.c index 28cc1acd..e9ef59b 100644 --- a/fs/sysfs/file.c +++ b/fs/sysfs/file.c @@ -47,12 +47,13 @@ static int sysfs_kf_seq_show(struct seq_file *sf, void *v) ssize_t count; char *buf; - /* acquire buffer and ensure that it's >= PAGE_SIZE */ + /* acquire buffer and ensure that it's >= PAGE_SIZE and clear */ count = seq_get_buf(sf, &buf); if (count < PAGE_SIZE) { seq_commit(sf, -1); return 0; } + memset(buf, 0, PAGE_SIZE); /* * Invoke show(). Control may reach here via seq file lseek even diff --git a/fs/sysfs/mount.c b/fs/sysfs/mount.c index a66ad61..8794423 100644 --- a/fs/sysfs/mount.c +++ b/fs/sysfs/mount.c @@ -63,7 +63,8 @@ int __init sysfs_init(void) { int err; - sysfs_root = kernfs_create_root(NULL, 0, NULL); + sysfs_root = kernfs_create_root(NULL, KERNFS_ROOT_EXTRA_OPEN_PERM_CHECK, + NULL); if (IS_ERR(sysfs_root)) return PTR_ERR(sysfs_root); diff --git a/fs/ubifs/super.c b/fs/ubifs/super.c index a126608..a81c7b5 100644 --- a/fs/ubifs/super.c +++ b/fs/ubifs/super.c @@ -1556,7 +1556,7 @@ static int ubifs_remount_rw(struct ubifs_info *c) if (c->space_fixup) { err = ubifs_fixup_free_space(c); if (err) - return err; + goto out; } err = check_free_space(c); diff --git a/fs/xfs/xfs_attr.c b/fs/xfs/xfs_attr.c index 01b6a01..abda112 100644 --- a/fs/xfs/xfs_attr.c +++ b/fs/xfs/xfs_attr.c @@ -213,7 +213,7 @@ xfs_attr_calc_size( * Out of line attribute, cannot double split, but * make room for the attribute value itself. */ - uint dblocks = XFS_B_TO_FSB(mp, valuelen); + uint dblocks = xfs_attr3_rmt_blocks(mp, valuelen); nblks += dblocks; nblks += XFS_NEXTENTADD_SPACE_RES(mp, dblocks, XFS_ATTR_FORK); } @@ -698,11 +698,22 @@ xfs_attr_leaf_addname(xfs_da_args_t *args) trace_xfs_attr_leaf_replace(args); + /* save the attribute state for later removal*/ args->op_flags |= XFS_DA_OP_RENAME; /* an atomic rename */ args->blkno2 = args->blkno; /* set 2nd entry info*/ args->index2 = args->index; args->rmtblkno2 = args->rmtblkno; args->rmtblkcnt2 = args->rmtblkcnt; + args->rmtvaluelen2 = args->rmtvaluelen; + + /* + * clear the remote attr state now that it is saved so that the + * values reflect the state of the attribute we are about to + * add, not the attribute we just found and will remove later. + */ + args->rmtblkno = 0; + args->rmtblkcnt = 0; + args->rmtvaluelen = 0; } /* @@ -794,6 +805,7 @@ xfs_attr_leaf_addname(xfs_da_args_t *args) args->blkno = args->blkno2; args->rmtblkno = args->rmtblkno2; args->rmtblkcnt = args->rmtblkcnt2; + args->rmtvaluelen = args->rmtvaluelen2; if (args->rmtblkno) { error = xfs_attr_rmtval_remove(args); if (error) @@ -999,13 +1011,22 @@ restart: trace_xfs_attr_node_replace(args); + /* save the attribute state for later removal*/ args->op_flags |= XFS_DA_OP_RENAME; /* atomic rename op */ args->blkno2 = args->blkno; /* set 2nd entry info*/ args->index2 = args->index; args->rmtblkno2 = args->rmtblkno; args->rmtblkcnt2 = args->rmtblkcnt; + args->rmtvaluelen2 = args->rmtvaluelen; + + /* + * clear the remote attr state now that it is saved so that the + * values reflect the state of the attribute we are about to + * add, not the attribute we just found and will remove later. + */ args->rmtblkno = 0; args->rmtblkcnt = 0; + args->rmtvaluelen = 0; } retval = xfs_attr3_leaf_add(blk->bp, state->args); @@ -1133,6 +1154,7 @@ restart: args->blkno = args->blkno2; args->rmtblkno = args->rmtblkno2; args->rmtblkcnt = args->rmtblkcnt2; + args->rmtvaluelen = args->rmtvaluelen2; if (args->rmtblkno) { error = xfs_attr_rmtval_remove(args); if (error) diff --git a/fs/xfs/xfs_attr_leaf.c b/fs/xfs/xfs_attr_leaf.c index fe9587f..511c2834 100644 --- a/fs/xfs/xfs_attr_leaf.c +++ b/fs/xfs/xfs_attr_leaf.c @@ -1229,6 +1229,7 @@ xfs_attr3_leaf_add_work( name_rmt->valueblk = 0; args->rmtblkno = 1; args->rmtblkcnt = xfs_attr3_rmt_blocks(mp, args->valuelen); + args->rmtvaluelen = args->valuelen; } xfs_trans_log_buf(args->trans, bp, XFS_DA_LOGRANGE(leaf, xfs_attr3_leaf_name(leaf, args->index), @@ -2167,11 +2168,11 @@ xfs_attr3_leaf_lookup_int( if (!xfs_attr_namesp_match(args->flags, entry->flags)) continue; args->index = probe; - args->valuelen = be32_to_cpu(name_rmt->valuelen); + args->rmtvaluelen = be32_to_cpu(name_rmt->valuelen); args->rmtblkno = be32_to_cpu(name_rmt->valueblk); args->rmtblkcnt = xfs_attr3_rmt_blocks( args->dp->i_mount, - args->valuelen); + args->rmtvaluelen); return XFS_ERROR(EEXIST); } } @@ -2220,19 +2221,19 @@ xfs_attr3_leaf_getvalue( name_rmt = xfs_attr3_leaf_name_remote(leaf, args->index); ASSERT(name_rmt->namelen == args->namelen); ASSERT(memcmp(args->name, name_rmt->name, args->namelen) == 0); - valuelen = be32_to_cpu(name_rmt->valuelen); + args->rmtvaluelen = be32_to_cpu(name_rmt->valuelen); args->rmtblkno = be32_to_cpu(name_rmt->valueblk); args->rmtblkcnt = xfs_attr3_rmt_blocks(args->dp->i_mount, - valuelen); + args->rmtvaluelen); if (args->flags & ATTR_KERNOVAL) { - args->valuelen = valuelen; + args->valuelen = args->rmtvaluelen; return 0; } - if (args->valuelen < valuelen) { - args->valuelen = valuelen; + if (args->valuelen < args->rmtvaluelen) { + args->valuelen = args->rmtvaluelen; return XFS_ERROR(ERANGE); } - args->valuelen = valuelen; + args->valuelen = args->rmtvaluelen; } return 0; } @@ -2519,7 +2520,7 @@ xfs_attr3_leaf_clearflag( ASSERT((entry->flags & XFS_ATTR_LOCAL) == 0); name_rmt = xfs_attr3_leaf_name_remote(leaf, args->index); name_rmt->valueblk = cpu_to_be32(args->rmtblkno); - name_rmt->valuelen = cpu_to_be32(args->valuelen); + name_rmt->valuelen = cpu_to_be32(args->rmtvaluelen); xfs_trans_log_buf(args->trans, bp, XFS_DA_LOGRANGE(leaf, name_rmt, sizeof(*name_rmt))); } @@ -2677,7 +2678,7 @@ xfs_attr3_leaf_flipflags( ASSERT((entry1->flags & XFS_ATTR_LOCAL) == 0); name_rmt = xfs_attr3_leaf_name_remote(leaf1, args->index); name_rmt->valueblk = cpu_to_be32(args->rmtblkno); - name_rmt->valuelen = cpu_to_be32(args->valuelen); + name_rmt->valuelen = cpu_to_be32(args->rmtvaluelen); xfs_trans_log_buf(args->trans, bp1, XFS_DA_LOGRANGE(leaf1, name_rmt, sizeof(*name_rmt))); } diff --git a/fs/xfs/xfs_attr_list.c b/fs/xfs/xfs_attr_list.c index 01db96f..833fe5d9 100644 --- a/fs/xfs/xfs_attr_list.c +++ b/fs/xfs/xfs_attr_list.c @@ -447,6 +447,7 @@ xfs_attr3_leaf_list_int( args.dp = context->dp; args.whichfork = XFS_ATTR_FORK; args.valuelen = valuelen; + args.rmtvaluelen = valuelen; args.value = kmem_alloc(valuelen, KM_SLEEP | KM_NOFS); args.rmtblkno = be32_to_cpu(name_rmt->valueblk); args.rmtblkcnt = xfs_attr3_rmt_blocks( diff --git a/fs/xfs/xfs_attr_remote.c b/fs/xfs/xfs_attr_remote.c index 6e37823..d2e6e94 100644 --- a/fs/xfs/xfs_attr_remote.c +++ b/fs/xfs/xfs_attr_remote.c @@ -337,7 +337,7 @@ xfs_attr_rmtval_get( struct xfs_buf *bp; xfs_dablk_t lblkno = args->rmtblkno; __uint8_t *dst = args->value; - int valuelen = args->valuelen; + int valuelen; int nmap; int error; int blkcnt = args->rmtblkcnt; @@ -347,7 +347,9 @@ xfs_attr_rmtval_get( trace_xfs_attr_rmtval_get(args); ASSERT(!(args->flags & ATTR_KERNOVAL)); + ASSERT(args->rmtvaluelen == args->valuelen); + valuelen = args->rmtvaluelen; while (valuelen > 0) { nmap = ATTR_RMTVALUE_MAPSIZE; error = xfs_bmapi_read(args->dp, (xfs_fileoff_t)lblkno, @@ -415,7 +417,7 @@ xfs_attr_rmtval_set( * attributes have headers, we can't just do a straight byte to FSB * conversion and have to take the header space into account. */ - blkcnt = xfs_attr3_rmt_blocks(mp, args->valuelen); + blkcnt = xfs_attr3_rmt_blocks(mp, args->rmtvaluelen); error = xfs_bmap_first_unused(args->trans, args->dp, blkcnt, &lfileoff, XFS_ATTR_FORK); if (error) @@ -480,7 +482,7 @@ xfs_attr_rmtval_set( */ lblkno = args->rmtblkno; blkcnt = args->rmtblkcnt; - valuelen = args->valuelen; + valuelen = args->rmtvaluelen; while (valuelen > 0) { struct xfs_buf *bp; xfs_daddr_t dblkno; diff --git a/fs/xfs/xfs_da_btree.h b/fs/xfs/xfs_da_btree.h index 6e95ea7..201c609 100644 --- a/fs/xfs/xfs_da_btree.h +++ b/fs/xfs/xfs_da_btree.h @@ -60,10 +60,12 @@ typedef struct xfs_da_args { int index; /* index of attr of interest in blk */ xfs_dablk_t rmtblkno; /* remote attr value starting blkno */ int rmtblkcnt; /* remote attr value block count */ + int rmtvaluelen; /* remote attr value length in bytes */ xfs_dablk_t blkno2; /* blkno of 2nd attr leaf of interest */ int index2; /* index of 2nd attr in blk */ xfs_dablk_t rmtblkno2; /* remote attr value starting blkno */ int rmtblkcnt2; /* remote attr value block count */ + int rmtvaluelen2; /* remote attr value length in bytes */ int op_flags; /* operation flags */ enum xfs_dacmp cmpresult; /* name compare result for lookups */ } xfs_da_args_t; diff --git a/fs/xfs/xfs_export.c b/fs/xfs/xfs_export.c index 1399e18..753e467 100644 --- a/fs/xfs/xfs_export.c +++ b/fs/xfs/xfs_export.c @@ -237,7 +237,7 @@ xfs_fs_nfs_commit_metadata( if (!lsn) return 0; - return _xfs_log_force_lsn(mp, lsn, XFS_LOG_SYNC, NULL); + return -_xfs_log_force_lsn(mp, lsn, XFS_LOG_SYNC, NULL); } const struct export_operations xfs_export_operations = { diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c index 951a232..830c1c9 100644 --- a/fs/xfs/xfs_file.c +++ b/fs/xfs/xfs_file.c @@ -155,7 +155,7 @@ xfs_dir_fsync( if (!lsn) return 0; - return _xfs_log_force_lsn(mp, lsn, XFS_LOG_SYNC, NULL); + return -_xfs_log_force_lsn(mp, lsn, XFS_LOG_SYNC, NULL); } STATIC int @@ -295,7 +295,7 @@ xfs_file_aio_read( xfs_rw_ilock(ip, XFS_IOLOCK_EXCL); if (inode->i_mapping->nrpages) { - ret = -filemap_write_and_wait_range( + ret = filemap_write_and_wait_range( VFS_I(ip)->i_mapping, pos, -1); if (ret) { @@ -837,7 +837,7 @@ xfs_file_fallocate( unsigned blksize_mask = (1 << inode->i_blkbits) - 1; if (offset & blksize_mask || len & blksize_mask) { - error = -EINVAL; + error = EINVAL; goto out_unlock; } @@ -846,7 +846,7 @@ xfs_file_fallocate( * in which case it is effectively a truncate operation */ if (offset + len >= i_size_read(inode)) { - error = -EINVAL; + error = EINVAL; goto out_unlock; } diff --git a/fs/xfs/xfs_iops.c b/fs/xfs/xfs_iops.c index ef1ca01..36d6303 100644 --- a/fs/xfs/xfs_iops.c +++ b/fs/xfs/xfs_iops.c @@ -72,8 +72,8 @@ xfs_initxattrs( int error = 0; for (xattr = xattr_array; xattr->name != NULL; xattr++) { - error = xfs_attr_set(ip, xattr->name, xattr->value, - xattr->value_len, ATTR_SECURE); + error = -xfs_attr_set(ip, xattr->name, xattr->value, + xattr->value_len, ATTR_SECURE); if (error < 0) break; } @@ -93,8 +93,8 @@ xfs_init_security( struct inode *dir, const struct qstr *qstr) { - return security_inode_init_security(inode, dir, qstr, - &xfs_initxattrs, NULL); + return -security_inode_init_security(inode, dir, qstr, + &xfs_initxattrs, NULL); } static void @@ -124,15 +124,15 @@ xfs_cleanup_inode( xfs_dentry_to_name(&teardown, dentry, 0); xfs_remove(XFS_I(dir), &teardown, XFS_I(inode)); - iput(inode); } STATIC int -xfs_vn_mknod( +xfs_generic_create( struct inode *dir, struct dentry *dentry, umode_t mode, - dev_t rdev) + dev_t rdev, + bool tmpfile) /* unnamed file */ { struct inode *inode; struct xfs_inode *ip = NULL; @@ -156,8 +156,12 @@ xfs_vn_mknod( if (error) return error; - xfs_dentry_to_name(&name, dentry, mode); - error = xfs_create(XFS_I(dir), &name, mode, rdev, &ip); + if (!tmpfile) { + xfs_dentry_to_name(&name, dentry, mode); + error = xfs_create(XFS_I(dir), &name, mode, rdev, &ip); + } else { + error = xfs_create_tmpfile(XFS_I(dir), dentry, mode, &ip); + } if (unlikely(error)) goto out_free_acl; @@ -169,18 +173,22 @@ xfs_vn_mknod( #ifdef CONFIG_XFS_POSIX_ACL if (default_acl) { - error = xfs_set_acl(inode, default_acl, ACL_TYPE_DEFAULT); + error = -xfs_set_acl(inode, default_acl, ACL_TYPE_DEFAULT); if (error) goto out_cleanup_inode; } if (acl) { - error = xfs_set_acl(inode, acl, ACL_TYPE_ACCESS); + error = -xfs_set_acl(inode, acl, ACL_TYPE_ACCESS); if (error) goto out_cleanup_inode; } #endif - d_instantiate(dentry, inode); + if (tmpfile) + d_tmpfile(dentry, inode); + else + d_instantiate(dentry, inode); + out_free_acl: if (default_acl) posix_acl_release(default_acl); @@ -189,11 +197,23 @@ xfs_vn_mknod( return -error; out_cleanup_inode: - xfs_cleanup_inode(dir, inode, dentry); + if (!tmpfile) + xfs_cleanup_inode(dir, inode, dentry); + iput(inode); goto out_free_acl; } STATIC int +xfs_vn_mknod( + struct inode *dir, + struct dentry *dentry, + umode_t mode, + dev_t rdev) +{ + return xfs_generic_create(dir, dentry, mode, rdev, false); +} + +STATIC int xfs_vn_create( struct inode *dir, struct dentry *dentry, @@ -353,6 +373,7 @@ xfs_vn_symlink( out_cleanup_inode: xfs_cleanup_inode(dir, inode, dentry); + iput(inode); out: return -error; } @@ -1053,25 +1074,7 @@ xfs_vn_tmpfile( struct dentry *dentry, umode_t mode) { - int error; - struct xfs_inode *ip; - struct inode *inode; - - error = xfs_create_tmpfile(XFS_I(dir), dentry, mode, &ip); - if (unlikely(error)) - return -error; - - inode = VFS_I(ip); - - error = xfs_init_security(inode, dir, &dentry->d_name); - if (unlikely(error)) { - iput(inode); - return -error; - } - - d_tmpfile(dentry, inode); - - return 0; + return xfs_generic_create(dir, dentry, mode, 0, true); } static const struct inode_operations xfs_inode_operations = { diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c index 08624dc..a5f8bd9 100644 --- a/fs/xfs/xfs_log.c +++ b/fs/xfs/xfs_log.c @@ -616,11 +616,13 @@ xfs_log_mount( int error = 0; int min_logfsbs; - if (!(mp->m_flags & XFS_MOUNT_NORECOVERY)) - xfs_notice(mp, "Mounting Filesystem"); - else { + if (!(mp->m_flags & XFS_MOUNT_NORECOVERY)) { + xfs_notice(mp, "Mounting V%d Filesystem", + XFS_SB_VERSION_NUM(&mp->m_sb)); + } else { xfs_notice(mp, -"Mounting filesystem in no-recovery mode. Filesystem will be inconsistent."); +"Mounting V%d filesystem in no-recovery mode. Filesystem will be inconsistent.", + XFS_SB_VERSION_NUM(&mp->m_sb)); ASSERT(mp->m_flags & XFS_MOUNT_RDONLY); } diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c index 993cb19..944f3d9 100644 --- a/fs/xfs/xfs_mount.c +++ b/fs/xfs/xfs_mount.c @@ -743,8 +743,6 @@ xfs_mountfs( new_size *= mp->m_sb.sb_inodesize / XFS_DINODE_MIN_SIZE; if (mp->m_sb.sb_inoalignmt >= XFS_B_TO_FSBT(mp, new_size)) mp->m_inode_cluster_size = new_size; - xfs_info(mp, "Using inode cluster size of %d bytes", - mp->m_inode_cluster_size); } /* diff --git a/fs/xfs/xfs_qm.c b/fs/xfs/xfs_qm.c index 348e4d2..dc977b6 100644 --- a/fs/xfs/xfs_qm.c +++ b/fs/xfs/xfs_qm.c @@ -843,22 +843,17 @@ xfs_qm_init_quotainfo( qinf = mp->m_quotainfo = kmem_zalloc(sizeof(xfs_quotainfo_t), KM_SLEEP); - if ((error = list_lru_init(&qinf->qi_lru))) { - kmem_free(qinf); - mp->m_quotainfo = NULL; - return error; - } + error = -list_lru_init(&qinf->qi_lru); + if (error) + goto out_free_qinf; /* * See if quotainodes are setup, and if not, allocate them, * and change the superblock accordingly. */ - if ((error = xfs_qm_init_quotainos(mp))) { - list_lru_destroy(&qinf->qi_lru); - kmem_free(qinf); - mp->m_quotainfo = NULL; - return error; - } + error = xfs_qm_init_quotainos(mp); + if (error) + goto out_free_lru; INIT_RADIX_TREE(&qinf->qi_uquota_tree, GFP_NOFS); INIT_RADIX_TREE(&qinf->qi_gquota_tree, GFP_NOFS); @@ -918,7 +913,7 @@ xfs_qm_init_quotainfo( qinf->qi_isoftlimit = be64_to_cpu(ddqp->d_ino_softlimit); qinf->qi_rtbhardlimit = be64_to_cpu(ddqp->d_rtb_hardlimit); qinf->qi_rtbsoftlimit = be64_to_cpu(ddqp->d_rtb_softlimit); - + xfs_qm_dqdestroy(dqp); } else { qinf->qi_btimelimit = XFS_QM_BTIMELIMIT; @@ -935,6 +930,13 @@ xfs_qm_init_quotainfo( qinf->qi_shrinker.flags = SHRINKER_NUMA_AWARE; register_shrinker(&qinf->qi_shrinker); return 0; + +out_free_lru: + list_lru_destroy(&qinf->qi_lru); +out_free_qinf: + kmem_free(qinf); + mp->m_quotainfo = NULL; + return error; } diff --git a/fs/xfs/xfs_sb.c b/fs/xfs/xfs_sb.c index 0c0e41b..8baf61a 100644 --- a/fs/xfs/xfs_sb.c +++ b/fs/xfs/xfs_sb.c @@ -201,10 +201,6 @@ xfs_mount_validate_sb( * write validation, we don't need to check feature masks. */ if (check_version && XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_5) { - xfs_alert(mp, -"Version 5 superblock detected. This kernel has EXPERIMENTAL support enabled!\n" -"Use of these features in this kernel is at your own risk!"); - if (xfs_sb_has_compat_feature(sbp, XFS_SB_FEAT_COMPAT_UNKNOWN)) { xfs_warn(mp, diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c index 2053767..3494eff 100644 --- a/fs/xfs/xfs_super.c +++ b/fs/xfs/xfs_super.c @@ -1433,11 +1433,11 @@ xfs_fs_fill_super( if (error) goto out_free_fsname; - error = xfs_init_mount_workqueues(mp); + error = -xfs_init_mount_workqueues(mp); if (error) goto out_close_devices; - error = xfs_icsb_init_counters(mp); + error = -xfs_icsb_init_counters(mp); if (error) goto out_destroy_workqueues; |