diff options
Diffstat (limited to 'fs')
136 files changed, 2612 insertions, 1735 deletions
diff --git a/fs/9p/Kconfig b/fs/9p/Kconfig index 55abfd6..6489e1f 100644 --- a/fs/9p/Kconfig +++ b/fs/9p/Kconfig @@ -31,3 +31,16 @@ config 9P_FS_POSIX_ACL If you don't know what Access Control Lists are, say N endif + + +config 9P_FS_SECURITY + bool "9P Security Labels" + depends on 9P_FS + help + Security labels support alternative access control models + implemented by security modules like SELinux. This option + enables an extended attribute handler for file security + labels in the 9P filesystem. + + If you are not using a security module that requires using + extended attributes for file security labels, say N. diff --git a/fs/9p/Makefile b/fs/9p/Makefile index ab8c127..ff7be98 100644 --- a/fs/9p/Makefile +++ b/fs/9p/Makefile @@ -11,7 +11,9 @@ obj-$(CONFIG_9P_FS) := 9p.o v9fs.o \ fid.o \ xattr.o \ - xattr_user.o + xattr_user.o \ + xattr_trusted.o 9p-$(CONFIG_9P_FSCACHE) += cache.o 9p-$(CONFIG_9P_FS_POSIX_ACL) += acl.o +9p-$(CONFIG_9P_FS_SECURITY) += xattr_security.o diff --git a/fs/9p/vfs_inode.c b/fs/9p/vfs_inode.c index d86edc8..25b018e 100644 --- a/fs/9p/vfs_inode.c +++ b/fs/9p/vfs_inode.c @@ -1054,13 +1054,11 @@ static int v9fs_vfs_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat) { - int err; struct v9fs_session_info *v9ses; struct p9_fid *fid; struct p9_wstat *st; p9_debug(P9_DEBUG_VFS, "dentry: %p\n", dentry); - err = -EPERM; v9ses = v9fs_dentry2v9ses(dentry); if (v9ses->cache == CACHE_LOOSE || v9ses->cache == CACHE_FSCACHE) { generic_fillattr(dentry->d_inode, stat); diff --git a/fs/9p/xattr.c b/fs/9p/xattr.c index c45e016..3c28cdf 100644 --- a/fs/9p/xattr.c +++ b/fs/9p/xattr.c @@ -167,9 +167,13 @@ ssize_t v9fs_listxattr(struct dentry *dentry, char *buffer, size_t buffer_size) const struct xattr_handler *v9fs_xattr_handlers[] = { &v9fs_xattr_user_handler, + &v9fs_xattr_trusted_handler, #ifdef CONFIG_9P_FS_POSIX_ACL &v9fs_xattr_acl_access_handler, &v9fs_xattr_acl_default_handler, #endif +#ifdef CONFIG_9P_FS_SECURITY + 
&v9fs_xattr_security_handler, +#endif NULL }; diff --git a/fs/9p/xattr.h b/fs/9p/xattr.h index eec348a..d3e2ea3 100644 --- a/fs/9p/xattr.h +++ b/fs/9p/xattr.h @@ -20,6 +20,8 @@ extern const struct xattr_handler *v9fs_xattr_handlers[]; extern struct xattr_handler v9fs_xattr_user_handler; +extern struct xattr_handler v9fs_xattr_trusted_handler; +extern struct xattr_handler v9fs_xattr_security_handler; extern const struct xattr_handler v9fs_xattr_acl_access_handler; extern const struct xattr_handler v9fs_xattr_acl_default_handler; diff --git a/fs/9p/xattr_security.c b/fs/9p/xattr_security.c new file mode 100644 index 0000000..cb247a1 --- /dev/null +++ b/fs/9p/xattr_security.c @@ -0,0 +1,80 @@ +/* + * Copyright IBM Corporation, 2010 + * Author Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com> + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of version 2.1 of the GNU Lesser General Public License + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it would be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
+ * + */ + + +#include <linux/module.h> +#include <linux/string.h> +#include <linux/fs.h> +#include <linux/slab.h> +#include "xattr.h" + +static int v9fs_xattr_security_get(struct dentry *dentry, const char *name, + void *buffer, size_t size, int type) +{ + int retval; + char *full_name; + size_t name_len; + size_t prefix_len = XATTR_SECURITY_PREFIX_LEN; + + if (name == NULL) + return -EINVAL; + + if (strcmp(name, "") == 0) + return -EINVAL; + + name_len = strlen(name); + full_name = kmalloc(prefix_len + name_len + 1 , GFP_KERNEL); + if (!full_name) + return -ENOMEM; + memcpy(full_name, XATTR_SECURITY_PREFIX, prefix_len); + memcpy(full_name+prefix_len, name, name_len); + full_name[prefix_len + name_len] = '\0'; + + retval = v9fs_xattr_get(dentry, full_name, buffer, size); + kfree(full_name); + return retval; +} + +static int v9fs_xattr_security_set(struct dentry *dentry, const char *name, + const void *value, size_t size, int flags, int type) +{ + int retval; + char *full_name; + size_t name_len; + size_t prefix_len = XATTR_SECURITY_PREFIX_LEN; + + if (name == NULL) + return -EINVAL; + + if (strcmp(name, "") == 0) + return -EINVAL; + + name_len = strlen(name); + full_name = kmalloc(prefix_len + name_len + 1 , GFP_KERNEL); + if (!full_name) + return -ENOMEM; + memcpy(full_name, XATTR_SECURITY_PREFIX, prefix_len); + memcpy(full_name + prefix_len, name, name_len); + full_name[prefix_len + name_len] = '\0'; + + retval = v9fs_xattr_set(dentry, full_name, value, size, flags); + kfree(full_name); + return retval; +} + +struct xattr_handler v9fs_xattr_security_handler = { + .prefix = XATTR_SECURITY_PREFIX, + .get = v9fs_xattr_security_get, + .set = v9fs_xattr_security_set, +}; diff --git a/fs/9p/xattr_trusted.c b/fs/9p/xattr_trusted.c new file mode 100644 index 0000000..e30d33b --- /dev/null +++ b/fs/9p/xattr_trusted.c @@ -0,0 +1,80 @@ +/* + * Copyright IBM Corporation, 2010 + * Author Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com> + * + * This program is free 
software; you can redistribute it and/or modify it + * under the terms of version 2.1 of the GNU Lesser General Public License + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it would be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + */ + + +#include <linux/module.h> +#include <linux/string.h> +#include <linux/fs.h> +#include <linux/slab.h> +#include "xattr.h" + +static int v9fs_xattr_trusted_get(struct dentry *dentry, const char *name, + void *buffer, size_t size, int type) +{ + int retval; + char *full_name; + size_t name_len; + size_t prefix_len = XATTR_TRUSTED_PREFIX_LEN; + + if (name == NULL) + return -EINVAL; + + if (strcmp(name, "") == 0) + return -EINVAL; + + name_len = strlen(name); + full_name = kmalloc(prefix_len + name_len + 1 , GFP_KERNEL); + if (!full_name) + return -ENOMEM; + memcpy(full_name, XATTR_TRUSTED_PREFIX, prefix_len); + memcpy(full_name+prefix_len, name, name_len); + full_name[prefix_len + name_len] = '\0'; + + retval = v9fs_xattr_get(dentry, full_name, buffer, size); + kfree(full_name); + return retval; +} + +static int v9fs_xattr_trusted_set(struct dentry *dentry, const char *name, + const void *value, size_t size, int flags, int type) +{ + int retval; + char *full_name; + size_t name_len; + size_t prefix_len = XATTR_TRUSTED_PREFIX_LEN; + + if (name == NULL) + return -EINVAL; + + if (strcmp(name, "") == 0) + return -EINVAL; + + name_len = strlen(name); + full_name = kmalloc(prefix_len + name_len + 1 , GFP_KERNEL); + if (!full_name) + return -ENOMEM; + memcpy(full_name, XATTR_TRUSTED_PREFIX, prefix_len); + memcpy(full_name + prefix_len, name, name_len); + full_name[prefix_len + name_len] = '\0'; + + retval = v9fs_xattr_set(dentry, full_name, value, size, flags); + kfree(full_name); + return retval; +} + +struct xattr_handler v9fs_xattr_trusted_handler = { + .prefix = XATTR_TRUSTED_PREFIX, + .get 
= v9fs_xattr_trusted_get, + .set = v9fs_xattr_trusted_set, +}; diff --git a/fs/binfmt_aout.c b/fs/binfmt_aout.c index bce8769..89dec7f 100644 --- a/fs/binfmt_aout.c +++ b/fs/binfmt_aout.c @@ -255,8 +255,6 @@ static int load_aout_binary(struct linux_binprm * bprm) (current->mm->start_data = N_DATADDR(ex)); current->mm->brk = ex.a_bss + (current->mm->start_brk = N_BSSADDR(ex)); - current->mm->free_area_cache = current->mm->mmap_base; - current->mm->cached_hole_size = 0; retval = setup_arg_pages(bprm, STACK_TOP, EXSTACK_DEFAULT); if (retval < 0) { diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c index f8a0b0e..100edcc 100644 --- a/fs/binfmt_elf.c +++ b/fs/binfmt_elf.c @@ -738,8 +738,6 @@ static int load_elf_binary(struct linux_binprm *bprm) /* Do this so that we can load the interpreter, if need be. We will change some of these later */ - current->mm->free_area_cache = current->mm->mmap_base; - current->mm->cached_hole_size = 0; retval = setup_arg_pages(bprm, randomize_stack_top(STACK_TOP), executable_stack); if (retval < 0) { diff --git a/fs/block_dev.c b/fs/block_dev.c index bb43ce0..c7bda5c 100644 --- a/fs/block_dev.c +++ b/fs/block_dev.c @@ -58,17 +58,24 @@ static void bdev_inode_switch_bdi(struct inode *inode, struct backing_dev_info *dst) { struct backing_dev_info *old = inode->i_data.backing_dev_info; + bool wakeup_bdi = false; if (unlikely(dst == old)) /* deadlock avoidance */ return; bdi_lock_two(&old->wb, &dst->wb); spin_lock(&inode->i_lock); inode->i_data.backing_dev_info = dst; - if (inode->i_state & I_DIRTY) + if (inode->i_state & I_DIRTY) { + if (bdi_cap_writeback_dirty(dst) && !wb_has_dirty_io(&dst->wb)) + wakeup_bdi = true; list_move(&inode->i_wb_list, &dst->wb.b_dirty); + } spin_unlock(&inode->i_lock); spin_unlock(&old->wb.list_lock); spin_unlock(&dst->wb.list_lock); + + if (wakeup_bdi) + bdi_wakeup_thread_delayed(dst); } /* Kill _all_ buffers and pagecache , dirty or not.. 
*/ diff --git a/fs/btrfs/backref.c b/fs/btrfs/backref.c index eaf1333..8bc5e8c 100644 --- a/fs/btrfs/backref.c +++ b/fs/btrfs/backref.c @@ -36,16 +36,23 @@ static int check_extent_in_eb(struct btrfs_key *key, struct extent_buffer *eb, u64 extent_item_pos, struct extent_inode_elem **eie) { - u64 data_offset; - u64 data_len; + u64 offset = 0; struct extent_inode_elem *e; - data_offset = btrfs_file_extent_offset(eb, fi); - data_len = btrfs_file_extent_num_bytes(eb, fi); + if (!btrfs_file_extent_compression(eb, fi) && + !btrfs_file_extent_encryption(eb, fi) && + !btrfs_file_extent_other_encoding(eb, fi)) { + u64 data_offset; + u64 data_len; - if (extent_item_pos < data_offset || - extent_item_pos >= data_offset + data_len) - return 1; + data_offset = btrfs_file_extent_offset(eb, fi); + data_len = btrfs_file_extent_num_bytes(eb, fi); + + if (extent_item_pos < data_offset || + extent_item_pos >= data_offset + data_len) + return 1; + offset = extent_item_pos - data_offset; + } e = kmalloc(sizeof(*e), GFP_NOFS); if (!e) @@ -53,7 +60,7 @@ static int check_extent_in_eb(struct btrfs_key *key, struct extent_buffer *eb, e->next = *eie; e->inum = key->objectid; - e->offset = key->offset + (extent_item_pos - data_offset); + e->offset = key->offset + offset; *eie = e; return 0; @@ -189,7 +196,7 @@ static int add_all_parents(struct btrfs_root *root, struct btrfs_path *path, struct extent_buffer *eb; struct btrfs_key key; struct btrfs_file_extent_item *fi; - struct extent_inode_elem *eie = NULL; + struct extent_inode_elem *eie = NULL, *old = NULL; u64 disk_byte; if (level != 0) { @@ -223,6 +230,7 @@ static int add_all_parents(struct btrfs_root *root, struct btrfs_path *path, if (disk_byte == wanted_disk_byte) { eie = NULL; + old = NULL; if (extent_item_pos) { ret = check_extent_in_eb(&key, eb, fi, *extent_item_pos, @@ -230,18 +238,20 @@ static int add_all_parents(struct btrfs_root *root, struct btrfs_path *path, if (ret < 0) break; } - if (!ret) { - ret = ulist_add(parents, 
eb->start, - (uintptr_t)eie, GFP_NOFS); - if (ret < 0) - break; - if (!extent_item_pos) { - ret = btrfs_next_old_leaf(root, path, - time_seq); - continue; - } + if (ret > 0) + goto next; + ret = ulist_add_merge(parents, eb->start, + (uintptr_t)eie, + (u64 *)&old, GFP_NOFS); + if (ret < 0) + break; + if (!ret && extent_item_pos) { + while (old->next) + old = old->next; + old->next = eie; } } +next: ret = btrfs_next_old_item(root, path, time_seq); } diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index 5bf4c39..ed50460 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c @@ -1271,7 +1271,6 @@ tree_mod_log_rewind(struct btrfs_fs_info *fs_info, struct extent_buffer *eb, BUG_ON(!eb_rewin); } - extent_buffer_get(eb_rewin); btrfs_tree_read_unlock(eb); free_extent_buffer(eb); diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 0236de7..1204c8e 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -7466,6 +7466,7 @@ int btrfs_drop_snapshot(struct btrfs_root *root, int err = 0; int ret; int level; + bool root_dropped = false; path = btrfs_alloc_path(); if (!path) { @@ -7523,6 +7524,7 @@ int btrfs_drop_snapshot(struct btrfs_root *root, while (1) { btrfs_tree_lock(path->nodes[level]); btrfs_set_lock_blocking(path->nodes[level]); + path->locks[level] = BTRFS_WRITE_LOCK_BLOCKING; ret = btrfs_lookup_extent_info(trans, root, path->nodes[level]->start, @@ -7538,6 +7540,7 @@ int btrfs_drop_snapshot(struct btrfs_root *root, break; btrfs_tree_unlock(path->nodes[level]); + path->locks[level] = 0; WARN_ON(wc->refs[level] != 1); level--; } @@ -7552,11 +7555,6 @@ int btrfs_drop_snapshot(struct btrfs_root *root, wc->reada_count = BTRFS_NODEPTRS_PER_BLOCK(root); while (1) { - if (!for_reloc && btrfs_need_cleaner_sleep(root)) { - pr_debug("btrfs: drop snapshot early exit\n"); - err = -EAGAIN; - goto out_end_trans; - } ret = walk_down_tree(trans, root, path, wc); if (ret < 0) { @@ -7584,7 +7582,8 @@ int btrfs_drop_snapshot(struct btrfs_root *root, } 
BUG_ON(wc->level == 0); - if (btrfs_should_end_transaction(trans, tree_root)) { + if (btrfs_should_end_transaction(trans, tree_root) || + (!for_reloc && btrfs_need_cleaner_sleep(root))) { ret = btrfs_update_root(trans, tree_root, &root->root_key, root_item); @@ -7595,6 +7594,12 @@ int btrfs_drop_snapshot(struct btrfs_root *root, } btrfs_end_transaction_throttle(trans, tree_root); + if (!for_reloc && btrfs_need_cleaner_sleep(root)) { + pr_debug("btrfs: drop snapshot early exit\n"); + err = -EAGAIN; + goto out_free; + } + trans = btrfs_start_transaction(tree_root, 0); if (IS_ERR(trans)) { err = PTR_ERR(trans); @@ -7639,12 +7644,22 @@ int btrfs_drop_snapshot(struct btrfs_root *root, free_extent_buffer(root->commit_root); btrfs_put_fs_root(root); } + root_dropped = true; out_end_trans: btrfs_end_transaction_throttle(trans, tree_root); out_free: kfree(wc); btrfs_free_path(path); out: + /* + * So if we need to stop dropping the snapshot for whatever reason we + * need to make sure to add it back to the dead root list so that we + * keep trying to do the work later. This also cleans up roots if we + * don't have it in the radix (like when we recover after a power fail + * or unmount) so we don't leak memory. + */ + if (root_dropped == false) + btrfs_add_dead_root(root); if (err) btrfs_std_error(root->fs_info, err); return err; diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index 583d98b..fe443fe 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -4048,7 +4048,7 @@ int extent_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, } while (!end) { - u64 offset_in_extent; + u64 offset_in_extent = 0; /* break if the extent we found is outside the range */ if (em->start >= max || extent_map_end(em) < off) @@ -4064,9 +4064,12 @@ int extent_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, /* * record the offset from the start of the extent - * for adjusting the disk offset below + * for adjusting the disk offset below. 
Only do this if the + * extent isn't compressed since our in ram offset may be past + * what we have actually allocated on disk. */ - offset_in_extent = em_start - em->start; + if (!test_bit(EXTENT_FLAG_COMPRESSED, &em->flags)) + offset_in_extent = em_start - em->start; em_end = extent_map_end(em); em_len = em_end - em_start; emflags = em->flags; diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index a005fe2..8e686a4 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -596,20 +596,29 @@ void btrfs_drop_extent_cache(struct inode *inode, u64 start, u64 end, if (no_splits) goto next; - if (em->block_start < EXTENT_MAP_LAST_BYTE && - em->start < start) { + if (em->start < start) { split->start = em->start; split->len = start - em->start; - split->orig_start = em->orig_start; - split->block_start = em->block_start; - if (compressed) - split->block_len = em->block_len; - else - split->block_len = split->len; - split->ram_bytes = em->ram_bytes; - split->orig_block_len = max(split->block_len, - em->orig_block_len); + if (em->block_start < EXTENT_MAP_LAST_BYTE) { + split->orig_start = em->orig_start; + split->block_start = em->block_start; + + if (compressed) + split->block_len = em->block_len; + else + split->block_len = split->len; + split->orig_block_len = max(split->block_len, + em->orig_block_len); + split->ram_bytes = em->ram_bytes; + } else { + split->orig_start = split->start; + split->block_len = 0; + split->block_start = em->block_start; + split->orig_block_len = 0; + split->ram_bytes = split->len; + } + split->generation = gen; split->bdev = em->bdev; split->flags = flags; @@ -620,8 +629,7 @@ void btrfs_drop_extent_cache(struct inode *inode, u64 start, u64 end, split = split2; split2 = NULL; } - if (em->block_start < EXTENT_MAP_LAST_BYTE && - testend && em->start + em->len > start + len) { + if (testend && em->start + em->len > start + len) { u64 diff = start + len - em->start; split->start = start + len; @@ -630,18 +638,28 @@ void btrfs_drop_extent_cache(struct 
inode *inode, u64 start, u64 end, split->flags = flags; split->compress_type = em->compress_type; split->generation = gen; - split->orig_block_len = max(em->block_len, + + if (em->block_start < EXTENT_MAP_LAST_BYTE) { + split->orig_block_len = max(em->block_len, em->orig_block_len); - split->ram_bytes = em->ram_bytes; - if (compressed) { - split->block_len = em->block_len; - split->block_start = em->block_start; - split->orig_start = em->orig_start; + split->ram_bytes = em->ram_bytes; + if (compressed) { + split->block_len = em->block_len; + split->block_start = em->block_start; + split->orig_start = em->orig_start; + } else { + split->block_len = split->len; + split->block_start = em->block_start + + diff; + split->orig_start = em->orig_start; + } } else { - split->block_len = split->len; - split->block_start = em->block_start + diff; - split->orig_start = em->orig_start; + split->ram_bytes = split->len; + split->orig_start = split->start; + split->block_len = 0; + split->block_start = em->block_start; + split->orig_block_len = 0; } ret = add_extent_mapping(em_tree, split, modified); diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 6d1b93c..021694c 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -2166,16 +2166,23 @@ static noinline int record_one_backref(u64 inum, u64 offset, u64 root_id, if (btrfs_file_extent_disk_bytenr(leaf, extent) != old->bytenr) continue; - extent_offset = btrfs_file_extent_offset(leaf, extent); - if (key.offset - extent_offset != offset) + /* + * 'offset' refers to the exact key.offset, + * NOT the 'offset' field in btrfs_extent_data_ref, ie. + * (key.offset - extent_offset). 
+ */ + if (key.offset != offset) continue; + extent_offset = btrfs_file_extent_offset(leaf, extent); num_bytes = btrfs_file_extent_num_bytes(leaf, extent); + if (extent_offset >= old->extent_offset + old->offset + old->len || extent_offset + num_bytes <= old->extent_offset + old->offset) continue; + ret = 0; break; } @@ -2187,7 +2194,7 @@ static noinline int record_one_backref(u64 inum, u64 offset, u64 root_id, backref->root_id = root_id; backref->inum = inum; - backref->file_pos = offset + extent_offset; + backref->file_pos = offset; backref->num_bytes = num_bytes; backref->extent_offset = extent_offset; backref->generation = btrfs_file_extent_generation(leaf, extent); @@ -2210,7 +2217,8 @@ static noinline bool record_extent_backrefs(struct btrfs_path *path, new->path = path; list_for_each_entry_safe(old, tmp, &new->head, list) { - ret = iterate_inodes_from_logical(old->bytenr, fs_info, + ret = iterate_inodes_from_logical(old->bytenr + + old->extent_offset, fs_info, path, record_one_backref, old); BUG_ON(ret < 0 && ret != -ENOENT); @@ -4391,9 +4399,6 @@ static int btrfs_setsize(struct inode *inode, struct iattr *attr) int mask = attr->ia_valid; int ret; - if (newsize == oldsize) - return 0; - /* * The regular truncate() case without ATTR_CTIME and ATTR_MTIME is a * special case where we need to update the times despite not having @@ -5165,14 +5170,31 @@ next: } /* Reached end of directory/root. Bump pos past the last item. */ - if (key_type == BTRFS_DIR_INDEX_KEY) - /* - * 32-bit glibc will use getdents64, but then strtol - - * so the last number we can serve is this. - */ - ctx->pos = 0x7fffffff; - else - ctx->pos++; + ctx->pos++; + + /* + * Stop new entries from being returned after we return the last + * entry. + * + * New directory entries are assigned a strictly increasing + * offset. This means that new entries created during readdir + * are *guaranteed* to be seen in the future by that readdir. 
+ * This has broken buggy programs which operate on names as + * they're returned by readdir. Until we re-use freed offsets + * we have this hack to stop new entries from being returned + * under the assumption that they'll never reach this huge + * offset. + * + * This is being careful not to overflow 32bit loff_t unless the + * last entry requires it because doing so has broken 32bit apps + * in the past. + */ + if (key_type == BTRFS_DIR_INDEX_KEY) { + if (ctx->pos >= INT_MAX) + ctx->pos = LLONG_MAX; + else + ctx->pos = INT_MAX; + } nopos: ret = 0; err: diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c index 4ba2a69..64a157b 100644 --- a/fs/btrfs/scrub.c +++ b/fs/btrfs/scrub.c @@ -2495,7 +2495,7 @@ again: ret = scrub_extent(sctx, extent_logical, extent_len, extent_physical, extent_dev, flags, generation, extent_mirror_num, - extent_physical); + extent_logical - logical + physical); if (ret) goto out; diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index d58cce7..af1931a 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c @@ -983,12 +983,12 @@ static noinline int commit_cowonly_roots(struct btrfs_trans_handle *trans, * a dirty root struct and adds it into the list of dead roots that need to * be deleted */ -int btrfs_add_dead_root(struct btrfs_root *root) +void btrfs_add_dead_root(struct btrfs_root *root) { spin_lock(&root->fs_info->trans_lock); - list_add_tail(&root->root_list, &root->fs_info->dead_roots); + if (list_empty(&root->root_list)) + list_add_tail(&root->root_list, &root->fs_info->dead_roots); spin_unlock(&root->fs_info->trans_lock); - return 0; } /* @@ -1925,7 +1925,7 @@ int btrfs_clean_one_deleted_snapshot(struct btrfs_root *root) } root = list_first_entry(&fs_info->dead_roots, struct btrfs_root, root_list); - list_del(&root->root_list); + list_del_init(&root->root_list); spin_unlock(&fs_info->trans_lock); pr_debug("btrfs: cleaner removing %llu\n", diff --git a/fs/btrfs/transaction.h b/fs/btrfs/transaction.h index 
005b037..defbc42 100644 --- a/fs/btrfs/transaction.h +++ b/fs/btrfs/transaction.h @@ -143,7 +143,7 @@ int btrfs_wait_for_commit(struct btrfs_root *root, u64 transid); int btrfs_write_and_wait_transaction(struct btrfs_trans_handle *trans, struct btrfs_root *root); -int btrfs_add_dead_root(struct btrfs_root *root); +void btrfs_add_dead_root(struct btrfs_root *root); int btrfs_defrag_root(struct btrfs_root *root); int btrfs_clean_one_deleted_snapshot(struct btrfs_root *root); int btrfs_commit_transaction(struct btrfs_trans_handle *trans, diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c index 2c67914..ff60d89 100644 --- a/fs/btrfs/tree-log.c +++ b/fs/btrfs/tree-log.c @@ -3746,8 +3746,9 @@ next_slot: } log_extents: + btrfs_release_path(path); + btrfs_release_path(dst_path); if (fast_search) { - btrfs_release_path(dst_path); ret = btrfs_log_changed_extents(trans, root, inode, dst_path); if (ret) { err = ret; @@ -3764,8 +3765,6 @@ log_extents: } if (inode_only == LOG_INODE_ALL && S_ISDIR(inode->i_mode)) { - btrfs_release_path(path); - btrfs_release_path(dst_path); ret = log_directory_changes(trans, root, inode, path, dst_path); if (ret) { err = ret; diff --git a/fs/cifs/cifsencrypt.c b/fs/cifs/cifsencrypt.c index 3d8bf94..fc6f4f3 100644 --- a/fs/cifs/cifsencrypt.c +++ b/fs/cifs/cifsencrypt.c @@ -1,7 +1,7 @@ /* * fs/cifs/cifsencrypt.c * - * Copyright (C) International Business Machines Corp., 2005,2006 + * Copyright (C) International Business Machines Corp., 2005,2013 * Author(s): Steve French (sfrench@us.ibm.com) * * This library is free software; you can redistribute it and/or modify @@ -31,6 +31,37 @@ #include <linux/random.h> #include <linux/highmem.h> +static int +cifs_crypto_shash_md5_allocate(struct TCP_Server_Info *server) +{ + int rc; + unsigned int size; + + if (server->secmech.sdescmd5 != NULL) + return 0; /* already allocated */ + + server->secmech.md5 = crypto_alloc_shash("md5", 0, 0); + if (IS_ERR(server->secmech.md5)) { + cifs_dbg(VFS, "could not 
allocate crypto md5\n"); + rc = PTR_ERR(server->secmech.md5); + server->secmech.md5 = NULL; + return rc; + } + + size = sizeof(struct shash_desc) + + crypto_shash_descsize(server->secmech.md5); + server->secmech.sdescmd5 = kmalloc(size, GFP_KERNEL); + if (!server->secmech.sdescmd5) { + crypto_free_shash(server->secmech.md5); + server->secmech.md5 = NULL; + return -ENOMEM; + } + server->secmech.sdescmd5->shash.tfm = server->secmech.md5; + server->secmech.sdescmd5->shash.flags = 0x0; + + return 0; +} + /* * Calculate and return the CIFS signature based on the mac key and SMB PDU. * The 16 byte signature must be allocated by the caller. Note we only use the @@ -50,8 +81,11 @@ static int cifs_calc_signature(struct smb_rqst *rqst, return -EINVAL; if (!server->secmech.sdescmd5) { - cifs_dbg(VFS, "%s: Can't generate signature\n", __func__); - return -1; + rc = cifs_crypto_shash_md5_allocate(server); + if (rc) { + cifs_dbg(VFS, "%s: Can't alloc md5 crypto\n", __func__); + return -1; + } } rc = crypto_shash_init(&server->secmech.sdescmd5->shash); @@ -388,7 +422,7 @@ find_domain_name(struct cifs_ses *ses, const struct nls_table *nls_cp) if (blobptr + attrsize > blobend) break; if (type == NTLMSSP_AV_NB_DOMAIN_NAME) { - if (!attrsize) + if (!attrsize || attrsize >= CIFS_MAX_DOMAINNAME_LEN) break; if (!ses->domainName) { ses->domainName = @@ -556,6 +590,36 @@ CalcNTLMv2_response(const struct cifs_ses *ses, char *ntlmv2_hash) return rc; } +static int crypto_hmacmd5_alloc(struct TCP_Server_Info *server) +{ + int rc; + unsigned int size; + + /* check if already allocated */ + if (server->secmech.sdeschmacmd5) + return 0; + + server->secmech.hmacmd5 = crypto_alloc_shash("hmac(md5)", 0, 0); + if (IS_ERR(server->secmech.hmacmd5)) { + cifs_dbg(VFS, "could not allocate crypto hmacmd5\n"); + rc = PTR_ERR(server->secmech.hmacmd5); + server->secmech.hmacmd5 = NULL; + return rc; + } + + size = sizeof(struct shash_desc) + + crypto_shash_descsize(server->secmech.hmacmd5); + 
server->secmech.sdeschmacmd5 = kmalloc(size, GFP_KERNEL); + if (!server->secmech.sdeschmacmd5) { + crypto_free_shash(server->secmech.hmacmd5); + server->secmech.hmacmd5 = NULL; + return -ENOMEM; + } + server->secmech.sdeschmacmd5->shash.tfm = server->secmech.hmacmd5; + server->secmech.sdeschmacmd5->shash.flags = 0x0; + + return 0; +} int setup_ntlmv2_rsp(struct cifs_ses *ses, const struct nls_table *nls_cp) @@ -606,6 +670,12 @@ setup_ntlmv2_rsp(struct cifs_ses *ses, const struct nls_table *nls_cp) memcpy(ses->auth_key.response + baselen, tiblob, tilen); + rc = crypto_hmacmd5_alloc(ses->server); + if (rc) { + cifs_dbg(VFS, "could not crypto alloc hmacmd5 rc %d\n", rc); + goto setup_ntlmv2_rsp_ret; + } + /* calculate ntlmv2_hash */ rc = calc_ntlmv2_hash(ses, ntlmv2_hash, nls_cp); if (rc) { @@ -705,123 +775,32 @@ calc_seckey(struct cifs_ses *ses) void cifs_crypto_shash_release(struct TCP_Server_Info *server) { - if (server->secmech.cmacaes) + if (server->secmech.cmacaes) { crypto_free_shash(server->secmech.cmacaes); + server->secmech.cmacaes = NULL; + } - if (server->secmech.hmacsha256) + if (server->secmech.hmacsha256) { crypto_free_shash(server->secmech.hmacsha256); + server->secmech.hmacsha256 = NULL; + } - if (server->secmech.md5) + if (server->secmech.md5) { crypto_free_shash(server->secmech.md5); + server->secmech.md5 = NULL; + } - if (server->secmech.hmacmd5) + if (server->secmech.hmacmd5) { crypto_free_shash(server->secmech.hmacmd5); + server->secmech.hmacmd5 = NULL; + } kfree(server->secmech.sdesccmacaes); - + server->secmech.sdesccmacaes = NULL; kfree(server->secmech.sdeschmacsha256); - + server->secmech.sdeschmacsha256 = NULL; kfree(server->secmech.sdeschmacmd5); - - kfree(server->secmech.sdescmd5); -} - -int -cifs_crypto_shash_allocate(struct TCP_Server_Info *server) -{ - int rc; - unsigned int size; - - server->secmech.hmacmd5 = crypto_alloc_shash("hmac(md5)", 0, 0); - if (IS_ERR(server->secmech.hmacmd5)) { - cifs_dbg(VFS, "could not allocate crypto 
hmacmd5\n"); - return PTR_ERR(server->secmech.hmacmd5); - } - - server->secmech.md5 = crypto_alloc_shash("md5", 0, 0); - if (IS_ERR(server->secmech.md5)) { - cifs_dbg(VFS, "could not allocate crypto md5\n"); - rc = PTR_ERR(server->secmech.md5); - goto crypto_allocate_md5_fail; - } - - server->secmech.hmacsha256 = crypto_alloc_shash("hmac(sha256)", 0, 0); - if (IS_ERR(server->secmech.hmacsha256)) { - cifs_dbg(VFS, "could not allocate crypto hmacsha256\n"); - rc = PTR_ERR(server->secmech.hmacsha256); - goto crypto_allocate_hmacsha256_fail; - } - - server->secmech.cmacaes = crypto_alloc_shash("cmac(aes)", 0, 0); - if (IS_ERR(server->secmech.cmacaes)) { - cifs_dbg(VFS, "could not allocate crypto cmac-aes"); - rc = PTR_ERR(server->secmech.cmacaes); - goto crypto_allocate_cmacaes_fail; - } - - size = sizeof(struct shash_desc) + - crypto_shash_descsize(server->secmech.hmacmd5); - server->secmech.sdeschmacmd5 = kmalloc(size, GFP_KERNEL); - if (!server->secmech.sdeschmacmd5) { - rc = -ENOMEM; - goto crypto_allocate_hmacmd5_sdesc_fail; - } - server->secmech.sdeschmacmd5->shash.tfm = server->secmech.hmacmd5; - server->secmech.sdeschmacmd5->shash.flags = 0x0; - - size = sizeof(struct shash_desc) + - crypto_shash_descsize(server->secmech.md5); - server->secmech.sdescmd5 = kmalloc(size, GFP_KERNEL); - if (!server->secmech.sdescmd5) { - rc = -ENOMEM; - goto crypto_allocate_md5_sdesc_fail; - } - server->secmech.sdescmd5->shash.tfm = server->secmech.md5; - server->secmech.sdescmd5->shash.flags = 0x0; - - size = sizeof(struct shash_desc) + - crypto_shash_descsize(server->secmech.hmacsha256); - server->secmech.sdeschmacsha256 = kmalloc(size, GFP_KERNEL); - if (!server->secmech.sdeschmacsha256) { - rc = -ENOMEM; - goto crypto_allocate_hmacsha256_sdesc_fail; - } - server->secmech.sdeschmacsha256->shash.tfm = server->secmech.hmacsha256; - server->secmech.sdeschmacsha256->shash.flags = 0x0; - - size = sizeof(struct shash_desc) + - crypto_shash_descsize(server->secmech.cmacaes); - 
server->secmech.sdesccmacaes = kmalloc(size, GFP_KERNEL); - if (!server->secmech.sdesccmacaes) { - cifs_dbg(VFS, "%s: Can't alloc cmacaes\n", __func__); - rc = -ENOMEM; - goto crypto_allocate_cmacaes_sdesc_fail; - } - server->secmech.sdesccmacaes->shash.tfm = server->secmech.cmacaes; - server->secmech.sdesccmacaes->shash.flags = 0x0; - - return 0; - -crypto_allocate_cmacaes_sdesc_fail: - kfree(server->secmech.sdeschmacsha256); - -crypto_allocate_hmacsha256_sdesc_fail: + server->secmech.sdeschmacmd5 = NULL; kfree(server->secmech.sdescmd5); - -crypto_allocate_md5_sdesc_fail: - kfree(server->secmech.sdeschmacmd5); - -crypto_allocate_hmacmd5_sdesc_fail: - crypto_free_shash(server->secmech.cmacaes); - -crypto_allocate_cmacaes_fail: - crypto_free_shash(server->secmech.hmacsha256); - -crypto_allocate_hmacsha256_fail: - crypto_free_shash(server->secmech.md5); - -crypto_allocate_md5_fail: - crypto_free_shash(server->secmech.hmacmd5); - - return rc; + server->secmech.sdescmd5 = NULL; } diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c index 4bdd547..85ea98d 100644 --- a/fs/cifs/cifsfs.c +++ b/fs/cifs/cifsfs.c @@ -147,18 +147,17 @@ cifs_read_super(struct super_block *sb) goto out_no_root; } + if (cifs_sb_master_tcon(cifs_sb)->nocase) + sb->s_d_op = &cifs_ci_dentry_ops; + else + sb->s_d_op = &cifs_dentry_ops; + sb->s_root = d_make_root(inode); if (!sb->s_root) { rc = -ENOMEM; goto out_no_root; } - /* do that *after* d_make_root() - we want NULL ->d_op for root here */ - if (cifs_sb_master_tcon(cifs_sb)->nocase) - sb->s_d_op = &cifs_ci_dentry_ops; - else - sb->s_d_op = &cifs_dentry_ops; - #ifdef CONFIG_CIFS_NFSD_EXPORT if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_SERVER_INUM) { cifs_dbg(FYI, "export ops supported\n"); diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h index e66b088..52ca861 100644 --- a/fs/cifs/cifsglob.h +++ b/fs/cifs/cifsglob.h @@ -44,6 +44,7 @@ #define MAX_TREE_SIZE (2 + MAX_SERVER_SIZE + 1 + MAX_SHARE_SIZE + 1) #define MAX_SERVER_SIZE 15 #define MAX_SHARE_SIZE 
80 +#define CIFS_MAX_DOMAINNAME_LEN 256 /* max domain name length */ #define MAX_USERNAME_SIZE 256 /* reasonable maximum for current servers */ #define MAX_PASSWORD_SIZE 512 /* max for windows seems to be 256 wide chars */ @@ -194,6 +195,7 @@ struct cifs_writedata; struct cifs_io_parms; struct cifs_search_info; struct cifsInodeInfo; +struct cifs_open_parms; struct smb_version_operations { int (*send_cancel)(struct TCP_Server_Info *, void *, @@ -307,9 +309,8 @@ struct smb_version_operations { const char *, const char *, struct cifs_sb_info *); /* open a file for non-posix mounts */ - int (*open)(const unsigned int, struct cifs_tcon *, const char *, int, - int, int, struct cifs_fid *, __u32 *, FILE_ALL_INFO *, - struct cifs_sb_info *); + int (*open)(const unsigned int, struct cifs_open_parms *, + __u32 *, FILE_ALL_INFO *); /* set fid protocol-specific info */ void (*set_fid)(struct cifsFileInfo *, struct cifs_fid *, __u32); /* close a file */ @@ -369,6 +370,9 @@ struct smb_version_operations { void (*generate_signingkey)(struct TCP_Server_Info *server); int (*calc_signature)(struct smb_rqst *rqst, struct TCP_Server_Info *server); + int (*query_mf_symlink)(const unsigned char *path, char *pbuf, + unsigned int *pbytes_read, struct cifs_sb_info *cifs_sb, + unsigned int xid); }; struct smb_version_values { @@ -912,6 +916,17 @@ struct cifs_search_info { bool smallBuf:1; /* so we know which buf_release function to call */ }; +struct cifs_open_parms { + struct cifs_tcon *tcon; + struct cifs_sb_info *cifs_sb; + int disposition; + int desired_access; + int create_options; + const char *path; + struct cifs_fid *fid; + bool reconnect:1; +}; + struct cifs_fid { __u16 netfid; #ifdef CONFIG_CIFS_SMB2 diff --git a/fs/cifs/cifsproto.h b/fs/cifs/cifsproto.h index c8ff018..b29a012 100644 --- a/fs/cifs/cifsproto.h +++ b/fs/cifs/cifsproto.h @@ -433,7 +433,6 @@ extern int SMBNTencrypt(unsigned char *, unsigned char *, unsigned char *, const struct nls_table *); extern int 
setup_ntlm_response(struct cifs_ses *, const struct nls_table *); extern int setup_ntlmv2_rsp(struct cifs_ses *, const struct nls_table *); -extern int cifs_crypto_shash_allocate(struct TCP_Server_Info *); extern void cifs_crypto_shash_release(struct TCP_Server_Info *); extern int calc_seckey(struct cifs_ses *); extern void generate_smb3signingkey(struct TCP_Server_Info *); @@ -498,5 +497,7 @@ void cifs_writev_complete(struct work_struct *work); struct cifs_writedata *cifs_writedata_alloc(unsigned int nr_pages, work_func_t complete); void cifs_writedata_release(struct kref *refcount); - +int open_query_close_cifs_symlink(const unsigned char *path, char *pbuf, + unsigned int *pbytes_read, struct cifs_sb_info *cifs_sb, + unsigned int xid); #endif /* _CIFSPROTO_H */ diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c index afcb8a1..d67c550 100644 --- a/fs/cifs/connect.c +++ b/fs/cifs/connect.c @@ -1675,7 +1675,8 @@ cifs_parse_mount_options(const char *mountdata, const char *devname, if (string == NULL) goto out_nomem; - if (strnlen(string, 256) == 256) { + if (strnlen(string, CIFS_MAX_DOMAINNAME_LEN) + == CIFS_MAX_DOMAINNAME_LEN) { printk(KERN_WARNING "CIFS: domain name too" " long\n"); goto cifs_parse_mount_err; @@ -2108,12 +2109,6 @@ cifs_get_tcp_session(struct smb_vol *volume_info) goto out_err; } - rc = cifs_crypto_shash_allocate(tcp_ses); - if (rc) { - cifs_dbg(VFS, "could not setup hash structures rc %d\n", rc); - goto out_err; - } - tcp_ses->ops = volume_info->ops; tcp_ses->vals = volume_info->vals; cifs_set_net_ns(tcp_ses, get_net(current->nsproxy->net_ns)); @@ -2282,8 +2277,8 @@ cifs_put_smb_ses(struct cifs_ses *ses) #ifdef CONFIG_KEYS -/* strlen("cifs:a:") + INET6_ADDRSTRLEN + 1 */ -#define CIFSCREDS_DESC_SIZE (7 + INET6_ADDRSTRLEN + 1) +/* strlen("cifs:a:") + CIFS_MAX_DOMAINNAME_LEN + 1 */ +#define CIFSCREDS_DESC_SIZE (7 + CIFS_MAX_DOMAINNAME_LEN + 1) /* Populate username and pw fields from keyring if possible */ static int diff --git a/fs/cifs/dir.c 
b/fs/cifs/dir.c index 5175aeb..d62ce0d 100644 --- a/fs/cifs/dir.c +++ b/fs/cifs/dir.c @@ -204,6 +204,7 @@ cifs_do_create(struct inode *inode, struct dentry *direntry, unsigned int xid, struct inode *newinode = NULL; int disposition; struct TCP_Server_Info *server = tcon->ses->server; + struct cifs_open_parms oparms; *oplock = 0; if (tcon->ses->server->oplocks) @@ -319,9 +320,16 @@ cifs_do_create(struct inode *inode, struct dentry *direntry, unsigned int xid, if (backup_cred(cifs_sb)) create_options |= CREATE_OPEN_BACKUP_INTENT; - rc = server->ops->open(xid, tcon, full_path, disposition, - desired_access, create_options, fid, oplock, - buf, cifs_sb); + oparms.tcon = tcon; + oparms.cifs_sb = cifs_sb; + oparms.desired_access = desired_access; + oparms.create_options = create_options; + oparms.disposition = disposition; + oparms.path = full_path; + oparms.fid = fid; + oparms.reconnect = false; + + rc = server->ops->open(xid, &oparms, oplock, buf); if (rc) { cifs_dbg(FYI, "cifs_create returned 0x%x\n", rc); goto out; diff --git a/fs/cifs/file.c b/fs/cifs/file.c index 91d8629..7e36ae3 100644 --- a/fs/cifs/file.c +++ b/fs/cifs/file.c @@ -183,6 +183,7 @@ cifs_nt_open(char *full_path, struct inode *inode, struct cifs_sb_info *cifs_sb, int create_options = CREATE_NOT_DIR; FILE_ALL_INFO *buf; struct TCP_Server_Info *server = tcon->ses->server; + struct cifs_open_parms oparms; if (!server->ops->open) return -ENOSYS; @@ -224,9 +225,16 @@ cifs_nt_open(char *full_path, struct inode *inode, struct cifs_sb_info *cifs_sb, if (backup_cred(cifs_sb)) create_options |= CREATE_OPEN_BACKUP_INTENT; - rc = server->ops->open(xid, tcon, full_path, disposition, - desired_access, create_options, fid, oplock, buf, - cifs_sb); + oparms.tcon = tcon; + oparms.cifs_sb = cifs_sb; + oparms.desired_access = desired_access; + oparms.create_options = create_options; + oparms.disposition = disposition; + oparms.path = full_path; + oparms.fid = fid; + oparms.reconnect = false; + + rc = 
server->ops->open(xid, &oparms, oplock, buf); if (rc) goto out; @@ -553,11 +561,10 @@ cifs_relock_file(struct cifsFileInfo *cfile) struct cifs_tcon *tcon = tlink_tcon(cfile->tlink); int rc = 0; - /* we are going to update can_cache_brlcks here - need a write access */ - down_write(&cinode->lock_sem); + down_read(&cinode->lock_sem); if (cinode->can_cache_brlcks) { - /* can cache locks - no need to push them */ - up_write(&cinode->lock_sem); + /* can cache locks - no need to relock */ + up_read(&cinode->lock_sem); return rc; } @@ -568,7 +575,7 @@ cifs_relock_file(struct cifsFileInfo *cfile) else rc = tcon->ses->server->ops->push_mand_locks(cfile); - up_write(&cinode->lock_sem); + up_read(&cinode->lock_sem); return rc; } @@ -587,7 +594,7 @@ cifs_reopen_file(struct cifsFileInfo *cfile, bool can_flush) int desired_access; int disposition = FILE_OPEN; int create_options = CREATE_NOT_DIR; - struct cifs_fid fid; + struct cifs_open_parms oparms; xid = get_xid(); mutex_lock(&cfile->fh_mutex); @@ -637,9 +644,10 @@ cifs_reopen_file(struct cifsFileInfo *cfile, bool can_flush) rc = cifs_posix_open(full_path, NULL, inode->i_sb, cifs_sb->mnt_file_mode /* ignored */, - oflags, &oplock, &fid.netfid, xid); + oflags, &oplock, &cfile->fid.netfid, xid); if (rc == 0) { cifs_dbg(FYI, "posix reopen succeeded\n"); + oparms.reconnect = true; goto reopen_success; } /* @@ -654,7 +662,16 @@ cifs_reopen_file(struct cifsFileInfo *cfile, bool can_flush) create_options |= CREATE_OPEN_BACKUP_INTENT; if (server->ops->get_lease_key) - server->ops->get_lease_key(inode, &fid); + server->ops->get_lease_key(inode, &cfile->fid); + + oparms.tcon = tcon; + oparms.cifs_sb = cifs_sb; + oparms.desired_access = desired_access; + oparms.create_options = create_options; + oparms.disposition = disposition; + oparms.path = full_path; + oparms.fid = &cfile->fid; + oparms.reconnect = true; /* * Can not refresh inode by passing in file_info buf to be returned by @@ -663,9 +680,14 @@ cifs_reopen_file(struct cifsFileInfo 
*cfile, bool can_flush) * version of file size can be stale. If we knew for sure that inode was * not dirty locally we could do this. */ - rc = server->ops->open(xid, tcon, full_path, disposition, - desired_access, create_options, &fid, &oplock, - NULL, cifs_sb); + rc = server->ops->open(xid, &oparms, &oplock, NULL); + if (rc == -ENOENT && oparms.reconnect == false) { + /* durable handle timeout is expired - open the file again */ + rc = server->ops->open(xid, &oparms, &oplock, NULL); + /* indicate that we need to relock the file */ + oparms.reconnect = true; + } + if (rc) { mutex_unlock(&cfile->fh_mutex); cifs_dbg(FYI, "cifs_reopen returned 0x%x\n", rc); @@ -696,8 +718,9 @@ reopen_success: * to the server to get the new inode info. */ - server->ops->set_fid(cfile, &fid, oplock); - cifs_relock_file(cfile); + server->ops->set_fid(cfile, &cfile->fid, oplock); + if (oparms.reconnect) + cifs_relock_file(cfile); reopen_error_exit: kfree(full_path); diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c index 20efd81..449b6cf 100644 --- a/fs/cifs/inode.c +++ b/fs/cifs/inode.c @@ -558,6 +558,11 @@ cifs_all_info_to_fattr(struct cifs_fattr *fattr, FILE_ALL_INFO *info, fattr->cf_mode &= ~(S_IWUGO); fattr->cf_nlink = le32_to_cpu(info->NumberOfLinks); + if (fattr->cf_nlink < 1) { + cifs_dbg(1, "replacing bogus file nlink value %u\n", + fattr->cf_nlink); + fattr->cf_nlink = 1; + } } fattr->cf_uid = cifs_sb->mnt_uid; diff --git a/fs/cifs/link.c b/fs/cifs/link.c index b83c3f5..562044f 100644 --- a/fs/cifs/link.c +++ b/fs/cifs/link.c @@ -305,67 +305,89 @@ CIFSCouldBeMFSymlink(const struct cifs_fattr *fattr) } int -CIFSCheckMFSymlink(struct cifs_fattr *fattr, - const unsigned char *path, - struct cifs_sb_info *cifs_sb, unsigned int xid) +open_query_close_cifs_symlink(const unsigned char *path, char *pbuf, + unsigned int *pbytes_read, struct cifs_sb_info *cifs_sb, + unsigned int xid) { int rc; int oplock = 0; __u16 netfid = 0; struct tcon_link *tlink; - struct cifs_tcon *pTcon; + struct 
cifs_tcon *ptcon; struct cifs_io_parms io_parms; - u8 *buf; - char *pbuf; - unsigned int bytes_read = 0; int buf_type = CIFS_NO_BUFFER; - unsigned int link_len = 0; FILE_ALL_INFO file_info; - if (!CIFSCouldBeMFSymlink(fattr)) - /* it's not a symlink */ - return 0; - tlink = cifs_sb_tlink(cifs_sb); if (IS_ERR(tlink)) return PTR_ERR(tlink); - pTcon = tlink_tcon(tlink); + ptcon = tlink_tcon(tlink); - rc = CIFSSMBOpen(xid, pTcon, path, FILE_OPEN, GENERIC_READ, + rc = CIFSSMBOpen(xid, ptcon, path, FILE_OPEN, GENERIC_READ, CREATE_NOT_DIR, &netfid, &oplock, &file_info, cifs_sb->local_nls, cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MAP_SPECIAL_CHR); - if (rc != 0) - goto out; + if (rc != 0) { + cifs_put_tlink(tlink); + return rc; + } if (file_info.EndOfFile != cpu_to_le64(CIFS_MF_SYMLINK_FILE_SIZE)) { - CIFSSMBClose(xid, pTcon, netfid); + CIFSSMBClose(xid, ptcon, netfid); + cifs_put_tlink(tlink); /* it's not a symlink */ - goto out; + return rc; } - buf = kmalloc(CIFS_MF_SYMLINK_FILE_SIZE, GFP_KERNEL); - if (!buf) { - rc = -ENOMEM; - goto out; - } - pbuf = buf; io_parms.netfid = netfid; io_parms.pid = current->tgid; - io_parms.tcon = pTcon; + io_parms.tcon = ptcon; io_parms.offset = 0; io_parms.length = CIFS_MF_SYMLINK_FILE_SIZE; - rc = CIFSSMBRead(xid, &io_parms, &bytes_read, &pbuf, &buf_type); - CIFSSMBClose(xid, pTcon, netfid); - if (rc != 0) { - kfree(buf); + rc = CIFSSMBRead(xid, &io_parms, pbytes_read, &pbuf, &buf_type); + CIFSSMBClose(xid, ptcon, netfid); + cifs_put_tlink(tlink); + return rc; +} + + +int +CIFSCheckMFSymlink(struct cifs_fattr *fattr, + const unsigned char *path, + struct cifs_sb_info *cifs_sb, unsigned int xid) +{ + int rc = 0; + u8 *buf = NULL; + unsigned int link_len = 0; + unsigned int bytes_read = 0; + struct cifs_tcon *ptcon; + + if (!CIFSCouldBeMFSymlink(fattr)) + /* it's not a symlink */ + return 0; + + buf = kmalloc(CIFS_MF_SYMLINK_FILE_SIZE, GFP_KERNEL); + if (!buf) { + rc = -ENOMEM; goto out; } + ptcon = tlink_tcon(cifs_sb_tlink(cifs_sb)); + if 
((ptcon->ses) && (ptcon->ses->server->ops->query_mf_symlink)) + rc = ptcon->ses->server->ops->query_mf_symlink(path, buf, + &bytes_read, cifs_sb, xid); + else + goto out; + + if (rc != 0) + goto out; + + if (bytes_read == 0) /* not a symlink */ + goto out; + rc = CIFSParseMFSymlink(buf, bytes_read, &link_len, NULL); - kfree(buf); if (rc == -EINVAL) { /* it's not a symlink */ rc = 0; @@ -381,7 +403,7 @@ CIFSCheckMFSymlink(struct cifs_fattr *fattr, fattr->cf_mode |= S_IFLNK | S_IRWXU | S_IRWXG | S_IRWXO; fattr->cf_dtype = DT_LNK; out: - cifs_put_tlink(tlink); + kfree(buf); return rc; } diff --git a/fs/cifs/readdir.c b/fs/cifs/readdir.c index ab87784..69d2c82 100644 --- a/fs/cifs/readdir.c +++ b/fs/cifs/readdir.c @@ -111,6 +111,14 @@ cifs_prime_dcache(struct dentry *parent, struct qstr *name, return; } + /* + * If we know that the inode will need to be revalidated immediately, + * then don't create a new dentry for it. We'll end up doing an on + * the wire call either way and this spares us an invalidation. 
+ */ + if (fattr->cf_flags & CIFS_FATTR_NEED_REVAL) + return; + dentry = d_alloc(parent, name); if (!dentry) return; diff --git a/fs/cifs/sess.c b/fs/cifs/sess.c index 79358e3..08dd37b 100644 --- a/fs/cifs/sess.c +++ b/fs/cifs/sess.c @@ -197,7 +197,7 @@ static void unicode_domain_string(char **pbcc_area, struct cifs_ses *ses, bytes_ret = 0; } else bytes_ret = cifs_strtoUTF16((__le16 *) bcc_ptr, ses->domainName, - 256, nls_cp); + CIFS_MAX_DOMAINNAME_LEN, nls_cp); bcc_ptr += 2 * bytes_ret; bcc_ptr += 2; /* account for null terminator */ @@ -255,8 +255,8 @@ static void ascii_ssetup_strings(char **pbcc_area, struct cifs_ses *ses, /* copy domain */ if (ses->domainName != NULL) { - strncpy(bcc_ptr, ses->domainName, 256); - bcc_ptr += strnlen(ses->domainName, 256); + strncpy(bcc_ptr, ses->domainName, CIFS_MAX_DOMAINNAME_LEN); + bcc_ptr += strnlen(ses->domainName, CIFS_MAX_DOMAINNAME_LEN); } /* else we will send a null domain name so the server will default to its own domain */ *bcc_ptr = 0; diff --git a/fs/cifs/smb1ops.c b/fs/cifs/smb1ops.c index e813f04..6094397 100644 --- a/fs/cifs/smb1ops.c +++ b/fs/cifs/smb1ops.c @@ -674,20 +674,23 @@ cifs_mkdir_setinfo(struct inode *inode, const char *full_path, } static int -cifs_open_file(const unsigned int xid, struct cifs_tcon *tcon, const char *path, - int disposition, int desired_access, int create_options, - struct cifs_fid *fid, __u32 *oplock, FILE_ALL_INFO *buf, - struct cifs_sb_info *cifs_sb) -{ - if (!(tcon->ses->capabilities & CAP_NT_SMBS)) - return SMBLegacyOpen(xid, tcon, path, disposition, - desired_access, create_options, - &fid->netfid, oplock, buf, - cifs_sb->local_nls, cifs_sb->mnt_cifs_flags +cifs_open_file(const unsigned int xid, struct cifs_open_parms *oparms, + __u32 *oplock, FILE_ALL_INFO *buf) +{ + if (!(oparms->tcon->ses->capabilities & CAP_NT_SMBS)) + return SMBLegacyOpen(xid, oparms->tcon, oparms->path, + oparms->disposition, + oparms->desired_access, + oparms->create_options, + &oparms->fid->netfid, 
oplock, buf, + oparms->cifs_sb->local_nls, + oparms->cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MAP_SPECIAL_CHR); - return CIFSSMBOpen(xid, tcon, path, disposition, desired_access, - create_options, &fid->netfid, oplock, buf, - cifs_sb->local_nls, cifs_sb->mnt_cifs_flags & + return CIFSSMBOpen(xid, oparms->tcon, oparms->path, + oparms->disposition, oparms->desired_access, + oparms->create_options, &oparms->fid->netfid, oplock, + buf, oparms->cifs_sb->local_nls, + oparms->cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MAP_SPECIAL_CHR); } @@ -941,6 +944,7 @@ struct smb_version_operations smb1_operations = { .mand_lock = cifs_mand_lock, .mand_unlock_range = cifs_unlock_range, .push_mand_locks = cifs_push_mandatory_locks, + .query_mf_symlink = open_query_close_cifs_symlink, }; struct smb_version_values smb1_values = { diff --git a/fs/cifs/smb2file.c b/fs/cifs/smb2file.c index 5da1b55..04a81a4 100644 --- a/fs/cifs/smb2file.c +++ b/fs/cifs/smb2file.c @@ -40,7 +40,8 @@ smb2_set_oplock_level(struct cifsInodeInfo *cinode, __u32 oplock) oplock &= 0xFF; if (oplock == SMB2_OPLOCK_LEVEL_NOCHANGE) return; - if (oplock == SMB2_OPLOCK_LEVEL_EXCLUSIVE) { + if (oplock == SMB2_OPLOCK_LEVEL_EXCLUSIVE || + oplock == SMB2_OPLOCK_LEVEL_BATCH) { cinode->clientCanCacheAll = true; cinode->clientCanCacheRead = true; cifs_dbg(FYI, "Exclusive Oplock granted on inode %p\n", @@ -57,17 +58,16 @@ smb2_set_oplock_level(struct cifsInodeInfo *cinode, __u32 oplock) } int -smb2_open_file(const unsigned int xid, struct cifs_tcon *tcon, const char *path, - int disposition, int desired_access, int create_options, - struct cifs_fid *fid, __u32 *oplock, FILE_ALL_INFO *buf, - struct cifs_sb_info *cifs_sb) +smb2_open_file(const unsigned int xid, struct cifs_open_parms *oparms, + __u32 *oplock, FILE_ALL_INFO *buf) { int rc; __le16 *smb2_path; struct smb2_file_all_info *smb2_data = NULL; __u8 smb2_oplock[17]; + struct cifs_fid *fid = oparms->fid; - smb2_path = cifs_convert_path_to_utf16(path, cifs_sb); + smb2_path = 
cifs_convert_path_to_utf16(oparms->path, oparms->cifs_sb); if (smb2_path == NULL) { rc = -ENOMEM; goto out; @@ -80,21 +80,19 @@ smb2_open_file(const unsigned int xid, struct cifs_tcon *tcon, const char *path, goto out; } - desired_access |= FILE_READ_ATTRIBUTES; - *smb2_oplock = SMB2_OPLOCK_LEVEL_EXCLUSIVE; + oparms->desired_access |= FILE_READ_ATTRIBUTES; + *smb2_oplock = SMB2_OPLOCK_LEVEL_BATCH; - if (tcon->ses->server->capabilities & SMB2_GLOBAL_CAP_LEASING) + if (oparms->tcon->ses->server->capabilities & SMB2_GLOBAL_CAP_LEASING) memcpy(smb2_oplock + 1, fid->lease_key, SMB2_LEASE_KEY_SIZE); - rc = SMB2_open(xid, tcon, smb2_path, &fid->persistent_fid, - &fid->volatile_fid, desired_access, disposition, - 0, 0, smb2_oplock, smb2_data); + rc = SMB2_open(xid, oparms, smb2_path, smb2_oplock, smb2_data); if (rc) goto out; if (buf) { /* open response does not have IndexNumber field - get it */ - rc = SMB2_get_srv_num(xid, tcon, fid->persistent_fid, + rc = SMB2_get_srv_num(xid, oparms->tcon, fid->persistent_fid, fid->volatile_fid, &smb2_data->IndexNumber); if (rc) { diff --git a/fs/cifs/smb2inode.c b/fs/cifs/smb2inode.c index fff6dfb..c6ec163 100644 --- a/fs/cifs/smb2inode.c +++ b/fs/cifs/smb2inode.c @@ -41,21 +41,26 @@ static int smb2_open_op_close(const unsigned int xid, struct cifs_tcon *tcon, struct cifs_sb_info *cifs_sb, const char *full_path, __u32 desired_access, __u32 create_disposition, - __u32 file_attributes, __u32 create_options, - void *data, int command) + __u32 create_options, void *data, int command) { int rc, tmprc = 0; - u64 persistent_fid, volatile_fid; __le16 *utf16_path; __u8 oplock = SMB2_OPLOCK_LEVEL_NONE; + struct cifs_open_parms oparms; + struct cifs_fid fid; utf16_path = cifs_convert_path_to_utf16(full_path, cifs_sb); if (!utf16_path) return -ENOMEM; - rc = SMB2_open(xid, tcon, utf16_path, &persistent_fid, &volatile_fid, - desired_access, create_disposition, file_attributes, - create_options, &oplock, NULL); + oparms.tcon = tcon; + 
oparms.desired_access = desired_access; + oparms.disposition = create_disposition; + oparms.create_options = create_options; + oparms.fid = &fid; + oparms.reconnect = false; + + rc = SMB2_open(xid, &oparms, utf16_path, &oplock, NULL); if (rc) { kfree(utf16_path); return rc; @@ -65,8 +70,8 @@ smb2_open_op_close(const unsigned int xid, struct cifs_tcon *tcon, case SMB2_OP_DELETE: break; case SMB2_OP_QUERY_INFO: - tmprc = SMB2_query_info(xid, tcon, persistent_fid, - volatile_fid, + tmprc = SMB2_query_info(xid, tcon, fid.persistent_fid, + fid.volatile_fid, (struct smb2_file_all_info *)data); break; case SMB2_OP_MKDIR: @@ -76,19 +81,21 @@ smb2_open_op_close(const unsigned int xid, struct cifs_tcon *tcon, */ break; case SMB2_OP_RENAME: - tmprc = SMB2_rename(xid, tcon, persistent_fid, volatile_fid, - (__le16 *)data); + tmprc = SMB2_rename(xid, tcon, fid.persistent_fid, + fid.volatile_fid, (__le16 *)data); break; case SMB2_OP_HARDLINK: - tmprc = SMB2_set_hardlink(xid, tcon, persistent_fid, - volatile_fid, (__le16 *)data); + tmprc = SMB2_set_hardlink(xid, tcon, fid.persistent_fid, + fid.volatile_fid, (__le16 *)data); break; case SMB2_OP_SET_EOF: - tmprc = SMB2_set_eof(xid, tcon, persistent_fid, volatile_fid, - current->tgid, (__le64 *)data); + tmprc = SMB2_set_eof(xid, tcon, fid.persistent_fid, + fid.volatile_fid, current->tgid, + (__le64 *)data); break; case SMB2_OP_SET_INFO: - tmprc = SMB2_set_info(xid, tcon, persistent_fid, volatile_fid, + tmprc = SMB2_set_info(xid, tcon, fid.persistent_fid, + fid.volatile_fid, (FILE_BASIC_INFO *)data); break; default: @@ -96,7 +103,7 @@ smb2_open_op_close(const unsigned int xid, struct cifs_tcon *tcon, break; } - rc = SMB2_close(xid, tcon, persistent_fid, volatile_fid); + rc = SMB2_close(xid, tcon, fid.persistent_fid, fid.volatile_fid); if (tmprc) rc = tmprc; kfree(utf16_path); @@ -129,8 +136,8 @@ smb2_query_path_info(const unsigned int xid, struct cifs_tcon *tcon, return -ENOMEM; rc = smb2_open_op_close(xid, tcon, cifs_sb, full_path, - 
FILE_READ_ATTRIBUTES, FILE_OPEN, 0, 0, - smb2_data, SMB2_OP_QUERY_INFO); + FILE_READ_ATTRIBUTES, FILE_OPEN, 0, smb2_data, + SMB2_OP_QUERY_INFO); if (rc) goto out; @@ -145,7 +152,7 @@ smb2_mkdir(const unsigned int xid, struct cifs_tcon *tcon, const char *name, struct cifs_sb_info *cifs_sb) { return smb2_open_op_close(xid, tcon, cifs_sb, name, - FILE_WRITE_ATTRIBUTES, FILE_CREATE, 0, + FILE_WRITE_ATTRIBUTES, FILE_CREATE, CREATE_NOT_FILE, NULL, SMB2_OP_MKDIR); } @@ -164,7 +171,7 @@ smb2_mkdir_setinfo(struct inode *inode, const char *name, dosattrs = cifs_i->cifsAttrs | ATTR_READONLY; data.Attributes = cpu_to_le32(dosattrs); tmprc = smb2_open_op_close(xid, tcon, cifs_sb, name, - FILE_WRITE_ATTRIBUTES, FILE_CREATE, 0, + FILE_WRITE_ATTRIBUTES, FILE_CREATE, CREATE_NOT_FILE, &data, SMB2_OP_SET_INFO); if (tmprc == 0) cifs_i->cifsAttrs = dosattrs; @@ -175,7 +182,7 @@ smb2_rmdir(const unsigned int xid, struct cifs_tcon *tcon, const char *name, struct cifs_sb_info *cifs_sb) { return smb2_open_op_close(xid, tcon, cifs_sb, name, DELETE, FILE_OPEN, - 0, CREATE_NOT_FILE | CREATE_DELETE_ON_CLOSE, + CREATE_NOT_FILE | CREATE_DELETE_ON_CLOSE, NULL, SMB2_OP_DELETE); } @@ -184,7 +191,7 @@ smb2_unlink(const unsigned int xid, struct cifs_tcon *tcon, const char *name, struct cifs_sb_info *cifs_sb) { return smb2_open_op_close(xid, tcon, cifs_sb, name, DELETE, FILE_OPEN, - 0, CREATE_DELETE_ON_CLOSE, NULL, + CREATE_DELETE_ON_CLOSE, NULL, SMB2_OP_DELETE); } @@ -203,7 +210,7 @@ smb2_set_path_attr(const unsigned int xid, struct cifs_tcon *tcon, } rc = smb2_open_op_close(xid, tcon, cifs_sb, from_name, access, - FILE_OPEN, 0, 0, smb2_to_name, command); + FILE_OPEN, 0, smb2_to_name, command); smb2_rename_path: kfree(smb2_to_name); return rc; @@ -234,7 +241,7 @@ smb2_set_path_size(const unsigned int xid, struct cifs_tcon *tcon, { __le64 eof = cpu_to_le64(size); return smb2_open_op_close(xid, tcon, cifs_sb, full_path, - FILE_WRITE_DATA, FILE_OPEN, 0, 0, &eof, + FILE_WRITE_DATA, FILE_OPEN, 0, &eof, 
SMB2_OP_SET_EOF); } @@ -250,7 +257,7 @@ smb2_set_file_info(struct inode *inode, const char *full_path, if (IS_ERR(tlink)) return PTR_ERR(tlink); rc = smb2_open_op_close(xid, tlink_tcon(tlink), cifs_sb, full_path, - FILE_WRITE_ATTRIBUTES, FILE_OPEN, 0, 0, buf, + FILE_WRITE_ATTRIBUTES, FILE_OPEN, 0, buf, SMB2_OP_SET_INFO); cifs_put_tlink(tlink); return rc; diff --git a/fs/cifs/smb2ops.c b/fs/cifs/smb2ops.c index 6d15cab..f259e6c 100644 --- a/fs/cifs/smb2ops.c +++ b/fs/cifs/smb2ops.c @@ -213,22 +213,29 @@ smb2_is_path_accessible(const unsigned int xid, struct cifs_tcon *tcon, struct cifs_sb_info *cifs_sb, const char *full_path) { int rc; - __u64 persistent_fid, volatile_fid; __le16 *utf16_path; __u8 oplock = SMB2_OPLOCK_LEVEL_NONE; + struct cifs_open_parms oparms; + struct cifs_fid fid; utf16_path = cifs_convert_path_to_utf16(full_path, cifs_sb); if (!utf16_path) return -ENOMEM; - rc = SMB2_open(xid, tcon, utf16_path, &persistent_fid, &volatile_fid, - FILE_READ_ATTRIBUTES, FILE_OPEN, 0, 0, &oplock, NULL); + oparms.tcon = tcon; + oparms.desired_access = FILE_READ_ATTRIBUTES; + oparms.disposition = FILE_OPEN; + oparms.create_options = 0; + oparms.fid = &fid; + oparms.reconnect = false; + + rc = SMB2_open(xid, &oparms, utf16_path, &oplock, NULL); if (rc) { kfree(utf16_path); return rc; } - rc = SMB2_close(xid, tcon, persistent_fid, volatile_fid); + rc = SMB2_close(xid, tcon, fid.persistent_fid, fid.volatile_fid); kfree(utf16_path); return rc; } @@ -443,15 +450,20 @@ smb2_query_dir_first(const unsigned int xid, struct cifs_tcon *tcon, __le16 *utf16_path; int rc; __u8 oplock = SMB2_OPLOCK_LEVEL_NONE; - __u64 persistent_fid, volatile_fid; + struct cifs_open_parms oparms; utf16_path = cifs_convert_path_to_utf16(path, cifs_sb); if (!utf16_path) return -ENOMEM; - rc = SMB2_open(xid, tcon, utf16_path, &persistent_fid, &volatile_fid, - FILE_READ_ATTRIBUTES | FILE_READ_DATA, FILE_OPEN, 0, 0, - &oplock, NULL); + oparms.tcon = tcon; + oparms.desired_access = FILE_READ_ATTRIBUTES | 
FILE_READ_DATA; + oparms.disposition = FILE_OPEN; + oparms.create_options = 0; + oparms.fid = fid; + oparms.reconnect = false; + + rc = SMB2_open(xid, &oparms, utf16_path, &oplock, NULL); kfree(utf16_path); if (rc) { cifs_dbg(VFS, "open dir failed\n"); @@ -460,14 +472,12 @@ smb2_query_dir_first(const unsigned int xid, struct cifs_tcon *tcon, srch_inf->entries_in_buffer = 0; srch_inf->index_of_last_entry = 0; - fid->persistent_fid = persistent_fid; - fid->volatile_fid = volatile_fid; - rc = SMB2_query_directory(xid, tcon, persistent_fid, volatile_fid, 0, - srch_inf); + rc = SMB2_query_directory(xid, tcon, fid->persistent_fid, + fid->volatile_fid, 0, srch_inf); if (rc) { cifs_dbg(VFS, "query directory failed\n"); - SMB2_close(xid, tcon, persistent_fid, volatile_fid); + SMB2_close(xid, tcon, fid->persistent_fid, fid->volatile_fid); } return rc; } @@ -528,17 +538,25 @@ smb2_queryfs(const unsigned int xid, struct cifs_tcon *tcon, struct kstatfs *buf) { int rc; - u64 persistent_fid, volatile_fid; __le16 srch_path = 0; /* Null - open root of share */ u8 oplock = SMB2_OPLOCK_LEVEL_NONE; + struct cifs_open_parms oparms; + struct cifs_fid fid; + + oparms.tcon = tcon; + oparms.desired_access = FILE_READ_ATTRIBUTES; + oparms.disposition = FILE_OPEN; + oparms.create_options = 0; + oparms.fid = &fid; + oparms.reconnect = false; - rc = SMB2_open(xid, tcon, &srch_path, &persistent_fid, &volatile_fid, - FILE_READ_ATTRIBUTES, FILE_OPEN, 0, 0, &oplock, NULL); + rc = SMB2_open(xid, &oparms, &srch_path, &oplock, NULL); if (rc) return rc; buf->f_type = SMB2_MAGIC_NUMBER; - rc = SMB2_QFS_info(xid, tcon, persistent_fid, volatile_fid, buf); - SMB2_close(xid, tcon, persistent_fid, volatile_fid); + rc = SMB2_QFS_info(xid, tcon, fid.persistent_fid, fid.volatile_fid, + buf); + SMB2_close(xid, tcon, fid.persistent_fid, fid.volatile_fid); return rc; } diff --git a/fs/cifs/smb2pdu.c b/fs/cifs/smb2pdu.c index 2b312e4..abc9c28 100644 --- a/fs/cifs/smb2pdu.c +++ b/fs/cifs/smb2pdu.c @@ -847,29 
+847,76 @@ create_lease_buf(u8 *lease_key, u8 oplock) return buf; } +static struct create_durable * +create_durable_buf(void) +{ + struct create_durable *buf; + + buf = kzalloc(sizeof(struct create_durable), GFP_KERNEL); + if (!buf) + return NULL; + + buf->ccontext.DataOffset = cpu_to_le16(offsetof + (struct create_durable, Data)); + buf->ccontext.DataLength = cpu_to_le32(16); + buf->ccontext.NameOffset = cpu_to_le16(offsetof + (struct create_durable, Name)); + buf->ccontext.NameLength = cpu_to_le16(4); + buf->Name[0] = 'D'; + buf->Name[1] = 'H'; + buf->Name[2] = 'n'; + buf->Name[3] = 'Q'; + return buf; +} + +static struct create_durable * +create_reconnect_durable_buf(struct cifs_fid *fid) +{ + struct create_durable *buf; + + buf = kzalloc(sizeof(struct create_durable), GFP_KERNEL); + if (!buf) + return NULL; + + buf->ccontext.DataOffset = cpu_to_le16(offsetof + (struct create_durable, Data)); + buf->ccontext.DataLength = cpu_to_le32(16); + buf->ccontext.NameOffset = cpu_to_le16(offsetof + (struct create_durable, Name)); + buf->ccontext.NameLength = cpu_to_le16(4); + buf->Data.Fid.PersistentFileId = fid->persistent_fid; + buf->Data.Fid.VolatileFileId = fid->volatile_fid; + buf->Name[0] = 'D'; + buf->Name[1] = 'H'; + buf->Name[2] = 'n'; + buf->Name[3] = 'C'; + return buf; +} + static __u8 parse_lease_state(struct smb2_create_rsp *rsp) { char *data_offset; struct create_lease *lc; bool found = false; + unsigned int next = 0; + char *name; - data_offset = (char *)rsp; - data_offset += 4 + le32_to_cpu(rsp->CreateContextsOffset); + data_offset = (char *)rsp + 4 + le32_to_cpu(rsp->CreateContextsOffset); lc = (struct create_lease *)data_offset; do { - char *name = le16_to_cpu(lc->ccontext.NameOffset) + (char *)lc; + lc = (struct create_lease *)((char *)lc + next); + name = le16_to_cpu(lc->ccontext.NameOffset) + (char *)lc; if (le16_to_cpu(lc->ccontext.NameLength) != 4 || strncmp(name, "RqLs", 4)) { - lc = (struct create_lease *)((char *)lc - + 
le32_to_cpu(lc->ccontext.Next)); + next = le32_to_cpu(lc->ccontext.Next); continue; } if (lc->lcontext.LeaseFlags & SMB2_LEASE_FLAG_BREAK_IN_PROGRESS) return SMB2_OPLOCK_LEVEL_NOCHANGE; found = true; break; - } while (le32_to_cpu(lc->ccontext.Next) != 0); + } while (next != 0); if (!found) return 0; @@ -877,23 +924,74 @@ parse_lease_state(struct smb2_create_rsp *rsp) return smb2_map_lease_to_oplock(lc->lcontext.LeaseState); } +static int +add_lease_context(struct kvec *iov, unsigned int *num_iovec, __u8 *oplock) +{ + struct smb2_create_req *req = iov[0].iov_base; + unsigned int num = *num_iovec; + + iov[num].iov_base = create_lease_buf(oplock+1, *oplock); + if (iov[num].iov_base == NULL) + return -ENOMEM; + iov[num].iov_len = sizeof(struct create_lease); + req->RequestedOplockLevel = SMB2_OPLOCK_LEVEL_LEASE; + if (!req->CreateContextsOffset) + req->CreateContextsOffset = cpu_to_le32( + sizeof(struct smb2_create_req) - 4 + + iov[num - 1].iov_len); + req->CreateContextsLength = cpu_to_le32( + le32_to_cpu(req->CreateContextsLength) + + sizeof(struct create_lease)); + inc_rfc1001_len(&req->hdr, sizeof(struct create_lease)); + *num_iovec = num + 1; + return 0; +} + +static int +add_durable_context(struct kvec *iov, unsigned int *num_iovec, + struct cifs_open_parms *oparms) +{ + struct smb2_create_req *req = iov[0].iov_base; + unsigned int num = *num_iovec; + + if (oparms->reconnect) { + iov[num].iov_base = create_reconnect_durable_buf(oparms->fid); + /* indicate that we don't need to relock the file */ + oparms->reconnect = false; + } else + iov[num].iov_base = create_durable_buf(); + if (iov[num].iov_base == NULL) + return -ENOMEM; + iov[num].iov_len = sizeof(struct create_durable); + if (!req->CreateContextsOffset) + req->CreateContextsOffset = + cpu_to_le32(sizeof(struct smb2_create_req) - 4 + + iov[1].iov_len); + req->CreateContextsLength = + cpu_to_le32(le32_to_cpu(req->CreateContextsLength) + + sizeof(struct create_durable)); + inc_rfc1001_len(&req->hdr, 
sizeof(struct create_durable)); + *num_iovec = num + 1; + return 0; +} + int -SMB2_open(const unsigned int xid, struct cifs_tcon *tcon, __le16 *path, - u64 *persistent_fid, u64 *volatile_fid, __u32 desired_access, - __u32 create_disposition, __u32 file_attributes, __u32 create_options, +SMB2_open(const unsigned int xid, struct cifs_open_parms *oparms, __le16 *path, __u8 *oplock, struct smb2_file_all_info *buf) { struct smb2_create_req *req; struct smb2_create_rsp *rsp; struct TCP_Server_Info *server; + struct cifs_tcon *tcon = oparms->tcon; struct cifs_ses *ses = tcon->ses; - struct kvec iov[3]; + struct kvec iov[4]; int resp_buftype; int uni_path_len; __le16 *copy_path = NULL; int copy_size; int rc = 0; - int num_iovecs = 2; + unsigned int num_iovecs = 2; + __u32 file_attributes = 0; cifs_dbg(FYI, "create/open\n"); @@ -906,55 +1004,47 @@ SMB2_open(const unsigned int xid, struct cifs_tcon *tcon, __le16 *path, if (rc) return rc; + if (oparms->create_options & CREATE_OPTION_READONLY) + file_attributes |= ATTR_READONLY; + req->ImpersonationLevel = IL_IMPERSONATION; - req->DesiredAccess = cpu_to_le32(desired_access); + req->DesiredAccess = cpu_to_le32(oparms->desired_access); /* File attributes ignored on open (used in create though) */ req->FileAttributes = cpu_to_le32(file_attributes); req->ShareAccess = FILE_SHARE_ALL_LE; - req->CreateDisposition = cpu_to_le32(create_disposition); - req->CreateOptions = cpu_to_le32(create_options); + req->CreateDisposition = cpu_to_le32(oparms->disposition); + req->CreateOptions = cpu_to_le32(oparms->create_options & CREATE_OPTIONS_MASK); uni_path_len = (2 * UniStrnlen((wchar_t *)path, PATH_MAX)) + 2; - req->NameOffset = cpu_to_le16(sizeof(struct smb2_create_req) - - 8 /* pad */ - 4 /* do not count rfc1001 len field */); + /* do not count rfc1001 len field */ + req->NameOffset = cpu_to_le16(sizeof(struct smb2_create_req) - 4); iov[0].iov_base = (char *)req; /* 4 for rfc1002 length field */ iov[0].iov_len = get_rfc1002_length(req) + 
4; /* MUST set path len (NameLength) to 0 opening root of share */ - if (uni_path_len >= 4) { - req->NameLength = cpu_to_le16(uni_path_len - 2); - /* -1 since last byte is buf[0] which is sent below (path) */ - iov[0].iov_len--; - if (uni_path_len % 8 != 0) { - copy_size = uni_path_len / 8 * 8; - if (copy_size < uni_path_len) - copy_size += 8; - - copy_path = kzalloc(copy_size, GFP_KERNEL); - if (!copy_path) - return -ENOMEM; - memcpy((char *)copy_path, (const char *)path, - uni_path_len); - uni_path_len = copy_size; - path = copy_path; - } - - iov[1].iov_len = uni_path_len; - iov[1].iov_base = path; - /* - * -1 since last byte is buf[0] which was counted in - * smb2_buf_len. - */ - inc_rfc1001_len(req, uni_path_len - 1); - } else { - iov[0].iov_len += 7; - req->hdr.smb2_buf_length = cpu_to_be32(be32_to_cpu( - req->hdr.smb2_buf_length) + 8 - 1); - num_iovecs = 1; - req->NameLength = 0; + req->NameLength = cpu_to_le16(uni_path_len - 2); + /* -1 since last byte is buf[0] which is sent below (path) */ + iov[0].iov_len--; + if (uni_path_len % 8 != 0) { + copy_size = uni_path_len / 8 * 8; + if (copy_size < uni_path_len) + copy_size += 8; + + copy_path = kzalloc(copy_size, GFP_KERNEL); + if (!copy_path) + return -ENOMEM; + memcpy((char *)copy_path, (const char *)path, + uni_path_len); + uni_path_len = copy_size; + path = copy_path; } + iov[1].iov_len = uni_path_len; + iov[1].iov_base = path; + /* -1 since last byte is buf[0] which was counted in smb2_buf_len */ + inc_rfc1001_len(req, uni_path_len - 1); + if (!server->oplocks) *oplock = SMB2_OPLOCK_LEVEL_NONE; @@ -962,21 +1052,29 @@ SMB2_open(const unsigned int xid, struct cifs_tcon *tcon, __le16 *path, *oplock == SMB2_OPLOCK_LEVEL_NONE) req->RequestedOplockLevel = *oplock; else { - iov[num_iovecs].iov_base = create_lease_buf(oplock+1, *oplock); - if (iov[num_iovecs].iov_base == NULL) { + rc = add_lease_context(iov, &num_iovecs, oplock); + if (rc) { cifs_small_buf_release(req); kfree(copy_path); - return -ENOMEM; + return 
rc; + } + } + + if (*oplock == SMB2_OPLOCK_LEVEL_BATCH) { + /* need to set Next field of lease context if we request it */ + if (tcon->ses->server->capabilities & SMB2_GLOBAL_CAP_LEASING) { + struct create_context *ccontext = + (struct create_context *)iov[num_iovecs-1].iov_base; + ccontext->Next = + cpu_to_le32(sizeof(struct create_lease)); + } + rc = add_durable_context(iov, &num_iovecs, oparms); + if (rc) { + cifs_small_buf_release(req); + kfree(copy_path); + kfree(iov[num_iovecs-1].iov_base); + return rc; } - iov[num_iovecs].iov_len = sizeof(struct create_lease); - req->RequestedOplockLevel = SMB2_OPLOCK_LEVEL_LEASE; - req->CreateContextsOffset = cpu_to_le32( - sizeof(struct smb2_create_req) - 4 - 8 + - iov[num_iovecs-1].iov_len); - req->CreateContextsLength = cpu_to_le32( - sizeof(struct create_lease)); - inc_rfc1001_len(&req->hdr, sizeof(struct create_lease)); - num_iovecs++; } rc = SendReceive2(xid, ses, iov, num_iovecs, &resp_buftype, 0); @@ -987,8 +1085,8 @@ SMB2_open(const unsigned int xid, struct cifs_tcon *tcon, __le16 *path, goto creat_exit; } - *persistent_fid = rsp->PersistentFileId; - *volatile_fid = rsp->VolatileFileId; + oparms->fid->persistent_fid = rsp->PersistentFileId; + oparms->fid->volatile_fid = rsp->VolatileFileId; if (buf) { memcpy(buf, &rsp->CreationTime, 32); diff --git a/fs/cifs/smb2pdu.h b/fs/cifs/smb2pdu.h index f31043b..36b0d37 100644 --- a/fs/cifs/smb2pdu.h +++ b/fs/cifs/smb2pdu.h @@ -428,7 +428,7 @@ struct smb2_create_req { __le16 NameLength; __le32 CreateContextsOffset; __le32 CreateContextsLength; - __u8 Buffer[8]; + __u8 Buffer[0]; } __packed; struct smb2_create_rsp { @@ -485,6 +485,18 @@ struct create_lease { struct lease_context lcontext; } __packed; +struct create_durable { + struct create_context ccontext; + __u8 Name[8]; + union { + __u8 Reserved[16]; + struct { + __u64 PersistentFileId; + __u64 VolatileFileId; + } Fid; + } Data; +} __packed; + /* this goes in the ioctl buffer when doing a copychunk request */ struct 
copychunk_ioctl { char SourceKey[24]; diff --git a/fs/cifs/smb2proto.h b/fs/cifs/smb2proto.h index d4e1eb8..1a5ecbe 100644 --- a/fs/cifs/smb2proto.h +++ b/fs/cifs/smb2proto.h @@ -84,11 +84,9 @@ extern int smb2_create_hardlink(const unsigned int xid, struct cifs_tcon *tcon, const char *from_name, const char *to_name, struct cifs_sb_info *cifs_sb); -extern int smb2_open_file(const unsigned int xid, struct cifs_tcon *tcon, - const char *full_path, int disposition, - int desired_access, int create_options, - struct cifs_fid *fid, __u32 *oplock, - FILE_ALL_INFO *buf, struct cifs_sb_info *cifs_sb); +extern int smb2_open_file(const unsigned int xid, + struct cifs_open_parms *oparms, + __u32 *oplock, FILE_ALL_INFO *buf); extern void smb2_set_oplock_level(struct cifsInodeInfo *cinode, __u32 oplock); extern int smb2_unlock_range(struct cifsFileInfo *cfile, struct file_lock *flock, const unsigned int xid); @@ -106,11 +104,9 @@ extern int SMB2_tcon(const unsigned int xid, struct cifs_ses *ses, const char *tree, struct cifs_tcon *tcon, const struct nls_table *); extern int SMB2_tdis(const unsigned int xid, struct cifs_tcon *tcon); -extern int SMB2_open(const unsigned int xid, struct cifs_tcon *tcon, - __le16 *path, u64 *persistent_fid, u64 *volatile_fid, - __u32 desired_access, __u32 create_disposition, - __u32 file_attributes, __u32 create_options, - __u8 *oplock, struct smb2_file_all_info *buf); +extern int SMB2_open(const unsigned int xid, struct cifs_open_parms *oparms, + __le16 *path, __u8 *oplock, + struct smb2_file_all_info *buf); extern int SMB2_ioctl(const unsigned int xid, struct cifs_tcon *tcon, u64 persistent_fid, u64 volatile_fid, u32 opcode, bool is_fsctl, char *in_data, u32 indatalen, diff --git a/fs/cifs/smb2transport.c b/fs/cifs/smb2transport.c index 09b4fba..4f2300d 100644 --- a/fs/cifs/smb2transport.c +++ b/fs/cifs/smb2transport.c @@ -39,6 +39,82 @@ #include "smb2status.h" #include "smb2glob.h" +static int +smb2_crypto_shash_allocate(struct TCP_Server_Info 
*server) +{ + int rc; + unsigned int size; + + if (server->secmech.sdeschmacsha256 != NULL) + return 0; /* already allocated */ + + server->secmech.hmacsha256 = crypto_alloc_shash("hmac(sha256)", 0, 0); + if (IS_ERR(server->secmech.hmacsha256)) { + cifs_dbg(VFS, "could not allocate crypto hmacsha256\n"); + rc = PTR_ERR(server->secmech.hmacsha256); + server->secmech.hmacsha256 = NULL; + return rc; + } + + size = sizeof(struct shash_desc) + + crypto_shash_descsize(server->secmech.hmacsha256); + server->secmech.sdeschmacsha256 = kmalloc(size, GFP_KERNEL); + if (!server->secmech.sdeschmacsha256) { + crypto_free_shash(server->secmech.hmacsha256); + server->secmech.hmacsha256 = NULL; + return -ENOMEM; + } + server->secmech.sdeschmacsha256->shash.tfm = server->secmech.hmacsha256; + server->secmech.sdeschmacsha256->shash.flags = 0x0; + + return 0; +} + +static int +smb3_crypto_shash_allocate(struct TCP_Server_Info *server) +{ + unsigned int size; + int rc; + + if (server->secmech.sdesccmacaes != NULL) + return 0; /* already allocated */ + + rc = smb2_crypto_shash_allocate(server); + if (rc) + return rc; + + server->secmech.cmacaes = crypto_alloc_shash("cmac(aes)", 0, 0); + if (IS_ERR(server->secmech.cmacaes)) { + cifs_dbg(VFS, "could not allocate crypto cmac-aes"); + kfree(server->secmech.sdeschmacsha256); + server->secmech.sdeschmacsha256 = NULL; + crypto_free_shash(server->secmech.hmacsha256); + server->secmech.hmacsha256 = NULL; + rc = PTR_ERR(server->secmech.cmacaes); + server->secmech.cmacaes = NULL; + return rc; + } + + size = sizeof(struct shash_desc) + + crypto_shash_descsize(server->secmech.cmacaes); + server->secmech.sdesccmacaes = kmalloc(size, GFP_KERNEL); + if (!server->secmech.sdesccmacaes) { + cifs_dbg(VFS, "%s: Can't alloc cmacaes\n", __func__); + kfree(server->secmech.sdeschmacsha256); + server->secmech.sdeschmacsha256 = NULL; + crypto_free_shash(server->secmech.hmacsha256); + crypto_free_shash(server->secmech.cmacaes); + server->secmech.hmacsha256 = NULL; 
+ server->secmech.cmacaes = NULL; + return -ENOMEM; + } + server->secmech.sdesccmacaes->shash.tfm = server->secmech.cmacaes; + server->secmech.sdesccmacaes->shash.flags = 0x0; + + return 0; +} + + int smb2_calc_signature(struct smb_rqst *rqst, struct TCP_Server_Info *server) { @@ -52,6 +128,12 @@ smb2_calc_signature(struct smb_rqst *rqst, struct TCP_Server_Info *server) memset(smb2_signature, 0x0, SMB2_HMACSHA256_SIZE); memset(smb2_pdu->Signature, 0x0, SMB2_SIGNATURE_SIZE); + rc = smb2_crypto_shash_allocate(server); + if (rc) { + cifs_dbg(VFS, "%s: shah256 alloc failed\n", __func__); + return rc; + } + rc = crypto_shash_setkey(server->secmech.hmacsha256, server->session_key.response, SMB2_NTLMV2_SESSKEY_SIZE); if (rc) { @@ -61,7 +143,7 @@ smb2_calc_signature(struct smb_rqst *rqst, struct TCP_Server_Info *server) rc = crypto_shash_init(&server->secmech.sdeschmacsha256->shash); if (rc) { - cifs_dbg(VFS, "%s: Could not init md5\n", __func__); + cifs_dbg(VFS, "%s: Could not init sha256", __func__); return rc; } @@ -129,6 +211,12 @@ generate_smb3signingkey(struct TCP_Server_Info *server) memset(prfhash, 0x0, SMB2_HMACSHA256_SIZE); memset(server->smb3signingkey, 0x0, SMB3_SIGNKEY_SIZE); + rc = smb3_crypto_shash_allocate(server); + if (rc) { + cifs_dbg(VFS, "%s: crypto alloc failed\n", __func__); + goto smb3signkey_ret; + } + rc = crypto_shash_setkey(server->secmech.hmacsha256, server->session_key.response, SMB2_NTLMV2_SESSKEY_SIZE); if (rc) { @@ -210,6 +298,11 @@ smb3_calc_signature(struct smb_rqst *rqst, struct TCP_Server_Info *server) return rc; } + /* + * we already allocate sdesccmacaes when we init smb3 signing key, + * so unlike smb2 case we do not have to check here if secmech are + * initialized + */ rc = crypto_shash_init(&server->secmech.sdesccmacaes->shash); if (rc) { cifs_dbg(VFS, "%s: Could not init cmac aes\n", __func__); diff --git a/fs/configfs/dir.c b/fs/configfs/dir.c index 5e7c60c1..277bd1b 100644 --- a/fs/configfs/dir.c +++ b/fs/configfs/dir.c @@ 
-660,19 +660,15 @@ static int create_default_group(struct config_group *parent_group, struct config_group *group) { int ret; - struct qstr name; struct configfs_dirent *sd; /* We trust the caller holds a reference to parent */ struct dentry *child, *parent = parent_group->cg_item.ci_dentry; if (!group->cg_item.ci_name) group->cg_item.ci_name = group->cg_item.ci_namebuf; - name.name = group->cg_item.ci_name; - name.len = strlen(name.name); - name.hash = full_name_hash(name.name, name.len); ret = -ENOMEM; - child = d_alloc(parent, &name); + child = d_alloc_name(parent, group->cg_item.ci_name); if (child) { d_add(child, NULL); @@ -1650,7 +1646,6 @@ int configfs_register_subsystem(struct configfs_subsystem *subsys) { int err; struct config_group *group = &subsys->su_group; - struct qstr name; struct dentry *dentry; struct dentry *root; struct configfs_dirent *sd; @@ -1667,12 +1662,8 @@ int configfs_register_subsystem(struct configfs_subsystem *subsys) mutex_lock_nested(&root->d_inode->i_mutex, I_MUTEX_PARENT); - name.name = group->cg_item.ci_name; - name.len = strlen(name.name); - name.hash = full_name_hash(name.name, name.len); - err = -ENOMEM; - dentry = d_alloc(root, &name); + dentry = d_alloc_name(root, group->cg_item.ci_name); if (dentry) { d_add(dentry, NULL); diff --git a/fs/debugfs/inode.c b/fs/debugfs/inode.c index 4888cb3..c7c83ff 100644 --- a/fs/debugfs/inode.c +++ b/fs/debugfs/inode.c @@ -533,8 +533,7 @@ EXPORT_SYMBOL_GPL(debugfs_remove); */ void debugfs_remove_recursive(struct dentry *dentry) { - struct dentry *child; - struct dentry *parent; + struct dentry *child, *next, *parent; if (IS_ERR_OR_NULL(dentry)) return; @@ -544,61 +543,37 @@ void debugfs_remove_recursive(struct dentry *dentry) return; parent = dentry; + down: mutex_lock(&parent->d_inode->i_mutex); + list_for_each_entry_safe(child, next, &parent->d_subdirs, d_u.d_child) { + if (!debugfs_positive(child)) + continue; - while (1) { - /* - * When all dentries under "parent" has been removed, - * 
walk up the tree until we reach our starting point. - */ - if (list_empty(&parent->d_subdirs)) { - mutex_unlock(&parent->d_inode->i_mutex); - if (parent == dentry) - break; - parent = parent->d_parent; - mutex_lock(&parent->d_inode->i_mutex); - } - child = list_entry(parent->d_subdirs.next, struct dentry, - d_u.d_child); - next_sibling: - - /* - * If "child" isn't empty, walk down the tree and - * remove all its descendants first. - */ + /* perhaps simple_empty(child) makes more sense */ if (!list_empty(&child->d_subdirs)) { mutex_unlock(&parent->d_inode->i_mutex); parent = child; - mutex_lock(&parent->d_inode->i_mutex); - continue; + goto down; } - __debugfs_remove(child, parent); - if (parent->d_subdirs.next == &child->d_u.d_child) { - /* - * Try the next sibling. - */ - if (child->d_u.d_child.next != &parent->d_subdirs) { - child = list_entry(child->d_u.d_child.next, - struct dentry, - d_u.d_child); - goto next_sibling; - } - - /* - * Avoid infinite loop if we fail to remove - * one dentry. 
- */ - mutex_unlock(&parent->d_inode->i_mutex); - break; - } - simple_release_fs(&debugfs_mount, &debugfs_mount_count); + up: + if (!__debugfs_remove(child, parent)) + simple_release_fs(&debugfs_mount, &debugfs_mount_count); } - parent = dentry->d_parent; + mutex_unlock(&parent->d_inode->i_mutex); + child = parent; + parent = parent->d_parent; mutex_lock(&parent->d_inode->i_mutex); - __debugfs_remove(dentry, parent); + + if (child != dentry) { + next = list_entry(child->d_u.d_child.next, struct dentry, + d_u.d_child); + goto up; + } + + if (!__debugfs_remove(child, parent)) + simple_release_fs(&debugfs_mount, &debugfs_mount_count); mutex_unlock(&parent->d_inode->i_mutex); - simple_release_fs(&debugfs_mount, &debugfs_mount_count); } EXPORT_SYMBOL_GPL(debugfs_remove_recursive); diff --git a/fs/dlm/user.c b/fs/dlm/user.c index 911649a..81214911 100644 --- a/fs/dlm/user.c +++ b/fs/dlm/user.c @@ -686,7 +686,6 @@ static int device_close(struct inode *inode, struct file *file) device_remove_lockspace() */ sigprocmask(SIG_SETMASK, &tmpsig, NULL); - recalc_sigpending(); return 0; } diff --git a/fs/ecryptfs/crypto.c b/fs/ecryptfs/crypto.c index cfa109a..d107576 100644 --- a/fs/ecryptfs/crypto.c +++ b/fs/ecryptfs/crypto.c @@ -37,16 +37,8 @@ #include <asm/unaligned.h> #include "ecryptfs_kernel.h" -static int -ecryptfs_decrypt_page_offset(struct ecryptfs_crypt_stat *crypt_stat, - struct page *dst_page, int dst_offset, - struct page *src_page, int src_offset, int size, - unsigned char *iv); -static int -ecryptfs_encrypt_page_offset(struct ecryptfs_crypt_stat *crypt_stat, - struct page *dst_page, int dst_offset, - struct page *src_page, int src_offset, int size, - unsigned char *iv); +#define DECRYPT 0 +#define ENCRYPT 1 /** * ecryptfs_to_hex @@ -336,19 +328,20 @@ static void extent_crypt_complete(struct crypto_async_request *req, int rc) } /** - * encrypt_scatterlist + * crypt_scatterlist * @crypt_stat: Pointer to the crypt_stat struct to initialize. 
- * @dest_sg: Destination of encrypted data - * @src_sg: Data to be encrypted - * @size: Length of data to be encrypted - * @iv: iv to use during encryption + * @dst_sg: Destination of the data after performing the crypto operation + * @src_sg: Data to be encrypted or decrypted + * @size: Length of data + * @iv: IV to use + * @op: ENCRYPT or DECRYPT to indicate the desired operation * - * Returns the number of bytes encrypted; negative value on error + * Returns the number of bytes encrypted or decrypted; negative value on error */ -static int encrypt_scatterlist(struct ecryptfs_crypt_stat *crypt_stat, - struct scatterlist *dest_sg, - struct scatterlist *src_sg, int size, - unsigned char *iv) +static int crypt_scatterlist(struct ecryptfs_crypt_stat *crypt_stat, + struct scatterlist *dst_sg, + struct scatterlist *src_sg, int size, + unsigned char *iv, int op) { struct ablkcipher_request *req = NULL; struct extent_crypt_result ecr; @@ -391,9 +384,9 @@ static int encrypt_scatterlist(struct ecryptfs_crypt_stat *crypt_stat, crypt_stat->flags |= ECRYPTFS_KEY_SET; } mutex_unlock(&crypt_stat->cs_tfm_mutex); - ecryptfs_printk(KERN_DEBUG, "Encrypting [%d] bytes.\n", size); - ablkcipher_request_set_crypt(req, src_sg, dest_sg, size, iv); - rc = crypto_ablkcipher_encrypt(req); + ablkcipher_request_set_crypt(req, src_sg, dst_sg, size, iv); + rc = op == ENCRYPT ? 
crypto_ablkcipher_encrypt(req) : + crypto_ablkcipher_decrypt(req); if (rc == -EINPROGRESS || rc == -EBUSY) { struct extent_crypt_result *ecr = req->base.data; @@ -407,41 +400,43 @@ out: } /** - * ecryptfs_lower_offset_for_extent + * lower_offset_for_page * * Convert an eCryptfs page index into a lower byte offset */ -static void ecryptfs_lower_offset_for_extent(loff_t *offset, loff_t extent_num, - struct ecryptfs_crypt_stat *crypt_stat) +static loff_t lower_offset_for_page(struct ecryptfs_crypt_stat *crypt_stat, + struct page *page) { - (*offset) = ecryptfs_lower_header_size(crypt_stat) - + (crypt_stat->extent_size * extent_num); + return ecryptfs_lower_header_size(crypt_stat) + + (page->index << PAGE_CACHE_SHIFT); } /** - * ecryptfs_encrypt_extent - * @enc_extent_page: Allocated page into which to encrypt the data in - * @page + * crypt_extent * @crypt_stat: crypt_stat containing cryptographic context for the * encryption operation - * @page: Page containing plaintext data extent to encrypt + * @dst_page: The page to write the result into + * @src_page: The page to read from * @extent_offset: Page extent offset for use in generating IV + * @op: ENCRYPT or DECRYPT to indicate the desired operation * - * Encrypts one extent of data. + * Encrypts or decrypts one extent of data. * * Return zero on success; non-zero otherwise */ -static int ecryptfs_encrypt_extent(struct page *enc_extent_page, - struct ecryptfs_crypt_stat *crypt_stat, - struct page *page, - unsigned long extent_offset) +static int crypt_extent(struct ecryptfs_crypt_stat *crypt_stat, + struct page *dst_page, + struct page *src_page, + unsigned long extent_offset, int op) { + pgoff_t page_index = op == ENCRYPT ? 
src_page->index : dst_page->index; loff_t extent_base; char extent_iv[ECRYPTFS_MAX_IV_BYTES]; + struct scatterlist src_sg, dst_sg; + size_t extent_size = crypt_stat->extent_size; int rc; - extent_base = (((loff_t)page->index) - * (PAGE_CACHE_SIZE / crypt_stat->extent_size)); + extent_base = (((loff_t)page_index) * (PAGE_CACHE_SIZE / extent_size)); rc = ecryptfs_derive_iv(extent_iv, crypt_stat, (extent_base + extent_offset)); if (rc) { @@ -450,15 +445,21 @@ static int ecryptfs_encrypt_extent(struct page *enc_extent_page, (unsigned long long)(extent_base + extent_offset), rc); goto out; } - rc = ecryptfs_encrypt_page_offset(crypt_stat, enc_extent_page, 0, - page, (extent_offset - * crypt_stat->extent_size), - crypt_stat->extent_size, extent_iv); + + sg_init_table(&src_sg, 1); + sg_init_table(&dst_sg, 1); + + sg_set_page(&src_sg, src_page, extent_size, + extent_offset * extent_size); + sg_set_page(&dst_sg, dst_page, extent_size, + extent_offset * extent_size); + + rc = crypt_scatterlist(crypt_stat, &dst_sg, &src_sg, extent_size, + extent_iv, op); if (rc < 0) { - printk(KERN_ERR "%s: Error attempting to encrypt page with " - "page->index = [%ld], extent_offset = [%ld]; " - "rc = [%d]\n", __func__, page->index, extent_offset, - rc); + printk(KERN_ERR "%s: Error attempting to crypt page with " + "page_index = [%ld], extent_offset = [%ld]; " + "rc = [%d]\n", __func__, page_index, extent_offset, rc); goto out; } rc = 0; @@ -489,6 +490,7 @@ int ecryptfs_encrypt_page(struct page *page) char *enc_extent_virt; struct page *enc_extent_page = NULL; loff_t extent_offset; + loff_t lower_offset; int rc = 0; ecryptfs_inode = page->mapping->host; @@ -502,75 +504,35 @@ int ecryptfs_encrypt_page(struct page *page) "encrypted extent\n"); goto out; } - enc_extent_virt = kmap(enc_extent_page); + for (extent_offset = 0; extent_offset < (PAGE_CACHE_SIZE / crypt_stat->extent_size); extent_offset++) { - loff_t offset; - - rc = ecryptfs_encrypt_extent(enc_extent_page, crypt_stat, page, - 
extent_offset); + rc = crypt_extent(crypt_stat, enc_extent_page, page, + extent_offset, ENCRYPT); if (rc) { printk(KERN_ERR "%s: Error encrypting extent; " "rc = [%d]\n", __func__, rc); goto out; } - ecryptfs_lower_offset_for_extent( - &offset, ((((loff_t)page->index) - * (PAGE_CACHE_SIZE - / crypt_stat->extent_size)) - + extent_offset), crypt_stat); - rc = ecryptfs_write_lower(ecryptfs_inode, enc_extent_virt, - offset, crypt_stat->extent_size); - if (rc < 0) { - ecryptfs_printk(KERN_ERR, "Error attempting " - "to write lower page; rc = [%d]" - "\n", rc); - goto out; - } - } - rc = 0; -out: - if (enc_extent_page) { - kunmap(enc_extent_page); - __free_page(enc_extent_page); } - return rc; -} -static int ecryptfs_decrypt_extent(struct page *page, - struct ecryptfs_crypt_stat *crypt_stat, - struct page *enc_extent_page, - unsigned long extent_offset) -{ - loff_t extent_base; - char extent_iv[ECRYPTFS_MAX_IV_BYTES]; - int rc; - - extent_base = (((loff_t)page->index) - * (PAGE_CACHE_SIZE / crypt_stat->extent_size)); - rc = ecryptfs_derive_iv(extent_iv, crypt_stat, - (extent_base + extent_offset)); - if (rc) { - ecryptfs_printk(KERN_ERR, "Error attempting to derive IV for " - "extent [0x%.16llx]; rc = [%d]\n", - (unsigned long long)(extent_base + extent_offset), rc); - goto out; - } - rc = ecryptfs_decrypt_page_offset(crypt_stat, page, - (extent_offset - * crypt_stat->extent_size), - enc_extent_page, 0, - crypt_stat->extent_size, extent_iv); + lower_offset = lower_offset_for_page(crypt_stat, page); + enc_extent_virt = kmap(enc_extent_page); + rc = ecryptfs_write_lower(ecryptfs_inode, enc_extent_virt, lower_offset, + PAGE_CACHE_SIZE); + kunmap(enc_extent_page); if (rc < 0) { - printk(KERN_ERR "%s: Error attempting to decrypt to page with " - "page->index = [%ld], extent_offset = [%ld]; " - "rc = [%d]\n", __func__, page->index, extent_offset, - rc); + ecryptfs_printk(KERN_ERR, + "Error attempting to write lower page; rc = [%d]\n", + rc); goto out; } rc = 0; out: + if 
(enc_extent_page) { + __free_page(enc_extent_page); + } return rc; } @@ -594,43 +556,33 @@ int ecryptfs_decrypt_page(struct page *page) { struct inode *ecryptfs_inode; struct ecryptfs_crypt_stat *crypt_stat; - char *enc_extent_virt; - struct page *enc_extent_page = NULL; + char *page_virt; unsigned long extent_offset; + loff_t lower_offset; int rc = 0; ecryptfs_inode = page->mapping->host; crypt_stat = &(ecryptfs_inode_to_private(ecryptfs_inode)->crypt_stat); BUG_ON(!(crypt_stat->flags & ECRYPTFS_ENCRYPTED)); - enc_extent_page = alloc_page(GFP_USER); - if (!enc_extent_page) { - rc = -ENOMEM; - ecryptfs_printk(KERN_ERR, "Error allocating memory for " - "encrypted extent\n"); + + lower_offset = lower_offset_for_page(crypt_stat, page); + page_virt = kmap(page); + rc = ecryptfs_read_lower(page_virt, lower_offset, PAGE_CACHE_SIZE, + ecryptfs_inode); + kunmap(page); + if (rc < 0) { + ecryptfs_printk(KERN_ERR, + "Error attempting to read lower page; rc = [%d]\n", + rc); goto out; } - enc_extent_virt = kmap(enc_extent_page); + for (extent_offset = 0; extent_offset < (PAGE_CACHE_SIZE / crypt_stat->extent_size); extent_offset++) { - loff_t offset; - - ecryptfs_lower_offset_for_extent( - &offset, ((page->index * (PAGE_CACHE_SIZE - / crypt_stat->extent_size)) - + extent_offset), crypt_stat); - rc = ecryptfs_read_lower(enc_extent_virt, offset, - crypt_stat->extent_size, - ecryptfs_inode); - if (rc < 0) { - ecryptfs_printk(KERN_ERR, "Error attempting " - "to read lower page; rc = [%d]" - "\n", rc); - goto out; - } - rc = ecryptfs_decrypt_extent(page, crypt_stat, enc_extent_page, - extent_offset); + rc = crypt_extent(crypt_stat, page, page, + extent_offset, DECRYPT); if (rc) { printk(KERN_ERR "%s: Error encrypting extent; " "rc = [%d]\n", __func__, rc); @@ -638,140 +590,7 @@ int ecryptfs_decrypt_page(struct page *page) } } out: - if (enc_extent_page) { - kunmap(enc_extent_page); - __free_page(enc_extent_page); - } - return rc; -} - -/** - * decrypt_scatterlist - * @crypt_stat: 
Cryptographic context - * @dest_sg: The destination scatterlist to decrypt into - * @src_sg: The source scatterlist to decrypt from - * @size: The number of bytes to decrypt - * @iv: The initialization vector to use for the decryption - * - * Returns the number of bytes decrypted; negative value on error - */ -static int decrypt_scatterlist(struct ecryptfs_crypt_stat *crypt_stat, - struct scatterlist *dest_sg, - struct scatterlist *src_sg, int size, - unsigned char *iv) -{ - struct ablkcipher_request *req = NULL; - struct extent_crypt_result ecr; - int rc = 0; - - BUG_ON(!crypt_stat || !crypt_stat->tfm - || !(crypt_stat->flags & ECRYPTFS_STRUCT_INITIALIZED)); - if (unlikely(ecryptfs_verbosity > 0)) { - ecryptfs_printk(KERN_DEBUG, "Key size [%zd]; key:\n", - crypt_stat->key_size); - ecryptfs_dump_hex(crypt_stat->key, - crypt_stat->key_size); - } - - init_completion(&ecr.completion); - - mutex_lock(&crypt_stat->cs_tfm_mutex); - req = ablkcipher_request_alloc(crypt_stat->tfm, GFP_NOFS); - if (!req) { - mutex_unlock(&crypt_stat->cs_tfm_mutex); - rc = -ENOMEM; - goto out; - } - - ablkcipher_request_set_callback(req, - CRYPTO_TFM_REQ_MAY_BACKLOG | CRYPTO_TFM_REQ_MAY_SLEEP, - extent_crypt_complete, &ecr); - /* Consider doing this once, when the file is opened */ - if (!(crypt_stat->flags & ECRYPTFS_KEY_SET)) { - rc = crypto_ablkcipher_setkey(crypt_stat->tfm, crypt_stat->key, - crypt_stat->key_size); - if (rc) { - ecryptfs_printk(KERN_ERR, - "Error setting key; rc = [%d]\n", - rc); - mutex_unlock(&crypt_stat->cs_tfm_mutex); - rc = -EINVAL; - goto out; - } - crypt_stat->flags |= ECRYPTFS_KEY_SET; - } - mutex_unlock(&crypt_stat->cs_tfm_mutex); - ecryptfs_printk(KERN_DEBUG, "Decrypting [%d] bytes.\n", size); - ablkcipher_request_set_crypt(req, src_sg, dest_sg, size, iv); - rc = crypto_ablkcipher_decrypt(req); - if (rc == -EINPROGRESS || rc == -EBUSY) { - struct extent_crypt_result *ecr = req->base.data; - - wait_for_completion(&ecr->completion); - rc = ecr->rc; - 
INIT_COMPLETION(ecr->completion); - } -out: - ablkcipher_request_free(req); return rc; - -} - -/** - * ecryptfs_encrypt_page_offset - * @crypt_stat: The cryptographic context - * @dst_page: The page to encrypt into - * @dst_offset: The offset in the page to encrypt into - * @src_page: The page to encrypt from - * @src_offset: The offset in the page to encrypt from - * @size: The number of bytes to encrypt - * @iv: The initialization vector to use for the encryption - * - * Returns the number of bytes encrypted - */ -static int -ecryptfs_encrypt_page_offset(struct ecryptfs_crypt_stat *crypt_stat, - struct page *dst_page, int dst_offset, - struct page *src_page, int src_offset, int size, - unsigned char *iv) -{ - struct scatterlist src_sg, dst_sg; - - sg_init_table(&src_sg, 1); - sg_init_table(&dst_sg, 1); - - sg_set_page(&src_sg, src_page, size, src_offset); - sg_set_page(&dst_sg, dst_page, size, dst_offset); - return encrypt_scatterlist(crypt_stat, &dst_sg, &src_sg, size, iv); -} - -/** - * ecryptfs_decrypt_page_offset - * @crypt_stat: The cryptographic context - * @dst_page: The page to decrypt into - * @dst_offset: The offset in the page to decrypt into - * @src_page: The page to decrypt from - * @src_offset: The offset in the page to decrypt from - * @size: The number of bytes to decrypt - * @iv: The initialization vector to use for the decryption - * - * Returns the number of bytes decrypted - */ -static int -ecryptfs_decrypt_page_offset(struct ecryptfs_crypt_stat *crypt_stat, - struct page *dst_page, int dst_offset, - struct page *src_page, int src_offset, int size, - unsigned char *iv) -{ - struct scatterlist src_sg, dst_sg; - - sg_init_table(&src_sg, 1); - sg_set_page(&src_sg, src_page, size, src_offset); - - sg_init_table(&dst_sg, 1); - sg_set_page(&dst_sg, dst_page, size, dst_offset); - - return decrypt_scatterlist(crypt_stat, &dst_sg, &src_sg, size, iv); } #define ECRYPTFS_MAX_SCATTERLIST_LEN 4 diff --git a/fs/ecryptfs/file.c b/fs/ecryptfs/file.c index 
24f1105..992cf95 100644 --- a/fs/ecryptfs/file.c +++ b/fs/ecryptfs/file.c @@ -49,7 +49,7 @@ static ssize_t ecryptfs_read_update_atime(struct kiocb *iocb, unsigned long nr_segs, loff_t pos) { ssize_t rc; - struct path lower; + struct path *path; struct file *file = iocb->ki_filp; rc = generic_file_aio_read(iocb, iov, nr_segs, pos); @@ -60,9 +60,8 @@ static ssize_t ecryptfs_read_update_atime(struct kiocb *iocb, if (-EIOCBQUEUED == rc) rc = wait_on_sync_kiocb(iocb); if (rc >= 0) { - lower.dentry = ecryptfs_dentry_to_lower(file->f_path.dentry); - lower.mnt = ecryptfs_dentry_to_lower_mnt(file->f_path.dentry); - touch_atime(&lower); + path = ecryptfs_dentry_to_lower_path(file->f_path.dentry); + touch_atime(path); } return rc; } diff --git a/fs/ecryptfs/main.c b/fs/ecryptfs/main.c index e924cf4..eb1c597 100644 --- a/fs/ecryptfs/main.c +++ b/fs/ecryptfs/main.c @@ -120,16 +120,15 @@ static int ecryptfs_init_lower_file(struct dentry *dentry, struct file **lower_file) { const struct cred *cred = current_cred(); - struct dentry *lower_dentry = ecryptfs_dentry_to_lower(dentry); - struct vfsmount *lower_mnt = ecryptfs_dentry_to_lower_mnt(dentry); + struct path *path = ecryptfs_dentry_to_lower_path(dentry); int rc; - rc = ecryptfs_privileged_open(lower_file, lower_dentry, lower_mnt, + rc = ecryptfs_privileged_open(lower_file, path->dentry, path->mnt, cred); if (rc) { printk(KERN_ERR "Error opening lower file " "for lower_dentry [0x%p] and lower_mnt [0x%p]; " - "rc = [%d]\n", lower_dentry, lower_mnt, rc); + "rc = [%d]\n", path->dentry, path->mnt, rc); (*lower_file) = NULL; } return rc; diff --git a/fs/ecryptfs/messaging.c b/fs/ecryptfs/messaging.c index 49ff8ea0..e57380e 100644 --- a/fs/ecryptfs/messaging.c +++ b/fs/ecryptfs/messaging.c @@ -247,14 +247,13 @@ int ecryptfs_process_response(struct ecryptfs_daemon *daemon, goto unlock; } msg_size = (sizeof(*msg) + msg->data_len); - msg_ctx->msg = kmalloc(msg_size, GFP_KERNEL); + msg_ctx->msg = kmemdup(msg, msg_size, GFP_KERNEL); if 
(!msg_ctx->msg) { rc = -ENOMEM; printk(KERN_ERR "%s: Failed to allocate [%zd] bytes of " "GFP_KERNEL memory\n", __func__, msg_size); goto unlock; } - memcpy(msg_ctx->msg, msg, msg_size); msg_ctx->state = ECRYPTFS_MSG_CTX_STATE_DONE; wake_up_process(msg_ctx->task); rc = 0; diff --git a/fs/efivarfs/inode.c b/fs/efivarfs/inode.c index 7e787fb..07ab497 100644 --- a/fs/efivarfs/inode.c +++ b/fs/efivarfs/inode.c @@ -155,20 +155,8 @@ static int efivarfs_unlink(struct inode *dir, struct dentry *dentry) return 0; }; -/* - * Handle negative dentry. - */ -static struct dentry *efivarfs_lookup(struct inode *dir, struct dentry *dentry, - unsigned int flags) -{ - if (dentry->d_name.len > NAME_MAX) - return ERR_PTR(-ENAMETOOLONG); - d_add(dentry, NULL); - return NULL; -} - const struct inode_operations efivarfs_dir_inode_operations = { - .lookup = efivarfs_lookup, + .lookup = simple_lookup, .unlink = efivarfs_unlink, .create = efivarfs_create, }; @@ -608,7 +608,7 @@ static int shift_arg_pages(struct vm_area_struct *vma, unsigned long shift) return -ENOMEM; lru_add_drain(); - tlb_gather_mmu(&tlb, mm, 0); + tlb_gather_mmu(&tlb, mm, old_start, old_end); if (new_end > old_start) { /* * when the old and new regions overlap clear from new_end. @@ -625,7 +625,7 @@ static int shift_arg_pages(struct vm_area_struct *vma, unsigned long shift) free_pgd_range(&tlb, old_start, old_end, new_end, vma->vm_next ? vma->vm_next->vm_start : USER_PGTABLES_CEILING); } - tlb_finish_mmu(&tlb, new_end, old_end); + tlb_finish_mmu(&tlb, old_start, old_end); /* * Shrink the vma to just the new range. Always succeeds. 
diff --git a/fs/ext3/namei.c b/fs/ext3/namei.c index 998ea11..1194b1f 100644 --- a/fs/ext3/namei.c +++ b/fs/ext3/namei.c @@ -1780,11 +1780,11 @@ retry: inode->i_op = &ext3_file_inode_operations; inode->i_fop = &ext3_file_operations; ext3_set_aops(inode); + d_tmpfile(dentry, inode); err = ext3_orphan_add(handle, inode); if (err) goto err_drop_inode; mark_inode_dirty(inode); - d_tmpfile(dentry, inode); unlock_new_inode(inode); } ext3_journal_stop(handle); diff --git a/fs/ext4/balloc.c b/fs/ext4/balloc.c index 5833939..ddd715e 100644 --- a/fs/ext4/balloc.c +++ b/fs/ext4/balloc.c @@ -38,8 +38,8 @@ ext4_group_t ext4_get_group_number(struct super_block *sb, ext4_group_t group; if (test_opt2(sb, STD_GROUP_SIZE)) - group = (le32_to_cpu(EXT4_SB(sb)->s_es->s_first_data_block) + - block) >> + group = (block - + le32_to_cpu(EXT4_SB(sb)->s_es->s_first_data_block)) >> (EXT4_BLOCK_SIZE_BITS(sb) + EXT4_CLUSTER_BITS(sb) + 3); else ext4_get_group_no_and_offset(sb, block, &group, NULL); diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index b577e45..0ab26fb 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h @@ -2086,6 +2086,7 @@ extern int ext4_sync_inode(handle_t *, struct inode *); extern void ext4_dirty_inode(struct inode *, int); extern int ext4_change_inode_journal_flag(struct inode *, int); extern int ext4_get_inode_loc(struct inode *, struct ext4_iloc *); +extern int ext4_inode_attach_jinode(struct inode *inode); extern int ext4_can_truncate(struct inode *inode); extern void ext4_truncate(struct inode *); extern int ext4_punch_hole(struct inode *inode, loff_t offset, loff_t length); diff --git a/fs/ext4/ext4_jbd2.c b/fs/ext4/ext4_jbd2.c index 72a3600..17ac112 100644 --- a/fs/ext4/ext4_jbd2.c +++ b/fs/ext4/ext4_jbd2.c @@ -255,10 +255,10 @@ int __ext4_handle_dirty_metadata(const char *where, unsigned int line, set_buffer_prio(bh); if (ext4_handle_valid(handle)) { err = jbd2_journal_dirty_metadata(handle, bh); - if (err) { - /* Errors can only happen if there is a bug */ - 
handle->h_err = err; - __ext4_journal_stop(where, line, handle); + /* Errors can only happen if there is a bug */ + if (WARN_ON_ONCE(err)) { + ext4_journal_abort_handle(where, line, __func__, bh, + handle, err); } } else { if (inode) diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c index 7097b0f..72ba470 100644 --- a/fs/ext4/extents.c +++ b/fs/ext4/extents.c @@ -2835,6 +2835,9 @@ again: err = -EIO; break; } + /* Yield here to deal with large extent trees. + * Should be a no-op if we did IO above. */ + cond_resched(); if (WARN_ON(i + 1 > depth)) { err = -EIO; break; @@ -4261,8 +4264,8 @@ got_allocated_blocks: /* not a good idea to call discard here directly, * but otherwise we'd need to call it every free() */ ext4_discard_preallocations(inode); - ext4_free_blocks(handle, inode, NULL, ext4_ext_pblock(&newex), - ext4_ext_get_actual_len(&newex), fb_flags); + ext4_free_blocks(handle, inode, NULL, newblock, + EXT4_C2B(sbi, allocated_clusters), fb_flags); goto out2; } @@ -4382,8 +4385,9 @@ out2: } out3: - trace_ext4_ext_map_blocks_exit(inode, flags, map, err ? err : allocated); - + trace_ext4_ext_map_blocks_exit(inode, flags, map, + err ? err : allocated); + ext4_es_lru_add(inode); return err ? 
err : allocated; } @@ -4405,9 +4409,20 @@ void ext4_ext_truncate(handle_t *handle, struct inode *inode) last_block = (inode->i_size + sb->s_blocksize - 1) >> EXT4_BLOCK_SIZE_BITS(sb); +retry: err = ext4_es_remove_extent(inode, last_block, EXT_MAX_BLOCKS - last_block); + if (err == -ENOMEM) { + cond_resched(); + congestion_wait(BLK_RW_ASYNC, HZ/50); + goto retry; + } + if (err) { + ext4_std_error(inode->i_sb, err); + return; + } err = ext4_ext_remove_space(inode, last_block, EXT_MAX_BLOCKS - 1); + ext4_std_error(inode->i_sb, err); } static void ext4_falloc_update_inode(struct inode *inode, diff --git a/fs/ext4/extents_status.c b/fs/ext4/extents_status.c index ee018d5..91cb110 100644 --- a/fs/ext4/extents_status.c +++ b/fs/ext4/extents_status.c @@ -148,6 +148,8 @@ static int __es_remove_extent(struct inode *inode, ext4_lblk_t lblk, ext4_lblk_t end); static int __es_try_to_reclaim_extents(struct ext4_inode_info *ei, int nr_to_scan); +static int __ext4_es_shrink(struct ext4_sb_info *sbi, int nr_to_scan, + struct ext4_inode_info *locked_ei); int __init ext4_init_es(void) { @@ -439,7 +441,7 @@ static void ext4_es_insert_extent_ext_check(struct inode *inode, */ if (!ext4_es_is_written(es) && !ext4_es_is_unwritten(es)) { if (in_range(es->es_lblk, ee_block, ee_len)) { - pr_warn("ES insert assertation failed for " + pr_warn("ES insert assertion failed for " "inode: %lu we can find an extent " "at block [%d/%d/%llu/%c], but we " "want to add an delayed/hole extent " @@ -458,7 +460,7 @@ static void ext4_es_insert_extent_ext_check(struct inode *inode, */ if (es->es_lblk < ee_block || ext4_es_pblock(es) != ee_start + es->es_lblk - ee_block) { - pr_warn("ES insert assertation failed for inode: %lu " + pr_warn("ES insert assertion failed for inode: %lu " "ex_status [%d/%d/%llu/%c] != " "es_status [%d/%d/%llu/%c]\n", inode->i_ino, ee_block, ee_len, ee_start, @@ -468,7 +470,7 @@ static void ext4_es_insert_extent_ext_check(struct inode *inode, } if (ee_status ^ es_status) { - 
pr_warn("ES insert assertation failed for inode: %lu " + pr_warn("ES insert assertion failed for inode: %lu " "ex_status [%d/%d/%llu/%c] != " "es_status [%d/%d/%llu/%c]\n", inode->i_ino, ee_block, ee_len, ee_start, @@ -481,7 +483,7 @@ static void ext4_es_insert_extent_ext_check(struct inode *inode, * that we don't want to add an written/unwritten extent. */ if (!ext4_es_is_delayed(es) && !ext4_es_is_hole(es)) { - pr_warn("ES insert assertation failed for inode: %lu " + pr_warn("ES insert assertion failed for inode: %lu " "can't find an extent at block %d but we want " "to add an written/unwritten extent " "[%d/%d/%llu/%llx]\n", inode->i_ino, @@ -519,7 +521,7 @@ static void ext4_es_insert_extent_ind_check(struct inode *inode, * We want to add a delayed/hole extent but this * block has been allocated. */ - pr_warn("ES insert assertation failed for inode: %lu " + pr_warn("ES insert assertion failed for inode: %lu " "We can find blocks but we want to add a " "delayed/hole extent [%d/%d/%llu/%llx]\n", inode->i_ino, es->es_lblk, es->es_len, @@ -527,13 +529,13 @@ static void ext4_es_insert_extent_ind_check(struct inode *inode, return; } else if (ext4_es_is_written(es)) { if (retval != es->es_len) { - pr_warn("ES insert assertation failed for " + pr_warn("ES insert assertion failed for " "inode: %lu retval %d != es_len %d\n", inode->i_ino, retval, es->es_len); return; } if (map.m_pblk != ext4_es_pblock(es)) { - pr_warn("ES insert assertation failed for " + pr_warn("ES insert assertion failed for " "inode: %lu m_pblk %llu != " "es_pblk %llu\n", inode->i_ino, map.m_pblk, @@ -549,7 +551,7 @@ static void ext4_es_insert_extent_ind_check(struct inode *inode, } } else if (retval == 0) { if (ext4_es_is_written(es)) { - pr_warn("ES insert assertation failed for inode: %lu " + pr_warn("ES insert assertion failed for inode: %lu " "We can't find the block but we want to add " "an written extent [%d/%d/%llu/%llx]\n", inode->i_ino, es->es_lblk, es->es_len, @@ -632,10 +634,8 @@ out: } /* 
- * ext4_es_insert_extent() adds a space to a extent status tree. - * - * ext4_es_insert_extent is called by ext4_da_write_begin and - * ext4_es_remove_extent. + * ext4_es_insert_extent() adds information to an inode's extent + * status tree. * * Return 0 on success, error code on failure. */ @@ -667,7 +667,13 @@ int ext4_es_insert_extent(struct inode *inode, ext4_lblk_t lblk, err = __es_remove_extent(inode, lblk, end); if (err != 0) goto error; +retry: err = __es_insert_extent(inode, &newes); + if (err == -ENOMEM && __ext4_es_shrink(EXT4_SB(inode->i_sb), 1, + EXT4_I(inode))) + goto retry; + if (err == -ENOMEM && !ext4_es_is_delayed(&newes)) + err = 0; error: write_unlock(&EXT4_I(inode)->i_es_lock); @@ -746,8 +752,10 @@ static int __es_remove_extent(struct inode *inode, ext4_lblk_t lblk, struct extent_status orig_es; ext4_lblk_t len1, len2; ext4_fsblk_t block; - int err = 0; + int err; +retry: + err = 0; es = __es_tree_search(&tree->root, lblk); if (!es) goto out; @@ -782,6 +790,10 @@ static int __es_remove_extent(struct inode *inode, ext4_lblk_t lblk, if (err) { es->es_lblk = orig_es.es_lblk; es->es_len = orig_es.es_len; + if ((err == -ENOMEM) && + __ext4_es_shrink(EXT4_SB(inode->i_sb), 1, + EXT4_I(inode))) + goto retry; goto out; } } else { @@ -891,22 +903,14 @@ static int ext4_inode_touch_time_cmp(void *priv, struct list_head *a, return -1; } -static int ext4_es_shrink(struct shrinker *shrink, struct shrink_control *sc) +static int __ext4_es_shrink(struct ext4_sb_info *sbi, int nr_to_scan, + struct ext4_inode_info *locked_ei) { - struct ext4_sb_info *sbi = container_of(shrink, - struct ext4_sb_info, s_es_shrinker); struct ext4_inode_info *ei; struct list_head *cur, *tmp; LIST_HEAD(skiped); - int nr_to_scan = sc->nr_to_scan; int ret, nr_shrunk = 0; - ret = percpu_counter_read_positive(&sbi->s_extent_cache_cnt); - trace_ext4_es_shrink_enter(sbi->s_sb, nr_to_scan, ret); - - if (!nr_to_scan) - return ret; - spin_lock(&sbi->s_es_lru_lock); /* @@ -935,7 +939,7 @@ 
static int ext4_es_shrink(struct shrinker *shrink, struct shrink_control *sc) continue; } - if (ei->i_es_lru_nr == 0) + if (ei->i_es_lru_nr == 0 || ei == locked_ei) continue; write_lock(&ei->i_es_lock); @@ -954,6 +958,27 @@ static int ext4_es_shrink(struct shrinker *shrink, struct shrink_control *sc) list_splice_tail(&skiped, &sbi->s_es_lru); spin_unlock(&sbi->s_es_lru_lock); + if (locked_ei && nr_shrunk == 0) + nr_shrunk = __es_try_to_reclaim_extents(ei, nr_to_scan); + + return nr_shrunk; +} + +static int ext4_es_shrink(struct shrinker *shrink, struct shrink_control *sc) +{ + struct ext4_sb_info *sbi = container_of(shrink, + struct ext4_sb_info, s_es_shrinker); + int nr_to_scan = sc->nr_to_scan; + int ret, nr_shrunk; + + ret = percpu_counter_read_positive(&sbi->s_extent_cache_cnt); + trace_ext4_es_shrink_enter(sbi->s_sb, nr_to_scan, ret); + + if (!nr_to_scan) + return ret; + + nr_shrunk = __ext4_es_shrink(sbi, nr_to_scan, NULL); + ret = percpu_counter_read_positive(&sbi->s_extent_cache_cnt); trace_ext4_es_shrink_exit(sbi->s_sb, nr_shrunk, ret); return ret; diff --git a/fs/ext4/file.c b/fs/ext4/file.c index 6f4cc56..319c9d2 100644 --- a/fs/ext4/file.c +++ b/fs/ext4/file.c @@ -219,7 +219,6 @@ static int ext4_file_open(struct inode * inode, struct file * filp) { struct super_block *sb = inode->i_sb; struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); - struct ext4_inode_info *ei = EXT4_I(inode); struct vfsmount *mnt = filp->f_path.mnt; struct path path; char buf[64], *cp; @@ -259,22 +258,10 @@ static int ext4_file_open(struct inode * inode, struct file * filp) * Set up the jbd2_inode if we are opening the inode for * writing and the journal is present */ - if (sbi->s_journal && !ei->jinode && (filp->f_mode & FMODE_WRITE)) { - struct jbd2_inode *jinode = jbd2_alloc_inode(GFP_KERNEL); - - spin_lock(&inode->i_lock); - if (!ei->jinode) { - if (!jinode) { - spin_unlock(&inode->i_lock); - return -ENOMEM; - } - ei->jinode = jinode; - jbd2_journal_init_jbd_inode(ei->jinode, 
inode); - jinode = NULL; - } - spin_unlock(&inode->i_lock); - if (unlikely(jinode != NULL)) - jbd2_free_inode(jinode); + if (filp->f_mode & FMODE_WRITE) { + int ret = ext4_inode_attach_jinode(inode); + if (ret < 0) + return ret; } return dquot_file_open(inode, filp); } diff --git a/fs/ext4/ialloc.c b/fs/ext4/ialloc.c index f03598c..8bf5999 100644 --- a/fs/ext4/ialloc.c +++ b/fs/ext4/ialloc.c @@ -734,11 +734,8 @@ repeat_in_this_group: ino = ext4_find_next_zero_bit((unsigned long *) inode_bitmap_bh->b_data, EXT4_INODES_PER_GROUP(sb), ino); - if (ino >= EXT4_INODES_PER_GROUP(sb)) { - if (++group == ngroups) - group = 0; - continue; - } + if (ino >= EXT4_INODES_PER_GROUP(sb)) + goto next_group; if (group == 0 && (ino+1) < EXT4_FIRST_INO(sb)) { ext4_error(sb, "reserved inode found cleared - " "inode=%lu", ino + 1); @@ -769,6 +766,9 @@ repeat_in_this_group: goto got; /* we grabbed the inode! */ if (ino < EXT4_INODES_PER_GROUP(sb)) goto repeat_in_this_group; +next_group: + if (++group == ngroups) + group = 0; } err = -ENOSPC; goto out; diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index 0188e65..c2ca04e 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -465,7 +465,7 @@ static void ext4_map_blocks_es_recheck(handle_t *handle, if (es_map->m_lblk != map->m_lblk || es_map->m_flags != map->m_flags || es_map->m_pblk != map->m_pblk) { - printk("ES cache assertation failed for inode: %lu " + printk("ES cache assertion failed for inode: %lu " "es_cached ex [%d/%d/%llu/%x] != " "found ex [%d/%d/%llu/%x] retval %d flags %x\n", inode->i_ino, es_map->m_lblk, es_map->m_len, @@ -514,10 +514,9 @@ int ext4_map_blocks(handle_t *handle, struct inode *inode, "logical block %lu\n", inode->i_ino, flags, map->m_len, (unsigned long) map->m_lblk); - ext4_es_lru_add(inode); - /* Lookup extent status tree firstly */ if (ext4_es_lookup_extent(inode, map->m_lblk, &es)) { + ext4_es_lru_add(inode); if (ext4_es_is_written(&es) || ext4_es_is_unwritten(&es)) { map->m_pblk = ext4_es_pblock(&es) + 
map->m_lblk - es.es_lblk; @@ -556,14 +555,13 @@ int ext4_map_blocks(handle_t *handle, struct inode *inode, int ret; unsigned long long status; -#ifdef ES_AGGRESSIVE_TEST - if (retval != map->m_len) { - printk("ES len assertation failed for inode: %lu " - "retval %d != map->m_len %d " - "in %s (lookup)\n", inode->i_ino, retval, - map->m_len, __func__); + if (unlikely(retval != map->m_len)) { + ext4_warning(inode->i_sb, + "ES len assertion failed for inode " + "%lu: retval %d != map->m_len %d", + inode->i_ino, retval, map->m_len); + WARN_ON(1); } -#endif status = map->m_flags & EXT4_MAP_UNWRITTEN ? EXTENT_STATUS_UNWRITTEN : EXTENT_STATUS_WRITTEN; @@ -657,14 +655,13 @@ found: int ret; unsigned long long status; -#ifdef ES_AGGRESSIVE_TEST - if (retval != map->m_len) { - printk("ES len assertation failed for inode: %lu " - "retval %d != map->m_len %d " - "in %s (allocation)\n", inode->i_ino, retval, - map->m_len, __func__); + if (unlikely(retval != map->m_len)) { + ext4_warning(inode->i_sb, + "ES len assertion failed for inode " + "%lu: retval %d != map->m_len %d", + inode->i_ino, retval, map->m_len); + WARN_ON(1); } -#endif /* * If the extent has been zeroed out, we don't need to update @@ -1529,11 +1526,9 @@ static int ext4_da_map_blocks(struct inode *inode, sector_t iblock, "logical block %lu\n", inode->i_ino, map->m_len, (unsigned long) map->m_lblk); - ext4_es_lru_add(inode); - /* Lookup extent status tree firstly */ if (ext4_es_lookup_extent(inode, iblock, &es)) { - + ext4_es_lru_add(inode); if (ext4_es_is_hole(&es)) { retval = 0; down_read((&EXT4_I(inode)->i_data_sem)); @@ -1640,14 +1635,13 @@ add_delayed: int ret; unsigned long long status; -#ifdef ES_AGGRESSIVE_TEST - if (retval != map->m_len) { - printk("ES len assertation failed for inode: %lu " - "retval %d != map->m_len %d " - "in %s (lookup)\n", inode->i_ino, retval, - map->m_len, __func__); + if (unlikely(retval != map->m_len)) { + ext4_warning(inode->i_sb, + "ES len assertion failed for inode " + "%lu: 
retval %d != map->m_len %d", + inode->i_ino, retval, map->m_len); + WARN_ON(1); } -#endif status = map->m_flags & EXT4_MAP_UNWRITTEN ? EXTENT_STATUS_UNWRITTEN : EXTENT_STATUS_WRITTEN; @@ -2163,7 +2157,7 @@ static int mpage_map_and_submit_extent(handle_t *handle, mpd->io_submit.io_end->offset = ((loff_t)map->m_lblk) << inode->i_blkbits; - while (map->m_len) { + do { err = mpage_map_one_extent(handle, mpd); if (err < 0) { struct super_block *sb = inode->i_sb; @@ -2201,7 +2195,7 @@ static int mpage_map_and_submit_extent(handle_t *handle, err = mpage_map_and_submit_buffers(mpd); if (err < 0) return err; - } + } while (map->m_len); /* Update on-disk size after IO is submitted */ disksize = ((loff_t)mpd->first_page) << PAGE_CACHE_SHIFT; @@ -3539,6 +3533,18 @@ int ext4_punch_hole(struct inode *inode, loff_t offset, loff_t length) offset; } + if (offset & (sb->s_blocksize - 1) || + (offset + length) & (sb->s_blocksize - 1)) { + /* + * Attach jinode to inode for jbd2 if we do any zeroing of + * partial block + */ + ret = ext4_inode_attach_jinode(inode); + if (ret < 0) + goto out_mutex; + + } + first_block_offset = round_up(offset, sb->s_blocksize); last_block_offset = round_down((offset + length), sb->s_blocksize) - 1; @@ -3607,6 +3613,31 @@ out_mutex: return ret; } +int ext4_inode_attach_jinode(struct inode *inode) +{ + struct ext4_inode_info *ei = EXT4_I(inode); + struct jbd2_inode *jinode; + + if (ei->jinode || !EXT4_SB(inode->i_sb)->s_journal) + return 0; + + jinode = jbd2_alloc_inode(GFP_KERNEL); + spin_lock(&inode->i_lock); + if (!ei->jinode) { + if (!jinode) { + spin_unlock(&inode->i_lock); + return -ENOMEM; + } + ei->jinode = jinode; + jbd2_journal_init_jbd_inode(ei->jinode, inode); + jinode = NULL; + } + spin_unlock(&inode->i_lock); + if (unlikely(jinode != NULL)) + jbd2_free_inode(jinode); + return 0; +} + /* * ext4_truncate() * @@ -3667,6 +3698,12 @@ void ext4_truncate(struct inode *inode) return; } + /* If we zero-out tail of the page, we have to create jinode 
for jbd2 */ + if (inode->i_size & (inode->i_sb->s_blocksize - 1)) { + if (ext4_inode_attach_jinode(inode) < 0) + return; + } + if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)) credits = ext4_writepage_trans_blocks(inode); else diff --git a/fs/ext4/ioctl.c b/fs/ext4/ioctl.c index 9491ac0..c0427e2 100644 --- a/fs/ext4/ioctl.c +++ b/fs/ext4/ioctl.c @@ -77,8 +77,10 @@ static void swap_inode_data(struct inode *inode1, struct inode *inode2) memswap(ei1->i_data, ei2->i_data, sizeof(ei1->i_data)); memswap(&ei1->i_flags, &ei2->i_flags, sizeof(ei1->i_flags)); memswap(&ei1->i_disksize, &ei2->i_disksize, sizeof(ei1->i_disksize)); - memswap(&ei1->i_es_tree, &ei2->i_es_tree, sizeof(ei1->i_es_tree)); - memswap(&ei1->i_es_lru_nr, &ei2->i_es_lru_nr, sizeof(ei1->i_es_lru_nr)); + ext4_es_remove_extent(inode1, 0, EXT_MAX_BLOCKS); + ext4_es_remove_extent(inode2, 0, EXT_MAX_BLOCKS); + ext4_es_lru_del(inode1); + ext4_es_lru_del(inode2); isize = i_size_read(inode1); i_size_write(inode1, i_size_read(inode2)); diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c index a9ff5e5..4bbbf13b 100644 --- a/fs/ext4/mballoc.c +++ b/fs/ext4/mballoc.c @@ -4740,11 +4740,16 @@ do_more: * blocks being freed are metadata. these blocks shouldn't * be used until this transaction is committed */ + retry: new_entry = kmem_cache_alloc(ext4_free_data_cachep, GFP_NOFS); if (!new_entry) { - ext4_mb_unload_buddy(&e4b); - err = -ENOMEM; - goto error_return; + /* + * We use a retry loop because + * ext4_free_blocks() is not allowed to fail. 
+ */ + cond_resched(); + congestion_wait(BLK_RW_ASYNC, HZ/50); + goto retry; } new_entry->efd_start_cluster = bit; new_entry->efd_group = block_group; diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c index 234b834..35f55a0 100644 --- a/fs/ext4/namei.c +++ b/fs/ext4/namei.c @@ -2316,11 +2316,11 @@ retry: inode->i_op = &ext4_file_inode_operations; inode->i_fop = &ext4_file_operations; ext4_set_aops(inode); + d_tmpfile(dentry, inode); err = ext4_orphan_add(handle, inode); if (err) goto err_drop_inode; mark_inode_dirty(inode); - d_tmpfile(dentry, inode); unlock_new_inode(inode); } if (handle) diff --git a/fs/ext4/page-io.c b/fs/ext4/page-io.c index 48786cd..6625d21 100644 --- a/fs/ext4/page-io.c +++ b/fs/ext4/page-io.c @@ -25,6 +25,7 @@ #include <linux/kernel.h> #include <linux/slab.h> #include <linux/mm.h> +#include <linux/ratelimit.h> #include "ext4_jbd2.h" #include "xattr.h" @@ -55,7 +56,7 @@ void ext4_exit_pageio(void) static void buffer_io_error(struct buffer_head *bh) { char b[BDEVNAME_SIZE]; - printk(KERN_ERR "Buffer I/O error on device %s, logical block %llu\n", + printk_ratelimited(KERN_ERR "Buffer I/O error on device %s, logical block %llu\n", bdevname(bh->b_bdev, b), (unsigned long long)bh->b_blocknr); } @@ -308,6 +309,7 @@ ext4_io_end_t *ext4_get_io_end(ext4_io_end_t *io_end) return io_end; } +/* BIO completion function for page writeback */ static void ext4_end_bio(struct bio *bio, int error) { ext4_io_end_t *io_end = bio->bi_private; @@ -318,18 +320,6 @@ static void ext4_end_bio(struct bio *bio, int error) if (test_bit(BIO_UPTODATE, &bio->bi_flags)) error = 0; - if (io_end->flag & EXT4_IO_END_UNWRITTEN) { - /* - * Link bio into list hanging from io_end. We have to do it - * atomically as bio completions can be racing against each - * other. 
- */ - bio->bi_private = xchg(&io_end->bio, bio); - } else { - ext4_finish_bio(bio); - bio_put(bio); - } - if (error) { struct inode *inode = io_end->inode; @@ -341,7 +331,24 @@ static void ext4_end_bio(struct bio *bio, int error) (unsigned long long) bi_sector >> (inode->i_blkbits - 9)); } - ext4_put_io_end_defer(io_end); + + if (io_end->flag & EXT4_IO_END_UNWRITTEN) { + /* + * Link bio into list hanging from io_end. We have to do it + * atomically as bio completions can be racing against each + * other. + */ + bio->bi_private = xchg(&io_end->bio, bio); + ext4_put_io_end_defer(io_end); + } else { + /* + * Drop io_end reference early. Inode can get freed once + * we finish the bio. + */ + ext4_put_io_end_defer(io_end); + ext4_finish_bio(bio); + bio_put(bio); + } } void ext4_io_submit(struct ext4_io_submit *io) diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 85b3dd6..b59373b 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -1359,7 +1359,7 @@ static const struct mount_opts { {Opt_delalloc, EXT4_MOUNT_DELALLOC, MOPT_EXT4_ONLY | MOPT_SET | MOPT_EXPLICIT}, {Opt_nodelalloc, EXT4_MOUNT_DELALLOC, - MOPT_EXT4_ONLY | MOPT_CLEAR | MOPT_EXPLICIT}, + MOPT_EXT4_ONLY | MOPT_CLEAR}, {Opt_journal_checksum, EXT4_MOUNT_JOURNAL_CHECKSUM, MOPT_EXT4_ONLY | MOPT_SET}, {Opt_journal_async_commit, (EXT4_MOUNT_JOURNAL_ASYNC_COMMIT | @@ -1702,12 +1702,6 @@ static inline void ext4_show_quota_options(struct seq_file *seq, if (sbi->s_qf_names[GRPQUOTA]) seq_printf(seq, ",grpjquota=%s", sbi->s_qf_names[GRPQUOTA]); - - if (test_opt(sb, USRQUOTA)) - seq_puts(seq, ",usrquota"); - - if (test_opt(sb, GRPQUOTA)) - seq_puts(seq, ",grpquota"); #endif } @@ -3489,7 +3483,7 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) } if (test_opt(sb, DIOREAD_NOLOCK)) { ext4_msg(sb, KERN_ERR, "can't mount with " - "both data=journal and delalloc"); + "both data=journal and dioread_nolock"); goto failed_mount; } if (test_opt(sb, DELALLOC)) @@ -3624,10 +3618,6 @@ static int 
ext4_fill_super(struct super_block *sb, void *data, int silent) sbi->s_addr_per_block_bits = ilog2(EXT4_ADDR_PER_BLOCK(sb)); sbi->s_desc_per_block_bits = ilog2(EXT4_DESC_PER_BLOCK(sb)); - /* Do we have standard group size of blocksize * 8 blocks ? */ - if (sbi->s_blocks_per_group == blocksize << 3) - set_opt2(sb, STD_GROUP_SIZE); - for (i = 0; i < 4; i++) sbi->s_hash_seed[i] = le32_to_cpu(es->s_hash_seed[i]); sbi->s_def_hash_version = es->s_def_hash_version; @@ -3697,6 +3687,10 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) goto failed_mount; } + /* Do we have standard group size of clustersize * 8 blocks ? */ + if (sbi->s_blocks_per_group == clustersize << 3) + set_opt2(sb, STD_GROUP_SIZE); + /* * Test whether we have more sectors than will fit in sector_t, * and whether the max offset is addressable by the page cache. @@ -4733,6 +4727,21 @@ static int ext4_remount(struct super_block *sb, int *flags, char *data) goto restore_opts; } + if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_JOURNAL_DATA) { + if (test_opt2(sb, EXPLICIT_DELALLOC)) { + ext4_msg(sb, KERN_ERR, "can't mount with " + "both data=journal and delalloc"); + err = -EINVAL; + goto restore_opts; + } + if (test_opt(sb, DIOREAD_NOLOCK)) { + ext4_msg(sb, KERN_ERR, "can't mount with " + "both data=journal and dioread_nolock"); + err = -EINVAL; + goto restore_opts; + } + } + if (sbi->s_mount_flags & EXT4_MF_FS_ABORTED) ext4_abort(sb, "Abort forced by user"); @@ -5487,6 +5496,7 @@ static void __exit ext4_exit_fs(void) kset_unregister(ext4_kset); ext4_exit_system_zone(); ext4_exit_pageio(); + ext4_exit_es(); } MODULE_AUTHOR("Remy Card, Stephen Tweedie, Andrew Morton, Andreas Dilger, Theodore Ts'o and others"); @@ -730,14 +730,14 @@ static int __init fcntl_init(void) * Exceptions: O_NONBLOCK is a two bit define on parisc; O_NDELAY * is defined as O_NONBLOCK on some platforms and not on others. 
*/ - BUILD_BUG_ON(19 - 1 /* for O_RDONLY being 0 */ != HWEIGHT32( + BUILD_BUG_ON(20 - 1 /* for O_RDONLY being 0 */ != HWEIGHT32( O_RDONLY | O_WRONLY | O_RDWR | O_CREAT | O_EXCL | O_NOCTTY | O_TRUNC | O_APPEND | /* O_NONBLOCK | */ __O_SYNC | O_DSYNC | FASYNC | O_DIRECT | O_LARGEFILE | O_DIRECTORY | O_NOFOLLOW | O_NOATIME | O_CLOEXEC | - __FMODE_EXEC | O_PATH + __FMODE_EXEC | O_PATH | __O_TMPFILE )); fasync_cache = kmem_cache_create("fasync_cache", diff --git a/fs/file_table.c b/fs/file_table.c index 08e719b..b44e4c5 100644 --- a/fs/file_table.c +++ b/fs/file_table.c @@ -265,18 +265,15 @@ static void __fput(struct file *file) mntput(mnt); } -static DEFINE_SPINLOCK(delayed_fput_lock); -static LIST_HEAD(delayed_fput_list); +static LLIST_HEAD(delayed_fput_list); static void delayed_fput(struct work_struct *unused) { - LIST_HEAD(head); - spin_lock_irq(&delayed_fput_lock); - list_splice_init(&delayed_fput_list, &head); - spin_unlock_irq(&delayed_fput_lock); - while (!list_empty(&head)) { - struct file *f = list_first_entry(&head, struct file, f_u.fu_list); - list_del_init(&f->f_u.fu_list); - __fput(f); + struct llist_node *node = llist_del_all(&delayed_fput_list); + struct llist_node *next; + + for (; node; node = next) { + next = llist_next(node); + __fput(llist_entry(node, struct file, f_u.fu_llist)); } } @@ -306,18 +303,22 @@ void fput(struct file *file) { if (atomic_long_dec_and_test(&file->f_count)) { struct task_struct *task = current; - unsigned long flags; file_sb_list_del(file); if (likely(!in_interrupt() && !(task->flags & PF_KTHREAD))) { init_task_work(&file->f_u.fu_rcuhead, ____fput); if (!task_work_add(task, &file->f_u.fu_rcuhead, true)) return; + /* + * After this task has run exit_task_work(), + * task_work_add() will fail. free_ipc_ns()-> + * shm_destroy() can do this. Fall through to delayed + * fput to avoid leaking *file. 
+ */ } - spin_lock_irqsave(&delayed_fput_lock, flags); - list_add(&file->f_u.fu_list, &delayed_fput_list); - schedule_work(&delayed_fput_work); - spin_unlock_irqrestore(&delayed_fput_lock, flags); + + if (llist_add(&file->f_u.fu_llist, &delayed_fput_list)) + schedule_work(&delayed_fput_work); } } diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c index 0eda527..72a5d5b 100644 --- a/fs/fuse/dir.c +++ b/fs/fuse/dir.c @@ -1223,30 +1223,46 @@ static int fuse_direntplus_link(struct file *file, if (name.name[1] == '.' && name.len == 2) return 0; } + + if (invalid_nodeid(o->nodeid)) + return -EIO; + if (!fuse_valid_type(o->attr.mode)) + return -EIO; + fc = get_fuse_conn(dir); name.hash = full_name_hash(name.name, name.len); dentry = d_lookup(parent, &name); - if (dentry && dentry->d_inode) { + if (dentry) { inode = dentry->d_inode; - if (get_node_id(inode) == o->nodeid) { + if (!inode) { + d_drop(dentry); + } else if (get_node_id(inode) != o->nodeid || + ((o->attr.mode ^ inode->i_mode) & S_IFMT)) { + err = d_invalidate(dentry); + if (err) + goto out; + } else if (is_bad_inode(inode)) { + err = -EIO; + goto out; + } else { struct fuse_inode *fi; fi = get_fuse_inode(inode); spin_lock(&fc->lock); fi->nlookup++; spin_unlock(&fc->lock); + fuse_change_attributes(inode, &o->attr, + entry_attr_timeout(o), + attr_version); + /* * The other branch to 'found' comes via fuse_iget() * which bumps nlookup inside */ goto found; } - err = d_invalidate(dentry); - if (err) - goto out; dput(dentry); - dentry = NULL; } dentry = d_alloc(parent, &name); @@ -1259,25 +1275,30 @@ static int fuse_direntplus_link(struct file *file, if (!inode) goto out; - alias = d_materialise_unique(dentry, inode); - err = PTR_ERR(alias); - if (IS_ERR(alias)) - goto out; + if (S_ISDIR(inode->i_mode)) { + mutex_lock(&fc->inst_mutex); + alias = fuse_d_add_directory(dentry, inode); + mutex_unlock(&fc->inst_mutex); + err = PTR_ERR(alias); + if (IS_ERR(alias)) { + iput(inode); + goto out; + } + } else { + alias = 
d_splice_alias(inode, dentry); + } + if (alias) { dput(dentry); dentry = alias; } found: - fuse_change_attributes(inode, &o->attr, entry_attr_timeout(o), - attr_version); - fuse_change_entry_timeout(dentry, o); err = 0; out: - if (dentry) - dput(dentry); + dput(dentry); return err; } diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c index a3f868a..3442397 100644 --- a/fs/hugetlbfs/inode.c +++ b/fs/hugetlbfs/inode.c @@ -463,6 +463,14 @@ static struct inode *hugetlbfs_get_root(struct super_block *sb, return inode; } +/* + * Hugetlbfs is not reclaimable; therefore its i_mmap_mutex will never + * be taken from reclaim -- unlike regular filesystems. This needs an + * annotation because huge_pmd_share() does an allocation under + * i_mmap_mutex. + */ +struct lock_class_key hugetlbfs_i_mmap_mutex_key; + static struct inode *hugetlbfs_get_inode(struct super_block *sb, struct inode *dir, umode_t mode, dev_t dev) @@ -474,6 +482,8 @@ static struct inode *hugetlbfs_get_inode(struct super_block *sb, struct hugetlbfs_inode_info *info; inode->i_ino = get_next_ino(); inode_init_owner(inode, dir, mode); + lockdep_set_class(&inode->i_mapping->i_mmap_mutex, + &hugetlbfs_i_mmap_mutex_key); inode->i_mapping->a_ops = &hugetlbfs_aops; inode->i_mapping->backing_dev_info =&hugetlbfs_backing_dev_info; inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME; diff --git a/fs/jfs/jfs_dmap.c b/fs/jfs/jfs_dmap.c index 9a55f53..370d7b6 100644 --- a/fs/jfs/jfs_dmap.c +++ b/fs/jfs/jfs_dmap.c @@ -346,8 +346,7 @@ int dbFree(struct inode *ip, s64 blkno, s64 nblocks) printk(KERN_ERR "blkno = %Lx, nblocks = %Lx\n", (unsigned long long) blkno, (unsigned long long) nblocks); - jfs_error(ip->i_sb, - "dbFree: block to be freed is outside the map"); + jfs_error(ip->i_sb, "block to be freed is outside the map\n"); return -EIO; } @@ -384,7 +383,7 @@ int dbFree(struct inode *ip, s64 blkno, s64 nblocks) /* free the blocks. 
*/ if ((rc = dbFreeDmap(bmp, dp, blkno, nb))) { - jfs_error(ip->i_sb, "dbFree: error in block map\n"); + jfs_error(ip->i_sb, "error in block map\n"); release_metapage(mp); IREAD_UNLOCK(ipbmap); return (rc); @@ -441,8 +440,7 @@ dbUpdatePMap(struct inode *ipbmap, printk(KERN_ERR "blkno = %Lx, nblocks = %Lx\n", (unsigned long long) blkno, (unsigned long long) nblocks); - jfs_error(ipbmap->i_sb, - "dbUpdatePMap: blocks are outside the map"); + jfs_error(ipbmap->i_sb, "blocks are outside the map\n"); return -EIO; } @@ -726,7 +724,7 @@ int dbAlloc(struct inode *ip, s64 hint, s64 nblocks, s64 * results) /* the hint should be within the map */ if (hint >= mapSize) { - jfs_error(ip->i_sb, "dbAlloc: the hint is outside the map"); + jfs_error(ip->i_sb, "the hint is outside the map\n"); return -EIO; } @@ -1057,8 +1055,7 @@ static int dbExtend(struct inode *ip, s64 blkno, s64 nblocks, s64 addnblocks) bmp = sbi->bmap; if (lastblkno < 0 || lastblkno >= bmp->db_mapsize) { IREAD_UNLOCK(ipbmap); - jfs_error(ip->i_sb, - "dbExtend: the block is outside the filesystem"); + jfs_error(ip->i_sb, "the block is outside the filesystem\n"); return -EIO; } @@ -1134,8 +1131,7 @@ static int dbAllocNext(struct bmap * bmp, struct dmap * dp, s64 blkno, u32 mask; if (dp->tree.leafidx != cpu_to_le32(LEAFIND)) { - jfs_error(bmp->db_ipbmap->i_sb, - "dbAllocNext: Corrupt dmap page"); + jfs_error(bmp->db_ipbmap->i_sb, "Corrupt dmap page\n"); return -EIO; } @@ -1265,8 +1261,7 @@ dbAllocNear(struct bmap * bmp, s8 *leaf; if (dp->tree.leafidx != cpu_to_le32(LEAFIND)) { - jfs_error(bmp->db_ipbmap->i_sb, - "dbAllocNear: Corrupt dmap page"); + jfs_error(bmp->db_ipbmap->i_sb, "Corrupt dmap page\n"); return -EIO; } @@ -1381,8 +1376,7 @@ dbAllocAG(struct bmap * bmp, int agno, s64 nblocks, int l2nb, s64 * results) */ if (l2nb > bmp->db_agl2size) { jfs_error(bmp->db_ipbmap->i_sb, - "dbAllocAG: allocation request is larger than the " - "allocation group size"); + "allocation request is larger than the allocation 
group size\n"); return -EIO; } @@ -1417,7 +1411,7 @@ dbAllocAG(struct bmap * bmp, int agno, s64 nblocks, int l2nb, s64 * results) (unsigned long long) blkno, (unsigned long long) nblocks); jfs_error(bmp->db_ipbmap->i_sb, - "dbAllocAG: dbAllocCtl failed in free AG"); + "dbAllocCtl failed in free AG\n"); } return (rc); } @@ -1433,8 +1427,7 @@ dbAllocAG(struct bmap * bmp, int agno, s64 nblocks, int l2nb, s64 * results) budmin = dcp->budmin; if (dcp->leafidx != cpu_to_le32(CTLLEAFIND)) { - jfs_error(bmp->db_ipbmap->i_sb, - "dbAllocAG: Corrupt dmapctl page"); + jfs_error(bmp->db_ipbmap->i_sb, "Corrupt dmapctl page\n"); release_metapage(mp); return -EIO; } @@ -1475,7 +1468,7 @@ dbAllocAG(struct bmap * bmp, int agno, s64 nblocks, int l2nb, s64 * results) } if (n == 4) { jfs_error(bmp->db_ipbmap->i_sb, - "dbAllocAG: failed descending stree"); + "failed descending stree\n"); release_metapage(mp); return -EIO; } @@ -1515,8 +1508,7 @@ dbAllocAG(struct bmap * bmp, int agno, s64 nblocks, int l2nb, s64 * results) &blkno))) { if (rc == -ENOSPC) { jfs_error(bmp->db_ipbmap->i_sb, - "dbAllocAG: control page " - "inconsistent"); + "control page inconsistent\n"); return -EIO; } return (rc); @@ -1528,7 +1520,7 @@ dbAllocAG(struct bmap * bmp, int agno, s64 nblocks, int l2nb, s64 * results) rc = dbAllocCtl(bmp, nblocks, l2nb, blkno, results); if (rc == -ENOSPC) { jfs_error(bmp->db_ipbmap->i_sb, - "dbAllocAG: unable to allocate blocks"); + "unable to allocate blocks\n"); rc = -EIO; } return (rc); @@ -1587,8 +1579,7 @@ static int dbAllocAny(struct bmap * bmp, s64 nblocks, int l2nb, s64 * results) */ rc = dbAllocCtl(bmp, nblocks, l2nb, blkno, results); if (rc == -ENOSPC) { - jfs_error(bmp->db_ipbmap->i_sb, - "dbAllocAny: unable to allocate blocks"); + jfs_error(bmp->db_ipbmap->i_sb, "unable to allocate blocks\n"); return -EIO; } return (rc); @@ -1652,8 +1643,7 @@ s64 dbDiscardAG(struct inode *ip, int agno, s64 minlen) range_cnt = min_t(u64, max_ranges + 1, 32 * 1024); totrim = 
kmalloc(sizeof(struct range2trim) * range_cnt, GFP_NOFS); if (totrim == NULL) { - jfs_error(bmp->db_ipbmap->i_sb, - "dbDiscardAG: no memory for trim array"); + jfs_error(bmp->db_ipbmap->i_sb, "no memory for trim array\n"); IWRITE_UNLOCK(ipbmap); return 0; } @@ -1682,8 +1672,7 @@ s64 dbDiscardAG(struct inode *ip, int agno, s64 minlen) nblocks = 1 << l2nb; } else { /* Trim any already allocated blocks */ - jfs_error(bmp->db_ipbmap->i_sb, - "dbDiscardAG: -EIO"); + jfs_error(bmp->db_ipbmap->i_sb, "-EIO\n"); break; } @@ -1761,7 +1750,7 @@ static int dbFindCtl(struct bmap * bmp, int l2nb, int level, s64 * blkno) if (dcp->leafidx != cpu_to_le32(CTLLEAFIND)) { jfs_error(bmp->db_ipbmap->i_sb, - "dbFindCtl: Corrupt dmapctl page"); + "Corrupt dmapctl page\n"); release_metapage(mp); return -EIO; } @@ -1782,7 +1771,7 @@ static int dbFindCtl(struct bmap * bmp, int l2nb, int level, s64 * blkno) if (rc) { if (lev != level) { jfs_error(bmp->db_ipbmap->i_sb, - "dbFindCtl: dmap inconsistent"); + "dmap inconsistent\n"); return -EIO; } return -ENOSPC; @@ -1906,7 +1895,7 @@ dbAllocCtl(struct bmap * bmp, s64 nblocks, int l2nb, s64 blkno, s64 * results) if (dp->tree.stree[ROOT] != L2BPERDMAP) { release_metapage(mp); jfs_error(bmp->db_ipbmap->i_sb, - "dbAllocCtl: the dmap is not all free"); + "the dmap is not all free\n"); rc = -EIO; goto backout; } @@ -1953,7 +1942,7 @@ dbAllocCtl(struct bmap * bmp, s64 nblocks, int l2nb, s64 blkno, s64 * results) * to indicate that we have leaked blocks. */ jfs_error(bmp->db_ipbmap->i_sb, - "dbAllocCtl: I/O Error: Block Leakage."); + "I/O Error: Block Leakage\n"); continue; } dp = (struct dmap *) mp->data; @@ -1965,8 +1954,7 @@ dbAllocCtl(struct bmap * bmp, s64 nblocks, int l2nb, s64 blkno, s64 * results) * to indicate that we have leaked blocks. 
*/ release_metapage(mp); - jfs_error(bmp->db_ipbmap->i_sb, - "dbAllocCtl: Block Leakage."); + jfs_error(bmp->db_ipbmap->i_sb, "Block Leakage\n"); continue; } @@ -2263,8 +2251,7 @@ static void dbAllocBits(struct bmap * bmp, struct dmap * dp, s64 blkno, for (; nwords > 0; nwords -= nw) { if (leaf[word] < BUDMIN) { jfs_error(bmp->db_ipbmap->i_sb, - "dbAllocBits: leaf page " - "corrupt"); + "leaf page corrupt\n"); break; } @@ -2536,8 +2523,7 @@ dbAdjCtl(struct bmap * bmp, s64 blkno, int newval, int alloc, int level) dcp = (struct dmapctl *) mp->data; if (dcp->leafidx != cpu_to_le32(CTLLEAFIND)) { - jfs_error(bmp->db_ipbmap->i_sb, - "dbAdjCtl: Corrupt dmapctl page"); + jfs_error(bmp->db_ipbmap->i_sb, "Corrupt dmapctl page\n"); release_metapage(mp); return -EIO; } @@ -2638,8 +2624,7 @@ dbAdjCtl(struct bmap * bmp, s64 blkno, int newval, int alloc, int level) assert(level == bmp->db_maxlevel); if (bmp->db_maxfreebud != oldroot) { jfs_error(bmp->db_ipbmap->i_sb, - "dbAdjCtl: the maximum free buddy is " - "not the old root"); + "the maximum free buddy is not the old root\n"); } bmp->db_maxfreebud = dcp->stree[ROOT]; } @@ -3481,7 +3466,7 @@ int dbExtendFS(struct inode *ipbmap, s64 blkno, s64 nblocks) p = BMAPBLKNO + nbperpage; /* L2 page */ l2mp = read_metapage(ipbmap, p, PSIZE, 0); if (!l2mp) { - jfs_error(ipbmap->i_sb, "dbExtendFS: L2 page could not be read"); + jfs_error(ipbmap->i_sb, "L2 page could not be read\n"); return -EIO; } l2dcp = (struct dmapctl *) l2mp->data; @@ -3646,8 +3631,7 @@ int dbExtendFS(struct inode *ipbmap, s64 blkno, s64 nblocks) } } /* for each L1 in a L2 */ - jfs_error(ipbmap->i_sb, - "dbExtendFS: function has not returned as expected"); + jfs_error(ipbmap->i_sb, "function has not returned as expected\n"); errout: if (l0mp) release_metapage(l0mp); @@ -3717,7 +3701,7 @@ void dbFinalizeBmap(struct inode *ipbmap) } if (bmp->db_agpref >= bmp->db_numag) { jfs_error(ipbmap->i_sb, - "cannot find ag with average freespace"); + "cannot find ag with average 
freespace\n"); } } diff --git a/fs/jfs/jfs_dtree.c b/fs/jfs/jfs_dtree.c index 9f4ed13..8743ba9 100644 --- a/fs/jfs/jfs_dtree.c +++ b/fs/jfs/jfs_dtree.c @@ -124,21 +124,21 @@ struct dtsplit { #define DT_PAGE(IP, MP) BT_PAGE(IP, MP, dtpage_t, i_dtroot) /* get page buffer for specified block address */ -#define DT_GETPAGE(IP, BN, MP, SIZE, P, RC)\ -{\ - BT_GETPAGE(IP, BN, MP, dtpage_t, SIZE, P, RC, i_dtroot)\ - if (!(RC))\ - {\ - if (((P)->header.nextindex > (((BN)==0)?DTROOTMAXSLOT:(P)->header.maxslot)) ||\ - ((BN) && ((P)->header.maxslot > DTPAGEMAXSLOT)))\ - {\ - BT_PUTPAGE(MP);\ - jfs_error((IP)->i_sb, "DT_GETPAGE: dtree page corrupt");\ - MP = NULL;\ - RC = -EIO;\ - }\ - }\ -} +#define DT_GETPAGE(IP, BN, MP, SIZE, P, RC) \ +do { \ + BT_GETPAGE(IP, BN, MP, dtpage_t, SIZE, P, RC, i_dtroot); \ + if (!(RC)) { \ + if (((P)->header.nextindex > \ + (((BN) == 0) ? DTROOTMAXSLOT : (P)->header.maxslot)) || \ + ((BN) && ((P)->header.maxslot > DTPAGEMAXSLOT))) { \ + BT_PUTPAGE(MP); \ + jfs_error((IP)->i_sb, \ + "DT_GETPAGE: dtree page corrupt\n"); \ + MP = NULL; \ + RC = -EIO; \ + } \ + } \ +} while (0) /* for consistency */ #define DT_PUTPAGE(MP) BT_PUTPAGE(MP) @@ -776,7 +776,7 @@ int dtSearch(struct inode *ip, struct component_name * key, ino_t * data, /* Something's corrupted, mark filesystem dirty so * chkdsk will fix it. 
*/ - jfs_error(sb, "stack overrun in dtSearch!"); + jfs_error(sb, "stack overrun!\n"); BT_STACK_DUMP(btstack); rc = -EIO; goto out; @@ -3247,8 +3247,7 @@ int jfs_readdir(struct file *file, struct dir_context *ctx) /* Sanity Check */ if (d_namleft == 0) { jfs_error(ip->i_sb, - "JFS:Dtree error: ino = " - "%ld, bn=%Ld, index = %d", + "JFS:Dtree error: ino = %ld, bn=%lld, index = %d\n", (long)ip->i_ino, (long long)bn, i); @@ -3368,7 +3367,7 @@ static int dtReadFirst(struct inode *ip, struct btstack * btstack) */ if (BT_STACK_FULL(btstack)) { DT_PUTPAGE(mp); - jfs_error(ip->i_sb, "dtReadFirst: btstack overrun"); + jfs_error(ip->i_sb, "btstack overrun\n"); BT_STACK_DUMP(btstack); return -EIO; } diff --git a/fs/jfs/jfs_extent.c b/fs/jfs/jfs_extent.c index e5fe850..2ae7d59 100644 --- a/fs/jfs/jfs_extent.c +++ b/fs/jfs/jfs_extent.c @@ -388,7 +388,7 @@ int extHint(struct inode *ip, s64 offset, xad_t * xp) if ((rc == 0) && xlen) { if (xlen != nbperpage) { - jfs_error(ip->i_sb, "extHint: corrupt xtree"); + jfs_error(ip->i_sb, "corrupt xtree\n"); rc = -EIO; } XADaddress(xp, xaddr); diff --git a/fs/jfs/jfs_imap.c b/fs/jfs/jfs_imap.c index f7e042b..f321986 100644 --- a/fs/jfs/jfs_imap.c +++ b/fs/jfs/jfs_imap.c @@ -386,7 +386,7 @@ int diRead(struct inode *ip) dp += rel_inode; if (ip->i_ino != le32_to_cpu(dp->di_number)) { - jfs_error(ip->i_sb, "diRead: i_ino != di_number"); + jfs_error(ip->i_sb, "i_ino != di_number\n"); rc = -EIO; } else if (le32_to_cpu(dp->di_nlink) == 0) rc = -ESTALE; @@ -625,7 +625,7 @@ int diWrite(tid_t tid, struct inode *ip) if (!addressPXD(&(jfs_ip->ixpxd)) || (lengthPXD(&(jfs_ip->ixpxd)) != JFS_IP(ipimap)->i_imap->im_nbperiext)) { - jfs_error(ip->i_sb, "diWrite: ixpxd invalid"); + jfs_error(ip->i_sb, "ixpxd invalid\n"); return -EIO; } @@ -893,8 +893,7 @@ int diFree(struct inode *ip) if (iagno >= imap->im_nextiag) { print_hex_dump(KERN_ERR, "imap: ", DUMP_PREFIX_ADDRESS, 16, 4, imap, 32, 0); - jfs_error(ip->i_sb, - "diFree: inum = %d, iagno = %d, nextiag = 
%d", + jfs_error(ip->i_sb, "inum = %d, iagno = %d, nextiag = %d\n", (uint) inum, iagno, imap->im_nextiag); return -EIO; } @@ -930,15 +929,14 @@ int diFree(struct inode *ip) mask = HIGHORDER >> bitno; if (!(le32_to_cpu(iagp->wmap[extno]) & mask)) { - jfs_error(ip->i_sb, - "diFree: wmap shows inode already free"); + jfs_error(ip->i_sb, "wmap shows inode already free\n"); } if (!addressPXD(&iagp->inoext[extno])) { release_metapage(mp); IREAD_UNLOCK(ipimap); AG_UNLOCK(imap, agno); - jfs_error(ip->i_sb, "diFree: invalid inoext"); + jfs_error(ip->i_sb, "invalid inoext\n"); return -EIO; } @@ -950,7 +948,7 @@ int diFree(struct inode *ip) release_metapage(mp); IREAD_UNLOCK(ipimap); AG_UNLOCK(imap, agno); - jfs_error(ip->i_sb, "diFree: numfree > numinos"); + jfs_error(ip->i_sb, "numfree > numinos\n"); return -EIO; } /* @@ -1199,7 +1197,7 @@ int diFree(struct inode *ip) * for the inode being freed. */ if (iagp->pmap[extno] != 0) { - jfs_error(ip->i_sb, "diFree: the pmap does not show inode free"); + jfs_error(ip->i_sb, "the pmap does not show inode free\n"); } iagp->wmap[extno] = 0; PXDlength(&iagp->inoext[extno], 0); @@ -1518,8 +1516,7 @@ int diAlloc(struct inode *pip, bool dir, struct inode *ip) release_metapage(mp); AG_UNLOCK(imap, agno); jfs_error(ip->i_sb, - "diAlloc: can't find free bit " - "in wmap"); + "can't find free bit in wmap\n"); return -EIO; } @@ -1660,7 +1657,7 @@ diAllocAG(struct inomap * imap, int agno, bool dir, struct inode *ip) numinos = imap->im_agctl[agno].numinos; if (numfree > numinos) { - jfs_error(ip->i_sb, "diAllocAG: numfree > numinos"); + jfs_error(ip->i_sb, "numfree > numinos\n"); return -EIO; } @@ -1811,8 +1808,7 @@ static int diAllocIno(struct inomap * imap, int agno, struct inode *ip) if (!iagp->nfreeinos) { IREAD_UNLOCK(imap->im_ipimap); release_metapage(mp); - jfs_error(ip->i_sb, - "diAllocIno: nfreeinos = 0, but iag on freelist"); + jfs_error(ip->i_sb, "nfreeinos = 0, but iag on freelist\n"); return -EIO; } @@ -1824,7 +1820,7 @@ static int 
diAllocIno(struct inomap * imap, int agno, struct inode *ip) IREAD_UNLOCK(imap->im_ipimap); release_metapage(mp); jfs_error(ip->i_sb, - "diAllocIno: free inode not found in summary map"); + "free inode not found in summary map\n"); return -EIO; } @@ -1839,7 +1835,7 @@ static int diAllocIno(struct inomap * imap, int agno, struct inode *ip) if (rem >= EXTSPERSUM) { IREAD_UNLOCK(imap->im_ipimap); release_metapage(mp); - jfs_error(ip->i_sb, "diAllocIno: no free extent found"); + jfs_error(ip->i_sb, "no free extent found\n"); return -EIO; } extno = (sword << L2EXTSPERSUM) + rem; @@ -1850,7 +1846,7 @@ static int diAllocIno(struct inomap * imap, int agno, struct inode *ip) if (rem >= INOSPEREXT) { IREAD_UNLOCK(imap->im_ipimap); release_metapage(mp); - jfs_error(ip->i_sb, "diAllocIno: free inode not found"); + jfs_error(ip->i_sb, "free inode not found\n"); return -EIO; } @@ -1936,7 +1932,7 @@ static int diAllocExt(struct inomap * imap, int agno, struct inode *ip) IREAD_LOCK(imap->im_ipimap, RDWRLOCK_IMAP); if ((rc = diIAGRead(imap, iagno, &mp))) { IREAD_UNLOCK(imap->im_ipimap); - jfs_error(ip->i_sb, "diAllocExt: error reading iag"); + jfs_error(ip->i_sb, "error reading iag\n"); return rc; } iagp = (struct iag *) mp->data; @@ -1948,8 +1944,7 @@ static int diAllocExt(struct inomap * imap, int agno, struct inode *ip) if (sword >= SMAPSZ) { release_metapage(mp); IREAD_UNLOCK(imap->im_ipimap); - jfs_error(ip->i_sb, - "diAllocExt: free ext summary map not found"); + jfs_error(ip->i_sb, "free ext summary map not found\n"); return -EIO; } if (~iagp->extsmap[sword]) @@ -1962,7 +1957,7 @@ static int diAllocExt(struct inomap * imap, int agno, struct inode *ip) if (rem >= EXTSPERSUM) { release_metapage(mp); IREAD_UNLOCK(imap->im_ipimap); - jfs_error(ip->i_sb, "diAllocExt: free extent not found"); + jfs_error(ip->i_sb, "free extent not found\n"); return -EIO; } extno = (sword << L2EXTSPERSUM) + rem; @@ -2081,8 +2076,7 @@ static int diAllocBit(struct inomap * imap, struct iag * iagp, 
int ino) if (bmp) release_metapage(bmp); - jfs_error(imap->im_ipimap->i_sb, - "diAllocBit: iag inconsistent"); + jfs_error(imap->im_ipimap->i_sb, "iag inconsistent\n"); return -EIO; } @@ -2189,7 +2183,7 @@ static int diNewExt(struct inomap * imap, struct iag * iagp, int extno) /* better have free extents. */ if (!iagp->nfreeexts) { - jfs_error(imap->im_ipimap->i_sb, "diNewExt: no free extents"); + jfs_error(imap->im_ipimap->i_sb, "no free extents\n"); return -EIO; } @@ -2261,7 +2255,7 @@ static int diNewExt(struct inomap * imap, struct iag * iagp, int extno) } if (ciagp == NULL) { jfs_error(imap->im_ipimap->i_sb, - "diNewExt: ciagp == NULL"); + "ciagp == NULL\n"); rc = -EIO; goto error_out; } @@ -2498,7 +2492,7 @@ diNewIAG(struct inomap * imap, int *iagnop, int agno, struct metapage ** mpp) IWRITE_UNLOCK(ipimap); IAGFREE_UNLOCK(imap); jfs_error(imap->im_ipimap->i_sb, - "diNewIAG: ipimap->i_size is wrong"); + "ipimap->i_size is wrong\n"); return -EIO; } @@ -2758,8 +2752,7 @@ diUpdatePMap(struct inode *ipimap, iagno = INOTOIAG(inum); /* make sure that the iag is contained within the map */ if (iagno >= imap->im_nextiag) { - jfs_error(ipimap->i_sb, - "diUpdatePMap: the iag is outside the map"); + jfs_error(ipimap->i_sb, "the iag is outside the map\n"); return -EIO; } /* read the iag */ @@ -2788,13 +2781,13 @@ diUpdatePMap(struct inode *ipimap, */ if (!(le32_to_cpu(iagp->wmap[extno]) & mask)) { jfs_error(ipimap->i_sb, - "diUpdatePMap: inode %ld not marked as " - "allocated in wmap!", inum); + "inode %ld not marked as allocated in wmap!\n", + inum); } if (!(le32_to_cpu(iagp->pmap[extno]) & mask)) { jfs_error(ipimap->i_sb, - "diUpdatePMap: inode %ld not marked as " - "allocated in pmap!", inum); + "inode %ld not marked as allocated in pmap!\n", + inum); } /* update the bitmap for the extent of the freed inode */ iagp->pmap[extno] &= cpu_to_le32(~mask); @@ -2809,15 +2802,13 @@ diUpdatePMap(struct inode *ipimap, if (!(le32_to_cpu(iagp->wmap[extno]) & mask)) { 
release_metapage(mp); jfs_error(ipimap->i_sb, - "diUpdatePMap: the inode is not allocated in " - "the working map"); + "the inode is not allocated in the working map\n"); return -EIO; } if ((le32_to_cpu(iagp->pmap[extno]) & mask) != 0) { release_metapage(mp); jfs_error(ipimap->i_sb, - "diUpdatePMap: the inode is not free in the " - "persistent map"); + "the inode is not free in the persistent map\n"); return -EIO; } /* update the bitmap for the extent of the allocated inode */ @@ -2909,8 +2900,7 @@ int diExtendFS(struct inode *ipimap, struct inode *ipbmap) iagp = (struct iag *) bp->data; if (le32_to_cpu(iagp->iagnum) != i) { release_metapage(bp); - jfs_error(ipimap->i_sb, - "diExtendFs: unexpected value of iagnum"); + jfs_error(ipimap->i_sb, "unexpected value of iagnum\n"); return -EIO; } @@ -2986,8 +2976,7 @@ int diExtendFS(struct inode *ipimap, struct inode *ipbmap) if (xnuminos != atomic_read(&imap->im_numinos) || xnumfree != atomic_read(&imap->im_numfree)) { - jfs_error(ipimap->i_sb, - "diExtendFs: numinos or numfree incorrect"); + jfs_error(ipimap->i_sb, "numinos or numfree incorrect\n"); return -EIO; } diff --git a/fs/jfs/jfs_metapage.c b/fs/jfs/jfs_metapage.c index 9e3aaff..d165cde 100644 --- a/fs/jfs/jfs_metapage.c +++ b/fs/jfs/jfs_metapage.c @@ -647,7 +647,7 @@ struct metapage *__get_metapage(struct inode *inode, unsigned long lblock, if (mp) { if (mp->logical_size != size) { jfs_error(inode->i_sb, - "__get_metapage: mp->logical_size != size"); + "get_mp->logical_size != size\n"); jfs_err("logical_size = %d, size = %d", mp->logical_size, size); dump_stack(); @@ -658,8 +658,7 @@ struct metapage *__get_metapage(struct inode *inode, unsigned long lblock, if (test_bit(META_discard, &mp->flag)) { if (!new) { jfs_error(inode->i_sb, - "__get_metapage: using a " - "discarded metapage"); + "using a discarded metapage\n"); discard_metapage(mp); goto unlock; } diff --git a/fs/jfs/jfs_superblock.h b/fs/jfs/jfs_superblock.h index 884fc21..04847b8 100644 --- 
a/fs/jfs/jfs_superblock.h +++ b/fs/jfs/jfs_superblock.h @@ -108,6 +108,7 @@ struct jfs_superblock { extern int readSuper(struct super_block *, struct buffer_head **); extern int updateSuper(struct super_block *, uint); +__printf(2, 3) extern void jfs_error(struct super_block *, const char *, ...); extern int jfs_mount(struct super_block *); extern int jfs_mount_rw(struct super_block *, int); diff --git a/fs/jfs/jfs_txnmgr.c b/fs/jfs/jfs_txnmgr.c index 5fcc02e..564c4f2 100644 --- a/fs/jfs/jfs_txnmgr.c +++ b/fs/jfs/jfs_txnmgr.c @@ -2684,7 +2684,7 @@ void txAbort(tid_t tid, int dirty) * mark filesystem dirty */ if (dirty) - jfs_error(tblk->sb, "txAbort"); + jfs_error(tblk->sb, "\n"); return; } diff --git a/fs/jfs/jfs_xtree.c b/fs/jfs/jfs_xtree.c index 6c50871..5ad7748 100644 --- a/fs/jfs/jfs_xtree.c +++ b/fs/jfs/jfs_xtree.c @@ -64,22 +64,23 @@ /* get page buffer for specified block address */ /* ToDo: Replace this ugly macro with a function */ -#define XT_GETPAGE(IP, BN, MP, SIZE, P, RC)\ -{\ - BT_GETPAGE(IP, BN, MP, xtpage_t, SIZE, P, RC, i_xtroot)\ - if (!(RC))\ - {\ - if ((le16_to_cpu((P)->header.nextindex) < XTENTRYSTART) ||\ - (le16_to_cpu((P)->header.nextindex) > le16_to_cpu((P)->header.maxentry)) ||\ - (le16_to_cpu((P)->header.maxentry) > (((BN)==0)?XTROOTMAXSLOT:PSIZE>>L2XTSLOTSIZE)))\ - {\ - jfs_error((IP)->i_sb, "XT_GETPAGE: xtree page corrupt");\ - BT_PUTPAGE(MP);\ - MP = NULL;\ - RC = -EIO;\ - }\ - }\ -} +#define XT_GETPAGE(IP, BN, MP, SIZE, P, RC) \ +do { \ + BT_GETPAGE(IP, BN, MP, xtpage_t, SIZE, P, RC, i_xtroot); \ + if (!(RC)) { \ + if ((le16_to_cpu((P)->header.nextindex) < XTENTRYSTART) || \ + (le16_to_cpu((P)->header.nextindex) > \ + le16_to_cpu((P)->header.maxentry)) || \ + (le16_to_cpu((P)->header.maxentry) > \ + (((BN) == 0) ? 
XTROOTMAXSLOT : PSIZE >> L2XTSLOTSIZE))) { \ + jfs_error((IP)->i_sb, \ + "XT_GETPAGE: xtree page corrupt\n"); \ + BT_PUTPAGE(MP); \ + MP = NULL; \ + RC = -EIO; \ + } \ + } \ +} while (0) /* for consistency */ #define XT_PUTPAGE(MP) BT_PUTPAGE(MP) @@ -499,7 +500,7 @@ static int xtSearch(struct inode *ip, s64 xoff, s64 *nextp, /* push (bn, index) of the parent page/entry */ if (BT_STACK_FULL(btstack)) { - jfs_error(ip->i_sb, "stack overrun in xtSearch!"); + jfs_error(ip->i_sb, "stack overrun!\n"); XT_PUTPAGE(mp); return -EIO; } @@ -1385,7 +1386,7 @@ int xtExtend(tid_t tid, /* transaction id */ if (cmp != 0) { XT_PUTPAGE(mp); - jfs_error(ip->i_sb, "xtExtend: xtSearch did not find extent"); + jfs_error(ip->i_sb, "xtSearch did not find extent\n"); return -EIO; } @@ -1393,7 +1394,7 @@ int xtExtend(tid_t tid, /* transaction id */ xad = &p->xad[index]; if ((offsetXAD(xad) + lengthXAD(xad)) != xoff) { XT_PUTPAGE(mp); - jfs_error(ip->i_sb, "xtExtend: extension is not contiguous"); + jfs_error(ip->i_sb, "extension is not contiguous\n"); return -EIO; } @@ -1552,7 +1553,7 @@ printf("xtTailgate: nxoff:0x%lx nxlen:0x%x nxaddr:0x%lx\n", if (cmp != 0) { XT_PUTPAGE(mp); - jfs_error(ip->i_sb, "xtTailgate: couldn't find extent"); + jfs_error(ip->i_sb, "couldn't find extent\n"); return -EIO; } @@ -1560,8 +1561,7 @@ printf("xtTailgate: nxoff:0x%lx nxlen:0x%x nxaddr:0x%lx\n", nextindex = le16_to_cpu(p->header.nextindex); if (index != nextindex - 1) { XT_PUTPAGE(mp); - jfs_error(ip->i_sb, - "xtTailgate: the entry found is not the last entry"); + jfs_error(ip->i_sb, "the entry found is not the last entry\n"); return -EIO; } @@ -1734,7 +1734,7 @@ int xtUpdate(tid_t tid, struct inode *ip, xad_t * nxad) if (cmp != 0) { XT_PUTPAGE(mp); - jfs_error(ip->i_sb, "xtUpdate: Could not find extent"); + jfs_error(ip->i_sb, "Could not find extent\n"); return -EIO; } @@ -1758,7 +1758,7 @@ int xtUpdate(tid_t tid, struct inode *ip, xad_t * nxad) (nxoff + nxlen > xoff + xlen)) { XT_PUTPAGE(mp); 
jfs_error(ip->i_sb, - "xtUpdate: nXAD in not completely contained within XAD"); + "nXAD in not completely contained within XAD\n"); return -EIO; } @@ -1907,7 +1907,7 @@ int xtUpdate(tid_t tid, struct inode *ip, xad_t * nxad) if (xoff >= nxoff) { XT_PUTPAGE(mp); - jfs_error(ip->i_sb, "xtUpdate: xoff >= nxoff"); + jfs_error(ip->i_sb, "xoff >= nxoff\n"); return -EIO; } /* #endif _JFS_WIP_COALESCE */ @@ -2048,14 +2048,13 @@ int xtUpdate(tid_t tid, struct inode *ip, xad_t * nxad) if (cmp != 0) { XT_PUTPAGE(mp); - jfs_error(ip->i_sb, "xtUpdate: xtSearch failed"); + jfs_error(ip->i_sb, "xtSearch failed\n"); return -EIO; } if (index0 != index) { XT_PUTPAGE(mp); - jfs_error(ip->i_sb, - "xtUpdate: unexpected value of index"); + jfs_error(ip->i_sb, "unexpected value of index\n"); return -EIO; } } @@ -3650,7 +3649,7 @@ s64 xtTruncate(tid_t tid, struct inode *ip, s64 newsize, int flag) getChild: /* save current parent entry for the child page */ if (BT_STACK_FULL(&btstack)) { - jfs_error(ip->i_sb, "stack overrun in xtTruncate!"); + jfs_error(ip->i_sb, "stack overrun!\n"); XT_PUTPAGE(mp); return -EIO; } @@ -3751,8 +3750,7 @@ s64 xtTruncate_pmap(tid_t tid, struct inode *ip, s64 committed_size) if (cmp != 0) { XT_PUTPAGE(mp); - jfs_error(ip->i_sb, - "xtTruncate_pmap: did not find extent"); + jfs_error(ip->i_sb, "did not find extent\n"); return -EIO; } } else { @@ -3851,7 +3849,7 @@ s64 xtTruncate_pmap(tid_t tid, struct inode *ip, s64 committed_size) getChild: /* save current parent entry for the child page */ if (BT_STACK_FULL(&btstack)) { - jfs_error(ip->i_sb, "stack overrun in xtTruncate_pmap!"); + jfs_error(ip->i_sb, "stack overrun!\n"); XT_PUTPAGE(mp); return -EIO; } diff --git a/fs/jfs/namei.c b/fs/jfs/namei.c index 8b19027..aa8a337 100644 --- a/fs/jfs/namei.c +++ b/fs/jfs/namei.c @@ -1176,7 +1176,7 @@ static int jfs_rename(struct inode *old_dir, struct dentry *old_dentry, if (!S_ISDIR(old_ip->i_mode) && new_ip) IWRITE_UNLOCK(new_ip); jfs_error(new_ip->i_sb, - "jfs_rename: 
new_ip->i_nlink != 0"); + "new_ip->i_nlink != 0\n"); return -EIO; } tblk = tid_to_tblock(tid); diff --git a/fs/jfs/resize.c b/fs/jfs/resize.c index 8d0c1c7..90b3bc2 100644 --- a/fs/jfs/resize.c +++ b/fs/jfs/resize.c @@ -530,7 +530,7 @@ int jfs_extendfs(struct super_block *sb, s64 newLVSize, int newLogSize) goto resume; error_out: - jfs_error(sb, "jfs_extendfs"); + jfs_error(sb, "\n"); resume: /* diff --git a/fs/jfs/super.c b/fs/jfs/super.c index 788e0a9..6669aa2 100644 --- a/fs/jfs/super.c +++ b/fs/jfs/super.c @@ -92,16 +92,20 @@ static void jfs_handle_error(struct super_block *sb) /* nothing is done for continue beyond marking the superblock dirty */ } -void jfs_error(struct super_block *sb, const char * function, ...) +void jfs_error(struct super_block *sb, const char *fmt, ...) { - static char error_buf[256]; + struct va_format vaf; va_list args; - va_start(args, function); - vsnprintf(error_buf, sizeof(error_buf), function, args); - va_end(args); + va_start(args, fmt); + + vaf.fmt = fmt; + vaf.va = &args; - pr_err("ERROR: (device %s): %s\n", sb->s_id, error_buf); + pr_err("ERROR: (device %s): %pf: %pV\n", + sb->s_id, __builtin_return_address(0), &vaf); + + va_end(args); jfs_handle_error(sb); } @@ -617,7 +621,7 @@ static int jfs_freeze(struct super_block *sb) txQuiesce(sb); rc = lmLogShutdown(log); if (rc) { - jfs_error(sb, "jfs_freeze: lmLogShutdown failed"); + jfs_error(sb, "lmLogShutdown failed\n"); /* let operations fail rather than hang */ txResume(sb); @@ -646,12 +650,12 @@ static int jfs_unfreeze(struct super_block *sb) if (!(sb->s_flags & MS_RDONLY)) { rc = updateSuper(sb, FM_MOUNT); if (rc) { - jfs_error(sb, "jfs_unfreeze: updateSuper failed"); + jfs_error(sb, "updateSuper failed\n"); goto out; } rc = lmLogInit(log); if (rc) - jfs_error(sb, "jfs_unfreeze: lmLogInit failed"); + jfs_error(sb, "lmLogInit failed\n"); out: txResume(sb); } diff --git a/fs/jfs/xattr.c b/fs/jfs/xattr.c index 42d67f9..d3472f4 100644 --- a/fs/jfs/xattr.c +++ b/fs/jfs/xattr.c @@ 
-382,7 +382,7 @@ static int ea_read(struct inode *ip, struct jfs_ea_list *ealist) nbytes = sizeDXD(&ji->ea); if (!nbytes) { - jfs_error(sb, "ea_read: nbytes is 0"); + jfs_error(sb, "nbytes is 0\n"); return -EIO; } @@ -482,7 +482,7 @@ static int ea_get(struct inode *inode, struct ea_buffer *ea_buf, int min_size) current_blocks = 0; } else { if (!(ji->ea.flag & DXD_EXTENT)) { - jfs_error(sb, "ea_get: invalid ea.flag)"); + jfs_error(sb, "invalid ea.flag\n"); return -EIO; } current_blocks = (ea_size + sb->s_blocksize - 1) >> @@ -1089,8 +1089,8 @@ int jfs_removexattr(struct dentry *dentry, const char *name) } #ifdef CONFIG_JFS_SECURITY -int jfs_initxattrs(struct inode *inode, const struct xattr *xattr_array, - void *fs_info) +static int jfs_initxattrs(struct inode *inode, const struct xattr *xattr_array, + void *fs_info) { const struct xattr *xattr; tid_t *tid = fs_info; @@ -61,7 +61,8 @@ struct dentry *simple_lookup(struct inode *dir, struct dentry *dentry, unsigned if (dentry->d_name.len > NAME_MAX) return ERR_PTR(-ENAMETOOLONG); - d_set_d_op(dentry, &simple_dentry_operations); + if (!dentry->d_sb->s_d_op) + d_set_d_op(dentry, &simple_dentry_operations); d_add(dentry, NULL); return NULL; } diff --git a/fs/lockd/clntlock.c b/fs/lockd/clntlock.c index 01bfe766..41e491b 100644 --- a/fs/lockd/clntlock.c +++ b/fs/lockd/clntlock.c @@ -64,12 +64,17 @@ struct nlm_host *nlmclnt_init(const struct nlmclnt_initdata *nlm_init) nlm_init->protocol, nlm_version, nlm_init->hostname, nlm_init->noresvport, nlm_init->net); - if (host == NULL) { - lockd_down(nlm_init->net); - return ERR_PTR(-ENOLCK); - } + if (host == NULL) + goto out_nohost; + if (host->h_rpcclnt == NULL && nlm_bind_host(host) == NULL) + goto out_nobind; return host; +out_nobind: + nlmclnt_release_host(host); +out_nohost: + lockd_down(nlm_init->net); + return ERR_PTR(-ENOLCK); } EXPORT_SYMBOL_GPL(nlmclnt_init); diff --git a/fs/lockd/clntproc.c b/fs/lockd/clntproc.c index 9760ecb..acd3947 100644 --- a/fs/lockd/clntproc.c 
+++ b/fs/lockd/clntproc.c @@ -125,14 +125,15 @@ static void nlmclnt_setlockargs(struct nlm_rqst *req, struct file_lock *fl) { struct nlm_args *argp = &req->a_args; struct nlm_lock *lock = &argp->lock; + char *nodename = req->a_host->h_rpcclnt->cl_nodename; nlmclnt_next_cookie(&argp->cookie); memcpy(&lock->fh, NFS_FH(file_inode(fl->fl_file)), sizeof(struct nfs_fh)); - lock->caller = utsname()->nodename; + lock->caller = nodename; lock->oh.data = req->a_owner; lock->oh.len = snprintf(req->a_owner, sizeof(req->a_owner), "%u@%s", (unsigned int)fl->fl_u.nfs_fl.owner->pid, - utsname()->nodename); + nodename); lock->svid = fl->fl_u.nfs_fl.owner->pid; lock->fl.fl_start = fl->fl_start; lock->fl.fl_end = fl->fl_end; diff --git a/fs/lockd/svclock.c b/fs/lockd/svclock.c index 067778b..e066a39 100644 --- a/fs/lockd/svclock.c +++ b/fs/lockd/svclock.c @@ -951,6 +951,7 @@ nlmsvc_retry_blocked(void) unsigned long timeout = MAX_SCHEDULE_TIMEOUT; struct nlm_block *block; + spin_lock(&nlm_blocked_lock); while (!list_empty(&nlm_blocked) && !kthread_should_stop()) { block = list_entry(nlm_blocked.next, struct nlm_block, b_list); @@ -960,6 +961,7 @@ nlmsvc_retry_blocked(void) timeout = block->b_when - jiffies; break; } + spin_unlock(&nlm_blocked_lock); dprintk("nlmsvc_retry_blocked(%p, when=%ld)\n", block, block->b_when); @@ -969,7 +971,9 @@ nlmsvc_retry_blocked(void) retry_deferred_block(block); } else nlmsvc_grant_blocked(block); + spin_lock(&nlm_blocked_lock); } + spin_unlock(&nlm_blocked_lock); return timeout; } @@ -2977,7 +2977,7 @@ static struct file *path_openat(int dfd, struct filename *pathname, file->f_flags = op->open_flag; - if (unlikely(file->f_flags & O_TMPFILE)) { + if (unlikely(file->f_flags & __O_TMPFILE)) { error = do_tmpfile(dfd, pathname, nd, flags, op, file, &opened); goto out; } @@ -3671,15 +3671,11 @@ SYSCALL_DEFINE5(linkat, int, olddfd, const char __user *, oldname, if ((flags & ~(AT_SYMLINK_FOLLOW | AT_EMPTY_PATH)) != 0) return -EINVAL; /* - * To use null names 
we require CAP_DAC_READ_SEARCH - * This ensures that not everyone will be able to create - * handlink using the passed filedescriptor. + * Using empty names is equivalent to using AT_SYMLINK_FOLLOW + * on /proc/self/fd/<fd>. */ - if (flags & AT_EMPTY_PATH) { - if (!capable(CAP_DAC_READ_SEARCH)) - return -ENOENT; + if (flags & AT_EMPTY_PATH) how = LOOKUP_EMPTY; - } if (flags & AT_SYMLINK_FOLLOW) how |= LOOKUP_FOLLOW; diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index 0fac2cb..e474ca2b 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -450,6 +450,7 @@ void nfs_prime_dcache(struct dentry *parent, struct nfs_entry *entry) dentry = d_lookup(parent, &filename); if (dentry != NULL) { if (nfs_same_file(dentry, entry)) { + nfs_set_verifier(dentry, nfs_save_change_attribute(dir)); status = nfs_refresh_inode(dentry->d_inode, entry->fattr); if (!status) nfs_setsecurity(dentry->d_inode, entry->fattr, entry->label); @@ -817,7 +818,7 @@ static int nfs_readdir(struct file *file, struct dir_context *ctx) nfs_readdir_descriptor_t my_desc, *desc = &my_desc; struct nfs_open_dir_context *dir_ctx = file->private_data; - int res; + int res = 0; dfprintk(FILE, "NFS: readdir(%s/%s) starting at cookie %llu\n", dentry->d_parent->d_name.name, dentry->d_name.name, @@ -839,7 +840,8 @@ static int nfs_readdir(struct file *file, struct dir_context *ctx) desc->plus = nfs_use_readdirplus(inode, ctx) ? 
1 : 0; nfs_block_sillyrename(dentry); - res = nfs_revalidate_mapping(inode, file->f_mapping); + if (ctx->pos == 0 || nfs_attribute_cache_expired(inode)) + res = nfs_revalidate_mapping(inode, file->f_mapping); if (res < 0) goto out; diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index c93639e..941246f 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -463,7 +463,6 @@ nfs_fhget(struct super_block *sb, struct nfs_fh *fh, struct nfs_fattr *fattr, st unlock_new_inode(inode); } else nfs_refresh_inode(inode, fattr); - nfs_setsecurity(inode, fattr, label); dprintk("NFS: nfs_fhget(%s/%Ld fh_crc=0x%08x ct=%d)\n", inode->i_sb->s_id, (long long)NFS_FILEID(inode), @@ -936,7 +935,7 @@ int nfs_attribute_timeout(struct inode *inode) return !time_in_range_open(jiffies, nfsi->read_cache_jiffies, nfsi->read_cache_jiffies + nfsi->attrtimeo); } -static int nfs_attribute_cache_expired(struct inode *inode) +int nfs_attribute_cache_expired(struct inode *inode) { if (nfs_have_delegated_attributes(inode)) return 0; @@ -963,9 +962,15 @@ EXPORT_SYMBOL_GPL(nfs_revalidate_inode); static int nfs_invalidate_mapping(struct inode *inode, struct address_space *mapping) { struct nfs_inode *nfsi = NFS_I(inode); - + int ret; + if (mapping->nrpages != 0) { - int ret = invalidate_inode_pages2(mapping); + if (S_ISREG(inode->i_mode)) { + ret = nfs_sync_mapping(mapping); + if (ret < 0) + return ret; + } + ret = invalidate_inode_pages2(mapping); if (ret < 0) return ret; } diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index cf11799..108a774 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -3071,15 +3071,13 @@ struct rpc_clnt * nfs4_proc_lookup_mountpoint(struct inode *dir, struct qstr *name, struct nfs_fh *fhandle, struct nfs_fattr *fattr) { + struct rpc_clnt *client = NFS_CLIENT(dir); int status; - struct rpc_clnt *client = rpc_clone_client(NFS_CLIENT(dir)); status = nfs4_proc_lookup_common(&client, dir, name, fhandle, fattr, NULL); - if (status < 0) { - rpc_shutdown_client(client); + if 
(status < 0) return ERR_PTR(status); - } - return client; + return (client == NFS_CLIENT(dir)) ? rpc_clone_client(client) : client; } static int _nfs4_proc_access(struct inode *inode, struct nfs_access_entry *entry) diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c index 0abfb846..3850b01 100644 --- a/fs/nfs/nfs4xdr.c +++ b/fs/nfs/nfs4xdr.c @@ -999,6 +999,7 @@ static void encode_attrs(struct xdr_stream *xdr, const struct iattr *iap, __be32 *p; __be32 *q; int len; + uint32_t bmval_len = 2; uint32_t bmval0 = 0; uint32_t bmval1 = 0; uint32_t bmval2 = 0; @@ -1010,7 +1011,7 @@ static void encode_attrs(struct xdr_stream *xdr, const struct iattr *iap, * = 40 bytes, plus any contribution from variable-length fields * such as owner/group. */ - len = 20; + len = 8; /* Sigh */ if (iap->ia_valid & ATTR_SIZE) @@ -1040,8 +1041,6 @@ static void encode_attrs(struct xdr_stream *xdr, const struct iattr *iap, } len += 4 + (XDR_QUADLEN(owner_grouplen) << 2); } - if (label) - len += 4 + 4 + 4 + (XDR_QUADLEN(label->len) << 2); if (iap->ia_valid & ATTR_ATIME_SET) len += 16; else if (iap->ia_valid & ATTR_ATIME) @@ -1050,15 +1049,22 @@ static void encode_attrs(struct xdr_stream *xdr, const struct iattr *iap, len += 16; else if (iap->ia_valid & ATTR_MTIME) len += 4; + if (label) { + len += 4 + 4 + 4 + (XDR_QUADLEN(label->len) << 2); + bmval_len = 3; + } + + len += bmval_len << 2; p = reserve_space(xdr, len); /* * We write the bitmap length now, but leave the bitmap and the attribute * buffer length to be backfilled at the end of this routine. 
*/ - *p++ = cpu_to_be32(3); + *p++ = cpu_to_be32(bmval_len); q = p; - p += 4; + /* Skip bitmap entries + attrlen */ + p += bmval_len + 1; if (iap->ia_valid & ATTR_SIZE) { bmval0 |= FATTR4_WORD0_SIZE; @@ -1112,10 +1118,11 @@ static void encode_attrs(struct xdr_stream *xdr, const struct iattr *iap, len, ((char *)p - (char *)q) + 4); BUG(); } - len = (char *)p - (char *)q - 16; *q++ = htonl(bmval0); *q++ = htonl(bmval1); - *q++ = htonl(bmval2); + if (bmval_len == 3) + *q++ = htonl(bmval2); + len = (char *)p - (char *)(q + 1); *q = htonl(len); /* out: */ diff --git a/fs/nfs/super.c b/fs/nfs/super.c index 71fdc0d..f6db66d 100644 --- a/fs/nfs/super.c +++ b/fs/nfs/super.c @@ -2478,6 +2478,10 @@ struct dentry *nfs_fs_mount_common(struct nfs_server *server, if (server->flags & NFS_MOUNT_NOAC) sb_mntdata.mntflags |= MS_SYNCHRONOUS; + if (mount_info->cloned != NULL && mount_info->cloned->sb != NULL) + if (mount_info->cloned->sb->s_flags & MS_SYNCHRONOUS) + sb_mntdata.mntflags |= MS_SYNCHRONOUS; + /* Get a superblock - note that we may end up sharing one that already exists */ s = sget(nfs_mod->nfs_fs, compare_super, nfs_set_super, flags, &sb_mntdata); if (IS_ERR(s)) { diff --git a/fs/nfs/write.c b/fs/nfs/write.c index a2c7c28..f1bdb72 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -888,6 +888,28 @@ out: return PageUptodate(page) != 0; } +/* If we know the page is up to date, and we're not using byte range locks (or + * if we have the whole file locked for writing), it may be more efficient to + * extend the write to cover the entire page in order to avoid fragmentation + * inefficiencies. + * + * If the file is opened for synchronous writes or if we have a write delegation + * from the server then we can just skip the rest of the checks. 
+ */ +static int nfs_can_extend_write(struct file *file, struct page *page, struct inode *inode) +{ + if (file->f_flags & O_DSYNC) + return 0; + if (NFS_PROTO(inode)->have_delegation(inode, FMODE_WRITE)) + return 1; + if (nfs_write_pageuptodate(page, inode) && (inode->i_flock == NULL || + (inode->i_flock->fl_start == 0 && + inode->i_flock->fl_end == OFFSET_MAX && + inode->i_flock->fl_type != F_RDLCK))) + return 1; + return 0; +} + /* * Update and possibly write a cached page of an NFS file. * @@ -908,14 +930,7 @@ int nfs_updatepage(struct file *file, struct page *page, file->f_path.dentry->d_name.name, count, (long long)(page_file_offset(page) + offset)); - /* If we're not using byte range locks, and we know the page - * is up to date, it may be more efficient to extend the write - * to cover the entire page in order to avoid fragmentation - * inefficiencies. - */ - if (nfs_write_pageuptodate(page, inode) && - inode->i_flock == NULL && - !(file->f_flags & O_DSYNC)) { + if (nfs_can_extend_write(file, page, inode)) { count = max(count + offset, nfs_page_length(page)); offset = 0; } diff --git a/fs/nfsd/Kconfig b/fs/nfsd/Kconfig index 430b687..dc8f1ef 100644 --- a/fs/nfsd/Kconfig +++ b/fs/nfsd/Kconfig @@ -81,6 +81,22 @@ config NFSD_V4 If unsure, say N. +config NFSD_V4_SECURITY_LABEL + bool "Provide Security Label support for NFSv4 server" + depends on NFSD_V4 && SECURITY + help + + Say Y here if you want to enable fine-grained security label attribute + support for NFS version 4. Security labels allow security modules like + SELinux and Smack to label files to facilitate enforcement of their policies. + Without this an NFSv4 mount will have the same label on each file. + + If you do not wish to enable fine-grained security labels SELinux or + Smack policies on NFSv4 files, say N. + + WARNING: there is still a chance of backwards-incompatible protocol changes. + For now we recommend "Y" only for developers and testers.
+ config NFSD_FAULT_INJECTION bool "NFS server manual fault injection" depends on NFSD_V4 && DEBUG_KERNEL diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c index 27d74a2..419572f 100644 --- a/fs/nfsd/nfs4proc.c +++ b/fs/nfsd/nfs4proc.c @@ -42,6 +42,36 @@ #include "current_stateid.h" #include "netns.h" +#ifdef CONFIG_NFSD_V4_SECURITY_LABEL +#include <linux/security.h> + +static inline void +nfsd4_security_inode_setsecctx(struct svc_fh *resfh, struct xdr_netobj *label, u32 *bmval) +{ + struct inode *inode = resfh->fh_dentry->d_inode; + int status; + + mutex_lock(&inode->i_mutex); + status = security_inode_setsecctx(resfh->fh_dentry, + label->data, label->len); + mutex_unlock(&inode->i_mutex); + + if (status) + /* + * XXX: We should really fail the whole open, but we may + * already have created a new file, so it may be too + * late. For now this seems the least of evils: + */ + bmval[2] &= ~FATTR4_WORD2_SECURITY_LABEL; + + return; +} +#else +static inline void +nfsd4_security_inode_setsecctx(struct svc_fh *resfh, struct xdr_netobj *label, u32 *bmval) +{ } +#endif + #define NFSDDBG_FACILITY NFSDDBG_PROC static u32 nfsd_attrmask[] = { @@ -239,6 +269,9 @@ do_open_lookup(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, stru (u32 *)open->op_verf.data, &open->op_truncate, &open->op_created); + if (!status && open->op_label.len) + nfsd4_security_inode_setsecctx(resfh, &open->op_label, open->op_bmval); + /* * Following rfc 3530 14.2.16, use the returned bitmask * to indicate which attributes we used to store the @@ -263,7 +296,8 @@ do_open_lookup(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, stru nfsd4_set_open_owner_reply_cache(cstate, open, resfh); accmode = NFSD_MAY_NOP; - if (open->op_created) + if (open->op_created || + open->op_claim_type == NFS4_OPEN_CLAIM_DELEGATE_CUR) accmode |= NFSD_MAY_OWNER_OVERRIDE; status = do_open_permission(rqstp, resfh, open, accmode); set_change_info(&open->op_cinfo, current_fh); @@ -637,6 +671,9 @@ 
nfsd4_create(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, if (status) goto out; + if (create->cr_label.len) + nfsd4_security_inode_setsecctx(&resfh, &create->cr_label, create->cr_bmval); + if (create->cr_acl != NULL) do_set_nfs4_acl(rqstp, &resfh, create->cr_acl, create->cr_bmval); @@ -916,6 +953,11 @@ nfsd4_setattr(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, setattr->sa_acl); if (status) goto out; + if (setattr->sa_label.len) + status = nfsd4_set_nfs4_label(rqstp, &cstate->current_fh, + &setattr->sa_label); + if (status) + goto out; status = nfsd_setattr(rqstp, &cstate->current_fh, &setattr->sa_iattr, 0, (time_t)0); out: @@ -1251,7 +1293,7 @@ nfsd4_proc_compound(struct svc_rqst *rqstp, * According to RFC3010, this takes precedence over all other errors. */ status = nfserr_minor_vers_mismatch; - if (args->minorversion > nfsd_supported_minorversion) + if (nfsd_minorversion(args->minorversion, NFSD_TEST) <= 0) goto out; status = nfs41_check_op_ordering(args); @@ -1482,7 +1524,7 @@ static inline u32 nfsd4_write_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op) static inline u32 nfsd4_exchange_id_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op) { return (op_encode_hdr_size + 2 + 1 + /* eir_clientid, eir_sequenceid */\ - 1 + 1 + 0 + /* eir_flags, spr_how, SP4_NONE (for now) */\ + 1 + 1 + 2 + /* eir_flags, spr_how, spo_must_enforce & _allow */\ 2 + /*eir_server_owner.so_minor_id */\ /* eir_server_owner.so_major_id<> */\ XDR_QUADLEN(NFS4_OPAQUE_LIMIT) + 1 +\ diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index f170518..43f4229 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -97,19 +97,20 @@ nfs4_lock_state(void) static void free_session(struct nfsd4_session *); -void nfsd4_put_session(struct nfsd4_session *ses) +static bool is_session_dead(struct nfsd4_session *ses) { - atomic_dec(&ses->se_ref); + return ses->se_flags & NFS4_SESSION_DEAD; } -static bool is_session_dead(struct nfsd4_session *ses) +void 
nfsd4_put_session(struct nfsd4_session *ses) { - return ses->se_flags & NFS4_SESSION_DEAD; + if (atomic_dec_and_test(&ses->se_ref) && is_session_dead(ses)) + free_session(ses); } -static __be32 mark_session_dead_locked(struct nfsd4_session *ses) +static __be32 mark_session_dead_locked(struct nfsd4_session *ses, int ref_held_by_me) { - if (atomic_read(&ses->se_ref)) + if (atomic_read(&ses->se_ref) > ref_held_by_me) return nfserr_jukebox; ses->se_flags |= NFS4_SESSION_DEAD; return nfs_ok; @@ -364,19 +365,12 @@ static struct nfs4_ol_stateid * nfs4_alloc_stateid(struct nfs4_client *clp) } static struct nfs4_delegation * -alloc_init_deleg(struct nfs4_client *clp, struct nfs4_ol_stateid *stp, struct svc_fh *current_fh, u32 type) +alloc_init_deleg(struct nfs4_client *clp, struct nfs4_ol_stateid *stp, struct svc_fh *current_fh) { struct nfs4_delegation *dp; struct nfs4_file *fp = stp->st_file; dprintk("NFSD alloc_init_deleg\n"); - /* - * Major work on the lease subsystem (for example, to support - * calbacks on stat) will be required before we can support - * write delegations properly. 
- */ - if (type != NFS4_OPEN_DELEGATE_READ) - return NULL; if (fp->fi_had_conflict) return NULL; if (num_delegations > max_delegations) @@ -397,7 +391,7 @@ alloc_init_deleg(struct nfs4_client *clp, struct nfs4_ol_stateid *stp, struct sv INIT_LIST_HEAD(&dp->dl_recall_lru); get_nfs4_file(fp); dp->dl_file = fp; - dp->dl_type = type; + dp->dl_type = NFS4_OPEN_DELEGATE_READ; fh_copy_shallow(&dp->dl_fh, &current_fh->fh_handle); dp->dl_time = 0; atomic_set(&dp->dl_count, 1); @@ -1188,6 +1182,9 @@ static int copy_cred(struct svc_cred *target, struct svc_cred *source) target->cr_gid = source->cr_gid; target->cr_group_info = source->cr_group_info; get_group_info(target->cr_group_info); + target->cr_gss_mech = source->cr_gss_mech; + if (source->cr_gss_mech) + gss_mech_get(source->cr_gss_mech); return 0; } @@ -1262,6 +1259,33 @@ same_creds(struct svc_cred *cr1, struct svc_cred *cr2) return 0 == strcmp(cr1->cr_principal, cr2->cr_principal); } +static bool svc_rqst_integrity_protected(struct svc_rqst *rqstp) +{ + struct svc_cred *cr = &rqstp->rq_cred; + u32 service; + + if (!cr->cr_gss_mech) + return false; + service = gss_pseudoflavor_to_service(cr->cr_gss_mech, cr->cr_flavor); + return service == RPC_GSS_SVC_INTEGRITY || + service == RPC_GSS_SVC_PRIVACY; +} + +static bool mach_creds_match(struct nfs4_client *cl, struct svc_rqst *rqstp) +{ + struct svc_cred *cr = &rqstp->rq_cred; + + if (!cl->cl_mach_cred) + return true; + if (cl->cl_cred.cr_gss_mech != cr->cr_gss_mech) + return false; + if (!svc_rqst_integrity_protected(rqstp)) + return false; + if (!cr->cr_principal) + return false; + return 0 == strcmp(cl->cl_cred.cr_principal, cr->cr_principal); +} + static void gen_clid(struct nfs4_client *clp, struct nfsd_net *nn) { static u32 current_clientid = 1; @@ -1639,16 +1663,16 @@ nfsd4_exchange_id(struct svc_rqst *rqstp, if (exid->flags & ~EXCHGID4_FLAG_MASK_A) return nfserr_inval; - /* Currently only support SP4_NONE */ switch (exid->spa_how) { + case SP4_MACH_CRED: + if
(!svc_rqst_integrity_protected(rqstp)) + return nfserr_inval; case SP4_NONE: break; default: /* checked by xdr code */ WARN_ON_ONCE(1); case SP4_SSV: return nfserr_encr_alg_unsupp; - case SP4_MACH_CRED: - return nfserr_serverfault; /* no excuse :-/ */ } /* Cases below refer to rfc 5661 section 18.35.4: */ @@ -1663,6 +1687,10 @@ nfsd4_exchange_id(struct svc_rqst *rqstp, status = nfserr_inval; goto out; } + if (!mach_creds_match(conf, rqstp)) { + status = nfserr_wrong_cred; + goto out; + } if (!creds_match) { /* case 9 */ status = nfserr_perm; goto out; @@ -1709,7 +1737,8 @@ out_new: status = nfserr_jukebox; goto out; } - new->cl_minorversion = 1; + new->cl_minorversion = cstate->minorversion; + new->cl_mach_cred = (exid->spa_how == SP4_MACH_CRED); gen_clid(new, nn); add_to_unconfirmed(new); @@ -1839,6 +1868,24 @@ static __be32 check_backchannel_attrs(struct nfsd4_channel_attrs *ca) return nfs_ok; } +static __be32 nfsd4_check_cb_sec(struct nfsd4_cb_sec *cbs) +{ + switch (cbs->flavor) { + case RPC_AUTH_NULL: + case RPC_AUTH_UNIX: + return nfs_ok; + default: + /* + * GSS case: the spec doesn't allow us to return this + * error. But it also doesn't allow us not to support + * GSS. 
+ * I'd rather this fail hard than return some error the + * client might think it can already handle: + */ + return nfserr_encr_alg_unsupp; + } +} + __be32 nfsd4_create_session(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, @@ -1854,6 +1901,9 @@ nfsd4_create_session(struct svc_rqst *rqstp, if (cr_ses->flags & ~SESSION4_FLAG_MASK_A) return nfserr_inval; + status = nfsd4_check_cb_sec(&cr_ses->cb_sec); + if (status) + return status; status = check_forechannel_attrs(&cr_ses->fore_channel, nn); if (status) return status; @@ -1874,6 +1924,9 @@ nfsd4_create_session(struct svc_rqst *rqstp, WARN_ON_ONCE(conf && unconf); if (conf) { + status = nfserr_wrong_cred; + if (!mach_creds_match(conf, rqstp)) + goto out_free_conn; cs_slot = &conf->cl_cs_slot; status = check_slot_seqid(cr_ses->seqid, cs_slot->sl_seqid, 0); if (status == nfserr_replay_cache) { @@ -1890,6 +1943,9 @@ nfsd4_create_session(struct svc_rqst *rqstp, status = nfserr_clid_inuse; goto out_free_conn; } + status = nfserr_wrong_cred; + if (!mach_creds_match(unconf, rqstp)) + goto out_free_conn; cs_slot = &unconf->cl_cs_slot; status = check_slot_seqid(cr_ses->seqid, cs_slot->sl_seqid, 0); if (status) { @@ -1957,7 +2013,11 @@ __be32 nfsd4_backchannel_ctl(struct svc_rqst *rqstp, struct nfsd4_compound_state { struct nfsd4_session *session = cstate->session; struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id); + __be32 status; + status = nfsd4_check_cb_sec(&bc->bc_cb_sec); + if (status) + return status; spin_lock(&nn->client_lock); session->se_cb_prog = bc->bc_cb_program; session->se_cb_sec = bc->bc_cb_sec; @@ -1986,6 +2046,9 @@ __be32 nfsd4_bind_conn_to_session(struct svc_rqst *rqstp, status = nfserr_badsession; if (!session) goto out; + status = nfserr_wrong_cred; + if (!mach_creds_match(session->se_client, rqstp)) + goto out; status = nfsd4_map_bcts_dir(&bcts->dir); if (status) goto out; @@ -2014,6 +2077,7 @@ nfsd4_destroy_session(struct svc_rqst *r, { struct nfsd4_session *ses; __be32 
status; + int ref_held_by_me = 0; struct nfsd_net *nn = net_generic(SVC_NET(r), nfsd_net_id); nfs4_lock_state(); @@ -2021,6 +2085,7 @@ nfsd4_destroy_session(struct svc_rqst *r, if (nfsd4_compound_in_session(cstate->session, &sessionid->sessionid)) { if (!nfsd4_last_compound_op(r)) goto out; + ref_held_by_me++; } dump_sessionid(__func__, &sessionid->sessionid); spin_lock(&nn->client_lock); @@ -2028,17 +2093,22 @@ nfsd4_destroy_session(struct svc_rqst *r, status = nfserr_badsession; if (!ses) goto out_client_lock; - status = mark_session_dead_locked(ses); - if (status) + status = nfserr_wrong_cred; + if (!mach_creds_match(ses->se_client, r)) goto out_client_lock; + nfsd4_get_session_locked(ses); + status = mark_session_dead_locked(ses, 1 + ref_held_by_me); + if (status) + goto out_put_session; unhash_session(ses); spin_unlock(&nn->client_lock); nfsd4_probe_callback_sync(ses->se_client); spin_lock(&nn->client_lock); - free_session(ses); status = nfs_ok; +out_put_session: + nfsd4_put_session(ses); out_client_lock: spin_unlock(&nn->client_lock); out: @@ -2058,26 +2128,31 @@ static struct nfsd4_conn *__nfsd4_find_conn(struct svc_xprt *xpt, struct nfsd4_s return NULL; } -static void nfsd4_sequence_check_conn(struct nfsd4_conn *new, struct nfsd4_session *ses) +static __be32 nfsd4_sequence_check_conn(struct nfsd4_conn *new, struct nfsd4_session *ses) { struct nfs4_client *clp = ses->se_client; struct nfsd4_conn *c; + __be32 status = nfs_ok; int ret; spin_lock(&clp->cl_lock); c = __nfsd4_find_conn(new->cn_xprt, ses); - if (c) { - spin_unlock(&clp->cl_lock); - free_conn(new); - return; - } + if (c) + goto out_free; + status = nfserr_conn_not_bound_to_session; + if (clp->cl_mach_cred) + goto out_free; __nfsd4_hash_conn(new, ses); spin_unlock(&clp->cl_lock); ret = nfsd4_register_conn(new); if (ret) /* oops; xprt is already down: */ nfsd4_conn_lost(&new->cn_xpt_user); - return; + return nfs_ok; +out_free: + spin_unlock(&clp->cl_lock); + free_conn(new); + return status; } static 
bool nfsd4_session_too_many_ops(struct svc_rqst *rqstp, struct nfsd4_session *session) @@ -2169,8 +2244,10 @@ nfsd4_sequence(struct svc_rqst *rqstp, if (status) goto out_put_session; - nfsd4_sequence_check_conn(conn, session); + status = nfsd4_sequence_check_conn(conn, session); conn = NULL; + if (status) + goto out_put_session; /* Success! bump slot seqid */ slot->sl_seqid = seq->seqid; @@ -2232,7 +2309,10 @@ nfsd4_destroy_clientid(struct svc_rqst *rqstp, struct nfsd4_compound_state *csta status = nfserr_stale_clientid; goto out; } - + if (!mach_creds_match(clp, rqstp)) { + status = nfserr_wrong_cred; + goto out; + } expire_client(clp); out: nfs4_unlock_state(); @@ -2940,13 +3020,13 @@ static struct file_lock *nfs4_alloc_init_lease(struct nfs4_delegation *dp, int f return fl; } -static int nfs4_setlease(struct nfs4_delegation *dp, int flag) +static int nfs4_setlease(struct nfs4_delegation *dp) { struct nfs4_file *fp = dp->dl_file; struct file_lock *fl; int status; - fl = nfs4_alloc_init_lease(dp, flag); + fl = nfs4_alloc_init_lease(dp, NFS4_OPEN_DELEGATE_READ); if (!fl) return -ENOMEM; fl->fl_file = find_readable_file(fp); @@ -2964,12 +3044,12 @@ static int nfs4_setlease(struct nfs4_delegation *dp, int flag) return 0; } -static int nfs4_set_delegation(struct nfs4_delegation *dp, int flag) +static int nfs4_set_delegation(struct nfs4_delegation *dp) { struct nfs4_file *fp = dp->dl_file; if (!fp->fi_lease) - return nfs4_setlease(dp, flag); + return nfs4_setlease(dp); spin_lock(&recall_lock); if (fp->fi_had_conflict) { spin_unlock(&recall_lock); @@ -3005,6 +3085,9 @@ static void nfsd4_open_deleg_none_ext(struct nfsd4_open *open, int status) /* * Attempt to hand out a delegation. + * + * Note we don't support write delegations, and won't until the vfs has + * proper support for them. 
*/ static void nfs4_open_delegation(struct net *net, struct svc_fh *fh, @@ -3013,39 +3096,45 @@ nfs4_open_delegation(struct net *net, struct svc_fh *fh, struct nfs4_delegation *dp; struct nfs4_openowner *oo = container_of(stp->st_stateowner, struct nfs4_openowner, oo_owner); int cb_up; - int status = 0, flag = 0; + int status = 0; cb_up = nfsd4_cb_channel_good(oo->oo_owner.so_client); - flag = NFS4_OPEN_DELEGATE_NONE; open->op_recall = 0; switch (open->op_claim_type) { case NFS4_OPEN_CLAIM_PREVIOUS: if (!cb_up) open->op_recall = 1; - flag = open->op_delegate_type; - if (flag == NFS4_OPEN_DELEGATE_NONE) - goto out; + if (open->op_delegate_type != NFS4_OPEN_DELEGATE_READ) + goto out_no_deleg; break; case NFS4_OPEN_CLAIM_NULL: - /* Let's not give out any delegations till everyone's - * had the chance to reclaim theirs.... */ + /* + * Let's not give out any delegations till everyone's + * had the chance to reclaim theirs.... + */ if (locks_in_grace(net)) - goto out; + goto out_no_deleg; if (!cb_up || !(oo->oo_flags & NFS4_OO_CONFIRMED)) - goto out; + goto out_no_deleg; + /* + * Also, if the file was opened for write or + * create, there's a good chance the client's + * about to write to it, resulting in an + * immediate recall (since we don't support + * write delegations): + */ if (open->op_share_access & NFS4_SHARE_ACCESS_WRITE) - flag = NFS4_OPEN_DELEGATE_WRITE; - else - flag = NFS4_OPEN_DELEGATE_READ; + goto out_no_deleg; + if (open->op_create == NFS4_OPEN_CREATE) + goto out_no_deleg; break; default: - goto out; + goto out_no_deleg; } - - dp = alloc_init_deleg(oo->oo_owner.so_client, stp, fh, flag); + dp = alloc_init_deleg(oo->oo_owner.so_client, stp, fh); if (dp == NULL) goto out_no_deleg; - status = nfs4_set_delegation(dp, flag); + status = nfs4_set_delegation(dp); if (status) goto out_free; @@ -3053,24 +3142,23 @@ nfs4_open_delegation(struct net *net, struct svc_fh *fh, dprintk("NFSD: delegation stateid=" STATEID_FMT "\n", STATEID_VAL(&dp->dl_stid.sc_stateid)); 
-out: - open->op_delegate_type = flag; - if (flag == NFS4_OPEN_DELEGATE_NONE) { - if (open->op_claim_type == NFS4_OPEN_CLAIM_PREVIOUS && - open->op_delegate_type != NFS4_OPEN_DELEGATE_NONE) - dprintk("NFSD: WARNING: refusing delegation reclaim\n"); - - /* 4.1 client asking for a delegation? */ - if (open->op_deleg_want) - nfsd4_open_deleg_none_ext(open, status); - } + open->op_delegate_type = NFS4_OPEN_DELEGATE_READ; return; out_free: unhash_stid(&dp->dl_stid); nfs4_put_delegation(dp); out_no_deleg: - flag = NFS4_OPEN_DELEGATE_NONE; - goto out; + open->op_delegate_type = NFS4_OPEN_DELEGATE_NONE; + if (open->op_claim_type == NFS4_OPEN_CLAIM_PREVIOUS && + open->op_delegate_type != NFS4_OPEN_DELEGATE_NONE) { + dprintk("NFSD: WARNING: refusing delegation reclaim\n"); + open->op_recall = 1; + } + + /* 4.1 client asking for a delegation? */ + if (open->op_deleg_want) + nfsd4_open_deleg_none_ext(open, status); + return; } static void nfsd4_deleg_xgrade_none_ext(struct nfsd4_open *open, @@ -3427,7 +3515,7 @@ grace_disallows_io(struct net *net, struct inode *inode) /* Returns true iff a is later than b: */ static bool stateid_generation_after(stateid_t *a, stateid_t *b) { - return (s32)a->si_generation - (s32)b->si_generation > 0; + return (s32)(a->si_generation - b->si_generation) > 0; } static __be32 check_stateid_generation(stateid_t *in, stateid_t *ref, bool has_session) @@ -4435,7 +4523,6 @@ __be32 nfsd4_locku(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, struct nfsd4_locku *locku) { - struct nfs4_lockowner *lo; struct nfs4_ol_stateid *stp; struct file *filp = NULL; struct file_lock *file_lock = NULL; @@ -4468,10 +4555,9 @@ nfsd4_locku(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, status = nfserr_jukebox; goto out; } - lo = lockowner(stp->st_stateowner); locks_init_lock(file_lock); file_lock->fl_type = F_UNLCK; - file_lock->fl_owner = (fl_owner_t)lo; + file_lock->fl_owner = (fl_owner_t)lockowner(stp->st_stateowner); file_lock->fl_pid = 
current->tgid; file_lock->fl_file = filp; file_lock->fl_flags = FL_POSIX; @@ -4490,11 +4576,6 @@ nfsd4_locku(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, update_stateid(&stp->st_stid.sc_stateid); memcpy(&locku->lu_stateid, &stp->st_stid.sc_stateid, sizeof(stateid_t)); - if (nfsd4_has_session(cstate) && !check_for_locks(stp->st_file, lo)) { - WARN_ON_ONCE(cstate->replay_owner); - release_lockowner(lo); - } - out: nfsd4_bump_seqid(cstate, status); if (!cstate->replay_owner) diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c index 6cd86e0..c2a4701 100644 --- a/fs/nfsd/nfs4xdr.c +++ b/fs/nfsd/nfs4xdr.c @@ -55,6 +55,11 @@ #include "cache.h" #include "netns.h" +#ifdef CONFIG_NFSD_V4_SECURITY_LABEL +#include <linux/security.h> +#endif + + #define NFSDDBG_FACILITY NFSDDBG_XDR /* @@ -134,6 +139,19 @@ xdr_error: \ } \ } while (0) +static void next_decode_page(struct nfsd4_compoundargs *argp) +{ + argp->pagelist++; + argp->p = page_address(argp->pagelist[0]); + if (argp->pagelen < PAGE_SIZE) { + argp->end = argp->p + (argp->pagelen>>2); + argp->pagelen = 0; + } else { + argp->end = argp->p + (PAGE_SIZE>>2); + argp->pagelen -= PAGE_SIZE; + } +} + static __be32 *read_buf(struct nfsd4_compoundargs *argp, u32 nbytes) { /* We want more bytes than seem to be available. @@ -161,16 +179,7 @@ static __be32 *read_buf(struct nfsd4_compoundargs *argp, u32 nbytes) * guarantee p points to at least nbytes bytes. 
*/ memcpy(p, argp->p, avail); - /* step to next page */ - argp->p = page_address(argp->pagelist[0]); - argp->pagelist++; - if (argp->pagelen < PAGE_SIZE) { - argp->end = argp->p + (argp->pagelen>>2); - argp->pagelen = 0; - } else { - argp->end = argp->p + (PAGE_SIZE>>2); - argp->pagelen -= PAGE_SIZE; - } + next_decode_page(argp); memcpy(((char*)p)+avail, argp->p, (nbytes - avail)); argp->p += XDR_QUADLEN(nbytes - avail); return p; @@ -242,7 +251,8 @@ nfsd4_decode_bitmap(struct nfsd4_compoundargs *argp, u32 *bmval) static __be32 nfsd4_decode_fattr(struct nfsd4_compoundargs *argp, u32 *bmval, - struct iattr *iattr, struct nfs4_acl **acl) + struct iattr *iattr, struct nfs4_acl **acl, + struct xdr_netobj *label) { int expected_len, len = 0; u32 dummy32; @@ -380,6 +390,32 @@ nfsd4_decode_fattr(struct nfsd4_compoundargs *argp, u32 *bmval, goto xdr_error; } } + + label->len = 0; +#ifdef CONFIG_NFSD_V4_SECURITY_LABEL + if (bmval[2] & FATTR4_WORD2_SECURITY_LABEL) { + READ_BUF(4); + len += 4; + READ32(dummy32); /* lfs: we don't use it */ + READ_BUF(4); + len += 4; + READ32(dummy32); /* pi: we don't use it either */ + READ_BUF(4); + len += 4; + READ32(dummy32); + READ_BUF(dummy32); + if (dummy32 > NFSD4_MAX_SEC_LABEL_LEN) + return nfserr_badlabel; + len += (XDR_QUADLEN(dummy32) << 2); + READMEM(buf, dummy32); + label->data = kzalloc(dummy32 + 1, GFP_KERNEL); + if (!label->data) + return nfserr_jukebox; + defer_free(argp, kfree, label->data); + memcpy(label->data, buf, dummy32); + } +#endif + if (bmval[0] & ~NFSD_WRITEABLE_ATTRS_WORD0 || bmval[1] & ~NFSD_WRITEABLE_ATTRS_WORD1 || bmval[2] & ~NFSD_WRITEABLE_ATTRS_WORD2) @@ -428,7 +464,11 @@ static __be32 nfsd4_decode_cb_sec(struct nfsd4_compoundargs *argp, struct nfsd4_ /* callback_sec_params4 */ READ_BUF(4); READ32(nr_secflavs); - cbs->flavor = (u32)(-1); + if (nr_secflavs) + cbs->flavor = (u32)(-1); + else + /* Is this legal? 
Be generous, take it to mean AUTH_NONE: */ + cbs->flavor = 0; for (i = 0; i < nr_secflavs; ++i) { READ_BUF(4); READ32(dummy); @@ -576,7 +616,7 @@ nfsd4_decode_create(struct nfsd4_compoundargs *argp, struct nfsd4_create *create return status; status = nfsd4_decode_fattr(argp, create->cr_bmval, &create->cr_iattr, - &create->cr_acl); + &create->cr_acl, &create->cr_label); if (status) goto out; @@ -827,7 +867,7 @@ nfsd4_decode_open(struct nfsd4_compoundargs *argp, struct nfsd4_open *open) case NFS4_CREATE_UNCHECKED: case NFS4_CREATE_GUARDED: status = nfsd4_decode_fattr(argp, open->op_bmval, - &open->op_iattr, &open->op_acl); + &open->op_iattr, &open->op_acl, &open->op_label); if (status) goto out; break; @@ -841,7 +881,7 @@ nfsd4_decode_open(struct nfsd4_compoundargs *argp, struct nfsd4_open *open) READ_BUF(NFS4_VERIFIER_SIZE); COPYMEM(open->op_verf.data, NFS4_VERIFIER_SIZE); status = nfsd4_decode_fattr(argp, open->op_bmval, - &open->op_iattr, &open->op_acl); + &open->op_iattr, &open->op_acl, &open->op_label); if (status) goto out; break; @@ -1063,7 +1103,7 @@ nfsd4_decode_setattr(struct nfsd4_compoundargs *argp, struct nfsd4_setattr *seta if (status) return status; return nfsd4_decode_fattr(argp, setattr->sa_bmval, &setattr->sa_iattr, - &setattr->sa_acl); + &setattr->sa_acl, &setattr->sa_label); } static __be32 @@ -1567,6 +1607,7 @@ struct nfsd4_minorversion_ops { static struct nfsd4_minorversion_ops nfsd4_minorversion[] = { [0] = { nfsd4_dec_ops, ARRAY_SIZE(nfsd4_dec_ops) }, [1] = { nfsd41_dec_ops, ARRAY_SIZE(nfsd41_dec_ops) }, + [2] = { nfsd41_dec_ops, ARRAY_SIZE(nfsd41_dec_ops) }, }; static __be32 @@ -1953,6 +1994,36 @@ nfsd4_encode_aclname(struct svc_rqst *rqstp, struct nfs4_ace *ace, FATTR4_WORD0_RDATTR_ERROR) #define WORD1_ABSENT_FS_ATTRS FATTR4_WORD1_MOUNTED_ON_FILEID +#ifdef CONFIG_NFSD_V4_SECURITY_LABEL +static inline __be32 +nfsd4_encode_security_label(struct svc_rqst *rqstp, void *context, int len, __be32 **pp, int *buflen) +{ + __be32 *p = *pp; + + if 
(*buflen < ((XDR_QUADLEN(len) << 2) + 4 + 4 + 4)) + return nfserr_resource; + + /* + * For now we use a 0 here to indicate the null translation; in + * the future we may place a call to translation code here. + */ + if ((*buflen -= 8) < 0) + return nfserr_resource; + + WRITE32(0); /* lfs */ + WRITE32(0); /* pi */ + p = xdr_encode_opaque(p, context, len); + *buflen -= (XDR_QUADLEN(len) << 2) + 4; + + *pp = p; + return 0; +} +#else +static inline __be32 +nfsd4_encode_security_label(struct svc_rqst *rqstp, void *context, int len, __be32 **pp, int *buflen) +{ return 0; } +#endif + static __be32 fattr_handle_absent_fs(u32 *bmval0, u32 *bmval1, u32 *rdattr_err) { /* As per referral draft: */ @@ -2012,6 +2083,9 @@ nfsd4_encode_fattr(struct svc_fh *fhp, struct svc_export *exp, int err; int aclsupport = 0; struct nfs4_acl *acl = NULL; + void *context = NULL; + int contextlen; + bool contextsupport = false; struct nfsd4_compoundres *resp = rqstp->rq_resp; u32 minorversion = resp->cstate.minorversion; struct path path = { @@ -2065,6 +2139,21 @@ nfsd4_encode_fattr(struct svc_fh *fhp, struct svc_export *exp, } } +#ifdef CONFIG_NFSD_V4_SECURITY_LABEL + if ((bmval[2] & FATTR4_WORD2_SECURITY_LABEL) || + bmval[0] & FATTR4_WORD0_SUPPORTED_ATTRS) { + err = security_inode_getsecctx(dentry->d_inode, + &context, &contextlen); + contextsupport = (err == 0); + if (bmval2 & FATTR4_WORD2_SECURITY_LABEL) { + if (err == -EOPNOTSUPP) + bmval2 &= ~FATTR4_WORD2_SECURITY_LABEL; + else if (err) + goto out_nfserr; + } + } +#endif /* CONFIG_NFSD_V4_SECURITY_LABEL */ + if (bmval2) { if ((buflen -= 16) < 0) goto out_resource; @@ -2093,6 +2182,8 @@ nfsd4_encode_fattr(struct svc_fh *fhp, struct svc_export *exp, if (!aclsupport) word0 &= ~FATTR4_WORD0_ACL; + if (!contextsupport) + word2 &= ~FATTR4_WORD2_SECURITY_LABEL; if (!word2) { if ((buflen -= 12) < 0) goto out_resource; @@ -2400,6 +2491,12 @@ out_acl: get_parent_attributes(exp, &stat); WRITE64(stat.ino); } + if (bmval2 & FATTR4_WORD2_SECURITY_LABEL) 
{ + status = nfsd4_encode_security_label(rqstp, context, + contextlen, &p, &buflen); + if (status) + goto out; + } if (bmval2 & FATTR4_WORD2_SUPPATTR_EXCLCREAT) { WRITE32(3); WRITE32(NFSD_SUPPATTR_EXCLCREAT_WORD0); @@ -2412,6 +2509,10 @@ out_acl: status = nfs_ok; out: +#ifdef CONFIG_NFSD_V4_SECURITY_LABEL + if (context) + security_release_secctx(context, contextlen); +#endif /* CONFIG_NFSD_V4_SECURITY_LABEL */ kfree(acl); if (fhp == &tempfh) fh_put(&tempfh); @@ -3176,16 +3277,18 @@ nfsd4_encode_setattr(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4 { __be32 *p; - RESERVE_SPACE(12); + RESERVE_SPACE(16); if (nfserr) { - WRITE32(2); + WRITE32(3); + WRITE32(0); WRITE32(0); WRITE32(0); } else { - WRITE32(2); + WRITE32(3); WRITE32(setattr->sa_bmval[0]); WRITE32(setattr->sa_bmval[1]); + WRITE32(setattr->sa_bmval[2]); } ADJUST_ARGS(); return nfserr; @@ -3226,6 +3329,14 @@ nfsd4_encode_write(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_w return nfserr; } +static const u32 nfs4_minimal_spo_must_enforce[2] = { + [1] = 1 << (OP_BIND_CONN_TO_SESSION - 32) | + 1 << (OP_EXCHANGE_ID - 32) | + 1 << (OP_CREATE_SESSION - 32) | + 1 << (OP_DESTROY_SESSION - 32) | + 1 << (OP_DESTROY_CLIENTID - 32) +}; + static __be32 nfsd4_encode_exchange_id(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_exchange_id *exid) @@ -3249,7 +3360,8 @@ nfsd4_encode_exchange_id(struct nfsd4_compoundres *resp, __be32 nfserr, 8 /* eir_clientid */ + 4 /* eir_sequenceid */ + 4 /* eir_flags */ + - 4 /* spr_how (SP4_NONE) */ + + 4 /* spr_how */ + + 8 /* spo_must_enforce, spo_must_allow */ + 8 /* so_minor_id */ + 4 /* so_major_id.len */ + (XDR_QUADLEN(major_id_sz) * 4) + @@ -3261,9 +3373,21 @@ nfsd4_encode_exchange_id(struct nfsd4_compoundres *resp, __be32 nfserr, WRITE32(exid->seqid); WRITE32(exid->flags); - /* state_protect4_r. 
Currently only support SP4_NONE */ - BUG_ON(exid->spa_how != SP4_NONE); WRITE32(exid->spa_how); + switch (exid->spa_how) { + case SP4_NONE: + break; + case SP4_MACH_CRED: + /* spo_must_enforce bitmap: */ + WRITE32(2); + WRITE32(nfs4_minimal_spo_must_enforce[0]); + WRITE32(nfs4_minimal_spo_must_enforce[1]); + /* empty spo_must_allow bitmap: */ + WRITE32(0); + break; + default: + WARN_ON_ONCE(1); + } /* The server_owner struct */ WRITE64(minor_id); /* Minor id */ @@ -3635,13 +3759,17 @@ nfs4svc_encode_compoundres(struct svc_rqst *rqstp, __be32 *p, struct nfsd4_compo iov->iov_len = ((char*)resp->p) - (char*)iov->iov_base; BUG_ON(iov->iov_len > PAGE_SIZE); if (nfsd4_has_session(cs)) { + struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id); + struct nfs4_client *clp = cs->session->se_client; if (cs->status != nfserr_replay_cache) { nfsd4_store_cache_entry(resp); cs->slot->sl_flags &= ~NFSD4_SLOT_INUSE; } /* Renew the clientid on success and on replay */ - put_client_renew(cs->session->se_client); + spin_lock(&nn->client_lock); nfsd4_put_session(cs->session); + spin_unlock(&nn->client_lock); + put_client_renew(clp); } return 1; } diff --git a/fs/nfsd/nfsd.h b/fs/nfsd/nfsd.h index c0d9317..30f34ab 100644 --- a/fs/nfsd/nfsd.h +++ b/fs/nfsd/nfsd.h @@ -24,7 +24,7 @@ /* * nfsd version */ -#define NFSD_SUPPORTED_MINOR_VERSION 1 +#define NFSD_SUPPORTED_MINOR_VERSION 2 /* * Maximum blocksizes supported by daemon under various circumstances. 
*/ @@ -53,7 +53,6 @@ struct readdir_cd { extern struct svc_program nfsd_program; extern struct svc_version nfsd_version2, nfsd_version3, nfsd_version4; -extern u32 nfsd_supported_minorversion; extern struct mutex nfsd_mutex; extern spinlock_t nfsd_drc_lock; extern unsigned long nfsd_drc_max_mem; @@ -328,6 +327,13 @@ void nfsd_lockd_shutdown(void); #define NFSD4_1_SUPPORTED_ATTRS_WORD2 \ (NFSD4_SUPPORTED_ATTRS_WORD2 | FATTR4_WORD2_SUPPATTR_EXCLCREAT) +#ifdef CONFIG_NFSD_V4_SECURITY_LABEL +#define NFSD4_2_SUPPORTED_ATTRS_WORD2 \ + (NFSD4_1_SUPPORTED_ATTRS_WORD2 | FATTR4_WORD2_SECURITY_LABEL) +#else +#define NFSD4_2_SUPPORTED_ATTRS_WORD2 0 +#endif + static inline u32 nfsd_suppattrs0(u32 minorversion) { return minorversion ? NFSD4_1_SUPPORTED_ATTRS_WORD0 @@ -342,8 +348,11 @@ static inline u32 nfsd_suppattrs1(u32 minorversion) static inline u32 nfsd_suppattrs2(u32 minorversion) { - return minorversion ? NFSD4_1_SUPPORTED_ATTRS_WORD2 - : NFSD4_SUPPORTED_ATTRS_WORD2; + switch (minorversion) { + default: return NFSD4_2_SUPPORTED_ATTRS_WORD2; + case 1: return NFSD4_1_SUPPORTED_ATTRS_WORD2; + case 0: return NFSD4_SUPPORTED_ATTRS_WORD2; + } } /* These will return ERR_INVAL if specified in GETATTR or READDIR. 
*/ @@ -356,7 +365,11 @@ static inline u32 nfsd_suppattrs2(u32 minorversion) #define NFSD_WRITEABLE_ATTRS_WORD1 \ (FATTR4_WORD1_MODE | FATTR4_WORD1_OWNER | FATTR4_WORD1_OWNER_GROUP \ | FATTR4_WORD1_TIME_ACCESS_SET | FATTR4_WORD1_TIME_MODIFY_SET) +#ifdef CONFIG_NFSD_V4_SECURITY_LABEL +#define NFSD_WRITEABLE_ATTRS_WORD2 FATTR4_WORD2_SECURITY_LABEL +#else #define NFSD_WRITEABLE_ATTRS_WORD2 0 +#endif #define NFSD_SUPPATTR_EXCLCREAT_WORD0 \ NFSD_WRITEABLE_ATTRS_WORD0 diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c index 262df5c..760c85a 100644 --- a/fs/nfsd/nfssvc.c +++ b/fs/nfsd/nfssvc.c @@ -116,7 +116,10 @@ struct svc_program nfsd_program = { }; -u32 nfsd_supported_minorversion; +static bool nfsd_supported_minorversions[NFSD_SUPPORTED_MINOR_VERSION + 1] = { + [0] = 1, + [1] = 1, +}; int nfsd_vers(int vers, enum vers_op change) { @@ -151,15 +154,13 @@ int nfsd_minorversion(u32 minorversion, enum vers_op change) return -1; switch(change) { case NFSD_SET: - nfsd_supported_minorversion = minorversion; + nfsd_supported_minorversions[minorversion] = true; break; case NFSD_CLEAR: - if (minorversion == 0) - return -1; - nfsd_supported_minorversion = minorversion - 1; + nfsd_supported_minorversions[minorversion] = false; break; case NFSD_TEST: - return minorversion <= nfsd_supported_minorversion; + return nfsd_supported_minorversions[minorversion]; case NFSD_AVAIL: return minorversion <= NFSD_SUPPORTED_MINOR_VERSION; } diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h index 274e2a1..424d8f5 100644 --- a/fs/nfsd/state.h +++ b/fs/nfsd/state.h @@ -246,6 +246,7 @@ struct nfs4_client { nfs4_verifier cl_verifier; /* generated by client */ time_t cl_time; /* time of last lease renewal */ struct sockaddr_storage cl_addr; /* client ipaddress */ + bool cl_mach_cred; /* SP4_MACH_CRED in force */ struct svc_cred cl_cred; /* setclientid principal */ clientid_t cl_clientid; /* generated by server */ nfs4_verifier cl_confirm; /* generated by server */ diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c 
index a6bc8a7..c827acb 100644 --- a/fs/nfsd/vfs.c +++ b/fs/nfsd/vfs.c @@ -28,6 +28,7 @@ #include <asm/uaccess.h> #include <linux/exportfs.h> #include <linux/writeback.h> +#include <linux/security.h> #ifdef CONFIG_NFSD_V3 #include "xdr3.h" @@ -621,6 +622,33 @@ int nfsd4_is_junction(struct dentry *dentry) return 0; return 1; } +#ifdef CONFIG_NFSD_V4_SECURITY_LABEL +__be32 nfsd4_set_nfs4_label(struct svc_rqst *rqstp, struct svc_fh *fhp, + struct xdr_netobj *label) +{ + __be32 error; + int host_error; + struct dentry *dentry; + + error = fh_verify(rqstp, fhp, 0 /* S_IFREG */, NFSD_MAY_SATTR); + if (error) + return error; + + dentry = fhp->fh_dentry; + + mutex_lock(&dentry->d_inode->i_mutex); + host_error = security_inode_setsecctx(dentry, label->data, label->len); + mutex_unlock(&dentry->d_inode->i_mutex); + return nfserrno(host_error); +} +#else +__be32 nfsd4_set_nfs4_label(struct svc_rqst *rqstp, struct svc_fh *fhp, + struct xdr_netobj *label) +{ + return nfserr_notsupp; +} +#endif + #endif /* defined(CONFIG_NFSD_V4) */ #ifdef CONFIG_NFSD_V3 @@ -802,9 +830,10 @@ nfsd_open(struct svc_rqst *rqstp, struct svc_fh *fhp, umode_t type, flags = O_WRONLY|O_LARGEFILE; } *filp = dentry_open(&path, flags, current_cred()); - if (IS_ERR(*filp)) + if (IS_ERR(*filp)) { host_err = PTR_ERR(*filp); - else { + *filp = NULL; + } else { host_err = ima_file_check(*filp, may_flags); if (may_flags & NFSD_MAY_64BIT_COOKIE) diff --git a/fs/nfsd/vfs.h b/fs/nfsd/vfs.h index 5b58941..a4be2e3 100644 --- a/fs/nfsd/vfs.h +++ b/fs/nfsd/vfs.h @@ -39,7 +39,6 @@ typedef int (*nfsd_dirop_t)(struct inode *, struct dentry *, int, int); /* nfsd/vfs.c */ -int fh_lock_parent(struct svc_fh *, struct dentry *); int nfsd_racache_init(int); void nfsd_racache_shutdown(void); int nfsd_cross_mnt(struct svc_rqst *rqstp, struct dentry **dpp, @@ -56,6 +55,8 @@ int nfsd_mountpoint(struct dentry *, struct svc_export *); __be32 nfsd4_set_nfs4_acl(struct svc_rqst *, struct svc_fh *, struct nfs4_acl *); int 
nfsd4_get_nfs4_acl(struct svc_rqst *, struct dentry *, struct nfs4_acl **); +__be32 nfsd4_set_nfs4_label(struct svc_rqst *, struct svc_fh *, + struct xdr_netobj *); #endif /* CONFIG_NFSD_V4 */ __be32 nfsd_create(struct svc_rqst *, struct svc_fh *, char *name, int len, struct iattr *attrs, @@ -92,17 +93,13 @@ __be32 nfsd_remove(struct svc_rqst *, struct svc_fh *, char *, int); __be32 nfsd_unlink(struct svc_rqst *, struct svc_fh *, int type, char *name, int len); -int nfsd_truncate(struct svc_rqst *, struct svc_fh *, - unsigned long size); __be32 nfsd_readdir(struct svc_rqst *, struct svc_fh *, loff_t *, struct readdir_cd *, filldir_t); __be32 nfsd_statfs(struct svc_rqst *, struct svc_fh *, struct kstatfs *, int access); -int nfsd_notify_change(struct inode *, struct iattr *); __be32 nfsd_permission(struct svc_rqst *, struct svc_export *, struct dentry *, int); -int nfsd_sync_dir(struct dentry *dp); #if defined(CONFIG_NFSD_V2_ACL) || defined(CONFIG_NFSD_V3_ACL) struct posix_acl *nfsd_get_posix_acl(struct svc_fh *, int); diff --git a/fs/nfsd/xdr4.h b/fs/nfsd/xdr4.h index 3b271d2..b3ed644 100644 --- a/fs/nfsd/xdr4.h +++ b/fs/nfsd/xdr4.h @@ -40,6 +40,7 @@ #include "state.h" #include "nfsd.h" +#define NFSD4_MAX_SEC_LABEL_LEN 2048 #define NFSD4_MAX_TAGLEN 128 #define XDR_LEN(n) (((n) + 3) & ~3) @@ -118,6 +119,7 @@ struct nfsd4_create { struct iattr cr_iattr; /* request */ struct nfsd4_change_info cr_cinfo; /* response */ struct nfs4_acl *cr_acl; + struct xdr_netobj cr_label; }; #define cr_linklen u.link.namelen #define cr_linkname u.link.name @@ -246,6 +248,7 @@ struct nfsd4_open { struct nfs4_file *op_file; /* used during processing */ struct nfs4_ol_stateid *op_stp; /* used during processing */ struct nfs4_acl *op_acl; + struct xdr_netobj op_label; }; #define op_iattr iattr @@ -330,6 +333,7 @@ struct nfsd4_setattr { u32 sa_bmval[3]; /* request */ struct iattr sa_iattr; /* request */ struct nfs4_acl *sa_acl; + struct xdr_netobj sa_label; }; struct nfsd4_setclientid { 
diff --git a/fs/ocfs2/aops.c b/fs/ocfs2/aops.c index 79736a2..2abf97b 100644 --- a/fs/ocfs2/aops.c +++ b/fs/ocfs2/aops.c @@ -1757,7 +1757,7 @@ try_again: goto out; } else if (ret == 1) { clusters_need = wc->w_clen; - ret = ocfs2_refcount_cow(inode, filp, di_bh, + ret = ocfs2_refcount_cow(inode, di_bh, wc->w_cpos, wc->w_clen, UINT_MAX); if (ret) { mlog_errno(ret); diff --git a/fs/ocfs2/dir.c b/fs/ocfs2/dir.c index eb760d8..30544ce 100644 --- a/fs/ocfs2/dir.c +++ b/fs/ocfs2/dir.c @@ -2153,11 +2153,9 @@ int ocfs2_empty_dir(struct inode *inode) { int ret; struct ocfs2_empty_dir_priv priv = { - .ctx.actor = ocfs2_empty_dir_filldir + .ctx.actor = ocfs2_empty_dir_filldir, }; - memset(&priv, 0, sizeof(priv)); - if (ocfs2_dir_indexed(inode)) { ret = ocfs2_empty_dir_dx(inode, &priv); if (ret) diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c index 41000f2..3261d71 100644 --- a/fs/ocfs2/file.c +++ b/fs/ocfs2/file.c @@ -370,7 +370,7 @@ static int ocfs2_cow_file_pos(struct inode *inode, if (!(ext_flags & OCFS2_EXT_REFCOUNTED)) goto out; - return ocfs2_refcount_cow(inode, NULL, fe_bh, cpos, 1, cpos+1); + return ocfs2_refcount_cow(inode, fe_bh, cpos, 1, cpos+1); out: return status; @@ -899,7 +899,7 @@ static int ocfs2_zero_extend_get_range(struct inode *inode, zero_clusters = last_cpos - zero_cpos; if (needs_cow) { - rc = ocfs2_refcount_cow(inode, NULL, di_bh, zero_cpos, + rc = ocfs2_refcount_cow(inode, di_bh, zero_cpos, zero_clusters, UINT_MAX); if (rc) { mlog_errno(rc); @@ -2078,7 +2078,7 @@ static int ocfs2_prepare_inode_for_refcount(struct inode *inode, *meta_level = 1; - ret = ocfs2_refcount_cow(inode, file, di_bh, cpos, clusters, UINT_MAX); + ret = ocfs2_refcount_cow(inode, di_bh, cpos, clusters, UINT_MAX); if (ret) mlog_errno(ret); out: diff --git a/fs/ocfs2/journal.h b/fs/ocfs2/journal.h index 96f9ac2..0a99273 100644 --- a/fs/ocfs2/journal.h +++ b/fs/ocfs2/journal.h @@ -537,7 +537,7 @@ static inline int ocfs2_calc_extend_credits(struct super_block *sb, extent_blocks = 1 + 1 
+ le16_to_cpu(root_el->l_tree_depth); return bitmap_blocks + sysfile_bitmap_blocks + extent_blocks + - ocfs2_quota_trans_credits(sb) + bits_wanted; + ocfs2_quota_trans_credits(sb); } static inline int ocfs2_calc_symlink_credits(struct super_block *sb) diff --git a/fs/ocfs2/move_extents.c b/fs/ocfs2/move_extents.c index f1fc172..452068b 100644 --- a/fs/ocfs2/move_extents.c +++ b/fs/ocfs2/move_extents.c @@ -69,7 +69,7 @@ static int __ocfs2_move_extent(handle_t *handle, u64 ino = ocfs2_metadata_cache_owner(context->et.et_ci); u64 old_blkno = ocfs2_clusters_to_blocks(inode->i_sb, p_cpos); - ret = ocfs2_duplicate_clusters_by_page(handle, context->file, cpos, + ret = ocfs2_duplicate_clusters_by_page(handle, inode, cpos, p_cpos, new_p_cpos, len); if (ret) { mlog_errno(ret); diff --git a/fs/ocfs2/refcounttree.c b/fs/ocfs2/refcounttree.c index 998b17e..a70d604 100644 --- a/fs/ocfs2/refcounttree.c +++ b/fs/ocfs2/refcounttree.c @@ -49,7 +49,6 @@ struct ocfs2_cow_context { struct inode *inode; - struct file *file; u32 cow_start; u32 cow_len; struct ocfs2_extent_tree data_et; @@ -66,7 +65,7 @@ struct ocfs2_cow_context { u32 *num_clusters, unsigned int *extent_flags); int (*cow_duplicate_clusters)(handle_t *handle, - struct file *file, + struct inode *inode, u32 cpos, u32 old_cluster, u32 new_cluster, u32 new_len); }; @@ -2922,14 +2921,12 @@ static int ocfs2_clear_cow_buffer(handle_t *handle, struct buffer_head *bh) } int ocfs2_duplicate_clusters_by_page(handle_t *handle, - struct file *file, + struct inode *inode, u32 cpos, u32 old_cluster, u32 new_cluster, u32 new_len) { int ret = 0, partial; - struct inode *inode = file_inode(file); - struct ocfs2_caching_info *ci = INODE_CACHE(inode); - struct super_block *sb = ocfs2_metadata_cache_get_super(ci); + struct super_block *sb = inode->i_sb; u64 new_block = ocfs2_clusters_to_blocks(sb, new_cluster); struct page *page; pgoff_t page_index; @@ -2965,6 +2962,11 @@ int ocfs2_duplicate_clusters_by_page(handle_t *handle, to = map_end & 
(PAGE_CACHE_SIZE - 1); page = find_or_create_page(mapping, page_index, GFP_NOFS); + if (!page) { + ret = -ENOMEM; + mlog_errno(ret); + break; + } /* * In case PAGE_CACHE_SIZE <= CLUSTER_SIZE, This page @@ -2973,13 +2975,6 @@ int ocfs2_duplicate_clusters_by_page(handle_t *handle, if (PAGE_CACHE_SIZE <= OCFS2_SB(sb)->s_clustersize) BUG_ON(PageDirty(page)); - if (PageReadahead(page)) { - page_cache_async_readahead(mapping, - &file->f_ra, file, - page, page_index, - readahead_pages); - } - if (!PageUptodate(page)) { ret = block_read_full_page(page, ocfs2_get_block); if (ret) { @@ -2999,7 +2994,8 @@ int ocfs2_duplicate_clusters_by_page(handle_t *handle, } } - ocfs2_map_and_dirty_page(inode, handle, from, to, + ocfs2_map_and_dirty_page(inode, + handle, from, to, page, 0, &new_block); mark_page_accessed(page); unlock: @@ -3015,12 +3011,11 @@ unlock: } int ocfs2_duplicate_clusters_by_jbd(handle_t *handle, - struct file *file, + struct inode *inode, u32 cpos, u32 old_cluster, u32 new_cluster, u32 new_len) { int ret = 0; - struct inode *inode = file_inode(file); struct super_block *sb = inode->i_sb; struct ocfs2_caching_info *ci = INODE_CACHE(inode); int i, blocks = ocfs2_clusters_to_blocks(sb, new_len); @@ -3145,7 +3140,7 @@ static int ocfs2_replace_clusters(handle_t *handle, /*If the old clusters is unwritten, no need to duplicate. 
*/ if (!(ext_flags & OCFS2_EXT_UNWRITTEN)) { - ret = context->cow_duplicate_clusters(handle, context->file, + ret = context->cow_duplicate_clusters(handle, context->inode, cpos, old, new, len); if (ret) { mlog_errno(ret); @@ -3423,35 +3418,12 @@ static int ocfs2_replace_cow(struct ocfs2_cow_context *context) return ret; } -static void ocfs2_readahead_for_cow(struct inode *inode, - struct file *file, - u32 start, u32 len) -{ - struct address_space *mapping; - pgoff_t index; - unsigned long num_pages; - int cs_bits = OCFS2_SB(inode->i_sb)->s_clustersize_bits; - - if (!file) - return; - - mapping = file->f_mapping; - num_pages = (len << cs_bits) >> PAGE_CACHE_SHIFT; - if (!num_pages) - num_pages = 1; - - index = ((loff_t)start << cs_bits) >> PAGE_CACHE_SHIFT; - page_cache_sync_readahead(mapping, &file->f_ra, file, - index, num_pages); -} - /* * Starting at cpos, try to CoW write_len clusters. Don't CoW * past max_cpos. This will stop when it runs into a hole or an * unrefcounted extent. */ static int ocfs2_refcount_cow_hunk(struct inode *inode, - struct file *file, struct buffer_head *di_bh, u32 cpos, u32 write_len, u32 max_cpos) { @@ -3480,8 +3452,6 @@ static int ocfs2_refcount_cow_hunk(struct inode *inode, BUG_ON(cow_len == 0); - ocfs2_readahead_for_cow(inode, file, cow_start, cow_len); - context = kzalloc(sizeof(struct ocfs2_cow_context), GFP_NOFS); if (!context) { ret = -ENOMEM; @@ -3503,7 +3473,6 @@ static int ocfs2_refcount_cow_hunk(struct inode *inode, context->ref_root_bh = ref_root_bh; context->cow_duplicate_clusters = ocfs2_duplicate_clusters_by_page; context->get_clusters = ocfs2_di_get_clusters; - context->file = file; ocfs2_init_dinode_extent_tree(&context->data_et, INODE_CACHE(inode), di_bh); @@ -3532,7 +3501,6 @@ out: * clusters between cpos and cpos+write_len are safe to modify. 
*/ int ocfs2_refcount_cow(struct inode *inode, - struct file *file, struct buffer_head *di_bh, u32 cpos, u32 write_len, u32 max_cpos) { @@ -3552,7 +3520,7 @@ int ocfs2_refcount_cow(struct inode *inode, num_clusters = write_len; if (ext_flags & OCFS2_EXT_REFCOUNTED) { - ret = ocfs2_refcount_cow_hunk(inode, file, di_bh, cpos, + ret = ocfs2_refcount_cow_hunk(inode, di_bh, cpos, num_clusters, max_cpos); if (ret) { mlog_errno(ret); diff --git a/fs/ocfs2/refcounttree.h b/fs/ocfs2/refcounttree.h index 7754608..6422bbc 100644 --- a/fs/ocfs2/refcounttree.h +++ b/fs/ocfs2/refcounttree.h @@ -53,7 +53,7 @@ int ocfs2_prepare_refcount_change_for_del(struct inode *inode, int *credits, int *ref_blocks); int ocfs2_refcount_cow(struct inode *inode, - struct file *filep, struct buffer_head *di_bh, + struct buffer_head *di_bh, u32 cpos, u32 write_len, u32 max_cpos); typedef int (ocfs2_post_refcount_func)(struct inode *inode, @@ -85,11 +85,11 @@ int ocfs2_refcount_cow_xattr(struct inode *inode, u32 cpos, u32 write_len, struct ocfs2_post_refcount *post); int ocfs2_duplicate_clusters_by_page(handle_t *handle, - struct file *file, + struct inode *inode, u32 cpos, u32 old_cluster, u32 new_cluster, u32 new_len); int ocfs2_duplicate_clusters_by_jbd(handle_t *handle, - struct file *file, + struct inode *inode, u32 cpos, u32 old_cluster, u32 new_cluster, u32 new_len); int ocfs2_cow_sync_writeback(struct super_block *sb, @@ -823,7 +823,7 @@ static inline int build_open_flags(int flags, umode_t mode, struct open_flags *o int lookup_flags = 0; int acc_mode; - if (flags & O_CREAT) + if (flags & (O_CREAT | __O_TMPFILE)) op->mode = (mode & S_IALLUGO) | S_IFREG; else op->mode = 0; @@ -840,10 +840,12 @@ static inline int build_open_flags(int flags, umode_t mode, struct open_flags *o if (flags & __O_SYNC) flags |= O_DSYNC; - if (flags & O_TMPFILE) { - if (!(flags & O_CREAT)) + if (flags & __O_TMPFILE) { + if ((flags & O_TMPFILE_MASK) != O_TMPFILE) return -EINVAL; acc_mode = MAY_OPEN | ACC_MODE(flags); 
+ if (!(acc_mode & MAY_WRITE)) + return -EINVAL; } else if (flags & O_PATH) { /* * If we have O_PATH in the open flag. Then we diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c index dbf61f6..107d026 100644 --- a/fs/proc/task_mmu.c +++ b/fs/proc/task_mmu.c @@ -730,8 +730,16 @@ static inline void clear_soft_dirty(struct vm_area_struct *vma, * of how soft-dirty works. */ pte_t ptent = *pte; - ptent = pte_wrprotect(ptent); - ptent = pte_clear_flags(ptent, _PAGE_SOFT_DIRTY); + + if (pte_present(ptent)) { + ptent = pte_wrprotect(ptent); + ptent = pte_clear_flags(ptent, _PAGE_SOFT_DIRTY); + } else if (is_swap_pte(ptent)) { + ptent = pte_swp_clear_soft_dirty(ptent); + } else if (pte_file(ptent)) { + ptent = pte_file_clear_soft_dirty(ptent); + } + set_pte_at(vma->vm_mm, addr, pte, ptent); #endif } @@ -752,14 +760,15 @@ static int clear_refs_pte_range(pmd_t *pmd, unsigned long addr, pte = pte_offset_map_lock(vma->vm_mm, pmd, addr, &ptl); for (; addr != end; pte++, addr += PAGE_SIZE) { ptent = *pte; - if (!pte_present(ptent)) - continue; if (cp->type == CLEAR_REFS_SOFT_DIRTY) { clear_soft_dirty(vma, addr, pte); continue; } + if (!pte_present(ptent)) + continue; + page = vm_normal_page(vma, addr, ptent); if (!page) continue; @@ -859,7 +868,7 @@ typedef struct { } pagemap_entry_t; struct pagemapread { - int pos, len; + int pos, len; /* units: PM_ENTRY_BYTES, not bytes */ pagemap_entry_t *buffer; bool v2; }; @@ -867,7 +876,7 @@ struct pagemapread { #define PAGEMAP_WALK_SIZE (PMD_SIZE) #define PAGEMAP_WALK_MASK (PMD_MASK) -#define PM_ENTRY_BYTES sizeof(u64) +#define PM_ENTRY_BYTES sizeof(pagemap_entry_t) #define PM_STATUS_BITS 3 #define PM_STATUS_OFFSET (64 - PM_STATUS_BITS) #define PM_STATUS_MASK (((1LL << PM_STATUS_BITS) - 1) << PM_STATUS_OFFSET) @@ -930,8 +939,10 @@ static void pte_to_pagemap_entry(pagemap_entry_t *pme, struct pagemapread *pm, flags = PM_PRESENT; page = vm_normal_page(vma, addr, pte); } else if (is_swap_pte(pte)) { - swp_entry_t entry = 
pte_to_swp_entry(pte); - + swp_entry_t entry; + if (pte_swp_soft_dirty(pte)) + flags2 |= __PM_SOFT_DIRTY; + entry = pte_to_swp_entry(pte); frame = swp_type(entry) | (swp_offset(entry) << MAX_SWAPFILES_SHIFT); flags = PM_SWAP; @@ -1116,8 +1127,8 @@ static ssize_t pagemap_read(struct file *file, char __user *buf, goto out_task; pm.v2 = soft_dirty_cleared; - pm.len = PM_ENTRY_BYTES * (PAGEMAP_WALK_SIZE >> PAGE_SHIFT); - pm.buffer = kmalloc(pm.len, GFP_TEMPORARY); + pm.len = (PAGEMAP_WALK_SIZE >> PAGE_SHIFT); + pm.buffer = kmalloc(pm.len * PM_ENTRY_BYTES, GFP_TEMPORARY); ret = -ENOMEM; if (!pm.buffer) goto out_task; diff --git a/fs/proc/vmcore.c b/fs/proc/vmcore.c index 2850317..a1a16eb 100644 --- a/fs/proc/vmcore.c +++ b/fs/proc/vmcore.c @@ -223,7 +223,7 @@ static inline char *alloc_elfnotes_buf(size_t notes_sz) * regions in the 1st kernel pointed to by PT_LOAD entries) into * virtually contiguous user-space in ELF layout. */ -#ifdef CONFIG_MMU +#if defined(CONFIG_MMU) && !defined(CONFIG_S390) static int mmap_vmcore(struct file *file, struct vm_area_struct *vma) { size_t size = vma->vm_end - vma->vm_start; diff --git a/fs/reiserfs/procfs.c b/fs/reiserfs/procfs.c index 33532f7..a958444 100644 --- a/fs/reiserfs/procfs.c +++ b/fs/reiserfs/procfs.c @@ -19,12 +19,13 @@ /* * LOCKING: * - * We rely on new Alexander Viro's super-block locking. + * These guys are evicted from procfs as the very first step in ->kill_sb(). 
* */ -static int show_version(struct seq_file *m, struct super_block *sb) +static int show_version(struct seq_file *m, void *unused) { + struct super_block *sb = m->private; char *format; if (REISERFS_SB(sb)->s_properties & (1 << REISERFS_3_6)) { @@ -66,8 +67,9 @@ static int show_version(struct seq_file *m, struct super_block *sb) #define DJP( x ) le32_to_cpu( jp -> x ) #define JF( x ) ( r -> s_journal -> x ) -static int show_super(struct seq_file *m, struct super_block *sb) +static int show_super(struct seq_file *m, void *unused) { + struct super_block *sb = m->private; struct reiserfs_sb_info *r = REISERFS_SB(sb); seq_printf(m, "state: \t%s\n" @@ -128,8 +130,9 @@ static int show_super(struct seq_file *m, struct super_block *sb) return 0; } -static int show_per_level(struct seq_file *m, struct super_block *sb) +static int show_per_level(struct seq_file *m, void *unused) { + struct super_block *sb = m->private; struct reiserfs_sb_info *r = REISERFS_SB(sb); int level; @@ -186,8 +189,9 @@ static int show_per_level(struct seq_file *m, struct super_block *sb) return 0; } -static int show_bitmap(struct seq_file *m, struct super_block *sb) +static int show_bitmap(struct seq_file *m, void *unused) { + struct super_block *sb = m->private; struct reiserfs_sb_info *r = REISERFS_SB(sb); seq_printf(m, "free_block: %lu\n" @@ -218,8 +222,9 @@ static int show_bitmap(struct seq_file *m, struct super_block *sb) return 0; } -static int show_on_disk_super(struct seq_file *m, struct super_block *sb) +static int show_on_disk_super(struct seq_file *m, void *unused) { + struct super_block *sb = m->private; struct reiserfs_sb_info *sb_info = REISERFS_SB(sb); struct reiserfs_super_block *rs = sb_info->s_rs; int hash_code = DFL(s_hash_function_code); @@ -261,8 +266,9 @@ static int show_on_disk_super(struct seq_file *m, struct super_block *sb) return 0; } -static int show_oidmap(struct seq_file *m, struct super_block *sb) +static int show_oidmap(struct seq_file *m, void *unused) { + struct 
super_block *sb = m->private; struct reiserfs_sb_info *sb_info = REISERFS_SB(sb); struct reiserfs_super_block *rs = sb_info->s_rs; unsigned int mapsize = le16_to_cpu(rs->s_v1.s_oid_cursize); @@ -291,8 +297,9 @@ static int show_oidmap(struct seq_file *m, struct super_block *sb) return 0; } -static int show_journal(struct seq_file *m, struct super_block *sb) +static int show_journal(struct seq_file *m, void *unused) { + struct super_block *sb = m->private; struct reiserfs_sb_info *r = REISERFS_SB(sb); struct reiserfs_super_block *rs = r->s_rs; struct journal_params *jp = &rs->s_v1.s_journal; @@ -383,92 +390,24 @@ static int show_journal(struct seq_file *m, struct super_block *sb) return 0; } -/* iterator */ -static int test_sb(struct super_block *sb, void *data) -{ - return data == sb; -} - -static int set_sb(struct super_block *sb, void *data) -{ - return -ENOENT; -} - -struct reiserfs_seq_private { - struct super_block *sb; - int (*show) (struct seq_file *, struct super_block *); -}; - -static void *r_start(struct seq_file *m, loff_t * pos) -{ - struct reiserfs_seq_private *priv = m->private; - loff_t l = *pos; - - if (l) - return NULL; - - if (IS_ERR(sget(&reiserfs_fs_type, test_sb, set_sb, 0, priv->sb))) - return NULL; - - up_write(&priv->sb->s_umount); - return priv->sb; -} - -static void *r_next(struct seq_file *m, void *v, loff_t * pos) -{ - ++*pos; - if (v) - deactivate_super(v); - return NULL; -} - -static void r_stop(struct seq_file *m, void *v) -{ - if (v) - deactivate_super(v); -} - -static int r_show(struct seq_file *m, void *v) -{ - struct reiserfs_seq_private *priv = m->private; - return priv->show(m, v); -} - -static const struct seq_operations r_ops = { - .start = r_start, - .next = r_next, - .stop = r_stop, - .show = r_show, -}; - static int r_open(struct inode *inode, struct file *file) { - struct reiserfs_seq_private *priv; - int ret = seq_open_private(file, &r_ops, - sizeof(struct reiserfs_seq_private)); - - if (!ret) { - struct seq_file *m = 
file->private_data; - priv = m->private; - priv->sb = proc_get_parent_data(inode); - priv->show = PDE_DATA(inode); - } - return ret; + return single_open(file, PDE_DATA(inode), + proc_get_parent_data(inode)); } static const struct file_operations r_file_operations = { .open = r_open, .read = seq_read, .llseek = seq_lseek, - .release = seq_release_private, - .owner = THIS_MODULE, + .release = single_release, }; static struct proc_dir_entry *proc_info_root = NULL; static const char proc_info_root_name[] = "fs/reiserfs"; static void add_file(struct super_block *sb, char *name, - int (*func) (struct seq_file *, struct super_block *)) + int (*func) (struct seq_file *, void *)) { proc_create_data(name, 0, REISERFS_SB(sb)->procdir, &r_file_operations, func); diff --git a/fs/reiserfs/super.c b/fs/reiserfs/super.c index f8a23c3..e2e202a 100644 --- a/fs/reiserfs/super.c +++ b/fs/reiserfs/super.c @@ -499,6 +499,7 @@ int remove_save_link(struct inode *inode, int truncate) static void reiserfs_kill_sb(struct super_block *s) { if (REISERFS_SB(s)) { + reiserfs_proc_info_done(s); /* * Force any pending inode evictions to occur now. 
Any * inodes to be removed that have extended attributes @@ -554,8 +555,6 @@ static void reiserfs_put_super(struct super_block *s) REISERFS_SB(s)->reserved_blocks); } - reiserfs_proc_info_done(s); - reiserfs_write_unlock(s); mutex_destroy(&REISERFS_SB(s)->lock); kfree(s->s_fs_info); diff --git a/fs/select.c b/fs/select.c index 6b14dc7..35d4adc7 100644 --- a/fs/select.c +++ b/fs/select.c @@ -28,6 +28,7 @@ #include <linux/hrtimer.h> #include <linux/sched/rt.h> #include <linux/freezer.h> +#include <net/busy_poll.h> #include <asm/uaccess.h> @@ -386,9 +387,10 @@ get_max: #define POLLEX_SET (POLLPRI) static inline void wait_key_set(poll_table *wait, unsigned long in, - unsigned long out, unsigned long bit) + unsigned long out, unsigned long bit, + unsigned int ll_flag) { - wait->_key = POLLEX_SET; + wait->_key = POLLEX_SET | ll_flag; if (in & bit) wait->_key |= POLLIN_SET; if (out & bit) @@ -402,6 +404,8 @@ int do_select(int n, fd_set_bits *fds, struct timespec *end_time) poll_table *wait; int retval, i, timed_out = 0; unsigned long slack = 0; + unsigned int busy_flag = net_busy_loop_on() ? 
POLL_BUSY_LOOP : 0; + unsigned long busy_end = 0; rcu_read_lock(); retval = max_select_fd(n, fds); @@ -424,6 +428,7 @@ int do_select(int n, fd_set_bits *fds, struct timespec *end_time) retval = 0; for (;;) { unsigned long *rinp, *routp, *rexp, *inp, *outp, *exp; + bool can_busy_loop = false; inp = fds->in; outp = fds->out; exp = fds->ex; rinp = fds->res_in; routp = fds->res_out; rexp = fds->res_ex; @@ -451,7 +456,8 @@ int do_select(int n, fd_set_bits *fds, struct timespec *end_time) f_op = f.file->f_op; mask = DEFAULT_POLLMASK; if (f_op && f_op->poll) { - wait_key_set(wait, in, out, bit); + wait_key_set(wait, in, out, + bit, busy_flag); mask = (*f_op->poll)(f.file, wait); } fdput(f); @@ -470,6 +476,18 @@ int do_select(int n, fd_set_bits *fds, struct timespec *end_time) retval++; wait->_qproc = NULL; } + /* got something, stop busy polling */ + if (retval) { + can_busy_loop = false; + busy_flag = 0; + + /* + * only remember a returned + * POLL_BUSY_LOOP if we asked for it + */ + } else if (busy_flag & mask) + can_busy_loop = true; + } } if (res_in) @@ -488,6 +506,17 @@ int do_select(int n, fd_set_bits *fds, struct timespec *end_time) break; } + /* only if found POLL_BUSY_LOOP sockets && not out of time */ + if (can_busy_loop && !need_resched()) { + if (!busy_end) { + busy_end = busy_loop_end_time(); + continue; + } + if (!busy_loop_timeout(busy_end)) + continue; + } + busy_flag = 0; + /* * If this is the first loop and we have a timeout * given, then we convert to ktime_t and set the to @@ -719,7 +748,9 @@ struct poll_list { * pwait poll_table will be used by the fd-provided poll handler for waiting, * if pwait->_qproc is non-NULL. 
*/ -static inline unsigned int do_pollfd(struct pollfd *pollfd, poll_table *pwait) +static inline unsigned int do_pollfd(struct pollfd *pollfd, poll_table *pwait, + bool *can_busy_poll, + unsigned int busy_flag) { unsigned int mask; int fd; @@ -733,7 +764,10 @@ static inline unsigned int do_pollfd(struct pollfd *pollfd, poll_table *pwait) mask = DEFAULT_POLLMASK; if (f.file->f_op && f.file->f_op->poll) { pwait->_key = pollfd->events|POLLERR|POLLHUP; + pwait->_key |= busy_flag; mask = f.file->f_op->poll(f.file, pwait); + if (mask & busy_flag) + *can_busy_poll = true; } /* Mask out unneeded events. */ mask &= pollfd->events | POLLERR | POLLHUP; @@ -752,6 +786,8 @@ static int do_poll(unsigned int nfds, struct poll_list *list, ktime_t expire, *to = NULL; int timed_out = 0, count = 0; unsigned long slack = 0; + unsigned int busy_flag = net_busy_loop_on() ? POLL_BUSY_LOOP : 0; + unsigned long busy_end = 0; /* Optimise the no-wait case */ if (end_time && !end_time->tv_sec && !end_time->tv_nsec) { @@ -764,6 +800,7 @@ static int do_poll(unsigned int nfds, struct poll_list *list, for (;;) { struct poll_list *walk; + bool can_busy_loop = false; for (walk = list; walk != NULL; walk = walk->next) { struct pollfd * pfd, * pfd_end; @@ -778,9 +815,13 @@ static int do_poll(unsigned int nfds, struct poll_list *list, * this. They'll get immediately deregistered * when we break out and return. 
*/ - if (do_pollfd(pfd, pt)) { + if (do_pollfd(pfd, pt, &can_busy_loop, + busy_flag)) { count++; pt->_qproc = NULL; + /* found something, stop busy polling */ + busy_flag = 0; + can_busy_loop = false; } } } @@ -797,6 +838,17 @@ static int do_poll(unsigned int nfds, struct poll_list *list, if (count || timed_out) break; + /* only if found POLL_BUSY_LOOP sockets && not out of time */ + if (can_busy_loop && !need_resched()) { + if (!busy_end) { + busy_end = busy_loop_end_time(); + continue; + } + if (!busy_loop_timeout(busy_end)) + continue; + } + busy_flag = 0; + /* * If this is the first loop and we have a timeout * given, then we convert to ktime_t and set the to @@ -336,19 +336,19 @@ EXPORT_SYMBOL(deactivate_super); * and want to turn it into a full-blown active reference. grab_super() * is called with sb_lock held and drops it. Returns 1 in case of * success, 0 if we had failed (superblock contents was already dead or - * dying when grab_super() had been called). + * dying when grab_super() had been called). Note that this is only + * called for superblocks not in rundown mode (== ones still on ->fs_supers + * of their type), so increment of ->s_count is OK here. 
*/ static int grab_super(struct super_block *s) __releases(sb_lock) { - if (atomic_inc_not_zero(&s->s_active)) { - spin_unlock(&sb_lock); - return 1; - } - /* it's going away */ s->s_count++; spin_unlock(&sb_lock); - /* wait for it to die */ down_write(&s->s_umount); + if ((s->s_flags & MS_BORN) && atomic_inc_not_zero(&s->s_active)) { + put_super(s); + return 1; + } up_write(&s->s_umount); put_super(s); return 0; @@ -463,11 +463,6 @@ retry: destroy_super(s); s = NULL; } - down_write(&old->s_umount); - if (unlikely(!(old->s_flags & MS_BORN))) { - deactivate_locked_super(old); - goto retry; - } return old; } } @@ -660,10 +655,10 @@ restart: if (hlist_unhashed(&sb->s_instances)) continue; if (sb->s_bdev == bdev) { - if (grab_super(sb)) /* drops sb_lock */ - return sb; - else + if (!grab_super(sb)) goto restart; + up_write(&sb->s_umount); + return sb; } } spin_unlock(&sb_lock); diff --git a/fs/sysfs/group.c b/fs/sysfs/group.c index aec3d5c..09a1a25 100644 --- a/fs/sysfs/group.c +++ b/fs/sysfs/group.c @@ -20,38 +20,64 @@ static void remove_files(struct sysfs_dirent *dir_sd, struct kobject *kobj, const struct attribute_group *grp) { struct attribute *const* attr; - int i; + struct bin_attribute *const* bin_attr; - for (i = 0, attr = grp->attrs; *attr; i++, attr++) - sysfs_hash_and_remove(dir_sd, NULL, (*attr)->name); + if (grp->attrs) + for (attr = grp->attrs; *attr; attr++) + sysfs_hash_and_remove(dir_sd, NULL, (*attr)->name); + if (grp->bin_attrs) + for (bin_attr = grp->bin_attrs; *bin_attr; bin_attr++) + sysfs_remove_bin_file(kobj, *bin_attr); } static int create_files(struct sysfs_dirent *dir_sd, struct kobject *kobj, const struct attribute_group *grp, int update) { struct attribute *const* attr; + struct bin_attribute *const* bin_attr; int error = 0, i; - for (i = 0, attr = grp->attrs; *attr && !error; i++, attr++) { - umode_t mode = 0; + if (grp->attrs) { + for (i = 0, attr = grp->attrs; *attr && !error; i++, attr++) { + umode_t mode = 0; + + /* + * In update mode, 
we're changing the permissions or + * visibility. Do this by first removing then + * re-adding (if required) the file. + */ + if (update) + sysfs_hash_and_remove(dir_sd, NULL, + (*attr)->name); + if (grp->is_visible) { + mode = grp->is_visible(kobj, *attr, i); + if (!mode) + continue; + } + error = sysfs_add_file_mode(dir_sd, *attr, + SYSFS_KOBJ_ATTR, + (*attr)->mode | mode); + if (unlikely(error)) + break; + } + if (error) { + remove_files(dir_sd, kobj, grp); + goto exit; + } + } - /* in update mode, we're changing the permissions or - * visibility. Do this by first removing then - * re-adding (if required) the file */ - if (update) - sysfs_hash_and_remove(dir_sd, NULL, (*attr)->name); - if (grp->is_visible) { - mode = grp->is_visible(kobj, *attr, i); - if (!mode) - continue; + if (grp->bin_attrs) { + for (bin_attr = grp->bin_attrs; *bin_attr; bin_attr++) { + if (update) + sysfs_remove_bin_file(kobj, *bin_attr); + error = sysfs_create_bin_file(kobj, *bin_attr); + if (error) + break; } - error = sysfs_add_file_mode(dir_sd, *attr, SYSFS_KOBJ_ATTR, - (*attr)->mode | mode); - if (unlikely(error)) - break; + if (error) + remove_files(dir_sd, kobj, grp); } - if (error) - remove_files(dir_sd, kobj, grp); +exit: return error; } @@ -67,8 +93,8 @@ static int internal_create_group(struct kobject *kobj, int update, /* Updates may happen before the object has been instantiated */ if (unlikely(update && !kobj->sd)) return -EINVAL; - if (!grp->attrs) { - WARN(1, "sysfs: attrs not set by subsystem for group: %s/%s\n", + if (!grp->attrs && !grp->bin_attrs) { + WARN(1, "sysfs: (bin_)attrs not set by subsystem for group: %s/%s\n", kobj->name, grp->name ? 
"" : grp->name); return -EINVAL; } diff --git a/fs/xfs/xfs_attr_leaf.c b/fs/xfs/xfs_attr_leaf.c index 31d3cd1..b800fbc 100644 --- a/fs/xfs/xfs_attr_leaf.c +++ b/fs/xfs/xfs_attr_leaf.c @@ -690,6 +690,8 @@ xfs_attr_shortform_to_leaf(xfs_da_args_t *args) sf = (xfs_attr_shortform_t *)tmpbuffer; xfs_idata_realloc(dp, -size, XFS_ATTR_FORK); + xfs_bmap_local_to_extents_empty(dp, XFS_ATTR_FORK); + bp = NULL; error = xfs_da_grow_inode(args, &blkno); if (error) { diff --git a/fs/xfs/xfs_bmap.c b/fs/xfs/xfs_bmap.c index 8904284..05c698c 100644 --- a/fs/xfs/xfs_bmap.c +++ b/fs/xfs/xfs_bmap.c @@ -1161,6 +1161,24 @@ xfs_bmap_extents_to_btree( * since the file data needs to get logged so things will stay consistent. * (The bmap-level manipulations are ok, though). */ +void +xfs_bmap_local_to_extents_empty( + struct xfs_inode *ip, + int whichfork) +{ + struct xfs_ifork *ifp = XFS_IFORK_PTR(ip, whichfork); + + ASSERT(XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_LOCAL); + ASSERT(ifp->if_bytes == 0); + ASSERT(XFS_IFORK_NEXTENTS(ip, whichfork) == 0); + + xfs_bmap_forkoff_reset(ip->i_mount, ip, whichfork); + ifp->if_flags &= ~XFS_IFINLINE; + ifp->if_flags |= XFS_IFEXTENTS; + XFS_IFORK_FMT_SET(ip, whichfork, XFS_DINODE_FMT_EXTENTS); +} + + STATIC int /* error */ xfs_bmap_local_to_extents( xfs_trans_t *tp, /* transaction pointer */ @@ -1174,9 +1192,12 @@ xfs_bmap_local_to_extents( struct xfs_inode *ip, struct xfs_ifork *ifp)) { - int error; /* error return value */ + int error = 0; int flags; /* logging flags returned */ xfs_ifork_t *ifp; /* inode fork pointer */ + xfs_alloc_arg_t args; /* allocation arguments */ + xfs_buf_t *bp; /* buffer for extent block */ + xfs_bmbt_rec_host_t *ep; /* extent record pointer */ /* * We don't want to deal with the case of keeping inode data inline yet. 
@@ -1185,68 +1206,65 @@ xfs_bmap_local_to_extents( ASSERT(!(S_ISREG(ip->i_d.di_mode) && whichfork == XFS_DATA_FORK)); ifp = XFS_IFORK_PTR(ip, whichfork); ASSERT(XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_LOCAL); + + if (!ifp->if_bytes) { + xfs_bmap_local_to_extents_empty(ip, whichfork); + flags = XFS_ILOG_CORE; + goto done; + } + flags = 0; error = 0; - if (ifp->if_bytes) { - xfs_alloc_arg_t args; /* allocation arguments */ - xfs_buf_t *bp; /* buffer for extent block */ - xfs_bmbt_rec_host_t *ep;/* extent record pointer */ - - ASSERT((ifp->if_flags & - (XFS_IFINLINE|XFS_IFEXTENTS|XFS_IFEXTIREC)) == XFS_IFINLINE); - memset(&args, 0, sizeof(args)); - args.tp = tp; - args.mp = ip->i_mount; - args.firstblock = *firstblock; - /* - * Allocate a block. We know we need only one, since the - * file currently fits in an inode. - */ - if (*firstblock == NULLFSBLOCK) { - args.fsbno = XFS_INO_TO_FSB(args.mp, ip->i_ino); - args.type = XFS_ALLOCTYPE_START_BNO; - } else { - args.fsbno = *firstblock; - args.type = XFS_ALLOCTYPE_NEAR_BNO; - } - args.total = total; - args.minlen = args.maxlen = args.prod = 1; - error = xfs_alloc_vextent(&args); - if (error) - goto done; - - /* Can't fail, the space was reserved. */ - ASSERT(args.fsbno != NULLFSBLOCK); - ASSERT(args.len == 1); - *firstblock = args.fsbno; - bp = xfs_btree_get_bufl(args.mp, tp, args.fsbno, 0); - - /* initialise the block and copy the data */ - init_fn(tp, bp, ip, ifp); - - /* account for the change in fork size and log everything */ - xfs_trans_log_buf(tp, bp, 0, ifp->if_bytes - 1); - xfs_bmap_forkoff_reset(args.mp, ip, whichfork); - xfs_idata_realloc(ip, -ifp->if_bytes, whichfork); - xfs_iext_add(ifp, 0, 1); - ep = xfs_iext_get_ext(ifp, 0); - xfs_bmbt_set_allf(ep, 0, args.fsbno, 1, XFS_EXT_NORM); - trace_xfs_bmap_post_update(ip, 0, - whichfork == XFS_ATTR_FORK ? 
BMAP_ATTRFORK : 0, - _THIS_IP_); - XFS_IFORK_NEXT_SET(ip, whichfork, 1); - ip->i_d.di_nblocks = 1; - xfs_trans_mod_dquot_byino(tp, ip, - XFS_TRANS_DQ_BCOUNT, 1L); - flags |= xfs_ilog_fext(whichfork); + ASSERT((ifp->if_flags & (XFS_IFINLINE|XFS_IFEXTENTS|XFS_IFEXTIREC)) == + XFS_IFINLINE); + memset(&args, 0, sizeof(args)); + args.tp = tp; + args.mp = ip->i_mount; + args.firstblock = *firstblock; + /* + * Allocate a block. We know we need only one, since the + * file currently fits in an inode. + */ + if (*firstblock == NULLFSBLOCK) { + args.fsbno = XFS_INO_TO_FSB(args.mp, ip->i_ino); + args.type = XFS_ALLOCTYPE_START_BNO; } else { - ASSERT(XFS_IFORK_NEXTENTS(ip, whichfork) == 0); - xfs_bmap_forkoff_reset(ip->i_mount, ip, whichfork); + args.fsbno = *firstblock; + args.type = XFS_ALLOCTYPE_NEAR_BNO; } - ifp->if_flags &= ~XFS_IFINLINE; - ifp->if_flags |= XFS_IFEXTENTS; - XFS_IFORK_FMT_SET(ip, whichfork, XFS_DINODE_FMT_EXTENTS); + args.total = total; + args.minlen = args.maxlen = args.prod = 1; + error = xfs_alloc_vextent(&args); + if (error) + goto done; + + /* Can't fail, the space was reserved. */ + ASSERT(args.fsbno != NULLFSBLOCK); + ASSERT(args.len == 1); + *firstblock = args.fsbno; + bp = xfs_btree_get_bufl(args.mp, tp, args.fsbno, 0); + + /* initialise the block and copy the data */ + init_fn(tp, bp, ip, ifp); + + /* account for the change in fork size and log everything */ + xfs_trans_log_buf(tp, bp, 0, ifp->if_bytes - 1); + xfs_idata_realloc(ip, -ifp->if_bytes, whichfork); + xfs_bmap_local_to_extents_empty(ip, whichfork); flags |= XFS_ILOG_CORE; + + xfs_iext_add(ifp, 0, 1); + ep = xfs_iext_get_ext(ifp, 0); + xfs_bmbt_set_allf(ep, 0, args.fsbno, 1, XFS_EXT_NORM); + trace_xfs_bmap_post_update(ip, 0, + whichfork == XFS_ATTR_FORK ? 
BMAP_ATTRFORK : 0, + _THIS_IP_); + XFS_IFORK_NEXT_SET(ip, whichfork, 1); + ip->i_d.di_nblocks = 1; + xfs_trans_mod_dquot_byino(tp, ip, + XFS_TRANS_DQ_BCOUNT, 1L); + flags |= xfs_ilog_fext(whichfork); + done: *logflagsp = flags; return error; @@ -1323,25 +1341,6 @@ xfs_bmap_add_attrfork_extents( } /* - * Block initialisation function for local to extent format conversion. - * - * This shouldn't actually be called by anyone, so make sure debug kernels cause - * a noticable failure. - */ -STATIC void -xfs_bmap_local_to_extents_init_fn( - struct xfs_trans *tp, - struct xfs_buf *bp, - struct xfs_inode *ip, - struct xfs_ifork *ifp) -{ - ASSERT(0); - bp->b_ops = &xfs_bmbt_buf_ops; - memcpy(bp->b_addr, ifp->if_u1.if_data, ifp->if_bytes); - xfs_trans_buf_set_type(tp, bp, XFS_BLFT_BTREE_BUF); -} - -/* * Called from xfs_bmap_add_attrfork to handle local format files. Each * different data fork content type needs a different callout to do the * conversion. Some are basic and only require special block initialisation @@ -1381,9 +1380,9 @@ xfs_bmap_add_attrfork_local( flags, XFS_DATA_FORK, xfs_symlink_local_to_remote); - return xfs_bmap_local_to_extents(tp, ip, firstblock, 1, flags, - XFS_DATA_FORK, - xfs_bmap_local_to_extents_init_fn); + /* should only be called for types that support local format data */ + ASSERT(0); + return EFSCORRUPTED; } /* @@ -4907,20 +4906,19 @@ xfs_bmapi_write( orig_mval = mval; orig_nmap = *nmap; #endif + whichfork = (flags & XFS_BMAPI_ATTRFORK) ? + XFS_ATTR_FORK : XFS_DATA_FORK; ASSERT(*nmap >= 1); ASSERT(*nmap <= XFS_BMAP_MAX_NMAP); ASSERT(!(flags & XFS_BMAPI_IGSTATE)); ASSERT(tp != NULL); ASSERT(len > 0); - - whichfork = (flags & XFS_BMAPI_ATTRFORK) ? 
- XFS_ATTR_FORK : XFS_DATA_FORK; + ASSERT(XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_LOCAL); if (unlikely(XFS_TEST_ERROR( (XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_EXTENTS && - XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_BTREE && - XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_LOCAL), + XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_BTREE), mp, XFS_ERRTAG_BMAPIFORMAT, XFS_RANDOM_BMAPIFORMAT))) { XFS_ERROR_REPORT("xfs_bmapi_write", XFS_ERRLEVEL_LOW, mp); return XFS_ERROR(EFSCORRUPTED); @@ -4933,37 +4931,6 @@ xfs_bmapi_write( XFS_STATS_INC(xs_blk_mapw); - if (XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_LOCAL) { - /* - * XXX (dgc): This assumes we are only called for inodes that - * contain content neutral data in local format. Anything that - * contains caller-specific data in local format that needs - * transformation to move to a block format needs to do the - * conversion to extent format itself. - * - * Directory data forks and attribute forks handle this - * themselves, but with the addition of metadata verifiers every - * data fork in local format now contains caller specific data - * and as such conversion through this function is likely to be - * broken. - * - * The only likely user of this branch is for remote symlinks, - * but we cannot overwrite the data fork contents of the symlink - * (EEXIST occurs higher up the stack) and so it will never go - * from local format to extent format here. Hence I don't think - * this branch is ever executed intentionally and we should - * consider removing it and asserting that xfs_bmapi_write() - * cannot be called directly on local format forks. i.e. callers - * are completely responsible for local to extent format - * conversion, not xfs_bmapi_write(). 
- */ - error = xfs_bmap_local_to_extents(tp, ip, firstblock, total, - &bma.logflags, whichfork, - xfs_bmap_local_to_extents_init_fn); - if (error) - goto error0; - } - if (*firstblock == NULLFSBLOCK) { if (XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_BTREE) bma.minleft = be16_to_cpu(ifp->if_broot->bb_level) + 1; diff --git a/fs/xfs/xfs_bmap.h b/fs/xfs/xfs_bmap.h index 5f469c3..1cf1292 100644 --- a/fs/xfs/xfs_bmap.h +++ b/fs/xfs/xfs_bmap.h @@ -172,6 +172,7 @@ void xfs_bmap_trace_exlist(struct xfs_inode *ip, xfs_extnum_t cnt, #endif int xfs_bmap_add_attrfork(struct xfs_inode *ip, int size, int rsvd); +void xfs_bmap_local_to_extents_empty(struct xfs_inode *ip, int whichfork); void xfs_bmap_add_free(xfs_fsblock_t bno, xfs_filblks_t len, struct xfs_bmap_free *flist, struct xfs_mount *mp); void xfs_bmap_cancel(struct xfs_bmap_free *flist); diff --git a/fs/xfs/xfs_dinode.h b/fs/xfs/xfs_dinode.h index f7a0e95..e5869b5 100644 --- a/fs/xfs/xfs_dinode.h +++ b/fs/xfs/xfs_dinode.h @@ -39,6 +39,9 @@ typedef struct xfs_timestamp { * There is a very similar struct icdinode in xfs_inode which matches the * layout of the first 96 bytes of this structure, but is kept in native * format instead of big endian. + * + * Note: di_flushiter is only used by v1/2 inodes - it's effectively a zeroed + * padding field for v3 inodes. */ typedef struct xfs_dinode { __be16 di_magic; /* inode magic # = XFS_DINODE_MAGIC */ @@ -132,9 +135,6 @@ typedef enum xfs_dinode_fmt { #define XFS_LITINO(mp, version) \ ((int)(((mp)->m_sb.sb_inodesize) - xfs_dinode_size(version))) -#define XFS_BROOT_SIZE_ADJ(ip) \ - (XFS_BMBT_BLOCK_LEN((ip)->i_mount) - sizeof(xfs_bmdr_block_t)) - /* * Inode data & attribute fork sizes, per inode. 
*/ diff --git a/fs/xfs/xfs_dir2_block.c b/fs/xfs/xfs_dir2_block.c index 09aea02..5e7fbd7 100644 --- a/fs/xfs/xfs_dir2_block.c +++ b/fs/xfs/xfs_dir2_block.c @@ -29,6 +29,7 @@ #include "xfs_dinode.h" #include "xfs_inode.h" #include "xfs_inode_item.h" +#include "xfs_bmap.h" #include "xfs_buf_item.h" #include "xfs_dir2.h" #include "xfs_dir2_format.h" @@ -1164,13 +1165,15 @@ xfs_dir2_sf_to_block( __be16 *tagp; /* end of data entry */ xfs_trans_t *tp; /* transaction pointer */ struct xfs_name name; + struct xfs_ifork *ifp; trace_xfs_dir2_sf_to_block(args); dp = args->dp; tp = args->trans; mp = dp->i_mount; - ASSERT(dp->i_df.if_flags & XFS_IFINLINE); + ifp = XFS_IFORK_PTR(dp, XFS_DATA_FORK); + ASSERT(ifp->if_flags & XFS_IFINLINE); /* * Bomb out if the shortform directory is way too short. */ @@ -1179,22 +1182,23 @@ xfs_dir2_sf_to_block( return XFS_ERROR(EIO); } - oldsfp = (xfs_dir2_sf_hdr_t *)dp->i_df.if_u1.if_data; + oldsfp = (xfs_dir2_sf_hdr_t *)ifp->if_u1.if_data; - ASSERT(dp->i_df.if_bytes == dp->i_d.di_size); - ASSERT(dp->i_df.if_u1.if_data != NULL); + ASSERT(ifp->if_bytes == dp->i_d.di_size); + ASSERT(ifp->if_u1.if_data != NULL); ASSERT(dp->i_d.di_size >= xfs_dir2_sf_hdr_size(oldsfp->i8count)); + ASSERT(dp->i_d.di_nextents == 0); /* * Copy the directory into a temporary buffer. * Then pitch the incore inode data so we can make extents. */ - sfp = kmem_alloc(dp->i_df.if_bytes, KM_SLEEP); - memcpy(sfp, oldsfp, dp->i_df.if_bytes); + sfp = kmem_alloc(ifp->if_bytes, KM_SLEEP); + memcpy(sfp, oldsfp, ifp->if_bytes); - xfs_idata_realloc(dp, -dp->i_df.if_bytes, XFS_DATA_FORK); + xfs_idata_realloc(dp, -ifp->if_bytes, XFS_DATA_FORK); + xfs_bmap_local_to_extents_empty(dp, XFS_DATA_FORK); dp->i_d.di_size = 0; - xfs_trans_log_inode(tp, dp, XFS_ILOG_CORE); /* * Add block 0 to the inode. 
diff --git a/fs/xfs/xfs_dquot.c b/fs/xfs/xfs_dquot.c index f01012d..0adf27e 100644 --- a/fs/xfs/xfs_dquot.c +++ b/fs/xfs/xfs_dquot.c @@ -936,6 +936,7 @@ xfs_qm_dqput_final( { struct xfs_quotainfo *qi = dqp->q_mount->m_quotainfo; struct xfs_dquot *gdqp; + struct xfs_dquot *pdqp; trace_xfs_dqput_free(dqp); @@ -949,21 +950,29 @@ xfs_qm_dqput_final( /* * If we just added a udquot to the freelist, then we want to release - * the gdquot reference that it (probably) has. Otherwise it'll keep - * the gdquot from getting reclaimed. + * the gdquot/pdquot reference that it (probably) has. Otherwise it'll + * keep the gdquot/pdquot from getting reclaimed. */ gdqp = dqp->q_gdquot; if (gdqp) { xfs_dqlock(gdqp); dqp->q_gdquot = NULL; } + + pdqp = dqp->q_pdquot; + if (pdqp) { + xfs_dqlock(pdqp); + dqp->q_pdquot = NULL; + } xfs_dqunlock(dqp); /* - * If we had a group quota hint, release it now. + * If we had a group/project quota hint, release it now. */ if (gdqp) xfs_qm_dqput(gdqp); + if (pdqp) + xfs_qm_dqput(pdqp); } /* diff --git a/fs/xfs/xfs_dquot.h b/fs/xfs/xfs_dquot.h index b596626..55abbca 100644 --- a/fs/xfs/xfs_dquot.h +++ b/fs/xfs/xfs_dquot.h @@ -53,6 +53,7 @@ typedef struct xfs_dquot { xfs_fileoff_t q_fileoffset; /* offset in quotas file */ struct xfs_dquot*q_gdquot; /* group dquot, hint only */ + struct xfs_dquot*q_pdquot; /* project dquot, hint only */ xfs_disk_dquot_t q_core; /* actual usage & quotas */ xfs_dq_logitem_t q_logitem; /* dquot log item */ xfs_qcnt_t q_res_bcount; /* total regular nblks used+reserved */ @@ -118,8 +119,9 @@ static inline int xfs_this_quota_on(struct xfs_mount *mp, int type) case XFS_DQ_USER: return XFS_IS_UQUOTA_ON(mp); case XFS_DQ_GROUP: + return XFS_IS_GQUOTA_ON(mp); case XFS_DQ_PROJ: - return XFS_IS_OQUOTA_ON(mp); + return XFS_IS_PQUOTA_ON(mp); default: return 0; } @@ -131,8 +133,9 @@ static inline xfs_dquot_t *xfs_inode_dquot(struct xfs_inode *ip, int type) case XFS_DQ_USER: return ip->i_udquot; case XFS_DQ_GROUP: - case XFS_DQ_PROJ: 
return ip->i_gdquot; + case XFS_DQ_PROJ: + return ip->i_pdquot; default: return NULL; } diff --git a/fs/xfs/xfs_icache.c b/fs/xfs/xfs_icache.c index 9560dc1f..3f90e1c 100644 --- a/fs/xfs/xfs_icache.c +++ b/fs/xfs/xfs_icache.c @@ -337,6 +337,7 @@ xfs_iget_cache_miss( iflags |= XFS_IDONTCACHE; ip->i_udquot = NULL; ip->i_gdquot = NULL; + ip->i_pdquot = NULL; xfs_iflags_set(ip, iflags); /* insert the new inode */ diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c index 9ecfe1e..bb262c2 100644 --- a/fs/xfs/xfs_inode.c +++ b/fs/xfs/xfs_inode.c @@ -896,7 +896,6 @@ xfs_dinode_to_disk( to->di_projid_lo = cpu_to_be16(from->di_projid_lo); to->di_projid_hi = cpu_to_be16(from->di_projid_hi); memcpy(to->di_pad, from->di_pad, sizeof(to->di_pad)); - to->di_flushiter = cpu_to_be16(from->di_flushiter); to->di_atime.t_sec = cpu_to_be32(from->di_atime.t_sec); to->di_atime.t_nsec = cpu_to_be32(from->di_atime.t_nsec); to->di_mtime.t_sec = cpu_to_be32(from->di_mtime.t_sec); @@ -924,6 +923,9 @@ xfs_dinode_to_disk( to->di_lsn = cpu_to_be64(from->di_lsn); memcpy(to->di_pad2, from->di_pad2, sizeof(to->di_pad2)); uuid_copy(&to->di_uuid, &from->di_uuid); + to->di_flushiter = 0; + } else { + to->di_flushiter = cpu_to_be16(from->di_flushiter); } } @@ -1029,10 +1031,14 @@ xfs_dinode_calc_crc( /* * Read the disk inode attributes into the in-core inode structure. * - * If we are initialising a new inode and we are not utilising the - * XFS_MOUNT_IKEEP inode cluster mode, we can simple build the new inode core - * with a random generation number. If we are keeping inodes around, we need to - * read the inode cluster to get the existing generation number off disk. + * For version 5 superblocks, if we are initialising a new inode and we are not + * utilising the XFS_MOUNT_IKEEP inode cluster mode, we can simple build the new + * inode core with a random generation number. If we are keeping inodes around, + * we need to read the inode cluster to get the existing generation number off + * disk. 
Further, if we are using version 4 superblocks (i.e. v1/v2 inode + * format) then log recovery is dependent on the di_flushiter field being + * initialised from the current on-disk value and hence we must also read the + * inode off disk. */ int xfs_iread( @@ -1054,6 +1060,7 @@ xfs_iread( /* shortcut IO on inode allocation if possible */ if ((iget_flags & XFS_IGET_CREATE) && + xfs_sb_version_hascrc(&mp->m_sb) && !(mp->m_flags & XFS_MOUNT_IKEEP)) { /* initialise the on-disk inode core */ memset(&ip->i_d, 0, sizeof(ip->i_d)); @@ -2156,8 +2163,8 @@ xfs_iroot_realloc( np = (char *)XFS_BMAP_BROOT_PTR_ADDR(mp, ifp->if_broot, 1, (int)new_size); ifp->if_broot_bytes = (int)new_size; - ASSERT(ifp->if_broot_bytes <= - XFS_IFORK_SIZE(ip, whichfork) + XFS_BROOT_SIZE_ADJ(ip)); + ASSERT(XFS_BMAP_BMDR_SPACE(ifp->if_broot) <= + XFS_IFORK_SIZE(ip, whichfork)); memmove(np, op, cur_max * (uint)sizeof(xfs_dfsbno_t)); return; } @@ -2210,8 +2217,9 @@ xfs_iroot_realloc( kmem_free(ifp->if_broot); ifp->if_broot = new_broot; ifp->if_broot_bytes = (int)new_size; - ASSERT(ifp->if_broot_bytes <= - XFS_IFORK_SIZE(ip, whichfork) + XFS_BROOT_SIZE_ADJ(ip)); + if (ifp->if_broot) + ASSERT(XFS_BMAP_BMDR_SPACE(ifp->if_broot) <= + XFS_IFORK_SIZE(ip, whichfork)); return; } @@ -2522,9 +2530,8 @@ xfs_iflush_fork( if ((iip->ili_fields & brootflag[whichfork]) && (ifp->if_broot_bytes > 0)) { ASSERT(ifp->if_broot != NULL); - ASSERT(ifp->if_broot_bytes <= - (XFS_IFORK_SIZE(ip, whichfork) + - XFS_BROOT_SIZE_ADJ(ip))); + ASSERT(XFS_BMAP_BMDR_SPACE(ifp->if_broot) <= + XFS_IFORK_SIZE(ip, whichfork)); xfs_bmbt_to_bmdr(mp, ifp->if_broot, ifp->if_broot_bytes, (xfs_bmdr_block_t *)cp, XFS_DFORK_SIZE(dip, mp, whichfork)); @@ -2882,12 +2889,18 @@ xfs_iflush_int( __func__, ip->i_ino, ip->i_d.di_forkoff, ip); goto corrupt_out; } + /* - * bump the flush iteration count, used to detect flushes which - * postdate a log record during recovery. This is redundant as we now - * log every change and hence this can't happen. 
Still, it doesn't hurt. + * Inode item log recovery for v1/v2 inodes are dependent on the + * di_flushiter count for correct sequencing. We bump the flush + * iteration count so we can detect flushes which postdate a log record + * during recovery. This is redundant as we now log every change and + * hence this can't happen but we need to still do it to ensure + * backwards compatibility with old kernels that predate logging all + * inode changes. */ - ip->i_d.di_flushiter++; + if (ip->i_d.di_version < 3) + ip->i_d.di_flushiter++; /* * Copy the dirty parts of the inode into the on-disk diff --git a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h index 9112979..b55fd34 100644 --- a/fs/xfs/xfs_inode.h +++ b/fs/xfs/xfs_inode.h @@ -250,6 +250,7 @@ typedef struct xfs_inode { struct xfs_mount *i_mount; /* fs mount struct ptr */ struct xfs_dquot *i_udquot; /* user dquot */ struct xfs_dquot *i_gdquot; /* group dquot */ + struct xfs_dquot *i_pdquot; /* project dquot */ /* Inode location stuff */ xfs_ino_t i_ino; /* inode number (agno/agino)*/ diff --git a/fs/xfs/xfs_ioctl.c b/fs/xfs/xfs_ioctl.c index 5e99968..6e2bca5d 100644 --- a/fs/xfs/xfs_ioctl.c +++ b/fs/xfs/xfs_ioctl.c @@ -248,7 +248,7 @@ xfs_open_by_handle( goto out_dput; } - fd = get_unused_fd(); + fd = get_unused_fd_flags(0); if (fd < 0) { error = fd; goto out_dput; @@ -928,7 +928,7 @@ xfs_ioctl_setattr( struct xfs_trans *tp; unsigned int lock_flags = 0; struct xfs_dquot *udqp = NULL; - struct xfs_dquot *gdqp = NULL; + struct xfs_dquot *pdqp = NULL; struct xfs_dquot *olddquot = NULL; int code; @@ -957,7 +957,7 @@ xfs_ioctl_setattr( if (XFS_IS_QUOTA_ON(mp) && (mask & FSX_PROJID)) { code = xfs_qm_vop_dqalloc(ip, ip->i_d.di_uid, ip->i_d.di_gid, fa->fsx_projid, - XFS_QMOPT_PQUOTA, &udqp, &gdqp); + XFS_QMOPT_PQUOTA, &udqp, NULL, &pdqp); if (code) return code; } @@ -994,8 +994,8 @@ xfs_ioctl_setattr( XFS_IS_PQUOTA_ON(mp) && xfs_get_projid(ip) != fa->fsx_projid) { ASSERT(tp); - code = xfs_qm_vop_chown_reserve(tp, ip, udqp, gdqp, - 
capable(CAP_FOWNER) ? + code = xfs_qm_vop_chown_reserve(tp, ip, udqp, NULL, + pdqp, capable(CAP_FOWNER) ? XFS_QMOPT_FORCE_RES : 0); if (code) /* out of quota */ goto error_return; @@ -1113,7 +1113,7 @@ xfs_ioctl_setattr( if (xfs_get_projid(ip) != fa->fsx_projid) { if (XFS_IS_QUOTA_RUNNING(mp) && XFS_IS_PQUOTA_ON(mp)) { olddquot = xfs_qm_vop_chown(tp, ip, - &ip->i_gdquot, gdqp); + &ip->i_pdquot, pdqp); } xfs_set_projid(ip, fa->fsx_projid); @@ -1160,13 +1160,13 @@ xfs_ioctl_setattr( */ xfs_qm_dqrele(olddquot); xfs_qm_dqrele(udqp); - xfs_qm_dqrele(gdqp); + xfs_qm_dqrele(pdqp); return code; error_return: xfs_qm_dqrele(udqp); - xfs_qm_dqrele(gdqp); + xfs_qm_dqrele(pdqp); xfs_trans_cancel(tp, 0); if (lock_flags) xfs_iunlock(ip, lock_flags); diff --git a/fs/xfs/xfs_iops.c b/fs/xfs/xfs_iops.c index c69bbc4..96dda62 100644 --- a/fs/xfs/xfs_iops.c +++ b/fs/xfs/xfs_iops.c @@ -467,9 +467,6 @@ xfs_setattr_mode( ASSERT(tp); ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL)); - if (!in_group_p(inode->i_gid) && !capable(CAP_FSETID)) - mode &= ~S_ISGID; - ip->i_d.di_mode &= S_IFMT; ip->i_d.di_mode |= mode & ~S_IFMT; @@ -495,15 +492,18 @@ xfs_setattr_nonsize( trace_xfs_setattr(ip); - if (mp->m_flags & XFS_MOUNT_RDONLY) - return XFS_ERROR(EROFS); + /* If acls are being inherited, we already have this checked */ + if (!(flags & XFS_ATTR_NOACL)) { + if (mp->m_flags & XFS_MOUNT_RDONLY) + return XFS_ERROR(EROFS); - if (XFS_FORCED_SHUTDOWN(mp)) - return XFS_ERROR(EIO); + if (XFS_FORCED_SHUTDOWN(mp)) + return XFS_ERROR(EIO); - error = -inode_change_ok(inode, iattr); - if (error) - return XFS_ERROR(error); + error = -inode_change_ok(inode, iattr); + if (error) + return XFS_ERROR(error); + } ASSERT((mask & ATTR_SIZE) == 0); @@ -539,7 +539,7 @@ xfs_setattr_nonsize( ASSERT(udqp == NULL); ASSERT(gdqp == NULL); error = xfs_qm_vop_dqalloc(ip, uid, gid, xfs_get_projid(ip), - qflags, &udqp, &gdqp); + qflags, &udqp, &gdqp, NULL); if (error) return error; } @@ -575,7 +575,7 @@ xfs_setattr_nonsize( 
(XFS_IS_GQUOTA_ON(mp) && igid != gid))) { ASSERT(tp); error = xfs_qm_vop_chown_reserve(tp, ip, udqp, gdqp, - capable(CAP_FOWNER) ? + NULL, capable(CAP_FOWNER) ? XFS_QMOPT_FORCE_RES : 0); if (error) /* out of quota */ goto out_trans_cancel; diff --git a/fs/xfs/xfs_itable.c b/fs/xfs/xfs_itable.c index bc92c53..b93e14b 100644 --- a/fs/xfs/xfs_itable.c +++ b/fs/xfs/xfs_itable.c @@ -221,7 +221,6 @@ xfs_bulkstat( char __user *ubufp; /* pointer into user's buffer */ int ubelem; /* spaces used in user's buffer */ int ubused; /* bytes used by formatter */ - xfs_buf_t *bp; /* ptr to on-disk inode cluster buf */ /* * Get the last inode value, see if there's nothing to do. @@ -263,7 +262,6 @@ xfs_bulkstat( rval = 0; while (XFS_BULKSTAT_UBLEFT(ubleft) && agno < mp->m_sb.sb_agcount) { cond_resched(); - bp = NULL; error = xfs_ialloc_read_agi(mp, NULL, agno, &agbp); if (error) { /* @@ -436,27 +434,7 @@ xfs_bulkstat( irbp->ir_freecount < XFS_INODES_PER_CHUNK; chunkidx++, clustidx++, agino++) { ASSERT(chunkidx < XFS_INODES_PER_CHUNK); - /* - * Recompute agbno if this is the - * first inode of the cluster. - * - * Careful with clustidx. There can be - * multiple clusters per chunk, a single - * cluster per chunk or a cluster that has - * inodes represented from several different - * chunks (if blocksize is large). - * - * Because of this, the starting clustidx is - * initialized to zero in this loop but must - * later be reset after reading in the cluster - * buffer. - */ - if ((chunkidx & (nicluster - 1)) == 0) { - agbno = XFS_AGINO_TO_AGBNO(mp, - irbp->ir_startino) + - ((chunkidx & nimask) >> - mp->m_sb.sb_inopblog); - } + ino = XFS_AGINO_TO_INO(mp, agno, agino); /* * Skip if this inode is free. @@ -502,10 +480,6 @@ xfs_bulkstat( cond_resched(); } - - if (bp) - xfs_buf_relse(bp); - /* * Set up for the next loop iteration. 
*/ diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c index 6fcc910a..7681b19 100644 --- a/fs/xfs/xfs_log_recover.c +++ b/fs/xfs/xfs_log_recover.c @@ -2592,8 +2592,16 @@ xlog_recover_inode_pass2( goto error; } - /* Skip replay when the on disk inode is newer than the log one */ - if (dicp->di_flushiter < be16_to_cpu(dip->di_flushiter)) { + /* + * di_flushiter is only valid for v1/2 inodes. All changes for v3 inodes + * are transactional and if ordering is necessary we can determine that + * more accurately by the LSN field in the V3 inode core. Don't trust + * the inode versions we might be changing them here - use the + * superblock flag to determine whether we need to look at di_flushiter + * to skip replay when the on disk inode is newer than the log one + */ + if (!xfs_sb_version_hascrc(&mp->m_sb) && + dicp->di_flushiter < be16_to_cpu(dip->di_flushiter)) { /* * Deal with the wrap case, DI_MAX_FLUSH is less * than smaller numbers @@ -2608,6 +2616,7 @@ xlog_recover_inode_pass2( goto error; } } + /* Take the opportunity to reset the flush iteration count */ dicp->di_flushiter = 0; diff --git a/fs/xfs/xfs_qm.c b/fs/xfs/xfs_qm.c index 7a3e007..d320794 100644 --- a/fs/xfs/xfs_qm.c +++ b/fs/xfs/xfs_qm.c @@ -137,6 +137,7 @@ xfs_qm_dqpurge( struct xfs_mount *mp = dqp->q_mount; struct xfs_quotainfo *qi = mp->m_quotainfo; struct xfs_dquot *gdqp = NULL; + struct xfs_dquot *pdqp = NULL; xfs_dqlock(dqp); if ((dqp->dq_flags & XFS_DQ_FREEING) || dqp->q_nrefs != 0) { @@ -145,8 +146,7 @@ xfs_qm_dqpurge( } /* - * If this quota has a group hint attached, prepare for releasing it - * now. + * If this quota has a hint attached, prepare for releasing it now. 
*/ gdqp = dqp->q_gdquot; if (gdqp) { @@ -154,6 +154,12 @@ xfs_qm_dqpurge( dqp->q_gdquot = NULL; } + pdqp = dqp->q_pdquot; + if (pdqp) { + xfs_dqlock(pdqp); + dqp->q_pdquot = NULL; + } + dqp->dq_flags |= XFS_DQ_FREEING; xfs_dqflock(dqp); @@ -208,6 +214,8 @@ xfs_qm_dqpurge( if (gdqp) xfs_qm_dqput(gdqp); + if (pdqp) + xfs_qm_dqput(pdqp); return 0; } @@ -364,6 +372,10 @@ xfs_qm_unmount_quotas( IRELE(mp->m_quotainfo->qi_gquotaip); mp->m_quotainfo->qi_gquotaip = NULL; } + if (mp->m_quotainfo->qi_pquotaip) { + IRELE(mp->m_quotainfo->qi_pquotaip); + mp->m_quotainfo->qi_pquotaip = NULL; + } } } @@ -410,7 +422,10 @@ xfs_qm_dqattach_one( * be reclaimed as long as we have a ref from inode and we * hold the ilock. */ - dqp = udqhint->q_gdquot; + if (type == XFS_DQ_GROUP) + dqp = udqhint->q_gdquot; + else + dqp = udqhint->q_pdquot; if (dqp && be32_to_cpu(dqp->q_core.d_id) == id) { ASSERT(*IO_idqpp == NULL); @@ -453,28 +468,42 @@ xfs_qm_dqattach_one( /* - * Given a udquot and gdquot, attach a ptr to the group dquot in the - * udquot as a hint for future lookups. + * Given a udquot and group/project type, attach the group/project + * dquot pointer to the udquot as a hint for future lookups. 
*/ STATIC void -xfs_qm_dqattach_grouphint( - xfs_dquot_t *udq, - xfs_dquot_t *gdq) +xfs_qm_dqattach_hint( + struct xfs_inode *ip, + int type) { - xfs_dquot_t *tmp; + struct xfs_dquot **dqhintp; + struct xfs_dquot *dqp; + struct xfs_dquot *udq = ip->i_udquot; + + ASSERT(type == XFS_DQ_GROUP || type == XFS_DQ_PROJ); xfs_dqlock(udq); - tmp = udq->q_gdquot; - if (tmp) { - if (tmp == gdq) + if (type == XFS_DQ_GROUP) { + dqp = ip->i_gdquot; + dqhintp = &udq->q_gdquot; + } else { + dqp = ip->i_pdquot; + dqhintp = &udq->q_pdquot; + } + + if (*dqhintp) { + struct xfs_dquot *tmp; + + if (*dqhintp == dqp) goto done; - udq->q_gdquot = NULL; + tmp = *dqhintp; + *dqhintp = NULL; xfs_qm_dqrele(tmp); } - udq->q_gdquot = xfs_qm_dqhold(gdq); + *dqhintp = xfs_qm_dqhold(dqp); done: xfs_dqunlock(udq); } @@ -527,12 +556,8 @@ xfs_qm_dqattach_locked( } ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL)); - if (XFS_IS_OQUOTA_ON(mp)) { - error = XFS_IS_GQUOTA_ON(mp) ? - xfs_qm_dqattach_one(ip, ip->i_d.di_gid, XFS_DQ_GROUP, - flags & XFS_QMOPT_DQALLOC, - ip->i_udquot, &ip->i_gdquot) : - xfs_qm_dqattach_one(ip, xfs_get_projid(ip), XFS_DQ_PROJ, + if (XFS_IS_GQUOTA_ON(mp)) { + error = xfs_qm_dqattach_one(ip, ip->i_d.di_gid, XFS_DQ_GROUP, flags & XFS_QMOPT_DQALLOC, ip->i_udquot, &ip->i_gdquot); /* @@ -544,14 +569,28 @@ xfs_qm_dqattach_locked( nquotas++; } + ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL)); + if (XFS_IS_PQUOTA_ON(mp)) { + error = xfs_qm_dqattach_one(ip, xfs_get_projid(ip), XFS_DQ_PROJ, + flags & XFS_QMOPT_DQALLOC, + ip->i_udquot, &ip->i_pdquot); + /* + * Don't worry about the udquot that we may have + * attached above. It'll get detached, if not already. + */ + if (error) + goto done; + nquotas++; + } + /* - * Attach this group quota to the user quota as a hint. + * Attach this group/project quota to the user quota as a hint. * This WON'T, in general, result in a thrash. 
*/ - if (nquotas == 2) { + if (nquotas > 1 && ip->i_udquot) { ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL)); - ASSERT(ip->i_udquot); - ASSERT(ip->i_gdquot); + ASSERT(ip->i_gdquot || !XFS_IS_GQUOTA_ON(mp)); + ASSERT(ip->i_pdquot || !XFS_IS_PQUOTA_ON(mp)); /* * We do not have i_udquot locked at this point, but this check @@ -560,7 +599,10 @@ xfs_qm_dqattach_locked( * succeed in general. */ if (ip->i_udquot->q_gdquot != ip->i_gdquot) - xfs_qm_dqattach_grouphint(ip->i_udquot, ip->i_gdquot); + xfs_qm_dqattach_hint(ip, XFS_DQ_GROUP); + + if (ip->i_udquot->q_pdquot != ip->i_pdquot) + xfs_qm_dqattach_hint(ip, XFS_DQ_PROJ); } done: @@ -568,8 +610,10 @@ xfs_qm_dqattach_locked( if (!error) { if (XFS_IS_UQUOTA_ON(mp)) ASSERT(ip->i_udquot); - if (XFS_IS_OQUOTA_ON(mp)) + if (XFS_IS_GQUOTA_ON(mp)) ASSERT(ip->i_gdquot); + if (XFS_IS_PQUOTA_ON(mp)) + ASSERT(ip->i_pdquot); } ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL)); #endif @@ -602,7 +646,7 @@ void xfs_qm_dqdetach( xfs_inode_t *ip) { - if (!(ip->i_udquot || ip->i_gdquot)) + if (!(ip->i_udquot || ip->i_gdquot || ip->i_pdquot)) return; trace_xfs_dquot_dqdetach(ip); @@ -616,6 +660,10 @@ xfs_qm_dqdetach( xfs_qm_dqrele(ip->i_gdquot); ip->i_gdquot = NULL; } + if (ip->i_pdquot) { + xfs_qm_dqrele(ip->i_pdquot); + ip->i_pdquot = NULL; + } } int @@ -660,6 +708,7 @@ xfs_qm_init_quotainfo( INIT_RADIX_TREE(&qinf->qi_uquota_tree, GFP_NOFS); INIT_RADIX_TREE(&qinf->qi_gquota_tree, GFP_NOFS); + INIT_RADIX_TREE(&qinf->qi_pquota_tree, GFP_NOFS); mutex_init(&qinf->qi_tree_lock); INIT_LIST_HEAD(&qinf->qi_lru_list); @@ -761,6 +810,10 @@ xfs_qm_destroy_quotainfo( IRELE(qi->qi_gquotaip); qi->qi_gquotaip = NULL; } + if (qi->qi_pquotaip) { + IRELE(qi->qi_pquotaip); + qi->qi_pquotaip = NULL; + } mutex_destroy(&qi->qi_quotaofflock); kmem_free(qi); mp->m_quotainfo = NULL; @@ -1269,13 +1322,14 @@ xfs_qm_quotacheck( LIST_HEAD (buffer_list); struct xfs_inode *uip = mp->m_quotainfo->qi_uquotaip; struct xfs_inode *gip = mp->m_quotainfo->qi_gquotaip; + struct xfs_inode 
*pip = mp->m_quotainfo->qi_pquotaip; count = INT_MAX; structsz = 1; lastino = 0; flags = 0; - ASSERT(uip || gip); + ASSERT(uip || gip || pip); ASSERT(XFS_IS_QUOTA_RUNNING(mp)); xfs_notice(mp, "Quotacheck needed: Please wait."); @@ -1294,13 +1348,19 @@ xfs_qm_quotacheck( } if (gip) { - error = xfs_qm_dqiterate(mp, gip, XFS_IS_GQUOTA_ON(mp) ? - XFS_QMOPT_GQUOTA : XFS_QMOPT_PQUOTA, + error = xfs_qm_dqiterate(mp, gip, XFS_QMOPT_GQUOTA, &buffer_list); if (error) goto error_return; - flags |= XFS_IS_GQUOTA_ON(mp) ? - XFS_GQUOTA_CHKD : XFS_PQUOTA_CHKD; + flags |= XFS_GQUOTA_CHKD; + } + + if (pip) { + error = xfs_qm_dqiterate(mp, pip, XFS_QMOPT_PQUOTA, + &buffer_list); + if (error) + goto error_return; + flags |= XFS_PQUOTA_CHKD; } do { @@ -1397,6 +1457,7 @@ xfs_qm_init_quotainos( { struct xfs_inode *uip = NULL; struct xfs_inode *gip = NULL; + struct xfs_inode *pip = NULL; int error; __int64_t sbflags = 0; uint flags = 0; @@ -1415,7 +1476,7 @@ xfs_qm_init_quotainos( if (error) return XFS_ERROR(error); } - if (XFS_IS_OQUOTA_ON(mp) && + if (XFS_IS_GQUOTA_ON(mp) && mp->m_sb.sb_gquotino != NULLFSINO) { ASSERT(mp->m_sb.sb_gquotino > 0); error = xfs_iget(mp, NULL, mp->m_sb.sb_gquotino, @@ -1423,6 +1484,15 @@ xfs_qm_init_quotainos( if (error) goto error_rele; } + /* XXX: Use gquotino for now */ + if (XFS_IS_PQUOTA_ON(mp) && + mp->m_sb.sb_gquotino != NULLFSINO) { + ASSERT(mp->m_sb.sb_gquotino > 0); + error = xfs_iget(mp, NULL, mp->m_sb.sb_gquotino, + 0, 0, &pip); + if (error) + goto error_rele; + } } else { flags |= XFS_QMOPT_SBVERSION; sbflags |= (XFS_SB_VERSIONNUM | XFS_SB_UQUOTINO | @@ -1430,7 +1500,7 @@ xfs_qm_init_quotainos( } /* - * Create the two inodes, if they don't exist already. The changes + * Create the three inodes, if they don't exist already. The changes * made above will get added to a transaction and logged in one of * the qino_alloc calls below. If the device is readonly, * temporarily switch to read-write to do this. 
@@ -1444,17 +1514,27 @@ xfs_qm_init_quotainos( flags &= ~XFS_QMOPT_SBVERSION; } - if (XFS_IS_OQUOTA_ON(mp) && gip == NULL) { - flags |= (XFS_IS_GQUOTA_ON(mp) ? - XFS_QMOPT_GQUOTA : XFS_QMOPT_PQUOTA); + if (XFS_IS_GQUOTA_ON(mp) && gip == NULL) { error = xfs_qm_qino_alloc(mp, &gip, - sbflags | XFS_SB_GQUOTINO, flags); + sbflags | XFS_SB_GQUOTINO, + flags | XFS_QMOPT_GQUOTA); + if (error) + goto error_rele; + + flags &= ~XFS_QMOPT_SBVERSION; + } + if (XFS_IS_PQUOTA_ON(mp) && pip == NULL) { + /* XXX: Use XFS_SB_GQUOTINO for now */ + error = xfs_qm_qino_alloc(mp, &pip, + sbflags | XFS_SB_GQUOTINO, + flags | XFS_QMOPT_PQUOTA); if (error) goto error_rele; } mp->m_quotainfo->qi_uquotaip = uip; mp->m_quotainfo->qi_gquotaip = gip; + mp->m_quotainfo->qi_pquotaip = pip; return 0; @@ -1463,6 +1543,8 @@ error_rele: IRELE(uip); if (gip) IRELE(gip); + if (pip) + IRELE(pip); return XFS_ERROR(error); } @@ -1657,11 +1739,13 @@ xfs_qm_vop_dqalloc( prid_t prid, uint flags, struct xfs_dquot **O_udqpp, - struct xfs_dquot **O_gdqpp) + struct xfs_dquot **O_gdqpp, + struct xfs_dquot **O_pdqpp) { struct xfs_mount *mp = ip->i_mount; struct xfs_dquot *uq = NULL; struct xfs_dquot *gq = NULL; + struct xfs_dquot *pq = NULL; int error; uint lockflags; @@ -1741,24 +1825,25 @@ xfs_qm_vop_dqalloc( ASSERT(ip->i_gdquot); gq = xfs_qm_dqhold(ip->i_gdquot); } - } else if ((flags & XFS_QMOPT_PQUOTA) && XFS_IS_PQUOTA_ON(mp)) { + } + if ((flags & XFS_QMOPT_PQUOTA) && XFS_IS_PQUOTA_ON(mp)) { if (xfs_get_projid(ip) != prid) { xfs_iunlock(ip, lockflags); error = xfs_qm_dqget(mp, NULL, (xfs_dqid_t)prid, XFS_DQ_PROJ, XFS_QMOPT_DQALLOC | XFS_QMOPT_DOWARN, - &gq); + &pq); if (error) { ASSERT(error != ENOENT); goto error_rele; } - xfs_dqunlock(gq); + xfs_dqunlock(pq); lockflags = XFS_ILOCK_SHARED; xfs_ilock(ip, lockflags); } else { - ASSERT(ip->i_gdquot); - gq = xfs_qm_dqhold(ip->i_gdquot); + ASSERT(ip->i_pdquot); + pq = xfs_qm_dqhold(ip->i_pdquot); } } if (uq) @@ -1773,9 +1858,15 @@ xfs_qm_vop_dqalloc( *O_gdqpp = 
gq; else if (gq) xfs_qm_dqrele(gq); + if (O_pdqpp) + *O_pdqpp = pq; + else if (pq) + xfs_qm_dqrele(pq); return 0; error_rele: + if (gq) + xfs_qm_dqrele(gq); if (uq) xfs_qm_dqrele(uq); return error; @@ -1830,14 +1921,17 @@ xfs_qm_vop_chown_reserve( struct xfs_inode *ip, struct xfs_dquot *udqp, struct xfs_dquot *gdqp, + struct xfs_dquot *pdqp, uint flags) { struct xfs_mount *mp = ip->i_mount; uint delblks, blkflags, prjflags = 0; struct xfs_dquot *udq_unres = NULL; struct xfs_dquot *gdq_unres = NULL; + struct xfs_dquot *pdq_unres = NULL; struct xfs_dquot *udq_delblks = NULL; struct xfs_dquot *gdq_delblks = NULL; + struct xfs_dquot *pdq_delblks = NULL; int error; @@ -1861,24 +1955,28 @@ xfs_qm_vop_chown_reserve( udq_unres = ip->i_udquot; } } - if (XFS_IS_OQUOTA_ON(ip->i_mount) && gdqp) { - if (XFS_IS_PQUOTA_ON(ip->i_mount) && - xfs_get_projid(ip) != be32_to_cpu(gdqp->q_core.d_id)) - prjflags = XFS_QMOPT_ENOSPC; - - if (prjflags || - (XFS_IS_GQUOTA_ON(ip->i_mount) && - ip->i_d.di_gid != be32_to_cpu(gdqp->q_core.d_id))) { - gdq_delblks = gdqp; - if (delblks) { - ASSERT(ip->i_gdquot); - gdq_unres = ip->i_gdquot; - } + if (XFS_IS_GQUOTA_ON(ip->i_mount) && gdqp && + ip->i_d.di_gid != be32_to_cpu(gdqp->q_core.d_id)) { + gdq_delblks = gdqp; + if (delblks) { + ASSERT(ip->i_gdquot); + gdq_unres = ip->i_gdquot; + } + } + + if (XFS_IS_PQUOTA_ON(ip->i_mount) && pdqp && + xfs_get_projid(ip) != be32_to_cpu(pdqp->q_core.d_id)) { + prjflags = XFS_QMOPT_ENOSPC; + pdq_delblks = pdqp; + if (delblks) { + ASSERT(ip->i_pdquot); + pdq_unres = ip->i_pdquot; } } error = xfs_trans_reserve_quota_bydquots(tp, ip->i_mount, - udq_delblks, gdq_delblks, ip->i_d.di_nblocks, 1, + udq_delblks, gdq_delblks, pdq_delblks, + ip->i_d.di_nblocks, 1, flags | blkflags | prjflags); if (error) return error; @@ -1893,16 +1991,17 @@ xfs_qm_vop_chown_reserve( /* * Do the reservations first. Unreservation can't fail. 
*/ - ASSERT(udq_delblks || gdq_delblks); - ASSERT(udq_unres || gdq_unres); + ASSERT(udq_delblks || gdq_delblks || pdq_delblks); + ASSERT(udq_unres || gdq_unres || pdq_unres); error = xfs_trans_reserve_quota_bydquots(NULL, ip->i_mount, - udq_delblks, gdq_delblks, (xfs_qcnt_t)delblks, 0, + udq_delblks, gdq_delblks, pdq_delblks, + (xfs_qcnt_t)delblks, 0, flags | blkflags | prjflags); if (error) return error; xfs_trans_reserve_quota_bydquots(NULL, ip->i_mount, - udq_unres, gdq_unres, -((xfs_qcnt_t)delblks), 0, - blkflags); + udq_unres, gdq_unres, pdq_unres, + -((xfs_qcnt_t)delblks), 0, blkflags); } return (0); @@ -1941,7 +2040,8 @@ xfs_qm_vop_create_dqattach( struct xfs_trans *tp, struct xfs_inode *ip, struct xfs_dquot *udqp, - struct xfs_dquot *gdqp) + struct xfs_dquot *gdqp, + struct xfs_dquot *pdqp) { struct xfs_mount *mp = tp->t_mountp; @@ -1961,13 +2061,18 @@ xfs_qm_vop_create_dqattach( } if (gdqp) { ASSERT(ip->i_gdquot == NULL); - ASSERT(XFS_IS_OQUOTA_ON(mp)); - ASSERT((XFS_IS_GQUOTA_ON(mp) ? 
- ip->i_d.di_gid : xfs_get_projid(ip)) == - be32_to_cpu(gdqp->q_core.d_id)); - + ASSERT(XFS_IS_GQUOTA_ON(mp)); + ASSERT(ip->i_d.di_gid == be32_to_cpu(gdqp->q_core.d_id)); ip->i_gdquot = xfs_qm_dqhold(gdqp); xfs_trans_mod_dquot(tp, gdqp, XFS_TRANS_DQ_ICOUNT, 1); } + if (pdqp) { + ASSERT(ip->i_pdquot == NULL); + ASSERT(XFS_IS_PQUOTA_ON(mp)); + ASSERT(xfs_get_projid(ip) == be32_to_cpu(pdqp->q_core.d_id)); + + ip->i_pdquot = xfs_qm_dqhold(pdqp); + xfs_trans_mod_dquot(tp, pdqp, XFS_TRANS_DQ_ICOUNT, 1); + } } diff --git a/fs/xfs/xfs_qm.h b/fs/xfs/xfs_qm.h index bdb4f8b..579d6a0 100644 --- a/fs/xfs/xfs_qm.h +++ b/fs/xfs/xfs_qm.h @@ -44,9 +44,11 @@ extern struct kmem_zone *xfs_qm_dqtrxzone; typedef struct xfs_quotainfo { struct radix_tree_root qi_uquota_tree; struct radix_tree_root qi_gquota_tree; + struct radix_tree_root qi_pquota_tree; struct mutex qi_tree_lock; - xfs_inode_t *qi_uquotaip; /* user quota inode */ - xfs_inode_t *qi_gquotaip; /* group quota inode */ + struct xfs_inode *qi_uquotaip; /* user quota inode */ + struct xfs_inode *qi_gquotaip; /* group quota inode */ + struct xfs_inode *qi_pquotaip; /* project quota inode */ struct list_head qi_lru_list; struct mutex qi_lru_lock; int qi_lru_count; @@ -78,8 +80,9 @@ xfs_dquot_tree( case XFS_DQ_USER: return &qi->qi_uquota_tree; case XFS_DQ_GROUP: - case XFS_DQ_PROJ: return &qi->qi_gquota_tree; + case XFS_DQ_PROJ: + return &qi->qi_pquota_tree; default: ASSERT(0); } @@ -93,8 +96,9 @@ xfs_dq_to_quota_inode(struct xfs_dquot *dqp) case XFS_DQ_USER: return dqp->q_mount->m_quotainfo->qi_uquotaip; case XFS_DQ_GROUP: - case XFS_DQ_PROJ: return dqp->q_mount->m_quotainfo->qi_gquotaip; + case XFS_DQ_PROJ: + return dqp->q_mount->m_quotainfo->qi_pquotaip; default: ASSERT(0); } @@ -107,18 +111,20 @@ extern void xfs_trans_mod_dquot(struct xfs_trans *, struct xfs_dquot *, uint, long); extern int xfs_trans_reserve_quota_bydquots(struct xfs_trans *, struct xfs_mount *, struct xfs_dquot *, - struct xfs_dquot *, long, long, uint); + 
struct xfs_dquot *, struct xfs_dquot *, + long, long, uint); extern void xfs_trans_dqjoin(struct xfs_trans *, struct xfs_dquot *); extern void xfs_trans_log_dquot(struct xfs_trans *, struct xfs_dquot *); /* - * We keep the usr and grp dquots separately so that locking will be easier - * to do at commit time. All transactions that we know of at this point + * We keep the usr, grp, and prj dquots separately so that locking will be + * easier to do at commit time. All transactions that we know of at this point * affect no more than two dquots of one type. Hence, the TRANS_MAXDQS value. */ enum { XFS_QM_TRANS_USR = 0, XFS_QM_TRANS_GRP, + XFS_QM_TRANS_PRJ, XFS_QM_TRANS_DQTYPES }; #define XFS_QM_TRANS_MAXDQS 2 diff --git a/fs/xfs/xfs_qm_bhv.c b/fs/xfs/xfs_qm_bhv.c index 2d02eac1..437a52d 100644 --- a/fs/xfs/xfs_qm_bhv.c +++ b/fs/xfs/xfs_qm_bhv.c @@ -112,16 +112,16 @@ xfs_qm_newmount( if (((uquotaondisk && !XFS_IS_UQUOTA_ON(mp)) || (!uquotaondisk && XFS_IS_UQUOTA_ON(mp)) || - (pquotaondisk && !XFS_IS_PQUOTA_ON(mp)) || - (!pquotaondisk && XFS_IS_PQUOTA_ON(mp)) || (gquotaondisk && !XFS_IS_GQUOTA_ON(mp)) || - (!gquotaondisk && XFS_IS_OQUOTA_ON(mp))) && + (!gquotaondisk && XFS_IS_GQUOTA_ON(mp)) || + (pquotaondisk && !XFS_IS_PQUOTA_ON(mp)) || + (!pquotaondisk && XFS_IS_PQUOTA_ON(mp))) && xfs_dev_is_read_only(mp, "changing quota state")) { xfs_warn(mp, "please mount with%s%s%s%s.", (!quotaondisk ? "out quota" : ""), (uquotaondisk ? " usrquota" : ""), - (pquotaondisk ? " prjquota" : ""), - (gquotaondisk ? " grpquota" : "")); + (gquotaondisk ? " grpquota" : ""), + (pquotaondisk ? 
" prjquota" : "")); return XFS_ERROR(EPERM); } diff --git a/fs/xfs/xfs_qm_syscalls.c b/fs/xfs/xfs_qm_syscalls.c index a08801a..e4f8b2d 100644 --- a/fs/xfs/xfs_qm_syscalls.c +++ b/fs/xfs/xfs_qm_syscalls.c @@ -119,7 +119,8 @@ xfs_qm_scall_quotaoff( dqtype |= XFS_QMOPT_GQUOTA; flags |= (XFS_GQUOTA_CHKD | XFS_GQUOTA_ENFD); inactivate_flags |= XFS_GQUOTA_ACTIVE; - } else if (flags & XFS_PQUOTA_ACCT) { + } + if (flags & XFS_PQUOTA_ACCT) { dqtype |= XFS_QMOPT_PQUOTA; flags |= (XFS_PQUOTA_CHKD | XFS_PQUOTA_ENFD); inactivate_flags |= XFS_PQUOTA_ACTIVE; @@ -198,10 +199,9 @@ xfs_qm_scall_quotaoff( } /* - * If quotas is completely disabled, close shop. + * If all quotas are completely turned off, close shop. */ - if (((flags & XFS_MOUNT_QUOTA_ALL) == XFS_MOUNT_QUOTA_SET1) || - ((flags & XFS_MOUNT_QUOTA_ALL) == XFS_MOUNT_QUOTA_SET2)) { + if (mp->m_qflags == 0) { mutex_unlock(&q->qi_quotaofflock); xfs_qm_destroy_quotainfo(mp); return (0); @@ -214,10 +214,14 @@ xfs_qm_scall_quotaoff( IRELE(q->qi_uquotaip); q->qi_uquotaip = NULL; } - if ((dqtype & (XFS_QMOPT_GQUOTA|XFS_QMOPT_PQUOTA)) && q->qi_gquotaip) { + if ((dqtype & XFS_QMOPT_GQUOTA) && q->qi_gquotaip) { IRELE(q->qi_gquotaip); q->qi_gquotaip = NULL; } + if ((dqtype & XFS_QMOPT_PQUOTA) && q->qi_pquotaip) { + IRELE(q->qi_pquotaip); + q->qi_pquotaip = NULL; + } out_unlock: mutex_unlock(&q->qi_quotaofflock); @@ -859,9 +863,11 @@ xfs_dqrele_inode( { /* skip quota inodes */ if (ip == ip->i_mount->m_quotainfo->qi_uquotaip || - ip == ip->i_mount->m_quotainfo->qi_gquotaip) { + ip == ip->i_mount->m_quotainfo->qi_gquotaip || + ip == ip->i_mount->m_quotainfo->qi_pquotaip) { ASSERT(ip->i_udquot == NULL); ASSERT(ip->i_gdquot == NULL); + ASSERT(ip->i_pdquot == NULL); return 0; } @@ -870,10 +876,14 @@ xfs_dqrele_inode( xfs_qm_dqrele(ip->i_udquot); ip->i_udquot = NULL; } - if (flags & (XFS_PQUOTA_ACCT|XFS_GQUOTA_ACCT) && ip->i_gdquot) { + if ((flags & XFS_GQUOTA_ACCT) && ip->i_gdquot) { xfs_qm_dqrele(ip->i_gdquot); ip->i_gdquot = NULL; } + if 
((flags & XFS_PQUOTA_ACCT) && ip->i_pdquot) { + xfs_qm_dqrele(ip->i_pdquot); + ip->i_pdquot = NULL; + } xfs_iunlock(ip, XFS_ILOCK_EXCL); return 0; } diff --git a/fs/xfs/xfs_quota.h b/fs/xfs/xfs_quota.h index c3483ba..b14f42c 100644 --- a/fs/xfs/xfs_quota.h +++ b/fs/xfs/xfs_quota.h @@ -108,11 +108,28 @@ typedef struct xfs_dqblk { { XFS_DQ_FREEING, "FREEING" } /* - * In the worst case, when both user and group quotas are on, - * we can have a max of three dquots changing in a single transaction. + * We have the possibility of all three quota types being active at once, and + * hence free space modification requires modification of all three current + * dquots in a single transaction. For this case we need to have a reservation + * of at least 3 dquots. + * + * However, a chown operation can change both UID and GID in a single + * transaction, resulting in requiring {old, new} x {uid, gid} dquots to be + * modified. Hence for this case we need to reserve space for at least 4 dquots. + * + * And in the worst case, there's a rename operation that can be modifying up to + * 4 inodes with dquots attached to them. In reality, the only inodes that can + * have their dquots modified are the source and destination directory inodes + * due to directory name creation and removal. That can require space allocation + * and/or freeing on both directory inodes, and hence all three dquots on each + * inode can be modified. And if the directories are world writeable, all the + * dquots can be unique and so 6 dquots can be modified.... + * + * And, of course, we also need to take into account the dquot log format item + * used to describe each dquot.
*/ -#define XFS_DQUOT_LOGRES(mp) (sizeof(xfs_disk_dquot_t) * 3) - +#define XFS_DQUOT_LOGRES(mp) \ + ((sizeof(struct xfs_dq_logformat) + sizeof(struct xfs_disk_dquot)) * 6) /* * These are the structures used to lay out dquots and quotaoff @@ -271,10 +288,10 @@ typedef struct xfs_qoff_logformat { * we didn't have the inode locked, the appropriate dquot(s) will be * attached atomically. */ -#define XFS_NOT_DQATTACHED(mp, ip) ((XFS_IS_UQUOTA_ON(mp) &&\ - (ip)->i_udquot == NULL) || \ - (XFS_IS_OQUOTA_ON(mp) && \ - (ip)->i_gdquot == NULL)) +#define XFS_NOT_DQATTACHED(mp, ip) \ + ((XFS_IS_UQUOTA_ON(mp) && (ip)->i_udquot == NULL) || \ + (XFS_IS_GQUOTA_ON(mp) && (ip)->i_gdquot == NULL) || \ + (XFS_IS_PQUOTA_ON(mp) && (ip)->i_pdquot == NULL)) #define XFS_QM_NEED_QUOTACHECK(mp) \ ((XFS_IS_UQUOTA_ON(mp) && \ @@ -284,14 +301,6 @@ typedef struct xfs_qoff_logformat { (XFS_IS_PQUOTA_ON(mp) && \ (mp->m_sb.sb_qflags & XFS_PQUOTA_CHKD) == 0)) -#define XFS_MOUNT_QUOTA_SET1 (XFS_UQUOTA_ACCT|XFS_UQUOTA_ENFD|\ - XFS_UQUOTA_CHKD|XFS_GQUOTA_ACCT|\ - XFS_GQUOTA_ENFD|XFS_GQUOTA_CHKD) - -#define XFS_MOUNT_QUOTA_SET2 (XFS_UQUOTA_ACCT|XFS_UQUOTA_ENFD|\ - XFS_UQUOTA_CHKD|XFS_PQUOTA_ACCT|\ - XFS_PQUOTA_ENFD|XFS_PQUOTA_CHKD) - #define XFS_MOUNT_QUOTA_ALL (XFS_UQUOTA_ACCT|XFS_UQUOTA_ENFD|\ XFS_UQUOTA_CHKD|XFS_GQUOTA_ACCT|\ XFS_GQUOTA_ENFD|XFS_GQUOTA_CHKD|\ @@ -329,17 +338,18 @@ extern int xfs_trans_reserve_quota_nblks(struct xfs_trans *, struct xfs_inode *, long, long, uint); extern int xfs_trans_reserve_quota_bydquots(struct xfs_trans *, struct xfs_mount *, struct xfs_dquot *, - struct xfs_dquot *, long, long, uint); + struct xfs_dquot *, struct xfs_dquot *, long, long, uint); extern int xfs_qm_vop_dqalloc(struct xfs_inode *, uid_t, gid_t, prid_t, uint, - struct xfs_dquot **, struct xfs_dquot **); + struct xfs_dquot **, struct xfs_dquot **, struct xfs_dquot **); extern void xfs_qm_vop_create_dqattach(struct xfs_trans *, struct xfs_inode *, - struct xfs_dquot *, struct xfs_dquot *); + struct 
xfs_dquot *, struct xfs_dquot *, struct xfs_dquot *); extern int xfs_qm_vop_rename_dqattach(struct xfs_inode **); extern struct xfs_dquot *xfs_qm_vop_chown(struct xfs_trans *, struct xfs_inode *, struct xfs_dquot **, struct xfs_dquot *); extern int xfs_qm_vop_chown_reserve(struct xfs_trans *, struct xfs_inode *, - struct xfs_dquot *, struct xfs_dquot *, uint); + struct xfs_dquot *, struct xfs_dquot *, + struct xfs_dquot *, uint); extern int xfs_qm_dqattach(struct xfs_inode *, uint); extern int xfs_qm_dqattach_locked(struct xfs_inode *, uint); extern void xfs_qm_dqdetach(struct xfs_inode *); @@ -353,10 +363,12 @@ extern void xfs_qm_unmount_quotas(struct xfs_mount *); #else static inline int xfs_qm_vop_dqalloc(struct xfs_inode *ip, uid_t uid, gid_t gid, prid_t prid, - uint flags, struct xfs_dquot **udqp, struct xfs_dquot **gdqp) + uint flags, struct xfs_dquot **udqp, struct xfs_dquot **gdqp, + struct xfs_dquot **pdqp) { *udqp = NULL; *gdqp = NULL; + *pdqp = NULL; return 0; } #define xfs_trans_dup_dqinfo(tp, tp2) @@ -371,14 +383,15 @@ static inline int xfs_trans_reserve_quota_nblks(struct xfs_trans *tp, } static inline int xfs_trans_reserve_quota_bydquots(struct xfs_trans *tp, struct xfs_mount *mp, struct xfs_dquot *udqp, - struct xfs_dquot *gdqp, long nblks, long nions, uint flags) + struct xfs_dquot *gdqp, struct xfs_dquot *pdqp, + long nblks, long nions, uint flags) { return 0; } -#define xfs_qm_vop_create_dqattach(tp, ip, u, g) +#define xfs_qm_vop_create_dqattach(tp, ip, u, g, p) #define xfs_qm_vop_rename_dqattach(it) (0) #define xfs_qm_vop_chown(tp, ip, old, new) (NULL) -#define xfs_qm_vop_chown_reserve(tp, ip, u, g, fl) (0) +#define xfs_qm_vop_chown_reserve(tp, ip, u, g, p, fl) (0) #define xfs_qm_dqattach(ip, fl) (0) #define xfs_qm_dqattach_locked(ip, fl) (0) #define xfs_qm_dqdetach(ip) @@ -392,8 +405,8 @@ static inline int xfs_trans_reserve_quota_bydquots(struct xfs_trans *tp, #define xfs_trans_unreserve_quota_nblks(tp, ip, nblks, ninos, flags) \ 
xfs_trans_reserve_quota_nblks(tp, ip, -(nblks), -(ninos), flags) -#define xfs_trans_reserve_quota(tp, mp, ud, gd, nb, ni, f) \ - xfs_trans_reserve_quota_bydquots(tp, mp, ud, gd, nb, ni, \ +#define xfs_trans_reserve_quota(tp, mp, ud, gd, pd, nb, ni, f) \ + xfs_trans_reserve_quota_bydquots(tp, mp, ud, gd, pd, nb, ni, \ f | XFS_QMOPT_RES_REGBLKS) extern int xfs_qm_dqcheck(struct xfs_mount *, xfs_disk_dquot_t *, diff --git a/fs/xfs/xfs_symlink.c b/fs/xfs/xfs_symlink.c index e830fb56..f4895b6 100644 --- a/fs/xfs/xfs_symlink.c +++ b/fs/xfs/xfs_symlink.c @@ -360,6 +360,7 @@ xfs_symlink( prid_t prid; struct xfs_dquot *udqp = NULL; struct xfs_dquot *gdqp = NULL; + struct xfs_dquot *pdqp = NULL; uint resblks; *ipp = NULL; @@ -386,7 +387,7 @@ xfs_symlink( * Make sure that we have allocated dquot(s) on disk. */ error = xfs_qm_vop_dqalloc(dp, current_fsuid(), current_fsgid(), prid, - XFS_QMOPT_QUOTALL | XFS_QMOPT_INHERIT, &udqp, &gdqp); + XFS_QMOPT_QUOTALL | XFS_QMOPT_INHERIT, &udqp, &gdqp, &pdqp); if (error) goto std_return; @@ -427,7 +428,8 @@ xfs_symlink( /* * Reserve disk quota : blocks and inode. */ - error = xfs_trans_reserve_quota(tp, mp, udqp, gdqp, resblks, 1, 0); + error = xfs_trans_reserve_quota(tp, mp, udqp, gdqp, + pdqp, resblks, 1, 0); if (error) goto error_return; @@ -465,7 +467,7 @@ xfs_symlink( /* * Also attach the dquot(s) to it, if applicable. 
*/ - xfs_qm_vop_create_dqattach(tp, ip, udqp, gdqp); + xfs_qm_vop_create_dqattach(tp, ip, udqp, gdqp, pdqp); if (resblks) resblks -= XFS_IALLOC_SPACE_RES(mp); @@ -563,6 +565,7 @@ xfs_symlink( error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES); xfs_qm_dqrele(udqp); xfs_qm_dqrele(gdqp); + xfs_qm_dqrele(pdqp); *ipp = ip; return 0; @@ -576,6 +579,7 @@ xfs_symlink( xfs_trans_cancel(tp, cancel_flags); xfs_qm_dqrele(udqp); xfs_qm_dqrele(gdqp); + xfs_qm_dqrele(pdqp); if (unlock_dp_on_error) xfs_iunlock(dp, XFS_ILOCK_EXCL); diff --git a/fs/xfs/xfs_trans_dquot.c b/fs/xfs/xfs_trans_dquot.c index 3ba64d5..61407a8 100644 --- a/fs/xfs/xfs_trans_dquot.c +++ b/fs/xfs/xfs_trans_dquot.c @@ -163,8 +163,10 @@ xfs_trans_mod_dquot_byino( if (XFS_IS_UQUOTA_ON(mp) && ip->i_udquot) (void) xfs_trans_mod_dquot(tp, ip->i_udquot, field, delta); - if (XFS_IS_OQUOTA_ON(mp) && ip->i_gdquot) + if (XFS_IS_GQUOTA_ON(mp) && ip->i_gdquot) (void) xfs_trans_mod_dquot(tp, ip->i_gdquot, field, delta); + if (XFS_IS_PQUOTA_ON(mp) && ip->i_pdquot) + (void) xfs_trans_mod_dquot(tp, ip->i_pdquot, field, delta); } STATIC struct xfs_dqtrx * @@ -177,8 +179,12 @@ xfs_trans_get_dqtrx( if (XFS_QM_ISUDQ(dqp)) qa = tp->t_dqinfo->dqs[XFS_QM_TRANS_USR]; - else + else if (XFS_QM_ISGDQ(dqp)) qa = tp->t_dqinfo->dqs[XFS_QM_TRANS_GRP]; + else if (XFS_QM_ISPDQ(dqp)) + qa = tp->t_dqinfo->dqs[XFS_QM_TRANS_PRJ]; + else + return NULL; for (i = 0; i < XFS_QM_TRANS_MAXDQS; i++) { if (qa[i].qt_dquot == NULL || @@ -291,11 +297,10 @@ xfs_trans_mod_dquot( /* - * Given an array of dqtrx structures, lock all the dquots associated - * and join them to the transaction, provided they have been modified. - * We know that the highest number of dquots (of one type - usr OR grp), - * involved in a transaction is 2 and that both usr and grp combined - 3. - * So, we don't attempt to make this very generic. 
+ * Given an array of dqtrx structures, lock all the dquots associated and join + * them to the transaction, provided they have been modified. We know that the + * highest number of dquots of one type - usr, grp OR prj - involved in a + * transaction is 2 so we don't need to make this very generic. */ STATIC void xfs_trans_dqlockedjoin( @@ -728,8 +733,8 @@ error_return: /* * Given dquot(s), make disk block and/or inode reservations against them. - * The fact that this does the reservation against both the usr and - * grp/prj quotas is important, because this follows a both-or-nothing + * The fact that this does the reservation against user, group and + * project quotas is important, because this follows an all-or-nothing * approach. * * flags = XFS_QMOPT_FORCE_RES evades limit enforcement. Used by chown. @@ -744,6 +749,7 @@ xfs_trans_reserve_quota_bydquots( struct xfs_mount *mp, struct xfs_dquot *udqp, struct xfs_dquot *gdqp, + struct xfs_dquot *pdqp, long nblks, long ninos, uint flags) @@ -771,11 +777,21 @@ xfs_trans_reserve_quota_bydquots( goto unwind_usr; } + if (pdqp) { + error = xfs_trans_dqresv(tp, mp, pdqp, nblks, ninos, flags); + if (error) + goto unwind_grp; + } + /* * Didn't change anything critical, so, no need to log */ return 0; +unwind_grp: + flags |= XFS_QMOPT_FORCE_RES; + if (gdqp) + xfs_trans_dqresv(tp, mp, gdqp, -nblks, -ninos, flags); unwind_usr: flags |= XFS_QMOPT_FORCE_RES; if (udqp) @@ -817,6 +833,7 @@ xfs_trans_reserve_quota_nblks( */ return xfs_trans_reserve_quota_bydquots(tp, mp, ip->i_udquot, ip->i_gdquot, + ip->i_pdquot, nblks, ninos, flags); } diff --git a/fs/xfs/xfs_vnodeops.c b/fs/xfs/xfs_vnodeops.c index 42c0ef2..dc730ac 100644 --- a/fs/xfs/xfs_vnodeops.c +++ b/fs/xfs/xfs_vnodeops.c @@ -489,6 +489,7 @@ xfs_create( prid_t prid; struct xfs_dquot *udqp = NULL; struct xfs_dquot *gdqp = NULL; + struct xfs_dquot *pdqp = NULL; uint resblks; uint log_res; uint log_count; @@ -507,7 +508,8 @@ xfs_create( * Make sure that we have allocated
dquot(s) on disk. */ error = xfs_qm_vop_dqalloc(dp, current_fsuid(), current_fsgid(), prid, - XFS_QMOPT_QUOTALL | XFS_QMOPT_INHERIT, &udqp, &gdqp); + XFS_QMOPT_QUOTALL | XFS_QMOPT_INHERIT, + &udqp, &gdqp, &pdqp); if (error) return error; @@ -559,7 +561,8 @@ xfs_create( /* * Reserve disk quota and the inode. */ - error = xfs_trans_reserve_quota(tp, mp, udqp, gdqp, resblks, 1, 0); + error = xfs_trans_reserve_quota(tp, mp, udqp, gdqp, + pdqp, resblks, 1, 0); if (error) goto out_trans_cancel; @@ -623,7 +626,7 @@ xfs_create( * These ids of the inode couldn't have changed since the new * inode has been locked ever since it was created. */ - xfs_qm_vop_create_dqattach(tp, ip, udqp, gdqp); + xfs_qm_vop_create_dqattach(tp, ip, udqp, gdqp, pdqp); error = xfs_bmap_finish(&tp, &free_list, &committed); if (error) @@ -635,6 +638,7 @@ xfs_create( xfs_qm_dqrele(udqp); xfs_qm_dqrele(gdqp); + xfs_qm_dqrele(pdqp); *ipp = ip; return 0; @@ -656,6 +660,7 @@ xfs_create( xfs_qm_dqrele(udqp); xfs_qm_dqrele(gdqp); + xfs_qm_dqrele(pdqp); if (unlock_dp_on_error) xfs_iunlock(dp, XFS_ILOCK_EXCL); @@ -1568,7 +1573,7 @@ xfs_free_file_space( } xfs_ilock(ip, XFS_ILOCK_EXCL); error = xfs_trans_reserve_quota(tp, mp, - ip->i_udquot, ip->i_gdquot, + ip->i_udquot, ip->i_gdquot, ip->i_pdquot, resblks, 0, XFS_QMOPT_RES_REGBLKS); if (error) goto error1; |