diff options
author | Tejun Heo <tj@kernel.org> | 2009-07-04 07:13:18 +0900 |
---|---|---|
committer | Tejun Heo <tj@kernel.org> | 2009-07-04 07:13:18 +0900 |
commit | c43768cbb7655ea5ff782ae250f6e2ef4297cf98 (patch) | |
tree | 3982e41dde3eecaa3739a5d1a8ed18d04bd74f01 /fs | |
parent | 1a8dd307cc0a2119be4e578c517795464e6dabba (diff) | |
parent | 746a99a5af60ee676afa2ba469ccd1373493c7e7 (diff) | |
download | op-kernel-dev-c43768cbb7655ea5ff782ae250f6e2ef4297cf98.zip op-kernel-dev-c43768cbb7655ea5ff782ae250f6e2ef4297cf98.tar.gz |
Merge branch 'master' into for-next
Pull linus#master to merge PER_CPU_DEF_ATTRIBUTES and alpha build fix
changes. As alpha in percpu tree uses 'weak' attribute instead of
inline assembly, there's no need for __used attribute.
Conflicts:
arch/alpha/include/asm/percpu.h
arch/mn10300/kernel/vmlinux.lds.S
include/linux/percpu-defs.h
Diffstat (limited to 'fs')
143 files changed, 7121 insertions, 2504 deletions
@@ -236,10 +236,12 @@ source "fs/nfsd/Kconfig" config LOCKD tristate + depends on FILE_LOCKING config LOCKD_V4 bool depends on NFSD_V3 || NFS_V3 + depends on FILE_LOCKING default y config EXPORTFS diff --git a/fs/afs/flock.c b/fs/afs/flock.c index 210acaf..3ff8bdd 100644 --- a/fs/afs/flock.c +++ b/fs/afs/flock.c @@ -432,7 +432,6 @@ vfs_rejected_lock: list_del_init(&fl->fl_u.afs.link); if (list_empty(&vnode->granted_locks)) afs_defer_unlock(vnode, key); - spin_unlock(&vnode->lock); goto abort_attempt; } @@ -485,6 +485,8 @@ static inline void really_put_req(struct kioctx *ctx, struct kiocb *req) { assert_spin_locked(&ctx->ctx_lock); + if (req->ki_eventfd != NULL) + eventfd_ctx_put(req->ki_eventfd); if (req->ki_dtor) req->ki_dtor(req); if (req->ki_iovec != &req->ki_inline_vec) @@ -509,8 +511,6 @@ static void aio_fput_routine(struct work_struct *data) /* Complete the fput(s) */ if (req->ki_filp != NULL) __fput(req->ki_filp); - if (req->ki_eventfd != NULL) - __fput(req->ki_eventfd); /* Link the iocb into the context's free list */ spin_lock_irq(&ctx->ctx_lock); @@ -528,8 +528,6 @@ static void aio_fput_routine(struct work_struct *data) */ static int __aio_put_req(struct kioctx *ctx, struct kiocb *req) { - int schedule_putreq = 0; - dprintk(KERN_DEBUG "aio_put(%p): f_count=%ld\n", req, atomic_long_read(&req->ki_filp->f_count)); @@ -549,24 +547,16 @@ static int __aio_put_req(struct kioctx *ctx, struct kiocb *req) * we would not be holding the last reference to the file*, so * this function will be executed w/out any aio kthread wakeup. 
*/ - if (unlikely(atomic_long_dec_and_test(&req->ki_filp->f_count))) - schedule_putreq++; - else - req->ki_filp = NULL; - if (req->ki_eventfd != NULL) { - if (unlikely(atomic_long_dec_and_test(&req->ki_eventfd->f_count))) - schedule_putreq++; - else - req->ki_eventfd = NULL; - } - if (unlikely(schedule_putreq)) { + if (unlikely(atomic_long_dec_and_test(&req->ki_filp->f_count))) { get_ioctx(ctx); spin_lock(&fput_lock); list_add(&req->ki_list, &fput_head); spin_unlock(&fput_lock); queue_work(aio_wq, &fput_work); - } else + } else { + req->ki_filp = NULL; really_put_req(ctx, req); + } return 1; } @@ -1622,7 +1612,7 @@ static int io_submit_one(struct kioctx *ctx, struct iocb __user *user_iocb, * an eventfd() fd, and will be signaled for each completed * event using the eventfd_signal() function. */ - req->ki_eventfd = eventfd_fget((int) iocb->aio_resfd); + req->ki_eventfd = eventfd_ctx_fdget((int) iocb->aio_resfd); if (IS_ERR(req->ki_eventfd)) { ret = PTR_ERR(req->ki_eventfd); req->ki_eventfd = NULL; diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c index 9fa212b0..b7c1603 100644 --- a/fs/binfmt_elf.c +++ b/fs/binfmt_elf.c @@ -1522,11 +1522,11 @@ static int fill_note_info(struct elfhdr *elf, int phdrs, info->thread = NULL; psinfo = kmalloc(sizeof(*psinfo), GFP_KERNEL); - fill_note(&info->psinfo, "CORE", NT_PRPSINFO, sizeof(*psinfo), psinfo); - if (psinfo == NULL) return 0; + fill_note(&info->psinfo, "CORE", NT_PRPSINFO, sizeof(*psinfo), psinfo); + /* * Figure out how many notes we're going to need for each thread. */ @@ -1929,7 +1929,10 @@ static int elf_core_dump(long signr, struct pt_regs *regs, struct file *file, un elf = kmalloc(sizeof(*elf), GFP_KERNEL); if (!elf) goto out; - + /* + * The number of segs is recorded in the ELF header as a 16-bit value. + * Please check DEFAULT_MAX_MAP_COUNT definition when you modify here. 
+ */ segs = current->mm->map_count; #ifdef ELF_CORE_EXTRA_PHDRS segs += ELF_CORE_EXTRA_PHDRS; diff --git a/fs/bio-integrity.c b/fs/bio-integrity.c index 31c46a2..49a34e7 100644 --- a/fs/bio-integrity.c +++ b/fs/bio-integrity.c @@ -1,7 +1,7 @@ /* * bio-integrity.c - bio data integrity extensions * - * Copyright (C) 2007, 2008 Oracle Corporation + * Copyright (C) 2007, 2008, 2009 Oracle Corporation * Written by: Martin K. Petersen <martin.petersen@oracle.com> * * This program is free software; you can redistribute it and/or @@ -25,63 +25,121 @@ #include <linux/bio.h> #include <linux/workqueue.h> -static struct kmem_cache *bio_integrity_slab __read_mostly; -static mempool_t *bio_integrity_pool; -static struct bio_set *integrity_bio_set; +struct integrity_slab { + struct kmem_cache *slab; + unsigned short nr_vecs; + char name[8]; +}; + +#define IS(x) { .nr_vecs = x, .name = "bip-"__stringify(x) } +struct integrity_slab bip_slab[BIOVEC_NR_POOLS] __read_mostly = { + IS(1), IS(4), IS(16), IS(64), IS(128), IS(BIO_MAX_PAGES), +}; +#undef IS + static struct workqueue_struct *kintegrityd_wq; +static inline unsigned int vecs_to_idx(unsigned int nr) +{ + switch (nr) { + case 1: + return 0; + case 2 ... 4: + return 1; + case 5 ... 16: + return 2; + case 17 ... 64: + return 3; + case 65 ... 128: + return 4; + case 129 ... BIO_MAX_PAGES: + return 5; + default: + BUG(); + } +} + +static inline int use_bip_pool(unsigned int idx) +{ + if (idx == BIOVEC_NR_POOLS) + return 1; + + return 0; +} + /** - * bio_integrity_alloc - Allocate integrity payload and attach it to bio + * bio_integrity_alloc_bioset - Allocate integrity payload and attach it to bio * @bio: bio to attach integrity metadata to * @gfp_mask: Memory allocation mask * @nr_vecs: Number of integrity metadata scatter-gather elements + * @bs: bio_set to allocate from * * Description: This function prepares a bio for attaching integrity * metadata. 
nr_vecs specifies the maximum number of pages containing * integrity metadata that can be attached. */ -struct bio_integrity_payload *bio_integrity_alloc(struct bio *bio, - gfp_t gfp_mask, - unsigned int nr_vecs) +struct bio_integrity_payload *bio_integrity_alloc_bioset(struct bio *bio, + gfp_t gfp_mask, + unsigned int nr_vecs, + struct bio_set *bs) { struct bio_integrity_payload *bip; - struct bio_vec *iv; - unsigned long idx; + unsigned int idx = vecs_to_idx(nr_vecs); BUG_ON(bio == NULL); + bip = NULL; - bip = mempool_alloc(bio_integrity_pool, gfp_mask); - if (unlikely(bip == NULL)) { - printk(KERN_ERR "%s: could not alloc bip\n", __func__); - return NULL; - } + /* Lower order allocations come straight from slab */ + if (!use_bip_pool(idx)) + bip = kmem_cache_alloc(bip_slab[idx].slab, gfp_mask); - memset(bip, 0, sizeof(*bip)); + /* Use mempool if lower order alloc failed or max vecs were requested */ + if (bip == NULL) { + bip = mempool_alloc(bs->bio_integrity_pool, gfp_mask); - iv = bvec_alloc_bs(gfp_mask, nr_vecs, &idx, integrity_bio_set); - if (unlikely(iv == NULL)) { - printk(KERN_ERR "%s: could not alloc bip_vec\n", __func__); - mempool_free(bip, bio_integrity_pool); - return NULL; + if (unlikely(bip == NULL)) { + printk(KERN_ERR "%s: could not alloc bip\n", __func__); + return NULL; + } } - bip->bip_pool = idx; - bip->bip_vec = iv; + memset(bip, 0, sizeof(*bip)); + + bip->bip_slab = idx; bip->bip_bio = bio; bio->bi_integrity = bip; return bip; } +EXPORT_SYMBOL(bio_integrity_alloc_bioset); + +/** + * bio_integrity_alloc - Allocate integrity payload and attach it to bio + * @bio: bio to attach integrity metadata to + * @gfp_mask: Memory allocation mask + * @nr_vecs: Number of integrity metadata scatter-gather elements + * + * Description: This function prepares a bio for attaching integrity + * metadata. nr_vecs specifies the maximum number of pages containing + * integrity metadata that can be attached. 
+ */ +struct bio_integrity_payload *bio_integrity_alloc(struct bio *bio, + gfp_t gfp_mask, + unsigned int nr_vecs) +{ + return bio_integrity_alloc_bioset(bio, gfp_mask, nr_vecs, fs_bio_set); +} EXPORT_SYMBOL(bio_integrity_alloc); /** * bio_integrity_free - Free bio integrity payload * @bio: bio containing bip to be freed + * @bs: bio_set this bio was allocated from * * Description: Used to free the integrity portion of a bio. Usually * called from bio_free(). */ -void bio_integrity_free(struct bio *bio) +void bio_integrity_free(struct bio *bio, struct bio_set *bs) { struct bio_integrity_payload *bip = bio->bi_integrity; @@ -92,8 +150,10 @@ void bio_integrity_free(struct bio *bio) && bip->bip_buf != NULL) kfree(bip->bip_buf); - bvec_free_bs(integrity_bio_set, bip->bip_vec, bip->bip_pool); - mempool_free(bip, bio_integrity_pool); + if (use_bip_pool(bip->bip_slab)) + mempool_free(bip, bs->bio_integrity_pool); + else + kmem_cache_free(bip_slab[bip->bip_slab].slab, bip); bio->bi_integrity = NULL; } @@ -114,7 +174,7 @@ int bio_integrity_add_page(struct bio *bio, struct page *page, struct bio_integrity_payload *bip = bio->bi_integrity; struct bio_vec *iv; - if (bip->bip_vcnt >= bvec_nr_vecs(bip->bip_pool)) { + if (bip->bip_vcnt >= bvec_nr_vecs(bip->bip_slab)) { printk(KERN_ERR "%s: bip_vec full\n", __func__); return 0; } @@ -647,8 +707,8 @@ void bio_integrity_split(struct bio *bio, struct bio_pair *bp, int sectors) bp->iv1 = bip->bip_vec[0]; bp->iv2 = bip->bip_vec[0]; - bp->bip1.bip_vec = &bp->iv1; - bp->bip2.bip_vec = &bp->iv2; + bp->bip1.bip_vec[0] = bp->iv1; + bp->bip2.bip_vec[0] = bp->iv2; bp->iv1.bv_len = sectors * bi->tuple_size; bp->iv2.bv_offset += sectors * bi->tuple_size; @@ -667,17 +727,19 @@ EXPORT_SYMBOL(bio_integrity_split); * @bio: New bio * @bio_src: Original bio * @gfp_mask: Memory allocation mask + * @bs: bio_set to allocate bip from * * Description: Called to allocate a bip when cloning a bio */ -int bio_integrity_clone(struct bio *bio, struct bio 
*bio_src, gfp_t gfp_mask) +int bio_integrity_clone(struct bio *bio, struct bio *bio_src, + gfp_t gfp_mask, struct bio_set *bs) { struct bio_integrity_payload *bip_src = bio_src->bi_integrity; struct bio_integrity_payload *bip; BUG_ON(bip_src == NULL); - bip = bio_integrity_alloc(bio, gfp_mask, bip_src->bip_vcnt); + bip = bio_integrity_alloc_bioset(bio, gfp_mask, bip_src->bip_vcnt, bs); if (bip == NULL) return -EIO; @@ -693,25 +755,43 @@ int bio_integrity_clone(struct bio *bio, struct bio *bio_src, gfp_t gfp_mask) } EXPORT_SYMBOL(bio_integrity_clone); -static int __init bio_integrity_init(void) +int bioset_integrity_create(struct bio_set *bs, int pool_size) { - kintegrityd_wq = create_workqueue("kintegrityd"); + unsigned int max_slab = vecs_to_idx(BIO_MAX_PAGES); + + bs->bio_integrity_pool = + mempool_create_slab_pool(pool_size, bip_slab[max_slab].slab); + if (!bs->bio_integrity_pool) + return -1; + + return 0; +} +EXPORT_SYMBOL(bioset_integrity_create); + +void bioset_integrity_free(struct bio_set *bs) +{ + if (bs->bio_integrity_pool) + mempool_destroy(bs->bio_integrity_pool); +} +EXPORT_SYMBOL(bioset_integrity_free); + +void __init bio_integrity_init(void) +{ + unsigned int i; + + kintegrityd_wq = create_workqueue("kintegrityd"); if (!kintegrityd_wq) panic("Failed to create kintegrityd\n"); - bio_integrity_slab = KMEM_CACHE(bio_integrity_payload, - SLAB_HWCACHE_ALIGN|SLAB_PANIC); + for (i = 0 ; i < BIOVEC_NR_POOLS ; i++) { + unsigned int size; - bio_integrity_pool = mempool_create_slab_pool(BIO_POOL_SIZE, - bio_integrity_slab); - if (!bio_integrity_pool) - panic("bio_integrity: can't allocate bip pool\n"); + size = sizeof(struct bio_integrity_payload) + + bip_slab[i].nr_vecs * sizeof(struct bio_vec); - integrity_bio_set = bioset_create(BIO_POOL_SIZE, 0); - if (!integrity_bio_set) - panic("bio_integrity: can't allocate bio_set\n"); - - return 0; + bip_slab[i].slab = + kmem_cache_create(bip_slab[i].name, size, 0, + SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL); + } } 
-subsys_initcall(bio_integrity_init); @@ -238,7 +238,7 @@ void bio_free(struct bio *bio, struct bio_set *bs) bvec_free_bs(bs, bio->bi_io_vec, BIO_POOL_IDX(bio)); if (bio_integrity(bio)) - bio_integrity_free(bio); + bio_integrity_free(bio, bs); /* * If we have front padding, adjust the bio pointer before freeing @@ -341,7 +341,7 @@ struct bio *bio_alloc(gfp_t gfp_mask, int nr_iovecs) static void bio_kmalloc_destructor(struct bio *bio) { if (bio_integrity(bio)) - bio_integrity_free(bio); + bio_integrity_free(bio, fs_bio_set); kfree(bio); } @@ -472,7 +472,7 @@ struct bio *bio_clone(struct bio *bio, gfp_t gfp_mask) if (bio_integrity(bio)) { int ret; - ret = bio_integrity_clone(b, bio, gfp_mask); + ret = bio_integrity_clone(b, bio, gfp_mask, fs_bio_set); if (ret < 0) { bio_put(b); @@ -1539,6 +1539,7 @@ void bioset_free(struct bio_set *bs) if (bs->bio_pool) mempool_destroy(bs->bio_pool); + bioset_integrity_free(bs); biovec_free_pools(bs); bio_put_slab(bs); @@ -1579,6 +1580,9 @@ struct bio_set *bioset_create(unsigned int pool_size, unsigned int front_pad) if (!bs->bio_pool) goto bad; + if (bioset_integrity_create(bs, pool_size)) + goto bad; + if (!biovec_create_pools(bs, pool_size)) return bs; @@ -1616,6 +1620,7 @@ static int __init init_bio(void) if (!bio_slabs) panic("bio: can't allocate bios\n"); + bio_integrity_init(); biovec_init_slabs(); fs_bio_set = bioset_create(BIO_POOL_SIZE, 0); diff --git a/fs/btrfs/acl.c b/fs/btrfs/acl.c index 6039725..f128427 100644 --- a/fs/btrfs/acl.c +++ b/fs/btrfs/acl.c @@ -29,51 +29,28 @@ #ifdef CONFIG_FS_POSIX_ACL -static void btrfs_update_cached_acl(struct inode *inode, - struct posix_acl **p_acl, - struct posix_acl *acl) -{ - spin_lock(&inode->i_lock); - if (*p_acl && *p_acl != BTRFS_ACL_NOT_CACHED) - posix_acl_release(*p_acl); - *p_acl = posix_acl_dup(acl); - spin_unlock(&inode->i_lock); -} - static struct posix_acl *btrfs_get_acl(struct inode *inode, int type) { int size; const char *name; char *value = NULL; - struct posix_acl *acl 
= NULL, **p_acl; + struct posix_acl *acl; + + acl = get_cached_acl(inode, type); + if (acl != ACL_NOT_CACHED) + return acl; switch (type) { case ACL_TYPE_ACCESS: name = POSIX_ACL_XATTR_ACCESS; - p_acl = &BTRFS_I(inode)->i_acl; break; case ACL_TYPE_DEFAULT: name = POSIX_ACL_XATTR_DEFAULT; - p_acl = &BTRFS_I(inode)->i_default_acl; break; default: - return ERR_PTR(-EINVAL); + BUG(); } - /* Handle the cached NULL acl case without locking */ - acl = ACCESS_ONCE(*p_acl); - if (!acl) - return acl; - - spin_lock(&inode->i_lock); - acl = *p_acl; - if (acl != BTRFS_ACL_NOT_CACHED) - acl = posix_acl_dup(acl); - spin_unlock(&inode->i_lock); - - if (acl != BTRFS_ACL_NOT_CACHED) - return acl; - size = __btrfs_getxattr(inode, name, "", 0); if (size > 0) { value = kzalloc(size, GFP_NOFS); @@ -82,13 +59,13 @@ static struct posix_acl *btrfs_get_acl(struct inode *inode, int type) size = __btrfs_getxattr(inode, name, value, size); if (size > 0) { acl = posix_acl_from_xattr(value, size); - btrfs_update_cached_acl(inode, p_acl, acl); + set_cached_acl(inode, type, acl); } kfree(value); } else if (size == -ENOENT || size == -ENODATA || size == 0) { /* FIXME, who returns -ENOENT? I think nobody */ acl = NULL; - btrfs_update_cached_acl(inode, p_acl, acl); + set_cached_acl(inode, type, acl); } else { acl = ERR_PTR(-EIO); } @@ -121,7 +98,6 @@ static int btrfs_set_acl(struct inode *inode, struct posix_acl *acl, int type) { int ret, size = 0; const char *name; - struct posix_acl **p_acl; char *value = NULL; mode_t mode; @@ -141,13 +117,11 @@ static int btrfs_set_acl(struct inode *inode, struct posix_acl *acl, int type) ret = 0; inode->i_mode = mode; name = POSIX_ACL_XATTR_ACCESS; - p_acl = &BTRFS_I(inode)->i_acl; break; case ACL_TYPE_DEFAULT: if (!S_ISDIR(inode->i_mode)) return acl ? 
-EINVAL : 0; name = POSIX_ACL_XATTR_DEFAULT; - p_acl = &BTRFS_I(inode)->i_default_acl; break; default: return -EINVAL; @@ -172,7 +146,7 @@ out: kfree(value); if (!ret) - btrfs_update_cached_acl(inode, p_acl, acl); + set_cached_acl(inode, type, acl); return ret; } diff --git a/fs/btrfs/async-thread.c b/fs/btrfs/async-thread.c index 7f88628..6e4f6c5 100644 --- a/fs/btrfs/async-thread.c +++ b/fs/btrfs/async-thread.c @@ -299,8 +299,8 @@ int btrfs_start_workers(struct btrfs_workers *workers, int num_workers) "btrfs-%s-%d", workers->name, workers->num_workers + i); if (IS_ERR(worker->task)) { - kfree(worker); ret = PTR_ERR(worker->task); + kfree(worker); goto fail; } diff --git a/fs/btrfs/btrfs_inode.h b/fs/btrfs/btrfs_inode.h index acb4f35..ea1ea0a 100644 --- a/fs/btrfs/btrfs_inode.h +++ b/fs/btrfs/btrfs_inode.h @@ -53,10 +53,6 @@ struct btrfs_inode { /* used to order data wrt metadata */ struct btrfs_ordered_inode_tree ordered_tree; - /* standard acl pointers */ - struct posix_acl *i_acl; - struct posix_acl *i_default_acl; - /* for keeping track of orphaned inodes */ struct list_head i_orphan; diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 03441a9..98a8738 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -41,8 +41,6 @@ struct btrfs_ordered_sum; #define BTRFS_MAGIC "_BHRfS_M" -#define BTRFS_ACL_NOT_CACHED ((void *)-1) - #define BTRFS_MAX_LEVEL 8 #define BTRFS_COMPAT_EXTENT_TREE_V0 @@ -2076,8 +2074,7 @@ static inline int btrfs_insert_empty_item(struct btrfs_trans_handle *trans, int btrfs_next_leaf(struct btrfs_root *root, struct btrfs_path *path); int btrfs_prev_leaf(struct btrfs_root *root, struct btrfs_path *path); int btrfs_leaf_free_space(struct btrfs_root *root, struct extent_buffer *leaf); -int btrfs_drop_snapshot(struct btrfs_trans_handle *trans, struct btrfs_root - *root); +int btrfs_drop_snapshot(struct btrfs_root *root, int update_ref); int btrfs_drop_subtree(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct extent_buffer *node, 
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index edc7d20..a5aca39 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -990,15 +990,13 @@ static inline int extent_ref_type(u64 parent, u64 owner) return type; } -static int find_next_key(struct btrfs_path *path, struct btrfs_key *key) +static int find_next_key(struct btrfs_path *path, int level, + struct btrfs_key *key) { - int level; - BUG_ON(!path->keep_locks); - for (level = 0; level < BTRFS_MAX_LEVEL; level++) { + for (; level < BTRFS_MAX_LEVEL; level++) { if (!path->nodes[level]) break; - btrfs_assert_tree_locked(path->nodes[level]); if (path->slots[level] + 1 >= btrfs_header_nritems(path->nodes[level])) continue; @@ -1158,7 +1156,8 @@ int lookup_inline_extent_backref(struct btrfs_trans_handle *trans, * For simplicity, we just do not add new inline back * ref if there is any kind of item for this block */ - if (find_next_key(path, &key) == 0 && key.objectid == bytenr && + if (find_next_key(path, 0, &key) == 0 && + key.objectid == bytenr && key.type < BTRFS_BLOCK_GROUP_ITEM_KEY) { err = -EAGAIN; goto out; @@ -2697,7 +2696,7 @@ again: printk(KERN_ERR "no space left, need %llu, %llu delalloc bytes" ", %llu bytes_used, %llu bytes_reserved, " - "%llu bytes_pinned, %llu bytes_readonly, %llu may use" + "%llu bytes_pinned, %llu bytes_readonly, %llu may use " "%llu total\n", (unsigned long long)bytes, (unsigned long long)data_sinfo->bytes_delalloc, (unsigned long long)data_sinfo->bytes_used, @@ -4128,6 +4127,7 @@ struct extent_buffer *btrfs_alloc_free_block(struct btrfs_trans_handle *trans, return buf; } +#if 0 int btrfs_drop_leaf_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct extent_buffer *leaf) { @@ -4171,8 +4171,6 @@ int btrfs_drop_leaf_ref(struct btrfs_trans_handle *trans, return 0; } -#if 0 - static noinline int cache_drop_leaf_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct btrfs_leaf_ref *ref) @@ -4553,262 +4551,471 @@ out: } #endif 
+struct walk_control { + u64 refs[BTRFS_MAX_LEVEL]; + u64 flags[BTRFS_MAX_LEVEL]; + struct btrfs_key update_progress; + int stage; + int level; + int shared_level; + int update_ref; + int keep_locks; +}; + +#define DROP_REFERENCE 1 +#define UPDATE_BACKREF 2 + /* - * helper function for drop_subtree, this function is similar to - * walk_down_tree. The main difference is that it checks reference - * counts while tree blocks are locked. + * hepler to process tree block while walking down the tree. + * + * when wc->stage == DROP_REFERENCE, this function checks + * reference count of the block. if the block is shared and + * we need update back refs for the subtree rooted at the + * block, this function changes wc->stage to UPDATE_BACKREF + * + * when wc->stage == UPDATE_BACKREF, this function updates + * back refs for pointers in the block. + * + * NOTE: return value 1 means we should stop walking down. */ -static noinline int walk_down_tree(struct btrfs_trans_handle *trans, +static noinline int walk_down_proc(struct btrfs_trans_handle *trans, struct btrfs_root *root, - struct btrfs_path *path, int *level) + struct btrfs_path *path, + struct walk_control *wc) { - struct extent_buffer *next; - struct extent_buffer *cur; - struct extent_buffer *parent; - u64 bytenr; - u64 ptr_gen; - u64 refs; - u64 flags; - u32 blocksize; + int level = wc->level; + struct extent_buffer *eb = path->nodes[level]; + struct btrfs_key key; + u64 flag = BTRFS_BLOCK_FLAG_FULL_BACKREF; int ret; - cur = path->nodes[*level]; - ret = btrfs_lookup_extent_info(trans, root, cur->start, cur->len, - &refs, &flags); - BUG_ON(ret); - if (refs > 1) - goto out; + if (wc->stage == UPDATE_BACKREF && + btrfs_header_owner(eb) != root->root_key.objectid) + return 1; - BUG_ON(!(flags & BTRFS_BLOCK_FLAG_FULL_BACKREF)); + /* + * when reference count of tree block is 1, it won't increase + * again. once full backref flag is set, we never clear it. 
+ */ + if ((wc->stage == DROP_REFERENCE && wc->refs[level] != 1) || + (wc->stage == UPDATE_BACKREF && !(wc->flags[level] & flag))) { + BUG_ON(!path->locks[level]); + ret = btrfs_lookup_extent_info(trans, root, + eb->start, eb->len, + &wc->refs[level], + &wc->flags[level]); + BUG_ON(ret); + BUG_ON(wc->refs[level] == 0); + } - while (*level >= 0) { - cur = path->nodes[*level]; - if (*level == 0) { - ret = btrfs_drop_leaf_ref(trans, root, cur); - BUG_ON(ret); - clean_tree_block(trans, root, cur); - break; - } - if (path->slots[*level] >= btrfs_header_nritems(cur)) { - clean_tree_block(trans, root, cur); - break; + if (wc->stage == DROP_REFERENCE && + wc->update_ref && wc->refs[level] > 1) { + BUG_ON(eb == root->node); + BUG_ON(path->slots[level] > 0); + if (level == 0) + btrfs_item_key_to_cpu(eb, &key, path->slots[level]); + else + btrfs_node_key_to_cpu(eb, &key, path->slots[level]); + if (btrfs_header_owner(eb) == root->root_key.objectid && + btrfs_comp_cpu_keys(&key, &wc->update_progress) >= 0) { + wc->stage = UPDATE_BACKREF; + wc->shared_level = level; } + } - bytenr = btrfs_node_blockptr(cur, path->slots[*level]); - blocksize = btrfs_level_size(root, *level - 1); - ptr_gen = btrfs_node_ptr_generation(cur, path->slots[*level]); + if (wc->stage == DROP_REFERENCE) { + if (wc->refs[level] > 1) + return 1; - next = read_tree_block(root, bytenr, blocksize, ptr_gen); - btrfs_tree_lock(next); - btrfs_set_lock_blocking(next); + if (path->locks[level] && !wc->keep_locks) { + btrfs_tree_unlock(eb); + path->locks[level] = 0; + } + return 0; + } - ret = btrfs_lookup_extent_info(trans, root, bytenr, blocksize, - &refs, &flags); + /* wc->stage == UPDATE_BACKREF */ + if (!(wc->flags[level] & flag)) { + BUG_ON(!path->locks[level]); + ret = btrfs_inc_ref(trans, root, eb, 1); BUG_ON(ret); - if (refs > 1) { - parent = path->nodes[*level]; - ret = btrfs_free_extent(trans, root, bytenr, - blocksize, parent->start, - btrfs_header_owner(parent), - *level - 1, 0); + ret = 
btrfs_dec_ref(trans, root, eb, 0); + BUG_ON(ret); + ret = btrfs_set_disk_extent_flags(trans, root, eb->start, + eb->len, flag, 0); + BUG_ON(ret); + wc->flags[level] |= flag; + } + + /* + * the block is shared by multiple trees, so it's not good to + * keep the tree lock + */ + if (path->locks[level] && level > 0) { + btrfs_tree_unlock(eb); + path->locks[level] = 0; + } + return 0; +} + +/* + * hepler to process tree block while walking up the tree. + * + * when wc->stage == DROP_REFERENCE, this function drops + * reference count on the block. + * + * when wc->stage == UPDATE_BACKREF, this function changes + * wc->stage back to DROP_REFERENCE if we changed wc->stage + * to UPDATE_BACKREF previously while processing the block. + * + * NOTE: return value 1 means we should stop walking up. + */ +static noinline int walk_up_proc(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + struct btrfs_path *path, + struct walk_control *wc) +{ + int ret = 0; + int level = wc->level; + struct extent_buffer *eb = path->nodes[level]; + u64 parent = 0; + + if (wc->stage == UPDATE_BACKREF) { + BUG_ON(wc->shared_level < level); + if (level < wc->shared_level) + goto out; + + BUG_ON(wc->refs[level] <= 1); + ret = find_next_key(path, level + 1, &wc->update_progress); + if (ret > 0) + wc->update_ref = 0; + + wc->stage = DROP_REFERENCE; + wc->shared_level = -1; + path->slots[level] = 0; + + /* + * check reference count again if the block isn't locked. + * we should start walking down the tree again if reference + * count is one. 
+ */ + if (!path->locks[level]) { + BUG_ON(level == 0); + btrfs_tree_lock(eb); + btrfs_set_lock_blocking(eb); + path->locks[level] = 1; + + ret = btrfs_lookup_extent_info(trans, root, + eb->start, eb->len, + &wc->refs[level], + &wc->flags[level]); BUG_ON(ret); - path->slots[*level]++; - btrfs_tree_unlock(next); - free_extent_buffer(next); - continue; + BUG_ON(wc->refs[level] == 0); + if (wc->refs[level] == 1) { + btrfs_tree_unlock(eb); + path->locks[level] = 0; + return 1; + } + } else { + BUG_ON(level != 0); } + } - BUG_ON(!(flags & BTRFS_BLOCK_FLAG_FULL_BACKREF)); + /* wc->stage == DROP_REFERENCE */ + BUG_ON(wc->refs[level] > 1 && !path->locks[level]); - *level = btrfs_header_level(next); - path->nodes[*level] = next; - path->slots[*level] = 0; - path->locks[*level] = 1; - cond_resched(); + if (wc->refs[level] == 1) { + if (level == 0) { + if (wc->flags[level] & BTRFS_BLOCK_FLAG_FULL_BACKREF) + ret = btrfs_dec_ref(trans, root, eb, 1); + else + ret = btrfs_dec_ref(trans, root, eb, 0); + BUG_ON(ret); + } + /* make block locked assertion in clean_tree_block happy */ + if (!path->locks[level] && + btrfs_header_generation(eb) == trans->transid) { + btrfs_tree_lock(eb); + btrfs_set_lock_blocking(eb); + path->locks[level] = 1; + } + clean_tree_block(trans, root, eb); + } + + if (eb == root->node) { + if (wc->flags[level] & BTRFS_BLOCK_FLAG_FULL_BACKREF) + parent = eb->start; + else + BUG_ON(root->root_key.objectid != + btrfs_header_owner(eb)); + } else { + if (wc->flags[level + 1] & BTRFS_BLOCK_FLAG_FULL_BACKREF) + parent = path->nodes[level + 1]->start; + else + BUG_ON(root->root_key.objectid != + btrfs_header_owner(path->nodes[level + 1])); } -out: - if (path->nodes[*level] == root->node) - parent = path->nodes[*level]; - else - parent = path->nodes[*level + 1]; - bytenr = path->nodes[*level]->start; - blocksize = path->nodes[*level]->len; - ret = btrfs_free_extent(trans, root, bytenr, blocksize, parent->start, - btrfs_header_owner(parent), *level, 0); + ret = 
btrfs_free_extent(trans, root, eb->start, eb->len, parent, + root->root_key.objectid, level, 0); BUG_ON(ret); +out: + wc->refs[level] = 0; + wc->flags[level] = 0; + return ret; +} + +static noinline int walk_down_tree(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + struct btrfs_path *path, + struct walk_control *wc) +{ + struct extent_buffer *next; + struct extent_buffer *cur; + u64 bytenr; + u64 ptr_gen; + u32 blocksize; + int level = wc->level; + int ret; + + while (level >= 0) { + cur = path->nodes[level]; + BUG_ON(path->slots[level] >= btrfs_header_nritems(cur)); - if (path->locks[*level]) { - btrfs_tree_unlock(path->nodes[*level]); - path->locks[*level] = 0; + ret = walk_down_proc(trans, root, path, wc); + if (ret > 0) + break; + + if (level == 0) + break; + + bytenr = btrfs_node_blockptr(cur, path->slots[level]); + blocksize = btrfs_level_size(root, level - 1); + ptr_gen = btrfs_node_ptr_generation(cur, path->slots[level]); + + next = read_tree_block(root, bytenr, blocksize, ptr_gen); + btrfs_tree_lock(next); + btrfs_set_lock_blocking(next); + + level--; + BUG_ON(level != btrfs_header_level(next)); + path->nodes[level] = next; + path->slots[level] = 0; + path->locks[level] = 1; + wc->level = level; } - free_extent_buffer(path->nodes[*level]); - path->nodes[*level] = NULL; - *level += 1; - cond_resched(); return 0; } -/* - * helper for dropping snapshots. 
This walks back up the tree in the path - * to find the first node higher up where we haven't yet gone through - * all the slots - */ static noinline int walk_up_tree(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct btrfs_path *path, - int *level, int max_level) + struct walk_control *wc, int max_level) { - struct btrfs_root_item *root_item = &root->root_item; - int i; - int slot; + int level = wc->level; int ret; - for (i = *level; i < max_level && path->nodes[i]; i++) { - slot = path->slots[i]; - if (slot + 1 < btrfs_header_nritems(path->nodes[i])) { - /* - * there is more work to do in this level. - * Update the drop_progress marker to reflect - * the work we've done so far, and then bump - * the slot number - */ - path->slots[i]++; - WARN_ON(*level == 0); - if (max_level == BTRFS_MAX_LEVEL) { - btrfs_node_key(path->nodes[i], - &root_item->drop_progress, - path->slots[i]); - root_item->drop_level = i; - } - *level = i; + path->slots[level] = btrfs_header_nritems(path->nodes[level]); + while (level < max_level && path->nodes[level]) { + wc->level = level; + if (path->slots[level] + 1 < + btrfs_header_nritems(path->nodes[level])) { + path->slots[level]++; return 0; } else { - struct extent_buffer *parent; - - /* - * this whole node is done, free our reference - * on it and go up one level - */ - if (path->nodes[*level] == root->node) - parent = path->nodes[*level]; - else - parent = path->nodes[*level + 1]; + ret = walk_up_proc(trans, root, path, wc); + if (ret > 0) + return 0; - clean_tree_block(trans, root, path->nodes[i]); - ret = btrfs_free_extent(trans, root, - path->nodes[i]->start, - path->nodes[i]->len, - parent->start, - btrfs_header_owner(parent), - *level, 0); - BUG_ON(ret); - if (path->locks[*level]) { - btrfs_tree_unlock(path->nodes[i]); - path->locks[i] = 0; + if (path->locks[level]) { + btrfs_tree_unlock(path->nodes[level]); + path->locks[level] = 0; } - free_extent_buffer(path->nodes[i]); - path->nodes[i] = NULL; - *level = i + 1; 
+ free_extent_buffer(path->nodes[level]); + path->nodes[level] = NULL; + level++; } } return 1; } /* - * drop the reference count on the tree rooted at 'snap'. This traverses - * the tree freeing any blocks that have a ref count of zero after being - * decremented. + * drop a subvolume tree. + * + * this function traverses the tree freeing any blocks that only + * referenced by the tree. + * + * when a shared tree block is found. this function decreases its + * reference count by one. if update_ref is true, this function + * also make sure backrefs for the shared block and all lower level + * blocks are properly updated. */ -int btrfs_drop_snapshot(struct btrfs_trans_handle *trans, struct btrfs_root - *root) +int btrfs_drop_snapshot(struct btrfs_root *root, int update_ref) { - int ret = 0; - int wret; - int level; struct btrfs_path *path; - int update_count; + struct btrfs_trans_handle *trans; + struct btrfs_root *tree_root = root->fs_info->tree_root; struct btrfs_root_item *root_item = &root->root_item; + struct walk_control *wc; + struct btrfs_key key; + int err = 0; + int ret; + int level; path = btrfs_alloc_path(); BUG_ON(!path); - level = btrfs_header_level(root->node); + wc = kzalloc(sizeof(*wc), GFP_NOFS); + BUG_ON(!wc); + + trans = btrfs_start_transaction(tree_root, 1); + if (btrfs_disk_key_objectid(&root_item->drop_progress) == 0) { + level = btrfs_header_level(root->node); path->nodes[level] = btrfs_lock_root_node(root); btrfs_set_lock_blocking(path->nodes[level]); path->slots[level] = 0; path->locks[level] = 1; + memset(&wc->update_progress, 0, + sizeof(wc->update_progress)); } else { - struct btrfs_key key; - struct btrfs_disk_key found_key; - struct extent_buffer *node; - btrfs_disk_key_to_cpu(&key, &root_item->drop_progress); + memcpy(&wc->update_progress, &key, + sizeof(wc->update_progress)); + level = root_item->drop_level; + BUG_ON(level == 0); path->lowest_level = level; - wret = btrfs_search_slot(NULL, root, &key, path, 0, 0); - if (wret < 0) { - 
ret = wret; + ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); + path->lowest_level = 0; + if (ret < 0) { + err = ret; goto out; } - node = path->nodes[level]; - btrfs_node_key(node, &found_key, path->slots[level]); - WARN_ON(memcmp(&found_key, &root_item->drop_progress, - sizeof(found_key))); + btrfs_node_key_to_cpu(path->nodes[level], &key, + path->slots[level]); + WARN_ON(memcmp(&key, &wc->update_progress, sizeof(key))); + /* * unlock our path, this is safe because only this * function is allowed to delete this snapshot */ btrfs_unlock_up_safe(path, 0); + + level = btrfs_header_level(root->node); + while (1) { + btrfs_tree_lock(path->nodes[level]); + btrfs_set_lock_blocking(path->nodes[level]); + + ret = btrfs_lookup_extent_info(trans, root, + path->nodes[level]->start, + path->nodes[level]->len, + &wc->refs[level], + &wc->flags[level]); + BUG_ON(ret); + BUG_ON(wc->refs[level] == 0); + + if (level == root_item->drop_level) + break; + + btrfs_tree_unlock(path->nodes[level]); + WARN_ON(wc->refs[level] != 1); + level--; + } } + + wc->level = level; + wc->shared_level = -1; + wc->stage = DROP_REFERENCE; + wc->update_ref = update_ref; + wc->keep_locks = 0; + while (1) { - unsigned long update; - wret = walk_down_tree(trans, root, path, &level); - if (wret > 0) + ret = walk_down_tree(trans, root, path, wc); + if (ret < 0) { + err = ret; break; - if (wret < 0) - ret = wret; + } - wret = walk_up_tree(trans, root, path, &level, - BTRFS_MAX_LEVEL); - if (wret > 0) + ret = walk_up_tree(trans, root, path, wc, BTRFS_MAX_LEVEL); + if (ret < 0) { + err = ret; break; - if (wret < 0) - ret = wret; - if (trans->transaction->in_commit || - trans->transaction->delayed_refs.flushing) { - ret = -EAGAIN; + } + + if (ret > 0) { + BUG_ON(wc->stage != DROP_REFERENCE); break; } - for (update_count = 0; update_count < 16; update_count++) { + + if (wc->stage == DROP_REFERENCE) { + level = wc->level; + btrfs_node_key(path->nodes[level], + &root_item->drop_progress, + 
path->slots[level]); + root_item->drop_level = level; + } + + BUG_ON(wc->level == 0); + if (trans->transaction->in_commit || + trans->transaction->delayed_refs.flushing) { + ret = btrfs_update_root(trans, tree_root, + &root->root_key, + root_item); + BUG_ON(ret); + + btrfs_end_transaction(trans, tree_root); + trans = btrfs_start_transaction(tree_root, 1); + } else { + unsigned long update; update = trans->delayed_ref_updates; trans->delayed_ref_updates = 0; if (update) - btrfs_run_delayed_refs(trans, root, update); - else - break; + btrfs_run_delayed_refs(trans, tree_root, + update); } } + btrfs_release_path(root, path); + BUG_ON(err); + + ret = btrfs_del_root(trans, tree_root, &root->root_key); + BUG_ON(ret); + + free_extent_buffer(root->node); + free_extent_buffer(root->commit_root); + kfree(root); out: + btrfs_end_transaction(trans, tree_root); + kfree(wc); btrfs_free_path(path); - return ret; + return err; } +/* + * drop subtree rooted at tree block 'node'. + * + * NOTE: this function will unlock and release tree block 'node' + */ int btrfs_drop_subtree(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct extent_buffer *node, struct extent_buffer *parent) { struct btrfs_path *path; + struct walk_control *wc; int level; int parent_level; int ret = 0; int wret; + BUG_ON(root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID); + path = btrfs_alloc_path(); BUG_ON(!path); + wc = kzalloc(sizeof(*wc), GFP_NOFS); + BUG_ON(!wc); + btrfs_assert_tree_locked(parent); parent_level = btrfs_header_level(parent); extent_buffer_get(parent); @@ -4817,24 +5024,33 @@ int btrfs_drop_subtree(struct btrfs_trans_handle *trans, btrfs_assert_tree_locked(node); level = btrfs_header_level(node); - extent_buffer_get(node); path->nodes[level] = node; path->slots[level] = 0; + path->locks[level] = 1; + + wc->refs[parent_level] = 1; + wc->flags[parent_level] = BTRFS_BLOCK_FLAG_FULL_BACKREF; + wc->level = level; + wc->shared_level = -1; + wc->stage = DROP_REFERENCE; + wc->update_ref 
= 0; + wc->keep_locks = 1; while (1) { - wret = walk_down_tree(trans, root, path, &level); - if (wret < 0) + wret = walk_down_tree(trans, root, path, wc); + if (wret < 0) { ret = wret; - if (wret != 0) break; + } - wret = walk_up_tree(trans, root, path, &level, parent_level); + wret = walk_up_tree(trans, root, path, wc, parent_level); if (wret < 0) ret = wret; if (wret != 0) break; } + kfree(wc); btrfs_free_path(path); return ret; } diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index 126477e..7c3cd24 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -151,7 +151,10 @@ static noinline int dirty_and_release_pages(struct btrfs_trans_handle *trans, } if (end_pos > isize) { i_size_write(inode, end_pos); - btrfs_update_inode(trans, root, inode); + /* we've only changed i_size in ram, and we haven't updated + * the disk i_size. There is no need to log the inode + * at this time. + */ } err = btrfs_end_transaction(trans, root); out_unlock: diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 8612b3a..7ffa3d3 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -2122,10 +2122,8 @@ static void btrfs_read_locked_inode(struct inode *inode) * any xattrs or acls */ maybe_acls = acls_after_inode_item(leaf, path->slots[0], inode->i_ino); - if (!maybe_acls) { - BTRFS_I(inode)->i_acl = NULL; - BTRFS_I(inode)->i_default_acl = NULL; - } + if (!maybe_acls) + cache_no_acl(inode); BTRFS_I(inode)->block_group = btrfs_find_block_group(root, 0, alloc_group_block, 0); @@ -3141,9 +3139,6 @@ static noinline void init_btrfs_i(struct inode *inode) { struct btrfs_inode *bi = BTRFS_I(inode); - bi->i_acl = BTRFS_ACL_NOT_CACHED; - bi->i_default_acl = BTRFS_ACL_NOT_CACHED; - bi->generation = 0; bi->sequence = 0; bi->last_trans = 0; @@ -3585,12 +3580,6 @@ static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans, owner = 1; BTRFS_I(inode)->block_group = btrfs_find_block_group(root, 0, alloc_hint, owner); - if ((mode & S_IFREG)) { - if (btrfs_test_opt(root, NODATASUM)) - 
BTRFS_I(inode)->flags |= BTRFS_INODE_NODATASUM; - if (btrfs_test_opt(root, NODATACOW)) - BTRFS_I(inode)->flags |= BTRFS_INODE_NODATACOW; - } key[0].objectid = objectid; btrfs_set_key_type(&key[0], BTRFS_INODE_ITEM_KEY); @@ -3645,6 +3634,13 @@ static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans, btrfs_inherit_iflags(inode, dir); + if ((mode & S_IFREG)) { + if (btrfs_test_opt(root, NODATASUM)) + BTRFS_I(inode)->flags |= BTRFS_INODE_NODATASUM; + if (btrfs_test_opt(root, NODATACOW)) + BTRFS_I(inode)->flags |= BTRFS_INODE_NODATACOW; + } + insert_inode_hash(inode); inode_tree_add(inode); return inode; @@ -4640,8 +4636,6 @@ struct inode *btrfs_alloc_inode(struct super_block *sb) ei->last_trans = 0; ei->logged_trans = 0; btrfs_ordered_inode_tree_init(&ei->ordered_tree); - ei->i_acl = BTRFS_ACL_NOT_CACHED; - ei->i_default_acl = BTRFS_ACL_NOT_CACHED; INIT_LIST_HEAD(&ei->i_orphan); INIT_LIST_HEAD(&ei->ordered_operations); return &ei->vfs_inode; @@ -4655,13 +4649,6 @@ void btrfs_destroy_inode(struct inode *inode) WARN_ON(!list_empty(&inode->i_dentry)); WARN_ON(inode->i_data.nrpages); - if (BTRFS_I(inode)->i_acl && - BTRFS_I(inode)->i_acl != BTRFS_ACL_NOT_CACHED) - posix_acl_release(BTRFS_I(inode)->i_acl); - if (BTRFS_I(inode)->i_default_acl && - BTRFS_I(inode)->i_default_acl != BTRFS_ACL_NOT_CACHED) - posix_acl_release(BTRFS_I(inode)->i_default_acl); - /* * Make sure we're properly removed from the ordered operation * lists. 
@@ -5096,6 +5083,7 @@ static long btrfs_fallocate(struct inode *inode, int mode, u64 mask = BTRFS_I(inode)->root->sectorsize - 1; struct extent_map *em; struct btrfs_trans_handle *trans; + struct btrfs_root *root; int ret; alloc_start = offset & ~mask; @@ -5114,6 +5102,13 @@ static long btrfs_fallocate(struct inode *inode, int mode, goto out; } + root = BTRFS_I(inode)->root; + + ret = btrfs_check_data_free_space(root, inode, + alloc_end - alloc_start); + if (ret) + goto out; + locked_end = alloc_end - 1; while (1) { struct btrfs_ordered_extent *ordered; @@ -5121,7 +5116,7 @@ static long btrfs_fallocate(struct inode *inode, int mode, trans = btrfs_start_transaction(BTRFS_I(inode)->root, 1); if (!trans) { ret = -EIO; - goto out; + goto out_free; } /* the extent lock is ordered inside the running @@ -5182,6 +5177,8 @@ static long btrfs_fallocate(struct inode *inode, int mode, GFP_NOFS); btrfs_end_transaction(trans, BTRFS_I(inode)->root); +out_free: + btrfs_free_reserved_data_space(root, inode, alloc_end - alloc_start); out: mutex_unlock(&inode->i_mutex); return ret; diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index eff18f5..9f4db84 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -1028,7 +1028,8 @@ static long btrfs_ioctl_clone(struct file *file, unsigned long srcfd, struct btrfs_file_extent_item); comp = btrfs_file_extent_compression(leaf, extent); type = btrfs_file_extent_type(leaf, extent); - if (type == BTRFS_FILE_EXTENT_REG) { + if (type == BTRFS_FILE_EXTENT_REG || + type == BTRFS_FILE_EXTENT_PREALLOC) { disko = btrfs_file_extent_disk_bytenr(leaf, extent); diskl = btrfs_file_extent_disk_num_bytes(leaf, @@ -1051,7 +1052,8 @@ static long btrfs_ioctl_clone(struct file *file, unsigned long srcfd, new_key.objectid = inode->i_ino; new_key.offset = key.offset + destoff - off; - if (type == BTRFS_FILE_EXTENT_REG) { + if (type == BTRFS_FILE_EXTENT_REG || + type == BTRFS_FILE_EXTENT_PREALLOC) { ret = btrfs_insert_empty_item(trans, root, path, &new_key, size); 
if (ret) diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c index b23dc20..0083979 100644 --- a/fs/btrfs/relocation.c +++ b/fs/btrfs/relocation.c @@ -1788,7 +1788,7 @@ static void merge_func(struct btrfs_work *work) btrfs_end_transaction(trans, root); } - btrfs_drop_dead_root(reloc_root); + btrfs_drop_snapshot(reloc_root, 0); if (atomic_dec_and_test(async->num_pending)) complete(async->done); @@ -2075,9 +2075,6 @@ static int do_relocation(struct btrfs_trans_handle *trans, ret = btrfs_drop_subtree(trans, root, eb, upper->eb); BUG_ON(ret); - - btrfs_tree_unlock(eb); - free_extent_buffer(eb); } if (!lowest) { btrfs_tree_unlock(upper->eb); diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index 4e83457..2dbf1c1 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c @@ -593,6 +593,7 @@ int btrfs_defrag_root(struct btrfs_root *root, int cacheonly) return 0; } +#if 0 /* * when dropping snapshots, we generate a ton of delayed refs, and it makes * sense not to join the transaction while it is trying to flush the current @@ -681,6 +682,7 @@ int btrfs_drop_dead_root(struct btrfs_root *root) btrfs_btree_balance_dirty(tree_root, nr); return ret; } +#endif /* * new snapshots need to be created at a very specific time in the @@ -1081,7 +1083,7 @@ int btrfs_clean_old_snapshots(struct btrfs_root *root) while (!list_empty(&list)) { root = list_entry(list.next, struct btrfs_root, root_list); list_del_init(&root->root_list); - btrfs_drop_dead_root(root); + btrfs_drop_snapshot(root, 0); } return 0; } diff --git a/fs/cifs/CHANGES b/fs/cifs/CHANGES index b486898..3a9b7a5 100644 --- a/fs/cifs/CHANGES +++ b/fs/cifs/CHANGES @@ -5,7 +5,7 @@ client generated ones by default (mount option "serverino" turned on by default if server supports it). Add forceuid and forcegid mount options (so that when negotiating unix extensions specifying which uid mounted does not immediately force the server's reported -uids to be overridden). +uids to be overridden). 
Add support for scope moutn parm. Version 1.58 ------------ diff --git a/fs/cifs/asn1.c b/fs/cifs/asn1.c index 1b09f16..20692fb 100644 --- a/fs/cifs/asn1.c +++ b/fs/cifs/asn1.c @@ -49,6 +49,7 @@ #define ASN1_OJI 6 /* Object Identifier */ #define ASN1_OJD 7 /* Object Description */ #define ASN1_EXT 8 /* External */ +#define ASN1_ENUM 10 /* Enumerated */ #define ASN1_SEQ 16 /* Sequence */ #define ASN1_SET 17 /* Set */ #define ASN1_NUMSTR 18 /* Numerical String */ @@ -78,10 +79,12 @@ #define SPNEGO_OID_LEN 7 #define NTLMSSP_OID_LEN 10 #define KRB5_OID_LEN 7 +#define KRB5U2U_OID_LEN 8 #define MSKRB5_OID_LEN 7 static unsigned long SPNEGO_OID[7] = { 1, 3, 6, 1, 5, 5, 2 }; static unsigned long NTLMSSP_OID[10] = { 1, 3, 6, 1, 4, 1, 311, 2, 2, 10 }; static unsigned long KRB5_OID[7] = { 1, 2, 840, 113554, 1, 2, 2 }; +static unsigned long KRB5U2U_OID[8] = { 1, 2, 840, 113554, 1, 2, 2, 3 }; static unsigned long MSKRB5_OID[7] = { 1, 2, 840, 48018, 1, 2, 2 }; /* @@ -122,6 +125,28 @@ asn1_octet_decode(struct asn1_ctx *ctx, unsigned char *ch) return 1; } +#if 0 /* will be needed later by spnego decoding/encoding of ntlmssp */ +static unsigned char +asn1_enum_decode(struct asn1_ctx *ctx, __le32 *val) +{ + unsigned char ch; + + if (ctx->pointer >= ctx->end) { + ctx->error = ASN1_ERR_DEC_EMPTY; + return 0; + } + + ch = *(ctx->pointer)++; /* ch has 0xa, ptr points to lenght octet */ + if ((ch) == ASN1_ENUM) /* if ch value is ENUM, 0xa */ + *val = *(++(ctx->pointer)); /* value has enum value */ + else + return 0; + + ctx->pointer++; + return 1; +} +#endif + static unsigned char asn1_tag_decode(struct asn1_ctx *ctx, unsigned int *tag) { @@ -476,10 +501,9 @@ decode_negTokenInit(unsigned char *security_blob, int length, unsigned int cls, con, tag, oidlen, rc; bool use_ntlmssp = false; bool use_kerberos = false; + bool use_kerberosu2u = false; bool use_mskerberos = false; - *secType = NTLM; /* BB eventually make Kerberos or NLTMSSP the default*/ - /* cifs_dump_mem(" Received SecBlob ", 
security_blob, length); */ asn1_open(&ctx, security_blob, length); @@ -515,6 +539,7 @@ decode_negTokenInit(unsigned char *security_blob, int length, return 0; } + /* SPNEGO */ if (asn1_header_decode(&ctx, &end, &cls, &con, &tag) == 0) { cFYI(1, ("Error decoding negTokenInit")); return 0; @@ -526,6 +551,7 @@ decode_negTokenInit(unsigned char *security_blob, int length, return 0; } + /* negTokenInit */ if (asn1_header_decode(&ctx, &end, &cls, &con, &tag) == 0) { cFYI(1, ("Error decoding negTokenInit")); return 0; @@ -537,6 +563,7 @@ decode_negTokenInit(unsigned char *security_blob, int length, return 0; } + /* sequence */ if (asn1_header_decode(&ctx, &end, &cls, &con, &tag) == 0) { cFYI(1, ("Error decoding 2nd part of negTokenInit")); return 0; @@ -548,6 +575,7 @@ decode_negTokenInit(unsigned char *security_blob, int length, return 0; } + /* sequence of */ if (asn1_header_decode (&ctx, &sequence_end, &cls, &con, &tag) == 0) { cFYI(1, ("Error decoding 2nd part of negTokenInit")); @@ -560,6 +588,7 @@ decode_negTokenInit(unsigned char *security_blob, int length, return 0; } + /* list of security mechanisms */ while (!asn1_eoc_decode(&ctx, sequence_end)) { rc = asn1_header_decode(&ctx, &end, &cls, &con, &tag); if (!rc) { @@ -576,11 +605,15 @@ decode_negTokenInit(unsigned char *security_blob, int length, if (compare_oid(oid, oidlen, MSKRB5_OID, MSKRB5_OID_LEN) && - !use_kerberos) + !use_mskerberos) use_mskerberos = true; + else if (compare_oid(oid, oidlen, KRB5U2U_OID, + KRB5U2U_OID_LEN) && + !use_kerberosu2u) + use_kerberosu2u = true; else if (compare_oid(oid, oidlen, KRB5_OID, KRB5_OID_LEN) && - !use_mskerberos) + !use_kerberos) use_kerberos = true; else if (compare_oid(oid, oidlen, NTLMSSP_OID, NTLMSSP_OID_LEN)) @@ -593,7 +626,12 @@ decode_negTokenInit(unsigned char *security_blob, int length, } } + /* mechlistMIC */ if (asn1_header_decode(&ctx, &end, &cls, &con, &tag) == 0) { + /* Check if we have reached the end of the blob, but with + no mechListMic (e.g. 
NTLMSSP instead of KRB5) */ + if (ctx.error == ASN1_ERR_DEC_EMPTY) + goto decode_negtoken_exit; cFYI(1, ("Error decoding last part negTokenInit exit3")); return 0; } else if ((cls != ASN1_CTX) || (con != ASN1_CON)) { @@ -602,6 +640,8 @@ decode_negTokenInit(unsigned char *security_blob, int length, cls, con, tag, end, *end)); return 0; } + + /* sequence */ if (asn1_header_decode(&ctx, &end, &cls, &con, &tag) == 0) { cFYI(1, ("Error decoding last part negTokenInit exit5")); return 0; @@ -611,6 +651,7 @@ decode_negTokenInit(unsigned char *security_blob, int length, cls, con, tag, end, *end)); } + /* sequence of */ if (asn1_header_decode(&ctx, &end, &cls, &con, &tag) == 0) { cFYI(1, ("Error decoding last part negTokenInit exit 7")); return 0; @@ -619,6 +660,8 @@ decode_negTokenInit(unsigned char *security_blob, int length, cls, con, tag, end, *end)); return 0; } + + /* general string */ if (asn1_header_decode(&ctx, &end, &cls, &con, &tag) == 0) { cFYI(1, ("Error decoding last part negTokenInit exit9")); return 0; @@ -630,13 +673,13 @@ decode_negTokenInit(unsigned char *security_blob, int length, } cFYI(1, ("Need to call asn1_octets_decode() function for %s", ctx.pointer)); /* is this UTF-8 or ASCII? 
*/ - +decode_negtoken_exit: if (use_kerberos) *secType = Kerberos; else if (use_mskerberos) *secType = MSKerberos; else if (use_ntlmssp) - *secType = NTLMSSP; + *secType = RawNTLMSSP; return 1; } diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c index 0d92114..9f669f9 100644 --- a/fs/cifs/cifsfs.c +++ b/fs/cifs/cifsfs.c @@ -333,6 +333,27 @@ cifs_destroy_inode(struct inode *inode) kmem_cache_free(cifs_inode_cachep, CIFS_I(inode)); } +static void +cifs_show_address(struct seq_file *s, struct TCP_Server_Info *server) +{ + seq_printf(s, ",addr="); + + switch (server->addr.sockAddr.sin_family) { + case AF_INET: + seq_printf(s, "%pI4", &server->addr.sockAddr.sin_addr.s_addr); + break; + case AF_INET6: + seq_printf(s, "%pI6", + &server->addr.sockAddr6.sin6_addr.s6_addr); + if (server->addr.sockAddr6.sin6_scope_id) + seq_printf(s, "%%%u", + server->addr.sockAddr6.sin6_scope_id); + break; + default: + seq_printf(s, "(unknown)"); + } +} + /* * cifs_show_options() is for displaying mount options in /proc/mounts. * Not all settable options are displayed but most of the important @@ -343,83 +364,64 @@ cifs_show_options(struct seq_file *s, struct vfsmount *m) { struct cifs_sb_info *cifs_sb; struct cifsTconInfo *tcon; - struct TCP_Server_Info *server; cifs_sb = CIFS_SB(m->mnt_sb); + tcon = cifs_sb->tcon; - if (cifs_sb) { - tcon = cifs_sb->tcon; - if (tcon) { - seq_printf(s, ",unc=%s", cifs_sb->tcon->treeName); - if (tcon->ses) { - if (tcon->ses->userName) - seq_printf(s, ",username=%s", - tcon->ses->userName); - if (tcon->ses->domainName) - seq_printf(s, ",domain=%s", - tcon->ses->domainName); - server = tcon->ses->server; - if (server) { - seq_printf(s, ",addr="); - switch (server->addr.sockAddr6. 
- sin6_family) { - case AF_INET6: - seq_printf(s, "%pI6", - &server->addr.sockAddr6.sin6_addr); - break; - case AF_INET: - seq_printf(s, "%pI4", - &server->addr.sockAddr.sin_addr.s_addr); - break; - } - } - } - if ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_OVERR_UID) || - !(tcon->unix_ext)) - seq_printf(s, ",uid=%d", cifs_sb->mnt_uid); - if ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_OVERR_GID) || - !(tcon->unix_ext)) - seq_printf(s, ",gid=%d", cifs_sb->mnt_gid); - if (!tcon->unix_ext) { - seq_printf(s, ",file_mode=0%o,dir_mode=0%o", + seq_printf(s, ",unc=%s", cifs_sb->tcon->treeName); + if (tcon->ses->userName) + seq_printf(s, ",username=%s", tcon->ses->userName); + if (tcon->ses->domainName) + seq_printf(s, ",domain=%s", tcon->ses->domainName); + + seq_printf(s, ",uid=%d", cifs_sb->mnt_uid); + if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_OVERR_UID) + seq_printf(s, ",forceuid"); + + seq_printf(s, ",gid=%d", cifs_sb->mnt_gid); + if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_OVERR_GID) + seq_printf(s, ",forcegid"); + + cifs_show_address(s, tcon->ses->server); + + if (!tcon->unix_ext) + seq_printf(s, ",file_mode=0%o,dir_mode=0%o", cifs_sb->mnt_file_mode, cifs_sb->mnt_dir_mode); - } - if (tcon->seal) - seq_printf(s, ",seal"); - if (tcon->nocase) - seq_printf(s, ",nocase"); - if (tcon->retry) - seq_printf(s, ",hard"); - } - if (cifs_sb->prepath) - seq_printf(s, ",prepath=%s", cifs_sb->prepath); - if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_POSIX_PATHS) - seq_printf(s, ",posixpaths"); - if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_SET_UID) - seq_printf(s, ",setuids"); - if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_SERVER_INUM) - seq_printf(s, ",serverino"); - if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_DIRECT_IO) - seq_printf(s, ",directio"); - if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NO_XATTR) - seq_printf(s, ",nouser_xattr"); - if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MAP_SPECIAL_CHR) - seq_printf(s, ",mapchars"); - if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_UNX_EMUL) - seq_printf(s, ",sfu"); - if 
(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NO_BRL) - seq_printf(s, ",nobrl"); - if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_CIFS_ACL) - seq_printf(s, ",cifsacl"); - if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_DYNPERM) - seq_printf(s, ",dynperm"); - if (m->mnt_sb->s_flags & MS_POSIXACL) - seq_printf(s, ",acl"); - - seq_printf(s, ",rsize=%d", cifs_sb->rsize); - seq_printf(s, ",wsize=%d", cifs_sb->wsize); - } + if (tcon->seal) + seq_printf(s, ",seal"); + if (tcon->nocase) + seq_printf(s, ",nocase"); + if (tcon->retry) + seq_printf(s, ",hard"); + if (cifs_sb->prepath) + seq_printf(s, ",prepath=%s", cifs_sb->prepath); + if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_POSIX_PATHS) + seq_printf(s, ",posixpaths"); + if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_SET_UID) + seq_printf(s, ",setuids"); + if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_SERVER_INUM) + seq_printf(s, ",serverino"); + if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_DIRECT_IO) + seq_printf(s, ",directio"); + if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NO_XATTR) + seq_printf(s, ",nouser_xattr"); + if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MAP_SPECIAL_CHR) + seq_printf(s, ",mapchars"); + if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_UNX_EMUL) + seq_printf(s, ",sfu"); + if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NO_BRL) + seq_printf(s, ",nobrl"); + if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_CIFS_ACL) + seq_printf(s, ",cifsacl"); + if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_DYNPERM) + seq_printf(s, ",dynperm"); + if (m->mnt_sb->s_flags & MS_POSIXACL) + seq_printf(s, ",acl"); + + seq_printf(s, ",rsize=%d", cifs_sb->rsize); + seq_printf(s, ",wsize=%d", cifs_sb->wsize); + return 0; } @@ -535,9 +537,14 @@ static void cifs_umount_begin(struct super_block *sb) if (tcon == NULL) return; - lock_kernel(); read_lock(&cifs_tcp_ses_lock); - if (tcon->tc_count == 1) + if ((tcon->tc_count > 1) || (tcon->tidStatus == CifsExiting)) { + /* we have other mounts to same share or we have + already tried to force umount this and woken up + all waiting network requests, 
nothing to do */ + read_unlock(&cifs_tcp_ses_lock); + return; + } else if (tcon->tc_count == 1) tcon->tidStatus = CifsExiting; read_unlock(&cifs_tcp_ses_lock); @@ -552,9 +559,7 @@ static void cifs_umount_begin(struct super_block *sb) wake_up_all(&tcon->ses->server->response_q); msleep(1); } -/* BB FIXME - finish add checks for tidStatus BB */ - unlock_kernel(); return; } diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h index a61ab77..e1225e6 100644 --- a/fs/cifs/cifsglob.h +++ b/fs/cifs/cifsglob.h @@ -83,7 +83,7 @@ enum securityEnum { NTLM, /* Legacy NTLM012 auth with NTLM hash */ NTLMv2, /* Legacy NTLM auth with NTLMv2 hash */ RawNTLMSSP, /* NTLMSSP without SPNEGO, NTLMv2 hash */ - NTLMSSP, /* NTLMSSP via SPNEGO, NTLMv2 hash */ +/* NTLMSSP, */ /* can use rawNTLMSSP instead of NTLMSSP via SPNEGO */ Kerberos, /* Kerberos via SPNEGO */ MSKerberos, /* MS Kerberos via SPNEGO */ }; diff --git a/fs/cifs/cifsproto.h b/fs/cifs/cifsproto.h index f945232..c419416 100644 --- a/fs/cifs/cifsproto.h +++ b/fs/cifs/cifsproto.h @@ -74,7 +74,7 @@ extern unsigned int smbCalcSize(struct smb_hdr *ptr); extern unsigned int smbCalcSize_LE(struct smb_hdr *ptr); extern int decode_negTokenInit(unsigned char *security_blob, int length, enum securityEnum *secType); -extern int cifs_inet_pton(const int, const char *source, void *dst); +extern int cifs_convert_address(char *src, void *dst); extern int map_smb_to_linux_error(struct smb_hdr *smb, int logErr); extern void header_assemble(struct smb_hdr *, char /* command */ , const struct cifsTconInfo *, int /* length of diff --git a/fs/cifs/cifssmb.c b/fs/cifs/cifssmb.c index b84c61d..61007c6 100644 --- a/fs/cifs/cifssmb.c +++ b/fs/cifs/cifssmb.c @@ -594,7 +594,7 @@ CIFSSMBNegotiate(unsigned int xid, struct cifsSesInfo *ses) else if (secFlags & CIFSSEC_MAY_KRB5) server->secType = Kerberos; else if (secFlags & CIFSSEC_MAY_NTLMSSP) - server->secType = NTLMSSP; + server->secType = RawNTLMSSP; else if (secFlags & CIFSSEC_MAY_LANMAN) 
server->secType = LANMAN; /* #ifdef CONFIG_CIFS_EXPERIMENTAL @@ -729,7 +729,7 @@ CIFSSMBTDis(const int xid, struct cifsTconInfo *tcon) * the tcon is no longer on the list, so no need to take lock before * checking this. */ - if (tcon->need_reconnect) + if ((tcon->need_reconnect) || (tcon->ses->need_reconnect)) return 0; rc = small_smb_init(SMB_COM_TREE_DISCONNECT, 0, tcon, diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c index 97f4311..e16d759 100644 --- a/fs/cifs/connect.c +++ b/fs/cifs/connect.c @@ -70,7 +70,6 @@ struct smb_vol { mode_t file_mode; mode_t dir_mode; unsigned secFlg; - bool rw:1; bool retry:1; bool intr:1; bool setuids:1; @@ -832,7 +831,6 @@ cifs_parse_mount_options(char *options, const char *devname, vol->dir_mode = vol->file_mode = S_IRUGO | S_IXUGO | S_IWUSR; /* vol->retry default is 0 (i.e. "soft" limited retry not hard retry) */ - vol->rw = true; /* default is always to request posix paths. */ vol->posix_paths = 1; /* default to using server inode numbers where available */ @@ -1199,7 +1197,9 @@ cifs_parse_mount_options(char *options, const char *devname, } else if (strnicmp(data, "guest", 5) == 0) { /* ignore */ } else if (strnicmp(data, "rw", 2) == 0) { - vol->rw = true; + /* ignore */ + } else if (strnicmp(data, "ro", 2) == 0) { + /* ignore */ } else if (strnicmp(data, "noblocksend", 11) == 0) { vol->noblocksnd = 1; } else if (strnicmp(data, "noautotune", 10) == 0) { @@ -1218,8 +1218,6 @@ cifs_parse_mount_options(char *options, const char *devname, parse these options again and set anything and it is ok to just ignore them */ continue; - } else if (strnicmp(data, "ro", 2) == 0) { - vol->rw = false; } else if (strnicmp(data, "hard", 4) == 0) { vol->retry = 1; } else if (strnicmp(data, "soft", 4) == 0) { @@ -1386,8 +1384,10 @@ cifs_find_tcp_session(struct sockaddr_storage *addr) server->addr.sockAddr.sin_addr.s_addr)) continue; else if (addr->ss_family == AF_INET6 && - !ipv6_addr_equal(&server->addr.sockAddr6.sin6_addr, - &addr6->sin6_addr)) 
+ (!ipv6_addr_equal(&server->addr.sockAddr6.sin6_addr, + &addr6->sin6_addr) || + server->addr.sockAddr6.sin6_scope_id != + addr6->sin6_scope_id)) continue; ++server->srv_count; @@ -1433,28 +1433,15 @@ cifs_get_tcp_session(struct smb_vol *volume_info) memset(&addr, 0, sizeof(struct sockaddr_storage)); - if (volume_info->UNCip && volume_info->UNC) { - rc = cifs_inet_pton(AF_INET, volume_info->UNCip, - &sin_server->sin_addr.s_addr); - - if (rc <= 0) { - /* not ipv4 address, try ipv6 */ - rc = cifs_inet_pton(AF_INET6, volume_info->UNCip, - &sin_server6->sin6_addr.in6_u); - if (rc > 0) - addr.ss_family = AF_INET6; - } else { - addr.ss_family = AF_INET; - } + cFYI(1, ("UNC: %s ip: %s", volume_info->UNC, volume_info->UNCip)); - if (rc <= 0) { + if (volume_info->UNCip && volume_info->UNC) { + rc = cifs_convert_address(volume_info->UNCip, &addr); + if (!rc) { /* we failed translating address */ rc = -EINVAL; goto out_err; } - - cFYI(1, ("UNC: %s ip: %s", volume_info->UNC, - volume_info->UNCip)); } else if (volume_info->UNCip) { /* BB using ip addr as tcp_ses name to connect to the DFS root below */ @@ -1513,14 +1500,14 @@ cifs_get_tcp_session(struct smb_vol *volume_info) cFYI(1, ("attempting ipv6 connect")); /* BB should we allow ipv6 on port 139? 
*/ /* other OS never observed in Wild doing 139 with v6 */ + sin_server6->sin6_port = htons(volume_info->port); memcpy(&tcp_ses->addr.sockAddr6, sin_server6, sizeof(struct sockaddr_in6)); - sin_server6->sin6_port = htons(volume_info->port); rc = ipv6_connect(tcp_ses); } else { + sin_server->sin_port = htons(volume_info->port); memcpy(&tcp_ses->addr.sockAddr, sin_server, sizeof(struct sockaddr_in)); - sin_server->sin_port = htons(volume_info->port); rc = ipv4_connect(tcp_ses); } if (rc < 0) { diff --git a/fs/cifs/dir.c b/fs/cifs/dir.c index 3758965d..7dc6b74 100644 --- a/fs/cifs/dir.c +++ b/fs/cifs/dir.c @@ -307,8 +307,9 @@ cifs_create(struct inode *inode, struct dentry *direntry, int mode, full_path = build_path_from_dentry(direntry); if (full_path == NULL) { + rc = -ENOMEM; FreeXid(xid); - return -ENOMEM; + return rc; } if (oplockEnabled) @@ -540,8 +541,9 @@ int cifs_mknod(struct inode *inode, struct dentry *direntry, int mode, buf = kmalloc(sizeof(FILE_ALL_INFO), GFP_KERNEL); if (buf == NULL) { kfree(full_path); + rc = -ENOMEM; FreeXid(xid); - return -ENOMEM; + return rc; } rc = CIFSSMBOpen(xid, pTcon, full_path, diff --git a/fs/cifs/dns_resolve.c b/fs/cifs/dns_resolve.c index df4a306..8794814 100644 --- a/fs/cifs/dns_resolve.c +++ b/fs/cifs/dns_resolve.c @@ -35,26 +35,11 @@ * 0 - name is not IP */ static int -is_ip(const char *name) +is_ip(char *name) { - int rc; - struct sockaddr_in sin_server; - struct sockaddr_in6 sin_server6; - - rc = cifs_inet_pton(AF_INET, name, - &sin_server.sin_addr.s_addr); - - if (rc <= 0) { - /* not ipv4 address, try ipv6 */ - rc = cifs_inet_pton(AF_INET6, name, - &sin_server6.sin6_addr.in6_u); - if (rc > 0) - return 1; - } else { - return 1; - } - /* we failed translating address */ - return 0; + struct sockaddr_storage ss; + + return cifs_convert_address(name, &ss); } static int @@ -72,7 +57,7 @@ dns_resolver_instantiate(struct key *key, const void *data, ip[datalen] = '\0'; /* make sure this looks like an address */ - if 
(!is_ip((const char *) ip)) { + if (!is_ip(ip)) { kfree(ip); return -EINVAL; } diff --git a/fs/cifs/file.c b/fs/cifs/file.c index 0686684..97ce4bf 100644 --- a/fs/cifs/file.c +++ b/fs/cifs/file.c @@ -300,14 +300,16 @@ int cifs_open(struct inode *inode, struct file *file) pCifsInode = CIFS_I(file->f_path.dentry->d_inode); pCifsFile = cifs_fill_filedata(file); if (pCifsFile) { + rc = 0; FreeXid(xid); - return 0; + return rc; } full_path = build_path_from_dentry(file->f_path.dentry); if (full_path == NULL) { + rc = -ENOMEM; FreeXid(xid); - return -ENOMEM; + return rc; } cFYI(1, ("inode = 0x%p file flags are 0x%x for %s", @@ -491,11 +493,12 @@ static int cifs_reopen_file(struct file *file, bool can_flush) return -EBADF; xid = GetXid(); - mutex_unlock(&pCifsFile->fh_mutex); + mutex_lock(&pCifsFile->fh_mutex); if (!pCifsFile->invalidHandle) { - mutex_lock(&pCifsFile->fh_mutex); + mutex_unlock(&pCifsFile->fh_mutex); + rc = 0; FreeXid(xid); - return 0; + return rc; } if (file->f_path.dentry == NULL) { @@ -524,7 +527,7 @@ static int cifs_reopen_file(struct file *file, bool can_flush) if (full_path == NULL) { rc = -ENOMEM; reopen_error_exit: - mutex_lock(&pCifsFile->fh_mutex); + mutex_unlock(&pCifsFile->fh_mutex); FreeXid(xid); return rc; } @@ -566,14 +569,14 @@ reopen_error_exit: cifs_sb->local_nls, cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MAP_SPECIAL_CHR); if (rc) { - mutex_lock(&pCifsFile->fh_mutex); + mutex_unlock(&pCifsFile->fh_mutex); cFYI(1, ("cifs_open returned 0x%x", rc)); cFYI(1, ("oplock: %d", oplock)); } else { reopen_success: pCifsFile->netfid = netfid; pCifsFile->invalidHandle = false; - mutex_lock(&pCifsFile->fh_mutex); + mutex_unlock(&pCifsFile->fh_mutex); pCifsInode = CIFS_I(inode); if (pCifsInode) { if (can_flush) { @@ -845,8 +848,9 @@ int cifs_lock(struct file *file, int cmd, struct file_lock *pfLock) tcon = cifs_sb->tcon; if (file->private_data == NULL) { + rc = -EBADF; FreeXid(xid); - return -EBADF; + return rc; } netfid = ((struct cifsFileInfo 
*)file->private_data)->netfid; @@ -1805,8 +1809,9 @@ ssize_t cifs_user_read(struct file *file, char __user *read_data, pTcon = cifs_sb->tcon; if (file->private_data == NULL) { + rc = -EBADF; FreeXid(xid); - return -EBADF; + return rc; } open_file = (struct cifsFileInfo *)file->private_data; @@ -1885,8 +1890,9 @@ static ssize_t cifs_read(struct file *file, char *read_data, size_t read_size, pTcon = cifs_sb->tcon; if (file->private_data == NULL) { + rc = -EBADF; FreeXid(xid); - return -EBADF; + return rc; } open_file = (struct cifsFileInfo *)file->private_data; @@ -2019,8 +2025,9 @@ static int cifs_readpages(struct file *file, struct address_space *mapping, xid = GetXid(); if (file->private_data == NULL) { + rc = -EBADF; FreeXid(xid); - return -EBADF; + return rc; } open_file = (struct cifsFileInfo *)file->private_data; cifs_sb = CIFS_SB(file->f_path.dentry->d_sb); @@ -2185,8 +2192,9 @@ static int cifs_readpage(struct file *file, struct page *page) xid = GetXid(); if (file->private_data == NULL) { + rc = -EBADF; FreeXid(xid); - return -EBADF; + return rc; } cFYI(1, ("readpage %p at offset %d 0x%x\n", diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c index fad882b..155c9e7 100644 --- a/fs/cifs/inode.c +++ b/fs/cifs/inode.c @@ -988,8 +988,9 @@ int cifs_unlink(struct inode *dir, struct dentry *dentry) * sb->s_vfs_rename_mutex here */ full_path = build_path_from_dentry(dentry); if (full_path == NULL) { + rc = -ENOMEM; FreeXid(xid); - return -ENOMEM; + return rc; } if ((tcon->ses->capabilities & CAP_UNIX) && @@ -1118,8 +1119,9 @@ int cifs_mkdir(struct inode *inode, struct dentry *direntry, int mode) full_path = build_path_from_dentry(direntry); if (full_path == NULL) { + rc = -ENOMEM; FreeXid(xid); - return -ENOMEM; + return rc; } if ((pTcon->ses->capabilities & CAP_UNIX) && @@ -1303,8 +1305,9 @@ int cifs_rmdir(struct inode *inode, struct dentry *direntry) full_path = build_path_from_dentry(direntry); if (full_path == NULL) { + rc = -ENOMEM; FreeXid(xid); - return -ENOMEM; + 
return rc; } rc = CIFSSMBRmDir(xid, pTcon, full_path, cifs_sb->local_nls, @@ -1508,8 +1511,9 @@ int cifs_revalidate(struct dentry *direntry) since that would deadlock */ full_path = build_path_from_dentry(direntry); if (full_path == NULL) { + rc = -ENOMEM; FreeXid(xid); - return -ENOMEM; + return rc; } cFYI(1, ("Revalidate: %s inode 0x%p count %d dentry: 0x%p d_time %ld " "jiffies %ld", full_path, direntry->d_inode, @@ -1911,8 +1915,9 @@ cifs_setattr_nounix(struct dentry *direntry, struct iattr *attrs) full_path = build_path_from_dentry(direntry); if (full_path == NULL) { + rc = -ENOMEM; FreeXid(xid); - return -ENOMEM; + return rc; } /* diff --git a/fs/cifs/link.c b/fs/cifs/link.c index cd83c53..fc1e048 100644 --- a/fs/cifs/link.c +++ b/fs/cifs/link.c @@ -172,8 +172,9 @@ cifs_symlink(struct inode *inode, struct dentry *direntry, const char *symname) full_path = build_path_from_dentry(direntry); if (full_path == NULL) { + rc = -ENOMEM; FreeXid(xid); - return -ENOMEM; + return rc; } cFYI(1, ("Full path: %s", full_path)); diff --git a/fs/cifs/netmisc.c b/fs/cifs/netmisc.c index 32d6baa..bd6d689 100644 --- a/fs/cifs/netmisc.c +++ b/fs/cifs/netmisc.c @@ -133,10 +133,12 @@ static const struct smb_to_posix_error mapping_table_ERRHRD[] = { {0, 0} }; -/* Convert string containing dotted ip address to binary form */ -/* returns 0 if invalid address */ - -int +/* + * Convert a string containing text IPv4 or IPv6 address to binary form. + * + * Returns 0 on failure. + */ +static int cifs_inet_pton(const int address_family, const char *cp, void *dst) { int ret = 0; @@ -153,6 +155,52 @@ cifs_inet_pton(const int address_family, const char *cp, void *dst) return ret; } +/* + * Try to convert a string to an IPv4 address and then attempt to convert + * it to an IPv6 address if that fails. Set the family field if either + * succeeds. If it's an IPv6 address and it has a '%' sign in it, try to + * treat the part following it as a numeric sin6_scope_id. + * + * Returns 0 on failure. 
+ */ +int +cifs_convert_address(char *src, void *dst) +{ + int rc; + char *pct, *endp; + struct sockaddr_in *s4 = (struct sockaddr_in *) dst; + struct sockaddr_in6 *s6 = (struct sockaddr_in6 *) dst; + + /* IPv4 address */ + if (cifs_inet_pton(AF_INET, src, &s4->sin_addr.s_addr)) { + s4->sin_family = AF_INET; + return 1; + } + + /* temporarily terminate string */ + pct = strchr(src, '%'); + if (pct) + *pct = '\0'; + + rc = cifs_inet_pton(AF_INET6, src, &s6->sin6_addr.s6_addr); + + /* repair temp termination (if any) and make pct point to scopeid */ + if (pct) + *pct++ = '%'; + + if (!rc) + return rc; + + s6->sin6_family = AF_INET6; + if (pct) { + s6->sin6_scope_id = (u32) simple_strtoul(pct, &endp, 0); + if (!*pct || *endp) + return 0; + } + + return rc; +} + /***************************************************************************** convert a NT status code to a dos class/code *****************************************************************************/ diff --git a/fs/cifs/sess.c b/fs/cifs/sess.c index 897a052..7085a62 100644 --- a/fs/cifs/sess.c +++ b/fs/cifs/sess.c @@ -802,7 +802,7 @@ ssetup_ntlmssp_authenticate: #endif /* CONFIG_CIFS_UPCALL */ } else { #ifdef CONFIG_CIFS_EXPERIMENTAL - if ((experimEnabled > 1) && (type == RawNTLMSSP)) { + if (type == RawNTLMSSP) { if ((pSMB->req.hdr.Flags2 & SMBFLG2_UNICODE) == 0) { cERROR(1, ("NTLMSSP requires Unicode support")); rc = -ENOSYS; diff --git a/fs/cifs/xattr.c b/fs/cifs/xattr.c index e9527ee..a75afa3 100644 --- a/fs/cifs/xattr.c +++ b/fs/cifs/xattr.c @@ -64,8 +64,9 @@ int cifs_removexattr(struct dentry *direntry, const char *ea_name) full_path = build_path_from_dentry(direntry); if (full_path == NULL) { + rc = -ENOMEM; FreeXid(xid); - return -ENOMEM; + return rc; } if (ea_name == NULL) { cFYI(1, ("Null xattr names not supported")); @@ -118,8 +119,9 @@ int cifs_setxattr(struct dentry *direntry, const char *ea_name, full_path = build_path_from_dentry(direntry); if (full_path == NULL) { + rc = -ENOMEM; 
FreeXid(xid); - return -ENOMEM; + return rc; } /* return dos attributes as pseudo xattr */ /* return alt name if available as pseudo attr */ @@ -225,8 +227,9 @@ ssize_t cifs_getxattr(struct dentry *direntry, const char *ea_name, full_path = build_path_from_dentry(direntry); if (full_path == NULL) { + rc = -ENOMEM; FreeXid(xid); - return -ENOMEM; + return rc; } /* return dos attributes as pseudo xattr */ /* return alt name if available as pseudo attr */ @@ -351,8 +354,9 @@ ssize_t cifs_listxattr(struct dentry *direntry, char *data, size_t buf_size) full_path = build_path_from_dentry(direntry); if (full_path == NULL) { + rc = -ENOMEM; FreeXid(xid); - return -ENOMEM; + return rc; } /* return dos attributes as pseudo xattr */ /* return alt name if available as pseudo attr */ diff --git a/fs/compat_ioctl.c b/fs/compat_ioctl.c index c5ded5f..626c748 100644 --- a/fs/compat_ioctl.c +++ b/fs/compat_ioctl.c @@ -31,6 +31,7 @@ #include <linux/skbuff.h> #include <linux/netlink.h> #include <linux/vt.h> +#include <linux/falloc.h> #include <linux/fs.h> #include <linux/file.h> #include <linux/ppp_defs.h> @@ -94,7 +95,6 @@ #include <linux/atm_tcp.h> #include <linux/sonet.h> #include <linux/atm_suni.h> -#include <linux/mtd/mtd.h> #include <linux/usb.h> #include <linux/usbdevice_fs.h> @@ -1405,46 +1405,6 @@ static int ioc_settimeout(unsigned int fd, unsigned int cmd, unsigned long arg) #define HIDPGETCONNLIST _IOR('H', 210, int) #define HIDPGETCONNINFO _IOR('H', 211, int) -struct mtd_oob_buf32 { - u_int32_t start; - u_int32_t length; - compat_caddr_t ptr; /* unsigned char* */ -}; - -#define MEMWRITEOOB32 _IOWR('M',3,struct mtd_oob_buf32) -#define MEMREADOOB32 _IOWR('M',4,struct mtd_oob_buf32) - -static int mtd_rw_oob(unsigned int fd, unsigned int cmd, unsigned long arg) -{ - struct mtd_oob_buf __user *buf = compat_alloc_user_space(sizeof(*buf)); - struct mtd_oob_buf32 __user *buf32 = compat_ptr(arg); - u32 data; - char __user *datap; - unsigned int real_cmd; - int err; - - real_cmd = 
(cmd == MEMREADOOB32) ? - MEMREADOOB : MEMWRITEOOB; - - if (copy_in_user(&buf->start, &buf32->start, - 2 * sizeof(u32)) || - get_user(data, &buf32->ptr)) - return -EFAULT; - datap = compat_ptr(data); - if (put_user(datap, &buf->ptr)) - return -EFAULT; - - err = sys_ioctl(fd, real_cmd, (unsigned long) buf); - - if (!err) { - if (copy_in_user(&buf32->start, &buf->start, - 2 * sizeof(u32))) - err = -EFAULT; - } - - return err; -} - #ifdef CONFIG_BLOCK struct raw32_config_request { @@ -1820,6 +1780,41 @@ lp_timeout_trans(unsigned int fd, unsigned int cmd, unsigned long arg) return sys_ioctl(fd, cmd, (unsigned long)tn); } +/* on ia32 l_start is on a 32-bit boundary */ +#if defined(CONFIG_IA64) || defined(CONFIG_X86_64) +struct space_resv_32 { + __s16 l_type; + __s16 l_whence; + __s64 l_start __attribute__((packed)); + /* len == 0 means until end of file */ + __s64 l_len __attribute__((packed)); + __s32 l_sysid; + __u32 l_pid; + __s32 l_pad[4]; /* reserve area */ +}; + +#define FS_IOC_RESVSP_32 _IOW ('X', 40, struct space_resv_32) +#define FS_IOC_RESVSP64_32 _IOW ('X', 42, struct space_resv_32) + +/* just account for different alignment */ +static int compat_ioctl_preallocate(struct file *file, unsigned long arg) +{ + struct space_resv_32 __user *p32 = (void __user *)arg; + struct space_resv __user *p = compat_alloc_user_space(sizeof(*p)); + + if (copy_in_user(&p->l_type, &p32->l_type, sizeof(s16)) || + copy_in_user(&p->l_whence, &p32->l_whence, sizeof(s16)) || + copy_in_user(&p->l_start, &p32->l_start, sizeof(s64)) || + copy_in_user(&p->l_len, &p32->l_len, sizeof(s64)) || + copy_in_user(&p->l_sysid, &p32->l_sysid, sizeof(s32)) || + copy_in_user(&p->l_pid, &p32->l_pid, sizeof(u32)) || + copy_in_user(&p->l_pad, &p32->l_pad, 4*sizeof(u32))) + return -EFAULT; + + return ioctl_preallocate(file, p); +} +#endif + typedef int (*ioctl_trans_handler_t)(unsigned int, unsigned int, unsigned long, struct file *); @@ -2426,15 +2421,6 @@ COMPATIBLE_IOCTL(USBDEVFS_SUBMITURB32) 
COMPATIBLE_IOCTL(USBDEVFS_REAPURB32) COMPATIBLE_IOCTL(USBDEVFS_REAPURBNDELAY32) COMPATIBLE_IOCTL(USBDEVFS_CLEAR_HALT) -/* MTD */ -COMPATIBLE_IOCTL(MEMGETINFO) -COMPATIBLE_IOCTL(MEMERASE) -COMPATIBLE_IOCTL(MEMLOCK) -COMPATIBLE_IOCTL(MEMUNLOCK) -COMPATIBLE_IOCTL(MEMGETREGIONCOUNT) -COMPATIBLE_IOCTL(MEMGETREGIONINFO) -COMPATIBLE_IOCTL(MEMGETBADBLOCK) -COMPATIBLE_IOCTL(MEMSETBADBLOCK) /* NBD */ ULONG_IOCTL(NBD_SET_SOCK) ULONG_IOCTL(NBD_SET_BLKSIZE) @@ -2544,8 +2530,6 @@ COMPATIBLE_IOCTL(JSIOCGBUTTONS) COMPATIBLE_IOCTL(JSIOCGNAME(0)) /* now things that need handlers */ -HANDLE_IOCTL(MEMREADOOB32, mtd_rw_oob) -HANDLE_IOCTL(MEMWRITEOOB32, mtd_rw_oob) #ifdef CONFIG_NET HANDLE_IOCTL(SIOCGIFNAME, dev_ifname32) HANDLE_IOCTL(SIOCGIFCONF, dev_ifconf) @@ -2808,6 +2792,18 @@ asmlinkage long compat_sys_ioctl(unsigned int fd, unsigned int cmd, case FIOQSIZE: break; +#if defined(CONFIG_IA64) || defined(CONFIG_X86_64) + case FS_IOC_RESVSP_32: + case FS_IOC_RESVSP64_32: + error = compat_ioctl_preallocate(filp, arg); + goto out_fput; +#else + case FS_IOC_RESVSP: + case FS_IOC_RESVSP64: + error = ioctl_preallocate(filp, (void __user *)arg); + goto out_fput; +#endif + case FIBMAP: case FIGETBSZ: case FIONREAD: diff --git a/fs/devpts/inode.c b/fs/devpts/inode.c index 9b1d285..75efb02 100644 --- a/fs/devpts/inode.c +++ b/fs/devpts/inode.c @@ -423,7 +423,6 @@ static void devpts_kill_sb(struct super_block *sb) } static struct file_system_type devpts_fs_type = { - .owner = THIS_MODULE, .name = "devpts", .get_sb = devpts_get_sb, .kill_sb = devpts_kill_sb, @@ -564,13 +563,4 @@ static int __init init_devpts_fs(void) } return err; } - -static void __exit exit_devpts_fs(void) -{ - unregister_filesystem(&devpts_fs_type); - mntput(devpts_mnt); -} - module_init(init_devpts_fs) -module_exit(exit_devpts_fs) -MODULE_LICENSE("GPL"); diff --git a/fs/eventfd.c b/fs/eventfd.c index 3f0e197..31d12de8 100644 --- a/fs/eventfd.c +++ b/fs/eventfd.c @@ -14,35 +14,44 @@ #include <linux/list.h> #include 
<linux/spinlock.h> #include <linux/anon_inodes.h> -#include <linux/eventfd.h> #include <linux/syscalls.h> #include <linux/module.h> +#include <linux/kref.h> +#include <linux/eventfd.h> struct eventfd_ctx { + struct kref kref; wait_queue_head_t wqh; /* * Every time that a write(2) is performed on an eventfd, the * value of the __u64 being written is added to "count" and a * wakeup is performed on "wqh". A read(2) will return the "count" * value to userspace, and will reset "count" to zero. The kernel - * size eventfd_signal() also, adds to the "count" counter and + * side eventfd_signal() also, adds to the "count" counter and * issue a wakeup. */ __u64 count; unsigned int flags; }; -/* - * Adds "n" to the eventfd counter "count". Returns "n" in case of - * success, or a value lower then "n" in case of coutner overflow. - * This function is supposed to be called by the kernel in paths - * that do not allow sleeping. In this function we allow the counter - * to reach the ULLONG_MAX value, and we signal this as overflow - * condition by returining a POLLERR to poll(2). +/** + * eventfd_signal - Adds @n to the eventfd counter. + * @ctx: [in] Pointer to the eventfd context. + * @n: [in] Value of the counter to be added to the eventfd internal counter. + * The value cannot be negative. + * + * This function is supposed to be called by the kernel in paths that do not + * allow sleeping. In this function we allow the counter to reach the ULLONG_MAX + * value, and we signal this as overflow condition by returning a POLLERR + * to poll(2). + * + * Returns @n in case of success, a non-negative number lower than @n in case + * of overflow, or the following error codes: + * + * -EINVAL : The value of @n is negative.
*/ -int eventfd_signal(struct file *file, int n) +int eventfd_signal(struct eventfd_ctx *ctx, int n) { - struct eventfd_ctx *ctx = file->private_data; unsigned long flags; if (n < 0) @@ -59,9 +68,45 @@ int eventfd_signal(struct file *file, int n) } EXPORT_SYMBOL_GPL(eventfd_signal); +static void eventfd_free(struct kref *kref) +{ + struct eventfd_ctx *ctx = container_of(kref, struct eventfd_ctx, kref); + + kfree(ctx); +} + +/** + * eventfd_ctx_get - Acquires a reference to the internal eventfd context. + * @ctx: [in] Pointer to the eventfd context. + * + * Returns: In case of success, returns a pointer to the eventfd context. + */ +struct eventfd_ctx *eventfd_ctx_get(struct eventfd_ctx *ctx) +{ + kref_get(&ctx->kref); + return ctx; +} +EXPORT_SYMBOL_GPL(eventfd_ctx_get); + +/** + * eventfd_ctx_put - Releases a reference to the internal eventfd context. + * @ctx: [in] Pointer to eventfd context. + * + * The eventfd context reference must have been previously acquired either + * with eventfd_ctx_get() or eventfd_ctx_fdget(). + */ +void eventfd_ctx_put(struct eventfd_ctx *ctx) +{ + kref_put(&ctx->kref, eventfd_free); +} +EXPORT_SYMBOL_GPL(eventfd_ctx_put); + static int eventfd_release(struct inode *inode, struct file *file) { - kfree(file->private_data); + struct eventfd_ctx *ctx = file->private_data; + + wake_up_poll(&ctx->wqh, POLLHUP); + eventfd_ctx_put(ctx); return 0; } @@ -185,6 +230,16 @@ static const struct file_operations eventfd_fops = { .write = eventfd_write, }; +/** + * eventfd_fget - Acquire a reference of an eventfd file descriptor. + * @fd: [in] Eventfd file descriptor. + * + * Returns a pointer to the eventfd file structure in case of success, or the + * following error pointer: + * + * -EBADF : Invalid @fd file descriptor. + * -EINVAL : The @fd file descriptor is not an eventfd file.
+ */ struct file *eventfd_fget(int fd) { struct file *file; @@ -201,6 +256,48 @@ struct file *eventfd_fget(int fd) } EXPORT_SYMBOL_GPL(eventfd_fget); +/** + * eventfd_ctx_fdget - Acquires a reference to the internal eventfd context. + * @fd: [in] Eventfd file descriptor. + * + * Returns a pointer to the internal eventfd context, otherwise the error + * pointers returned by the following functions: + * + * eventfd_fget + */ +struct eventfd_ctx *eventfd_ctx_fdget(int fd) +{ + struct file *file; + struct eventfd_ctx *ctx; + + file = eventfd_fget(fd); + if (IS_ERR(file)) + return (struct eventfd_ctx *) file; + ctx = eventfd_ctx_get(file->private_data); + fput(file); + + return ctx; +} +EXPORT_SYMBOL_GPL(eventfd_ctx_fdget); + +/** + * eventfd_ctx_fileget - Acquires a reference to the internal eventfd context. + * @file: [in] Eventfd file pointer. + * + * Returns a pointer to the internal eventfd context, otherwise the error + * pointer: + * + * -EINVAL : The @file file pointer is not an eventfd file.
+ */ +struct eventfd_ctx *eventfd_ctx_fileget(struct file *file) +{ + if (file->f_op != &eventfd_fops) + return ERR_PTR(-EINVAL); + + return eventfd_ctx_get(file->private_data); +} +EXPORT_SYMBOL_GPL(eventfd_ctx_fileget); + SYSCALL_DEFINE2(eventfd2, unsigned int, count, int, flags) { int fd; @@ -217,6 +314,7 @@ SYSCALL_DEFINE2(eventfd2, unsigned int, count, int, flags) if (!ctx) return -ENOMEM; + kref_init(&ctx->kref); init_waitqueue_head(&ctx->wqh); ctx->count = count; ctx->flags = flags; diff --git a/fs/ext2/acl.c b/fs/ext2/acl.c index d46e38c..d636e12 100644 --- a/fs/ext2/acl.c +++ b/fs/ext2/acl.c @@ -125,37 +125,12 @@ fail: return ERR_PTR(-EINVAL); } -static inline struct posix_acl * -ext2_iget_acl(struct inode *inode, struct posix_acl **i_acl) -{ - struct posix_acl *acl = EXT2_ACL_NOT_CACHED; - - spin_lock(&inode->i_lock); - if (*i_acl != EXT2_ACL_NOT_CACHED) - acl = posix_acl_dup(*i_acl); - spin_unlock(&inode->i_lock); - - return acl; -} - -static inline void -ext2_iset_acl(struct inode *inode, struct posix_acl **i_acl, - struct posix_acl *acl) -{ - spin_lock(&inode->i_lock); - if (*i_acl != EXT2_ACL_NOT_CACHED) - posix_acl_release(*i_acl); - *i_acl = posix_acl_dup(acl); - spin_unlock(&inode->i_lock); -} - /* * inode->i_mutex: don't care */ static struct posix_acl * ext2_get_acl(struct inode *inode, int type) { - struct ext2_inode_info *ei = EXT2_I(inode); int name_index; char *value = NULL; struct posix_acl *acl; @@ -164,23 +139,19 @@ ext2_get_acl(struct inode *inode, int type) if (!test_opt(inode->i_sb, POSIX_ACL)) return NULL; - switch(type) { - case ACL_TYPE_ACCESS: - acl = ext2_iget_acl(inode, &ei->i_acl); - if (acl != EXT2_ACL_NOT_CACHED) - return acl; - name_index = EXT2_XATTR_INDEX_POSIX_ACL_ACCESS; - break; - - case ACL_TYPE_DEFAULT: - acl = ext2_iget_acl(inode, &ei->i_default_acl); - if (acl != EXT2_ACL_NOT_CACHED) - return acl; - name_index = EXT2_XATTR_INDEX_POSIX_ACL_DEFAULT; - break; - - default: - return ERR_PTR(-EINVAL); + acl = 
get_cached_acl(inode, type); + if (acl != ACL_NOT_CACHED) + return acl; + + switch (type) { + case ACL_TYPE_ACCESS: + name_index = EXT2_XATTR_INDEX_POSIX_ACL_ACCESS; + break; + case ACL_TYPE_DEFAULT: + name_index = EXT2_XATTR_INDEX_POSIX_ACL_DEFAULT; + break; + default: + BUG(); } retval = ext2_xattr_get(inode, name_index, "", NULL, 0); if (retval > 0) { @@ -197,17 +168,9 @@ ext2_get_acl(struct inode *inode, int type) acl = ERR_PTR(retval); kfree(value); - if (!IS_ERR(acl)) { - switch(type) { - case ACL_TYPE_ACCESS: - ext2_iset_acl(inode, &ei->i_acl, acl); - break; + if (!IS_ERR(acl)) + set_cached_acl(inode, type, acl); - case ACL_TYPE_DEFAULT: - ext2_iset_acl(inode, &ei->i_default_acl, acl); - break; - } - } return acl; } @@ -217,7 +180,6 @@ ext2_get_acl(struct inode *inode, int type) static int ext2_set_acl(struct inode *inode, int type, struct posix_acl *acl) { - struct ext2_inode_info *ei = EXT2_I(inode); int name_index; void *value = NULL; size_t size = 0; @@ -263,17 +225,8 @@ ext2_set_acl(struct inode *inode, int type, struct posix_acl *acl) error = ext2_xattr_set(inode, name_index, "", value, size, 0); kfree(value); - if (!error) { - switch(type) { - case ACL_TYPE_ACCESS: - ext2_iset_acl(inode, &ei->i_acl, acl); - break; - - case ACL_TYPE_DEFAULT: - ext2_iset_acl(inode, &ei->i_default_acl, acl); - break; - } - } + if (!error) + set_cached_acl(inode, type, acl); return error; } diff --git a/fs/ext2/acl.h b/fs/ext2/acl.h index b42cf57..ecefe47 100644 --- a/fs/ext2/acl.h +++ b/fs/ext2/acl.h @@ -53,10 +53,6 @@ static inline int ext2_acl_count(size_t size) #ifdef CONFIG_EXT2_FS_POSIX_ACL -/* Value for inode->u.ext2_i.i_acl and inode->u.ext2_i.i_default_acl - if the ACL has not been cached */ -#define EXT2_ACL_NOT_CACHED ((void *)-1) - /* acl.c */ extern int ext2_permission (struct inode *, int); extern int ext2_acl_chmod (struct inode *); diff --git a/fs/ext2/ext2.h b/fs/ext2/ext2.h index d988a71..9a8a8e2 100644 --- a/fs/ext2/ext2.h +++ b/fs/ext2/ext2.h @@ -47,10 
+47,6 @@ struct ext2_inode_info { */ struct rw_semaphore xattr_sem; #endif -#ifdef CONFIG_EXT2_FS_POSIX_ACL - struct posix_acl *i_acl; - struct posix_acl *i_default_acl; -#endif rwlock_t i_meta_lock; /* diff --git a/fs/ext2/inode.c b/fs/ext2/inode.c index 29ed682..e271303 100644 --- a/fs/ext2/inode.c +++ b/fs/ext2/inode.c @@ -1224,10 +1224,6 @@ struct inode *ext2_iget (struct super_block *sb, unsigned long ino) return inode; ei = EXT2_I(inode); -#ifdef CONFIG_EXT2_FS_POSIX_ACL - ei->i_acl = EXT2_ACL_NOT_CACHED; - ei->i_default_acl = EXT2_ACL_NOT_CACHED; -#endif ei->i_block_alloc_info = NULL; raw_inode = ext2_get_inode(inode->i_sb, ino, &bh); diff --git a/fs/ext2/namei.c b/fs/ext2/namei.c index 6524eca..e1dedb0 100644 --- a/fs/ext2/namei.c +++ b/fs/ext2/namei.c @@ -66,8 +66,16 @@ static struct dentry *ext2_lookup(struct inode * dir, struct dentry *dentry, str inode = NULL; if (ino) { inode = ext2_iget(dir->i_sb, ino); - if (IS_ERR(inode)) - return ERR_CAST(inode); + if (unlikely(IS_ERR(inode))) { + if (PTR_ERR(inode) == -ESTALE) { + ext2_error(dir->i_sb, __func__, + "deleted inode referenced: %lu", + ino); + return ERR_PTR(-EIO); + } else { + return ERR_CAST(inode); + } + } } return d_splice_alias(inode, dentry); } diff --git a/fs/ext2/super.c b/fs/ext2/super.c index 4589996..1a9ffee 100644 --- a/fs/ext2/super.c +++ b/fs/ext2/super.c @@ -152,10 +152,6 @@ static struct inode *ext2_alloc_inode(struct super_block *sb) ei = (struct ext2_inode_info *)kmem_cache_alloc(ext2_inode_cachep, GFP_KERNEL); if (!ei) return NULL; -#ifdef CONFIG_EXT2_FS_POSIX_ACL - ei->i_acl = EXT2_ACL_NOT_CACHED; - ei->i_default_acl = EXT2_ACL_NOT_CACHED; -#endif ei->i_block_alloc_info = NULL; ei->vfs_inode.i_version = 1; return &ei->vfs_inode; @@ -198,18 +194,6 @@ static void destroy_inodecache(void) static void ext2_clear_inode(struct inode *inode) { struct ext2_block_alloc_info *rsv = EXT2_I(inode)->i_block_alloc_info; -#ifdef CONFIG_EXT2_FS_POSIX_ACL - struct ext2_inode_info *ei = 
EXT2_I(inode); - - if (ei->i_acl && ei->i_acl != EXT2_ACL_NOT_CACHED) { - posix_acl_release(ei->i_acl); - ei->i_acl = EXT2_ACL_NOT_CACHED; - } - if (ei->i_default_acl && ei->i_default_acl != EXT2_ACL_NOT_CACHED) { - posix_acl_release(ei->i_default_acl); - ei->i_default_acl = EXT2_ACL_NOT_CACHED; - } -#endif ext2_discard_reservation(inode); EXT2_I(inode)->i_block_alloc_info = NULL; if (unlikely(rsv)) diff --git a/fs/ext3/acl.c b/fs/ext3/acl.c index e0c7454..e167bae 100644 --- a/fs/ext3/acl.c +++ b/fs/ext3/acl.c @@ -126,33 +126,6 @@ fail: return ERR_PTR(-EINVAL); } -static inline struct posix_acl * -ext3_iget_acl(struct inode *inode, struct posix_acl **i_acl) -{ - struct posix_acl *acl = ACCESS_ONCE(*i_acl); - - if (acl) { - spin_lock(&inode->i_lock); - acl = *i_acl; - if (acl != EXT3_ACL_NOT_CACHED) - acl = posix_acl_dup(acl); - spin_unlock(&inode->i_lock); - } - - return acl; -} - -static inline void -ext3_iset_acl(struct inode *inode, struct posix_acl **i_acl, - struct posix_acl *acl) -{ - spin_lock(&inode->i_lock); - if (*i_acl != EXT3_ACL_NOT_CACHED) - posix_acl_release(*i_acl); - *i_acl = posix_acl_dup(acl); - spin_unlock(&inode->i_lock); -} - /* * Inode operation get_posix_acl(). 
* @@ -161,7 +134,6 @@ ext3_iset_acl(struct inode *inode, struct posix_acl **i_acl, static struct posix_acl * ext3_get_acl(struct inode *inode, int type) { - struct ext3_inode_info *ei = EXT3_I(inode); int name_index; char *value = NULL; struct posix_acl *acl; @@ -170,24 +142,21 @@ ext3_get_acl(struct inode *inode, int type) if (!test_opt(inode->i_sb, POSIX_ACL)) return NULL; - switch(type) { - case ACL_TYPE_ACCESS: - acl = ext3_iget_acl(inode, &ei->i_acl); - if (acl != EXT3_ACL_NOT_CACHED) - return acl; - name_index = EXT3_XATTR_INDEX_POSIX_ACL_ACCESS; - break; - - case ACL_TYPE_DEFAULT: - acl = ext3_iget_acl(inode, &ei->i_default_acl); - if (acl != EXT3_ACL_NOT_CACHED) - return acl; - name_index = EXT3_XATTR_INDEX_POSIX_ACL_DEFAULT; - break; - - default: - return ERR_PTR(-EINVAL); + acl = get_cached_acl(inode, type); + if (acl != ACL_NOT_CACHED) + return acl; + + switch (type) { + case ACL_TYPE_ACCESS: + name_index = EXT3_XATTR_INDEX_POSIX_ACL_ACCESS; + break; + case ACL_TYPE_DEFAULT: + name_index = EXT3_XATTR_INDEX_POSIX_ACL_DEFAULT; + break; + default: + BUG(); } + retval = ext3_xattr_get(inode, name_index, "", NULL, 0); if (retval > 0) { value = kmalloc(retval, GFP_NOFS); @@ -203,17 +172,9 @@ ext3_get_acl(struct inode *inode, int type) acl = ERR_PTR(retval); kfree(value); - if (!IS_ERR(acl)) { - switch(type) { - case ACL_TYPE_ACCESS: - ext3_iset_acl(inode, &ei->i_acl, acl); - break; + if (!IS_ERR(acl)) + set_cached_acl(inode, type, acl); - case ACL_TYPE_DEFAULT: - ext3_iset_acl(inode, &ei->i_default_acl, acl); - break; - } - } return acl; } @@ -226,7 +187,6 @@ static int ext3_set_acl(handle_t *handle, struct inode *inode, int type, struct posix_acl *acl) { - struct ext3_inode_info *ei = EXT3_I(inode); int name_index; void *value = NULL; size_t size = 0; @@ -271,17 +231,10 @@ ext3_set_acl(handle_t *handle, struct inode *inode, int type, value, size, 0); kfree(value); - if (!error) { - switch(type) { - case ACL_TYPE_ACCESS: - ext3_iset_acl(inode, &ei->i_acl, 
acl); - break; - case ACL_TYPE_DEFAULT: - ext3_iset_acl(inode, &ei->i_default_acl, acl); - break; - } - } + if (!error) + set_cached_acl(inode, type, acl); + return error; } diff --git a/fs/ext3/acl.h b/fs/ext3/acl.h index 42da16b..07d15a3 100644 --- a/fs/ext3/acl.h +++ b/fs/ext3/acl.h @@ -53,10 +53,6 @@ static inline int ext3_acl_count(size_t size) #ifdef CONFIG_EXT3_FS_POSIX_ACL -/* Value for inode->u.ext3_i.i_acl and inode->u.ext3_i.i_default_acl - if the ACL has not been cached */ -#define EXT3_ACL_NOT_CACHED ((void *)-1) - /* acl.c */ extern int ext3_permission (struct inode *, int); extern int ext3_acl_chmod (struct inode *); diff --git a/fs/ext3/inode.c b/fs/ext3/inode.c index 05dea81..5f51fed 100644 --- a/fs/ext3/inode.c +++ b/fs/ext3/inode.c @@ -2752,10 +2752,6 @@ struct inode *ext3_iget(struct super_block *sb, unsigned long ino) return inode; ei = EXT3_I(inode); -#ifdef CONFIG_EXT3_FS_POSIX_ACL - ei->i_acl = EXT3_ACL_NOT_CACHED; - ei->i_default_acl = EXT3_ACL_NOT_CACHED; -#endif ei->i_block_alloc_info = NULL; ret = __ext3_get_inode_loc(inode, &iloc, 0); diff --git a/fs/ext3/super.c b/fs/ext3/super.c index 601e881..524b349 100644 --- a/fs/ext3/super.c +++ b/fs/ext3/super.c @@ -464,10 +464,6 @@ static struct inode *ext3_alloc_inode(struct super_block *sb) ei = kmem_cache_alloc(ext3_inode_cachep, GFP_NOFS); if (!ei) return NULL; -#ifdef CONFIG_EXT3_FS_POSIX_ACL - ei->i_acl = EXT3_ACL_NOT_CACHED; - ei->i_default_acl = EXT3_ACL_NOT_CACHED; -#endif ei->i_block_alloc_info = NULL; ei->vfs_inode.i_version = 1; return &ei->vfs_inode; @@ -518,18 +514,6 @@ static void destroy_inodecache(void) static void ext3_clear_inode(struct inode *inode) { struct ext3_block_alloc_info *rsv = EXT3_I(inode)->i_block_alloc_info; -#ifdef CONFIG_EXT3_FS_POSIX_ACL - if (EXT3_I(inode)->i_acl && - EXT3_I(inode)->i_acl != EXT3_ACL_NOT_CACHED) { - posix_acl_release(EXT3_I(inode)->i_acl); - EXT3_I(inode)->i_acl = EXT3_ACL_NOT_CACHED; - } - if (EXT3_I(inode)->i_default_acl && - 
EXT3_I(inode)->i_default_acl != EXT3_ACL_NOT_CACHED) { - posix_acl_release(EXT3_I(inode)->i_default_acl); - EXT3_I(inode)->i_default_acl = EXT3_ACL_NOT_CACHED; - } -#endif ext3_discard_reservation(inode); EXT3_I(inode)->i_block_alloc_info = NULL; if (unlikely(rsv)) diff --git a/fs/ext4/acl.c b/fs/ext4/acl.c index 605aeed..f6d8967 100644 --- a/fs/ext4/acl.c +++ b/fs/ext4/acl.c @@ -126,33 +126,6 @@ fail: return ERR_PTR(-EINVAL); } -static inline struct posix_acl * -ext4_iget_acl(struct inode *inode, struct posix_acl **i_acl) -{ - struct posix_acl *acl = ACCESS_ONCE(*i_acl); - - if (acl) { - spin_lock(&inode->i_lock); - acl = *i_acl; - if (acl != EXT4_ACL_NOT_CACHED) - acl = posix_acl_dup(acl); - spin_unlock(&inode->i_lock); - } - - return acl; -} - -static inline void -ext4_iset_acl(struct inode *inode, struct posix_acl **i_acl, - struct posix_acl *acl) -{ - spin_lock(&inode->i_lock); - if (*i_acl != EXT4_ACL_NOT_CACHED) - posix_acl_release(*i_acl); - *i_acl = posix_acl_dup(acl); - spin_unlock(&inode->i_lock); -} - /* * Inode operation get_posix_acl(). 
* @@ -161,7 +134,6 @@ ext4_iset_acl(struct inode *inode, struct posix_acl **i_acl, static struct posix_acl * ext4_get_acl(struct inode *inode, int type) { - struct ext4_inode_info *ei = EXT4_I(inode); int name_index; char *value = NULL; struct posix_acl *acl; @@ -170,23 +142,19 @@ ext4_get_acl(struct inode *inode, int type) if (!test_opt(inode->i_sb, POSIX_ACL)) return NULL; + acl = get_cached_acl(inode, type); + if (acl != ACL_NOT_CACHED) + return acl; + switch (type) { case ACL_TYPE_ACCESS: - acl = ext4_iget_acl(inode, &ei->i_acl); - if (acl != EXT4_ACL_NOT_CACHED) - return acl; name_index = EXT4_XATTR_INDEX_POSIX_ACL_ACCESS; break; - case ACL_TYPE_DEFAULT: - acl = ext4_iget_acl(inode, &ei->i_default_acl); - if (acl != EXT4_ACL_NOT_CACHED) - return acl; name_index = EXT4_XATTR_INDEX_POSIX_ACL_DEFAULT; break; - default: - return ERR_PTR(-EINVAL); + BUG(); } retval = ext4_xattr_get(inode, name_index, "", NULL, 0); if (retval > 0) { @@ -203,17 +171,9 @@ ext4_get_acl(struct inode *inode, int type) acl = ERR_PTR(retval); kfree(value); - if (!IS_ERR(acl)) { - switch (type) { - case ACL_TYPE_ACCESS: - ext4_iset_acl(inode, &ei->i_acl, acl); - break; + if (!IS_ERR(acl)) + set_cached_acl(inode, type, acl); - case ACL_TYPE_DEFAULT: - ext4_iset_acl(inode, &ei->i_default_acl, acl); - break; - } - } return acl; } @@ -226,7 +186,6 @@ static int ext4_set_acl(handle_t *handle, struct inode *inode, int type, struct posix_acl *acl) { - struct ext4_inode_info *ei = EXT4_I(inode); int name_index; void *value = NULL; size_t size = 0; @@ -271,17 +230,9 @@ ext4_set_acl(handle_t *handle, struct inode *inode, int type, value, size, 0); kfree(value); - if (!error) { - switch (type) { - case ACL_TYPE_ACCESS: - ext4_iset_acl(inode, &ei->i_acl, acl); - break; + if (!error) + set_cached_acl(inode, type, acl); - case ACL_TYPE_DEFAULT: - ext4_iset_acl(inode, &ei->i_default_acl, acl); - break; - } - } return error; } diff --git a/fs/ext4/acl.h b/fs/ext4/acl.h index cb45257..949789d 100644 --- 
a/fs/ext4/acl.h +++ b/fs/ext4/acl.h @@ -53,10 +53,6 @@ static inline int ext4_acl_count(size_t size) #ifdef CONFIG_EXT4_FS_POSIX_ACL -/* Value for inode->u.ext4_i.i_acl and inode->u.ext4_i.i_default_acl - if the ACL has not been cached */ -#define EXT4_ACL_NOT_CACHED ((void *)-1) - /* acl.c */ extern int ext4_permission(struct inode *, int); extern int ext4_acl_chmod(struct inode *); diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index 17b9998..0ddf7e5 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h @@ -595,10 +595,6 @@ struct ext4_inode_info { */ struct rw_semaphore xattr_sem; #endif -#ifdef CONFIG_EXT4_FS_POSIX_ACL - struct posix_acl *i_acl; - struct posix_acl *i_default_acl; -#endif struct list_head i_orphan; /* unlinked but open inodes */ diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index 7c17ae2..60a26f3 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -4453,10 +4453,6 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino) return inode; ei = EXT4_I(inode); -#ifdef CONFIG_EXT4_FS_POSIX_ACL - ei->i_acl = EXT4_ACL_NOT_CACHED; - ei->i_default_acl = EXT4_ACL_NOT_CACHED; -#endif ret = __ext4_get_inode_loc(inode, &iloc, 0); if (ret < 0) diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 8bb9e2d..8f4f079 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -666,10 +666,6 @@ static struct inode *ext4_alloc_inode(struct super_block *sb) if (!ei) return NULL; -#ifdef CONFIG_EXT4_FS_POSIX_ACL - ei->i_acl = EXT4_ACL_NOT_CACHED; - ei->i_default_acl = EXT4_ACL_NOT_CACHED; -#endif ei->vfs_inode.i_version = 1; ei->vfs_inode.i_data.writeback_index = 0; memset(&ei->i_cached_extent, 0, sizeof(struct ext4_ext_cache)); @@ -735,18 +731,6 @@ static void destroy_inodecache(void) static void ext4_clear_inode(struct inode *inode) { -#ifdef CONFIG_EXT4_FS_POSIX_ACL - if (EXT4_I(inode)->i_acl && - EXT4_I(inode)->i_acl != EXT4_ACL_NOT_CACHED) { - posix_acl_release(EXT4_I(inode)->i_acl); - EXT4_I(inode)->i_acl = EXT4_ACL_NOT_CACHED; - } - if 
(EXT4_I(inode)->i_default_acl && - EXT4_I(inode)->i_default_acl != EXT4_ACL_NOT_CACHED) { - posix_acl_release(EXT4_I(inode)->i_default_acl); - EXT4_I(inode)->i_default_acl = EXT4_ACL_NOT_CACHED; - } -#endif ext4_discard_preallocations(inode); if (EXT4_JOURNAL(inode)) jbd2_journal_release_jbd_inode(EXT4_SB(inode->i_sb)->s_journal, diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c index caf0491..c54226b 100644 --- a/fs/fs-writeback.c +++ b/fs/fs-writeback.c @@ -278,7 +278,26 @@ int sb_has_dirty_inodes(struct super_block *sb) EXPORT_SYMBOL(sb_has_dirty_inodes); /* - * Write a single inode's dirty pages and inode data out to disk. + * Wait for writeback on an inode to complete. + */ +static void inode_wait_for_writeback(struct inode *inode) +{ + DEFINE_WAIT_BIT(wq, &inode->i_state, __I_SYNC); + wait_queue_head_t *wqh; + + wqh = bit_waitqueue(&inode->i_state, __I_SYNC); + do { + spin_unlock(&inode_lock); + __wait_on_bit(wqh, &wq, inode_wait, TASK_UNINTERRUPTIBLE); + spin_lock(&inode_lock); + } while (inode->i_state & I_SYNC); +} + +/* + * Write out an inode's dirty pages. Called under inode_lock. Either the + * caller has ref on the inode (either via __iget or via syscall against an fd) + * or the inode has I_WILL_FREE set (via generic_forget_inode) + * * If `wait' is set, wait on the writeout. * * The whole writeout design is quite complex and fragile. We want to avoid @@ -288,13 +307,38 @@ EXPORT_SYMBOL(sb_has_dirty_inodes); * Called under inode_lock. 
*/ static int -__sync_single_inode(struct inode *inode, struct writeback_control *wbc) +writeback_single_inode(struct inode *inode, struct writeback_control *wbc) { - unsigned dirty; struct address_space *mapping = inode->i_mapping; int wait = wbc->sync_mode == WB_SYNC_ALL; + unsigned dirty; int ret; + if (!atomic_read(&inode->i_count)) + WARN_ON(!(inode->i_state & (I_WILL_FREE|I_FREEING))); + else + WARN_ON(inode->i_state & I_WILL_FREE); + + if (inode->i_state & I_SYNC) { + /* + * If this inode is locked for writeback and we are not doing + * writeback-for-data-integrity, move it to s_more_io so that + * writeback can proceed with the other inodes on s_io. + * + * We'll have another go at writing back this inode when we + * completed a full scan of s_io. + */ + if (!wait) { + requeue_io(inode); + return 0; + } + + /* + * It's a data-integrity sync. We must wait. + */ + inode_wait_for_writeback(inode); + } + BUG_ON(inode->i_state & I_SYNC); /* Set I_SYNC, reset I_DIRTY */ @@ -390,50 +434,6 @@ __sync_single_inode(struct inode *inode, struct writeback_control *wbc) } /* - * Write out an inode's dirty pages. Called under inode_lock. Either the - * caller has ref on the inode (either via __iget or via syscall against an fd) - * or the inode has I_WILL_FREE set (via generic_forget_inode) - */ -static int -__writeback_single_inode(struct inode *inode, struct writeback_control *wbc) -{ - wait_queue_head_t *wqh; - - if (!atomic_read(&inode->i_count)) - WARN_ON(!(inode->i_state & (I_WILL_FREE|I_FREEING))); - else - WARN_ON(inode->i_state & I_WILL_FREE); - - if ((wbc->sync_mode != WB_SYNC_ALL) && (inode->i_state & I_SYNC)) { - /* - * We're skipping this inode because it's locked, and we're not - * doing writeback-for-data-integrity. Move it to s_more_io so - * that writeback can proceed with the other inodes on s_io. - * We'll have another go at writing back this inode when we - * completed a full scan of s_io. 
- */ - requeue_io(inode); - return 0; - } - - /* - * It's a data-integrity sync. We must wait. - */ - if (inode->i_state & I_SYNC) { - DEFINE_WAIT_BIT(wq, &inode->i_state, __I_SYNC); - - wqh = bit_waitqueue(&inode->i_state, __I_SYNC); - do { - spin_unlock(&inode_lock); - __wait_on_bit(wqh, &wq, inode_wait, - TASK_UNINTERRUPTIBLE); - spin_lock(&inode_lock); - } while (inode->i_state & I_SYNC); - } - return __sync_single_inode(inode, wbc); -} - -/* * Write out a superblock's list of dirty inodes. A wait will be performed * upon no inodes, all inodes or the final one, depending upon sync_mode. * @@ -526,7 +526,7 @@ void generic_sync_sb_inodes(struct super_block *sb, BUG_ON(inode->i_state & (I_FREEING | I_CLEAR)); __iget(inode); pages_skipped = wbc->pages_skipped; - __writeback_single_inode(inode, wbc); + writeback_single_inode(inode, wbc); if (current_is_pdflush()) writeback_release(bdi); if (wbc->pages_skipped != pages_skipped) { @@ -708,7 +708,7 @@ int write_inode_now(struct inode *inode, int sync) might_sleep(); spin_lock(&inode_lock); - ret = __writeback_single_inode(inode, &wbc); + ret = writeback_single_inode(inode, &wbc); spin_unlock(&inode_lock); if (sync) inode_sync_wait(inode); @@ -732,7 +732,7 @@ int sync_inode(struct inode *inode, struct writeback_control *wbc) int ret; spin_lock(&inode_lock); - ret = __writeback_single_inode(inode, wbc); + ret = writeback_single_inode(inode, wbc); spin_unlock(&inode_lock); return ret; } diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c index 8fed2ed..f58ecbc 100644 --- a/fs/fuse/dev.c +++ b/fs/fuse/dev.c @@ -849,6 +849,81 @@ err: return err; } +static int fuse_notify_inval_inode(struct fuse_conn *fc, unsigned int size, + struct fuse_copy_state *cs) +{ + struct fuse_notify_inval_inode_out outarg; + int err = -EINVAL; + + if (size != sizeof(outarg)) + goto err; + + err = fuse_copy_one(cs, &outarg, sizeof(outarg)); + if (err) + goto err; + fuse_copy_finish(cs); + + down_read(&fc->killsb); + err = -ENOENT; + if (!fc->sb) + goto 
err_unlock; + + err = fuse_reverse_inval_inode(fc->sb, outarg.ino, + outarg.off, outarg.len); + +err_unlock: + up_read(&fc->killsb); + return err; + +err: + fuse_copy_finish(cs); + return err; +} + +static int fuse_notify_inval_entry(struct fuse_conn *fc, unsigned int size, + struct fuse_copy_state *cs) +{ + struct fuse_notify_inval_entry_out outarg; + int err = -EINVAL; + char buf[FUSE_NAME_MAX+1]; + struct qstr name; + + if (size < sizeof(outarg)) + goto err; + + err = fuse_copy_one(cs, &outarg, sizeof(outarg)); + if (err) + goto err; + + err = -ENAMETOOLONG; + if (outarg.namelen > FUSE_NAME_MAX) + goto err; + + name.name = buf; + name.len = outarg.namelen; + err = fuse_copy_one(cs, buf, outarg.namelen + 1); + if (err) + goto err; + fuse_copy_finish(cs); + buf[outarg.namelen] = 0; + name.hash = full_name_hash(name.name, name.len); + + down_read(&fc->killsb); + err = -ENOENT; + if (!fc->sb) + goto err_unlock; + + err = fuse_reverse_inval_entry(fc->sb, outarg.parent, &name); + +err_unlock: + up_read(&fc->killsb); + return err; + +err: + fuse_copy_finish(cs); + return err; +} + static int fuse_notify(struct fuse_conn *fc, enum fuse_notify_code code, unsigned int size, struct fuse_copy_state *cs) { @@ -856,6 +931,12 @@ static int fuse_notify(struct fuse_conn *fc, enum fuse_notify_code code, case FUSE_NOTIFY_POLL: return fuse_notify_poll(fc, size, cs); + case FUSE_NOTIFY_INVAL_INODE: + return fuse_notify_inval_inode(fc, size, cs); + + case FUSE_NOTIFY_INVAL_ENTRY: + return fuse_notify_inval_entry(fc, size, cs); + default: fuse_copy_finish(cs); return -EINVAL; @@ -910,7 +991,7 @@ static ssize_t fuse_dev_write(struct kiocb *iocb, const struct iovec *iov, unsigned long nr_segs, loff_t pos) { int err; - unsigned nbytes = iov_length(iov, nr_segs); + size_t nbytes = iov_length(iov, nr_segs); struct fuse_req *req; struct fuse_out_header oh; struct fuse_copy_state cs; diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c index b3089a0..e703654 100644 --- a/fs/fuse/dir.c +++ 
b/fs/fuse/dir.c @@ -375,7 +375,7 @@ static int fuse_create_open(struct inode *dir, struct dentry *entry, int mode, struct fuse_conn *fc = get_fuse_conn(dir); struct fuse_req *req; struct fuse_req *forget_req; - struct fuse_open_in inarg; + struct fuse_create_in inarg; struct fuse_open_out outopen; struct fuse_entry_out outentry; struct fuse_file *ff; @@ -399,15 +399,20 @@ static int fuse_create_open(struct inode *dir, struct dentry *entry, int mode, if (!ff) goto out_put_request; + if (!fc->dont_mask) + mode &= ~current_umask(); + flags &= ~O_NOCTTY; memset(&inarg, 0, sizeof(inarg)); memset(&outentry, 0, sizeof(outentry)); inarg.flags = flags; inarg.mode = mode; + inarg.umask = current_umask(); req->in.h.opcode = FUSE_CREATE; req->in.h.nodeid = get_node_id(dir); req->in.numargs = 2; - req->in.args[0].size = sizeof(inarg); + req->in.args[0].size = fc->minor < 12 ? sizeof(struct fuse_open_in) : + sizeof(inarg); req->in.args[0].value = &inarg; req->in.args[1].size = entry->d_name.len + 1; req->in.args[1].value = entry->d_name.name; @@ -546,12 +551,17 @@ static int fuse_mknod(struct inode *dir, struct dentry *entry, int mode, if (IS_ERR(req)) return PTR_ERR(req); + if (!fc->dont_mask) + mode &= ~current_umask(); + memset(&inarg, 0, sizeof(inarg)); inarg.mode = mode; inarg.rdev = new_encode_dev(rdev); + inarg.umask = current_umask(); req->in.h.opcode = FUSE_MKNOD; req->in.numargs = 2; - req->in.args[0].size = sizeof(inarg); + req->in.args[0].size = fc->minor < 12 ? 
FUSE_COMPAT_MKNOD_IN_SIZE : + sizeof(inarg); req->in.args[0].value = &inarg; req->in.args[1].size = entry->d_name.len + 1; req->in.args[1].value = entry->d_name.name; @@ -578,8 +588,12 @@ static int fuse_mkdir(struct inode *dir, struct dentry *entry, int mode) if (IS_ERR(req)) return PTR_ERR(req); + if (!fc->dont_mask) + mode &= ~current_umask(); + memset(&inarg, 0, sizeof(inarg)); inarg.mode = mode; + inarg.umask = current_umask(); req->in.h.opcode = FUSE_MKDIR; req->in.numargs = 2; req->in.args[0].size = sizeof(inarg); @@ -845,6 +859,43 @@ int fuse_update_attributes(struct inode *inode, struct kstat *stat, return err; } +int fuse_reverse_inval_entry(struct super_block *sb, u64 parent_nodeid, + struct qstr *name) +{ + int err = -ENOTDIR; + struct inode *parent; + struct dentry *dir; + struct dentry *entry; + + parent = ilookup5(sb, parent_nodeid, fuse_inode_eq, &parent_nodeid); + if (!parent) + return -ENOENT; + + mutex_lock(&parent->i_mutex); + if (!S_ISDIR(parent->i_mode)) + goto unlock; + + err = -ENOENT; + dir = d_find_alias(parent); + if (!dir) + goto unlock; + + entry = d_lookup(dir, name); + dput(dir); + if (!entry) + goto unlock; + + fuse_invalidate_attr(parent); + fuse_invalidate_entry(entry); + dput(entry); + err = 0; + + unlock: + mutex_unlock(&parent->i_mutex); + iput(parent); + return err; +} + /* * Calling into a user-controlled filesystem gives the filesystem * daemon ptrace-like capabilities over the requester process. 
This diff --git a/fs/fuse/file.c b/fs/fuse/file.c index fce6ce6..cbc4640 100644 --- a/fs/fuse/file.c +++ b/fs/fuse/file.c @@ -1922,7 +1922,7 @@ unsigned fuse_file_poll(struct file *file, poll_table *wait) req = fuse_get_req(fc); if (IS_ERR(req)) - return PTR_ERR(req); + return POLLERR; req->in.h.opcode = FUSE_POLL; req->in.h.nodeid = ff->nodeid; diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h index aaf2f9f..52b641f 100644 --- a/fs/fuse/fuse_i.h +++ b/fs/fuse/fuse_i.h @@ -446,6 +446,9 @@ struct fuse_conn { /** Do multi-page cached writes */ unsigned big_writes:1; + /** Don't apply umask to creation modes */ + unsigned dont_mask:1; + /** The number of requests waiting for completion */ atomic_t num_waiting; @@ -481,6 +484,12 @@ struct fuse_conn { /** Called on final put */ void (*release)(struct fuse_conn *); + + /** Super block for this connection. */ + struct super_block *sb; + + /** Read/write semaphore to hold when accessing sb. */ + struct rw_semaphore killsb; }; static inline struct fuse_conn *get_fuse_conn_super(struct super_block *sb) @@ -509,6 +518,11 @@ extern const struct file_operations fuse_dev_operations; extern const struct dentry_operations fuse_dentry_operations; /** + * Inode to nodeid comparison. + */ +int fuse_inode_eq(struct inode *inode, void *_nodeidp); + +/** * Get a filled in inode */ struct inode *fuse_iget(struct super_block *sb, u64 nodeid, @@ -708,6 +722,19 @@ void fuse_release_nowrite(struct inode *inode); u64 fuse_get_attr_version(struct fuse_conn *fc); +/** + * File-system tells the kernel to invalidate cache for the given node id. + */ +int fuse_reverse_inval_inode(struct super_block *sb, u64 nodeid, + loff_t offset, loff_t len); + +/** + * File-system tells the kernel to invalidate parent attributes and + * the dentry matching parent/name. 
+ */ +int fuse_reverse_inval_entry(struct super_block *sb, u64 parent_nodeid, + struct qstr *name); + int fuse_do_open(struct fuse_conn *fc, u64 nodeid, struct file *file, bool isdir); ssize_t fuse_direct_io(struct file *file, const char __user *buf, diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c index d8673cc..f91ccc4 100644 --- a/fs/fuse/inode.c +++ b/fs/fuse/inode.c @@ -206,7 +206,7 @@ static void fuse_init_inode(struct inode *inode, struct fuse_attr *attr) BUG(); } -static int fuse_inode_eq(struct inode *inode, void *_nodeidp) +int fuse_inode_eq(struct inode *inode, void *_nodeidp) { u64 nodeid = *(u64 *) _nodeidp; if (get_node_id(inode) == nodeid) @@ -257,6 +257,31 @@ struct inode *fuse_iget(struct super_block *sb, u64 nodeid, return inode; } +int fuse_reverse_inval_inode(struct super_block *sb, u64 nodeid, + loff_t offset, loff_t len) +{ + struct inode *inode; + pgoff_t pg_start; + pgoff_t pg_end; + + inode = ilookup5(sb, nodeid, fuse_inode_eq, &nodeid); + if (!inode) + return -ENOENT; + + fuse_invalidate_attr(inode); + if (offset >= 0) { + pg_start = offset >> PAGE_CACHE_SHIFT; + if (len <= 0) + pg_end = -1; + else + pg_end = (offset + len - 1) >> PAGE_CACHE_SHIFT; + invalidate_inode_pages2_range(inode->i_mapping, + pg_start, pg_end); + } + iput(inode); + return 0; +} + static void fuse_umount_begin(struct super_block *sb) { fuse_abort_conn(get_fuse_conn_super(sb)); @@ -480,6 +505,7 @@ void fuse_conn_init(struct fuse_conn *fc) memset(fc, 0, sizeof(*fc)); spin_lock_init(&fc->lock); mutex_init(&fc->inst_mutex); + init_rwsem(&fc->killsb); atomic_set(&fc->count, 1); init_waitqueue_head(&fc->waitq); init_waitqueue_head(&fc->blocked_waitq); @@ -725,6 +751,8 @@ static void process_init_reply(struct fuse_conn *fc, struct fuse_req *req) } if (arg->flags & FUSE_BIG_WRITES) fc->big_writes = 1; + if (arg->flags & FUSE_DONT_MASK) + fc->dont_mask = 1; } else { ra_pages = fc->max_read / PAGE_CACHE_SIZE; fc->no_lock = 1; @@ -748,7 +776,7 @@ static void fuse_send_init(struct 
fuse_conn *fc, struct fuse_req *req) arg->minor = FUSE_KERNEL_MINOR_VERSION; arg->max_readahead = fc->bdi.ra_pages * PAGE_CACHE_SIZE; arg->flags |= FUSE_ASYNC_READ | FUSE_POSIX_LOCKS | FUSE_ATOMIC_O_TRUNC | - FUSE_EXPORT_SUPPORT | FUSE_BIG_WRITES; + FUSE_EXPORT_SUPPORT | FUSE_BIG_WRITES | FUSE_DONT_MASK; req->in.h.opcode = FUSE_INIT; req->in.numargs = 1; req->in.args[0].size = sizeof(*arg); @@ -860,10 +888,16 @@ static int fuse_fill_super(struct super_block *sb, void *data, int silent) fuse_conn_init(fc); fc->dev = sb->s_dev; + fc->sb = sb; err = fuse_bdi_init(fc, sb); if (err) goto err_put_conn; + /* Handle umasking inside the fuse code */ + if (sb->s_flags & MS_POSIXACL) + fc->dont_mask = 1; + sb->s_flags |= MS_POSIXACL; + fc->release = fuse_free_conn; fc->flags = d.flags; fc->user_id = d.user_id; @@ -941,12 +975,25 @@ static int fuse_get_sb(struct file_system_type *fs_type, return get_sb_nodev(fs_type, flags, raw_data, fuse_fill_super, mnt); } +static void fuse_kill_sb_anon(struct super_block *sb) +{ + struct fuse_conn *fc = get_fuse_conn_super(sb); + + if (fc) { + down_write(&fc->killsb); + fc->sb = NULL; + up_write(&fc->killsb); + } + + kill_anon_super(sb); +} + static struct file_system_type fuse_fs_type = { .owner = THIS_MODULE, .name = "fuse", .fs_flags = FS_HAS_SUBTYPE, .get_sb = fuse_get_sb, - .kill_sb = kill_anon_super, + .kill_sb = fuse_kill_sb_anon, }; #ifdef CONFIG_BLOCK @@ -958,11 +1005,24 @@ static int fuse_get_sb_blk(struct file_system_type *fs_type, mnt); } +static void fuse_kill_sb_blk(struct super_block *sb) +{ + struct fuse_conn *fc = get_fuse_conn_super(sb); + + if (fc) { + down_write(&fc->killsb); + fc->sb = NULL; + up_write(&fc->killsb); + } + + kill_block_super(sb); +} + static struct file_system_type fuseblk_fs_type = { .owner = THIS_MODULE, .name = "fuseblk", .get_sb = fuse_get_sb_blk, - .kill_sb = kill_block_super, + .kill_sb = fuse_kill_sb_blk, .fs_flags = FS_REQUIRES_DEV | FS_HAS_SUBTYPE, }; diff --git a/fs/hostfs/hostfs_kern.c 
b/fs/hostfs/hostfs_kern.c index fe02ad4..032604e 100644 --- a/fs/hostfs/hostfs_kern.c +++ b/fs/hostfs/hostfs_kern.c @@ -972,6 +972,7 @@ static int hostfs_fill_sb_common(struct super_block *sb, void *d, int silent) sb->s_blocksize_bits = 10; sb->s_magic = HOSTFS_SUPER_MAGIC; sb->s_op = &hostfs_sbops; + sb->s_maxbytes = MAX_LFS_FILESIZE; /* NULL is printed as <NULL> by sprintf: avoid that. */ if (req_root == NULL) @@ -25,6 +25,7 @@ #include <linux/fsnotify.h> #include <linux/mount.h> #include <linux/async.h> +#include <linux/posix_acl.h> /* * This is needed for the following functions: @@ -189,6 +190,9 @@ struct inode *inode_init_always(struct super_block *sb, struct inode *inode) } inode->i_private = NULL; inode->i_mapping = mapping; +#ifdef CONFIG_FS_POSIX_ACL + inode->i_acl = inode->i_default_acl = ACL_NOT_CACHED; +#endif #ifdef CONFIG_FSNOTIFY inode->i_fsnotify_mask = 0; @@ -227,6 +231,12 @@ void destroy_inode(struct inode *inode) ima_inode_free(inode); security_inode_free(inode); fsnotify_inode_delete(inode); +#ifdef CONFIG_FS_POSIX_ACL + if (inode->i_acl && inode->i_acl != ACL_NOT_CACHED) + posix_acl_release(inode->i_acl); + if (inode->i_default_acl && inode->i_default_acl != ACL_NOT_CACHED) + posix_acl_release(inode->i_default_acl); +#endif if (inode->i_sb->s_op->destroy_inode) inode->i_sb->s_op->destroy_inode(inode); else @@ -665,12 +675,17 @@ void unlock_new_inode(struct inode *inode) if (inode->i_mode & S_IFDIR) { struct file_system_type *type = inode->i_sb->s_type; - /* - * ensure nobody is actually holding i_mutex - */ - mutex_destroy(&inode->i_mutex); - mutex_init(&inode->i_mutex); - lockdep_set_class(&inode->i_mutex, &type->i_mutex_dir_key); + /* Set new key only if filesystem hasn't already changed it */ + if (!lockdep_match_class(&inode->i_mutex, + &type->i_mutex_key)) { + /* + * ensure nobody is actually holding i_mutex + */ + mutex_destroy(&inode->i_mutex); + mutex_init(&inode->i_mutex); + lockdep_set_class(&inode->i_mutex, + 
&type->i_mutex_dir_key); + } } #endif /* @@ -15,6 +15,7 @@ #include <linux/uaccess.h> #include <linux/writeback.h> #include <linux/buffer_head.h> +#include <linux/falloc.h> #include <asm/ioctls.h> @@ -403,6 +404,37 @@ EXPORT_SYMBOL(generic_block_fiemap); #endif /* CONFIG_BLOCK */ +/* + * This provides compatibility with legacy XFS pre-allocation ioctls + * which predate the fallocate syscall. + * + * Only the l_start, l_len and l_whence fields of the 'struct space_resv' + * are used here, rest are ignored. + */ +int ioctl_preallocate(struct file *filp, void __user *argp) +{ + struct inode *inode = filp->f_path.dentry->d_inode; + struct space_resv sr; + + if (copy_from_user(&sr, argp, sizeof(sr))) + return -EFAULT; + + switch (sr.l_whence) { + case SEEK_SET: + break; + case SEEK_CUR: + sr.l_start += filp->f_pos; + break; + case SEEK_END: + sr.l_start += i_size_read(inode); + break; + default: + return -EINVAL; + } + + return do_fallocate(filp, FALLOC_FL_KEEP_SIZE, sr.l_start, sr.l_len); +} + static int file_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) { @@ -414,6 +446,9 @@ static int file_ioctl(struct file *filp, unsigned int cmd, return ioctl_fibmap(filp, p); case FIONREAD: return put_user(i_size_read(inode) - filp->f_pos, p); + case FS_IOC_RESVSP: + case FS_IOC_RESVSP64: + return ioctl_preallocate(filp, p); } return vfs_ioctl(filp, cmd, arg); diff --git a/fs/jffs2/acl.c b/fs/jffs2/acl.c index 043740d..8fcb62392 100644 --- a/fs/jffs2/acl.c +++ b/fs/jffs2/acl.c @@ -156,48 +156,25 @@ static void *jffs2_acl_to_medium(const struct posix_acl *acl, size_t *size) return ERR_PTR(-EINVAL); } -static struct posix_acl *jffs2_iget_acl(struct inode *inode, struct posix_acl **i_acl) -{ - struct posix_acl *acl = JFFS2_ACL_NOT_CACHED; - - spin_lock(&inode->i_lock); - if (*i_acl != JFFS2_ACL_NOT_CACHED) - acl = posix_acl_dup(*i_acl); - spin_unlock(&inode->i_lock); - return acl; -} - -static void jffs2_iset_acl(struct inode *inode, struct posix_acl **i_acl, struct 
posix_acl *acl) -{ - spin_lock(&inode->i_lock); - if (*i_acl != JFFS2_ACL_NOT_CACHED) - posix_acl_release(*i_acl); - *i_acl = posix_acl_dup(acl); - spin_unlock(&inode->i_lock); -} - static struct posix_acl *jffs2_get_acl(struct inode *inode, int type) { - struct jffs2_inode_info *f = JFFS2_INODE_INFO(inode); struct posix_acl *acl; char *value = NULL; int rc, xprefix; + acl = get_cached_acl(inode, type); + if (acl != ACL_NOT_CACHED) + return acl; + switch (type) { case ACL_TYPE_ACCESS: - acl = jffs2_iget_acl(inode, &f->i_acl_access); - if (acl != JFFS2_ACL_NOT_CACHED) - return acl; xprefix = JFFS2_XPREFIX_ACL_ACCESS; break; case ACL_TYPE_DEFAULT: - acl = jffs2_iget_acl(inode, &f->i_acl_default); - if (acl != JFFS2_ACL_NOT_CACHED) - return acl; xprefix = JFFS2_XPREFIX_ACL_DEFAULT; break; default: - return ERR_PTR(-EINVAL); + BUG(); } rc = do_jffs2_getxattr(inode, xprefix, "", NULL, 0); if (rc > 0) { @@ -215,16 +192,8 @@ static struct posix_acl *jffs2_get_acl(struct inode *inode, int type) } if (value) kfree(value); - if (!IS_ERR(acl)) { - switch (type) { - case ACL_TYPE_ACCESS: - jffs2_iset_acl(inode, &f->i_acl_access, acl); - break; - case ACL_TYPE_DEFAULT: - jffs2_iset_acl(inode, &f->i_acl_default, acl); - break; - } - } + if (!IS_ERR(acl)) + set_cached_acl(inode, type, acl); return acl; } @@ -249,7 +218,6 @@ static int __jffs2_set_acl(struct inode *inode, int xprefix, struct posix_acl *a static int jffs2_set_acl(struct inode *inode, int type, struct posix_acl *acl) { - struct jffs2_inode_info *f = JFFS2_INODE_INFO(inode); int rc, xprefix; if (S_ISLNK(inode->i_mode)) @@ -285,16 +253,8 @@ static int jffs2_set_acl(struct inode *inode, int type, struct posix_acl *acl) return -EINVAL; } rc = __jffs2_set_acl(inode, xprefix, acl); - if (!rc) { - switch(type) { - case ACL_TYPE_ACCESS: - jffs2_iset_acl(inode, &f->i_acl_access, acl); - break; - case ACL_TYPE_DEFAULT: - jffs2_iset_acl(inode, &f->i_acl_default, acl); - break; - } - } + if (!rc) + set_cached_acl(inode, type, 
acl); return rc; } @@ -321,12 +281,10 @@ int jffs2_permission(struct inode *inode, int mask) int jffs2_init_acl_pre(struct inode *dir_i, struct inode *inode, int *i_mode) { - struct jffs2_inode_info *f = JFFS2_INODE_INFO(inode); struct posix_acl *acl, *clone; int rc; - f->i_acl_default = NULL; - f->i_acl_access = NULL; + cache_no_acl(inode); if (S_ISLNK(*i_mode)) return 0; /* Symlink always has no-ACL */ @@ -339,7 +297,7 @@ int jffs2_init_acl_pre(struct inode *dir_i, struct inode *inode, int *i_mode) *i_mode &= ~current_umask(); } else { if (S_ISDIR(*i_mode)) - jffs2_iset_acl(inode, &f->i_acl_default, acl); + set_cached_acl(inode, ACL_TYPE_DEFAULT, acl); clone = posix_acl_clone(acl, GFP_KERNEL); if (!clone) @@ -350,7 +308,7 @@ int jffs2_init_acl_pre(struct inode *dir_i, struct inode *inode, int *i_mode) return rc; } if (rc > 0) - jffs2_iset_acl(inode, &f->i_acl_access, clone); + set_cached_acl(inode, ACL_TYPE_ACCESS, clone); posix_acl_release(clone); } @@ -359,17 +317,16 @@ int jffs2_init_acl_pre(struct inode *dir_i, struct inode *inode, int *i_mode) int jffs2_init_acl_post(struct inode *inode) { - struct jffs2_inode_info *f = JFFS2_INODE_INFO(inode); int rc; - if (f->i_acl_default) { - rc = __jffs2_set_acl(inode, JFFS2_XPREFIX_ACL_DEFAULT, f->i_acl_default); + if (inode->i_default_acl) { + rc = __jffs2_set_acl(inode, JFFS2_XPREFIX_ACL_DEFAULT, inode->i_default_acl); if (rc) return rc; } - if (f->i_acl_access) { - rc = __jffs2_set_acl(inode, JFFS2_XPREFIX_ACL_ACCESS, f->i_acl_access); + if (inode->i_acl) { + rc = __jffs2_set_acl(inode, JFFS2_XPREFIX_ACL_ACCESS, inode->i_acl); if (rc) return rc; } @@ -377,18 +334,6 @@ int jffs2_init_acl_post(struct inode *inode) return 0; } -void jffs2_clear_acl(struct jffs2_inode_info *f) -{ - if (f->i_acl_access && f->i_acl_access != JFFS2_ACL_NOT_CACHED) { - posix_acl_release(f->i_acl_access); - f->i_acl_access = JFFS2_ACL_NOT_CACHED; - } - if (f->i_acl_default && f->i_acl_default != JFFS2_ACL_NOT_CACHED) { - 
posix_acl_release(f->i_acl_default); - f->i_acl_default = JFFS2_ACL_NOT_CACHED; - } -} - int jffs2_acl_chmod(struct inode *inode) { struct posix_acl *acl, *clone; diff --git a/fs/jffs2/acl.h b/fs/jffs2/acl.h index 8ca058a..fc929f2 100644 --- a/fs/jffs2/acl.h +++ b/fs/jffs2/acl.h @@ -26,13 +26,10 @@ struct jffs2_acl_header { #ifdef CONFIG_JFFS2_FS_POSIX_ACL -#define JFFS2_ACL_NOT_CACHED ((void *)-1) - extern int jffs2_permission(struct inode *, int); extern int jffs2_acl_chmod(struct inode *); extern int jffs2_init_acl_pre(struct inode *, struct inode *, int *); extern int jffs2_init_acl_post(struct inode *); -extern void jffs2_clear_acl(struct jffs2_inode_info *); extern struct xattr_handler jffs2_acl_access_xattr_handler; extern struct xattr_handler jffs2_acl_default_xattr_handler; @@ -43,6 +40,5 @@ extern struct xattr_handler jffs2_acl_default_xattr_handler; #define jffs2_acl_chmod(inode) (0) #define jffs2_init_acl_pre(dir_i,inode,mode) (0) #define jffs2_init_acl_post(inode) (0) -#define jffs2_clear_acl(f) #endif /* CONFIG_JFFS2_FS_POSIX_ACL */ diff --git a/fs/jffs2/jffs2_fs_i.h b/fs/jffs2/jffs2_fs_i.h index 4c41db9..c6923da 100644 --- a/fs/jffs2/jffs2_fs_i.h +++ b/fs/jffs2/jffs2_fs_i.h @@ -50,10 +50,6 @@ struct jffs2_inode_info { uint16_t flags; uint8_t usercompr; struct inode vfs_inode; -#ifdef CONFIG_JFFS2_FS_POSIX_ACL - struct posix_acl *i_acl_access; - struct posix_acl *i_acl_default; -#endif }; #endif /* _JFFS2_FS_I */ diff --git a/fs/jffs2/os-linux.h b/fs/jffs2/os-linux.h index 2228380..a7f03b7 100644 --- a/fs/jffs2/os-linux.h +++ b/fs/jffs2/os-linux.h @@ -56,10 +56,6 @@ static inline void jffs2_init_inode_info(struct jffs2_inode_info *f) f->target = NULL; f->flags = 0; f->usercompr = 0; -#ifdef CONFIG_JFFS2_FS_POSIX_ACL - f->i_acl_access = JFFS2_ACL_NOT_CACHED; - f->i_acl_default = JFFS2_ACL_NOT_CACHED; -#endif } diff --git a/fs/jffs2/readinode.c b/fs/jffs2/readinode.c index 1fc1e92..1a80301 100644 --- a/fs/jffs2/readinode.c +++ b/fs/jffs2/readinode.c @@ 
-1424,7 +1424,6 @@ void jffs2_do_clear_inode(struct jffs2_sb_info *c, struct jffs2_inode_info *f) struct jffs2_full_dirent *fd, *fds; int deleted; - jffs2_clear_acl(f); jffs2_xattr_delete_inode(c, f->inocache); mutex_lock(&f->sem); deleted = f->inocache && !f->inocache->pino_nlink; diff --git a/fs/jffs2/scan.c b/fs/jffs2/scan.c index 1d437de..696686c 100644 --- a/fs/jffs2/scan.c +++ b/fs/jffs2/scan.c @@ -130,9 +130,9 @@ int jffs2_scan_medium(struct jffs2_sb_info *c) if (jffs2_sum_active()) { s = kzalloc(sizeof(struct jffs2_summary), GFP_KERNEL); if (!s) { - kfree(flashbuf); JFFS2_WARNING("Can't allocate memory for summary\n"); - return -ENOMEM; + ret = -ENOMEM; + goto out; } } @@ -196,7 +196,7 @@ int jffs2_scan_medium(struct jffs2_sb_info *c) if (c->nextblock) { ret = file_dirty(c, c->nextblock); if (ret) - return ret; + goto out; /* deleting summary information of the old nextblock */ jffs2_sum_reset_collected(c->summary); } @@ -207,7 +207,7 @@ int jffs2_scan_medium(struct jffs2_sb_info *c) } else { ret = file_dirty(c, jeb); if (ret) - return ret; + goto out; } break; diff --git a/fs/jfs/acl.c b/fs/jfs/acl.c index 06ca1b8..91fa3ad 100644 --- a/fs/jfs/acl.c +++ b/fs/jfs/acl.c @@ -31,27 +31,24 @@ static struct posix_acl *jfs_get_acl(struct inode *inode, int type) { struct posix_acl *acl; char *ea_name; - struct jfs_inode_info *ji = JFS_IP(inode); - struct posix_acl **p_acl; int size; char *value = NULL; + acl = get_cached_acl(inode, type); + if (acl != ACL_NOT_CACHED) + return acl; + switch(type) { case ACL_TYPE_ACCESS: ea_name = POSIX_ACL_XATTR_ACCESS; - p_acl = &ji->i_acl; break; case ACL_TYPE_DEFAULT: ea_name = POSIX_ACL_XATTR_DEFAULT; - p_acl = &ji->i_default_acl; break; default: return ERR_PTR(-EINVAL); } - if (*p_acl != JFS_ACL_NOT_CACHED) - return posix_acl_dup(*p_acl); - size = __jfs_getxattr(inode, ea_name, NULL, 0); if (size > 0) { @@ -62,17 +59,18 @@ static struct posix_acl *jfs_get_acl(struct inode *inode, int type) } if (size < 0) { - if (size == 
-ENODATA) { - *p_acl = NULL; + if (size == -ENODATA) acl = NULL; - } else + else acl = ERR_PTR(size); } else { acl = posix_acl_from_xattr(value, size); - if (!IS_ERR(acl)) - *p_acl = posix_acl_dup(acl); } kfree(value); + if (!IS_ERR(acl)) { + set_cached_acl(inode, type, acl); + posix_acl_release(acl); + } return acl; } @@ -80,8 +78,6 @@ static int jfs_set_acl(tid_t tid, struct inode *inode, int type, struct posix_acl *acl) { char *ea_name; - struct jfs_inode_info *ji = JFS_IP(inode); - struct posix_acl **p_acl; int rc; int size = 0; char *value = NULL; @@ -92,11 +88,9 @@ static int jfs_set_acl(tid_t tid, struct inode *inode, int type, switch(type) { case ACL_TYPE_ACCESS: ea_name = POSIX_ACL_XATTR_ACCESS; - p_acl = &ji->i_acl; break; case ACL_TYPE_DEFAULT: ea_name = POSIX_ACL_XATTR_DEFAULT; - p_acl = &ji->i_default_acl; if (!S_ISDIR(inode->i_mode)) return acl ? -EACCES : 0; break; @@ -116,27 +110,24 @@ static int jfs_set_acl(tid_t tid, struct inode *inode, int type, out: kfree(value); - if (!rc) { - if (*p_acl && (*p_acl != JFS_ACL_NOT_CACHED)) - posix_acl_release(*p_acl); - *p_acl = posix_acl_dup(acl); - } + if (!rc) + set_cached_acl(inode, type, acl); + return rc; } static int jfs_check_acl(struct inode *inode, int mask) { - struct jfs_inode_info *ji = JFS_IP(inode); + struct posix_acl *acl = jfs_get_acl(inode, ACL_TYPE_ACCESS); - if (ji->i_acl == JFS_ACL_NOT_CACHED) { - struct posix_acl *acl = jfs_get_acl(inode, ACL_TYPE_ACCESS); - if (IS_ERR(acl)) - return PTR_ERR(acl); + if (IS_ERR(acl)) + return PTR_ERR(acl); + if (acl) { + int error = posix_acl_permission(inode, acl, mask); posix_acl_release(acl); + return error; } - if (ji->i_acl) - return posix_acl_permission(inode, ji->i_acl, mask); return -EAGAIN; } diff --git a/fs/jfs/jfs_incore.h b/fs/jfs/jfs_incore.h index 439901d..1439f11 100644 --- a/fs/jfs/jfs_incore.h +++ b/fs/jfs/jfs_incore.h @@ -74,10 +74,6 @@ struct jfs_inode_info { /* xattr_sem allows us to access the xattrs without taking i_mutex */ struct 
rw_semaphore xattr_sem; lid_t xtlid; /* lid of xtree lock on directory */ -#ifdef CONFIG_JFS_POSIX_ACL - struct posix_acl *i_acl; - struct posix_acl *i_default_acl; -#endif union { struct { xtpage_t _xtroot; /* 288: xtree root */ @@ -107,8 +103,6 @@ struct jfs_inode_info { #define i_inline u.link._inline #define i_inline_ea u.link._inline_ea -#define JFS_ACL_NOT_CACHED ((void *)-1) - #define IREAD_LOCK(ip, subclass) \ down_read_nested(&JFS_IP(ip)->rdwrlock, subclass) #define IREAD_UNLOCK(ip) up_read(&JFS_IP(ip)->rdwrlock) diff --git a/fs/jfs/super.c b/fs/jfs/super.c index 09b1b6e..37e6dcd 100644 --- a/fs/jfs/super.c +++ b/fs/jfs/super.c @@ -128,18 +128,6 @@ static void jfs_destroy_inode(struct inode *inode) ji->active_ag = -1; } spin_unlock_irq(&ji->ag_lock); - -#ifdef CONFIG_JFS_POSIX_ACL - if (ji->i_acl != JFS_ACL_NOT_CACHED) { - posix_acl_release(ji->i_acl); - ji->i_acl = JFS_ACL_NOT_CACHED; - } - if (ji->i_default_acl != JFS_ACL_NOT_CACHED) { - posix_acl_release(ji->i_default_acl); - ji->i_default_acl = JFS_ACL_NOT_CACHED; - } -#endif - kmem_cache_free(jfs_inode_cachep, ji); } @@ -798,10 +786,6 @@ static void init_once(void *foo) init_rwsem(&jfs_ip->xattr_sem); spin_lock_init(&jfs_ip->ag_lock); jfs_ip->active_ag = -1; -#ifdef CONFIG_JFS_POSIX_ACL - jfs_ip->i_acl = JFS_ACL_NOT_CACHED; - jfs_ip->i_default_acl = JFS_ACL_NOT_CACHED; -#endif inode_init_once(&jfs_ip->vfs_inode); } diff --git a/fs/jfs/xattr.c b/fs/jfs/xattr.c index 61dfa81..fad3645 100644 --- a/fs/jfs/xattr.c +++ b/fs/jfs/xattr.c @@ -727,10 +727,7 @@ static int can_set_system_xattr(struct inode *inode, const char *name, /* * We're changing the ACL. 
Get rid of the cached one */ - acl =JFS_IP(inode)->i_acl; - if (acl != JFS_ACL_NOT_CACHED) - posix_acl_release(acl); - JFS_IP(inode)->i_acl = JFS_ACL_NOT_CACHED; + forget_cached_acl(inode, ACL_TYPE_ACCESS); return 0; } else if (strcmp(name, POSIX_ACL_XATTR_DEFAULT) == 0) { @@ -746,10 +743,7 @@ static int can_set_system_xattr(struct inode *inode, const char *name, /* * We're changing the default ACL. Get rid of the cached one */ - acl =JFS_IP(inode)->i_default_acl; - if (acl && (acl != JFS_ACL_NOT_CACHED)) - posix_acl_release(acl); - JFS_IP(inode)->i_default_acl = JFS_ACL_NOT_CACHED; + forget_cached_acl(inode, ACL_TYPE_DEFAULT); return 0; } diff --git a/fs/lockd/clntproc.c b/fs/lockd/clntproc.c index dd79570..f2fdcbc 100644 --- a/fs/lockd/clntproc.c +++ b/fs/lockd/clntproc.c @@ -126,7 +126,6 @@ static void nlmclnt_setlockargs(struct nlm_rqst *req, struct file_lock *fl) struct nlm_lock *lock = &argp->lock; nlmclnt_next_cookie(&argp->cookie); - argp->state = nsm_local_state; memcpy(&lock->fh, NFS_FH(fl->fl_file->f_path.dentry->d_inode), sizeof(struct nfs_fh)); lock->caller = utsname()->nodename; lock->oh.data = req->a_owner; @@ -165,6 +164,7 @@ int nlmclnt_proc(struct nlm_host *host, int cmd, struct file_lock *fl) /* Set up the argument struct */ nlmclnt_setlockargs(call, fl); + lock_kernel(); if (IS_SETLK(cmd) || IS_SETLKW(cmd)) { if (fl->fl_type != F_UNLCK) { call->a_args.block = IS_SETLKW(cmd) ? 
1 : 0; @@ -178,6 +178,7 @@ int nlmclnt_proc(struct nlm_host *host, int cmd, struct file_lock *fl) fl->fl_ops->fl_release_private(fl); fl->fl_ops = NULL; + unlock_kernel(); dprintk("lockd: clnt proc returns %d\n", status); return status; @@ -519,6 +520,7 @@ nlmclnt_lock(struct nlm_rqst *req, struct file_lock *fl) if (nsm_monitor(host) < 0) goto out; + req->a_args.state = nsm_local_state; fl->fl_flags |= FL_ACCESS; status = do_vfs_lock(fl); diff --git a/fs/lockd/mon.c b/fs/lockd/mon.c index 6d5d4a4..7fce1b5 100644 --- a/fs/lockd/mon.c +++ b/fs/lockd/mon.c @@ -53,7 +53,7 @@ static DEFINE_SPINLOCK(nsm_lock); /* * Local NSM state */ -int __read_mostly nsm_local_state; +u32 __read_mostly nsm_local_state; int __read_mostly nsm_use_hostnames; static inline struct sockaddr *nsm_addr(const struct nsm_handle *nsm) @@ -112,6 +112,7 @@ static struct rpc_clnt *nsm_create(void) .program = &nsm_program, .version = NSM_VERSION, .authflavor = RPC_AUTH_NULL, + .flags = RPC_CLNT_CREATE_NOPING, }; return rpc_create(&args); @@ -184,13 +185,19 @@ int nsm_monitor(const struct nlm_host *host) nsm->sm_mon_name = nsm_use_hostnames ? 
nsm->sm_name : nsm->sm_addrbuf; status = nsm_mon_unmon(nsm, NSMPROC_MON, &res); - if (res.status != 0) + if (unlikely(res.status != 0)) status = -EIO; - if (status < 0) + if (unlikely(status < 0)) { printk(KERN_NOTICE "lockd: cannot monitor %s\n", nsm->sm_name); - else - nsm->sm_monitored = 1; - return status; + return status; + } + + nsm->sm_monitored = 1; + if (unlikely(nsm_local_state != res.state)) { + nsm_local_state = res.state; + dprintk("lockd: NSM state changed to %d\n", nsm_local_state); + } + return 0; } /** diff --git a/fs/lockd/svclock.c b/fs/lockd/svclock.c index 83ee342..e577a78 100644 --- a/fs/lockd/svclock.c +++ b/fs/lockd/svclock.c @@ -326,6 +326,8 @@ static void nlmsvc_freegrantargs(struct nlm_rqst *call) { if (call->a_args.lock.oh.data != call->a_owner) kfree(call->a_args.lock.oh.data); + + locks_release_private(&call->a_args.lock.fl); } /* @@ -151,7 +151,7 @@ static struct file_lock *locks_alloc_lock(void) return kmem_cache_alloc(filelock_cache, GFP_KERNEL); } -static void locks_release_private(struct file_lock *fl) +void locks_release_private(struct file_lock *fl) { if (fl->fl_ops) { if (fl->fl_ops->fl_release_private) @@ -165,6 +165,7 @@ static void locks_release_private(struct file_lock *fl) } } +EXPORT_SYMBOL_GPL(locks_release_private); /* Free a lock which is not in use. 
*/ static void locks_free_lock(struct file_lock *fl) diff --git a/fs/minix/minix.h b/fs/minix/minix.h index cb7fdd1..9dcf95b 100644 --- a/fs/minix/minix.h +++ b/fs/minix/minix.h @@ -1,3 +1,6 @@ +#ifndef FS_MINIX_H +#define FS_MINIX_H + #include <linux/fs.h> #include <linux/pagemap.h> #include <linux/minix_fs.h> @@ -86,3 +89,5 @@ static inline struct minix_inode_info *minix_i(struct inode *inode) { return list_entry(inode, struct minix_inode_info, vfs_inode); } + +#endif /* FS_MINIX_H */ @@ -1698,8 +1698,11 @@ struct file *do_filp_open(int dfd, const char *pathname, if (error) return ERR_PTR(error); error = path_walk(pathname, &nd); - if (error) + if (error) { + if (nd.root.mnt) + path_put(&nd.root); return ERR_PTR(error); + } if (unlikely(!audit_dummy_context())) audit_inode(pathname, nd.path.dentry); @@ -1758,7 +1761,13 @@ do_last: goto exit; } filp = nameidata_to_filp(&nd, open_flag); + if (IS_ERR(filp)) + ima_counts_put(&nd.path, + acc_mode & (MAY_READ | MAY_WRITE | + MAY_EXEC)); mnt_drop_write(nd.path.mnt); + if (nd.root.mnt) + path_put(&nd.root); return filp; } @@ -1812,6 +1821,9 @@ ok: goto exit; } filp = nameidata_to_filp(&nd, open_flag); + if (IS_ERR(filp)) + ima_counts_put(&nd.path, + acc_mode & (MAY_READ | MAY_WRITE | MAY_EXEC)); /* * It is now safe to drop the mnt write * because the filp has had a write taken @@ -1819,6 +1831,8 @@ ok: */ if (will_write) mnt_drop_write(nd.path.mnt); + if (nd.root.mnt) + path_put(&nd.root); return filp; exit_mutex_unlock: @@ -1859,6 +1873,8 @@ do_link: * with "intent.open". 
*/ release_open_intent(&nd); + if (nd.root.mnt) + path_put(&nd.root); return ERR_PTR(error); } nd.flags &= ~LOOKUP_PARENT; diff --git a/fs/namespace.c b/fs/namespace.c index 2dd333b..3dc283f 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -42,6 +42,8 @@ __cacheline_aligned_in_smp DEFINE_SPINLOCK(vfsmount_lock); static int event; static DEFINE_IDA(mnt_id_ida); static DEFINE_IDA(mnt_group_ida); +static int mnt_id_start = 0; +static int mnt_group_start = 1; static struct list_head *mount_hashtable __read_mostly; static struct kmem_cache *mnt_cache __read_mostly; @@ -69,7 +71,9 @@ static int mnt_alloc_id(struct vfsmount *mnt) retry: ida_pre_get(&mnt_id_ida, GFP_KERNEL); spin_lock(&vfsmount_lock); - res = ida_get_new(&mnt_id_ida, &mnt->mnt_id); + res = ida_get_new_above(&mnt_id_ida, mnt_id_start, &mnt->mnt_id); + if (!res) + mnt_id_start = mnt->mnt_id + 1; spin_unlock(&vfsmount_lock); if (res == -EAGAIN) goto retry; @@ -79,8 +83,11 @@ retry: static void mnt_free_id(struct vfsmount *mnt) { + int id = mnt->mnt_id; spin_lock(&vfsmount_lock); - ida_remove(&mnt_id_ida, mnt->mnt_id); + ida_remove(&mnt_id_ida, id); + if (mnt_id_start > id) + mnt_id_start = id; spin_unlock(&vfsmount_lock); } @@ -91,10 +98,18 @@ static void mnt_free_id(struct vfsmount *mnt) */ static int mnt_alloc_group_id(struct vfsmount *mnt) { + int res; + if (!ida_pre_get(&mnt_group_ida, GFP_KERNEL)) return -ENOMEM; - return ida_get_new_above(&mnt_group_ida, 1, &mnt->mnt_group_id); + res = ida_get_new_above(&mnt_group_ida, + mnt_group_start, + &mnt->mnt_group_id); + if (!res) + mnt_group_start = mnt->mnt_group_id + 1; + + return res; } /* @@ -102,7 +117,10 @@ static int mnt_alloc_group_id(struct vfsmount *mnt) */ void mnt_release_group_id(struct vfsmount *mnt) { - ida_remove(&mnt_group_ida, mnt->mnt_group_id); + int id = mnt->mnt_group_id; + ida_remove(&mnt_group_ida, id); + if (mnt_group_start > id) + mnt_group_start = id; mnt->mnt_group_id = 0; } @@ -1937,6 +1955,21 @@ dput_out: return retval; } 
+static struct mnt_namespace *alloc_mnt_ns(void) +{ + struct mnt_namespace *new_ns; + + new_ns = kmalloc(sizeof(struct mnt_namespace), GFP_KERNEL); + if (!new_ns) + return ERR_PTR(-ENOMEM); + atomic_set(&new_ns->count, 1); + new_ns->root = NULL; + INIT_LIST_HEAD(&new_ns->list); + init_waitqueue_head(&new_ns->poll); + new_ns->event = 0; + return new_ns; +} + /* * Allocate a new namespace structure and populate it with contents * copied from the namespace of the passed in task structure. @@ -1948,14 +1981,9 @@ static struct mnt_namespace *dup_mnt_ns(struct mnt_namespace *mnt_ns, struct vfsmount *rootmnt = NULL, *pwdmnt = NULL; struct vfsmount *p, *q; - new_ns = kmalloc(sizeof(struct mnt_namespace), GFP_KERNEL); - if (!new_ns) - return ERR_PTR(-ENOMEM); - - atomic_set(&new_ns->count, 1); - INIT_LIST_HEAD(&new_ns->list); - init_waitqueue_head(&new_ns->poll); - new_ns->event = 0; + new_ns = alloc_mnt_ns(); + if (IS_ERR(new_ns)) + return new_ns; down_write(&namespace_sem); /* First pass: copy the tree topology */ @@ -2019,6 +2047,24 @@ struct mnt_namespace *copy_mnt_ns(unsigned long flags, struct mnt_namespace *ns, return new_ns; } +/** + * create_mnt_ns - creates a private namespace and adds a root filesystem + * @mnt: pointer to the new root filesystem mountpoint + */ +struct mnt_namespace *create_mnt_ns(struct vfsmount *mnt) +{ + struct mnt_namespace *new_ns; + + new_ns = alloc_mnt_ns(); + if (!IS_ERR(new_ns)) { + mnt->mnt_ns = new_ns; + new_ns->root = mnt; + list_add(&new_ns->list, &new_ns->root->mnt_list); + } + return new_ns; +} +EXPORT_SYMBOL(create_mnt_ns); + SYSCALL_DEFINE5(mount, char __user *, dev_name, char __user *, dir_name, char __user *, type, unsigned long, flags, void __user *, data) { @@ -2194,16 +2240,9 @@ static void __init init_mount_tree(void) mnt = do_kern_mount("rootfs", 0, "rootfs", NULL); if (IS_ERR(mnt)) panic("Can't create rootfs"); - ns = kmalloc(sizeof(*ns), GFP_KERNEL); - if (!ns) + ns = create_mnt_ns(mnt); + if (IS_ERR(ns)) panic("Can't 
allocate initial namespace"); - atomic_set(&ns->count, 1); - INIT_LIST_HEAD(&ns->list); - init_waitqueue_head(&ns->poll); - ns->event = 0; - list_add(&mnt->mnt_list, &ns->list); - ns->root = mnt; - mnt->mnt_ns = ns; init_task.nsproxy->mnt_ns = ns; get_mnt_ns(ns); @@ -2246,10 +2285,14 @@ void __init mnt_init(void) init_mount_tree(); } -void __put_mnt_ns(struct mnt_namespace *ns) +void put_mnt_ns(struct mnt_namespace *ns) { - struct vfsmount *root = ns->root; + struct vfsmount *root; LIST_HEAD(umount_list); + + if (!atomic_dec_and_lock(&ns->count, &vfsmount_lock)) + return; + root = ns->root; ns->root = NULL; spin_unlock(&vfsmount_lock); down_write(&namespace_sem); @@ -2260,3 +2303,4 @@ void __put_mnt_ns(struct mnt_namespace *ns) release_mounts(&umount_list); kfree(ns); } +EXPORT_SYMBOL(put_mnt_ns); diff --git a/fs/nfs/Kconfig b/fs/nfs/Kconfig index e67f3ec..2a77bc2 100644 --- a/fs/nfs/Kconfig +++ b/fs/nfs/Kconfig @@ -1,6 +1,6 @@ config NFS_FS tristate "NFS client support" - depends on INET + depends on INET && FILE_LOCKING select LOCKD select SUNRPC select NFS_ACL_SUPPORT if NFS_V3_ACL @@ -74,6 +74,15 @@ config NFS_V4 If unsure, say N. +config NFS_V4_1 + bool "NFS client support for NFSv4.1 (DEVELOPER ONLY)" + depends on NFS_V4 && EXPERIMENTAL + help + This option enables support for minor version 1 of the NFSv4 protocol + (draft-ietf-nfsv4-minorversion1) in the kernel's NFS client. + + Unless you're an NFS developer, say N. 
+ config ROOT_NFS bool "Root file system on NFS" depends on NFS_FS=y && IP_PNP diff --git a/fs/nfs/callback.c b/fs/nfs/callback.c index a886e69..7f604c7 100644 --- a/fs/nfs/callback.c +++ b/fs/nfs/callback.c @@ -17,6 +17,9 @@ #include <linux/freezer.h> #include <linux/kthread.h> #include <linux/sunrpc/svcauth_gss.h> +#if defined(CONFIG_NFS_V4_1) +#include <linux/sunrpc/bc_xprt.h> +#endif #include <net/inet_sock.h> @@ -28,11 +31,12 @@ struct nfs_callback_data { unsigned int users; + struct svc_serv *serv; struct svc_rqst *rqst; struct task_struct *task; }; -static struct nfs_callback_data nfs_callback_info; +static struct nfs_callback_data nfs_callback_info[NFS4_MAX_MINOR_VERSION + 1]; static DEFINE_MUTEX(nfs_callback_mutex); static struct svc_program nfs4_callback_program; @@ -56,10 +60,10 @@ module_param_call(callback_tcpport, param_set_port, param_get_int, &nfs_callback_set_tcpport, 0644); /* - * This is the callback kernel thread. + * This is the NFSv4 callback kernel thread. */ static int -nfs_callback_svc(void *vrqstp) +nfs4_callback_svc(void *vrqstp) { int err, preverr = 0; struct svc_rqst *rqstp = vrqstp; @@ -97,20 +101,12 @@ nfs_callback_svc(void *vrqstp) } /* - * Bring up the callback thread if it is not already up. 
+ * Prepare to bring up the NFSv4 callback service */ -int nfs_callback_up(void) +struct svc_rqst * +nfs4_callback_up(struct svc_serv *serv) { - struct svc_serv *serv = NULL; - int ret = 0; - - mutex_lock(&nfs_callback_mutex); - if (nfs_callback_info.users++ || nfs_callback_info.task != NULL) - goto out; - serv = svc_create(&nfs4_callback_program, NFS4_CALLBACK_BUFSIZE, NULL); - ret = -ENOMEM; - if (!serv) - goto out_err; + int ret; ret = svc_create_xprt(serv, "tcp", PF_INET, nfs_callback_set_tcpport, SVC_SOCK_ANONYMOUS); @@ -127,27 +123,174 @@ int nfs_callback_up(void) nfs_callback_tcpport6 = ret; dprintk("NFS: Callback listener port = %u (af %u)\n", nfs_callback_tcpport6, PF_INET6); - } else if (ret != -EAFNOSUPPORT) + } else if (ret == -EAFNOSUPPORT) + ret = 0; + else goto out_err; #endif /* defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) */ - nfs_callback_info.rqst = svc_prepare_thread(serv, &serv->sv_pools[0]); - if (IS_ERR(nfs_callback_info.rqst)) { - ret = PTR_ERR(nfs_callback_info.rqst); - nfs_callback_info.rqst = NULL; + return svc_prepare_thread(serv, &serv->sv_pools[0]); + +out_err: + if (ret == 0) + ret = -ENOMEM; + return ERR_PTR(ret); +} + +#if defined(CONFIG_NFS_V4_1) +/* + * The callback service for NFSv4.1 callbacks + */ +static int +nfs41_callback_svc(void *vrqstp) +{ + struct svc_rqst *rqstp = vrqstp; + struct svc_serv *serv = rqstp->rq_server; + struct rpc_rqst *req; + int error; + DEFINE_WAIT(wq); + + set_freezable(); + + /* + * FIXME: do we really need to run this under the BKL? If so, please + * add a comment about what it's intended to protect. 
+ */ + lock_kernel(); + while (!kthread_should_stop()) { + prepare_to_wait(&serv->sv_cb_waitq, &wq, TASK_INTERRUPTIBLE); + spin_lock_bh(&serv->sv_cb_lock); + if (!list_empty(&serv->sv_cb_list)) { + req = list_first_entry(&serv->sv_cb_list, + struct rpc_rqst, rq_bc_list); + list_del(&req->rq_bc_list); + spin_unlock_bh(&serv->sv_cb_lock); + dprintk("Invoking bc_svc_process()\n"); + error = bc_svc_process(serv, req, rqstp); + dprintk("bc_svc_process() returned w/ error code= %d\n", + error); + } else { + spin_unlock_bh(&serv->sv_cb_lock); + schedule(); + } + finish_wait(&serv->sv_cb_waitq, &wq); + } + unlock_kernel(); + return 0; +} + +/* + * Bring up the NFSv4.1 callback service + */ +struct svc_rqst * +nfs41_callback_up(struct svc_serv *serv, struct rpc_xprt *xprt) +{ + struct svc_xprt *bc_xprt; + struct svc_rqst *rqstp = ERR_PTR(-ENOMEM); + + dprintk("--> %s\n", __func__); + /* Create a svc_sock for the service */ + bc_xprt = svc_sock_create(serv, xprt->prot); + if (!bc_xprt) + goto out; + + /* + * Save the svc_serv in the transport so that it can + * be referenced when the session backchannel is initialized + */ + serv->bc_xprt = bc_xprt; + xprt->bc_serv = serv; + + INIT_LIST_HEAD(&serv->sv_cb_list); + spin_lock_init(&serv->sv_cb_lock); + init_waitqueue_head(&serv->sv_cb_waitq); + rqstp = svc_prepare_thread(serv, &serv->sv_pools[0]); + if (IS_ERR(rqstp)) + svc_sock_destroy(bc_xprt); +out: + dprintk("--> %s return %p\n", __func__, rqstp); + return rqstp; +} + +static inline int nfs_minorversion_callback_svc_setup(u32 minorversion, + struct svc_serv *serv, struct rpc_xprt *xprt, + struct svc_rqst **rqstpp, int (**callback_svc)(void *vrqstp)) +{ + if (minorversion) { + *rqstpp = nfs41_callback_up(serv, xprt); + *callback_svc = nfs41_callback_svc; + } + return minorversion; +} + +static inline void nfs_callback_bc_serv(u32 minorversion, struct rpc_xprt *xprt, + struct nfs_callback_data *cb_info) +{ + if (minorversion) + xprt->bc_serv = cb_info->serv; +} +#else +static 
inline int nfs_minorversion_callback_svc_setup(u32 minorversion, + struct svc_serv *serv, struct rpc_xprt *xprt, + struct svc_rqst **rqstpp, int (**callback_svc)(void *vrqstp)) +{ + return 0; +} + +static inline void nfs_callback_bc_serv(u32 minorversion, struct rpc_xprt *xprt, + struct nfs_callback_data *cb_info) +{ +} +#endif /* CONFIG_NFS_V4_1 */ + +/* + * Bring up the callback thread if it is not already up. + */ +int nfs_callback_up(u32 minorversion, struct rpc_xprt *xprt) +{ + struct svc_serv *serv = NULL; + struct svc_rqst *rqstp; + int (*callback_svc)(void *vrqstp); + struct nfs_callback_data *cb_info = &nfs_callback_info[minorversion]; + char svc_name[12]; + int ret = 0; + int minorversion_setup; + + mutex_lock(&nfs_callback_mutex); + if (cb_info->users++ || cb_info->task != NULL) { + nfs_callback_bc_serv(minorversion, xprt, cb_info); + goto out; + } + serv = svc_create(&nfs4_callback_program, NFS4_CALLBACK_BUFSIZE, NULL); + if (!serv) { + ret = -ENOMEM; + goto out_err; + } + + minorversion_setup = nfs_minorversion_callback_svc_setup(minorversion, + serv, xprt, &rqstp, &callback_svc); + if (!minorversion_setup) { + /* v4.0 callback setup */ + rqstp = nfs4_callback_up(serv); + callback_svc = nfs4_callback_svc; + } + + if (IS_ERR(rqstp)) { + ret = PTR_ERR(rqstp); goto out_err; } svc_sock_update_bufs(serv); - nfs_callback_info.task = kthread_run(nfs_callback_svc, - nfs_callback_info.rqst, - "nfsv4-svc"); - if (IS_ERR(nfs_callback_info.task)) { - ret = PTR_ERR(nfs_callback_info.task); - svc_exit_thread(nfs_callback_info.rqst); - nfs_callback_info.rqst = NULL; - nfs_callback_info.task = NULL; + sprintf(svc_name, "nfsv4.%u-svc", minorversion); + cb_info->serv = serv; + cb_info->rqst = rqstp; + cb_info->task = kthread_run(callback_svc, cb_info->rqst, svc_name); + if (IS_ERR(cb_info->task)) { + ret = PTR_ERR(cb_info->task); + svc_exit_thread(cb_info->rqst); + cb_info->rqst = NULL; + cb_info->task = NULL; goto out_err; } out: @@ -164,22 +307,25 @@ out: out_err: 
dprintk("NFS: Couldn't create callback socket or server thread; " "err = %d\n", ret); - nfs_callback_info.users--; + cb_info->users--; goto out; } /* * Kill the callback thread if it's no longer being used. */ -void nfs_callback_down(void) +void nfs_callback_down(int minorversion) { + struct nfs_callback_data *cb_info = &nfs_callback_info[minorversion]; + mutex_lock(&nfs_callback_mutex); - nfs_callback_info.users--; - if (nfs_callback_info.users == 0 && nfs_callback_info.task != NULL) { - kthread_stop(nfs_callback_info.task); - svc_exit_thread(nfs_callback_info.rqst); - nfs_callback_info.rqst = NULL; - nfs_callback_info.task = NULL; + cb_info->users--; + if (cb_info->users == 0 && cb_info->task != NULL) { + kthread_stop(cb_info->task); + svc_exit_thread(cb_info->rqst); + cb_info->serv = NULL; + cb_info->rqst = NULL; + cb_info->task = NULL; } mutex_unlock(&nfs_callback_mutex); } diff --git a/fs/nfs/callback.h b/fs/nfs/callback.h index e110e28..07baa82 100644 --- a/fs/nfs/callback.h +++ b/fs/nfs/callback.h @@ -20,13 +20,24 @@ enum nfs4_callback_procnum { enum nfs4_callback_opnum { OP_CB_GETATTR = 3, OP_CB_RECALL = 4, +/* Callback operations new to NFSv4.1 */ + OP_CB_LAYOUTRECALL = 5, + OP_CB_NOTIFY = 6, + OP_CB_PUSH_DELEG = 7, + OP_CB_RECALL_ANY = 8, + OP_CB_RECALLABLE_OBJ_AVAIL = 9, + OP_CB_RECALL_SLOT = 10, + OP_CB_SEQUENCE = 11, + OP_CB_WANTS_CANCELLED = 12, + OP_CB_NOTIFY_LOCK = 13, + OP_CB_NOTIFY_DEVICEID = 14, OP_CB_ILLEGAL = 10044, }; struct cb_compound_hdr_arg { unsigned int taglen; const char *tag; - unsigned int callback_ident; + unsigned int minorversion; unsigned nops; }; @@ -59,16 +70,59 @@ struct cb_recallargs { uint32_t truncate; }; +#if defined(CONFIG_NFS_V4_1) + +struct referring_call { + uint32_t rc_sequenceid; + uint32_t rc_slotid; +}; + +struct referring_call_list { + struct nfs4_sessionid rcl_sessionid; + uint32_t rcl_nrefcalls; + struct referring_call *rcl_refcalls; +}; + +struct cb_sequenceargs { + struct sockaddr *csa_addr; + struct 
nfs4_sessionid csa_sessionid; + uint32_t csa_sequenceid; + uint32_t csa_slotid; + uint32_t csa_highestslotid; + uint32_t csa_cachethis; + uint32_t csa_nrclists; + struct referring_call_list *csa_rclists; +}; + +struct cb_sequenceres { + __be32 csr_status; + struct nfs4_sessionid csr_sessionid; + uint32_t csr_sequenceid; + uint32_t csr_slotid; + uint32_t csr_highestslotid; + uint32_t csr_target_highestslotid; +}; + +extern unsigned nfs4_callback_sequence(struct cb_sequenceargs *args, + struct cb_sequenceres *res); + +#endif /* CONFIG_NFS_V4_1 */ + extern __be32 nfs4_callback_getattr(struct cb_getattrargs *args, struct cb_getattrres *res); extern __be32 nfs4_callback_recall(struct cb_recallargs *args, void *dummy); #ifdef CONFIG_NFS_V4 -extern int nfs_callback_up(void); -extern void nfs_callback_down(void); -#else -#define nfs_callback_up() (0) -#define nfs_callback_down() do {} while(0) -#endif +extern int nfs_callback_up(u32 minorversion, struct rpc_xprt *xprt); +extern void nfs_callback_down(int minorversion); +#endif /* CONFIG_NFS_V4 */ + +/* + * nfs41: Callbacks are expected to not cause substantial latency, + * so we limit their concurrency to 1 by setting up the maximum number + * of slots for the backchannel. + */ +#define NFS41_BC_MIN_CALLBACKS 1 +#define NFS41_BC_MAX_CALLBACKS 1 extern unsigned int nfs_callback_set_tcpport; extern unsigned short nfs_callback_tcpport; diff --git a/fs/nfs/callback_proc.c b/fs/nfs/callback_proc.c index f7e83e2..b7da1f5 100644 --- a/fs/nfs/callback_proc.c +++ b/fs/nfs/callback_proc.c @@ -101,3 +101,130 @@ out: dprintk("%s: exit with status = %d\n", __func__, ntohl(res)); return res; } + +#if defined(CONFIG_NFS_V4_1) + +/* + * Validate the sequenceID sent by the server. + * Return success if the sequenceID is one more than what we last saw on + * this slot, accounting for wraparound. Increments the slot's sequence. 
+ * + * We don't yet implement a duplicate request cache, so at this time + * we will log replays, and process them as if we had not seen them before, + * but we don't bump the sequence in the slot. Not too worried about it, + * since we only currently implement idempotent callbacks anyway. + * + * We have a single slot backchannel at this time, so we don't bother + * checking the used_slots bit array on the table. The lower layer guarantees + * a single outstanding callback request at a time. + */ +static int +validate_seqid(struct nfs4_slot_table *tbl, u32 slotid, u32 seqid) +{ + struct nfs4_slot *slot; + + dprintk("%s enter. slotid %d seqid %d\n", + __func__, slotid, seqid); + + if (slotid > NFS41_BC_MAX_CALLBACKS) + return htonl(NFS4ERR_BADSLOT); + + slot = tbl->slots + slotid; + dprintk("%s slot table seqid: %d\n", __func__, slot->seq_nr); + + /* Normal */ + if (likely(seqid == slot->seq_nr + 1)) { + slot->seq_nr++; + return htonl(NFS4_OK); + } + + /* Replay */ + if (seqid == slot->seq_nr) { + dprintk("%s seqid %d is a replay - no DRC available\n", + __func__, seqid); + return htonl(NFS4_OK); + } + + /* Wraparound */ + if (seqid == 1 && (slot->seq_nr + 1) == 0) { + slot->seq_nr = 1; + return htonl(NFS4_OK); + } + + /* Misordered request */ + return htonl(NFS4ERR_SEQ_MISORDERED); +} + +/* + * Returns a pointer to a held 'struct nfs_client' that matches the server's + * address, major version number, and session ID. It is the caller's + * responsibility to release the returned reference. + * + * Returns NULL if there are no connections with sessions, or if no session + * matches the one of interest. 
+ */ + static struct nfs_client *find_client_with_session( + const struct sockaddr *addr, u32 nfsversion, + struct nfs4_sessionid *sessionid) +{ + struct nfs_client *clp; + + clp = nfs_find_client(addr, 4); + if (clp == NULL) + return NULL; + + do { + struct nfs_client *prev = clp; + + if (clp->cl_session != NULL) { + if (memcmp(clp->cl_session->sess_id.data, + sessionid->data, + NFS4_MAX_SESSIONID_LEN) == 0) { + /* Returns a held reference to clp */ + return clp; + } + } + clp = nfs_find_client_next(prev); + nfs_put_client(prev); + } while (clp != NULL); + + return NULL; +} + +/* FIXME: referring calls should be processed */ +unsigned nfs4_callback_sequence(struct cb_sequenceargs *args, + struct cb_sequenceres *res) +{ + struct nfs_client *clp; + int i, status; + + for (i = 0; i < args->csa_nrclists; i++) + kfree(args->csa_rclists[i].rcl_refcalls); + kfree(args->csa_rclists); + + status = htonl(NFS4ERR_BADSESSION); + clp = find_client_with_session(args->csa_addr, 4, &args->csa_sessionid); + if (clp == NULL) + goto out; + + status = validate_seqid(&clp->cl_session->bc_slot_table, + args->csa_slotid, args->csa_sequenceid); + if (status) + goto out_putclient; + + memcpy(&res->csr_sessionid, &args->csa_sessionid, + sizeof(res->csr_sessionid)); + res->csr_sequenceid = args->csa_sequenceid; + res->csr_slotid = args->csa_slotid; + res->csr_highestslotid = NFS41_BC_MAX_CALLBACKS - 1; + res->csr_target_highestslotid = NFS41_BC_MAX_CALLBACKS - 1; + +out_putclient: + nfs_put_client(clp); +out: + dprintk("%s: exit with status = %d\n", __func__, ntohl(status)); + res->csr_status = status; + return res->csr_status; +} + +#endif /* CONFIG_NFS_V4_1 */ diff --git a/fs/nfs/callback_xdr.c b/fs/nfs/callback_xdr.c index dd0ef34..e5a2dac 100644 --- a/fs/nfs/callback_xdr.c +++ b/fs/nfs/callback_xdr.c @@ -20,6 +20,11 @@ 2 + 2 + 3 + 3) #define CB_OP_RECALL_RES_MAXSZ (CB_OP_HDR_RES_MAXSZ) +#if defined(CONFIG_NFS_V4_1) +#define CB_OP_SEQUENCE_RES_MAXSZ (CB_OP_HDR_RES_MAXSZ + \ + 4 + 1 + 3) 
+#endif /* CONFIG_NFS_V4_1 */ + #define NFSDBG_FACILITY NFSDBG_CALLBACK typedef __be32 (*callback_process_op_t)(void *, void *); @@ -132,7 +137,6 @@ static __be32 decode_stateid(struct xdr_stream *xdr, nfs4_stateid *stateid) static __be32 decode_compound_hdr_arg(struct xdr_stream *xdr, struct cb_compound_hdr_arg *hdr) { __be32 *p; - unsigned int minor_version; __be32 status; status = decode_string(xdr, &hdr->taglen, &hdr->tag); @@ -147,15 +151,19 @@ static __be32 decode_compound_hdr_arg(struct xdr_stream *xdr, struct cb_compound p = read_buf(xdr, 12); if (unlikely(p == NULL)) return htonl(NFS4ERR_RESOURCE); - minor_version = ntohl(*p++); - /* Check minor version is zero. */ - if (minor_version != 0) { - printk(KERN_WARNING "%s: NFSv4 server callback with illegal minor version %u!\n", - __func__, minor_version); + hdr->minorversion = ntohl(*p++); + /* Check minor version is zero or one. */ + if (hdr->minorversion <= 1) { + p++; /* skip callback_ident */ + } else { + printk(KERN_WARNING "%s: NFSv4 server callback with " + "illegal minor version %u!\n", + __func__, hdr->minorversion); return htonl(NFS4ERR_MINOR_VERS_MISMATCH); } - hdr->callback_ident = ntohl(*p++); hdr->nops = ntohl(*p); + dprintk("%s: minorversion %d nops %d\n", __func__, + hdr->minorversion, hdr->nops); return 0; } @@ -204,6 +212,122 @@ out: return status; } +#if defined(CONFIG_NFS_V4_1) + +static unsigned decode_sessionid(struct xdr_stream *xdr, + struct nfs4_sessionid *sid) +{ + uint32_t *p; + int len = NFS4_MAX_SESSIONID_LEN; + + p = read_buf(xdr, len); + if (unlikely(p == NULL)) + return htonl(NFS4ERR_RESOURCE);; + + memcpy(sid->data, p, len); + return 0; +} + +static unsigned decode_rc_list(struct xdr_stream *xdr, + struct referring_call_list *rc_list) +{ + uint32_t *p; + int i; + unsigned status; + + status = decode_sessionid(xdr, &rc_list->rcl_sessionid); + if (status) + goto out; + + status = htonl(NFS4ERR_RESOURCE); + p = read_buf(xdr, sizeof(uint32_t)); + if (unlikely(p == NULL)) + goto 
out; + + rc_list->rcl_nrefcalls = ntohl(*p++); + if (rc_list->rcl_nrefcalls) { + p = read_buf(xdr, + rc_list->rcl_nrefcalls * 2 * sizeof(uint32_t)); + if (unlikely(p == NULL)) + goto out; + rc_list->rcl_refcalls = kmalloc(rc_list->rcl_nrefcalls * + sizeof(*rc_list->rcl_refcalls), + GFP_KERNEL); + if (unlikely(rc_list->rcl_refcalls == NULL)) + goto out; + for (i = 0; i < rc_list->rcl_nrefcalls; i++) { + rc_list->rcl_refcalls[i].rc_sequenceid = ntohl(*p++); + rc_list->rcl_refcalls[i].rc_slotid = ntohl(*p++); + } + } + status = 0; + +out: + return status; +} + +static unsigned decode_cb_sequence_args(struct svc_rqst *rqstp, + struct xdr_stream *xdr, + struct cb_sequenceargs *args) +{ + uint32_t *p; + int i; + unsigned status; + + status = decode_sessionid(xdr, &args->csa_sessionid); + if (status) + goto out; + + status = htonl(NFS4ERR_RESOURCE); + p = read_buf(xdr, 5 * sizeof(uint32_t)); + if (unlikely(p == NULL)) + goto out; + + args->csa_addr = svc_addr(rqstp); + args->csa_sequenceid = ntohl(*p++); + args->csa_slotid = ntohl(*p++); + args->csa_highestslotid = ntohl(*p++); + args->csa_cachethis = ntohl(*p++); + args->csa_nrclists = ntohl(*p++); + args->csa_rclists = NULL; + if (args->csa_nrclists) { + args->csa_rclists = kmalloc(args->csa_nrclists * + sizeof(*args->csa_rclists), + GFP_KERNEL); + if (unlikely(args->csa_rclists == NULL)) + goto out; + + for (i = 0; i < args->csa_nrclists; i++) { + status = decode_rc_list(xdr, &args->csa_rclists[i]); + if (status) + goto out_free; + } + } + status = 0; + + dprintk("%s: sessionid %x:%x:%x:%x sequenceid %u slotid %u " + "highestslotid %u cachethis %d nrclists %u\n", + __func__, + ((u32 *)&args->csa_sessionid)[0], + ((u32 *)&args->csa_sessionid)[1], + ((u32 *)&args->csa_sessionid)[2], + ((u32 *)&args->csa_sessionid)[3], + args->csa_sequenceid, args->csa_slotid, + args->csa_highestslotid, args->csa_cachethis, + args->csa_nrclists); +out: + dprintk("%s: exit with status = %d\n", __func__, ntohl(status)); + return status; + 
+out_free: + for (i = 0; i < args->csa_nrclists; i++) + kfree(args->csa_rclists[i].rcl_refcalls); + kfree(args->csa_rclists); + goto out; +} + +#endif /* CONFIG_NFS_V4_1 */ + static __be32 encode_string(struct xdr_stream *xdr, unsigned int len, const char *str) { __be32 *p; @@ -353,31 +477,134 @@ out: return status; } -static __be32 process_op(struct svc_rqst *rqstp, +#if defined(CONFIG_NFS_V4_1) + +static unsigned encode_sessionid(struct xdr_stream *xdr, + const struct nfs4_sessionid *sid) +{ + uint32_t *p; + int len = NFS4_MAX_SESSIONID_LEN; + + p = xdr_reserve_space(xdr, len); + if (unlikely(p == NULL)) + return htonl(NFS4ERR_RESOURCE); + + memcpy(p, sid, len); + return 0; +} + +static unsigned encode_cb_sequence_res(struct svc_rqst *rqstp, + struct xdr_stream *xdr, + const struct cb_sequenceres *res) +{ + uint32_t *p; + unsigned status = res->csr_status; + + if (unlikely(status != 0)) + goto out; + + encode_sessionid(xdr, &res->csr_sessionid); + + p = xdr_reserve_space(xdr, 4 * sizeof(uint32_t)); + if (unlikely(p == NULL)) + return htonl(NFS4ERR_RESOURCE); + + *p++ = htonl(res->csr_sequenceid); + *p++ = htonl(res->csr_slotid); + *p++ = htonl(res->csr_highestslotid); + *p++ = htonl(res->csr_target_highestslotid); +out: + dprintk("%s: exit with status = %d\n", __func__, ntohl(status)); + return status; +} + +static __be32 +preprocess_nfs41_op(int nop, unsigned int op_nr, struct callback_op **op) +{ + if (op_nr == OP_CB_SEQUENCE) { + if (nop != 0) + return htonl(NFS4ERR_SEQUENCE_POS); + } else { + if (nop == 0) + return htonl(NFS4ERR_OP_NOT_IN_SESSION); + } + + switch (op_nr) { + case OP_CB_GETATTR: + case OP_CB_RECALL: + case OP_CB_SEQUENCE: + *op = &callback_ops[op_nr]; + break; + + case OP_CB_LAYOUTRECALL: + case OP_CB_NOTIFY_DEVICEID: + case OP_CB_NOTIFY: + case OP_CB_PUSH_DELEG: + case OP_CB_RECALL_ANY: + case OP_CB_RECALLABLE_OBJ_AVAIL: + case OP_CB_RECALL_SLOT: + case OP_CB_WANTS_CANCELLED: + case OP_CB_NOTIFY_LOCK: + return htonl(NFS4ERR_NOTSUPP); + + 
default: + return htonl(NFS4ERR_OP_ILLEGAL); + } + + return htonl(NFS_OK); +} + +#else /* CONFIG_NFS_V4_1 */ + +static __be32 +preprocess_nfs41_op(int nop, unsigned int op_nr, struct callback_op **op) +{ + return htonl(NFS4ERR_MINOR_VERS_MISMATCH); +} + +#endif /* CONFIG_NFS_V4_1 */ + +static __be32 +preprocess_nfs4_op(unsigned int op_nr, struct callback_op **op) +{ + switch (op_nr) { + case OP_CB_GETATTR: + case OP_CB_RECALL: + *op = &callback_ops[op_nr]; + break; + default: + return htonl(NFS4ERR_OP_ILLEGAL); + } + + return htonl(NFS_OK); +} + +static __be32 process_op(uint32_t minorversion, int nop, + struct svc_rqst *rqstp, struct xdr_stream *xdr_in, void *argp, struct xdr_stream *xdr_out, void *resp) { struct callback_op *op = &callback_ops[0]; unsigned int op_nr = OP_CB_ILLEGAL; - __be32 status = 0; + __be32 status; long maxlen; __be32 res; dprintk("%s: start\n", __func__); status = decode_op_hdr(xdr_in, &op_nr); - if (likely(status == 0)) { - switch (op_nr) { - case OP_CB_GETATTR: - case OP_CB_RECALL: - op = &callback_ops[op_nr]; - break; - default: - op_nr = OP_CB_ILLEGAL; - op = &callback_ops[0]; - status = htonl(NFS4ERR_OP_ILLEGAL); - } + if (unlikely(status)) { + status = htonl(NFS4ERR_OP_ILLEGAL); + goto out; } + dprintk("%s: minorversion=%d nop=%d op_nr=%u\n", + __func__, minorversion, nop, op_nr); + + status = minorversion ? 
preprocess_nfs41_op(nop, op_nr, &op) : + preprocess_nfs4_op(op_nr, &op); + if (status == htonl(NFS4ERR_OP_ILLEGAL)) + op_nr = OP_CB_ILLEGAL; +out: maxlen = xdr_out->end - xdr_out->p; if (maxlen > 0 && maxlen < PAGE_SIZE) { if (likely(status == 0 && op->decode_args != NULL)) @@ -425,7 +652,8 @@ static __be32 nfs4_callback_compound(struct svc_rqst *rqstp, void *argp, void *r return rpc_system_err; while (status == 0 && nops != hdr_arg.nops) { - status = process_op(rqstp, &xdr_in, argp, &xdr_out, resp); + status = process_op(hdr_arg.minorversion, nops, + rqstp, &xdr_in, argp, &xdr_out, resp); nops++; } @@ -452,7 +680,15 @@ static struct callback_op callback_ops[] = { .process_op = (callback_process_op_t)nfs4_callback_recall, .decode_args = (callback_decode_arg_t)decode_recall_args, .res_maxsize = CB_OP_RECALL_RES_MAXSZ, - } + }, +#if defined(CONFIG_NFS_V4_1) + [OP_CB_SEQUENCE] = { + .process_op = (callback_process_op_t)nfs4_callback_sequence, + .decode_args = (callback_decode_arg_t)decode_cb_sequence_args, + .encode_res = (callback_encode_res_t)encode_cb_sequence_res, + .res_maxsize = CB_OP_SEQUENCE_RES_MAXSZ, + }, +#endif /* CONFIG_NFS_V4_1 */ }; /* diff --git a/fs/nfs/client.c b/fs/nfs/client.c index 75c9cd2..c2d0616 100644 --- a/fs/nfs/client.c +++ b/fs/nfs/client.c @@ -37,6 +37,7 @@ #include <linux/in6.h> #include <net/ipv6.h> #include <linux/nfs_xdr.h> +#include <linux/sunrpc/bc_xprt.h> #include <asm/system.h> @@ -102,6 +103,7 @@ struct nfs_client_initdata { size_t addrlen; const struct nfs_rpc_ops *rpc_ops; int proto; + u32 minorversion; }; /* @@ -114,18 +116,13 @@ static struct nfs_client *nfs_alloc_client(const struct nfs_client_initdata *cl_ { struct nfs_client *clp; struct rpc_cred *cred; + int err = -ENOMEM; if ((clp = kzalloc(sizeof(*clp), GFP_KERNEL)) == NULL) goto error_0; clp->rpc_ops = cl_init->rpc_ops; - if (cl_init->rpc_ops->version == 4) { - if (nfs_callback_up() < 0) - goto error_2; - __set_bit(NFS_CS_CALLBACK, &clp->cl_res_state); - } - 
atomic_set(&clp->cl_count, 1); clp->cl_cons_state = NFS_CS_INITING; @@ -133,9 +130,10 @@ static struct nfs_client *nfs_alloc_client(const struct nfs_client_initdata *cl_ clp->cl_addrlen = cl_init->addrlen; if (cl_init->hostname) { + err = -ENOMEM; clp->cl_hostname = kstrdup(cl_init->hostname, GFP_KERNEL); if (!clp->cl_hostname) - goto error_3; + goto error_cleanup; } INIT_LIST_HEAD(&clp->cl_superblocks); @@ -150,6 +148,7 @@ static struct nfs_client *nfs_alloc_client(const struct nfs_client_initdata *cl_ rpc_init_wait_queue(&clp->cl_rpcwaitq, "NFS client"); clp->cl_boot_time = CURRENT_TIME; clp->cl_state = 1 << NFS4CLNT_LEASE_EXPIRED; + clp->cl_minorversion = cl_init->minorversion; #endif cred = rpc_lookup_machine_cred(); if (!IS_ERR(cred)) @@ -159,13 +158,10 @@ static struct nfs_client *nfs_alloc_client(const struct nfs_client_initdata *cl_ return clp; -error_3: - if (__test_and_clear_bit(NFS_CS_CALLBACK, &clp->cl_res_state)) - nfs_callback_down(); -error_2: +error_cleanup: kfree(clp); error_0: - return NULL; + return ERR_PTR(err); } static void nfs4_shutdown_client(struct nfs_client *clp) @@ -182,12 +178,42 @@ static void nfs4_shutdown_client(struct nfs_client *clp) } /* + * Destroy the NFS4 callback service + */ +static void nfs4_destroy_callback(struct nfs_client *clp) +{ +#ifdef CONFIG_NFS_V4 + if (__test_and_clear_bit(NFS_CS_CALLBACK, &clp->cl_res_state)) + nfs_callback_down(clp->cl_minorversion); +#endif /* CONFIG_NFS_V4 */ +} + +/* + * Clears/puts all minor version specific parts from an nfs_client struct + * reverting it to minorversion 0. 
+ */ +static void nfs4_clear_client_minor_version(struct nfs_client *clp) +{ +#ifdef CONFIG_NFS_V4_1 + if (nfs4_has_session(clp)) { + nfs4_destroy_session(clp->cl_session); + clp->cl_session = NULL; + } + + clp->cl_call_sync = _nfs4_call_sync; +#endif /* CONFIG_NFS_V4_1 */ + + nfs4_destroy_callback(clp); +} + +/* * Destroy a shared client record */ static void nfs_free_client(struct nfs_client *clp) { dprintk("--> nfs_free_client(%u)\n", clp->rpc_ops->version); + nfs4_clear_client_minor_version(clp); nfs4_shutdown_client(clp); nfs_fscache_release_client_cookie(clp); @@ -196,9 +222,6 @@ static void nfs_free_client(struct nfs_client *clp) if (!IS_ERR(clp->cl_rpcclient)) rpc_shutdown_client(clp->cl_rpcclient); - if (__test_and_clear_bit(NFS_CS_CALLBACK, &clp->cl_res_state)) - nfs_callback_down(); - if (clp->cl_machine_cred != NULL) put_rpccred(clp->cl_machine_cred); @@ -347,7 +370,8 @@ struct nfs_client *nfs_find_client(const struct sockaddr *addr, u32 nfsversion) struct sockaddr *clap = (struct sockaddr *)&clp->cl_addr; /* Don't match clients that failed to initialise properly */ - if (clp->cl_cons_state != NFS_CS_READY) + if (!(clp->cl_cons_state == NFS_CS_READY || + clp->cl_cons_state == NFS_CS_SESSION_INITING)) continue; /* Different NFS versions cannot share the same nfs_client */ @@ -420,7 +444,9 @@ static struct nfs_client *nfs_match_client(const struct nfs_client_initdata *dat if (clp->cl_proto != data->proto) continue; - + /* Match nfsv4 minorversion */ + if (clp->cl_minorversion != data->minorversion) + continue; /* Match the full socket address */ if (!nfs_sockaddr_cmp(sap, clap)) continue; @@ -456,9 +482,10 @@ static struct nfs_client *nfs_get_client(const struct nfs_client_initdata *cl_in spin_unlock(&nfs_client_lock); new = nfs_alloc_client(cl_init); - } while (new); + } while (!IS_ERR(new)); - return ERR_PTR(-ENOMEM); + dprintk("--> nfs_get_client() = %ld [failed]\n", PTR_ERR(new)); + return new; /* install a new client and return with it unready */ 
install_client: @@ -478,7 +505,7 @@ found_client: nfs_free_client(new); error = wait_event_killable(nfs_client_active_wq, - clp->cl_cons_state != NFS_CS_INITING); + clp->cl_cons_state < NFS_CS_INITING); if (error < 0) { nfs_put_client(clp); return ERR_PTR(-ERESTARTSYS); @@ -499,13 +526,29 @@ found_client: /* * Mark a server as ready or failed */ -static void nfs_mark_client_ready(struct nfs_client *clp, int state) +void nfs_mark_client_ready(struct nfs_client *clp, int state) { clp->cl_cons_state = state; wake_up_all(&nfs_client_active_wq); } /* + * With sessions, the client is not marked ready until after a + * successful EXCHANGE_ID and CREATE_SESSION. + * + * Map cl_cons_state errors to EPROTONOSUPPORT to indicate + * other versions of NFS can be tried. + */ +int nfs4_check_client_ready(struct nfs_client *clp) +{ + if (!nfs4_has_session(clp)) + return 0; + if (clp->cl_cons_state < NFS_CS_READY) + return -EPROTONOSUPPORT; + return 0; +} + +/* * Initialise the timeout values for a connection */ static void nfs_init_timeout_values(struct rpc_timeout *to, int proto, @@ -1050,6 +1093,61 @@ error: #ifdef CONFIG_NFS_V4 /* + * Initialize the NFS4 callback service + */ +static int nfs4_init_callback(struct nfs_client *clp) +{ + int error; + + if (clp->rpc_ops->version == 4) { + if (nfs4_has_session(clp)) { + error = xprt_setup_backchannel( + clp->cl_rpcclient->cl_xprt, + NFS41_BC_MIN_CALLBACKS); + if (error < 0) + return error; + } + + error = nfs_callback_up(clp->cl_minorversion, + clp->cl_rpcclient->cl_xprt); + if (error < 0) { + dprintk("%s: failed to start callback.
Error = %d\n", + __func__, error); + return error; + } + __set_bit(NFS_CS_CALLBACK, &clp->cl_res_state); + } + return 0; +} + +/* + * Initialize the minor version specific parts of an NFS4 client record + */ +static int nfs4_init_client_minor_version(struct nfs_client *clp) +{ + clp->cl_call_sync = _nfs4_call_sync; + +#if defined(CONFIG_NFS_V4_1) + if (clp->cl_minorversion) { + struct nfs4_session *session = NULL; + /* + * Create the session and mark it expired. + * When a SEQUENCE operation encounters the expired session + * it will do session recovery to initialize it. + */ + session = nfs4_alloc_session(clp); + if (!session) + return -ENOMEM; + + clp->cl_session = session; + clp->cl_call_sync = _nfs4_call_sync_session; + } +#endif /* CONFIG_NFS_V4_1 */ + + return nfs4_init_callback(clp); +} + +/* * Initialise an NFS4 client record */ static int nfs4_init_client(struct nfs_client *clp, @@ -1083,7 +1181,12 @@ static int nfs4_init_client(struct nfs_client *clp, } __set_bit(NFS_CS_IDMAP, &clp->cl_res_state); - nfs_mark_client_ready(clp, NFS_CS_READY); + error = nfs4_init_client_minor_version(clp); + if (error < 0) + goto error; + + if (!nfs4_has_session(clp)) + nfs_mark_client_ready(clp, NFS_CS_READY); return 0; error: @@ -1101,7 +1204,8 @@ static int nfs4_set_client(struct nfs_server *server, const size_t addrlen, const char *ip_addr, rpc_authflavor_t authflavour, - int proto, const struct rpc_timeout *timeparms) + int proto, const struct rpc_timeout *timeparms, + u32 minorversion) { struct nfs_client_initdata cl_init = { .hostname = hostname, @@ -1109,6 +1213,7 @@ static int nfs4_set_client(struct nfs_server *server, .addrlen = addrlen, .rpc_ops = &nfs_v4_clientops, .proto = proto, + .minorversion = minorversion, }; struct nfs_client *clp; int error; @@ -1138,6 +1243,36 @@ error: } /* + * Initialize a session. + * Note: save the mount rsize and wsize for create_server negotiation. 
+ */ +static void nfs4_init_session(struct nfs_client *clp, + unsigned int wsize, unsigned int rsize) +{ +#if defined(CONFIG_NFS_V4_1) + if (nfs4_has_session(clp)) { + clp->cl_session->fc_attrs.max_rqst_sz = wsize; + clp->cl_session->fc_attrs.max_resp_sz = rsize; + } +#endif /* CONFIG_NFS_V4_1 */ +} + +/* + * Session has been established, and the client marked ready. + * Set the mount rsize and wsize with negotiated fore channel + * attributes which will be bound checked in nfs_server_set_fsinfo. + */ +static void nfs4_session_set_rwsize(struct nfs_server *server) +{ +#ifdef CONFIG_NFS_V4_1 + if (!nfs4_has_session(server->nfs_client)) + return; + server->rsize = server->nfs_client->cl_session->fc_attrs.max_resp_sz; + server->wsize = server->nfs_client->cl_session->fc_attrs.max_rqst_sz; +#endif /* CONFIG_NFS_V4_1 */ +} + +/* * Create a version 4 volume record */ static int nfs4_init_server(struct nfs_server *server, @@ -1164,7 +1299,8 @@ static int nfs4_init_server(struct nfs_server *server, data->client_address, data->auth_flavors[0], data->nfs_server.protocol, - &timeparms); + &timeparms, + data->minorversion); if (error < 0) goto error; @@ -1214,6 +1350,8 @@ struct nfs_server *nfs4_create_server(const struct nfs_parsed_mount_data *data, BUG_ON(!server->nfs_client->rpc_ops); BUG_ON(!server->nfs_client->rpc_ops->file_inode_ops); + nfs4_init_session(server->nfs_client, server->wsize, server->rsize); + /* Probe the root fh to retrieve its FSID */ error = nfs4_path_walk(server, mntfh, data->nfs_server.export_path); if (error < 0) @@ -1224,6 +1362,8 @@ struct nfs_server *nfs4_create_server(const struct nfs_parsed_mount_data *data, (unsigned long long) server->fsid.minor); dprintk("Mount FH: %d\n", mntfh->size); + nfs4_session_set_rwsize(server); + error = nfs_probe_fsinfo(server, mntfh, &fattr); if (error < 0) goto error; @@ -1282,7 +1422,8 @@ struct nfs_server *nfs4_create_referral_server(struct nfs_clone_mount *data, parent_client->cl_ipaddr, data->authflavor, 
parent_server->client->cl_xprt->prot, - parent_server->client->cl_timeout); + parent_server->client->cl_timeout, + parent_client->cl_minorversion); if (error < 0) goto error; diff --git a/fs/nfs/delegation.c b/fs/nfs/delegation.c index 968225a..af05b91 100644 --- a/fs/nfs/delegation.c +++ b/fs/nfs/delegation.c @@ -68,29 +68,26 @@ static int nfs_delegation_claim_locks(struct nfs_open_context *ctx, struct nfs4_ { struct inode *inode = state->inode; struct file_lock *fl; - int status; + int status = 0; + + if (inode->i_flock == NULL) + goto out; + /* Protect inode->i_flock using the BKL */ + lock_kernel(); for (fl = inode->i_flock; fl != NULL; fl = fl->fl_next) { if (!(fl->fl_flags & (FL_POSIX|FL_FLOCK))) continue; if (nfs_file_open_context(fl->fl_file) != ctx) continue; + unlock_kernel(); status = nfs4_lock_delegation_recall(state, fl); - if (status >= 0) - continue; - switch (status) { - default: - printk(KERN_ERR "%s: unhandled error %d.\n", - __func__, status); - case -NFS4ERR_EXPIRED: - /* kill_proc(fl->fl_pid, SIGLOST, 1); */ - case -NFS4ERR_STALE_CLIENTID: - nfs4_schedule_state_recovery(NFS_SERVER(inode)->nfs_client); - goto out_err; - } + if (status < 0) + goto out; + lock_kernel(); } - return 0; -out_err: + unlock_kernel(); +out: return status; } @@ -268,7 +265,10 @@ static int __nfs_inode_return_delegation(struct inode *inode, struct nfs_delegat struct nfs_inode *nfsi = NFS_I(inode); nfs_msync_inode(inode); - /* Guard against new delegated open calls */ + /* + * Guard against new delegated open/lock/unlock calls and against + * state recovery + */ down_write(&nfsi->rwsem); nfs_delegation_claim_opens(inode, &delegation->stateid); up_write(&nfsi->rwsem); diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c index 08f6b04..489fc01 100644 --- a/fs/nfs/direct.c +++ b/fs/nfs/direct.c @@ -259,6 +259,9 @@ static void nfs_direct_read_release(void *calldata) } static const struct rpc_call_ops nfs_read_direct_ops = { +#if defined(CONFIG_NFS_V4_1) + .rpc_call_prepare = 
nfs_read_prepare, +#endif /* CONFIG_NFS_V4_1 */ .rpc_call_done = nfs_direct_read_result, .rpc_release = nfs_direct_read_release, }; @@ -535,6 +538,9 @@ static void nfs_direct_commit_release(void *calldata) } static const struct rpc_call_ops nfs_commit_direct_ops = { +#if defined(CONFIG_NFS_V4_1) + .rpc_call_prepare = nfs_write_prepare, +#endif /* CONFIG_NFS_V4_1 */ .rpc_call_done = nfs_direct_commit_result, .rpc_release = nfs_direct_commit_release, }; @@ -673,6 +679,9 @@ out_unlock: } static const struct rpc_call_ops nfs_write_direct_ops = { +#if defined(CONFIG_NFS_V4_1) + .rpc_call_prepare = nfs_write_prepare, +#endif /* CONFIG_NFS_V4_1 */ .rpc_call_done = nfs_direct_write_result, .rpc_release = nfs_direct_write_release, }; diff --git a/fs/nfs/file.c b/fs/nfs/file.c index ec7e27d..0055b81 100644 --- a/fs/nfs/file.c +++ b/fs/nfs/file.c @@ -48,6 +48,9 @@ static ssize_t nfs_file_splice_read(struct file *filp, loff_t *ppos, size_t count, unsigned int flags); static ssize_t nfs_file_read(struct kiocb *, const struct iovec *iov, unsigned long nr_segs, loff_t pos); +static ssize_t nfs_file_splice_write(struct pipe_inode_info *pipe, + struct file *filp, loff_t *ppos, + size_t count, unsigned int flags); static ssize_t nfs_file_write(struct kiocb *, const struct iovec *iov, unsigned long nr_segs, loff_t pos); static int nfs_file_flush(struct file *, fl_owner_t id); @@ -73,6 +76,7 @@ const struct file_operations nfs_file_operations = { .lock = nfs_lock, .flock = nfs_flock, .splice_read = nfs_file_splice_read, + .splice_write = nfs_file_splice_write, .check_flags = nfs_check_flags, .setlease = nfs_setlease, }; @@ -587,12 +591,38 @@ out_swapfile: goto out; } +static ssize_t nfs_file_splice_write(struct pipe_inode_info *pipe, + struct file *filp, loff_t *ppos, + size_t count, unsigned int flags) +{ + struct dentry *dentry = filp->f_path.dentry; + struct inode *inode = dentry->d_inode; + ssize_t ret; + + dprintk("NFS splice_write(%s/%s, %lu@%llu)\n", + 
dentry->d_parent->d_name.name, dentry->d_name.name, + (unsigned long) count, (unsigned long long) *ppos); + + /* + * The combination of splice and an O_APPEND destination is disallowed. + */ + + nfs_add_stats(inode, NFSIOS_NORMALWRITTENBYTES, count); + + ret = generic_file_splice_write(pipe, filp, ppos, count, flags); + if (ret >= 0 && nfs_need_sync_write(filp, inode)) { + int err = nfs_do_fsync(nfs_file_open_context(filp), inode); + if (err < 0) + ret = err; + } + return ret; +} + static int do_getlk(struct file *filp, int cmd, struct file_lock *fl) { struct inode *inode = filp->f_mapping->host; int status = 0; - lock_kernel(); /* Try local locking first */ posix_test_lock(filp, fl); if (fl->fl_type != F_UNLCK) { @@ -608,7 +638,6 @@ static int do_getlk(struct file *filp, int cmd, struct file_lock *fl) status = NFS_PROTO(inode)->lock(filp, cmd, fl); out: - unlock_kernel(); return status; out_noconflict: fl->fl_type = F_UNLCK; @@ -650,13 +679,11 @@ static int do_unlk(struct file *filp, int cmd, struct file_lock *fl) * If we're signalled while cleaning up locks on process exit, we * still need to complete the unlock. 
*/ - lock_kernel(); /* Use local locking if mounted with "-onolock" */ if (!(NFS_SERVER(inode)->flags & NFS_MOUNT_NONLM)) status = NFS_PROTO(inode)->lock(filp, cmd, fl); else status = do_vfs_lock(filp, fl); - unlock_kernel(); return status; } @@ -673,13 +700,11 @@ static int do_setlk(struct file *filp, int cmd, struct file_lock *fl) if (status != 0) goto out; - lock_kernel(); /* Use local locking if mounted with "-onolock" */ if (!(NFS_SERVER(inode)->flags & NFS_MOUNT_NONLM)) status = NFS_PROTO(inode)->lock(filp, cmd, fl); else status = do_vfs_lock(filp, fl); - unlock_kernel(); if (status < 0) goto out; /* diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h index e4d6a83..7dd90a6 100644 --- a/fs/nfs/internal.h +++ b/fs/nfs/internal.h @@ -2,6 +2,7 @@ * NFS internal definitions */ +#include "nfs4_fs.h" #include <linux/mount.h> #include <linux/security.h> @@ -17,6 +18,18 @@ struct nfs_string; */ #define NFS_MAX_READAHEAD (RPC_DEF_SLOT_TABLE - 1) +/* + * Determine if sessions are in use. + */ +static inline int nfs4_has_session(const struct nfs_client *clp) +{ +#ifdef CONFIG_NFS_V4_1 + if (clp->cl_session) + return 1; +#endif /* CONFIG_NFS_V4_1 */ + return 0; +} + struct nfs_clone_mount { const struct super_block *sb; const struct dentry *dentry; @@ -30,6 +43,12 @@ struct nfs_clone_mount { }; /* + * Note: RFC 1813 doesn't limit the number of auth flavors that + * a server can return, so make something up. 
+ */ +#define NFS_MAX_SECFLAVORS (12) + +/* * In-kernel mount arguments */ struct nfs_parsed_mount_data { @@ -44,6 +63,7 @@ struct nfs_parsed_mount_data { unsigned int auth_flavor_len; rpc_authflavor_t auth_flavors[1]; char *client_address; + unsigned int minorversion; char *fscache_uniq; struct { @@ -77,6 +97,8 @@ struct nfs_mount_request { unsigned short protocol; struct nfs_fh *fh; int noresvport; + unsigned int *auth_flav_len; + rpc_authflavor_t *auth_flavs; }; extern int nfs_mount(struct nfs_mount_request *info); @@ -99,6 +121,8 @@ extern void nfs_free_server(struct nfs_server *server); extern struct nfs_server *nfs_clone_server(struct nfs_server *, struct nfs_fh *, struct nfs_fattr *); +extern void nfs_mark_client_ready(struct nfs_client *clp, int state); +extern int nfs4_check_client_ready(struct nfs_client *clp); #ifdef CONFIG_PROC_FS extern int __init nfs_fs_proc_init(void); extern void nfs_fs_proc_exit(void); @@ -146,6 +170,20 @@ extern __be32 * nfs_decode_dirent(__be32 *, struct nfs_entry *, int); extern struct rpc_procinfo nfs3_procedures[]; extern __be32 *nfs3_decode_dirent(__be32 *, struct nfs_entry *, int); +/* nfs4proc.c */ +static inline void nfs4_restart_rpc(struct rpc_task *task, + const struct nfs_client *clp) +{ +#ifdef CONFIG_NFS_V4_1 + if (nfs4_has_session(clp) && + test_bit(NFS4CLNT_SESSION_SETUP, &clp->cl_state)) { + rpc_restart_call_prepare(task); + return; + } +#endif /* CONFIG_NFS_V4_1 */ + rpc_restart_call(task); +} + /* nfs4xdr.c */ #ifdef CONFIG_NFS_V4 extern __be32 *nfs4_decode_dirent(__be32 *p, struct nfs_entry *entry, int plus); @@ -205,6 +243,38 @@ extern int nfs4_path_walk(struct nfs_server *server, const char *path); #endif +/* read.c */ +extern void nfs_read_prepare(struct rpc_task *task, void *calldata); + +/* write.c */ +extern void nfs_write_prepare(struct rpc_task *task, void *calldata); + +/* nfs4proc.c */ +extern int _nfs4_call_sync(struct nfs_server *server, + struct rpc_message *msg, + struct nfs4_sequence_args *args, + 
struct nfs4_sequence_res *res, + int cache_reply); +extern int _nfs4_call_sync_session(struct nfs_server *server, + struct rpc_message *msg, + struct nfs4_sequence_args *args, + struct nfs4_sequence_res *res, + int cache_reply); + +#ifdef CONFIG_NFS_V4_1 +extern void nfs41_sequence_free_slot(const struct nfs_client *, + struct nfs4_sequence_res *res); +#endif /* CONFIG_NFS_V4_1 */ + +static inline void nfs4_sequence_free_slot(const struct nfs_client *clp, + struct nfs4_sequence_res *res) +{ +#ifdef CONFIG_NFS_V4_1 + if (nfs4_has_session(clp)) + nfs41_sequence_free_slot(clp, res); +#endif /* CONFIG_NFS_V4_1 */ +} + /* * Determine the device name as a string */ diff --git a/fs/nfs/mount_clnt.c b/fs/nfs/mount_clnt.c index ca905a5..38ef9ea 100644 --- a/fs/nfs/mount_clnt.c +++ b/fs/nfs/mount_clnt.c @@ -20,8 +20,116 @@ # define NFSDBG_FACILITY NFSDBG_MOUNT #endif +/* + * Defined by RFC 1094, section A.3; and RFC 1813, section 5.1.4 + */ +#define MNTPATHLEN (1024) + +/* + * XDR data type sizes + */ +#define encode_dirpath_sz (1 + XDR_QUADLEN(MNTPATHLEN)) +#define MNT_status_sz (1) +#define MNT_fhs_status_sz (1) +#define MNT_fhandle_sz XDR_QUADLEN(NFS2_FHSIZE) +#define MNT_fhandle3_sz (1 + XDR_QUADLEN(NFS3_FHSIZE)) +#define MNT_authflav3_sz (1 + NFS_MAX_SECFLAVORS) + +/* + * XDR argument and result sizes + */ +#define MNT_enc_dirpath_sz encode_dirpath_sz +#define MNT_dec_mountres_sz (MNT_status_sz + MNT_fhandle_sz) +#define MNT_dec_mountres3_sz (MNT_status_sz + MNT_fhandle3_sz + \ + MNT_authflav3_sz) + +/* + * Defined by RFC 1094, section A.5 + */ +enum { + MOUNTPROC_NULL = 0, + MOUNTPROC_MNT = 1, + MOUNTPROC_DUMP = 2, + MOUNTPROC_UMNT = 3, + MOUNTPROC_UMNTALL = 4, + MOUNTPROC_EXPORT = 5, +}; + +/* + * Defined by RFC 1813, section 5.2 + */ +enum { + MOUNTPROC3_NULL = 0, + MOUNTPROC3_MNT = 1, + MOUNTPROC3_DUMP = 2, + MOUNTPROC3_UMNT = 3, + MOUNTPROC3_UMNTALL = 4, + MOUNTPROC3_EXPORT = 5, +}; + static struct rpc_program mnt_program; +/* + * Defined by OpenGroup XNFS Version
3W, chapter 8 + */ +enum mountstat { + MNT_OK = 0, + MNT_EPERM = 1, + MNT_ENOENT = 2, + MNT_EACCES = 13, + MNT_EINVAL = 22, +}; + +static struct { + u32 status; + int errno; +} mnt_errtbl[] = { + { .status = MNT_OK, .errno = 0, }, + { .status = MNT_EPERM, .errno = -EPERM, }, + { .status = MNT_ENOENT, .errno = -ENOENT, }, + { .status = MNT_EACCES, .errno = -EACCES, }, + { .status = MNT_EINVAL, .errno = -EINVAL, }, +}; + +/* + * Defined by RFC 1813, section 5.1.5 + */ +enum mountstat3 { + MNT3_OK = 0, /* no error */ + MNT3ERR_PERM = 1, /* Not owner */ + MNT3ERR_NOENT = 2, /* No such file or directory */ + MNT3ERR_IO = 5, /* I/O error */ + MNT3ERR_ACCES = 13, /* Permission denied */ + MNT3ERR_NOTDIR = 20, /* Not a directory */ + MNT3ERR_INVAL = 22, /* Invalid argument */ + MNT3ERR_NAMETOOLONG = 63, /* Filename too long */ + MNT3ERR_NOTSUPP = 10004, /* Operation not supported */ + MNT3ERR_SERVERFAULT = 10006, /* A failure on the server */ +}; + +static struct { + u32 status; + int errno; +} mnt3_errtbl[] = { + { .status = MNT3_OK, .errno = 0, }, + { .status = MNT3ERR_PERM, .errno = -EPERM, }, + { .status = MNT3ERR_NOENT, .errno = -ENOENT, }, + { .status = MNT3ERR_IO, .errno = -EIO, }, + { .status = MNT3ERR_ACCES, .errno = -EACCES, }, + { .status = MNT3ERR_NOTDIR, .errno = -ENOTDIR, }, + { .status = MNT3ERR_INVAL, .errno = -EINVAL, }, + { .status = MNT3ERR_NAMETOOLONG, .errno = -ENAMETOOLONG, }, + { .status = MNT3ERR_NOTSUPP, .errno = -ENOTSUPP, }, + { .status = MNT3ERR_SERVERFAULT, .errno = -ESERVERFAULT, }, +}; + +struct mountres { + int errno; + struct nfs_fh *fh; + unsigned int *auth_count; + rpc_authflavor_t *auth_flavors; +}; + struct mnt_fhstatus { u32 status; struct nfs_fh *fh; @@ -35,8 +143,10 @@ struct mnt_fhstatus { */ int nfs_mount(struct nfs_mount_request *info) { - struct mnt_fhstatus result = { - .fh = info->fh + struct mountres result = { + .fh = info->fh, + .auth_count = info->auth_flav_len, + .auth_flavors = info->auth_flavs, }; struct rpc_message msg 
= { .rpc_argp = info->dirpath, @@ -68,14 +178,14 @@ int nfs_mount(struct nfs_mount_request *info) if (info->version == NFS_MNT3_VERSION) msg.rpc_proc = &mnt_clnt->cl_procinfo[MOUNTPROC3_MNT]; else - msg.rpc_proc = &mnt_clnt->cl_procinfo[MNTPROC_MNT]; + msg.rpc_proc = &mnt_clnt->cl_procinfo[MOUNTPROC_MNT]; status = rpc_call_sync(mnt_clnt, &msg, 0); rpc_shutdown_client(mnt_clnt); if (status < 0) goto out_call_err; - if (result.status != 0) + if (result.errno != 0) goto out_mnt_err; dprintk("NFS: MNT request succeeded\n"); @@ -86,72 +196,215 @@ out: out_clnt_err: status = PTR_ERR(mnt_clnt); - dprintk("NFS: failed to create RPC client, status=%d\n", status); + dprintk("NFS: failed to create MNT RPC client, status=%d\n", status); goto out; out_call_err: - dprintk("NFS: failed to start MNT request, status=%d\n", status); + dprintk("NFS: MNT request failed, status=%d\n", status); goto out; out_mnt_err: - dprintk("NFS: MNT server returned result %d\n", result.status); - status = nfs_stat_to_errno(result.status); + dprintk("NFS: MNT server returned result %d\n", result.errno); + status = result.errno; goto out; } /* * XDR encode/decode functions for MOUNT */ -static int xdr_encode_dirpath(struct rpc_rqst *req, __be32 *p, - const char *path) + +static int encode_mntdirpath(struct xdr_stream *xdr, const char *pathname) +{ + const u32 pathname_len = strlen(pathname); + __be32 *p; + + if (unlikely(pathname_len > MNTPATHLEN)) + return -EIO; + + p = xdr_reserve_space(xdr, sizeof(u32) + pathname_len); + if (unlikely(p == NULL)) + return -EIO; + xdr_encode_opaque(p, pathname, pathname_len); + + return 0; +} + +static int mnt_enc_dirpath(struct rpc_rqst *req, __be32 *p, + const char *dirpath) +{ + struct xdr_stream xdr; + + xdr_init_encode(&xdr, &req->rq_snd_buf, p); + return encode_mntdirpath(&xdr, dirpath); +} + +/* + * RFC 1094: "A non-zero status indicates some sort of error. In this + * case, the status is a UNIX error number." 
This can be problematic + * if the server and client use different errno values for the same + * error. + * + * However, the OpenGroup XNFS spec provides a simple mapping that is + * independent of local errno values on the server and the client. + */ +static int decode_status(struct xdr_stream *xdr, struct mountres *res) { - p = xdr_encode_string(p, path); + unsigned int i; + u32 status; + __be32 *p; + + p = xdr_inline_decode(xdr, sizeof(status)); + if (unlikely(p == NULL)) + return -EIO; + status = ntohl(*p); - req->rq_slen = xdr_adjust_iovec(req->rq_svec, p); + for (i = 0; i < ARRAY_SIZE(mnt_errtbl); i++) { + if (mnt_errtbl[i].status == status) { + res->errno = mnt_errtbl[i].errno; + return 0; + } + } + + dprintk("NFS: unrecognized MNT status code: %u\n", status); + res->errno = -EACCES; return 0; } -static int xdr_decode_fhstatus(struct rpc_rqst *req, __be32 *p, - struct mnt_fhstatus *res) +static int decode_fhandle(struct xdr_stream *xdr, struct mountres *res) { struct nfs_fh *fh = res->fh; + __be32 *p; + + p = xdr_inline_decode(xdr, NFS2_FHSIZE); + if (unlikely(p == NULL)) + return -EIO; + + fh->size = NFS2_FHSIZE; + memcpy(fh->data, p, NFS2_FHSIZE); + return 0; +} + +static int mnt_dec_mountres(struct rpc_rqst *req, __be32 *p, + struct mountres *res) +{ + struct xdr_stream xdr; + int status; + + xdr_init_decode(&xdr, &req->rq_rcv_buf, p); + + status = decode_status(&xdr, res); + if (unlikely(status != 0 || res->errno != 0)) + return status; + return decode_fhandle(&xdr, res); +} + +static int decode_fhs_status(struct xdr_stream *xdr, struct mountres *res) +{ + unsigned int i; + u32 status; + __be32 *p; - if ((res->status = ntohl(*p++)) == 0) { - fh->size = NFS2_FHSIZE; - memcpy(fh->data, p, NFS2_FHSIZE); + p = xdr_inline_decode(xdr, sizeof(status)); + if (unlikely(p == NULL)) + return -EIO; + status = ntohl(*p); + + for (i = 0; i < ARRAY_SIZE(mnt3_errtbl); i++) { + if (mnt3_errtbl[i].status == status) { + res->errno = mnt3_errtbl[i].errno; + return 0; + }
} + + dprintk("NFS: unrecognized MNT3 status code: %u\n", status); + res->errno = -EACCES; return 0; } -static int xdr_decode_fhstatus3(struct rpc_rqst *req, __be32 *p, - struct mnt_fhstatus *res) +static int decode_fhandle3(struct xdr_stream *xdr, struct mountres *res) { struct nfs_fh *fh = res->fh; - unsigned size; - - if ((res->status = ntohl(*p++)) == 0) { - size = ntohl(*p++); - if (size <= NFS3_FHSIZE && size != 0) { - fh->size = size; - memcpy(fh->data, p, size); - } else - res->status = -EBADHANDLE; + u32 size; + __be32 *p; + + p = xdr_inline_decode(xdr, sizeof(size)); + if (unlikely(p == NULL)) + return -EIO; + + size = ntohl(*p++); + if (size > NFS3_FHSIZE || size == 0) + return -EIO; + + p = xdr_inline_decode(xdr, size); + if (unlikely(p == NULL)) + return -EIO; + + fh->size = size; + memcpy(fh->data, p, size); + return 0; +} + +static int decode_auth_flavors(struct xdr_stream *xdr, struct mountres *res) +{ + rpc_authflavor_t *flavors = res->auth_flavors; + unsigned int *count = res->auth_count; + u32 entries, i; + __be32 *p; + + if (*count == 0) + return 0; + + p = xdr_inline_decode(xdr, sizeof(entries)); + if (unlikely(p == NULL)) + return -EIO; + entries = ntohl(*p); + dprintk("NFS: received %u auth flavors\n", entries); + if (entries > NFS_MAX_SECFLAVORS) + entries = NFS_MAX_SECFLAVORS; + + p = xdr_inline_decode(xdr, sizeof(u32) * entries); + if (unlikely(p == NULL)) + return -EIO; + + if (entries > *count) + entries = *count; + + for (i = 0; i < entries; i++) { + flavors[i] = ntohl(*p++); + dprintk("NFS:\tflavor %u: %d\n", i, flavors[i]); } + *count = i; + return 0; } -#define MNT_dirpath_sz (1 + 256) -#define MNT_fhstatus_sz (1 + 8) -#define MNT_fhstatus3_sz (1 + 16) +static int mnt_dec_mountres3(struct rpc_rqst *req, __be32 *p, + struct mountres *res) +{ + struct xdr_stream xdr; + int status; + + xdr_init_decode(&xdr, &req->rq_rcv_buf, p); + + status = decode_fhs_status(&xdr, res); + if (unlikely(status != 0 || res->errno != 0)) + return status; + 
status = decode_fhandle3(&xdr, res); + if (unlikely(status != 0)) { + res->errno = -EBADHANDLE; + return 0; + } + return decode_auth_flavors(&xdr, res); +} static struct rpc_procinfo mnt_procedures[] = { - [MNTPROC_MNT] = { - .p_proc = MNTPROC_MNT, - .p_encode = (kxdrproc_t) xdr_encode_dirpath, - .p_decode = (kxdrproc_t) xdr_decode_fhstatus, - .p_arglen = MNT_dirpath_sz, - .p_replen = MNT_fhstatus_sz, - .p_statidx = MNTPROC_MNT, + [MOUNTPROC_MNT] = { + .p_proc = MOUNTPROC_MNT, + .p_encode = (kxdrproc_t)mnt_enc_dirpath, + .p_decode = (kxdrproc_t)mnt_dec_mountres, + .p_arglen = MNT_enc_dirpath_sz, + .p_replen = MNT_dec_mountres_sz, + .p_statidx = MOUNTPROC_MNT, .p_name = "MOUNT", }, }; @@ -159,10 +412,10 @@ static struct rpc_procinfo mnt_procedures[] = { static struct rpc_procinfo mnt3_procedures[] = { [MOUNTPROC3_MNT] = { .p_proc = MOUNTPROC3_MNT, - .p_encode = (kxdrproc_t) xdr_encode_dirpath, - .p_decode = (kxdrproc_t) xdr_decode_fhstatus3, - .p_arglen = MNT_dirpath_sz, - .p_replen = MNT_fhstatus3_sz, + .p_encode = (kxdrproc_t)mnt_enc_dirpath, + .p_decode = (kxdrproc_t)mnt_dec_mountres3, + .p_arglen = MNT_enc_dirpath_sz, + .p_replen = MNT_dec_mountres3_sz, .p_statidx = MOUNTPROC3_MNT, .p_name = "MOUNT", }, diff --git a/fs/nfs/namespace.c b/fs/nfs/namespace.c index f01caec..40c7667 100644 --- a/fs/nfs/namespace.c +++ b/fs/nfs/namespace.c @@ -65,6 +65,11 @@ char *nfs_path(const char *base, dentry = dentry->d_parent; } spin_unlock(&dcache_lock); + if (*end != '/') { + if (--buflen < 0) + goto Elong; + *--end = '/'; + } namelen = strlen(base); /* Strip off excess slashes in base string */ while (namelen > 0 && base[namelen - 1] == '/') diff --git a/fs/nfs/nfs3acl.c b/fs/nfs/nfs3acl.c index 6bbf0e6..bac6051 100644 --- a/fs/nfs/nfs3acl.c +++ b/fs/nfs/nfs3acl.c @@ -207,8 +207,6 @@ struct posix_acl *nfs3_proc_getacl(struct inode *inode, int type) status = nfs_revalidate_inode(server, inode); if (status < 0) return ERR_PTR(status); - if (NFS_I(inode)->cache_validity & 
NFS_INO_INVALID_ACL) - nfs_zap_acl_cache(inode); acl = nfs3_get_cached_acl(inode, type); if (acl != ERR_PTR(-EAGAIN)) return acl; diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h index 84345de..61bc3a3 100644 --- a/fs/nfs/nfs4_fs.h +++ b/fs/nfs/nfs4_fs.h @@ -44,6 +44,7 @@ enum nfs4_client_state { NFS4CLNT_RECLAIM_REBOOT, NFS4CLNT_RECLAIM_NOGRACE, NFS4CLNT_DELEGRETURN, + NFS4CLNT_SESSION_SETUP, }; /* @@ -177,6 +178,14 @@ struct nfs4_state_recovery_ops { int state_flag_bit; int (*recover_open)(struct nfs4_state_owner *, struct nfs4_state *); int (*recover_lock)(struct nfs4_state *, struct file_lock *); + int (*establish_clid)(struct nfs_client *, struct rpc_cred *); + struct rpc_cred * (*get_clid_cred)(struct nfs_client *); +}; + +struct nfs4_state_maintenance_ops { + int (*sched_state_renewal)(struct nfs_client *, struct rpc_cred *); + struct rpc_cred * (*get_state_renewal_cred_locked)(struct nfs_client *); + int (*renew_lease)(struct nfs_client *, struct rpc_cred *); }; extern const struct dentry_operations nfs4_dentry_operations; @@ -193,6 +202,7 @@ extern int nfs4_proc_setclientid(struct nfs_client *, u32, unsigned short, struc extern int nfs4_proc_setclientid_confirm(struct nfs_client *, struct rpc_cred *); extern int nfs4_proc_async_renew(struct nfs_client *, struct rpc_cred *); extern int nfs4_proc_renew(struct nfs_client *, struct rpc_cred *); +extern int nfs4_init_clientid(struct nfs_client *, struct rpc_cred *); extern int nfs4_do_close(struct path *path, struct nfs4_state *state, int wait); extern struct dentry *nfs4_atomic_open(struct inode *, struct dentry *, struct nameidata *); extern int nfs4_open_revalidate(struct inode *, struct dentry *, int, struct nameidata *); @@ -200,8 +210,26 @@ extern int nfs4_server_capabilities(struct nfs_server *server, struct nfs_fh *fh extern int nfs4_proc_fs_locations(struct inode *dir, const struct qstr *name, struct nfs4_fs_locations *fs_locations, struct page *page); -extern struct nfs4_state_recovery_ops 
nfs4_reboot_recovery_ops; -extern struct nfs4_state_recovery_ops nfs4_nograce_recovery_ops; +extern struct nfs4_state_recovery_ops *nfs4_reboot_recovery_ops[]; +extern struct nfs4_state_recovery_ops *nfs4_nograce_recovery_ops[]; +#if defined(CONFIG_NFS_V4_1) +extern int nfs4_setup_sequence(struct nfs_client *clp, + struct nfs4_sequence_args *args, struct nfs4_sequence_res *res, + int cache_reply, struct rpc_task *task); +extern void nfs4_destroy_session(struct nfs4_session *session); +extern struct nfs4_session *nfs4_alloc_session(struct nfs_client *clp); +extern int nfs4_proc_create_session(struct nfs_client *, int reset); +extern int nfs4_proc_destroy_session(struct nfs4_session *); +#else /* CONFIG_NFS_V4_1 */ +static inline int nfs4_setup_sequence(struct nfs_client *clp, + struct nfs4_sequence_args *args, struct nfs4_sequence_res *res, + int cache_reply, struct rpc_task *task) +{ + return 0; +} +#endif /* CONFIG_NFS_V4_1 */ + +extern struct nfs4_state_maintenance_ops *nfs4_state_renewal_ops[]; extern const u32 nfs4_fattr_bitmap[2]; extern const u32 nfs4_statfs_bitmap[2]; @@ -216,7 +244,12 @@ extern void nfs4_kill_renewd(struct nfs_client *); extern void nfs4_renew_state(struct work_struct *); /* nfs4state.c */ +struct rpc_cred *nfs4_get_setclientid_cred(struct nfs_client *clp); struct rpc_cred *nfs4_get_renew_cred_locked(struct nfs_client *clp); +#if defined(CONFIG_NFS_V4_1) +struct rpc_cred *nfs4_get_machine_cred_locked(struct nfs_client *clp); +struct rpc_cred *nfs4_get_exchange_id_cred(struct nfs_client *clp); +#endif /* CONFIG_NFS_V4_1 */ extern struct nfs4_state_owner * nfs4_get_state_owner(struct nfs_server *, struct rpc_cred *); extern void nfs4_put_state_owner(struct nfs4_state_owner *); diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 4674f80..92ce435 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -48,11 +48,14 @@ #include <linux/smp_lock.h> #include <linux/namei.h> #include <linux/mount.h> +#include <linux/module.h> +#include
<linux/sunrpc/bc_xprt.h> #include "nfs4_fs.h" #include "delegation.h" #include "internal.h" #include "iostat.h" +#include "callback.h" #define NFSDBG_FACILITY NFSDBG_PROC @@ -247,7 +250,25 @@ static int nfs4_handle_exception(const struct nfs_server *server, int errorcode, ret = nfs4_wait_clnt_recover(clp); if (ret == 0) exception->retry = 1; +#if !defined(CONFIG_NFS_V4_1) break; +#else /* !defined(CONFIG_NFS_V4_1) */ + if (!nfs4_has_session(server->nfs_client)) + break; + /* FALLTHROUGH */ + case -NFS4ERR_BADSESSION: + case -NFS4ERR_BADSLOT: + case -NFS4ERR_BAD_HIGH_SLOT: + case -NFS4ERR_CONN_NOT_BOUND_TO_SESSION: + case -NFS4ERR_DEADSESSION: + case -NFS4ERR_SEQ_FALSE_RETRY: + case -NFS4ERR_SEQ_MISORDERED: + dprintk("%s ERROR: %d Reset session\n", __func__, + errorcode); + set_bit(NFS4CLNT_SESSION_SETUP, &clp->cl_state); + exception->retry = 1; + /* FALLTHROUGH */ +#endif /* !defined(CONFIG_NFS_V4_1) */ case -NFS4ERR_FILE_OPEN: case -NFS4ERR_GRACE: case -NFS4ERR_DELAY: @@ -271,6 +292,353 @@ static void renew_lease(const struct nfs_server *server, unsigned long timestamp spin_unlock(&clp->cl_lock); } +#if defined(CONFIG_NFS_V4_1) + +/* + * nfs4_free_slot - free a slot and efficiently update slot table. + * + * freeing a slot is trivially done by clearing its respective bit + * in the bitmap. + * If the freed slotid equals highest_used_slotid we want to update it + * so that the server would be able to size down the slot table if needed, + * otherwise we know that the highest_used_slotid is still in use. + * When updating highest_used_slotid there may be "holes" in the bitmap + * so we need to scan down from highest_used_slotid to 0 looking for the now + * highest slotid in use. + * If none found, highest_used_slotid is set to -1. 
+ */ +static void +nfs4_free_slot(struct nfs4_slot_table *tbl, u8 free_slotid) +{ + int slotid = free_slotid; + + spin_lock(&tbl->slot_tbl_lock); + /* clear used bit in bitmap */ + __clear_bit(slotid, tbl->used_slots); + + /* update highest_used_slotid when it is freed */ + if (slotid == tbl->highest_used_slotid) { + slotid = find_last_bit(tbl->used_slots, tbl->max_slots); + if (slotid >= 0 && slotid < tbl->max_slots) + tbl->highest_used_slotid = slotid; + else + tbl->highest_used_slotid = -1; + } + rpc_wake_up_next(&tbl->slot_tbl_waitq); + spin_unlock(&tbl->slot_tbl_lock); + dprintk("%s: free_slotid %u highest_used_slotid %d\n", __func__, + free_slotid, tbl->highest_used_slotid); +} + +void nfs41_sequence_free_slot(const struct nfs_client *clp, + struct nfs4_sequence_res *res) +{ + struct nfs4_slot_table *tbl; + + if (!nfs4_has_session(clp)) { + dprintk("%s: No session\n", __func__); + return; + } + tbl = &clp->cl_session->fc_slot_table; + if (res->sr_slotid == NFS4_MAX_SLOT_TABLE) { + dprintk("%s: No slot\n", __func__); + /* just wake up the next guy waiting since + * we may have not consumed a slot after all */ + rpc_wake_up_next(&tbl->slot_tbl_waitq); + return; + } + nfs4_free_slot(tbl, res->sr_slotid); + res->sr_slotid = NFS4_MAX_SLOT_TABLE; +} + +static void nfs41_sequence_done(struct nfs_client *clp, + struct nfs4_sequence_res *res, + int rpc_status) +{ + unsigned long timestamp; + struct nfs4_slot_table *tbl; + struct nfs4_slot *slot; + + /* + * sr_status remains 1 if an RPC level error occurred. The server + * may or may not have processed the sequence operation.. + * Proceed as if the server received and processed the sequence + * operation. 
+ */ + if (res->sr_status == 1) + res->sr_status = NFS_OK; + + /* -ERESTARTSYS can result in skipping nfs41_sequence_setup */ + if (res->sr_slotid == NFS4_MAX_SLOT_TABLE) + goto out; + + tbl = &clp->cl_session->fc_slot_table; + slot = tbl->slots + res->sr_slotid; + + if (res->sr_status == 0) { + /* Update the slot's sequence and clientid lease timer */ + ++slot->seq_nr; + timestamp = res->sr_renewal_time; + spin_lock(&clp->cl_lock); + if (time_before(clp->cl_last_renewal, timestamp)) + clp->cl_last_renewal = timestamp; + spin_unlock(&clp->cl_lock); + return; + } +out: + /* The session may be reset by one of the error handlers. */ + dprintk("%s: Error %d free the slot \n", __func__, res->sr_status); + nfs41_sequence_free_slot(clp, res); +} + +/* + * nfs4_find_slot - efficiently look for a free slot + * + * nfs4_find_slot looks for an unset bit in the used_slots bitmap. + * If found, we mark the slot as used, update the highest_used_slotid, + * and respectively set up the sequence operation args. + * The slot number is returned if found, or NFS4_MAX_SLOT_TABLE otherwise. + * + * Note: must be called with under the slot_tbl_lock. 
+ */ +static u8 +nfs4_find_slot(struct nfs4_slot_table *tbl, struct rpc_task *task) +{ + int slotid; + u8 ret_id = NFS4_MAX_SLOT_TABLE; + BUILD_BUG_ON((u8)NFS4_MAX_SLOT_TABLE != (int)NFS4_MAX_SLOT_TABLE); + + dprintk("--> %s used_slots=%04lx highest_used=%d max_slots=%d\n", + __func__, tbl->used_slots[0], tbl->highest_used_slotid, + tbl->max_slots); + slotid = find_first_zero_bit(tbl->used_slots, tbl->max_slots); + if (slotid >= tbl->max_slots) + goto out; + __set_bit(slotid, tbl->used_slots); + if (slotid > tbl->highest_used_slotid) + tbl->highest_used_slotid = slotid; + ret_id = slotid; +out: + dprintk("<-- %s used_slots=%04lx highest_used=%d slotid=%d \n", + __func__, tbl->used_slots[0], tbl->highest_used_slotid, ret_id); + return ret_id; +} + +static int nfs4_recover_session(struct nfs4_session *session) +{ + struct nfs_client *clp = session->clp; + int ret; + + for (;;) { + ret = nfs4_wait_clnt_recover(clp); + if (ret != 0) + return ret; + if (!test_bit(NFS4CLNT_SESSION_SETUP, &clp->cl_state)) + break; + nfs4_schedule_state_manager(clp); + } + return 0; +} + +static int nfs41_setup_sequence(struct nfs4_session *session, + struct nfs4_sequence_args *args, + struct nfs4_sequence_res *res, + int cache_reply, + struct rpc_task *task) +{ + struct nfs4_slot *slot; + struct nfs4_slot_table *tbl; + int status = 0; + u8 slotid; + + dprintk("--> %s\n", __func__); + /* slot already allocated? 
*/ + if (res->sr_slotid != NFS4_MAX_SLOT_TABLE) + return 0; + + memset(res, 0, sizeof(*res)); + res->sr_slotid = NFS4_MAX_SLOT_TABLE; + tbl = &session->fc_slot_table; + + spin_lock(&tbl->slot_tbl_lock); + if (test_bit(NFS4CLNT_SESSION_SETUP, &session->clp->cl_state)) { + if (tbl->highest_used_slotid != -1) { + rpc_sleep_on(&tbl->slot_tbl_waitq, task, NULL); + spin_unlock(&tbl->slot_tbl_lock); + dprintk("<-- %s: Session reset: draining\n", __func__); + return -EAGAIN; + } + + /* The slot table is empty; start the reset thread */ + dprintk("%s Session Reset\n", __func__); + spin_unlock(&tbl->slot_tbl_lock); + status = nfs4_recover_session(session); + if (status) + return status; + spin_lock(&tbl->slot_tbl_lock); + } + + slotid = nfs4_find_slot(tbl, task); + if (slotid == NFS4_MAX_SLOT_TABLE) { + rpc_sleep_on(&tbl->slot_tbl_waitq, task, NULL); + spin_unlock(&tbl->slot_tbl_lock); + dprintk("<-- %s: no free slots\n", __func__); + return -EAGAIN; + } + spin_unlock(&tbl->slot_tbl_lock); + + slot = tbl->slots + slotid; + args->sa_session = session; + args->sa_slotid = slotid; + args->sa_cache_this = cache_reply; + + dprintk("<-- %s slotid=%d seqid=%d\n", __func__, slotid, slot->seq_nr); + + res->sr_session = session; + res->sr_slotid = slotid; + res->sr_renewal_time = jiffies; + /* + * sr_status is only set in decode_sequence, and so will remain + * set to 1 if an rpc level failure occurs. 
+ */ + res->sr_status = 1; + return 0; +} + +int nfs4_setup_sequence(struct nfs_client *clp, + struct nfs4_sequence_args *args, + struct nfs4_sequence_res *res, + int cache_reply, + struct rpc_task *task) +{ + int ret = 0; + + dprintk("--> %s clp %p session %p sr_slotid %d\n", + __func__, clp, clp->cl_session, res->sr_slotid); + + if (!nfs4_has_session(clp)) + goto out; + ret = nfs41_setup_sequence(clp->cl_session, args, res, cache_reply, + task); + if (ret != -EAGAIN) { + /* terminate rpc task */ + task->tk_status = ret; + task->tk_action = NULL; + } +out: + dprintk("<-- %s status=%d\n", __func__, ret); + return ret; +} + +struct nfs41_call_sync_data { + struct nfs_client *clp; + struct nfs4_sequence_args *seq_args; + struct nfs4_sequence_res *seq_res; + int cache_reply; +}; + +static void nfs41_call_sync_prepare(struct rpc_task *task, void *calldata) +{ + struct nfs41_call_sync_data *data = calldata; + + dprintk("--> %s data->clp->cl_session %p\n", __func__, + data->clp->cl_session); + if (nfs4_setup_sequence(data->clp, data->seq_args, + data->seq_res, data->cache_reply, task)) + return; + rpc_call_start(task); +} + +static void nfs41_call_sync_done(struct rpc_task *task, void *calldata) +{ + struct nfs41_call_sync_data *data = calldata; + + nfs41_sequence_done(data->clp, data->seq_res, task->tk_status); + nfs41_sequence_free_slot(data->clp, data->seq_res); +} + +struct rpc_call_ops nfs41_call_sync_ops = { + .rpc_call_prepare = nfs41_call_sync_prepare, + .rpc_call_done = nfs41_call_sync_done, +}; + +static int nfs4_call_sync_sequence(struct nfs_client *clp, + struct rpc_clnt *clnt, + struct rpc_message *msg, + struct nfs4_sequence_args *args, + struct nfs4_sequence_res *res, + int cache_reply) +{ + int ret; + struct rpc_task *task; + struct nfs41_call_sync_data data = { + .clp = clp, + .seq_args = args, + .seq_res = res, + .cache_reply = cache_reply, + }; + struct rpc_task_setup task_setup = { + .rpc_client = clnt, + .rpc_message = msg, + .callback_ops = 
&nfs41_call_sync_ops, + .callback_data = &data + }; + + res->sr_slotid = NFS4_MAX_SLOT_TABLE; + task = rpc_run_task(&task_setup); + if (IS_ERR(task)) + ret = PTR_ERR(task); + else { + ret = task->tk_status; + rpc_put_task(task); + } + return ret; +} + +int _nfs4_call_sync_session(struct nfs_server *server, + struct rpc_message *msg, + struct nfs4_sequence_args *args, + struct nfs4_sequence_res *res, + int cache_reply) +{ + return nfs4_call_sync_sequence(server->nfs_client, server->client, + msg, args, res, cache_reply); +} + +#endif /* CONFIG_NFS_V4_1 */ + +int _nfs4_call_sync(struct nfs_server *server, + struct rpc_message *msg, + struct nfs4_sequence_args *args, + struct nfs4_sequence_res *res, + int cache_reply) +{ + args->sa_session = res->sr_session = NULL; + return rpc_call_sync(server->client, msg, 0); +} + +#define nfs4_call_sync(server, msg, args, res, cache_reply) \ + (server)->nfs_client->cl_call_sync((server), (msg), &(args)->seq_args, \ + &(res)->seq_res, (cache_reply)) + +static void nfs4_sequence_done(const struct nfs_server *server, + struct nfs4_sequence_res *res, int rpc_status) +{ +#ifdef CONFIG_NFS_V4_1 + if (nfs4_has_session(server->nfs_client)) + nfs41_sequence_done(server->nfs_client, res, rpc_status); +#endif /* CONFIG_NFS_V4_1 */ +} + +/* no restart, therefore free slot here */ +static void nfs4_sequence_done_free_slot(const struct nfs_server *server, + struct nfs4_sequence_res *res, + int rpc_status) +{ + nfs4_sequence_done(server, res, rpc_status); + nfs4_sequence_free_slot(server->nfs_client, res); +} + static void update_changeattr(struct inode *dir, struct nfs4_change_info *cinfo) { struct nfs_inode *nfsi = NFS_I(dir); @@ -312,6 +680,7 @@ static void nfs4_init_opendata_res(struct nfs4_opendata *p) p->o_res.server = p->o_arg.server; nfs_fattr_init(&p->f_attr); nfs_fattr_init(&p->dir_attr); + p->o_res.seq_res.sr_slotid = NFS4_MAX_SLOT_TABLE; } static struct nfs4_opendata *nfs4_opendata_alloc(struct path *path, @@ -804,16 +1173,30 @@ int 
nfs4_open_delegation_recall(struct nfs_open_context *ctx, struct nfs4_state err = _nfs4_open_delegation_recall(ctx, state, stateid); switch (err) { case 0: - return err; + case -ENOENT: + case -ESTALE: + goto out; case -NFS4ERR_STALE_CLIENTID: case -NFS4ERR_STALE_STATEID: case -NFS4ERR_EXPIRED: /* Don't recall a delegation if it was lost */ nfs4_schedule_state_recovery(server->nfs_client); - return err; + goto out; + case -ERESTARTSYS: + /* + * The show must go on: exit, but mark the + * stateid as needing recovery. + */ + case -NFS4ERR_ADMIN_REVOKED: + case -NFS4ERR_BAD_STATEID: + nfs4_state_mark_reclaim_nograce(server->nfs_client, state); + case -ENOMEM: + err = 0; + goto out; } err = nfs4_handle_exception(server, err, &exception); } while (exception.retry); +out: return err; } @@ -929,6 +1312,10 @@ static void nfs4_open_prepare(struct rpc_task *task, void *calldata) nfs_copy_fh(&data->o_res.fh, data->o_arg.fh); } data->timestamp = jiffies; + if (nfs4_setup_sequence(data->o_arg.server->nfs_client, + &data->o_arg.seq_args, + &data->o_res.seq_res, 1, task)) + return; rpc_call_start(task); return; out_no_action: @@ -941,6 +1328,10 @@ static void nfs4_open_done(struct rpc_task *task, void *calldata) struct nfs4_opendata *data = calldata; data->rpc_status = task->tk_status; + + nfs4_sequence_done_free_slot(data->o_arg.server, &data->o_res.seq_res, + task->tk_status); + if (RPC_ASSASSINATED(task)) return; if (task->tk_status == 0) { @@ -1269,7 +1660,7 @@ static int _nfs4_do_setattr(struct inode *inode, struct rpc_cred *cred, } else memcpy(&arg.stateid, &zero_stateid, sizeof(arg.stateid)); - status = rpc_call_sync(server->client, &msg, 0); + status = nfs4_call_sync(server, &msg, &arg, &res, 1); if (status == 0 && state != NULL) renew_lease(server, timestamp); return status; @@ -1318,6 +1709,7 @@ static void nfs4_close_done(struct rpc_task *task, void *data) struct nfs4_state *state = calldata->state; struct nfs_server *server = NFS_SERVER(calldata->inode); + 
nfs4_sequence_done(server, &calldata->res.seq_res, task->tk_status); if (RPC_ASSASSINATED(task)) return; /* hmm. we are done with the inode, and in the process of freeing @@ -1336,10 +1728,11 @@ static void nfs4_close_done(struct rpc_task *task, void *data) break; default: if (nfs4_async_handle_error(task, server, state) == -EAGAIN) { - rpc_restart_call(task); + nfs4_restart_rpc(task, server->nfs_client); return; } } + nfs4_sequence_free_slot(server->nfs_client, &calldata->res.seq_res); nfs_refresh_inode(calldata->inode, calldata->res.fattr); } @@ -1380,6 +1773,10 @@ static void nfs4_close_prepare(struct rpc_task *task, void *data) calldata->arg.fmode = FMODE_WRITE; } calldata->timestamp = jiffies; + if (nfs4_setup_sequence((NFS_SERVER(calldata->inode))->nfs_client, + &calldata->arg.seq_args, &calldata->res.seq_res, + 1, task)) + return; rpc_call_start(task); } @@ -1419,13 +1816,15 @@ int nfs4_do_close(struct path *path, struct nfs4_state *state, int wait) }; int status = -ENOMEM; - calldata = kmalloc(sizeof(*calldata), GFP_KERNEL); + calldata = kzalloc(sizeof(*calldata), GFP_KERNEL); if (calldata == NULL) goto out; calldata->inode = state->inode; calldata->state = state; calldata->arg.fh = NFS_FH(state->inode); calldata->arg.stateid = &state->open_stateid; + if (nfs4_has_session(server->nfs_client)) + memset(calldata->arg.stateid->data, 0, 4); /* clear seqid */ /* Serialization for the sequence id */ calldata->arg.seqid = nfs_alloc_seqid(&state->owner->so_seqid); if (calldata->arg.seqid == NULL) @@ -1435,6 +1834,7 @@ int nfs4_do_close(struct path *path, struct nfs4_state *state, int wait) calldata->res.fattr = &calldata->fattr; calldata->res.seqid = calldata->arg.seqid; calldata->res.server = server; + calldata->res.seq_res.sr_slotid = NFS4_MAX_SLOT_TABLE; calldata->path.mnt = mntget(path->mnt); calldata->path.dentry = dget(path->dentry); @@ -1584,15 +1984,18 @@ void nfs4_close_context(struct nfs_open_context *ctx, int is_sync) static int 
_nfs4_server_capabilities(struct nfs_server *server, struct nfs_fh *fhandle) { + struct nfs4_server_caps_arg args = { + .fhandle = fhandle, + }; struct nfs4_server_caps_res res = {}; struct rpc_message msg = { .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_SERVER_CAPS], - .rpc_argp = fhandle, + .rpc_argp = &args, .rpc_resp = &res, }; int status; - status = rpc_call_sync(server->client, &msg, 0); + status = nfs4_call_sync(server, &msg, &args, &res, 0); if (status == 0) { memcpy(server->attr_bitmask, res.attr_bitmask, sizeof(server->attr_bitmask)); if (res.attr_bitmask[0] & FATTR4_WORD0_ACL) @@ -1606,6 +2009,7 @@ static int _nfs4_server_capabilities(struct nfs_server *server, struct nfs_fh *f server->cache_consistency_bitmask[1] &= FATTR4_WORD1_TIME_METADATA|FATTR4_WORD1_TIME_MODIFY; server->acl_bitmask = res.acl_bitmask; } + return status; } @@ -1637,8 +2041,15 @@ static int _nfs4_lookup_root(struct nfs_server *server, struct nfs_fh *fhandle, .rpc_argp = &args, .rpc_resp = &res, }; + int status; + nfs_fattr_init(info->fattr); - return rpc_call_sync(server->client, &msg, 0); + status = nfs4_recover_expired_lease(server); + if (!status) + status = nfs4_check_client_ready(server->nfs_client); + if (!status) + status = nfs4_call_sync(server, &msg, &args, &res, 0); + return status; } static int nfs4_lookup_root(struct nfs_server *server, struct nfs_fh *fhandle, @@ -1728,7 +2139,7 @@ static int _nfs4_proc_getattr(struct nfs_server *server, struct nfs_fh *fhandle, }; nfs_fattr_init(fattr); - return rpc_call_sync(server->client, &msg, 0); + return nfs4_call_sync(server, &msg, &args, &res, 0); } static int nfs4_proc_getattr(struct nfs_server *server, struct nfs_fh *fhandle, struct nfs_fattr *fattr) @@ -1812,7 +2223,7 @@ static int _nfs4_proc_lookupfh(struct nfs_server *server, const struct nfs_fh *d nfs_fattr_init(fattr); dprintk("NFS call lookupfh %s\n", name->name); - status = rpc_call_sync(server->client, &msg, 0); + status = nfs4_call_sync(server, &msg, &args, &res, 0); 
dprintk("NFS reply lookupfh: %d\n", status); return status; } @@ -1898,7 +2309,7 @@ static int _nfs4_proc_access(struct inode *inode, struct nfs_access_entry *entry args.access |= NFS4_ACCESS_EXECUTE; } nfs_fattr_init(&fattr); - status = rpc_call_sync(NFS_CLIENT(inode), &msg, 0); + status = nfs4_call_sync(server, &msg, &args, &res, 0); if (!status) { entry->mask = 0; if (res.access & NFS4_ACCESS_READ) @@ -1957,13 +2368,14 @@ static int _nfs4_proc_readlink(struct inode *inode, struct page *page, .pglen = pglen, .pages = &page, }; + struct nfs4_readlink_res res; struct rpc_message msg = { .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_READLINK], .rpc_argp = &args, - .rpc_resp = NULL, + .rpc_resp = &res, }; - return rpc_call_sync(NFS_CLIENT(inode), &msg, 0); + return nfs4_call_sync(NFS_SERVER(inode), &msg, &args, &res, 0); } static int nfs4_proc_readlink(struct inode *inode, struct page *page, @@ -2057,7 +2469,7 @@ static int _nfs4_proc_remove(struct inode *dir, struct qstr *name) int status; nfs_fattr_init(&res.dir_attr); - status = rpc_call_sync(server->client, &msg, 0); + status = nfs4_call_sync(server, &msg, &args, &res, 1); if (status == 0) { update_changeattr(dir, &res.cinfo); nfs_post_op_update_inode(dir, &res.dir_attr); @@ -2092,8 +2504,10 @@ static int nfs4_proc_unlink_done(struct rpc_task *task, struct inode *dir) { struct nfs_removeres *res = task->tk_msg.rpc_resp; + nfs4_sequence_done(res->server, &res->seq_res, task->tk_status); if (nfs4_async_handle_error(task, res->server, NULL) == -EAGAIN) return 0; + nfs4_sequence_free_slot(res->server->nfs_client, &res->seq_res); update_changeattr(dir, &res->cinfo); nfs_post_op_update_inode(dir, &res->dir_attr); return 1; @@ -2125,7 +2539,7 @@ static int _nfs4_proc_rename(struct inode *old_dir, struct qstr *old_name, nfs_fattr_init(res.old_fattr); nfs_fattr_init(res.new_fattr); - status = rpc_call_sync(server->client, &msg, 0); + status = nfs4_call_sync(server, &msg, &arg, &res, 1); if (!status) { 
update_changeattr(old_dir, &res.old_cinfo); @@ -2174,7 +2588,7 @@ static int _nfs4_proc_link(struct inode *inode, struct inode *dir, struct qstr * nfs_fattr_init(res.fattr); nfs_fattr_init(res.dir_attr); - status = rpc_call_sync(server->client, &msg, 0); + status = nfs4_call_sync(server, &msg, &arg, &res, 1); if (!status) { update_changeattr(dir, &res.cinfo); nfs_post_op_update_inode(dir, res.dir_attr); @@ -2235,7 +2649,8 @@ static struct nfs4_createdata *nfs4_alloc_createdata(struct inode *dir, static int nfs4_do_create(struct inode *dir, struct dentry *dentry, struct nfs4_createdata *data) { - int status = rpc_call_sync(NFS_CLIENT(dir), &data->msg, 0); + int status = nfs4_call_sync(NFS_SERVER(dir), &data->msg, + &data->arg, &data->res, 1); if (status == 0) { update_changeattr(dir, &data->res.dir_cinfo); nfs_post_op_update_inode(dir, data->res.dir_fattr); @@ -2344,7 +2759,7 @@ static int _nfs4_proc_readdir(struct dentry *dentry, struct rpc_cred *cred, (unsigned long long)cookie); nfs4_setup_readdir(cookie, NFS_COOKIEVERF(dir), dentry, &args); res.pgbase = args.pgbase; - status = rpc_call_sync(NFS_CLIENT(dir), &msg, 0); + status = nfs4_call_sync(NFS_SERVER(dir), &msg, &args, &res, 0); if (status == 0) memcpy(NFS_COOKIEVERF(dir), res.verifier.data, NFS4_VERIFIER_SIZE); @@ -2422,14 +2837,17 @@ static int _nfs4_proc_statfs(struct nfs_server *server, struct nfs_fh *fhandle, .fh = fhandle, .bitmask = server->attr_bitmask, }; + struct nfs4_statfs_res res = { + .fsstat = fsstat, + }; struct rpc_message msg = { .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_STATFS], .rpc_argp = &args, - .rpc_resp = fsstat, + .rpc_resp = &res, }; nfs_fattr_init(fsstat->fattr); - return rpc_call_sync(server->client, &msg, 0); + return nfs4_call_sync(server, &msg, &args, &res, 0); } static int nfs4_proc_statfs(struct nfs_server *server, struct nfs_fh *fhandle, struct nfs_fsstat *fsstat) @@ -2451,13 +2869,16 @@ static int _nfs4_do_fsinfo(struct nfs_server *server, struct nfs_fh *fhandle, .fh = 
fhandle, .bitmask = server->attr_bitmask, }; + struct nfs4_fsinfo_res res = { + .fsinfo = fsinfo, + }; struct rpc_message msg = { .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_FSINFO], .rpc_argp = &args, - .rpc_resp = fsinfo, + .rpc_resp = &res, }; - return rpc_call_sync(server->client, &msg, 0); + return nfs4_call_sync(server, &msg, &args, &res, 0); } static int nfs4_do_fsinfo(struct nfs_server *server, struct nfs_fh *fhandle, struct nfs_fsinfo *fsinfo) @@ -2486,10 +2907,13 @@ static int _nfs4_proc_pathconf(struct nfs_server *server, struct nfs_fh *fhandle .fh = fhandle, .bitmask = server->attr_bitmask, }; + struct nfs4_pathconf_res res = { + .pathconf = pathconf, + }; struct rpc_message msg = { .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_PATHCONF], .rpc_argp = &args, - .rpc_resp = pathconf, + .rpc_resp = &res, }; /* None of the pathconf attributes are mandatory to implement */ @@ -2499,7 +2923,7 @@ static int _nfs4_proc_pathconf(struct nfs_server *server, struct nfs_fh *fhandle } nfs_fattr_init(pathconf->fattr); - return rpc_call_sync(server->client, &msg, 0); + return nfs4_call_sync(server, &msg, &args, &res, 0); } static int nfs4_proc_pathconf(struct nfs_server *server, struct nfs_fh *fhandle, @@ -2520,8 +2944,13 @@ static int nfs4_read_done(struct rpc_task *task, struct nfs_read_data *data) { struct nfs_server *server = NFS_SERVER(data->inode); + dprintk("--> %s\n", __func__); + + /* nfs4_sequence_free_slot called in the read rpc_call_done */ + nfs4_sequence_done(server, &data->res.seq_res, task->tk_status); + if (nfs4_async_handle_error(task, server, data->args.context->state) == -EAGAIN) { - rpc_restart_call(task); + nfs4_restart_rpc(task, server->nfs_client); return -EAGAIN; } @@ -2541,8 +2970,12 @@ static int nfs4_write_done(struct rpc_task *task, struct nfs_write_data *data) { struct inode *inode = data->inode; + /* slot is freed in nfs_writeback_done */ + nfs4_sequence_done(NFS_SERVER(inode), &data->res.seq_res, + task->tk_status); + if 
(nfs4_async_handle_error(task, NFS_SERVER(inode), data->args.context->state) == -EAGAIN) { - rpc_restart_call(task); + nfs4_restart_rpc(task, NFS_SERVER(inode)->nfs_client); return -EAGAIN; } if (task->tk_status >= 0) { @@ -2567,10 +3000,14 @@ static int nfs4_commit_done(struct rpc_task *task, struct nfs_write_data *data) { struct inode *inode = data->inode; + nfs4_sequence_done(NFS_SERVER(inode), &data->res.seq_res, + task->tk_status); if (nfs4_async_handle_error(task, NFS_SERVER(inode), NULL) == -EAGAIN) { - rpc_restart_call(task); + nfs4_restart_rpc(task, NFS_SERVER(inode)->nfs_client); return -EAGAIN; } + nfs4_sequence_free_slot(NFS_SERVER(inode)->nfs_client, + &data->res.seq_res); nfs_refresh_inode(inode, data->res.fattr); return 0; } @@ -2603,6 +3040,9 @@ static void nfs4_renew_done(struct rpc_task *task, void *data) if (time_before(clp->cl_last_renewal,timestamp)) clp->cl_last_renewal = timestamp; spin_unlock(&clp->cl_lock); + dprintk("%s calling put_rpccred on rpc_cred %p\n", __func__, + task->tk_msg.rpc_cred); + put_rpccred(task->tk_msg.rpc_cred); } static const struct rpc_call_ops nfs4_renew_ops = { @@ -2742,12 +3182,14 @@ static ssize_t __nfs4_get_acl_uncached(struct inode *inode, void *buf, size_t bu .acl_pages = pages, .acl_len = buflen, }; - size_t resp_len = buflen; + struct nfs_getaclres res = { + .acl_len = buflen, + }; void *resp_buf; struct rpc_message msg = { .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_GETACL], .rpc_argp = &args, - .rpc_resp = &resp_len, + .rpc_resp = &res, }; struct page *localpage = NULL; int ret; @@ -2761,26 +3203,26 @@ static ssize_t __nfs4_get_acl_uncached(struct inode *inode, void *buf, size_t bu return -ENOMEM; args.acl_pages[0] = localpage; args.acl_pgbase = 0; - resp_len = args.acl_len = PAGE_SIZE; + args.acl_len = PAGE_SIZE; } else { resp_buf = buf; buf_to_pages(buf, buflen, args.acl_pages, &args.acl_pgbase); } - ret = rpc_call_sync(NFS_CLIENT(inode), &msg, 0); + ret = nfs4_call_sync(NFS_SERVER(inode), &msg, &args, 
&res, 0); if (ret) goto out_free; - if (resp_len > args.acl_len) - nfs4_write_cached_acl(inode, NULL, resp_len); + if (res.acl_len > args.acl_len) + nfs4_write_cached_acl(inode, NULL, res.acl_len); else - nfs4_write_cached_acl(inode, resp_buf, resp_len); + nfs4_write_cached_acl(inode, resp_buf, res.acl_len); if (buf) { ret = -ERANGE; - if (resp_len > buflen) + if (res.acl_len > buflen) goto out_free; if (localpage) - memcpy(buf, resp_buf, resp_len); + memcpy(buf, resp_buf, res.acl_len); } - ret = resp_len; + ret = res.acl_len; out_free: if (localpage) __free_page(localpage); @@ -2810,8 +3252,6 @@ static ssize_t nfs4_proc_get_acl(struct inode *inode, void *buf, size_t buflen) ret = nfs_revalidate_inode(server, inode); if (ret < 0) return ret; - if (NFS_I(inode)->cache_validity & NFS_INO_INVALID_ACL) - nfs_zap_acl_cache(inode); ret = nfs4_read_cached_acl(inode, buf, buflen); if (ret != -ENOENT) return ret; @@ -2827,10 +3267,11 @@ static int __nfs4_proc_set_acl(struct inode *inode, const void *buf, size_t bufl .acl_pages = pages, .acl_len = buflen, }; + struct nfs_setaclres res; struct rpc_message msg = { .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_SETACL], .rpc_argp = &arg, - .rpc_resp = NULL, + .rpc_resp = &res, }; int ret; @@ -2838,7 +3279,7 @@ static int __nfs4_proc_set_acl(struct inode *inode, const void *buf, size_t bufl return -EOPNOTSUPP; nfs_inode_return_delegation(inode); buf_to_pages(buf, buflen, arg.acl_pages, &arg.acl_pgbase); - ret = rpc_call_sync(NFS_CLIENT(inode), &msg, 0); + ret = nfs4_call_sync(server, &msg, &arg, &res, 1); nfs_access_zap_cache(inode); nfs_zap_acl_cache(inode); return ret; @@ -2857,10 +3298,8 @@ static int nfs4_proc_set_acl(struct inode *inode, const void *buf, size_t buflen } static int -nfs4_async_handle_error(struct rpc_task *task, const struct nfs_server *server, struct nfs4_state *state) +_nfs4_async_handle_error(struct rpc_task *task, const struct nfs_server *server, struct nfs_client *clp, struct nfs4_state *state) { - struct 
nfs_client *clp = server->nfs_client; - if (!clp || task->tk_status >= 0) return 0; switch(task->tk_status) { @@ -2879,8 +3318,23 @@ nfs4_async_handle_error(struct rpc_task *task, const struct nfs_server *server, rpc_wake_up_queued_task(&clp->cl_rpcwaitq, task); task->tk_status = 0; return -EAGAIN; +#if defined(CONFIG_NFS_V4_1) + case -NFS4ERR_BADSESSION: + case -NFS4ERR_BADSLOT: + case -NFS4ERR_BAD_HIGH_SLOT: + case -NFS4ERR_DEADSESSION: + case -NFS4ERR_CONN_NOT_BOUND_TO_SESSION: + case -NFS4ERR_SEQ_FALSE_RETRY: + case -NFS4ERR_SEQ_MISORDERED: + dprintk("%s ERROR %d, Reset session\n", __func__, + task->tk_status); + set_bit(NFS4CLNT_SESSION_SETUP, &clp->cl_state); + task->tk_status = 0; + return -EAGAIN; +#endif /* CONFIG_NFS_V4_1 */ case -NFS4ERR_DELAY: - nfs_inc_server_stats(server, NFSIOS_DELAY); + if (server) + nfs_inc_server_stats(server, NFSIOS_DELAY); case -NFS4ERR_GRACE: rpc_delay(task, NFS4_POLL_RETRY_MAX); task->tk_status = 0; @@ -2893,6 +3347,12 @@ nfs4_async_handle_error(struct rpc_task *task, const struct nfs_server *server, return 0; } +static int +nfs4_async_handle_error(struct rpc_task *task, const struct nfs_server *server, struct nfs4_state *state) +{ + return _nfs4_async_handle_error(task, server, server->nfs_client, state); +} + int nfs4_proc_setclientid(struct nfs_client *clp, u32 program, unsigned short port, struct rpc_cred *cred) { nfs4_verifier sc_verifier; @@ -3000,6 +3460,10 @@ struct nfs4_delegreturndata { static void nfs4_delegreturn_done(struct rpc_task *task, void *calldata) { struct nfs4_delegreturndata *data = calldata; + + nfs4_sequence_done_free_slot(data->res.server, &data->res.seq_res, + task->tk_status); + data->rpc_status = task->tk_status; if (data->rpc_status == 0) renew_lease(data->res.server, data->timestamp); @@ -3010,7 +3474,25 @@ static void nfs4_delegreturn_release(void *calldata) kfree(calldata); } +#if defined(CONFIG_NFS_V4_1) +static void nfs4_delegreturn_prepare(struct rpc_task *task, void *data) +{ + struct 
nfs4_delegreturndata *d_data; + + d_data = (struct nfs4_delegreturndata *)data; + + if (nfs4_setup_sequence(d_data->res.server->nfs_client, + &d_data->args.seq_args, + &d_data->res.seq_res, 1, task)) + return; + rpc_call_start(task); +} +#endif /* CONFIG_NFS_V4_1 */ + static const struct rpc_call_ops nfs4_delegreturn_ops = { +#if defined(CONFIG_NFS_V4_1) + .rpc_call_prepare = nfs4_delegreturn_prepare, +#endif /* CONFIG_NFS_V4_1 */ .rpc_call_done = nfs4_delegreturn_done, .rpc_release = nfs4_delegreturn_release, }; @@ -3032,7 +3514,7 @@ static int _nfs4_proc_delegreturn(struct inode *inode, struct rpc_cred *cred, co }; int status = 0; - data = kmalloc(sizeof(*data), GFP_KERNEL); + data = kzalloc(sizeof(*data), GFP_KERNEL); if (data == NULL) return -ENOMEM; data->args.fhandle = &data->fh; @@ -3042,6 +3524,7 @@ static int _nfs4_proc_delegreturn(struct inode *inode, struct rpc_cred *cred, co memcpy(&data->stateid, stateid, sizeof(data->stateid)); data->res.fattr = &data->fattr; data->res.server = server; + data->res.seq_res.sr_slotid = NFS4_MAX_SLOT_TABLE; nfs_fattr_init(data->res.fattr); data->timestamp = jiffies; data->rpc_status = 0; @@ -3127,7 +3610,7 @@ static int _nfs4_proc_getlk(struct nfs4_state *state, int cmd, struct file_lock goto out; lsp = request->fl_u.nfs4_fl.owner; arg.lock_owner.id = lsp->ls_id.id; - status = rpc_call_sync(server->client, &msg, 0); + status = nfs4_call_sync(server, &msg, &arg, &res, 1); switch (status) { case 0: request->fl_type = F_UNLCK; @@ -3187,13 +3670,14 @@ static struct nfs4_unlockdata *nfs4_alloc_unlockdata(struct file_lock *fl, struct nfs4_unlockdata *p; struct inode *inode = lsp->ls_state->inode; - p = kmalloc(sizeof(*p), GFP_KERNEL); + p = kzalloc(sizeof(*p), GFP_KERNEL); if (p == NULL) return NULL; p->arg.fh = NFS_FH(inode); p->arg.fl = &p->fl; p->arg.seqid = seqid; p->res.seqid = seqid; + p->res.seq_res.sr_slotid = NFS4_MAX_SLOT_TABLE; p->arg.stateid = &lsp->ls_stateid; p->lsp = lsp; atomic_inc(&lsp->ls_count); @@ -3217,6 
+3701,8 @@ static void nfs4_locku_done(struct rpc_task *task, void *data) { struct nfs4_unlockdata *calldata = data; + nfs4_sequence_done(calldata->server, &calldata->res.seq_res, + task->tk_status); if (RPC_ASSASSINATED(task)) return; switch (task->tk_status) { @@ -3233,8 +3719,11 @@ static void nfs4_locku_done(struct rpc_task *task, void *data) break; default: if (nfs4_async_handle_error(task, calldata->server, NULL) == -EAGAIN) - rpc_restart_call(task); + nfs4_restart_rpc(task, + calldata->server->nfs_client); } + nfs4_sequence_free_slot(calldata->server->nfs_client, + &calldata->res.seq_res); } static void nfs4_locku_prepare(struct rpc_task *task, void *data) @@ -3249,6 +3738,10 @@ static void nfs4_locku_prepare(struct rpc_task *task, void *data) return; } calldata->timestamp = jiffies; + if (nfs4_setup_sequence(calldata->server->nfs_client, + &calldata->arg.seq_args, + &calldata->res.seq_res, 1, task)) + return; rpc_call_start(task); } @@ -3341,6 +3834,7 @@ struct nfs4_lockdata { unsigned long timestamp; int rpc_status; int cancelled; + struct nfs_server *server; }; static struct nfs4_lockdata *nfs4_alloc_lockdata(struct file_lock *fl, @@ -3366,7 +3860,9 @@ static struct nfs4_lockdata *nfs4_alloc_lockdata(struct file_lock *fl, p->arg.lock_owner.clientid = server->nfs_client->cl_clientid; p->arg.lock_owner.id = lsp->ls_id.id; p->res.lock_seqid = p->arg.lock_seqid; + p->res.seq_res.sr_slotid = NFS4_MAX_SLOT_TABLE; p->lsp = lsp; + p->server = server; atomic_inc(&lsp->ls_count); p->ctx = get_nfs_open_context(ctx); memcpy(&p->fl, fl, sizeof(p->fl)); @@ -3396,6 +3892,9 @@ static void nfs4_lock_prepare(struct rpc_task *task, void *calldata) } else data->arg.new_lock_owner = 0; data->timestamp = jiffies; + if (nfs4_setup_sequence(data->server->nfs_client, &data->arg.seq_args, + &data->res.seq_res, 1, task)) + return; rpc_call_start(task); dprintk("%s: done!, ret = %d\n", __func__, data->rpc_status); } @@ -3406,6 +3905,9 @@ static void nfs4_lock_done(struct rpc_task 
*task, void *calldata) dprintk("%s: begin!\n", __func__); + nfs4_sequence_done_free_slot(data->server, &data->res.seq_res, + task->tk_status); + data->rpc_status = task->tk_status; if (RPC_ASSASSINATED(task)) goto out; @@ -3487,8 +3989,6 @@ static int _nfs4_do_setlk(struct nfs4_state *state, int cmd, struct file_lock *f ret = nfs4_wait_for_completion_rpc_task(task); if (ret == 0) { ret = data->rpc_status; - if (ret == -NFS4ERR_DENIED) - ret = -EAGAIN; } else data->cancelled = 1; rpc_put_task(task); @@ -3576,9 +4076,11 @@ static int nfs4_proc_setlk(struct nfs4_state *state, int cmd, struct file_lock * int err; do { + err = _nfs4_proc_setlk(state, cmd, request); + if (err == -NFS4ERR_DENIED) + err = -EAGAIN; err = nfs4_handle_exception(NFS_SERVER(state->inode), - _nfs4_proc_setlk(state, cmd, request), - &exception); + err, &exception); } while (exception.retry); return err; } @@ -3630,8 +4132,37 @@ int nfs4_lock_delegation_recall(struct nfs4_state *state, struct file_lock *fl) goto out; do { err = _nfs4_do_setlk(state, F_SETLK, fl, 0); - if (err != -NFS4ERR_DELAY) - break; + switch (err) { + default: + printk(KERN_ERR "%s: unhandled error %d.\n", + __func__, err); + case 0: + case -ESTALE: + goto out; + case -NFS4ERR_EXPIRED: + case -NFS4ERR_STALE_CLIENTID: + case -NFS4ERR_STALE_STATEID: + nfs4_schedule_state_recovery(server->nfs_client); + goto out; + case -ERESTARTSYS: + /* + * The show must go on: exit, but mark the + * stateid as needing recovery. 
+ */ + case -NFS4ERR_ADMIN_REVOKED: + case -NFS4ERR_BAD_STATEID: + case -NFS4ERR_OPENMODE: + nfs4_state_mark_reclaim_nograce(server->nfs_client, state); + err = 0; + goto out; + case -ENOMEM: + case -NFS4ERR_DENIED: + /* kill_proc(fl->fl_pid, SIGLOST, 1); */ + err = 0; + goto out; + case -NFS4ERR_DELAY: + break; + } err = nfs4_handle_exception(server, err, &exception); } while (exception.retry); out: @@ -3706,10 +4237,13 @@ int nfs4_proc_fs_locations(struct inode *dir, const struct qstr *name, .page = page, .bitmask = bitmask, }; + struct nfs4_fs_locations_res res = { + .fs_locations = fs_locations, + }; struct rpc_message msg = { .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_FS_LOCATIONS], .rpc_argp = &args, - .rpc_resp = fs_locations, + .rpc_resp = &res, }; int status; @@ -3717,24 +4251,720 @@ int nfs4_proc_fs_locations(struct inode *dir, const struct qstr *name, nfs_fattr_init(&fs_locations->fattr); fs_locations->server = server; fs_locations->nlocations = 0; - status = rpc_call_sync(server->client, &msg, 0); + status = nfs4_call_sync(server, &msg, &args, &res, 0); nfs_fixup_referral_attributes(&fs_locations->fattr); dprintk("%s: returned status = %d\n", __func__, status); return status; } -struct nfs4_state_recovery_ops nfs4_reboot_recovery_ops = { +#ifdef CONFIG_NFS_V4_1 +/* + * nfs4_proc_exchange_id() + * + * Since the clientid has expired, all compounds using sessions + * associated with the stale clientid will be returning + * NFS4ERR_BADSESSION in the sequence operation, and will therefore + * be in some phase of session reset. 
+ */ +static int nfs4_proc_exchange_id(struct nfs_client *clp, struct rpc_cred *cred) +{ + nfs4_verifier verifier; + struct nfs41_exchange_id_args args = { + .client = clp, + .flags = clp->cl_exchange_flags, + }; + struct nfs41_exchange_id_res res = { + .client = clp, + }; + int status; + struct rpc_message msg = { + .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_EXCHANGE_ID], + .rpc_argp = &args, + .rpc_resp = &res, + .rpc_cred = cred, + }; + __be32 *p; + + dprintk("--> %s\n", __func__); + BUG_ON(clp == NULL); + + p = (u32 *)verifier.data; + *p++ = htonl((u32)clp->cl_boot_time.tv_sec); + *p = htonl((u32)clp->cl_boot_time.tv_nsec); + args.verifier = &verifier; + + while (1) { + args.id_len = scnprintf(args.id, sizeof(args.id), + "%s/%s %u", + clp->cl_ipaddr, + rpc_peeraddr2str(clp->cl_rpcclient, + RPC_DISPLAY_ADDR), + clp->cl_id_uniquifier); + + status = rpc_call_sync(clp->cl_rpcclient, &msg, 0); + + if (status != NFS4ERR_CLID_INUSE) + break; + + if (signalled()) + break; + + if (++clp->cl_id_uniquifier == 0) + break; + } + + dprintk("<-- %s status= %d\n", __func__, status); + return status; +} + +struct nfs4_get_lease_time_data { + struct nfs4_get_lease_time_args *args; + struct nfs4_get_lease_time_res *res; + struct nfs_client *clp; +}; + +static void nfs4_get_lease_time_prepare(struct rpc_task *task, + void *calldata) +{ + int ret; + struct nfs4_get_lease_time_data *data = + (struct nfs4_get_lease_time_data *)calldata; + + dprintk("--> %s\n", __func__); + /* just setup sequence, do not trigger session recovery + since we're invoked within one */ + ret = nfs41_setup_sequence(data->clp->cl_session, + &data->args->la_seq_args, + &data->res->lr_seq_res, 0, task); + + BUG_ON(ret == -EAGAIN); + rpc_call_start(task); + dprintk("<-- %s\n", __func__); +} + +/* + * Called from nfs4_state_manager thread for session setup, so don't recover + * from sequence operation or clientid errors. 
+ */ +static void nfs4_get_lease_time_done(struct rpc_task *task, void *calldata) +{ + struct nfs4_get_lease_time_data *data = + (struct nfs4_get_lease_time_data *)calldata; + + dprintk("--> %s\n", __func__); + nfs41_sequence_done(data->clp, &data->res->lr_seq_res, task->tk_status); + switch (task->tk_status) { + case -NFS4ERR_DELAY: + case -NFS4ERR_GRACE: + dprintk("%s Retry: tk_status %d\n", __func__, task->tk_status); + rpc_delay(task, NFS4_POLL_RETRY_MIN); + task->tk_status = 0; + nfs4_restart_rpc(task, data->clp); + return; + } + nfs41_sequence_free_slot(data->clp, &data->res->lr_seq_res); + dprintk("<-- %s\n", __func__); +} + +struct rpc_call_ops nfs4_get_lease_time_ops = { + .rpc_call_prepare = nfs4_get_lease_time_prepare, + .rpc_call_done = nfs4_get_lease_time_done, +}; + +int nfs4_proc_get_lease_time(struct nfs_client *clp, struct nfs_fsinfo *fsinfo) +{ + struct rpc_task *task; + struct nfs4_get_lease_time_args args; + struct nfs4_get_lease_time_res res = { + .lr_fsinfo = fsinfo, + }; + struct nfs4_get_lease_time_data data = { + .args = &args, + .res = &res, + .clp = clp, + }; + struct rpc_message msg = { + .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_GET_LEASE_TIME], + .rpc_argp = &args, + .rpc_resp = &res, + }; + struct rpc_task_setup task_setup = { + .rpc_client = clp->cl_rpcclient, + .rpc_message = &msg, + .callback_ops = &nfs4_get_lease_time_ops, + .callback_data = &data + }; + int status; + + res.lr_seq_res.sr_slotid = NFS4_MAX_SLOT_TABLE; + dprintk("--> %s\n", __func__); + task = rpc_run_task(&task_setup); + + if (IS_ERR(task)) + status = PTR_ERR(task); + else { + status = task->tk_status; + rpc_put_task(task); + } + dprintk("<-- %s return %d\n", __func__, status); + + return status; +} + +/* + * Reset a slot table + */ +static int nfs4_reset_slot_table(struct nfs4_slot_table *tbl, int max_slots, + int old_max_slots, int ivalue) +{ + int i; + int ret = 0; + + dprintk("--> %s: max_reqs=%u, tbl %p\n", __func__, max_slots, tbl); + + /* + * Until we have 
dynamic slot table adjustment, insist + * upon the same slot table size + */ + if (max_slots != old_max_slots) { + dprintk("%s reset slot table does't match old\n", + __func__); + ret = -EINVAL; /*XXX NFS4ERR_REQ_TOO_BIG ? */ + goto out; + } + spin_lock(&tbl->slot_tbl_lock); + for (i = 0; i < max_slots; ++i) + tbl->slots[i].seq_nr = ivalue; + tbl->highest_used_slotid = -1; + spin_unlock(&tbl->slot_tbl_lock); + dprintk("%s: tbl=%p slots=%p max_slots=%d\n", __func__, + tbl, tbl->slots, tbl->max_slots); +out: + dprintk("<-- %s: return %d\n", __func__, ret); + return ret; +} + +/* + * Reset the forechannel and backchannel slot tables + */ +static int nfs4_reset_slot_tables(struct nfs4_session *session) +{ + int status; + + status = nfs4_reset_slot_table(&session->fc_slot_table, + session->fc_attrs.max_reqs, + session->fc_slot_table.max_slots, + 1); + if (status) + return status; + + status = nfs4_reset_slot_table(&session->bc_slot_table, + session->bc_attrs.max_reqs, + session->bc_slot_table.max_slots, + 0); + return status; +} + +/* Destroy the slot table */ +static void nfs4_destroy_slot_tables(struct nfs4_session *session) +{ + if (session->fc_slot_table.slots != NULL) { + kfree(session->fc_slot_table.slots); + session->fc_slot_table.slots = NULL; + } + if (session->bc_slot_table.slots != NULL) { + kfree(session->bc_slot_table.slots); + session->bc_slot_table.slots = NULL; + } + return; +} + +/* + * Initialize slot table + */ +static int nfs4_init_slot_table(struct nfs4_slot_table *tbl, + int max_slots, int ivalue) +{ + int i; + struct nfs4_slot *slot; + int ret = -ENOMEM; + + BUG_ON(max_slots > NFS4_MAX_SLOT_TABLE); + + dprintk("--> %s: max_reqs=%u\n", __func__, max_slots); + + slot = kcalloc(max_slots, sizeof(struct nfs4_slot), GFP_KERNEL); + if (!slot) + goto out; + for (i = 0; i < max_slots; ++i) + slot[i].seq_nr = ivalue; + ret = 0; + + spin_lock(&tbl->slot_tbl_lock); + if (tbl->slots != NULL) { + spin_unlock(&tbl->slot_tbl_lock); + dprintk("%s: slot table 
already initialized. tbl=%p slots=%p\n", + __func__, tbl, tbl->slots); + WARN_ON(1); + goto out_free; + } + tbl->max_slots = max_slots; + tbl->slots = slot; + tbl->highest_used_slotid = -1; /* no slot is currently used */ + spin_unlock(&tbl->slot_tbl_lock); + dprintk("%s: tbl=%p slots=%p max_slots=%d\n", __func__, + tbl, tbl->slots, tbl->max_slots); +out: + dprintk("<-- %s: return %d\n", __func__, ret); + return ret; + +out_free: + kfree(slot); + goto out; +} + +/* + * Initialize the forechannel and backchannel tables + */ +static int nfs4_init_slot_tables(struct nfs4_session *session) +{ + int status; + + status = nfs4_init_slot_table(&session->fc_slot_table, + session->fc_attrs.max_reqs, 1); + if (status) + return status; + + status = nfs4_init_slot_table(&session->bc_slot_table, + session->bc_attrs.max_reqs, 0); + if (status) + nfs4_destroy_slot_tables(session); + + return status; +} + +struct nfs4_session *nfs4_alloc_session(struct nfs_client *clp) +{ + struct nfs4_session *session; + struct nfs4_slot_table *tbl; + + session = kzalloc(sizeof(struct nfs4_session), GFP_KERNEL); + if (!session) + return NULL; + + set_bit(NFS4CLNT_SESSION_SETUP, &clp->cl_state); + /* + * The create session reply races with the server back + * channel probe. 
Mark the client NFS_CS_SESSION_INITING + * so that the client back channel can find the + * nfs_client struct + */ + clp->cl_cons_state = NFS_CS_SESSION_INITING; + + tbl = &session->fc_slot_table; + spin_lock_init(&tbl->slot_tbl_lock); + rpc_init_wait_queue(&tbl->slot_tbl_waitq, "ForeChannel Slot table"); + + tbl = &session->bc_slot_table; + spin_lock_init(&tbl->slot_tbl_lock); + rpc_init_wait_queue(&tbl->slot_tbl_waitq, "BackChannel Slot table"); + + session->clp = clp; + return session; +} + +void nfs4_destroy_session(struct nfs4_session *session) +{ + nfs4_proc_destroy_session(session); + dprintk("%s Destroy backchannel for xprt %p\n", + __func__, session->clp->cl_rpcclient->cl_xprt); + xprt_destroy_backchannel(session->clp->cl_rpcclient->cl_xprt, + NFS41_BC_MIN_CALLBACKS); + nfs4_destroy_slot_tables(session); + kfree(session); +} + +/* + * Initialize the values to be used by the client in CREATE_SESSION + * If nfs4_init_session set the fore channel request and response sizes, + * use them. + * + * Set the back channel max_resp_sz_cached to zero to force the client to + * always set csa_cachethis to FALSE because the current implementation + * of the back channel DRC only supports caching the CB_SEQUENCE operation. 
+ */ +static void nfs4_init_channel_attrs(struct nfs41_create_session_args *args) +{ + struct nfs4_session *session = args->client->cl_session; + unsigned int mxrqst_sz = session->fc_attrs.max_rqst_sz, + mxresp_sz = session->fc_attrs.max_resp_sz; + + if (mxrqst_sz == 0) + mxrqst_sz = NFS_MAX_FILE_IO_SIZE; + if (mxresp_sz == 0) + mxresp_sz = NFS_MAX_FILE_IO_SIZE; + /* Fore channel attributes */ + args->fc_attrs.headerpadsz = 0; + args->fc_attrs.max_rqst_sz = mxrqst_sz; + args->fc_attrs.max_resp_sz = mxresp_sz; + args->fc_attrs.max_resp_sz_cached = mxresp_sz; + args->fc_attrs.max_ops = NFS4_MAX_OPS; + args->fc_attrs.max_reqs = session->clp->cl_rpcclient->cl_xprt->max_reqs; + + dprintk("%s: Fore Channel : max_rqst_sz=%u max_resp_sz=%u " + "max_resp_sz_cached=%u max_ops=%u max_reqs=%u\n", + __func__, + args->fc_attrs.max_rqst_sz, args->fc_attrs.max_resp_sz, + args->fc_attrs.max_resp_sz_cached, args->fc_attrs.max_ops, + args->fc_attrs.max_reqs); + + /* Back channel attributes */ + args->bc_attrs.headerpadsz = 0; + args->bc_attrs.max_rqst_sz = PAGE_SIZE; + args->bc_attrs.max_resp_sz = PAGE_SIZE; + args->bc_attrs.max_resp_sz_cached = 0; + args->bc_attrs.max_ops = NFS4_MAX_BACK_CHANNEL_OPS; + args->bc_attrs.max_reqs = 1; + + dprintk("%s: Back Channel : max_rqst_sz=%u max_resp_sz=%u " + "max_resp_sz_cached=%u max_ops=%u max_reqs=%u\n", + __func__, + args->bc_attrs.max_rqst_sz, args->bc_attrs.max_resp_sz, + args->bc_attrs.max_resp_sz_cached, args->bc_attrs.max_ops, + args->bc_attrs.max_reqs); +} + +static int _verify_channel_attr(char *chan, char *attr_name, u32 sent, u32 rcvd) +{ + if (rcvd <= sent) + return 0; + printk(KERN_WARNING "%s: Session INVALID: %s channel %s increased. 
" + "sent=%u rcvd=%u\n", __func__, chan, attr_name, sent, rcvd); + return -EINVAL; +} + +#define _verify_fore_channel_attr(_name_) \ + _verify_channel_attr("fore", #_name_, \ + args->fc_attrs._name_, \ + session->fc_attrs._name_) + +#define _verify_back_channel_attr(_name_) \ + _verify_channel_attr("back", #_name_, \ + args->bc_attrs._name_, \ + session->bc_attrs._name_) + +/* + * The server is not allowed to increase the fore channel header pad size, + * maximum response size, or maximum number of operations. + * + * The back channel attributes are only negotiatied down: We send what the + * (back channel) server insists upon. + */ +static int nfs4_verify_channel_attrs(struct nfs41_create_session_args *args, + struct nfs4_session *session) +{ + int ret = 0; + + ret |= _verify_fore_channel_attr(headerpadsz); + ret |= _verify_fore_channel_attr(max_resp_sz); + ret |= _verify_fore_channel_attr(max_ops); + + ret |= _verify_back_channel_attr(headerpadsz); + ret |= _verify_back_channel_attr(max_rqst_sz); + ret |= _verify_back_channel_attr(max_resp_sz); + ret |= _verify_back_channel_attr(max_resp_sz_cached); + ret |= _verify_back_channel_attr(max_ops); + ret |= _verify_back_channel_attr(max_reqs); + + return ret; +} + +static int _nfs4_proc_create_session(struct nfs_client *clp) +{ + struct nfs4_session *session = clp->cl_session; + struct nfs41_create_session_args args = { + .client = clp, + .cb_program = NFS4_CALLBACK, + }; + struct nfs41_create_session_res res = { + .client = clp, + }; + struct rpc_message msg = { + .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_CREATE_SESSION], + .rpc_argp = &args, + .rpc_resp = &res, + }; + int status; + + nfs4_init_channel_attrs(&args); + args.flags = (SESSION4_PERSIST | SESSION4_BACK_CHAN); + + status = rpc_call_sync(session->clp->cl_rpcclient, &msg, 0); + + if (!status) + /* Verify the session's negotiated channel_attrs values */ + status = nfs4_verify_channel_attrs(&args, session); + if (!status) { + /* Increment the clientid slot 
sequence id */ + clp->cl_seqid++; + } + + return status; +} + +/* + * Issues a CREATE_SESSION operation to the server. + * It is the responsibility of the caller to verify the session is + * expired before calling this routine. + */ +int nfs4_proc_create_session(struct nfs_client *clp, int reset) +{ + int status; + unsigned *ptr; + struct nfs_fsinfo fsinfo; + struct nfs4_session *session = clp->cl_session; + + dprintk("--> %s clp=%p session=%p\n", __func__, clp, session); + + status = _nfs4_proc_create_session(clp); + if (status) + goto out; + + /* Init or reset the fore channel */ + if (reset) + status = nfs4_reset_slot_tables(session); + else + status = nfs4_init_slot_tables(session); + dprintk("fore channel slot table initialization returned %d\n", status); + if (status) + goto out; + + ptr = (unsigned *)&session->sess_id.data[0]; + dprintk("%s client>seqid %d sessionid %u:%u:%u:%u\n", __func__, + clp->cl_seqid, ptr[0], ptr[1], ptr[2], ptr[3]); + + if (reset) + /* Lease time is aleady set */ + goto out; + + /* Get the lease time */ + status = nfs4_proc_get_lease_time(clp, &fsinfo); + if (status == 0) { + /* Update lease time and schedule renewal */ + spin_lock(&clp->cl_lock); + clp->cl_lease_time = fsinfo.lease_time * HZ; + clp->cl_last_renewal = jiffies; + clear_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state); + spin_unlock(&clp->cl_lock); + + nfs4_schedule_state_renewal(clp); + } +out: + dprintk("<-- %s\n", __func__); + return status; +} + +/* + * Issue the over-the-wire RPC DESTROY_SESSION. + * The caller must serialize access to this routine. 
+ */ +int nfs4_proc_destroy_session(struct nfs4_session *session) +{ + int status = 0; + struct rpc_message msg; + + dprintk("--> nfs4_proc_destroy_session\n"); + + /* session is still being setup */ + if (session->clp->cl_cons_state != NFS_CS_READY) + return status; + + msg.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_DESTROY_SESSION]; + msg.rpc_argp = session; + msg.rpc_resp = NULL; + msg.rpc_cred = NULL; + status = rpc_call_sync(session->clp->cl_rpcclient, &msg, 0); + + if (status) + printk(KERN_WARNING + "Got error %d from the server on DESTROY_SESSION. " + "Session has been destroyed regardless...\n", status); + + dprintk("<-- nfs4_proc_destroy_session\n"); + return status; +} + +/* + * Renew the cl_session lease. + */ +static int nfs4_proc_sequence(struct nfs_client *clp, struct rpc_cred *cred) +{ + struct nfs4_sequence_args args; + struct nfs4_sequence_res res; + + struct rpc_message msg = { + .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_SEQUENCE], + .rpc_argp = &args, + .rpc_resp = &res, + .rpc_cred = cred, + }; + + args.sa_cache_this = 0; + + return nfs4_call_sync_sequence(clp, clp->cl_rpcclient, &msg, &args, + &res, 0); +} + +void nfs41_sequence_call_done(struct rpc_task *task, void *data) +{ + struct nfs_client *clp = (struct nfs_client *)data; + + nfs41_sequence_done(clp, task->tk_msg.rpc_resp, task->tk_status); + + if (task->tk_status < 0) { + dprintk("%s ERROR %d\n", __func__, task->tk_status); + + if (_nfs4_async_handle_error(task, NULL, clp, NULL) + == -EAGAIN) { + nfs4_restart_rpc(task, clp); + return; + } + } + nfs41_sequence_free_slot(clp, task->tk_msg.rpc_resp); + dprintk("%s rpc_cred %p\n", __func__, task->tk_msg.rpc_cred); + + put_rpccred(task->tk_msg.rpc_cred); + kfree(task->tk_msg.rpc_argp); + kfree(task->tk_msg.rpc_resp); + + dprintk("<-- %s\n", __func__); +} + +static void nfs41_sequence_prepare(struct rpc_task *task, void *data) +{ + struct nfs_client *clp; + struct nfs4_sequence_args *args; + struct nfs4_sequence_res *res; + + clp = (struct 
nfs_client *)data; + args = task->tk_msg.rpc_argp; + res = task->tk_msg.rpc_resp; + + if (nfs4_setup_sequence(clp, args, res, 0, task)) + return; + rpc_call_start(task); +} + +static const struct rpc_call_ops nfs41_sequence_ops = { + .rpc_call_done = nfs41_sequence_call_done, + .rpc_call_prepare = nfs41_sequence_prepare, +}; + +static int nfs41_proc_async_sequence(struct nfs_client *clp, + struct rpc_cred *cred) +{ + struct nfs4_sequence_args *args; + struct nfs4_sequence_res *res; + struct rpc_message msg = { + .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_SEQUENCE], + .rpc_cred = cred, + }; + + args = kzalloc(sizeof(*args), GFP_KERNEL); + if (!args) + return -ENOMEM; + res = kzalloc(sizeof(*res), GFP_KERNEL); + if (!res) { + kfree(args); + return -ENOMEM; + } + res->sr_slotid = NFS4_MAX_SLOT_TABLE; + msg.rpc_argp = args; + msg.rpc_resp = res; + + return rpc_call_async(clp->cl_rpcclient, &msg, RPC_TASK_SOFT, + &nfs41_sequence_ops, (void *)clp); +} + +#endif /* CONFIG_NFS_V4_1 */ + +struct nfs4_state_recovery_ops nfs40_reboot_recovery_ops = { .owner_flag_bit = NFS_OWNER_RECLAIM_REBOOT, .state_flag_bit = NFS_STATE_RECLAIM_REBOOT, .recover_open = nfs4_open_reclaim, .recover_lock = nfs4_lock_reclaim, + .establish_clid = nfs4_init_clientid, + .get_clid_cred = nfs4_get_setclientid_cred, +}; + +#if defined(CONFIG_NFS_V4_1) +struct nfs4_state_recovery_ops nfs41_reboot_recovery_ops = { + .owner_flag_bit = NFS_OWNER_RECLAIM_REBOOT, + .state_flag_bit = NFS_STATE_RECLAIM_REBOOT, + .recover_open = nfs4_open_reclaim, + .recover_lock = nfs4_lock_reclaim, + .establish_clid = nfs4_proc_exchange_id, + .get_clid_cred = nfs4_get_exchange_id_cred, +}; +#endif /* CONFIG_NFS_V4_1 */ + +struct nfs4_state_recovery_ops nfs40_nograce_recovery_ops = { + .owner_flag_bit = NFS_OWNER_RECLAIM_NOGRACE, + .state_flag_bit = NFS_STATE_RECLAIM_NOGRACE, + .recover_open = nfs4_open_expired, + .recover_lock = nfs4_lock_expired, + .establish_clid = nfs4_init_clientid, + .get_clid_cred = 
nfs4_get_setclientid_cred, }; -struct nfs4_state_recovery_ops nfs4_nograce_recovery_ops = { +#if defined(CONFIG_NFS_V4_1) +struct nfs4_state_recovery_ops nfs41_nograce_recovery_ops = { .owner_flag_bit = NFS_OWNER_RECLAIM_NOGRACE, .state_flag_bit = NFS_STATE_RECLAIM_NOGRACE, .recover_open = nfs4_open_expired, .recover_lock = nfs4_lock_expired, + .establish_clid = nfs4_proc_exchange_id, + .get_clid_cred = nfs4_get_exchange_id_cred, +}; +#endif /* CONFIG_NFS_V4_1 */ + +struct nfs4_state_maintenance_ops nfs40_state_renewal_ops = { + .sched_state_renewal = nfs4_proc_async_renew, + .get_state_renewal_cred_locked = nfs4_get_renew_cred_locked, + .renew_lease = nfs4_proc_renew, +}; + +#if defined(CONFIG_NFS_V4_1) +struct nfs4_state_maintenance_ops nfs41_state_renewal_ops = { + .sched_state_renewal = nfs41_proc_async_sequence, + .get_state_renewal_cred_locked = nfs4_get_machine_cred_locked, + .renew_lease = nfs4_proc_sequence, +}; +#endif + +/* + * Per minor version reboot and network partition recovery ops + */ + +struct nfs4_state_recovery_ops *nfs4_reboot_recovery_ops[] = { + &nfs40_reboot_recovery_ops, +#if defined(CONFIG_NFS_V4_1) + &nfs41_reboot_recovery_ops, +#endif +}; + +struct nfs4_state_recovery_ops *nfs4_nograce_recovery_ops[] = { + &nfs40_nograce_recovery_ops, +#if defined(CONFIG_NFS_V4_1) + &nfs41_nograce_recovery_ops, +#endif +}; + +struct nfs4_state_maintenance_ops *nfs4_state_renewal_ops[] = { + &nfs40_state_renewal_ops, +#if defined(CONFIG_NFS_V4_1) + &nfs41_state_renewal_ops, +#endif }; static const struct inode_operations nfs4_file_inode_operations = { diff --git a/fs/nfs/nfs4renewd.c b/fs/nfs/nfs4renewd.c index f524e93..e27c6ce 100644 --- a/fs/nfs/nfs4renewd.c +++ b/fs/nfs/nfs4renewd.c @@ -59,12 +59,14 @@ void nfs4_renew_state(struct work_struct *work) { + struct nfs4_state_maintenance_ops *ops; struct nfs_client *clp = container_of(work, struct nfs_client, cl_renewd.work); struct rpc_cred *cred; long lease, timeout; unsigned long last, now; + ops = 
nfs4_state_renewal_ops[clp->cl_minorversion]; dprintk("%s: start\n", __func__); /* Are there any active superblocks? */ if (list_empty(&clp->cl_superblocks)) @@ -76,7 +78,7 @@ nfs4_renew_state(struct work_struct *work) timeout = (2 * lease) / 3 + (long)last - (long)now; /* Are we close to a lease timeout? */ if (time_after(now, last + lease/3)) { - cred = nfs4_get_renew_cred_locked(clp); + cred = ops->get_state_renewal_cred_locked(clp); spin_unlock(&clp->cl_lock); if (cred == NULL) { if (list_empty(&clp->cl_delegations)) { @@ -86,7 +88,7 @@ nfs4_renew_state(struct work_struct *work) nfs_expire_all_delegations(clp); } else { /* Queue an asynchronous RENEW. */ - nfs4_proc_async_renew(clp, cred); + ops->sched_state_renewal(clp, cred); put_rpccred(cred); } timeout = (2 * lease) / 3; diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c index 0298e90..b73c5a7 100644 --- a/fs/nfs/nfs4state.c +++ b/fs/nfs/nfs4state.c @@ -60,7 +60,7 @@ const nfs4_stateid zero_stateid; static LIST_HEAD(nfs4_clientid_list); -static int nfs4_init_client(struct nfs_client *clp, struct rpc_cred *cred) +int nfs4_init_clientid(struct nfs_client *clp, struct rpc_cred *cred) { unsigned short port; int status; @@ -77,7 +77,7 @@ static int nfs4_init_client(struct nfs_client *clp, struct rpc_cred *cred) return status; } -static struct rpc_cred *nfs4_get_machine_cred_locked(struct nfs_client *clp) +struct rpc_cred *nfs4_get_machine_cred_locked(struct nfs_client *clp) { struct rpc_cred *cred = NULL; @@ -114,17 +114,21 @@ struct rpc_cred *nfs4_get_renew_cred_locked(struct nfs_client *clp) return cred; } -static struct rpc_cred *nfs4_get_renew_cred(struct nfs_client *clp) +#if defined(CONFIG_NFS_V4_1) + +struct rpc_cred *nfs4_get_exchange_id_cred(struct nfs_client *clp) { struct rpc_cred *cred; spin_lock(&clp->cl_lock); - cred = nfs4_get_renew_cred_locked(clp); + cred = nfs4_get_machine_cred_locked(clp); spin_unlock(&clp->cl_lock); return cred; } -static struct rpc_cred *nfs4_get_setclientid_cred(struct 
nfs_client *clp) +#endif /* CONFIG_NFS_V4_1 */ + +struct rpc_cred *nfs4_get_setclientid_cred(struct nfs_client *clp) { struct nfs4_state_owner *sp; struct rb_node *pos; @@ -738,12 +742,14 @@ static void nfs_increment_seqid(int status, struct nfs_seqid *seqid) void nfs_increment_open_seqid(int status, struct nfs_seqid *seqid) { - if (status == -NFS4ERR_BAD_SEQID) { - struct nfs4_state_owner *sp = container_of(seqid->sequence, - struct nfs4_state_owner, so_seqid); + struct nfs4_state_owner *sp = container_of(seqid->sequence, + struct nfs4_state_owner, so_seqid); + struct nfs_server *server = sp->so_server; + + if (status == -NFS4ERR_BAD_SEQID) nfs4_drop_state_owner(sp); - } - nfs_increment_seqid(status, seqid); + if (!nfs4_has_session(server->nfs_client)) + nfs_increment_seqid(status, seqid); } /* @@ -847,32 +853,45 @@ static int nfs4_reclaim_locks(struct nfs4_state *state, const struct nfs4_state_ struct file_lock *fl; int status = 0; + if (inode->i_flock == NULL) + return 0; + + /* Guard against delegation returns and new lock/unlock calls */ down_write(&nfsi->rwsem); + /* Protect inode->i_flock using the BKL */ + lock_kernel(); for (fl = inode->i_flock; fl != NULL; fl = fl->fl_next) { if (!(fl->fl_flags & (FL_POSIX|FL_FLOCK))) continue; if (nfs_file_open_context(fl->fl_file)->state != state) continue; + unlock_kernel(); status = ops->recover_lock(state, fl); - if (status >= 0) - continue; switch (status) { + case 0: + break; + case -ESTALE: + case -NFS4ERR_ADMIN_REVOKED: + case -NFS4ERR_STALE_STATEID: + case -NFS4ERR_BAD_STATEID: + case -NFS4ERR_EXPIRED: + case -NFS4ERR_NO_GRACE: + case -NFS4ERR_STALE_CLIENTID: + goto out; default: printk(KERN_ERR "%s: unhandled error %d. 
Zeroing state\n", __func__, status); - case -NFS4ERR_EXPIRED: - case -NFS4ERR_NO_GRACE: + case -ENOMEM: + case -NFS4ERR_DENIED: case -NFS4ERR_RECLAIM_BAD: case -NFS4ERR_RECLAIM_CONFLICT: /* kill_proc(fl->fl_pid, SIGLOST, 1); */ - break; - case -NFS4ERR_STALE_CLIENTID: - goto out_err; + status = 0; } + lock_kernel(); } - up_write(&nfsi->rwsem); - return 0; -out_err: + unlock_kernel(); +out: up_write(&nfsi->rwsem); return status; } @@ -918,6 +937,7 @@ restart: printk(KERN_ERR "%s: unhandled error %d. Zeroing state\n", __func__, status); case -ENOENT: + case -ENOMEM: case -ESTALE: /* * Open state on this file cannot be recovered @@ -928,6 +948,9 @@ restart: /* Mark the file as being 'closed' */ state->state = 0; break; + case -NFS4ERR_ADMIN_REVOKED: + case -NFS4ERR_STALE_STATEID: + case -NFS4ERR_BAD_STATEID: case -NFS4ERR_RECLAIM_BAD: case -NFS4ERR_RECLAIM_CONFLICT: nfs4_state_mark_reclaim_nograce(sp->so_client, state); @@ -1042,6 +1065,14 @@ static void nfs4_recovery_handle_error(struct nfs_client *clp, int error) case -NFS4ERR_EXPIRED: set_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state); nfs4_state_start_reclaim_nograce(clp); + case -NFS4ERR_BADSESSION: + case -NFS4ERR_BADSLOT: + case -NFS4ERR_BAD_HIGH_SLOT: + case -NFS4ERR_DEADSESSION: + case -NFS4ERR_CONN_NOT_BOUND_TO_SESSION: + case -NFS4ERR_SEQ_FALSE_RETRY: + case -NFS4ERR_SEQ_MISORDERED: + set_bit(NFS4CLNT_SESSION_SETUP, &clp->cl_state); } } @@ -1075,18 +1106,22 @@ restart: static int nfs4_check_lease(struct nfs_client *clp) { struct rpc_cred *cred; + struct nfs4_state_maintenance_ops *ops = + nfs4_state_renewal_ops[clp->cl_minorversion]; int status = -NFS4ERR_EXPIRED; /* Is the client already known to have an expired lease? 
*/ if (test_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state)) return 0; - cred = nfs4_get_renew_cred(clp); + spin_lock(&clp->cl_lock); + cred = ops->get_state_renewal_cred_locked(clp); + spin_unlock(&clp->cl_lock); if (cred == NULL) { cred = nfs4_get_setclientid_cred(clp); if (cred == NULL) goto out; } - status = nfs4_proc_renew(clp, cred); + status = ops->renew_lease(clp, cred); put_rpccred(cred); out: nfs4_recovery_handle_error(clp, status); @@ -1096,21 +1131,98 @@ out: static int nfs4_reclaim_lease(struct nfs_client *clp) { struct rpc_cred *cred; + struct nfs4_state_recovery_ops *ops = + nfs4_reboot_recovery_ops[clp->cl_minorversion]; int status = -ENOENT; - cred = nfs4_get_setclientid_cred(clp); + cred = ops->get_clid_cred(clp); if (cred != NULL) { - status = nfs4_init_client(clp, cred); + status = ops->establish_clid(clp, cred); put_rpccred(cred); /* Handle case where the user hasn't set up machine creds */ if (status == -EACCES && cred == clp->cl_machine_cred) { nfs4_clear_machine_cred(clp); status = -EAGAIN; } + if (status == -NFS4ERR_MINOR_VERS_MISMATCH) + status = -EPROTONOSUPPORT; + } + return status; +} + +#ifdef CONFIG_NFS_V4_1 +static void nfs4_session_recovery_handle_error(struct nfs_client *clp, int err) +{ + switch (err) { + case -NFS4ERR_STALE_CLIENTID: + set_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state); + set_bit(NFS4CLNT_SESSION_SETUP, &clp->cl_state); + } +} + +static int nfs4_reset_session(struct nfs_client *clp) +{ + int status; + + status = nfs4_proc_destroy_session(clp->cl_session); + if (status && status != -NFS4ERR_BADSESSION && + status != -NFS4ERR_DEADSESSION) { + nfs4_session_recovery_handle_error(clp, status); + goto out; } + + memset(clp->cl_session->sess_id.data, 0, NFS4_MAX_SESSIONID_LEN); + status = nfs4_proc_create_session(clp, 1); + if (status) + nfs4_session_recovery_handle_error(clp, status); + /* fall through*/ +out: + /* Wake up the next rpc task even on error */ + rpc_wake_up_next(&clp->cl_session->fc_slot_table.slot_tbl_waitq); 
return status; } +static int nfs4_initialize_session(struct nfs_client *clp) +{ + int status; + + status = nfs4_proc_create_session(clp, 0); + if (!status) { + nfs_mark_client_ready(clp, NFS_CS_READY); + } else if (status == -NFS4ERR_STALE_CLIENTID) { + set_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state); + set_bit(NFS4CLNT_SESSION_SETUP, &clp->cl_state); + } else { + nfs_mark_client_ready(clp, status); + } + return status; +} +#else /* CONFIG_NFS_V4_1 */ +static int nfs4_reset_session(struct nfs_client *clp) { return 0; } +static int nfs4_initialize_session(struct nfs_client *clp) { return 0; } +#endif /* CONFIG_NFS_V4_1 */ + +/* Set NFS4CLNT_LEASE_EXPIRED for all v4.0 errors and for recoverable errors + * on EXCHANGE_ID for v4.1 + */ +static void nfs4_set_lease_expired(struct nfs_client *clp, int status) +{ + if (nfs4_has_session(clp)) { + switch (status) { + case -NFS4ERR_DELAY: + case -NFS4ERR_CLID_INUSE: + case -EAGAIN: + break; + + case -NFS4ERR_NOT_SAME: /* FixMe: implement recovery + * in nfs4_exchange_id */ + default: + return; + } + } + set_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state); +} + static void nfs4_state_manager(struct nfs_client *clp) { int status = 0; @@ -1121,9 +1233,12 @@ static void nfs4_state_manager(struct nfs_client *clp) /* We're going to have to re-establish a clientid */ status = nfs4_reclaim_lease(clp); if (status) { - set_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state); + nfs4_set_lease_expired(clp, status); if (status == -EAGAIN) continue; + if (clp->cl_cons_state == + NFS_CS_SESSION_INITING) + nfs_mark_client_ready(clp, status); goto out_error; } clear_bit(NFS4CLNT_CHECK_LEASE, &clp->cl_state); @@ -1134,25 +1249,44 @@ static void nfs4_state_manager(struct nfs_client *clp) if (status != 0) continue; } - + /* Initialize or reset the session */ + if (nfs4_has_session(clp) && + test_and_clear_bit(NFS4CLNT_SESSION_SETUP, &clp->cl_state)) { + if (clp->cl_cons_state == NFS_CS_SESSION_INITING) + status = nfs4_initialize_session(clp); + else + 
status = nfs4_reset_session(clp); + if (status) { + if (status == -NFS4ERR_STALE_CLIENTID) + continue; + goto out_error; + } + } /* First recover reboot state... */ if (test_and_clear_bit(NFS4CLNT_RECLAIM_REBOOT, &clp->cl_state)) { - status = nfs4_do_reclaim(clp, &nfs4_reboot_recovery_ops); + status = nfs4_do_reclaim(clp, + nfs4_reboot_recovery_ops[clp->cl_minorversion]); if (status == -NFS4ERR_STALE_CLIENTID) continue; + if (test_bit(NFS4CLNT_SESSION_SETUP, &clp->cl_state)) + continue; nfs4_state_end_reclaim_reboot(clp); continue; } /* Now recover expired state... */ if (test_and_clear_bit(NFS4CLNT_RECLAIM_NOGRACE, &clp->cl_state)) { - status = nfs4_do_reclaim(clp, &nfs4_nograce_recovery_ops); + status = nfs4_do_reclaim(clp, + nfs4_nograce_recovery_ops[clp->cl_minorversion]); if (status < 0) { set_bit(NFS4CLNT_RECLAIM_NOGRACE, &clp->cl_state); if (status == -NFS4ERR_STALE_CLIENTID) continue; if (status == -NFS4ERR_EXPIRED) continue; + if (test_bit(NFS4CLNT_SESSION_SETUP, + &clp->cl_state)) + continue; goto out_error; } else nfs4_state_end_reclaim_nograce(clp); diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c index 1690f0e..617273e 100644 --- a/fs/nfs/nfs4xdr.c +++ b/fs/nfs/nfs4xdr.c @@ -192,12 +192,16 @@ static int nfs4_stat_to_errno(int); decode_verifier_maxsz) #define encode_remove_maxsz (op_encode_hdr_maxsz + \ nfs4_name_maxsz) +#define decode_remove_maxsz (op_decode_hdr_maxsz + \ + decode_change_info_maxsz) #define encode_rename_maxsz (op_encode_hdr_maxsz + \ 2 * nfs4_name_maxsz) -#define decode_rename_maxsz (op_decode_hdr_maxsz + 5 + 5) +#define decode_rename_maxsz (op_decode_hdr_maxsz + \ + decode_change_info_maxsz + \ + decode_change_info_maxsz) #define encode_link_maxsz (op_encode_hdr_maxsz + \ nfs4_name_maxsz) -#define decode_link_maxsz (op_decode_hdr_maxsz + 5) +#define decode_link_maxsz (op_decode_hdr_maxsz + decode_change_info_maxsz) #define encode_lock_maxsz (op_encode_hdr_maxsz + \ 7 + \ 1 + encode_stateid_maxsz + 8) @@ -240,43 +244,115 @@ static int 
nfs4_stat_to_errno(int); (encode_getattr_maxsz) #define decode_fs_locations_maxsz \ (0) + +#if defined(CONFIG_NFS_V4_1) +#define NFS4_MAX_MACHINE_NAME_LEN (64) + +#define encode_exchange_id_maxsz (op_encode_hdr_maxsz + \ + encode_verifier_maxsz + \ + 1 /* co_ownerid.len */ + \ + XDR_QUADLEN(NFS4_EXCHANGE_ID_LEN) + \ + 1 /* flags */ + \ + 1 /* spa_how */ + \ + 0 /* SP4_NONE (for now) */ + \ + 1 /* zero implemetation id array */) +#define decode_exchange_id_maxsz (op_decode_hdr_maxsz + \ + 2 /* eir_clientid */ + \ + 1 /* eir_sequenceid */ + \ + 1 /* eir_flags */ + \ + 1 /* spr_how */ + \ + 0 /* SP4_NONE (for now) */ + \ + 2 /* eir_server_owner.so_minor_id */ + \ + /* eir_server_owner.so_major_id<> */ \ + XDR_QUADLEN(NFS4_OPAQUE_LIMIT) + 1 + \ + /* eir_server_scope<> */ \ + XDR_QUADLEN(NFS4_OPAQUE_LIMIT) + 1 + \ + 1 /* eir_server_impl_id array length */ + \ + 0 /* ignored eir_server_impl_id contents */) +#define encode_channel_attrs_maxsz (6 + 1 /* ca_rdma_ird.len (0) */) +#define decode_channel_attrs_maxsz (6 + \ + 1 /* ca_rdma_ird.len */ + \ + 1 /* ca_rdma_ird */) +#define encode_create_session_maxsz (op_encode_hdr_maxsz + \ + 2 /* csa_clientid */ + \ + 1 /* csa_sequence */ + \ + 1 /* csa_flags */ + \ + encode_channel_attrs_maxsz + \ + encode_channel_attrs_maxsz + \ + 1 /* csa_cb_program */ + \ + 1 /* csa_sec_parms.len (1) */ + \ + 1 /* cb_secflavor (AUTH_SYS) */ + \ + 1 /* stamp */ + \ + 1 /* machinename.len */ + \ + XDR_QUADLEN(NFS4_MAX_MACHINE_NAME_LEN) + \ + 1 /* uid */ + \ + 1 /* gid */ + \ + 1 /* gids.len (0) */) +#define decode_create_session_maxsz (op_decode_hdr_maxsz + \ + XDR_QUADLEN(NFS4_MAX_SESSIONID_LEN) + \ + 1 /* csr_sequence */ + \ + 1 /* csr_flags */ + \ + decode_channel_attrs_maxsz + \ + decode_channel_attrs_maxsz) +#define encode_destroy_session_maxsz (op_encode_hdr_maxsz + 4) +#define decode_destroy_session_maxsz (op_decode_hdr_maxsz) +#define encode_sequence_maxsz (op_encode_hdr_maxsz + \ + XDR_QUADLEN(NFS4_MAX_SESSIONID_LEN) + 4) +#define 
decode_sequence_maxsz (op_decode_hdr_maxsz + \ + XDR_QUADLEN(NFS4_MAX_SESSIONID_LEN) + 5) +#else /* CONFIG_NFS_V4_1 */ +#define encode_sequence_maxsz 0 +#define decode_sequence_maxsz 0 +#endif /* CONFIG_NFS_V4_1 */ + #define NFS4_enc_compound_sz (1024) /* XXX: large enough? */ #define NFS4_dec_compound_sz (1024) /* XXX: large enough? */ #define NFS4_enc_read_sz (compound_encode_hdr_maxsz + \ + encode_sequence_maxsz + \ encode_putfh_maxsz + \ encode_read_maxsz) #define NFS4_dec_read_sz (compound_decode_hdr_maxsz + \ + decode_sequence_maxsz + \ decode_putfh_maxsz + \ decode_read_maxsz) #define NFS4_enc_readlink_sz (compound_encode_hdr_maxsz + \ + encode_sequence_maxsz + \ encode_putfh_maxsz + \ encode_readlink_maxsz) #define NFS4_dec_readlink_sz (compound_decode_hdr_maxsz + \ + decode_sequence_maxsz + \ decode_putfh_maxsz + \ decode_readlink_maxsz) #define NFS4_enc_readdir_sz (compound_encode_hdr_maxsz + \ + encode_sequence_maxsz + \ encode_putfh_maxsz + \ encode_readdir_maxsz) #define NFS4_dec_readdir_sz (compound_decode_hdr_maxsz + \ + decode_sequence_maxsz + \ decode_putfh_maxsz + \ decode_readdir_maxsz) #define NFS4_enc_write_sz (compound_encode_hdr_maxsz + \ + encode_sequence_maxsz + \ encode_putfh_maxsz + \ encode_write_maxsz + \ encode_getattr_maxsz) #define NFS4_dec_write_sz (compound_decode_hdr_maxsz + \ + decode_sequence_maxsz + \ decode_putfh_maxsz + \ decode_write_maxsz + \ decode_getattr_maxsz) #define NFS4_enc_commit_sz (compound_encode_hdr_maxsz + \ + encode_sequence_maxsz + \ encode_putfh_maxsz + \ encode_commit_maxsz + \ encode_getattr_maxsz) #define NFS4_dec_commit_sz (compound_decode_hdr_maxsz + \ + decode_sequence_maxsz + \ decode_putfh_maxsz + \ decode_commit_maxsz + \ decode_getattr_maxsz) #define NFS4_enc_open_sz (compound_encode_hdr_maxsz + \ + encode_sequence_maxsz + \ encode_putfh_maxsz + \ encode_savefh_maxsz + \ encode_open_maxsz + \ @@ -285,6 +361,7 @@ static int nfs4_stat_to_errno(int); encode_restorefh_maxsz + \ encode_getattr_maxsz) 
#define NFS4_dec_open_sz (compound_decode_hdr_maxsz + \ + decode_sequence_maxsz + \ decode_putfh_maxsz + \ decode_savefh_maxsz + \ decode_open_maxsz + \ @@ -301,43 +378,53 @@ static int nfs4_stat_to_errno(int); decode_putfh_maxsz + \ decode_open_confirm_maxsz) #define NFS4_enc_open_noattr_sz (compound_encode_hdr_maxsz + \ + encode_sequence_maxsz + \ encode_putfh_maxsz + \ encode_open_maxsz + \ encode_getattr_maxsz) #define NFS4_dec_open_noattr_sz (compound_decode_hdr_maxsz + \ + decode_sequence_maxsz + \ decode_putfh_maxsz + \ decode_open_maxsz + \ decode_getattr_maxsz) #define NFS4_enc_open_downgrade_sz \ (compound_encode_hdr_maxsz + \ + encode_sequence_maxsz + \ encode_putfh_maxsz + \ encode_open_downgrade_maxsz + \ encode_getattr_maxsz) #define NFS4_dec_open_downgrade_sz \ (compound_decode_hdr_maxsz + \ + decode_sequence_maxsz + \ decode_putfh_maxsz + \ decode_open_downgrade_maxsz + \ decode_getattr_maxsz) #define NFS4_enc_close_sz (compound_encode_hdr_maxsz + \ + encode_sequence_maxsz + \ encode_putfh_maxsz + \ encode_close_maxsz + \ encode_getattr_maxsz) #define NFS4_dec_close_sz (compound_decode_hdr_maxsz + \ + decode_sequence_maxsz + \ decode_putfh_maxsz + \ decode_close_maxsz + \ decode_getattr_maxsz) #define NFS4_enc_setattr_sz (compound_encode_hdr_maxsz + \ + encode_sequence_maxsz + \ encode_putfh_maxsz + \ encode_setattr_maxsz + \ encode_getattr_maxsz) #define NFS4_dec_setattr_sz (compound_decode_hdr_maxsz + \ + decode_sequence_maxsz + \ decode_putfh_maxsz + \ decode_setattr_maxsz + \ decode_getattr_maxsz) #define NFS4_enc_fsinfo_sz (compound_encode_hdr_maxsz + \ + encode_sequence_maxsz + \ encode_putfh_maxsz + \ encode_fsinfo_maxsz) #define NFS4_dec_fsinfo_sz (compound_decode_hdr_maxsz + \ + decode_sequence_maxsz + \ decode_putfh_maxsz + \ decode_fsinfo_maxsz) #define NFS4_enc_renew_sz (compound_encode_hdr_maxsz + \ @@ -359,64 +446,81 @@ static int nfs4_stat_to_errno(int); decode_putrootfh_maxsz + \ decode_fsinfo_maxsz) #define NFS4_enc_lock_sz 
(compound_encode_hdr_maxsz + \ + encode_sequence_maxsz + \ encode_putfh_maxsz + \ encode_lock_maxsz) #define NFS4_dec_lock_sz (compound_decode_hdr_maxsz + \ + decode_sequence_maxsz + \ decode_putfh_maxsz + \ decode_lock_maxsz) #define NFS4_enc_lockt_sz (compound_encode_hdr_maxsz + \ + encode_sequence_maxsz + \ encode_putfh_maxsz + \ encode_lockt_maxsz) #define NFS4_dec_lockt_sz (compound_decode_hdr_maxsz + \ + decode_sequence_maxsz + \ decode_putfh_maxsz + \ decode_lockt_maxsz) #define NFS4_enc_locku_sz (compound_encode_hdr_maxsz + \ + encode_sequence_maxsz + \ encode_putfh_maxsz + \ encode_locku_maxsz) #define NFS4_dec_locku_sz (compound_decode_hdr_maxsz + \ + decode_sequence_maxsz + \ decode_putfh_maxsz + \ decode_locku_maxsz) #define NFS4_enc_access_sz (compound_encode_hdr_maxsz + \ + encode_sequence_maxsz + \ encode_putfh_maxsz + \ encode_access_maxsz + \ encode_getattr_maxsz) #define NFS4_dec_access_sz (compound_decode_hdr_maxsz + \ + decode_sequence_maxsz + \ decode_putfh_maxsz + \ decode_access_maxsz + \ decode_getattr_maxsz) #define NFS4_enc_getattr_sz (compound_encode_hdr_maxsz + \ + encode_sequence_maxsz + \ encode_putfh_maxsz + \ encode_getattr_maxsz) #define NFS4_dec_getattr_sz (compound_decode_hdr_maxsz + \ + decode_sequence_maxsz + \ decode_putfh_maxsz + \ decode_getattr_maxsz) #define NFS4_enc_lookup_sz (compound_encode_hdr_maxsz + \ + encode_sequence_maxsz + \ encode_putfh_maxsz + \ encode_lookup_maxsz + \ encode_getattr_maxsz + \ encode_getfh_maxsz) #define NFS4_dec_lookup_sz (compound_decode_hdr_maxsz + \ + decode_sequence_maxsz + \ decode_putfh_maxsz + \ decode_lookup_maxsz + \ decode_getattr_maxsz + \ decode_getfh_maxsz) #define NFS4_enc_lookup_root_sz (compound_encode_hdr_maxsz + \ + encode_sequence_maxsz + \ encode_putrootfh_maxsz + \ encode_getattr_maxsz + \ encode_getfh_maxsz) #define NFS4_dec_lookup_root_sz (compound_decode_hdr_maxsz + \ + decode_sequence_maxsz + \ decode_putrootfh_maxsz + \ decode_getattr_maxsz + \ decode_getfh_maxsz) 
#define NFS4_enc_remove_sz (compound_encode_hdr_maxsz + \ + encode_sequence_maxsz + \ encode_putfh_maxsz + \ encode_remove_maxsz + \ encode_getattr_maxsz) #define NFS4_dec_remove_sz (compound_decode_hdr_maxsz + \ + decode_sequence_maxsz + \ decode_putfh_maxsz + \ - op_decode_hdr_maxsz + 5 + \ + decode_remove_maxsz + \ decode_getattr_maxsz) #define NFS4_enc_rename_sz (compound_encode_hdr_maxsz + \ + encode_sequence_maxsz + \ encode_putfh_maxsz + \ encode_savefh_maxsz + \ encode_putfh_maxsz + \ @@ -425,6 +529,7 @@ static int nfs4_stat_to_errno(int); encode_restorefh_maxsz + \ encode_getattr_maxsz) #define NFS4_dec_rename_sz (compound_decode_hdr_maxsz + \ + decode_sequence_maxsz + \ decode_putfh_maxsz + \ decode_savefh_maxsz + \ decode_putfh_maxsz + \ @@ -433,6 +538,7 @@ static int nfs4_stat_to_errno(int); decode_restorefh_maxsz + \ decode_getattr_maxsz) #define NFS4_enc_link_sz (compound_encode_hdr_maxsz + \ + encode_sequence_maxsz + \ encode_putfh_maxsz + \ encode_savefh_maxsz + \ encode_putfh_maxsz + \ @@ -441,6 +547,7 @@ static int nfs4_stat_to_errno(int); encode_restorefh_maxsz + \ decode_getattr_maxsz) #define NFS4_dec_link_sz (compound_decode_hdr_maxsz + \ + decode_sequence_maxsz + \ decode_putfh_maxsz + \ decode_savefh_maxsz + \ decode_putfh_maxsz + \ @@ -449,16 +556,19 @@ static int nfs4_stat_to_errno(int); decode_restorefh_maxsz + \ decode_getattr_maxsz) #define NFS4_enc_symlink_sz (compound_encode_hdr_maxsz + \ + encode_sequence_maxsz + \ encode_putfh_maxsz + \ encode_symlink_maxsz + \ encode_getattr_maxsz + \ encode_getfh_maxsz) #define NFS4_dec_symlink_sz (compound_decode_hdr_maxsz + \ + decode_sequence_maxsz + \ decode_putfh_maxsz + \ decode_symlink_maxsz + \ decode_getattr_maxsz + \ decode_getfh_maxsz) #define NFS4_enc_create_sz (compound_encode_hdr_maxsz + \ + encode_sequence_maxsz + \ encode_putfh_maxsz + \ encode_savefh_maxsz + \ encode_create_maxsz + \ @@ -467,6 +577,7 @@ static int nfs4_stat_to_errno(int); encode_restorefh_maxsz + \ 
encode_getattr_maxsz) #define NFS4_dec_create_sz (compound_decode_hdr_maxsz + \ + decode_sequence_maxsz + \ decode_putfh_maxsz + \ decode_savefh_maxsz + \ decode_create_maxsz + \ @@ -475,52 +586,98 @@ static int nfs4_stat_to_errno(int); decode_restorefh_maxsz + \ decode_getattr_maxsz) #define NFS4_enc_pathconf_sz (compound_encode_hdr_maxsz + \ + encode_sequence_maxsz + \ encode_putfh_maxsz + \ encode_getattr_maxsz) #define NFS4_dec_pathconf_sz (compound_decode_hdr_maxsz + \ + decode_sequence_maxsz + \ decode_putfh_maxsz + \ decode_getattr_maxsz) #define NFS4_enc_statfs_sz (compound_encode_hdr_maxsz + \ + encode_sequence_maxsz + \ encode_putfh_maxsz + \ encode_statfs_maxsz) #define NFS4_dec_statfs_sz (compound_decode_hdr_maxsz + \ + decode_sequence_maxsz + \ decode_putfh_maxsz + \ decode_statfs_maxsz) #define NFS4_enc_server_caps_sz (compound_encode_hdr_maxsz + \ + encode_sequence_maxsz + \ encode_putfh_maxsz + \ encode_getattr_maxsz) #define NFS4_dec_server_caps_sz (compound_decode_hdr_maxsz + \ + decode_sequence_maxsz + \ decode_putfh_maxsz + \ decode_getattr_maxsz) #define NFS4_enc_delegreturn_sz (compound_encode_hdr_maxsz + \ + encode_sequence_maxsz + \ encode_putfh_maxsz + \ encode_delegreturn_maxsz + \ encode_getattr_maxsz) #define NFS4_dec_delegreturn_sz (compound_decode_hdr_maxsz + \ + decode_sequence_maxsz + \ decode_delegreturn_maxsz + \ decode_getattr_maxsz) #define NFS4_enc_getacl_sz (compound_encode_hdr_maxsz + \ + encode_sequence_maxsz + \ encode_putfh_maxsz + \ encode_getacl_maxsz) #define NFS4_dec_getacl_sz (compound_decode_hdr_maxsz + \ + decode_sequence_maxsz + \ decode_putfh_maxsz + \ decode_getacl_maxsz) #define NFS4_enc_setacl_sz (compound_encode_hdr_maxsz + \ + encode_sequence_maxsz + \ encode_putfh_maxsz + \ encode_setacl_maxsz) #define NFS4_dec_setacl_sz (compound_decode_hdr_maxsz + \ + decode_sequence_maxsz + \ decode_putfh_maxsz + \ decode_setacl_maxsz) #define NFS4_enc_fs_locations_sz \ (compound_encode_hdr_maxsz + \ + 
encode_sequence_maxsz + \ encode_putfh_maxsz + \ encode_lookup_maxsz + \ encode_fs_locations_maxsz) #define NFS4_dec_fs_locations_sz \ (compound_decode_hdr_maxsz + \ + decode_sequence_maxsz + \ decode_putfh_maxsz + \ decode_lookup_maxsz + \ decode_fs_locations_maxsz) +#if defined(CONFIG_NFS_V4_1) +#define NFS4_enc_exchange_id_sz \ + (compound_encode_hdr_maxsz + \ + encode_exchange_id_maxsz) +#define NFS4_dec_exchange_id_sz \ + (compound_decode_hdr_maxsz + \ + decode_exchange_id_maxsz) +#define NFS4_enc_create_session_sz \ + (compound_encode_hdr_maxsz + \ + encode_create_session_maxsz) +#define NFS4_dec_create_session_sz \ + (compound_decode_hdr_maxsz + \ + decode_create_session_maxsz) +#define NFS4_enc_destroy_session_sz (compound_encode_hdr_maxsz + \ + encode_destroy_session_maxsz) +#define NFS4_dec_destroy_session_sz (compound_decode_hdr_maxsz + \ + decode_destroy_session_maxsz) +#define NFS4_enc_sequence_sz \ + (compound_decode_hdr_maxsz + \ + encode_sequence_maxsz) +#define NFS4_dec_sequence_sz \ + (compound_decode_hdr_maxsz + \ + decode_sequence_maxsz) +#define NFS4_enc_get_lease_time_sz (compound_encode_hdr_maxsz + \ + encode_sequence_maxsz + \ + encode_putrootfh_maxsz + \ + encode_fsinfo_maxsz) +#define NFS4_dec_get_lease_time_sz (compound_decode_hdr_maxsz + \ + decode_sequence_maxsz + \ + decode_putrootfh_maxsz + \ + decode_fsinfo_maxsz) +#endif /* CONFIG_NFS_V4_1 */ static const umode_t nfs_type2fmt[] = { [NF4BAD] = 0, @@ -541,6 +698,8 @@ struct compound_hdr { __be32 * nops_p; uint32_t taglen; char * tag; + uint32_t replen; /* expected reply words */ + u32 minorversion; }; /* @@ -576,22 +735,31 @@ static void encode_string(struct xdr_stream *xdr, unsigned int len, const char * xdr_encode_opaque(p, str, len); } -static void encode_compound_hdr(struct xdr_stream *xdr, struct compound_hdr *hdr) +static void encode_compound_hdr(struct xdr_stream *xdr, + struct rpc_rqst *req, + struct compound_hdr *hdr) { __be32 *p; + struct rpc_auth *auth = 
req->rq_task->tk_msg.rpc_cred->cr_auth; + + /* initialize running count of expected bytes in reply. + * NOTE: the replied tag SHOULD be the same is the one sent, + * but this is not required as a MUST for the server to do so. */ + hdr->replen = RPC_REPHDRSIZE + auth->au_rslack + 3 + hdr->taglen; dprintk("encode_compound: tag=%.*s\n", (int)hdr->taglen, hdr->tag); BUG_ON(hdr->taglen > NFS4_MAXTAGLEN); RESERVE_SPACE(12+(XDR_QUADLEN(hdr->taglen)<<2)); WRITE32(hdr->taglen); WRITEMEM(hdr->tag, hdr->taglen); - WRITE32(NFS4_MINOR_VERSION); + WRITE32(hdr->minorversion); hdr->nops_p = p; WRITE32(hdr->nops); } static void encode_nops(struct compound_hdr *hdr) { + BUG_ON(hdr->nops > NFS4_MAX_OPS); *hdr->nops_p = htonl(hdr->nops); } @@ -736,6 +904,7 @@ static void encode_access(struct xdr_stream *xdr, u32 access, struct compound_hd WRITE32(OP_ACCESS); WRITE32(access); hdr->nops++; + hdr->replen += decode_access_maxsz; } static void encode_close(struct xdr_stream *xdr, const struct nfs_closeargs *arg, struct compound_hdr *hdr) @@ -747,6 +916,7 @@ static void encode_close(struct xdr_stream *xdr, const struct nfs_closeargs *arg WRITE32(arg->seqid->sequence->counter); WRITEMEM(arg->stateid->data, NFS4_STATEID_SIZE); hdr->nops++; + hdr->replen += decode_close_maxsz; } static void encode_commit(struct xdr_stream *xdr, const struct nfs_writeargs *args, struct compound_hdr *hdr) @@ -758,6 +928,7 @@ static void encode_commit(struct xdr_stream *xdr, const struct nfs_writeargs *ar WRITE64(args->offset); WRITE32(args->count); hdr->nops++; + hdr->replen += decode_commit_maxsz; } static void encode_create(struct xdr_stream *xdr, const struct nfs4_create_arg *create, struct compound_hdr *hdr) @@ -789,6 +960,7 @@ static void encode_create(struct xdr_stream *xdr, const struct nfs4_create_arg * WRITE32(create->name->len); WRITEMEM(create->name->name, create->name->len); hdr->nops++; + hdr->replen += decode_create_maxsz; encode_attrs(xdr, create->attrs, create->server); } @@ -802,6 +974,7 @@ 
static void encode_getattr_one(struct xdr_stream *xdr, uint32_t bitmap, struct c WRITE32(1); WRITE32(bitmap); hdr->nops++; + hdr->replen += decode_getattr_maxsz; } static void encode_getattr_two(struct xdr_stream *xdr, uint32_t bm0, uint32_t bm1, struct compound_hdr *hdr) @@ -814,6 +987,7 @@ static void encode_getattr_two(struct xdr_stream *xdr, uint32_t bm0, uint32_t bm WRITE32(bm0); WRITE32(bm1); hdr->nops++; + hdr->replen += decode_getattr_maxsz; } static void encode_getfattr(struct xdr_stream *xdr, const u32* bitmask, struct compound_hdr *hdr) @@ -841,6 +1015,7 @@ static void encode_getfh(struct xdr_stream *xdr, struct compound_hdr *hdr) RESERVE_SPACE(4); WRITE32(OP_GETFH); hdr->nops++; + hdr->replen += decode_getfh_maxsz; } static void encode_link(struct xdr_stream *xdr, const struct qstr *name, struct compound_hdr *hdr) @@ -852,6 +1027,7 @@ static void encode_link(struct xdr_stream *xdr, const struct qstr *name, struct WRITE32(name->len); WRITEMEM(name->name, name->len); hdr->nops++; + hdr->replen += decode_link_maxsz; } static inline int nfs4_lock_type(struct file_lock *fl, int block) @@ -899,6 +1075,7 @@ static void encode_lock(struct xdr_stream *xdr, const struct nfs_lock_args *args WRITE32(args->lock_seqid->sequence->counter); } hdr->nops++; + hdr->replen += decode_lock_maxsz; } static void encode_lockt(struct xdr_stream *xdr, const struct nfs_lockt_args *args, struct compound_hdr *hdr) @@ -915,6 +1092,7 @@ static void encode_lockt(struct xdr_stream *xdr, const struct nfs_lockt_args *ar WRITEMEM("lock id:", 8); WRITE64(args->lock_owner.id); hdr->nops++; + hdr->replen += decode_lockt_maxsz; } static void encode_locku(struct xdr_stream *xdr, const struct nfs_locku_args *args, struct compound_hdr *hdr) @@ -929,6 +1107,7 @@ static void encode_locku(struct xdr_stream *xdr, const struct nfs_locku_args *ar WRITE64(args->fl->fl_start); WRITE64(nfs4_lock_length(args->fl)); hdr->nops++; + hdr->replen += decode_locku_maxsz; } static void encode_lookup(struct 
xdr_stream *xdr, const struct qstr *name, struct compound_hdr *hdr) @@ -941,6 +1120,7 @@ static void encode_lookup(struct xdr_stream *xdr, const struct qstr *name, struc WRITE32(len); WRITEMEM(name->name, len); hdr->nops++; + hdr->replen += decode_lookup_maxsz; } static void encode_share_access(struct xdr_stream *xdr, fmode_t fmode) @@ -1080,6 +1260,7 @@ static void encode_open(struct xdr_stream *xdr, const struct nfs_openargs *arg, BUG(); } hdr->nops++; + hdr->replen += decode_open_maxsz; } static void encode_open_confirm(struct xdr_stream *xdr, const struct nfs_open_confirmargs *arg, struct compound_hdr *hdr) @@ -1091,6 +1272,7 @@ static void encode_open_confirm(struct xdr_stream *xdr, const struct nfs_open_co WRITEMEM(arg->stateid->data, NFS4_STATEID_SIZE); WRITE32(arg->seqid->sequence->counter); hdr->nops++; + hdr->replen += decode_open_confirm_maxsz; } static void encode_open_downgrade(struct xdr_stream *xdr, const struct nfs_closeargs *arg, struct compound_hdr *hdr) @@ -1103,6 +1285,7 @@ static void encode_open_downgrade(struct xdr_stream *xdr, const struct nfs_close WRITE32(arg->seqid->sequence->counter); encode_share_access(xdr, arg->fmode); hdr->nops++; + hdr->replen += decode_open_downgrade_maxsz; } static void @@ -1116,6 +1299,7 @@ encode_putfh(struct xdr_stream *xdr, const struct nfs_fh *fh, struct compound_hd WRITE32(len); WRITEMEM(fh->data, len); hdr->nops++; + hdr->replen += decode_putfh_maxsz; } static void encode_putrootfh(struct xdr_stream *xdr, struct compound_hdr *hdr) @@ -1125,6 +1309,7 @@ static void encode_putrootfh(struct xdr_stream *xdr, struct compound_hdr *hdr) RESERVE_SPACE(4); WRITE32(OP_PUTROOTFH); hdr->nops++; + hdr->replen += decode_putrootfh_maxsz; } static void encode_stateid(struct xdr_stream *xdr, const struct nfs_open_context *ctx) @@ -1153,6 +1338,7 @@ static void encode_read(struct xdr_stream *xdr, const struct nfs_readargs *args, WRITE64(args->offset); WRITE32(args->count); hdr->nops++; + hdr->replen += decode_read_maxsz; } 
static void encode_readdir(struct xdr_stream *xdr, const struct nfs4_readdir_arg *readdir, struct rpc_rqst *req, struct compound_hdr *hdr) @@ -1178,6 +1364,7 @@ static void encode_readdir(struct xdr_stream *xdr, const struct nfs4_readdir_arg WRITE32(attrs[0] & readdir->bitmask[0]); WRITE32(attrs[1] & readdir->bitmask[1]); hdr->nops++; + hdr->replen += decode_readdir_maxsz; dprintk("%s: cookie = %Lu, verifier = %08x:%08x, bitmap = %08x:%08x\n", __func__, (unsigned long long)readdir->cookie, @@ -1194,6 +1381,7 @@ static void encode_readlink(struct xdr_stream *xdr, const struct nfs4_readlink * RESERVE_SPACE(4); WRITE32(OP_READLINK); hdr->nops++; + hdr->replen += decode_readlink_maxsz; } static void encode_remove(struct xdr_stream *xdr, const struct qstr *name, struct compound_hdr *hdr) @@ -1205,6 +1393,7 @@ static void encode_remove(struct xdr_stream *xdr, const struct qstr *name, struc WRITE32(name->len); WRITEMEM(name->name, name->len); hdr->nops++; + hdr->replen += decode_remove_maxsz; } static void encode_rename(struct xdr_stream *xdr, const struct qstr *oldname, const struct qstr *newname, struct compound_hdr *hdr) @@ -1220,6 +1409,7 @@ static void encode_rename(struct xdr_stream *xdr, const struct qstr *oldname, co WRITE32(newname->len); WRITEMEM(newname->name, newname->len); hdr->nops++; + hdr->replen += decode_rename_maxsz; } static void encode_renew(struct xdr_stream *xdr, const struct nfs_client *client_stateid, struct compound_hdr *hdr) @@ -1230,6 +1420,7 @@ static void encode_renew(struct xdr_stream *xdr, const struct nfs_client *client WRITE32(OP_RENEW); WRITE64(client_stateid->cl_clientid); hdr->nops++; + hdr->replen += decode_renew_maxsz; } static void @@ -1240,6 +1431,7 @@ encode_restorefh(struct xdr_stream *xdr, struct compound_hdr *hdr) RESERVE_SPACE(4); WRITE32(OP_RESTOREFH); hdr->nops++; + hdr->replen += decode_restorefh_maxsz; } static int @@ -1259,6 +1451,7 @@ encode_setacl(struct xdr_stream *xdr, struct nfs_setaclargs *arg, struct compoun 
WRITE32(arg->acl_len); xdr_write_pages(xdr, arg->acl_pages, arg->acl_pgbase, arg->acl_len); hdr->nops++; + hdr->replen += decode_setacl_maxsz; return 0; } @@ -1270,6 +1463,7 @@ encode_savefh(struct xdr_stream *xdr, struct compound_hdr *hdr) RESERVE_SPACE(4); WRITE32(OP_SAVEFH); hdr->nops++; + hdr->replen += decode_savefh_maxsz; } static void encode_setattr(struct xdr_stream *xdr, const struct nfs_setattrargs *arg, const struct nfs_server *server, struct compound_hdr *hdr) @@ -1280,6 +1474,7 @@ static void encode_setattr(struct xdr_stream *xdr, const struct nfs_setattrargs WRITE32(OP_SETATTR); WRITEMEM(arg->stateid.data, NFS4_STATEID_SIZE); hdr->nops++; + hdr->replen += decode_setattr_maxsz; encode_attrs(xdr, arg->iap, server); } @@ -1299,6 +1494,7 @@ static void encode_setclientid(struct xdr_stream *xdr, const struct nfs4_setclie RESERVE_SPACE(4); WRITE32(setclientid->sc_cb_ident); hdr->nops++; + hdr->replen += decode_setclientid_maxsz; } static void encode_setclientid_confirm(struct xdr_stream *xdr, const struct nfs_client *client_state, struct compound_hdr *hdr) @@ -1310,6 +1506,7 @@ static void encode_setclientid_confirm(struct xdr_stream *xdr, const struct nfs_ WRITE64(client_state->cl_clientid); WRITEMEM(client_state->cl_confirm.data, NFS4_VERIFIER_SIZE); hdr->nops++; + hdr->replen += decode_setclientid_confirm_maxsz; } static void encode_write(struct xdr_stream *xdr, const struct nfs_writeargs *args, struct compound_hdr *hdr) @@ -1328,6 +1525,7 @@ static void encode_write(struct xdr_stream *xdr, const struct nfs_writeargs *arg xdr_write_pages(xdr, args->pages, args->pgbase, args->count); hdr->nops++; + hdr->replen += decode_write_maxsz; } static void encode_delegreturn(struct xdr_stream *xdr, const nfs4_stateid *stateid, struct compound_hdr *hdr) @@ -1339,11 +1537,163 @@ static void encode_delegreturn(struct xdr_stream *xdr, const nfs4_stateid *state WRITE32(OP_DELEGRETURN); WRITEMEM(stateid->data, NFS4_STATEID_SIZE); hdr->nops++; + hdr->replen += 
decode_delegreturn_maxsz; +} + +#if defined(CONFIG_NFS_V4_1) +/* NFSv4.1 operations */ +static void encode_exchange_id(struct xdr_stream *xdr, + struct nfs41_exchange_id_args *args, + struct compound_hdr *hdr) +{ + __be32 *p; + + RESERVE_SPACE(4 + sizeof(args->verifier->data)); + WRITE32(OP_EXCHANGE_ID); + WRITEMEM(args->verifier->data, sizeof(args->verifier->data)); + + encode_string(xdr, args->id_len, args->id); + + RESERVE_SPACE(12); + WRITE32(args->flags); + WRITE32(0); /* zero length state_protect4_a */ + WRITE32(0); /* zero length implementation id array */ + hdr->nops++; + hdr->replen += decode_exchange_id_maxsz; +} + +static void encode_create_session(struct xdr_stream *xdr, + struct nfs41_create_session_args *args, + struct compound_hdr *hdr) +{ + __be32 *p; + char machine_name[NFS4_MAX_MACHINE_NAME_LEN]; + uint32_t len; + struct nfs_client *clp = args->client; + + RESERVE_SPACE(4); + WRITE32(OP_CREATE_SESSION); + + RESERVE_SPACE(8); + WRITE64(clp->cl_ex_clid); + + RESERVE_SPACE(8); + WRITE32(clp->cl_seqid); /*Sequence id */ + WRITE32(args->flags); /*flags */ + + RESERVE_SPACE(2*28); /* 2 channel_attrs */ + /* Fore Channel */ + WRITE32(args->fc_attrs.headerpadsz); /* header padding size */ + WRITE32(args->fc_attrs.max_rqst_sz); /* max req size */ + WRITE32(args->fc_attrs.max_resp_sz); /* max resp size */ + WRITE32(args->fc_attrs.max_resp_sz_cached); /* Max resp sz cached */ + WRITE32(args->fc_attrs.max_ops); /* max operations */ + WRITE32(args->fc_attrs.max_reqs); /* max requests */ + WRITE32(0); /* rdmachannel_attrs */ + + /* Back Channel */ + WRITE32(args->fc_attrs.headerpadsz); /* header padding size */ + WRITE32(args->bc_attrs.max_rqst_sz); /* max req size */ + WRITE32(args->bc_attrs.max_resp_sz); /* max resp size */ + WRITE32(args->bc_attrs.max_resp_sz_cached); /* Max resp sz cached */ + WRITE32(args->bc_attrs.max_ops); /* max operations */ + WRITE32(args->bc_attrs.max_reqs); /* max requests */ + WRITE32(0); /* rdmachannel_attrs */ + + 
RESERVE_SPACE(4); + WRITE32(args->cb_program); /* cb_program */ + + RESERVE_SPACE(4); /* # of security flavors */ + WRITE32(1); + + RESERVE_SPACE(4); + WRITE32(RPC_AUTH_UNIX); /* auth_sys */ + + /* authsys_parms rfc1831 */ + RESERVE_SPACE(4); + WRITE32((u32)clp->cl_boot_time.tv_nsec); /* stamp */ + len = scnprintf(machine_name, sizeof(machine_name), "%s", + clp->cl_ipaddr); + RESERVE_SPACE(16 + len); + WRITE32(len); + WRITEMEM(machine_name, len); + WRITE32(0); /* UID */ + WRITE32(0); /* GID */ + WRITE32(0); /* No more gids */ + hdr->nops++; + hdr->replen += decode_create_session_maxsz; +} + +static void encode_destroy_session(struct xdr_stream *xdr, + struct nfs4_session *session, + struct compound_hdr *hdr) +{ + __be32 *p; + RESERVE_SPACE(4 + NFS4_MAX_SESSIONID_LEN); + WRITE32(OP_DESTROY_SESSION); + WRITEMEM(session->sess_id.data, NFS4_MAX_SESSIONID_LEN); + hdr->nops++; + hdr->replen += decode_destroy_session_maxsz; } +#endif /* CONFIG_NFS_V4_1 */ + +static void encode_sequence(struct xdr_stream *xdr, + const struct nfs4_sequence_args *args, + struct compound_hdr *hdr) +{ +#if defined(CONFIG_NFS_V4_1) + struct nfs4_session *session = args->sa_session; + struct nfs4_slot_table *tp; + struct nfs4_slot *slot; + __be32 *p; + + if (!session) + return; + + tp = &session->fc_slot_table; + + WARN_ON(args->sa_slotid == NFS4_MAX_SLOT_TABLE); + slot = tp->slots + args->sa_slotid; + + RESERVE_SPACE(4); + WRITE32(OP_SEQUENCE); + + /* + * Sessionid + seqid + slotid + max slotid + cache_this + */ + dprintk("%s: sessionid=%u:%u:%u:%u seqid=%d slotid=%d " + "max_slotid=%d cache_this=%d\n", + __func__, + ((u32 *)session->sess_id.data)[0], + ((u32 *)session->sess_id.data)[1], + ((u32 *)session->sess_id.data)[2], + ((u32 *)session->sess_id.data)[3], + slot->seq_nr, args->sa_slotid, + tp->highest_used_slotid, args->sa_cache_this); + RESERVE_SPACE(NFS4_MAX_SESSIONID_LEN + 16); + WRITEMEM(session->sess_id.data, NFS4_MAX_SESSIONID_LEN); + WRITE32(slot->seq_nr); + 
WRITE32(args->sa_slotid); + WRITE32(tp->highest_used_slotid); + WRITE32(args->sa_cache_this); + hdr->nops++; + hdr->replen += decode_sequence_maxsz; +#endif /* CONFIG_NFS_V4_1 */ +} + /* * END OF "GENERIC" ENCODE ROUTINES. */ +static u32 nfs4_xdr_minorversion(const struct nfs4_sequence_args *args) +{ +#if defined(CONFIG_NFS_V4_1) + if (args->sa_session) + return args->sa_session->clp->cl_minorversion; +#endif /* CONFIG_NFS_V4_1 */ + return 0; +} + /* * Encode an ACCESS request */ @@ -1351,11 +1701,12 @@ static int nfs4_xdr_enc_access(struct rpc_rqst *req, __be32 *p, const struct nfs { struct xdr_stream xdr; struct compound_hdr hdr = { - .nops = 0, + .minorversion = nfs4_xdr_minorversion(&args->seq_args), }; xdr_init_encode(&xdr, &req->rq_snd_buf, p); - encode_compound_hdr(&xdr, &hdr); + encode_compound_hdr(&xdr, req, &hdr); + encode_sequence(&xdr, &args->seq_args, &hdr); encode_putfh(&xdr, args->fh, &hdr); encode_access(&xdr, args->access, &hdr); encode_getfattr(&xdr, args->bitmask, &hdr); @@ -1370,11 +1721,12 @@ static int nfs4_xdr_enc_lookup(struct rpc_rqst *req, __be32 *p, const struct nfs { struct xdr_stream xdr; struct compound_hdr hdr = { - .nops = 0, + .minorversion = nfs4_xdr_minorversion(&args->seq_args), }; xdr_init_encode(&xdr, &req->rq_snd_buf, p); - encode_compound_hdr(&xdr, &hdr); + encode_compound_hdr(&xdr, req, &hdr); + encode_sequence(&xdr, &args->seq_args, &hdr); encode_putfh(&xdr, args->dir_fh, &hdr); encode_lookup(&xdr, args->name, &hdr); encode_getfh(&xdr, &hdr); @@ -1390,11 +1742,12 @@ static int nfs4_xdr_enc_lookup_root(struct rpc_rqst *req, __be32 *p, const struc { struct xdr_stream xdr; struct compound_hdr hdr = { - .nops = 0, + .minorversion = nfs4_xdr_minorversion(&args->seq_args), }; xdr_init_encode(&xdr, &req->rq_snd_buf, p); - encode_compound_hdr(&xdr, &hdr); + encode_compound_hdr(&xdr, req, &hdr); + encode_sequence(&xdr, &args->seq_args, &hdr); encode_putrootfh(&xdr, &hdr); encode_getfh(&xdr, &hdr); encode_getfattr(&xdr, 
args->bitmask, &hdr); @@ -1409,11 +1762,12 @@ static int nfs4_xdr_enc_remove(struct rpc_rqst *req, __be32 *p, const struct nfs { struct xdr_stream xdr; struct compound_hdr hdr = { - .nops = 0, + .minorversion = nfs4_xdr_minorversion(&args->seq_args), }; xdr_init_encode(&xdr, &req->rq_snd_buf, p); - encode_compound_hdr(&xdr, &hdr); + encode_compound_hdr(&xdr, req, &hdr); + encode_sequence(&xdr, &args->seq_args, &hdr); encode_putfh(&xdr, args->fh, &hdr); encode_remove(&xdr, &args->name, &hdr); encode_getfattr(&xdr, args->bitmask, &hdr); @@ -1428,11 +1782,12 @@ static int nfs4_xdr_enc_rename(struct rpc_rqst *req, __be32 *p, const struct nfs { struct xdr_stream xdr; struct compound_hdr hdr = { - .nops = 0, + .minorversion = nfs4_xdr_minorversion(&args->seq_args), }; xdr_init_encode(&xdr, &req->rq_snd_buf, p); - encode_compound_hdr(&xdr, &hdr); + encode_compound_hdr(&xdr, req, &hdr); + encode_sequence(&xdr, &args->seq_args, &hdr); encode_putfh(&xdr, args->old_dir, &hdr); encode_savefh(&xdr, &hdr); encode_putfh(&xdr, args->new_dir, &hdr); @@ -1451,11 +1806,12 @@ static int nfs4_xdr_enc_link(struct rpc_rqst *req, __be32 *p, const struct nfs4_ { struct xdr_stream xdr; struct compound_hdr hdr = { - .nops = 0, + .minorversion = nfs4_xdr_minorversion(&args->seq_args), }; xdr_init_encode(&xdr, &req->rq_snd_buf, p); - encode_compound_hdr(&xdr, &hdr); + encode_compound_hdr(&xdr, req, &hdr); + encode_sequence(&xdr, &args->seq_args, &hdr); encode_putfh(&xdr, args->fh, &hdr); encode_savefh(&xdr, &hdr); encode_putfh(&xdr, args->dir_fh, &hdr); @@ -1474,11 +1830,12 @@ static int nfs4_xdr_enc_create(struct rpc_rqst *req, __be32 *p, const struct nfs { struct xdr_stream xdr; struct compound_hdr hdr = { - .nops = 0, + .minorversion = nfs4_xdr_minorversion(&args->seq_args), }; xdr_init_encode(&xdr, &req->rq_snd_buf, p); - encode_compound_hdr(&xdr, &hdr); + encode_compound_hdr(&xdr, req, &hdr); + encode_sequence(&xdr, &args->seq_args, &hdr); encode_putfh(&xdr, args->dir_fh, &hdr); 
encode_savefh(&xdr, &hdr); encode_create(&xdr, args, &hdr); @@ -1505,11 +1862,12 @@ static int nfs4_xdr_enc_getattr(struct rpc_rqst *req, __be32 *p, const struct nf { struct xdr_stream xdr; struct compound_hdr hdr = { - .nops = 0, + .minorversion = nfs4_xdr_minorversion(&args->seq_args), }; xdr_init_encode(&xdr, &req->rq_snd_buf, p); - encode_compound_hdr(&xdr, &hdr); + encode_compound_hdr(&xdr, req, &hdr); + encode_sequence(&xdr, &args->seq_args, &hdr); encode_putfh(&xdr, args->fh, &hdr); encode_getfattr(&xdr, args->bitmask, &hdr); encode_nops(&hdr); @@ -1523,11 +1881,12 @@ static int nfs4_xdr_enc_close(struct rpc_rqst *req, __be32 *p, struct nfs_closea { struct xdr_stream xdr; struct compound_hdr hdr = { - .nops = 0, + .minorversion = nfs4_xdr_minorversion(&args->seq_args), }; xdr_init_encode(&xdr, &req->rq_snd_buf, p); - encode_compound_hdr(&xdr, &hdr); + encode_compound_hdr(&xdr, req, &hdr); + encode_sequence(&xdr, &args->seq_args, &hdr); encode_putfh(&xdr, args->fh, &hdr); encode_close(&xdr, args, &hdr); encode_getfattr(&xdr, args->bitmask, &hdr); @@ -1542,11 +1901,12 @@ static int nfs4_xdr_enc_open(struct rpc_rqst *req, __be32 *p, struct nfs_openarg { struct xdr_stream xdr; struct compound_hdr hdr = { - .nops = 0, + .minorversion = nfs4_xdr_minorversion(&args->seq_args), }; xdr_init_encode(&xdr, &req->rq_snd_buf, p); - encode_compound_hdr(&xdr, &hdr); + encode_compound_hdr(&xdr, req, &hdr); + encode_sequence(&xdr, &args->seq_args, &hdr); encode_putfh(&xdr, args->fh, &hdr); encode_savefh(&xdr, &hdr); encode_open(&xdr, args, &hdr); @@ -1569,7 +1929,7 @@ static int nfs4_xdr_enc_open_confirm(struct rpc_rqst *req, __be32 *p, struct nfs }; xdr_init_encode(&xdr, &req->rq_snd_buf, p); - encode_compound_hdr(&xdr, &hdr); + encode_compound_hdr(&xdr, req, &hdr); encode_putfh(&xdr, args->fh, &hdr); encode_open_confirm(&xdr, args, &hdr); encode_nops(&hdr); @@ -1583,11 +1943,12 @@ static int nfs4_xdr_enc_open_noattr(struct rpc_rqst *req, __be32 *p, struct nfs_ { struct 
xdr_stream xdr; struct compound_hdr hdr = { - .nops = 0, + .minorversion = nfs4_xdr_minorversion(&args->seq_args), }; xdr_init_encode(&xdr, &req->rq_snd_buf, p); - encode_compound_hdr(&xdr, &hdr); + encode_compound_hdr(&xdr, req, &hdr); + encode_sequence(&xdr, &args->seq_args, &hdr); encode_putfh(&xdr, args->fh, &hdr); encode_open(&xdr, args, &hdr); encode_getfattr(&xdr, args->bitmask, &hdr); @@ -1602,11 +1963,12 @@ static int nfs4_xdr_enc_open_downgrade(struct rpc_rqst *req, __be32 *p, struct n { struct xdr_stream xdr; struct compound_hdr hdr = { - .nops = 0, + .minorversion = nfs4_xdr_minorversion(&args->seq_args), }; xdr_init_encode(&xdr, &req->rq_snd_buf, p); - encode_compound_hdr(&xdr, &hdr); + encode_compound_hdr(&xdr, req, &hdr); + encode_sequence(&xdr, &args->seq_args, &hdr); encode_putfh(&xdr, args->fh, &hdr); encode_open_downgrade(&xdr, args, &hdr); encode_getfattr(&xdr, args->bitmask, &hdr); @@ -1621,11 +1983,12 @@ static int nfs4_xdr_enc_lock(struct rpc_rqst *req, __be32 *p, struct nfs_lock_ar { struct xdr_stream xdr; struct compound_hdr hdr = { - .nops = 0, + .minorversion = nfs4_xdr_minorversion(&args->seq_args), }; xdr_init_encode(&xdr, &req->rq_snd_buf, p); - encode_compound_hdr(&xdr, &hdr); + encode_compound_hdr(&xdr, req, &hdr); + encode_sequence(&xdr, &args->seq_args, &hdr); encode_putfh(&xdr, args->fh, &hdr); encode_lock(&xdr, args, &hdr); encode_nops(&hdr); @@ -1639,11 +2002,12 @@ static int nfs4_xdr_enc_lockt(struct rpc_rqst *req, __be32 *p, struct nfs_lockt_ { struct xdr_stream xdr; struct compound_hdr hdr = { - .nops = 0, + .minorversion = nfs4_xdr_minorversion(&args->seq_args), }; xdr_init_encode(&xdr, &req->rq_snd_buf, p); - encode_compound_hdr(&xdr, &hdr); + encode_compound_hdr(&xdr, req, &hdr); + encode_sequence(&xdr, &args->seq_args, &hdr); encode_putfh(&xdr, args->fh, &hdr); encode_lockt(&xdr, args, &hdr); encode_nops(&hdr); @@ -1657,11 +2021,12 @@ static int nfs4_xdr_enc_locku(struct rpc_rqst *req, __be32 *p, struct nfs_locku_ { 
struct xdr_stream xdr; struct compound_hdr hdr = { - .nops = 0, + .minorversion = nfs4_xdr_minorversion(&args->seq_args), }; xdr_init_encode(&xdr, &req->rq_snd_buf, p); - encode_compound_hdr(&xdr, &hdr); + encode_compound_hdr(&xdr, req, &hdr); + encode_sequence(&xdr, &args->seq_args, &hdr); encode_putfh(&xdr, args->fh, &hdr); encode_locku(&xdr, args, &hdr); encode_nops(&hdr); @@ -1675,22 +2040,16 @@ static int nfs4_xdr_enc_readlink(struct rpc_rqst *req, __be32 *p, const struct n { struct xdr_stream xdr; struct compound_hdr hdr = { - .nops = 0, + .minorversion = nfs4_xdr_minorversion(&args->seq_args), }; - struct rpc_auth *auth = req->rq_task->tk_msg.rpc_cred->cr_auth; - unsigned int replen; xdr_init_encode(&xdr, &req->rq_snd_buf, p); - encode_compound_hdr(&xdr, &hdr); + encode_compound_hdr(&xdr, req, &hdr); + encode_sequence(&xdr, &args->seq_args, &hdr); encode_putfh(&xdr, args->fh, &hdr); encode_readlink(&xdr, args, req, &hdr); - /* set up reply kvec - * toplevel_status + taglen + rescount + OP_PUTFH + status - * + OP_READLINK + status + string length = 8 - */ - replen = (RPC_REPHDRSIZE + auth->au_rslack + NFS4_dec_readlink_sz) << 2; - xdr_inline_pages(&req->rq_rcv_buf, replen, args->pages, + xdr_inline_pages(&req->rq_rcv_buf, hdr.replen << 2, args->pages, args->pgbase, args->pglen); encode_nops(&hdr); return 0; @@ -1703,25 +2062,19 @@ static int nfs4_xdr_enc_readdir(struct rpc_rqst *req, __be32 *p, const struct nf { struct xdr_stream xdr; struct compound_hdr hdr = { - .nops = 0, + .minorversion = nfs4_xdr_minorversion(&args->seq_args), }; - struct rpc_auth *auth = req->rq_task->tk_msg.rpc_cred->cr_auth; - int replen; xdr_init_encode(&xdr, &req->rq_snd_buf, p); - encode_compound_hdr(&xdr, &hdr); + encode_compound_hdr(&xdr, req, &hdr); + encode_sequence(&xdr, &args->seq_args, &hdr); encode_putfh(&xdr, args->fh, &hdr); encode_readdir(&xdr, args, req, &hdr); - /* set up reply kvec - * toplevel_status + taglen + rescount + OP_PUTFH + status - * + OP_READDIR + status + 
verifer(2) = 9 - */ - replen = (RPC_REPHDRSIZE + auth->au_rslack + NFS4_dec_readdir_sz) << 2; - xdr_inline_pages(&req->rq_rcv_buf, replen, args->pages, + xdr_inline_pages(&req->rq_rcv_buf, hdr.replen << 2, args->pages, args->pgbase, args->count); dprintk("%s: inlined page args = (%u, %p, %u, %u)\n", - __func__, replen, args->pages, + __func__, hdr.replen << 2, args->pages, args->pgbase, args->count); encode_nops(&hdr); return 0; @@ -1732,24 +2085,18 @@ static int nfs4_xdr_enc_readdir(struct rpc_rqst *req, __be32 *p, const struct nf */ static int nfs4_xdr_enc_read(struct rpc_rqst *req, __be32 *p, struct nfs_readargs *args) { - struct rpc_auth *auth = req->rq_task->tk_msg.rpc_cred->cr_auth; struct xdr_stream xdr; struct compound_hdr hdr = { - .nops = 0, + .minorversion = nfs4_xdr_minorversion(&args->seq_args), }; - int replen; xdr_init_encode(&xdr, &req->rq_snd_buf, p); - encode_compound_hdr(&xdr, &hdr); + encode_compound_hdr(&xdr, req, &hdr); + encode_sequence(&xdr, &args->seq_args, &hdr); encode_putfh(&xdr, args->fh, &hdr); encode_read(&xdr, args, &hdr); - /* set up reply kvec - * toplevel status + taglen=0 + rescount + OP_PUTFH + status - * + OP_READ + status + eof + datalen = 9 - */ - replen = (RPC_REPHDRSIZE + auth->au_rslack + NFS4_dec_read_sz) << 2; - xdr_inline_pages(&req->rq_rcv_buf, replen, + xdr_inline_pages(&req->rq_rcv_buf, hdr.replen << 2, args->pages, args->pgbase, args->count); req->rq_rcv_buf.flags |= XDRBUF_READ; encode_nops(&hdr); @@ -1763,11 +2110,12 @@ static int nfs4_xdr_enc_setattr(struct rpc_rqst *req, __be32 *p, struct nfs_seta { struct xdr_stream xdr; struct compound_hdr hdr = { - .nops = 0, + .minorversion = nfs4_xdr_minorversion(&args->seq_args), }; xdr_init_encode(&xdr, &req->rq_snd_buf, p); - encode_compound_hdr(&xdr, &hdr); + encode_compound_hdr(&xdr, req, &hdr); + encode_sequence(&xdr, &args->seq_args, &hdr); encode_putfh(&xdr, args->fh, &hdr); encode_setattr(&xdr, args, args->server, &hdr); encode_getfattr(&xdr, args->bitmask, &hdr); 
@@ -1783,20 +2131,19 @@ nfs4_xdr_enc_getacl(struct rpc_rqst *req, __be32 *p, struct nfs_getaclargs *args) { struct xdr_stream xdr; - struct rpc_auth *auth = req->rq_task->tk_msg.rpc_cred->cr_auth; struct compound_hdr hdr = { - .nops = 0, + .minorversion = nfs4_xdr_minorversion(&args->seq_args), }; - int replen; + uint32_t replen; xdr_init_encode(&xdr, &req->rq_snd_buf, p); - encode_compound_hdr(&xdr, &hdr); + encode_compound_hdr(&xdr, req, &hdr); + encode_sequence(&xdr, &args->seq_args, &hdr); encode_putfh(&xdr, args->fh, &hdr); + replen = hdr.replen + nfs4_fattr_bitmap_maxsz + 1; encode_getattr_two(&xdr, FATTR4_WORD0_ACL, 0, &hdr); - /* set up reply buffer: */ - replen = (RPC_REPHDRSIZE + auth->au_rslack + NFS4_dec_getacl_sz) << 2; - xdr_inline_pages(&req->rq_rcv_buf, replen, + xdr_inline_pages(&req->rq_rcv_buf, replen << 2, args->acl_pages, args->acl_pgbase, args->acl_len); encode_nops(&hdr); return 0; @@ -1809,11 +2156,12 @@ static int nfs4_xdr_enc_write(struct rpc_rqst *req, __be32 *p, struct nfs_writea { struct xdr_stream xdr; struct compound_hdr hdr = { - .nops = 0, + .minorversion = nfs4_xdr_minorversion(&args->seq_args), }; xdr_init_encode(&xdr, &req->rq_snd_buf, p); - encode_compound_hdr(&xdr, &hdr); + encode_compound_hdr(&xdr, req, &hdr); + encode_sequence(&xdr, &args->seq_args, &hdr); encode_putfh(&xdr, args->fh, &hdr); encode_write(&xdr, args, &hdr); req->rq_snd_buf.flags |= XDRBUF_WRITE; @@ -1829,11 +2177,12 @@ static int nfs4_xdr_enc_commit(struct rpc_rqst *req, __be32 *p, struct nfs_write { struct xdr_stream xdr; struct compound_hdr hdr = { - .nops = 0, + .minorversion = nfs4_xdr_minorversion(&args->seq_args), }; xdr_init_encode(&xdr, &req->rq_snd_buf, p); - encode_compound_hdr(&xdr, &hdr); + encode_compound_hdr(&xdr, req, &hdr); + encode_sequence(&xdr, &args->seq_args, &hdr); encode_putfh(&xdr, args->fh, &hdr); encode_commit(&xdr, args, &hdr); encode_getfattr(&xdr, args->bitmask, &hdr); @@ -1848,11 +2197,12 @@ static int nfs4_xdr_enc_fsinfo(struct 
rpc_rqst *req, __be32 *p, struct nfs4_fsin { struct xdr_stream xdr; struct compound_hdr hdr = { - .nops = 0, + .minorversion = nfs4_xdr_minorversion(&args->seq_args), }; xdr_init_encode(&xdr, &req->rq_snd_buf, p); - encode_compound_hdr(&xdr, &hdr); + encode_compound_hdr(&xdr, req, &hdr); + encode_sequence(&xdr, &args->seq_args, &hdr); encode_putfh(&xdr, args->fh, &hdr); encode_fsinfo(&xdr, args->bitmask, &hdr); encode_nops(&hdr); @@ -1866,11 +2216,12 @@ static int nfs4_xdr_enc_pathconf(struct rpc_rqst *req, __be32 *p, const struct n { struct xdr_stream xdr; struct compound_hdr hdr = { - .nops = 0, + .minorversion = nfs4_xdr_minorversion(&args->seq_args), }; xdr_init_encode(&xdr, &req->rq_snd_buf, p); - encode_compound_hdr(&xdr, &hdr); + encode_compound_hdr(&xdr, req, &hdr); + encode_sequence(&xdr, &args->seq_args, &hdr); encode_putfh(&xdr, args->fh, &hdr); encode_getattr_one(&xdr, args->bitmask[0] & nfs4_pathconf_bitmap[0], &hdr); @@ -1885,11 +2236,12 @@ static int nfs4_xdr_enc_statfs(struct rpc_rqst *req, __be32 *p, const struct nfs { struct xdr_stream xdr; struct compound_hdr hdr = { - .nops = 0, + .minorversion = nfs4_xdr_minorversion(&args->seq_args), }; xdr_init_encode(&xdr, &req->rq_snd_buf, p); - encode_compound_hdr(&xdr, &hdr); + encode_compound_hdr(&xdr, req, &hdr); + encode_sequence(&xdr, &args->seq_args, &hdr); encode_putfh(&xdr, args->fh, &hdr); encode_getattr_two(&xdr, args->bitmask[0] & nfs4_statfs_bitmap[0], args->bitmask[1] & nfs4_statfs_bitmap[1], &hdr); @@ -1900,16 +2252,18 @@ static int nfs4_xdr_enc_statfs(struct rpc_rqst *req, __be32 *p, const struct nfs /* * GETATTR_BITMAP request */ -static int nfs4_xdr_enc_server_caps(struct rpc_rqst *req, __be32 *p, const struct nfs_fh *fhandle) +static int nfs4_xdr_enc_server_caps(struct rpc_rqst *req, __be32 *p, + struct nfs4_server_caps_arg *args) { struct xdr_stream xdr; struct compound_hdr hdr = { - .nops = 0, + .minorversion = nfs4_xdr_minorversion(&args->seq_args), }; xdr_init_encode(&xdr, 
&req->rq_snd_buf, p); - encode_compound_hdr(&xdr, &hdr); - encode_putfh(&xdr, fhandle, &hdr); + encode_compound_hdr(&xdr, req, &hdr); + encode_sequence(&xdr, &args->seq_args, &hdr); + encode_putfh(&xdr, args->fhandle, &hdr); encode_getattr_one(&xdr, FATTR4_WORD0_SUPPORTED_ATTRS| FATTR4_WORD0_LINK_SUPPORT| FATTR4_WORD0_SYMLINK_SUPPORT| @@ -1929,7 +2283,7 @@ static int nfs4_xdr_enc_renew(struct rpc_rqst *req, __be32 *p, struct nfs_client }; xdr_init_encode(&xdr, &req->rq_snd_buf, p); - encode_compound_hdr(&xdr, &hdr); + encode_compound_hdr(&xdr, req, &hdr); encode_renew(&xdr, clp, &hdr); encode_nops(&hdr); return 0; @@ -1946,7 +2300,7 @@ static int nfs4_xdr_enc_setclientid(struct rpc_rqst *req, __be32 *p, struct nfs4 }; xdr_init_encode(&xdr, &req->rq_snd_buf, p); - encode_compound_hdr(&xdr, &hdr); + encode_compound_hdr(&xdr, req, &hdr); encode_setclientid(&xdr, sc, &hdr); encode_nops(&hdr); return 0; @@ -1964,7 +2318,7 @@ static int nfs4_xdr_enc_setclientid_confirm(struct rpc_rqst *req, __be32 *p, str const u32 lease_bitmap[2] = { FATTR4_WORD0_LEASE_TIME, 0 }; xdr_init_encode(&xdr, &req->rq_snd_buf, p); - encode_compound_hdr(&xdr, &hdr); + encode_compound_hdr(&xdr, req, &hdr); encode_setclientid_confirm(&xdr, clp, &hdr); encode_putrootfh(&xdr, &hdr); encode_fsinfo(&xdr, lease_bitmap, &hdr); @@ -1979,11 +2333,12 @@ static int nfs4_xdr_enc_delegreturn(struct rpc_rqst *req, __be32 *p, const struc { struct xdr_stream xdr; struct compound_hdr hdr = { - .nops = 0, + .minorversion = nfs4_xdr_minorversion(&args->seq_args), }; xdr_init_encode(&xdr, &req->rq_snd_buf, p); - encode_compound_hdr(&xdr, &hdr); + encode_compound_hdr(&xdr, req, &hdr); + encode_sequence(&xdr, &args->seq_args, &hdr); encode_putfh(&xdr, args->fhandle, &hdr); encode_delegreturn(&xdr, args->stateid, &hdr); encode_getfattr(&xdr, args->bitmask, &hdr); @@ -1998,28 +2353,119 @@ static int nfs4_xdr_enc_fs_locations(struct rpc_rqst *req, __be32 *p, struct nfs { struct xdr_stream xdr; struct compound_hdr hdr = { 
- .nops = 0, + .minorversion = nfs4_xdr_minorversion(&args->seq_args), }; - struct rpc_auth *auth = req->rq_task->tk_msg.rpc_cred->cr_auth; - int replen; + uint32_t replen; xdr_init_encode(&xdr, &req->rq_snd_buf, p); - encode_compound_hdr(&xdr, &hdr); + encode_compound_hdr(&xdr, req, &hdr); + encode_sequence(&xdr, &args->seq_args, &hdr); encode_putfh(&xdr, args->dir_fh, &hdr); encode_lookup(&xdr, args->name, &hdr); + replen = hdr.replen; /* get the attribute into args->page */ encode_fs_locations(&xdr, args->bitmask, &hdr); - /* set up reply - * toplevel_status + OP_PUTFH + status - * + OP_LOOKUP + status + OP_GETATTR + status = 7 - */ - replen = (RPC_REPHDRSIZE + auth->au_rslack + 7) << 2; - xdr_inline_pages(&req->rq_rcv_buf, replen, &args->page, + xdr_inline_pages(&req->rq_rcv_buf, replen << 2, &args->page, 0, PAGE_SIZE); encode_nops(&hdr); return 0; } +#if defined(CONFIG_NFS_V4_1) +/* + * EXCHANGE_ID request + */ +static int nfs4_xdr_enc_exchange_id(struct rpc_rqst *req, uint32_t *p, + struct nfs41_exchange_id_args *args) +{ + struct xdr_stream xdr; + struct compound_hdr hdr = { + .minorversion = args->client->cl_minorversion, + }; + + xdr_init_encode(&xdr, &req->rq_snd_buf, p); + encode_compound_hdr(&xdr, req, &hdr); + encode_exchange_id(&xdr, args, &hdr); + encode_nops(&hdr); + return 0; +} + +/* + * a CREATE_SESSION request + */ +static int nfs4_xdr_enc_create_session(struct rpc_rqst *req, uint32_t *p, + struct nfs41_create_session_args *args) +{ + struct xdr_stream xdr; + struct compound_hdr hdr = { + .minorversion = args->client->cl_minorversion, + }; + + xdr_init_encode(&xdr, &req->rq_snd_buf, p); + encode_compound_hdr(&xdr, req, &hdr); + encode_create_session(&xdr, args, &hdr); + encode_nops(&hdr); + return 0; +} + +/* + * a DESTROY_SESSION request + */ +static int nfs4_xdr_enc_destroy_session(struct rpc_rqst *req, uint32_t *p, + struct nfs4_session *session) +{ + struct xdr_stream xdr; + struct compound_hdr hdr = { + .minorversion = 
session->clp->cl_minorversion, + }; + + xdr_init_encode(&xdr, &req->rq_snd_buf, p); + encode_compound_hdr(&xdr, req, &hdr); + encode_destroy_session(&xdr, session, &hdr); + encode_nops(&hdr); + return 0; +} + +/* + * a SEQUENCE request + */ +static int nfs4_xdr_enc_sequence(struct rpc_rqst *req, uint32_t *p, + struct nfs4_sequence_args *args) +{ + struct xdr_stream xdr; + struct compound_hdr hdr = { + .minorversion = nfs4_xdr_minorversion(args), + }; + + xdr_init_encode(&xdr, &req->rq_snd_buf, p); + encode_compound_hdr(&xdr, req, &hdr); + encode_sequence(&xdr, args, &hdr); + encode_nops(&hdr); + return 0; +} + +/* + * a GET_LEASE_TIME request + */ +static int nfs4_xdr_enc_get_lease_time(struct rpc_rqst *req, uint32_t *p, + struct nfs4_get_lease_time_args *args) +{ + struct xdr_stream xdr; + struct compound_hdr hdr = { + .minorversion = nfs4_xdr_minorversion(&args->la_seq_args), + }; + const u32 lease_bitmap[2] = { FATTR4_WORD0_LEASE_TIME, 0 }; + + xdr_init_encode(&xdr, &req->rq_snd_buf, p); + encode_compound_hdr(&xdr, req, &hdr); + encode_sequence(&xdr, &args->la_seq_args, &hdr); + encode_putrootfh(&xdr, &hdr); + encode_fsinfo(&xdr, lease_bitmap, &hdr); + encode_nops(&hdr); + return 0; +} +#endif /* CONFIG_NFS_V4_1 */ + /* * START OF "GENERIC" DECODE ROUTINES. 
* These may look a little ugly since they are imported from a "generic" @@ -3657,7 +4103,7 @@ decode_savefh(struct xdr_stream *xdr) return decode_op_hdr(xdr, OP_SAVEFH); } -static int decode_setattr(struct xdr_stream *xdr, struct nfs_setattrres *res) +static int decode_setattr(struct xdr_stream *xdr) { __be32 *p; uint32_t bmlen; @@ -3735,6 +4181,169 @@ static int decode_delegreturn(struct xdr_stream *xdr) return decode_op_hdr(xdr, OP_DELEGRETURN); } +#if defined(CONFIG_NFS_V4_1) +static int decode_exchange_id(struct xdr_stream *xdr, + struct nfs41_exchange_id_res *res) +{ + __be32 *p; + uint32_t dummy; + int status; + struct nfs_client *clp = res->client; + + status = decode_op_hdr(xdr, OP_EXCHANGE_ID); + if (status) + return status; + + READ_BUF(8); + READ64(clp->cl_ex_clid); + READ_BUF(12); + READ32(clp->cl_seqid); + READ32(clp->cl_exchange_flags); + + /* We ask for SP4_NONE */ + READ32(dummy); + if (dummy != SP4_NONE) + return -EIO; + + /* Throw away minor_id */ + READ_BUF(8); + + /* Throw away Major id */ + READ_BUF(4); + READ32(dummy); + READ_BUF(dummy); + + /* Throw away server_scope */ + READ_BUF(4); + READ32(dummy); + READ_BUF(dummy); + + /* Throw away Implementation id array */ + READ_BUF(4); + READ32(dummy); + READ_BUF(dummy); + + return 0; +} + +static int decode_chan_attrs(struct xdr_stream *xdr, + struct nfs4_channel_attrs *attrs) +{ + __be32 *p; + u32 nr_attrs; + + READ_BUF(28); + READ32(attrs->headerpadsz); + READ32(attrs->max_rqst_sz); + READ32(attrs->max_resp_sz); + READ32(attrs->max_resp_sz_cached); + READ32(attrs->max_ops); + READ32(attrs->max_reqs); + READ32(nr_attrs); + if (unlikely(nr_attrs > 1)) { + printk(KERN_WARNING "%s: Invalid rdma channel attrs count %u\n", + __func__, nr_attrs); + return -EINVAL; + } + if (nr_attrs == 1) + READ_BUF(4); /* skip rdma_attrs */ + return 0; +} + +static int decode_create_session(struct xdr_stream *xdr, + struct nfs41_create_session_res *res) +{ + __be32 *p; + int status; + struct nfs_client *clp = 
res->client; + struct nfs4_session *session = clp->cl_session; + + status = decode_op_hdr(xdr, OP_CREATE_SESSION); + + if (status) + return status; + + /* sessionid */ + READ_BUF(NFS4_MAX_SESSIONID_LEN); + COPYMEM(&session->sess_id, NFS4_MAX_SESSIONID_LEN); + + /* seqid, flags */ + READ_BUF(8); + READ32(clp->cl_seqid); + READ32(session->flags); + + /* Channel attributes */ + status = decode_chan_attrs(xdr, &session->fc_attrs); + if (!status) + status = decode_chan_attrs(xdr, &session->bc_attrs); + return status; +} + +static int decode_destroy_session(struct xdr_stream *xdr, void *dummy) +{ + return decode_op_hdr(xdr, OP_DESTROY_SESSION); +} +#endif /* CONFIG_NFS_V4_1 */ + +static int decode_sequence(struct xdr_stream *xdr, + struct nfs4_sequence_res *res, + struct rpc_rqst *rqstp) +{ +#if defined(CONFIG_NFS_V4_1) + struct nfs4_slot *slot; + struct nfs4_sessionid id; + u32 dummy; + int status; + __be32 *p; + + if (!res->sr_session) + return 0; + + status = decode_op_hdr(xdr, OP_SEQUENCE); + if (status) + goto out_err; + + /* + * If the server returns different values for sessionID, slotID or + * sequence number, the server is looney tunes. 
+ */ + status = -ESERVERFAULT; + + slot = &res->sr_session->fc_slot_table.slots[res->sr_slotid]; + READ_BUF(NFS4_MAX_SESSIONID_LEN + 20); + COPYMEM(id.data, NFS4_MAX_SESSIONID_LEN); + if (memcmp(id.data, res->sr_session->sess_id.data, + NFS4_MAX_SESSIONID_LEN)) { + dprintk("%s Invalid session id\n", __func__); + goto out_err; + } + /* seqid */ + READ32(dummy); + if (dummy != slot->seq_nr) { + dprintk("%s Invalid sequence number\n", __func__); + goto out_err; + } + /* slot id */ + READ32(dummy); + if (dummy != res->sr_slotid) { + dprintk("%s Invalid slot id\n", __func__); + goto out_err; + } + /* highest slot id - currently not processed */ + READ32(dummy); + /* target highest slot id - currently not processed */ + READ32(dummy); + /* result flags - currently not processed */ + READ32(dummy); + status = 0; +out_err: + res->sr_status = status; + return status; +#else /* CONFIG_NFS_V4_1 */ + return 0; +#endif /* CONFIG_NFS_V4_1 */ +} + /* * END OF "GENERIC" DECODE ROUTINES. */ @@ -3752,6 +4361,9 @@ static int nfs4_xdr_dec_open_downgrade(struct rpc_rqst *rqstp, __be32 *p, struct status = decode_compound_hdr(&xdr, &hdr); if (status) goto out; + status = decode_sequence(&xdr, &res->seq_res, rqstp); + if (status) + goto out; status = decode_putfh(&xdr); if (status) goto out; @@ -3773,7 +4385,11 @@ static int nfs4_xdr_dec_access(struct rpc_rqst *rqstp, __be32 *p, struct nfs4_ac int status; xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p); - if ((status = decode_compound_hdr(&xdr, &hdr)) != 0) + status = decode_compound_hdr(&xdr, &hdr); + if (status) + goto out; + status = decode_sequence(&xdr, &res->seq_res, rqstp); + if (status) goto out; status = decode_putfh(&xdr); if (status != 0) @@ -3796,7 +4412,11 @@ static int nfs4_xdr_dec_lookup(struct rpc_rqst *rqstp, __be32 *p, struct nfs4_lo int status; xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p); - if ((status = decode_compound_hdr(&xdr, &hdr)) != 0) + status = decode_compound_hdr(&xdr, &hdr); + if (status) + goto out; + status = 
decode_sequence(&xdr, &res->seq_res, rqstp); + if (status) goto out; if ((status = decode_putfh(&xdr)) != 0) goto out; @@ -3819,7 +4439,11 @@ static int nfs4_xdr_dec_lookup_root(struct rpc_rqst *rqstp, __be32 *p, struct nf int status; xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p); - if ((status = decode_compound_hdr(&xdr, &hdr)) != 0) + status = decode_compound_hdr(&xdr, &hdr); + if (status) + goto out; + status = decode_sequence(&xdr, &res->seq_res, rqstp); + if (status) goto out; if ((status = decode_putrootfh(&xdr)) != 0) goto out; @@ -3839,7 +4463,11 @@ static int nfs4_xdr_dec_remove(struct rpc_rqst *rqstp, __be32 *p, struct nfs_rem int status; xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p); - if ((status = decode_compound_hdr(&xdr, &hdr)) != 0) + status = decode_compound_hdr(&xdr, &hdr); + if (status) + goto out; + status = decode_sequence(&xdr, &res->seq_res, rqstp); + if (status) goto out; if ((status = decode_putfh(&xdr)) != 0) goto out; @@ -3860,7 +4488,11 @@ static int nfs4_xdr_dec_rename(struct rpc_rqst *rqstp, __be32 *p, struct nfs4_re int status; xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p); - if ((status = decode_compound_hdr(&xdr, &hdr)) != 0) + status = decode_compound_hdr(&xdr, &hdr); + if (status) + goto out; + status = decode_sequence(&xdr, &res->seq_res, rqstp); + if (status) goto out; if ((status = decode_putfh(&xdr)) != 0) goto out; @@ -3890,7 +4522,11 @@ static int nfs4_xdr_dec_link(struct rpc_rqst *rqstp, __be32 *p, struct nfs4_link int status; xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p); - if ((status = decode_compound_hdr(&xdr, &hdr)) != 0) + status = decode_compound_hdr(&xdr, &hdr); + if (status) + goto out; + status = decode_sequence(&xdr, &res->seq_res, rqstp); + if (status) goto out; if ((status = decode_putfh(&xdr)) != 0) goto out; @@ -3923,7 +4559,11 @@ static int nfs4_xdr_dec_create(struct rpc_rqst *rqstp, __be32 *p, struct nfs4_cr int status; xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p); - if ((status = decode_compound_hdr(&xdr, 
&hdr)) != 0) + status = decode_compound_hdr(&xdr, &hdr); + if (status) + goto out; + status = decode_sequence(&xdr, &res->seq_res, rqstp); + if (status) goto out; if ((status = decode_putfh(&xdr)) != 0) goto out; @@ -3963,6 +4603,9 @@ static int nfs4_xdr_dec_getattr(struct rpc_rqst *rqstp, __be32 *p, struct nfs4_g status = decode_compound_hdr(&xdr, &hdr); if (status) goto out; + status = decode_sequence(&xdr, &res->seq_res, rqstp); + if (status) + goto out; status = decode_putfh(&xdr); if (status) goto out; @@ -3979,12 +4622,13 @@ nfs4_xdr_enc_setacl(struct rpc_rqst *req, __be32 *p, struct nfs_setaclargs *args { struct xdr_stream xdr; struct compound_hdr hdr = { - .nops = 0, + .minorversion = nfs4_xdr_minorversion(&args->seq_args), }; int status; xdr_init_encode(&xdr, &req->rq_snd_buf, p); - encode_compound_hdr(&xdr, &hdr); + encode_compound_hdr(&xdr, req, &hdr); + encode_sequence(&xdr, &args->seq_args, &hdr); encode_putfh(&xdr, args->fh, &hdr); status = encode_setacl(&xdr, args, &hdr); encode_nops(&hdr); @@ -3995,7 +4639,8 @@ nfs4_xdr_enc_setacl(struct rpc_rqst *req, __be32 *p, struct nfs_setaclargs *args * Decode SETACL response */ static int -nfs4_xdr_dec_setacl(struct rpc_rqst *rqstp, __be32 *p, void *res) +nfs4_xdr_dec_setacl(struct rpc_rqst *rqstp, __be32 *p, + struct nfs_setaclres *res) { struct xdr_stream xdr; struct compound_hdr hdr; @@ -4005,10 +4650,13 @@ nfs4_xdr_dec_setacl(struct rpc_rqst *rqstp, __be32 *p, void *res) status = decode_compound_hdr(&xdr, &hdr); if (status) goto out; + status = decode_sequence(&xdr, &res->seq_res, rqstp); + if (status) + goto out; status = decode_putfh(&xdr); if (status) goto out; - status = decode_setattr(&xdr, res); + status = decode_setattr(&xdr); out: return status; } @@ -4017,7 +4665,8 @@ out: * Decode GETACL response */ static int -nfs4_xdr_dec_getacl(struct rpc_rqst *rqstp, __be32 *p, size_t *acl_len) +nfs4_xdr_dec_getacl(struct rpc_rqst *rqstp, __be32 *p, + struct nfs_getaclres *res) { struct xdr_stream xdr; 
struct compound_hdr hdr; @@ -4027,10 +4676,13 @@ nfs4_xdr_dec_getacl(struct rpc_rqst *rqstp, __be32 *p, size_t *acl_len) status = decode_compound_hdr(&xdr, &hdr); if (status) goto out; + status = decode_sequence(&xdr, &res->seq_res, rqstp); + if (status) + goto out; status = decode_putfh(&xdr); if (status) goto out; - status = decode_getacl(&xdr, rqstp, acl_len); + status = decode_getacl(&xdr, rqstp, &res->acl_len); out: return status; @@ -4049,6 +4701,9 @@ static int nfs4_xdr_dec_close(struct rpc_rqst *rqstp, __be32 *p, struct nfs_clos status = decode_compound_hdr(&xdr, &hdr); if (status) goto out; + status = decode_sequence(&xdr, &res->seq_res, rqstp); + if (status) + goto out; status = decode_putfh(&xdr); if (status) goto out; @@ -4079,6 +4734,9 @@ static int nfs4_xdr_dec_open(struct rpc_rqst *rqstp, __be32 *p, struct nfs_openr status = decode_compound_hdr(&xdr, &hdr); if (status) goto out; + status = decode_sequence(&xdr, &res->seq_res, rqstp); + if (status) + goto out; status = decode_putfh(&xdr); if (status) goto out; @@ -4133,6 +4791,9 @@ static int nfs4_xdr_dec_open_noattr(struct rpc_rqst *rqstp, __be32 *p, struct nf status = decode_compound_hdr(&xdr, &hdr); if (status) goto out; + status = decode_sequence(&xdr, &res->seq_res, rqstp); + if (status) + goto out; status = decode_putfh(&xdr); if (status) goto out; @@ -4157,10 +4818,13 @@ static int nfs4_xdr_dec_setattr(struct rpc_rqst *rqstp, __be32 *p, struct nfs_se status = decode_compound_hdr(&xdr, &hdr); if (status) goto out; + status = decode_sequence(&xdr, &res->seq_res, rqstp); + if (status) + goto out; status = decode_putfh(&xdr); if (status) goto out; - status = decode_setattr(&xdr, res); + status = decode_setattr(&xdr); if (status) goto out; decode_getfattr(&xdr, res->fattr, res->server); @@ -4181,6 +4845,9 @@ static int nfs4_xdr_dec_lock(struct rpc_rqst *rqstp, __be32 *p, struct nfs_lock_ status = decode_compound_hdr(&xdr, &hdr); if (status) goto out; + status = decode_sequence(&xdr, &res->seq_res, 
rqstp); + if (status) + goto out; status = decode_putfh(&xdr); if (status) goto out; @@ -4202,6 +4869,9 @@ static int nfs4_xdr_dec_lockt(struct rpc_rqst *rqstp, __be32 *p, struct nfs_lock status = decode_compound_hdr(&xdr, &hdr); if (status) goto out; + status = decode_sequence(&xdr, &res->seq_res, rqstp); + if (status) + goto out; status = decode_putfh(&xdr); if (status) goto out; @@ -4223,6 +4893,9 @@ static int nfs4_xdr_dec_locku(struct rpc_rqst *rqstp, __be32 *p, struct nfs_lock status = decode_compound_hdr(&xdr, &hdr); if (status) goto out; + status = decode_sequence(&xdr, &res->seq_res, rqstp); + if (status) + goto out; status = decode_putfh(&xdr); if (status) goto out; @@ -4234,7 +4907,8 @@ out: /* * Decode READLINK response */ -static int nfs4_xdr_dec_readlink(struct rpc_rqst *rqstp, __be32 *p, void *res) +static int nfs4_xdr_dec_readlink(struct rpc_rqst *rqstp, __be32 *p, + struct nfs4_readlink_res *res) { struct xdr_stream xdr; struct compound_hdr hdr; @@ -4244,6 +4918,9 @@ static int nfs4_xdr_dec_readlink(struct rpc_rqst *rqstp, __be32 *p, void *res) status = decode_compound_hdr(&xdr, &hdr); if (status) goto out; + status = decode_sequence(&xdr, &res->seq_res, rqstp); + if (status) + goto out; status = decode_putfh(&xdr); if (status) goto out; @@ -4265,6 +4942,9 @@ static int nfs4_xdr_dec_readdir(struct rpc_rqst *rqstp, __be32 *p, struct nfs4_r status = decode_compound_hdr(&xdr, &hdr); if (status) goto out; + status = decode_sequence(&xdr, &res->seq_res, rqstp); + if (status) + goto out; status = decode_putfh(&xdr); if (status) goto out; @@ -4286,6 +4966,9 @@ static int nfs4_xdr_dec_read(struct rpc_rqst *rqstp, __be32 *p, struct nfs_readr status = decode_compound_hdr(&xdr, &hdr); if (status) goto out; + status = decode_sequence(&xdr, &res->seq_res, rqstp); + if (status) + goto out; status = decode_putfh(&xdr); if (status) goto out; @@ -4309,6 +4992,9 @@ static int nfs4_xdr_dec_write(struct rpc_rqst *rqstp, __be32 *p, struct nfs_writ status = 
decode_compound_hdr(&xdr, &hdr); if (status) goto out; + status = decode_sequence(&xdr, &res->seq_res, rqstp); + if (status) + goto out; status = decode_putfh(&xdr); if (status) goto out; @@ -4335,6 +5021,9 @@ static int nfs4_xdr_dec_commit(struct rpc_rqst *rqstp, __be32 *p, struct nfs_wri status = decode_compound_hdr(&xdr, &hdr); if (status) goto out; + status = decode_sequence(&xdr, &res->seq_res, rqstp); + if (status) + goto out; status = decode_putfh(&xdr); if (status) goto out; @@ -4349,7 +5038,8 @@ out: /* * FSINFO request */ -static int nfs4_xdr_dec_fsinfo(struct rpc_rqst *req, __be32 *p, struct nfs_fsinfo *fsinfo) +static int nfs4_xdr_dec_fsinfo(struct rpc_rqst *req, __be32 *p, + struct nfs4_fsinfo_res *res) { struct xdr_stream xdr; struct compound_hdr hdr; @@ -4358,16 +5048,19 @@ static int nfs4_xdr_dec_fsinfo(struct rpc_rqst *req, __be32 *p, struct nfs_fsinf xdr_init_decode(&xdr, &req->rq_rcv_buf, p); status = decode_compound_hdr(&xdr, &hdr); if (!status) + status = decode_sequence(&xdr, &res->seq_res, req); + if (!status) status = decode_putfh(&xdr); if (!status) - status = decode_fsinfo(&xdr, fsinfo); + status = decode_fsinfo(&xdr, res->fsinfo); return status; } /* * PATHCONF request */ -static int nfs4_xdr_dec_pathconf(struct rpc_rqst *req, __be32 *p, struct nfs_pathconf *pathconf) +static int nfs4_xdr_dec_pathconf(struct rpc_rqst *req, __be32 *p, + struct nfs4_pathconf_res *res) { struct xdr_stream xdr; struct compound_hdr hdr; @@ -4376,16 +5069,19 @@ static int nfs4_xdr_dec_pathconf(struct rpc_rqst *req, __be32 *p, struct nfs_pat xdr_init_decode(&xdr, &req->rq_rcv_buf, p); status = decode_compound_hdr(&xdr, &hdr); if (!status) + status = decode_sequence(&xdr, &res->seq_res, req); + if (!status) status = decode_putfh(&xdr); if (!status) - status = decode_pathconf(&xdr, pathconf); + status = decode_pathconf(&xdr, res->pathconf); return status; } /* * STATFS request */ -static int nfs4_xdr_dec_statfs(struct rpc_rqst *req, __be32 *p, struct nfs_fsstat 
*fsstat) +static int nfs4_xdr_dec_statfs(struct rpc_rqst *req, __be32 *p, + struct nfs4_statfs_res *res) { struct xdr_stream xdr; struct compound_hdr hdr; @@ -4394,9 +5090,11 @@ static int nfs4_xdr_dec_statfs(struct rpc_rqst *req, __be32 *p, struct nfs_fssta xdr_init_decode(&xdr, &req->rq_rcv_buf, p); status = decode_compound_hdr(&xdr, &hdr); if (!status) + status = decode_sequence(&xdr, &res->seq_res, req); + if (!status) status = decode_putfh(&xdr); if (!status) - status = decode_statfs(&xdr, fsstat); + status = decode_statfs(&xdr, res->fsstat); return status; } @@ -4410,7 +5108,11 @@ static int nfs4_xdr_dec_server_caps(struct rpc_rqst *req, __be32 *p, struct nfs4 int status; xdr_init_decode(&xdr, &req->rq_rcv_buf, p); - if ((status = decode_compound_hdr(&xdr, &hdr)) != 0) + status = decode_compound_hdr(&xdr, &hdr); + if (status) + goto out; + status = decode_sequence(&xdr, &res->seq_res, req); + if (status) goto out; if ((status = decode_putfh(&xdr)) != 0) goto out; @@ -4483,7 +5185,10 @@ static int nfs4_xdr_dec_delegreturn(struct rpc_rqst *rqstp, __be32 *p, struct nf xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p); status = decode_compound_hdr(&xdr, &hdr); - if (status != 0) + if (status) + goto out; + status = decode_sequence(&xdr, &res->seq_res, rqstp); + if (status) goto out; status = decode_putfh(&xdr); if (status != 0) @@ -4497,7 +5202,8 @@ out: /* * FS_LOCATIONS request */ -static int nfs4_xdr_dec_fs_locations(struct rpc_rqst *req, __be32 *p, struct nfs4_fs_locations *res) +static int nfs4_xdr_dec_fs_locations(struct rpc_rqst *req, __be32 *p, + struct nfs4_fs_locations_res *res) { struct xdr_stream xdr; struct compound_hdr hdr; @@ -4505,18 +5211,113 @@ static int nfs4_xdr_dec_fs_locations(struct rpc_rqst *req, __be32 *p, struct nfs xdr_init_decode(&xdr, &req->rq_rcv_buf, p); status = decode_compound_hdr(&xdr, &hdr); - if (status != 0) + if (status) + goto out; + status = decode_sequence(&xdr, &res->seq_res, req); + if (status) goto out; if ((status = 
decode_putfh(&xdr)) != 0) goto out; if ((status = decode_lookup(&xdr)) != 0) goto out; xdr_enter_page(&xdr, PAGE_SIZE); - status = decode_getfattr(&xdr, &res->fattr, res->server); + status = decode_getfattr(&xdr, &res->fs_locations->fattr, + res->fs_locations->server); out: return status; } +#if defined(CONFIG_NFS_V4_1) +/* + * EXCHANGE_ID request + */ +static int nfs4_xdr_dec_exchange_id(struct rpc_rqst *rqstp, uint32_t *p, + void *res) +{ + struct xdr_stream xdr; + struct compound_hdr hdr; + int status; + + xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p); + status = decode_compound_hdr(&xdr, &hdr); + if (!status) + status = decode_exchange_id(&xdr, res); + return status; +} + +/* + * a CREATE_SESSION request + */ +static int nfs4_xdr_dec_create_session(struct rpc_rqst *rqstp, uint32_t *p, + struct nfs41_create_session_res *res) +{ + struct xdr_stream xdr; + struct compound_hdr hdr; + int status; + + xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p); + status = decode_compound_hdr(&xdr, &hdr); + if (!status) + status = decode_create_session(&xdr, res); + return status; +} + +/* + * a DESTROY_SESSION request + */ +static int nfs4_xdr_dec_destroy_session(struct rpc_rqst *rqstp, uint32_t *p, + void *dummy) +{ + struct xdr_stream xdr; + struct compound_hdr hdr; + int status; + + xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p); + status = decode_compound_hdr(&xdr, &hdr); + if (!status) + status = decode_destroy_session(&xdr, dummy); + return status; +} + +/* + * a SEQUENCE request + */ +static int nfs4_xdr_dec_sequence(struct rpc_rqst *rqstp, uint32_t *p, + struct nfs4_sequence_res *res) +{ + struct xdr_stream xdr; + struct compound_hdr hdr; + int status; + + xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p); + status = decode_compound_hdr(&xdr, &hdr); + if (!status) + status = decode_sequence(&xdr, res, rqstp); + return status; +} + +/* + * a GET_LEASE_TIME request + */ +static int nfs4_xdr_dec_get_lease_time(struct rpc_rqst *rqstp, uint32_t *p, + struct nfs4_get_lease_time_res *res) 
+{ + struct xdr_stream xdr; + struct compound_hdr hdr; + int status; + + xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p); + status = decode_compound_hdr(&xdr, &hdr); + if (!status) + status = decode_sequence(&xdr, &res->lr_seq_res, rqstp); + if (!status) + status = decode_putrootfh(&xdr); + if (!status) + status = decode_fsinfo(&xdr, res->lr_fsinfo); + return status; +} +#endif /* CONFIG_NFS_V4_1 */ + __be32 *nfs4_decode_dirent(__be32 *p, struct nfs_entry *entry, int plus) { uint32_t bitmap[2] = {0}; @@ -4686,6 +5487,13 @@ struct rpc_procinfo nfs4_procedures[] = { PROC(GETACL, enc_getacl, dec_getacl), PROC(SETACL, enc_setacl, dec_setacl), PROC(FS_LOCATIONS, enc_fs_locations, dec_fs_locations), +#if defined(CONFIG_NFS_V4_1) + PROC(EXCHANGE_ID, enc_exchange_id, dec_exchange_id), + PROC(CREATE_SESSION, enc_create_session, dec_create_session), + PROC(DESTROY_SESSION, enc_destroy_session, dec_destroy_session), + PROC(SEQUENCE, enc_sequence, dec_sequence), + PROC(GET_LEASE_TIME, enc_get_lease_time, dec_get_lease_time), +#endif /* CONFIG_NFS_V4_1 */ }; struct rpc_version nfs_version4 = { diff --git a/fs/nfs/nfsroot.c b/fs/nfs/nfsroot.c index e3ed590..8c55b27 100644 --- a/fs/nfs/nfsroot.c +++ b/fs/nfs/nfsroot.c @@ -92,6 +92,9 @@ #undef NFSROOT_DEBUG #define NFSDBG_FACILITY NFSDBG_ROOT +/* Default port to use if server is not running a portmapper */ +#define NFS_MNT_PORT 627 + /* Default path we try to mount. "%s" gets replaced by our IP address */ #define NFS_ROOT "/tftpboot/%s" @@ -487,6 +490,7 @@ static int __init root_nfs_get_handle(void) { struct nfs_fh fh; struct sockaddr_in sin; + unsigned int auth_flav_len = 0; struct nfs_mount_request request = { .sap = (struct sockaddr *)&sin, .salen = sizeof(sin), @@ -496,6 +500,7 @@ static int __init root_nfs_get_handle(void) .protocol = (nfs_data.flags & NFS_MOUNT_TCP) ? 
XPRT_TRANSPORT_TCP : XPRT_TRANSPORT_UDP, .fh = &fh, + .auth_flav_len = &auth_flav_len, }; int status; diff --git a/fs/nfs/read.c b/fs/nfs/read.c index 4ace3c5..96c4ebf 100644 --- a/fs/nfs/read.c +++ b/fs/nfs/read.c @@ -22,6 +22,7 @@ #include <asm/system.h> +#include "nfs4_fs.h" #include "internal.h" #include "iostat.h" #include "fscache.h" @@ -46,6 +47,7 @@ struct nfs_read_data *nfs_readdata_alloc(unsigned int pagecount) memset(p, 0, sizeof(*p)); INIT_LIST_HEAD(&p->pages); p->npages = pagecount; + p->res.seq_res.sr_slotid = NFS4_MAX_SLOT_TABLE; if (pagecount <= ARRAY_SIZE(p->page_array)) p->pagevec = p->page_array; else { @@ -357,19 +359,25 @@ static void nfs_readpage_retry(struct rpc_task *task, struct nfs_read_data *data struct nfs_readres *resp = &data->res; if (resp->eof || resp->count == argp->count) - return; + goto out; /* This is a short read! */ nfs_inc_stats(data->inode, NFSIOS_SHORTREAD); /* Has the server at least made some progress? */ if (resp->count == 0) - return; + goto out; /* Yes, so retry the read at the end of the data */ argp->offset += resp->count; argp->pgbase += resp->count; argp->count -= resp->count; - rpc_restart_call(task); + nfs4_restart_rpc(task, NFS_SERVER(data->inode)->nfs_client); + return; +out: + nfs4_sequence_free_slot(NFS_SERVER(data->inode)->nfs_client, + &data->res.seq_res); + return; + } /* @@ -406,7 +414,23 @@ static void nfs_readpage_release_partial(void *calldata) nfs_readdata_release(calldata); } +#if defined(CONFIG_NFS_V4_1) +void nfs_read_prepare(struct rpc_task *task, void *calldata) +{ + struct nfs_read_data *data = calldata; + + if (nfs4_setup_sequence(NFS_SERVER(data->inode)->nfs_client, + &data->args.seq_args, &data->res.seq_res, + 0, task)) + return; + rpc_call_start(task); +} +#endif /* CONFIG_NFS_V4_1 */ + static const struct rpc_call_ops nfs_read_partial_ops = { +#if defined(CONFIG_NFS_V4_1) + .rpc_call_prepare = nfs_read_prepare, +#endif /* CONFIG_NFS_V4_1 */ .rpc_call_done = nfs_readpage_result_partial, 
.rpc_release = nfs_readpage_release_partial, }; @@ -470,6 +494,9 @@ static void nfs_readpage_release_full(void *calldata) } static const struct rpc_call_ops nfs_read_full_ops = { +#if defined(CONFIG_NFS_V4_1) + .rpc_call_prepare = nfs_read_prepare, +#endif /* CONFIG_NFS_V4_1 */ .rpc_call_done = nfs_readpage_result_full, .rpc_release = nfs_readpage_release_full, }; diff --git a/fs/nfs/super.c b/fs/nfs/super.c index 26127b6..0b4cbdc 100644 --- a/fs/nfs/super.c +++ b/fs/nfs/super.c @@ -42,6 +42,8 @@ #include <linux/smp_lock.h> #include <linux/seq_file.h> #include <linux/mount.h> +#include <linux/mnt_namespace.h> +#include <linux/namei.h> #include <linux/nfs_idmap.h> #include <linux/vfs.h> #include <linux/inet.h> @@ -90,6 +92,7 @@ enum { Opt_mountport, Opt_mountvers, Opt_nfsvers, + Opt_minorversion, /* Mount options that take string arguments */ Opt_sec, Opt_proto, Opt_mountproto, Opt_mounthost, @@ -139,22 +142,23 @@ static const match_table_t nfs_mount_option_tokens = { { Opt_fscache_uniq, "fsc=%s" }, { Opt_nofscache, "nofsc" }, - { Opt_port, "port=%u" }, - { Opt_rsize, "rsize=%u" }, - { Opt_wsize, "wsize=%u" }, - { Opt_bsize, "bsize=%u" }, - { Opt_timeo, "timeo=%u" }, - { Opt_retrans, "retrans=%u" }, - { Opt_acregmin, "acregmin=%u" }, - { Opt_acregmax, "acregmax=%u" }, - { Opt_acdirmin, "acdirmin=%u" }, - { Opt_acdirmax, "acdirmax=%u" }, - { Opt_actimeo, "actimeo=%u" }, - { Opt_namelen, "namlen=%u" }, - { Opt_mountport, "mountport=%u" }, - { Opt_mountvers, "mountvers=%u" }, - { Opt_nfsvers, "nfsvers=%u" }, - { Opt_nfsvers, "vers=%u" }, + { Opt_port, "port=%s" }, + { Opt_rsize, "rsize=%s" }, + { Opt_wsize, "wsize=%s" }, + { Opt_bsize, "bsize=%s" }, + { Opt_timeo, "timeo=%s" }, + { Opt_retrans, "retrans=%s" }, + { Opt_acregmin, "acregmin=%s" }, + { Opt_acregmax, "acregmax=%s" }, + { Opt_acdirmin, "acdirmin=%s" }, + { Opt_acdirmax, "acdirmax=%s" }, + { Opt_actimeo, "actimeo=%s" }, + { Opt_namelen, "namlen=%s" }, + { Opt_mountport, "mountport=%s" }, + { Opt_mountvers, 
"mountvers=%s" }, + { Opt_nfsvers, "nfsvers=%s" }, + { Opt_nfsvers, "vers=%s" }, + { Opt_minorversion, "minorversion=%u" }, { Opt_sec, "sec=%s" }, { Opt_proto, "proto=%s" }, @@ -270,10 +274,14 @@ static const struct super_operations nfs_sops = { #ifdef CONFIG_NFS_V4 static int nfs4_get_sb(struct file_system_type *fs_type, int flags, const char *dev_name, void *raw_data, struct vfsmount *mnt); +static int nfs4_remote_get_sb(struct file_system_type *fs_type, + int flags, const char *dev_name, void *raw_data, struct vfsmount *mnt); static int nfs4_xdev_get_sb(struct file_system_type *fs_type, int flags, const char *dev_name, void *raw_data, struct vfsmount *mnt); static int nfs4_referral_get_sb(struct file_system_type *fs_type, int flags, const char *dev_name, void *raw_data, struct vfsmount *mnt); +static int nfs4_remote_referral_get_sb(struct file_system_type *fs_type, + int flags, const char *dev_name, void *raw_data, struct vfsmount *mnt); static void nfs4_kill_super(struct super_block *sb); static struct file_system_type nfs4_fs_type = { @@ -284,6 +292,14 @@ static struct file_system_type nfs4_fs_type = { .fs_flags = FS_RENAME_DOES_D_MOVE|FS_REVAL_DOT|FS_BINARY_MOUNTDATA, }; +static struct file_system_type nfs4_remote_fs_type = { + .owner = THIS_MODULE, + .name = "nfs4", + .get_sb = nfs4_remote_get_sb, + .kill_sb = nfs4_kill_super, + .fs_flags = FS_RENAME_DOES_D_MOVE|FS_REVAL_DOT|FS_BINARY_MOUNTDATA, +}; + struct file_system_type nfs4_xdev_fs_type = { .owner = THIS_MODULE, .name = "nfs4", @@ -292,6 +308,14 @@ struct file_system_type nfs4_xdev_fs_type = { .fs_flags = FS_RENAME_DOES_D_MOVE|FS_REVAL_DOT|FS_BINARY_MOUNTDATA, }; +static struct file_system_type nfs4_remote_referral_fs_type = { + .owner = THIS_MODULE, + .name = "nfs4", + .get_sb = nfs4_remote_referral_get_sb, + .kill_sb = nfs4_kill_super, + .fs_flags = FS_RENAME_DOES_D_MOVE|FS_REVAL_DOT|FS_BINARY_MOUNTDATA, +}; + struct file_system_type nfs4_referral_fs_type = { .owner = THIS_MODULE, .name = "nfs4", @@ 
-514,7 +538,6 @@ static void nfs_show_mount_options(struct seq_file *m, struct nfs_server *nfss, const char *nostr; } nfs_info[] = { { NFS_MOUNT_SOFT, ",soft", ",hard" }, - { NFS_MOUNT_INTR, ",intr", ",nointr" }, { NFS_MOUNT_POSIX, ",posix", "" }, { NFS_MOUNT_NOCTO, ",nocto", "" }, { NFS_MOUNT_NOAC, ",noac", "" }, @@ -943,11 +966,6 @@ static int nfs_parse_security_flavors(char *value, return 1; } -static void nfs_parse_invalid_value(const char *option) -{ - dfprintk(MOUNT, "NFS: bad value specified for %s option\n", option); -} - /* * Error-check and convert a string of mount options from user space into * a data structure. The whole mount string is processed; bad options are @@ -958,7 +976,7 @@ static int nfs_parse_mount_options(char *raw, struct nfs_parsed_mount_data *mnt) { char *p, *string, *secdata; - int rc, sloppy = 0, errors = 0; + int rc, sloppy = 0, invalid_option = 0; if (!raw) { dfprintk(MOUNT, "NFS: mount options string was NULL.\n"); @@ -982,7 +1000,9 @@ static int nfs_parse_mount_options(char *raw, while ((p = strsep(&raw, ",")) != NULL) { substring_t args[MAX_OPT_ARGS]; - int option, token; + unsigned long option; + int int_option; + int token; if (!*p) continue; @@ -1091,114 +1111,156 @@ static int nfs_parse_mount_options(char *raw, * options that take numeric values */ case Opt_port: - if (match_int(args, &option) || - option < 0 || option > USHORT_MAX) { - errors++; - nfs_parse_invalid_value("port"); - } else - mnt->nfs_server.port = option; + string = match_strdup(args); + if (string == NULL) + goto out_nomem; + rc = strict_strtoul(string, 10, &option); + kfree(string); + if (rc != 0 || option > USHORT_MAX) + goto out_invalid_value; + mnt->nfs_server.port = option; break; case Opt_rsize: - if (match_int(args, &option) || option < 0) { - errors++; - nfs_parse_invalid_value("rsize"); - } else - mnt->rsize = option; + string = match_strdup(args); + if (string == NULL) + goto out_nomem; + rc = strict_strtoul(string, 10, &option); + kfree(string); + 
if (rc != 0) + goto out_invalid_value; + mnt->rsize = option; break; case Opt_wsize: - if (match_int(args, &option) || option < 0) { - errors++; - nfs_parse_invalid_value("wsize"); - } else - mnt->wsize = option; + string = match_strdup(args); + if (string == NULL) + goto out_nomem; + rc = strict_strtoul(string, 10, &option); + kfree(string); + if (rc != 0) + goto out_invalid_value; + mnt->wsize = option; break; case Opt_bsize: - if (match_int(args, &option) || option < 0) { - errors++; - nfs_parse_invalid_value("bsize"); - } else - mnt->bsize = option; + string = match_strdup(args); + if (string == NULL) + goto out_nomem; + rc = strict_strtoul(string, 10, &option); + kfree(string); + if (rc != 0) + goto out_invalid_value; + mnt->bsize = option; break; case Opt_timeo: - if (match_int(args, &option) || option <= 0) { - errors++; - nfs_parse_invalid_value("timeo"); - } else - mnt->timeo = option; + string = match_strdup(args); + if (string == NULL) + goto out_nomem; + rc = strict_strtoul(string, 10, &option); + kfree(string); + if (rc != 0 || option == 0) + goto out_invalid_value; + mnt->timeo = option; break; case Opt_retrans: - if (match_int(args, &option) || option <= 0) { - errors++; - nfs_parse_invalid_value("retrans"); - } else - mnt->retrans = option; + string = match_strdup(args); + if (string == NULL) + goto out_nomem; + rc = strict_strtoul(string, 10, &option); + kfree(string); + if (rc != 0 || option == 0) + goto out_invalid_value; + mnt->retrans = option; break; case Opt_acregmin: - if (match_int(args, &option) || option < 0) { - errors++; - nfs_parse_invalid_value("acregmin"); - } else - mnt->acregmin = option; + string = match_strdup(args); + if (string == NULL) + goto out_nomem; + rc = strict_strtoul(string, 10, &option); + kfree(string); + if (rc != 0) + goto out_invalid_value; + mnt->acregmin = option; break; case Opt_acregmax: - if (match_int(args, &option) || option < 0) { - errors++; - nfs_parse_invalid_value("acregmax"); - } else - mnt->acregmax 
= option; + string = match_strdup(args); + if (string == NULL) + goto out_nomem; + rc = strict_strtoul(string, 10, &option); + kfree(string); + if (rc != 0) + goto out_invalid_value; + mnt->acregmax = option; break; case Opt_acdirmin: - if (match_int(args, &option) || option < 0) { - errors++; - nfs_parse_invalid_value("acdirmin"); - } else - mnt->acdirmin = option; + string = match_strdup(args); + if (string == NULL) + goto out_nomem; + rc = strict_strtoul(string, 10, &option); + kfree(string); + if (rc != 0) + goto out_invalid_value; + mnt->acdirmin = option; break; case Opt_acdirmax: - if (match_int(args, &option) || option < 0) { - errors++; - nfs_parse_invalid_value("acdirmax"); - } else - mnt->acdirmax = option; + string = match_strdup(args); + if (string == NULL) + goto out_nomem; + rc = strict_strtoul(string, 10, &option); + kfree(string); + if (rc != 0) + goto out_invalid_value; + mnt->acdirmax = option; break; case Opt_actimeo: - if (match_int(args, &option) || option < 0) { - errors++; - nfs_parse_invalid_value("actimeo"); - } else - mnt->acregmin = mnt->acregmax = - mnt->acdirmin = mnt->acdirmax = option; + string = match_strdup(args); + if (string == NULL) + goto out_nomem; + rc = strict_strtoul(string, 10, &option); + kfree(string); + if (rc != 0) + goto out_invalid_value; + mnt->acregmin = mnt->acregmax = + mnt->acdirmin = mnt->acdirmax = option; break; case Opt_namelen: - if (match_int(args, &option) || option < 0) { - errors++; - nfs_parse_invalid_value("namlen"); - } else - mnt->namlen = option; + string = match_strdup(args); + if (string == NULL) + goto out_nomem; + rc = strict_strtoul(string, 10, &option); + kfree(string); + if (rc != 0) + goto out_invalid_value; + mnt->namlen = option; break; case Opt_mountport: - if (match_int(args, &option) || - option < 0 || option > USHORT_MAX) { - errors++; - nfs_parse_invalid_value("mountport"); - } else - mnt->mount_server.port = option; + string = match_strdup(args); + if (string == NULL) + goto 
out_nomem; + rc = strict_strtoul(string, 10, &option); + kfree(string); + if (rc != 0 || option > USHORT_MAX) + goto out_invalid_value; + mnt->mount_server.port = option; break; case Opt_mountvers: - if (match_int(args, &option) || + string = match_strdup(args); + if (string == NULL) + goto out_nomem; + rc = strict_strtoul(string, 10, &option); + kfree(string); + if (rc != 0 || option < NFS_MNT_VERSION || - option > NFS_MNT3_VERSION) { - errors++; - nfs_parse_invalid_value("mountvers"); - } else - mnt->mount_server.version = option; + option > NFS_MNT3_VERSION) + goto out_invalid_value; + mnt->mount_server.version = option; break; case Opt_nfsvers: - if (match_int(args, &option)) { - errors++; - nfs_parse_invalid_value("nfsvers"); - break; - } + string = match_strdup(args); + if (string == NULL) + goto out_nomem; + rc = strict_strtoul(string, 10, &option); + kfree(string); + if (rc != 0) + goto out_invalid_value; switch (option) { case NFS2_VERSION: mnt->flags &= ~NFS_MOUNT_VER3; @@ -1207,10 +1269,16 @@ static int nfs_parse_mount_options(char *raw, mnt->flags |= NFS_MOUNT_VER3; break; default: - errors++; - nfs_parse_invalid_value("nfsvers"); + goto out_invalid_value; } break; + case Opt_minorversion: + if (match_int(args, &int_option)) + return 0; + if (int_option < 0 || int_option > NFS4_MAX_MINOR_VERSION) + return 0; + mnt->minorversion = int_option; + break; /* * options that take text values @@ -1222,9 +1290,9 @@ static int nfs_parse_mount_options(char *raw, rc = nfs_parse_security_flavors(string, mnt); kfree(string); if (!rc) { - errors++; dfprintk(MOUNT, "NFS: unrecognized " "security flavor\n"); + return 0; } break; case Opt_proto: @@ -1238,23 +1306,25 @@ static int nfs_parse_mount_options(char *raw, case Opt_xprt_udp: mnt->flags &= ~NFS_MOUNT_TCP; mnt->nfs_server.protocol = XPRT_TRANSPORT_UDP; + kfree(string); break; case Opt_xprt_tcp: mnt->flags |= NFS_MOUNT_TCP; mnt->nfs_server.protocol = XPRT_TRANSPORT_TCP; + kfree(string); break; case Opt_xprt_rdma: /* 
vector side protocols to TCP */ mnt->flags |= NFS_MOUNT_TCP; mnt->nfs_server.protocol = XPRT_TRANSPORT_RDMA; xprt_load_transport(string); + kfree(string); break; default: - errors++; dfprintk(MOUNT, "NFS: unrecognized " "transport protocol\n"); + return 0; } - kfree(string); break; case Opt_mountproto: string = match_strdup(args); @@ -1273,9 +1343,9 @@ static int nfs_parse_mount_options(char *raw, break; case Opt_xprt_rdma: /* not used for side protocols */ default: - errors++; dfprintk(MOUNT, "NFS: unrecognized " "transport protocol\n"); + return 0; } break; case Opt_addr: @@ -1331,9 +1401,9 @@ static int nfs_parse_mount_options(char *raw, mnt->flags |= NFS_MOUNT_LOOKUP_CACHE_NONEG|NFS_MOUNT_LOOKUP_CACHE_NONE; break; default: - errors++; dfprintk(MOUNT, "NFS: invalid " "lookupcache argument\n"); + return 0; }; break; @@ -1351,20 +1421,20 @@ static int nfs_parse_mount_options(char *raw, break; default: - errors++; + invalid_option = 1; dfprintk(MOUNT, "NFS: unrecognized mount option " "'%s'\n", p); } } - if (errors > 0) { - dfprintk(MOUNT, "NFS: parsing encountered %d error%s\n", - errors, (errors == 1 ? 
"" : "s")); - if (!sloppy) - return 0; - } + if (!sloppy && invalid_option) + return 0; + return 1; +out_invalid_value: + printk(KERN_INFO "NFS: bad mount option value specified: %s \n", p); + return 0; out_nomem: printk(KERN_INFO "NFS: not enough memory to parse option\n"); return 0; @@ -1381,6 +1451,7 @@ out_security_failure: static int nfs_try_mount(struct nfs_parsed_mount_data *args, struct nfs_fh *root_fh) { + unsigned int auth_flavor_len = 0; struct nfs_mount_request request = { .sap = (struct sockaddr *) &args->mount_server.address, @@ -1388,6 +1459,7 @@ static int nfs_try_mount(struct nfs_parsed_mount_data *args, .protocol = args->mount_server.protocol, .fh = root_fh, .noresvport = args->flags & NFS_MOUNT_NORESVPORT, + .auth_flav_len = &auth_flavor_len, }; int status; @@ -2240,6 +2312,11 @@ static void nfs4_fill_super(struct super_block *sb) nfs_initialise_sb(sb); } +static void nfs4_validate_mount_flags(struct nfs_parsed_mount_data *args) +{ + args->flags &= ~(NFS_MOUNT_NONLM|NFS_MOUNT_NOACL|NFS_MOUNT_VER3); +} + /* * Validate NFSv4 mount options */ @@ -2263,6 +2340,7 @@ static int nfs4_validate_mount_data(void *options, args->nfs_server.port = NFS_PORT; /* 2049 unless user set port= */ args->auth_flavors[0] = RPC_AUTH_UNIX; args->auth_flavor_len = 0; + args->minorversion = 0; switch (data->version) { case 1: @@ -2336,6 +2414,8 @@ static int nfs4_validate_mount_data(void *options, nfs_validate_transport_protocol(args); + nfs4_validate_mount_flags(args); + if (args->auth_flavor_len > 1) goto out_inval_auth; @@ -2375,12 +2455,12 @@ out_no_client_address: } /* - * Get the superblock for an NFS4 mountpoint + * Get the superblock for the NFS4 root partition */ -static int nfs4_get_sb(struct file_system_type *fs_type, +static int nfs4_remote_get_sb(struct file_system_type *fs_type, int flags, const char *dev_name, void *raw_data, struct vfsmount *mnt) { - struct nfs_parsed_mount_data *data; + struct nfs_parsed_mount_data *data = raw_data; struct super_block *s; 
struct nfs_server *server; struct nfs_fh *mntfh; @@ -2391,18 +2471,12 @@ static int nfs4_get_sb(struct file_system_type *fs_type, }; int error = -ENOMEM; - data = kzalloc(sizeof(*data), GFP_KERNEL); mntfh = kzalloc(sizeof(*mntfh), GFP_KERNEL); if (data == NULL || mntfh == NULL) goto out_free_fh; security_init_mnt_opts(&data->lsm_opts); - /* Validate the mount data */ - error = nfs4_validate_mount_data(raw_data, data, dev_name); - if (error < 0) - goto out; - /* Get a volume representation */ server = nfs4_create_server(data, mntfh); if (IS_ERR(server)) { @@ -2415,7 +2489,7 @@ static int nfs4_get_sb(struct file_system_type *fs_type, compare_super = NULL; /* Get a superblock - note that we may end up sharing one that already exists */ - s = sget(fs_type, compare_super, nfs_set_super, &sb_mntdata); + s = sget(&nfs4_fs_type, compare_super, nfs_set_super, &sb_mntdata); if (IS_ERR(s)) { error = PTR_ERR(s); goto out_free; @@ -2452,14 +2526,9 @@ static int nfs4_get_sb(struct file_system_type *fs_type, error = 0; out: - kfree(data->client_address); - kfree(data->nfs_server.export_path); - kfree(data->nfs_server.hostname); - kfree(data->fscache_uniq); security_free_mnt_opts(&data->lsm_opts); out_free_fh: kfree(mntfh); - kfree(data); return error; out_free: @@ -2473,16 +2542,137 @@ error_splat_super: goto out; } +static struct vfsmount *nfs_do_root_mount(struct file_system_type *fs_type, + int flags, void *data, const char *hostname) +{ + struct vfsmount *root_mnt; + char *root_devname; + size_t len; + + len = strlen(hostname) + 3; + root_devname = kmalloc(len, GFP_KERNEL); + if (root_devname == NULL) + return ERR_PTR(-ENOMEM); + snprintf(root_devname, len, "%s:/", hostname); + root_mnt = vfs_kern_mount(fs_type, flags, root_devname, data); + kfree(root_devname); + return root_mnt; +} + +static void nfs_fix_devname(const struct path *path, struct vfsmount *mnt) +{ + char *page = (char *) __get_free_page(GFP_KERNEL); + char *devname, *tmp; + + if (page == NULL) + return; + 
devname = nfs_path(path->mnt->mnt_devname, + path->mnt->mnt_root, path->dentry, + page, PAGE_SIZE); + if (devname == NULL) + goto out_freepage; + tmp = kstrdup(devname, GFP_KERNEL); + if (tmp == NULL) + goto out_freepage; + kfree(mnt->mnt_devname); + mnt->mnt_devname = tmp; +out_freepage: + free_page((unsigned long)page); +} + +static int nfs_follow_remote_path(struct vfsmount *root_mnt, + const char *export_path, struct vfsmount *mnt_target) +{ + struct mnt_namespace *ns_private; + struct nameidata nd; + struct super_block *s; + int ret; + + ns_private = create_mnt_ns(root_mnt); + ret = PTR_ERR(ns_private); + if (IS_ERR(ns_private)) + goto out_mntput; + + ret = vfs_path_lookup(root_mnt->mnt_root, root_mnt, + export_path, LOOKUP_FOLLOW, &nd); + + put_mnt_ns(ns_private); + + if (ret != 0) + goto out_err; + + s = nd.path.mnt->mnt_sb; + atomic_inc(&s->s_active); + mnt_target->mnt_sb = s; + mnt_target->mnt_root = dget(nd.path.dentry); + + /* Correct the device pathname */ + nfs_fix_devname(&nd.path, mnt_target); + + path_put(&nd.path); + down_write(&s->s_umount); + return 0; +out_mntput: + mntput(root_mnt); +out_err: + return ret; +} + +/* + * Get the superblock for an NFS4 mountpoint + */ +static int nfs4_get_sb(struct file_system_type *fs_type, + int flags, const char *dev_name, void *raw_data, struct vfsmount *mnt) +{ + struct nfs_parsed_mount_data *data; + char *export_path; + struct vfsmount *root_mnt; + int error = -ENOMEM; + + data = kzalloc(sizeof(*data), GFP_KERNEL); + if (data == NULL) + goto out_free_data; + + /* Validate the mount data */ + error = nfs4_validate_mount_data(raw_data, data, dev_name); + if (error < 0) + goto out; + + export_path = data->nfs_server.export_path; + data->nfs_server.export_path = "/"; + root_mnt = nfs_do_root_mount(&nfs4_remote_fs_type, flags, data, + data->nfs_server.hostname); + data->nfs_server.export_path = export_path; + + error = PTR_ERR(root_mnt); + if (IS_ERR(root_mnt)) + goto out; + + error = 
nfs_follow_remote_path(root_mnt, export_path, mnt); + +out: + kfree(data->client_address); + kfree(data->nfs_server.export_path); + kfree(data->nfs_server.hostname); + kfree(data->fscache_uniq); +out_free_data: + kfree(data); + dprintk("<-- nfs4_get_sb() = %d%s\n", error, + error != 0 ? " [error]" : ""); + return error; +} + static void nfs4_kill_super(struct super_block *sb) { struct nfs_server *server = NFS_SB(sb); + dprintk("--> %s\n", __func__); nfs_super_return_all_delegations(sb); kill_anon_super(sb); - nfs4_renewd_prepare_shutdown(server); nfs_fscache_release_super_cookie(sb); nfs_free_server(server); + dprintk("<-- %s\n", __func__); } /* @@ -2568,12 +2758,9 @@ error_splat_super: return error; } -/* - * Create an NFS4 server record on referral traversal - */ -static int nfs4_referral_get_sb(struct file_system_type *fs_type, int flags, - const char *dev_name, void *raw_data, - struct vfsmount *mnt) +static int nfs4_remote_referral_get_sb(struct file_system_type *fs_type, + int flags, const char *dev_name, void *raw_data, + struct vfsmount *mnt) { struct nfs_clone_mount *data = raw_data; struct super_block *s; @@ -2652,4 +2839,36 @@ error_splat_super: return error; } +/* + * Create an NFS4 server record on referral traversal + */ +static int nfs4_referral_get_sb(struct file_system_type *fs_type, + int flags, const char *dev_name, void *raw_data, + struct vfsmount *mnt) +{ + struct nfs_clone_mount *data = raw_data; + char *export_path; + struct vfsmount *root_mnt; + int error; + + dprintk("--> nfs4_referral_get_sb()\n"); + + export_path = data->mnt_path; + data->mnt_path = "/"; + + root_mnt = nfs_do_root_mount(&nfs4_remote_referral_fs_type, + flags, data, data->hostname); + data->mnt_path = export_path; + + error = PTR_ERR(root_mnt); + if (IS_ERR(root_mnt)) + goto out; + + error = nfs_follow_remote_path(root_mnt, export_path, mnt); +out: + dprintk("<-- nfs4_referral_get_sb() = %d%s\n", error, + error != 0 ? 
" [error]" : ""); + return error; +} + #endif /* CONFIG_NFS_V4 */ diff --git a/fs/nfs/unlink.c b/fs/nfs/unlink.c index ecc2953..1064c91 100644 --- a/fs/nfs/unlink.c +++ b/fs/nfs/unlink.c @@ -15,6 +15,7 @@ #include <linux/wait.h> #include "internal.h" +#include "nfs4_fs.h" struct nfs_unlinkdata { struct hlist_node list; @@ -82,7 +83,7 @@ static void nfs_async_unlink_done(struct rpc_task *task, void *calldata) struct inode *dir = data->dir; if (!NFS_PROTO(dir)->unlink_done(task, dir)) - rpc_restart_call(task); + nfs4_restart_rpc(task, NFS_SERVER(dir)->nfs_client); } /** @@ -102,9 +103,25 @@ static void nfs_async_unlink_release(void *calldata) nfs_sb_deactive(sb); } +#if defined(CONFIG_NFS_V4_1) +void nfs_unlink_prepare(struct rpc_task *task, void *calldata) +{ + struct nfs_unlinkdata *data = calldata; + struct nfs_server *server = NFS_SERVER(data->dir); + + if (nfs4_setup_sequence(server->nfs_client, &data->args.seq_args, + &data->res.seq_res, 1, task)) + return; + rpc_call_start(task); +} +#endif /* CONFIG_NFS_V4_1 */ + static const struct rpc_call_ops nfs_unlink_ops = { .rpc_call_done = nfs_async_unlink_done, .rpc_release = nfs_async_unlink_release, +#if defined(CONFIG_NFS_V4_1) + .rpc_call_prepare = nfs_unlink_prepare, +#endif /* CONFIG_NFS_V4_1 */ }; static int nfs_do_call_unlink(struct dentry *parent, struct inode *dir, struct nfs_unlinkdata *data) @@ -241,6 +258,7 @@ nfs_async_unlink(struct inode *dir, struct dentry *dentry) status = PTR_ERR(data->cred); goto out_free; } + data->res.seq_res.sr_slotid = NFS4_MAX_SLOT_TABLE; status = -EBUSY; spin_lock(&dentry->d_lock); diff --git a/fs/nfs/write.c b/fs/nfs/write.c index e560a78..ce72882 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -25,6 +25,7 @@ #include "delegation.h" #include "internal.h" #include "iostat.h" +#include "nfs4_fs.h" #define NFSDBG_FACILITY NFSDBG_PAGECACHE @@ -52,6 +53,7 @@ struct nfs_write_data *nfs_commitdata_alloc(void) if (p) { memset(p, 0, sizeof(*p)); INIT_LIST_HEAD(&p->pages); + 
p->res.seq_res.sr_slotid = NFS4_MAX_SLOT_TABLE; } return p; } @@ -71,6 +73,7 @@ struct nfs_write_data *nfs_writedata_alloc(unsigned int pagecount) memset(p, 0, sizeof(*p)); INIT_LIST_HEAD(&p->pages); p->npages = pagecount; + p->res.seq_res.sr_slotid = NFS4_MAX_SLOT_TABLE; if (pagecount <= ARRAY_SIZE(p->page_array)) p->pagevec = p->page_array; else { @@ -1048,7 +1051,23 @@ out: nfs_writedata_release(calldata); } +#if defined(CONFIG_NFS_V4_1) +void nfs_write_prepare(struct rpc_task *task, void *calldata) +{ + struct nfs_write_data *data = calldata; + struct nfs_client *clp = (NFS_SERVER(data->inode))->nfs_client; + + if (nfs4_setup_sequence(clp, &data->args.seq_args, + &data->res.seq_res, 1, task)) + return; + rpc_call_start(task); +} +#endif /* CONFIG_NFS_V4_1 */ + static const struct rpc_call_ops nfs_write_partial_ops = { +#if defined(CONFIG_NFS_V4_1) + .rpc_call_prepare = nfs_write_prepare, +#endif /* CONFIG_NFS_V4_1 */ .rpc_call_done = nfs_writeback_done_partial, .rpc_release = nfs_writeback_release_partial, }; @@ -1111,6 +1130,9 @@ remove_request: } static const struct rpc_call_ops nfs_write_full_ops = { +#if defined(CONFIG_NFS_V4_1) + .rpc_call_prepare = nfs_write_prepare, +#endif /* CONFIG_NFS_V4_1 */ .rpc_call_done = nfs_writeback_done_full, .rpc_release = nfs_writeback_release_full, }; @@ -1123,6 +1145,7 @@ int nfs_writeback_done(struct rpc_task *task, struct nfs_write_data *data) { struct nfs_writeargs *argp = &data->args; struct nfs_writeres *resp = &data->res; + struct nfs_server *server = NFS_SERVER(data->inode); int status; dprintk("NFS: %5u nfs_writeback_done (status %d)\n", @@ -1155,7 +1178,7 @@ int nfs_writeback_done(struct rpc_task *task, struct nfs_write_data *data) if (time_before(complain, jiffies)) { dprintk("NFS: faulty NFS server %s:" " (committed = %d) != (stable = %d)\n", - NFS_SERVER(data->inode)->nfs_client->cl_hostname, + server->nfs_client->cl_hostname, resp->verf->committed, argp->stable); complain = jiffies + 300 * HZ; } @@ -1181,7 
+1204,7 @@ int nfs_writeback_done(struct rpc_task *task, struct nfs_write_data *data) */ argp->stable = NFS_FILE_SYNC; } - rpc_restart_call(task); + nfs4_restart_rpc(task, server->nfs_client); return -EAGAIN; } if (time_before(complain, jiffies)) { @@ -1193,6 +1216,7 @@ int nfs_writeback_done(struct rpc_task *task, struct nfs_write_data *data) /* Can't do anything about it except throw an error. */ task->tk_status = -EIO; } + nfs4_sequence_free_slot(server->nfs_client, &data->res.seq_res); return 0; } @@ -1349,6 +1373,9 @@ static void nfs_commit_release(void *calldata) } static const struct rpc_call_ops nfs_commit_ops = { +#if defined(CONFIG_NFS_V4_1) + .rpc_call_prepare = nfs_write_prepare, +#endif /* CONFIG_NFS_V4_1 */ .rpc_call_done = nfs_commit_done, .rpc_release = nfs_commit_release, }; diff --git a/fs/nfsd/export.c b/fs/nfsd/export.c index 8b1f8ef..b92a276 100644 --- a/fs/nfsd/export.c +++ b/fs/nfsd/export.c @@ -464,16 +464,11 @@ static int secinfo_parse(char **mesg, char *buf, struct svc_export *exp) if (err) return err; /* - * Just a quick sanity check; we could also try to check - * whether this pseudoflavor is supported, but at worst - * an unsupported pseudoflavor on the export would just - * be a pseudoflavor that won't match the flavor of any - * authenticated request. The administrator will - * probably discover the problem when someone fails to - * authenticate. + * XXX: It would be nice to also check whether this + * pseudoflavor is supported, so we can discover the + * problem at export time instead of when a client fails + * to authenticate. */ - if (f->pseudoflavor < 0) - return -EINVAL; err = get_int(mesg, &f->flags); if (err) return err; diff --git a/fs/nfsd/nfs3proc.c b/fs/nfsd/nfs3proc.c index 7c9fe83..a713c41 100644 --- a/fs/nfsd/nfs3proc.c +++ b/fs/nfsd/nfs3proc.c @@ -652,8 +652,6 @@ nfsd3_proc_commit(struct svc_rqst * rqstp, struct nfsd3_commitargs *argp, * NFSv3 Server procedures. 
* Only the results of non-idempotent operations are cached. */ -#define nfs3svc_decode_voidargs NULL -#define nfs3svc_release_void NULL #define nfs3svc_decode_fhandleargs nfs3svc_decode_fhandle #define nfs3svc_encode_attrstatres nfs3svc_encode_attrstat #define nfs3svc_encode_wccstatres nfs3svc_encode_wccstat @@ -686,28 +684,219 @@ struct nfsd3_voidargs { int dummy; }; #define WC (7+pAT) /* WCC attributes */ static struct svc_procedure nfsd_procedures3[22] = { - PROC(null, void, void, void, RC_NOCACHE, ST), - PROC(getattr, fhandle, attrstat, fhandle, RC_NOCACHE, ST+AT), - PROC(setattr, sattr, wccstat, fhandle, RC_REPLBUFF, ST+WC), - PROC(lookup, dirop, dirop, fhandle2, RC_NOCACHE, ST+FH+pAT+pAT), - PROC(access, access, access, fhandle, RC_NOCACHE, ST+pAT+1), - PROC(readlink, readlink, readlink, fhandle, RC_NOCACHE, ST+pAT+1+NFS3_MAXPATHLEN/4), - PROC(read, read, read, fhandle, RC_NOCACHE, ST+pAT+4+NFSSVC_MAXBLKSIZE/4), - PROC(write, write, write, fhandle, RC_REPLBUFF, ST+WC+4), - PROC(create, create, create, fhandle2, RC_REPLBUFF, ST+(1+FH+pAT)+WC), - PROC(mkdir, mkdir, create, fhandle2, RC_REPLBUFF, ST+(1+FH+pAT)+WC), - PROC(symlink, symlink, create, fhandle2, RC_REPLBUFF, ST+(1+FH+pAT)+WC), - PROC(mknod, mknod, create, fhandle2, RC_REPLBUFF, ST+(1+FH+pAT)+WC), - PROC(remove, dirop, wccstat, fhandle, RC_REPLBUFF, ST+WC), - PROC(rmdir, dirop, wccstat, fhandle, RC_REPLBUFF, ST+WC), - PROC(rename, rename, rename, fhandle2, RC_REPLBUFF, ST+WC+WC), - PROC(link, link, link, fhandle2, RC_REPLBUFF, ST+pAT+WC), - PROC(readdir, readdir, readdir, fhandle, RC_NOCACHE, 0), - PROC(readdirplus,readdirplus, readdir, fhandle, RC_NOCACHE, 0), - PROC(fsstat, fhandle, fsstat, void, RC_NOCACHE, ST+pAT+2*6+1), - PROC(fsinfo, fhandle, fsinfo, void, RC_NOCACHE, ST+pAT+12), - PROC(pathconf, fhandle, pathconf, void, RC_NOCACHE, ST+pAT+6), - PROC(commit, commit, commit, fhandle, RC_NOCACHE, ST+WC+2), + [NFS3PROC_NULL] = { + .pc_func = (svc_procfunc) nfsd3_proc_null, + .pc_encode = 
(kxdrproc_t) nfs3svc_encode_voidres, + .pc_argsize = sizeof(struct nfsd3_voidargs), + .pc_ressize = sizeof(struct nfsd3_voidres), + .pc_cachetype = RC_NOCACHE, + .pc_xdrressize = ST, + }, + [NFS3PROC_GETATTR] = { + .pc_func = (svc_procfunc) nfsd3_proc_getattr, + .pc_decode = (kxdrproc_t) nfs3svc_decode_fhandleargs, + .pc_encode = (kxdrproc_t) nfs3svc_encode_attrstatres, + .pc_release = (kxdrproc_t) nfs3svc_release_fhandle, + .pc_argsize = sizeof(struct nfsd3_fhandleargs), + .pc_ressize = sizeof(struct nfsd3_attrstatres), + .pc_cachetype = RC_NOCACHE, + .pc_xdrressize = ST+AT, + }, + [NFS3PROC_SETATTR] = { + .pc_func = (svc_procfunc) nfsd3_proc_setattr, + .pc_decode = (kxdrproc_t) nfs3svc_decode_sattrargs, + .pc_encode = (kxdrproc_t) nfs3svc_encode_wccstatres, + .pc_release = (kxdrproc_t) nfs3svc_release_fhandle, + .pc_argsize = sizeof(struct nfsd3_sattrargs), + .pc_ressize = sizeof(struct nfsd3_wccstatres), + .pc_cachetype = RC_REPLBUFF, + .pc_xdrressize = ST+WC, + }, + [NFS3PROC_LOOKUP] = { + .pc_func = (svc_procfunc) nfsd3_proc_lookup, + .pc_decode = (kxdrproc_t) nfs3svc_decode_diropargs, + .pc_encode = (kxdrproc_t) nfs3svc_encode_diropres, + .pc_release = (kxdrproc_t) nfs3svc_release_fhandle2, + .pc_argsize = sizeof(struct nfsd3_diropargs), + .pc_ressize = sizeof(struct nfsd3_diropres), + .pc_cachetype = RC_NOCACHE, + .pc_xdrressize = ST+FH+pAT+pAT, + }, + [NFS3PROC_ACCESS] = { + .pc_func = (svc_procfunc) nfsd3_proc_access, + .pc_decode = (kxdrproc_t) nfs3svc_decode_accessargs, + .pc_encode = (kxdrproc_t) nfs3svc_encode_accessres, + .pc_release = (kxdrproc_t) nfs3svc_release_fhandle, + .pc_argsize = sizeof(struct nfsd3_accessargs), + .pc_ressize = sizeof(struct nfsd3_accessres), + .pc_cachetype = RC_NOCACHE, + .pc_xdrressize = ST+pAT+1, + }, + [NFS3PROC_READLINK] = { + .pc_func = (svc_procfunc) nfsd3_proc_readlink, + .pc_decode = (kxdrproc_t) nfs3svc_decode_readlinkargs, + .pc_encode = (kxdrproc_t) nfs3svc_encode_readlinkres, + .pc_release = (kxdrproc_t) 
nfs3svc_release_fhandle, + .pc_argsize = sizeof(struct nfsd3_readlinkargs), + .pc_ressize = sizeof(struct nfsd3_readlinkres), + .pc_cachetype = RC_NOCACHE, + .pc_xdrressize = ST+pAT+1+NFS3_MAXPATHLEN/4, + }, + [NFS3PROC_READ] = { + .pc_func = (svc_procfunc) nfsd3_proc_read, + .pc_decode = (kxdrproc_t) nfs3svc_decode_readargs, + .pc_encode = (kxdrproc_t) nfs3svc_encode_readres, + .pc_release = (kxdrproc_t) nfs3svc_release_fhandle, + .pc_argsize = sizeof(struct nfsd3_readargs), + .pc_ressize = sizeof(struct nfsd3_readres), + .pc_cachetype = RC_NOCACHE, + .pc_xdrressize = ST+pAT+4+NFSSVC_MAXBLKSIZE/4, + }, + [NFS3PROC_WRITE] = { + .pc_func = (svc_procfunc) nfsd3_proc_write, + .pc_decode = (kxdrproc_t) nfs3svc_decode_writeargs, + .pc_encode = (kxdrproc_t) nfs3svc_encode_writeres, + .pc_release = (kxdrproc_t) nfs3svc_release_fhandle, + .pc_argsize = sizeof(struct nfsd3_writeargs), + .pc_ressize = sizeof(struct nfsd3_writeres), + .pc_cachetype = RC_REPLBUFF, + .pc_xdrressize = ST+WC+4, + }, + [NFS3PROC_CREATE] = { + .pc_func = (svc_procfunc) nfsd3_proc_create, + .pc_decode = (kxdrproc_t) nfs3svc_decode_createargs, + .pc_encode = (kxdrproc_t) nfs3svc_encode_createres, + .pc_release = (kxdrproc_t) nfs3svc_release_fhandle2, + .pc_argsize = sizeof(struct nfsd3_createargs), + .pc_ressize = sizeof(struct nfsd3_createres), + .pc_cachetype = RC_REPLBUFF, + .pc_xdrressize = ST+(1+FH+pAT)+WC, + }, + [NFS3PROC_MKDIR] = { + .pc_func = (svc_procfunc) nfsd3_proc_mkdir, + .pc_decode = (kxdrproc_t) nfs3svc_decode_mkdirargs, + .pc_encode = (kxdrproc_t) nfs3svc_encode_createres, + .pc_release = (kxdrproc_t) nfs3svc_release_fhandle2, + .pc_argsize = sizeof(struct nfsd3_mkdirargs), + .pc_ressize = sizeof(struct nfsd3_createres), + .pc_cachetype = RC_REPLBUFF, + .pc_xdrressize = ST+(1+FH+pAT)+WC, + }, + [NFS3PROC_SYMLINK] = { + .pc_func = (svc_procfunc) nfsd3_proc_symlink, + .pc_decode = (kxdrproc_t) nfs3svc_decode_symlinkargs, + .pc_encode = (kxdrproc_t) nfs3svc_encode_createres, + 
.pc_release = (kxdrproc_t) nfs3svc_release_fhandle2, + .pc_argsize = sizeof(struct nfsd3_symlinkargs), + .pc_ressize = sizeof(struct nfsd3_createres), + .pc_cachetype = RC_REPLBUFF, + .pc_xdrressize = ST+(1+FH+pAT)+WC, + }, + [NFS3PROC_MKNOD] = { + .pc_func = (svc_procfunc) nfsd3_proc_mknod, + .pc_decode = (kxdrproc_t) nfs3svc_decode_mknodargs, + .pc_encode = (kxdrproc_t) nfs3svc_encode_createres, + .pc_release = (kxdrproc_t) nfs3svc_release_fhandle2, + .pc_argsize = sizeof(struct nfsd3_mknodargs), + .pc_ressize = sizeof(struct nfsd3_createres), + .pc_cachetype = RC_REPLBUFF, + .pc_xdrressize = ST+(1+FH+pAT)+WC, + }, + [NFS3PROC_REMOVE] = { + .pc_func = (svc_procfunc) nfsd3_proc_remove, + .pc_decode = (kxdrproc_t) nfs3svc_decode_diropargs, + .pc_encode = (kxdrproc_t) nfs3svc_encode_wccstatres, + .pc_release = (kxdrproc_t) nfs3svc_release_fhandle, + .pc_argsize = sizeof(struct nfsd3_diropargs), + .pc_ressize = sizeof(struct nfsd3_wccstatres), + .pc_cachetype = RC_REPLBUFF, + .pc_xdrressize = ST+WC, + }, + [NFS3PROC_RMDIR] = { + .pc_func = (svc_procfunc) nfsd3_proc_rmdir, + .pc_decode = (kxdrproc_t) nfs3svc_decode_diropargs, + .pc_encode = (kxdrproc_t) nfs3svc_encode_wccstatres, + .pc_release = (kxdrproc_t) nfs3svc_release_fhandle, + .pc_argsize = sizeof(struct nfsd3_diropargs), + .pc_ressize = sizeof(struct nfsd3_wccstatres), + .pc_cachetype = RC_REPLBUFF, + .pc_xdrressize = ST+WC, + }, + [NFS3PROC_RENAME] = { + .pc_func = (svc_procfunc) nfsd3_proc_rename, + .pc_decode = (kxdrproc_t) nfs3svc_decode_renameargs, + .pc_encode = (kxdrproc_t) nfs3svc_encode_renameres, + .pc_release = (kxdrproc_t) nfs3svc_release_fhandle2, + .pc_argsize = sizeof(struct nfsd3_renameargs), + .pc_ressize = sizeof(struct nfsd3_renameres), + .pc_cachetype = RC_REPLBUFF, + .pc_xdrressize = ST+WC+WC, + }, + [NFS3PROC_LINK] = { + .pc_func = (svc_procfunc) nfsd3_proc_link, + .pc_decode = (kxdrproc_t) nfs3svc_decode_linkargs, + .pc_encode = (kxdrproc_t) nfs3svc_encode_linkres, + .pc_release = 
(kxdrproc_t) nfs3svc_release_fhandle2, + .pc_argsize = sizeof(struct nfsd3_linkargs), + .pc_ressize = sizeof(struct nfsd3_linkres), + .pc_cachetype = RC_REPLBUFF, + .pc_xdrressize = ST+pAT+WC, + }, + [NFS3PROC_READDIR] = { + .pc_func = (svc_procfunc) nfsd3_proc_readdir, + .pc_decode = (kxdrproc_t) nfs3svc_decode_readdirargs, + .pc_encode = (kxdrproc_t) nfs3svc_encode_readdirres, + .pc_release = (kxdrproc_t) nfs3svc_release_fhandle, + .pc_argsize = sizeof(struct nfsd3_readdirargs), + .pc_ressize = sizeof(struct nfsd3_readdirres), + .pc_cachetype = RC_NOCACHE, + }, + [NFS3PROC_READDIRPLUS] = { + .pc_func = (svc_procfunc) nfsd3_proc_readdirplus, + .pc_decode = (kxdrproc_t) nfs3svc_decode_readdirplusargs, + .pc_encode = (kxdrproc_t) nfs3svc_encode_readdirres, + .pc_release = (kxdrproc_t) nfs3svc_release_fhandle, + .pc_argsize = sizeof(struct nfsd3_readdirplusargs), + .pc_ressize = sizeof(struct nfsd3_readdirres), + .pc_cachetype = RC_NOCACHE, + }, + [NFS3PROC_FSSTAT] = { + .pc_func = (svc_procfunc) nfsd3_proc_fsstat, + .pc_decode = (kxdrproc_t) nfs3svc_decode_fhandleargs, + .pc_encode = (kxdrproc_t) nfs3svc_encode_fsstatres, + .pc_argsize = sizeof(struct nfsd3_fhandleargs), + .pc_ressize = sizeof(struct nfsd3_fsstatres), + .pc_cachetype = RC_NOCACHE, + .pc_xdrressize = ST+pAT+2*6+1, + }, + [NFS3PROC_FSINFO] = { + .pc_func = (svc_procfunc) nfsd3_proc_fsinfo, + .pc_decode = (kxdrproc_t) nfs3svc_decode_fhandleargs, + .pc_encode = (kxdrproc_t) nfs3svc_encode_fsinfores, + .pc_argsize = sizeof(struct nfsd3_fhandleargs), + .pc_ressize = sizeof(struct nfsd3_fsinfores), + .pc_cachetype = RC_NOCACHE, + .pc_xdrressize = ST+pAT+12, + }, + [NFS3PROC_PATHCONF] = { + .pc_func = (svc_procfunc) nfsd3_proc_pathconf, + .pc_decode = (kxdrproc_t) nfs3svc_decode_fhandleargs, + .pc_encode = (kxdrproc_t) nfs3svc_encode_pathconfres, + .pc_argsize = sizeof(struct nfsd3_fhandleargs), + .pc_ressize = sizeof(struct nfsd3_pathconfres), + .pc_cachetype = RC_NOCACHE, + .pc_xdrressize = ST+pAT+6, + }, 
+ [NFS3PROC_COMMIT] = { + .pc_func = (svc_procfunc) nfsd3_proc_commit, + .pc_decode = (kxdrproc_t) nfs3svc_decode_commitargs, + .pc_encode = (kxdrproc_t) nfs3svc_encode_commitres, + .pc_release = (kxdrproc_t) nfs3svc_release_fhandle, + .pc_argsize = sizeof(struct nfsd3_commitargs), + .pc_ressize = sizeof(struct nfsd3_commitres), + .pc_cachetype = RC_NOCACHE, + .pc_xdrressize = ST+WC+2, + }, }; struct svc_version nfsd_version3 = { diff --git a/fs/nfsd/nfs3xdr.c b/fs/nfsd/nfs3xdr.c index 17d0dd9..01d4ec1 100644 --- a/fs/nfsd/nfs3xdr.c +++ b/fs/nfsd/nfs3xdr.c @@ -272,6 +272,7 @@ void fill_post_wcc(struct svc_fh *fhp) err = vfs_getattr(fhp->fh_export->ex_path.mnt, fhp->fh_dentry, &fhp->fh_post_attr); + fhp->fh_post_change = fhp->fh_dentry->d_inode->i_version; if (err) fhp->fh_post_saved = 0; else diff --git a/fs/nfsd/nfs4callback.c b/fs/nfsd/nfs4callback.c index 290289b..3fd23f7 100644 --- a/fs/nfsd/nfs4callback.c +++ b/fs/nfsd/nfs4callback.c @@ -140,8 +140,10 @@ struct nfs4_cb_compound_hdr { int status; u32 ident; u32 nops; + __be32 *nops_p; + u32 minorversion; u32 taglen; - char * tag; + char *tag; }; static struct { @@ -201,33 +203,39 @@ nfs_cb_stat_to_errno(int stat) * XDR encode */ -static int +static void encode_cb_compound_hdr(struct xdr_stream *xdr, struct nfs4_cb_compound_hdr *hdr) { __be32 * p; RESERVE_SPACE(16); WRITE32(0); /* tag length is always 0 */ - WRITE32(NFS4_MINOR_VERSION); + WRITE32(hdr->minorversion); WRITE32(hdr->ident); + hdr->nops_p = p; WRITE32(hdr->nops); - return 0; } -static int -encode_cb_recall(struct xdr_stream *xdr, struct nfs4_cb_recall *cb_rec) +static void encode_cb_nops(struct nfs4_cb_compound_hdr *hdr) +{ + *hdr->nops_p = htonl(hdr->nops); +} + +static void +encode_cb_recall(struct xdr_stream *xdr, struct nfs4_delegation *dp, + struct nfs4_cb_compound_hdr *hdr) { __be32 *p; - int len = cb_rec->cbr_fh.fh_size; + int len = dp->dl_fh.fh_size; - RESERVE_SPACE(12+sizeof(cb_rec->cbr_stateid) + len); + 
RESERVE_SPACE(12+sizeof(dp->dl_stateid) + len); WRITE32(OP_CB_RECALL); - WRITE32(cb_rec->cbr_stateid.si_generation); - WRITEMEM(&cb_rec->cbr_stateid.si_opaque, sizeof(stateid_opaque_t)); - WRITE32(cb_rec->cbr_trunc); + WRITE32(dp->dl_stateid.si_generation); + WRITEMEM(&dp->dl_stateid.si_opaque, sizeof(stateid_opaque_t)); + WRITE32(0); /* truncate optimization not implemented */ WRITE32(len); - WRITEMEM(&cb_rec->cbr_fh.fh_base, len); - return 0; + WRITEMEM(&dp->dl_fh.fh_base, len); + hdr->nops++; } static int @@ -241,17 +249,18 @@ nfs4_xdr_enc_cb_null(struct rpc_rqst *req, __be32 *p) } static int -nfs4_xdr_enc_cb_recall(struct rpc_rqst *req, __be32 *p, struct nfs4_cb_recall *args) +nfs4_xdr_enc_cb_recall(struct rpc_rqst *req, __be32 *p, struct nfs4_delegation *args) { struct xdr_stream xdr; struct nfs4_cb_compound_hdr hdr = { - .ident = args->cbr_ident, - .nops = 1, + .ident = args->dl_ident, }; xdr_init_encode(&xdr, &req->rq_snd_buf, p); encode_cb_compound_hdr(&xdr, &hdr); - return (encode_cb_recall(&xdr, args)); + encode_cb_recall(&xdr, args, &hdr); + encode_cb_nops(&hdr); + return 0; } @@ -358,18 +367,21 @@ static struct rpc_program cb_program = { .pipe_dir_name = "/nfsd4_cb", }; +static int max_cb_time(void) +{ + return max(NFSD_LEASE_TIME/10, (time_t)1) * HZ; +} + /* Reference counting, callback cleanup, etc., all look racy as heck. * And why is cb_set an atomic? 
*/ -static struct rpc_clnt *setup_callback_client(struct nfs4_client *clp) +int setup_callback_client(struct nfs4_client *clp) { struct sockaddr_in addr; - struct nfs4_callback *cb = &clp->cl_callback; + struct nfs4_cb_conn *cb = &clp->cl_cb_conn; struct rpc_timeout timeparms = { - .to_initval = (NFSD_LEASE_TIME/4) * HZ, - .to_retries = 5, - .to_maxval = (NFSD_LEASE_TIME/2) * HZ, - .to_exponential = 1, + .to_initval = max_cb_time(), + .to_retries = 0, }; struct rpc_create_args args = { .protocol = IPPROTO_TCP, @@ -386,7 +398,7 @@ static struct rpc_clnt *setup_callback_client(struct nfs4_client *clp) struct rpc_clnt *client; if (!clp->cl_principal && (clp->cl_flavor >= RPC_AUTH_GSS_KRB5)) - return ERR_PTR(-EINVAL); + return -EINVAL; /* Initialize address */ memset(&addr, 0, sizeof(addr)); @@ -396,48 +408,77 @@ static struct rpc_clnt *setup_callback_client(struct nfs4_client *clp) /* Create RPC client */ client = rpc_create(&args); - if (IS_ERR(client)) + if (IS_ERR(client)) { dprintk("NFSD: couldn't create callback client: %ld\n", PTR_ERR(client)); - return client; + return PTR_ERR(client); + } + cb->cb_client = client; + return 0; + +} + +static void warn_no_callback_path(struct nfs4_client *clp, int reason) +{ + dprintk("NFSD: warning: no callback path to client %.*s: error %d\n", + (int)clp->cl_name.len, clp->cl_name.data, reason); +} + +static void nfsd4_cb_probe_done(struct rpc_task *task, void *calldata) +{ + struct nfs4_client *clp = calldata; + + if (task->tk_status) + warn_no_callback_path(clp, task->tk_status); + else + atomic_set(&clp->cl_cb_conn.cb_set, 1); + put_nfs4_client(clp); +} + +static const struct rpc_call_ops nfsd4_cb_probe_ops = { + .rpc_call_done = nfsd4_cb_probe_done, +}; +static struct rpc_cred *lookup_cb_cred(struct nfs4_cb_conn *cb) +{ + struct auth_cred acred = { + .machine_cred = 1 + }; + + /* + * Note in the gss case this doesn't actually have to wait for a + * gss upcall (or any calls to the client); this just creates a + * 
non-uptodate cred which the rpc state machine will fill in with + * a refresh_upcall later. + */ + return rpcauth_lookup_credcache(cb->cb_client->cl_auth, &acred, + RPCAUTH_LOOKUP_NEW); } -static int do_probe_callback(void *data) +void do_probe_callback(struct nfs4_client *clp) { - struct nfs4_client *clp = data; - struct nfs4_callback *cb = &clp->cl_callback; + struct nfs4_cb_conn *cb = &clp->cl_cb_conn; struct rpc_message msg = { .rpc_proc = &nfs4_cb_procedures[NFSPROC4_CLNT_CB_NULL], .rpc_argp = clp, }; - struct rpc_clnt *client; + struct rpc_cred *cred; int status; - client = setup_callback_client(clp); - if (IS_ERR(client)) { - status = PTR_ERR(client); - dprintk("NFSD: couldn't create callback client: %d\n", - status); - goto out_err; + cred = lookup_cb_cred(cb); + if (IS_ERR(cred)) { + status = PTR_ERR(cred); + goto out; + } + cb->cb_cred = cred; + msg.rpc_cred = cb->cb_cred; + status = rpc_call_async(cb->cb_client, &msg, RPC_TASK_SOFT, + &nfsd4_cb_probe_ops, (void *)clp); +out: + if (status) { + warn_no_callback_path(clp, status); + put_nfs4_client(clp); } - - status = rpc_call_sync(client, &msg, RPC_TASK_SOFT); - - if (status) - goto out_release_client; - - cb->cb_client = client; - atomic_set(&cb->cb_set, 1); - put_nfs4_client(clp); - return 0; -out_release_client: - rpc_shutdown_client(client); -out_err: - dprintk("NFSD: warning: no callback path to client %.*s: error %d\n", - (int)clp->cl_name.len, clp->cl_name.data, status); - put_nfs4_client(clp); - return 0; } /* @@ -446,21 +487,65 @@ out_err: void nfsd4_probe_callback(struct nfs4_client *clp) { - struct task_struct *t; + int status; - BUG_ON(atomic_read(&clp->cl_callback.cb_set)); + BUG_ON(atomic_read(&clp->cl_cb_conn.cb_set)); + + status = setup_callback_client(clp); + if (status) { + warn_no_callback_path(clp, status); + return; + } /* the task holds a reference to the nfs4_client struct */ atomic_inc(&clp->cl_count); - t = kthread_run(do_probe_callback, clp, "nfs4_cb_probe"); + 
do_probe_callback(clp); +} - if (IS_ERR(t)) - atomic_dec(&clp->cl_count); +static void nfsd4_cb_recall_done(struct rpc_task *task, void *calldata) +{ + struct nfs4_delegation *dp = calldata; + struct nfs4_client *clp = dp->dl_client; - return; + switch (task->tk_status) { + case -EIO: + /* Network partition? */ + atomic_set(&clp->cl_cb_conn.cb_set, 0); + warn_no_callback_path(clp, task->tk_status); + case -EBADHANDLE: + case -NFS4ERR_BAD_STATEID: + /* Race: client probably got cb_recall + * before open reply granting delegation */ + break; + default: + /* success, or error we can't handle */ + return; + } + if (dp->dl_retries--) { + rpc_delay(task, 2*HZ); + task->tk_status = 0; + rpc_restart_call(task); + } else { + atomic_set(&clp->cl_cb_conn.cb_set, 0); + warn_no_callback_path(clp, task->tk_status); + } +} + +static void nfsd4_cb_recall_release(void *calldata) +{ + struct nfs4_delegation *dp = calldata; + struct nfs4_client *clp = dp->dl_client; + + nfs4_put_delegation(dp); + put_nfs4_client(clp); } +static const struct rpc_call_ops nfsd4_cb_recall_ops = { + .rpc_call_done = nfsd4_cb_recall_done, + .rpc_release = nfsd4_cb_recall_release, +}; + /* * called with dp->dl_count inc'ed. */ @@ -468,41 +553,19 @@ void nfsd4_cb_recall(struct nfs4_delegation *dp) { struct nfs4_client *clp = dp->dl_client; - struct rpc_clnt *clnt = clp->cl_callback.cb_client; - struct nfs4_cb_recall *cbr = &dp->dl_recall; + struct rpc_clnt *clnt = clp->cl_cb_conn.cb_client; struct rpc_message msg = { .rpc_proc = &nfs4_cb_procedures[NFSPROC4_CLNT_CB_RECALL], - .rpc_argp = cbr, + .rpc_argp = dp, + .rpc_cred = clp->cl_cb_conn.cb_cred }; - int retries = 1; - int status = 0; - - cbr->cbr_trunc = 0; /* XXX need to implement truncate optimization */ - cbr->cbr_dp = dp; - - status = rpc_call_sync(clnt, &msg, RPC_TASK_SOFT); - while (retries--) { - switch (status) { - case -EIO: - /* Network partition? 
*/ - atomic_set(&clp->cl_callback.cb_set, 0); - case -EBADHANDLE: - case -NFS4ERR_BAD_STATEID: - /* Race: client probably got cb_recall - * before open reply granting delegation */ - break; - default: - goto out_put_cred; - } - ssleep(2); - status = rpc_call_sync(clnt, &msg, RPC_TASK_SOFT); + int status; + + dp->dl_retries = 1; + status = rpc_call_async(clnt, &msg, RPC_TASK_SOFT, + &nfsd4_cb_recall_ops, dp); + if (status) { + put_nfs4_client(clp); + nfs4_put_delegation(dp); } -out_put_cred: - /* - * Success or failure, now we're either waiting for lease expiration - * or deleg_return. - */ - put_nfs4_client(clp); - nfs4_put_delegation(dp); - return; } diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c index b2883e9..7c88017 100644 --- a/fs/nfsd/nfs4proc.c +++ b/fs/nfsd/nfs4proc.c @@ -51,6 +51,78 @@ #define NFSDDBG_FACILITY NFSDDBG_PROC +static u32 nfsd_attrmask[] = { + NFSD_WRITEABLE_ATTRS_WORD0, + NFSD_WRITEABLE_ATTRS_WORD1, + NFSD_WRITEABLE_ATTRS_WORD2 +}; + +static u32 nfsd41_ex_attrmask[] = { + NFSD_SUPPATTR_EXCLCREAT_WORD0, + NFSD_SUPPATTR_EXCLCREAT_WORD1, + NFSD_SUPPATTR_EXCLCREAT_WORD2 +}; + +static __be32 +check_attr_support(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, + u32 *bmval, u32 *writable) +{ + struct dentry *dentry = cstate->current_fh.fh_dentry; + struct svc_export *exp = cstate->current_fh.fh_export; + + /* + * Check about attributes are supported by the NFSv4 server or not. + * According to spec, unsupported attributes return ERR_ATTRNOTSUPP. + */ + if ((bmval[0] & ~nfsd_suppattrs0(cstate->minorversion)) || + (bmval[1] & ~nfsd_suppattrs1(cstate->minorversion)) || + (bmval[2] & ~nfsd_suppattrs2(cstate->minorversion))) + return nfserr_attrnotsupp; + + /* + * Check FATTR4_WORD0_ACL & FATTR4_WORD0_FS_LOCATIONS can be supported + * in current environment or not. 
+ */ + if (bmval[0] & FATTR4_WORD0_ACL) { + if (!IS_POSIXACL(dentry->d_inode)) + return nfserr_attrnotsupp; + } + if (bmval[0] & FATTR4_WORD0_FS_LOCATIONS) { + if (exp->ex_fslocs.locations == NULL) + return nfserr_attrnotsupp; + } + + /* + * According to spec, read-only attributes return ERR_INVAL. + */ + if (writable) { + if ((bmval[0] & ~writable[0]) || (bmval[1] & ~writable[1]) || + (bmval[2] & ~writable[2])) + return nfserr_inval; + } + + return nfs_ok; +} + +static __be32 +nfsd4_check_open_attributes(struct svc_rqst *rqstp, + struct nfsd4_compound_state *cstate, struct nfsd4_open *open) +{ + __be32 status = nfs_ok; + + if (open->op_create == NFS4_OPEN_CREATE) { + if (open->op_createmode == NFS4_CREATE_UNCHECKED + || open->op_createmode == NFS4_CREATE_GUARDED) + status = check_attr_support(rqstp, cstate, + open->op_bmval, nfsd_attrmask); + else if (open->op_createmode == NFS4_CREATE_EXCLUSIVE4_1) + status = check_attr_support(rqstp, cstate, + open->op_bmval, nfsd41_ex_attrmask); + } + + return status; +} + static inline void fh_dup2(struct svc_fh *dst, struct svc_fh *src) { @@ -225,6 +297,10 @@ nfsd4_open(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, if (status) goto out; + status = nfsd4_check_open_attributes(rqstp, cstate, open); + if (status) + goto out; + /* Openowner is now set, so sequence id will get bumped. Now we need * these checks before we do any creates: */ status = nfserr_grace; @@ -395,6 +471,11 @@ nfsd4_create(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, if (status) return status; + status = check_attr_support(rqstp, cstate, create->cr_bmval, + nfsd_attrmask); + if (status) + return status; + switch (create->cr_type) { case NF4LNK: /* ugh! 
we have to null-terminate the linktext, or @@ -689,6 +770,12 @@ nfsd4_setattr(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, if (status) return status; status = nfs_ok; + + status = check_attr_support(rqstp, cstate, setattr->sa_bmval, + nfsd_attrmask); + if (status) + goto out; + if (setattr->sa_acl != NULL) status = nfsd4_set_nfs4_acl(rqstp, &cstate->current_fh, setattr->sa_acl); @@ -763,10 +850,10 @@ _nfsd4_verify(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, if (status) return status; - if ((verify->ve_bmval[0] & ~nfsd_suppattrs0(cstate->minorversion)) - || (verify->ve_bmval[1] & ~nfsd_suppattrs1(cstate->minorversion)) - || (verify->ve_bmval[2] & ~nfsd_suppattrs2(cstate->minorversion))) - return nfserr_attrnotsupp; + status = check_attr_support(rqstp, cstate, verify->ve_bmval, NULL); + if (status) + return status; + if ((verify->ve_bmval[0] & FATTR4_WORD0_RDATTR_ERROR) || (verify->ve_bmval[1] & NFSD_WRITEONLY_ATTRS_WORD1)) return nfserr_inval; @@ -1226,24 +1313,9 @@ static const char *nfsd4_op_name(unsigned opnum) return "unknown_operation"; } -#define nfs4svc_decode_voidargs NULL -#define nfs4svc_release_void NULL #define nfsd4_voidres nfsd4_voidargs -#define nfs4svc_release_compound NULL struct nfsd4_voidargs { int dummy; }; -#define PROC(name, argt, rest, relt, cache, respsize) \ - { (svc_procfunc) nfsd4_proc_##name, \ - (kxdrproc_t) nfs4svc_decode_##argt##args, \ - (kxdrproc_t) nfs4svc_encode_##rest##res, \ - (kxdrproc_t) nfs4svc_release_##relt, \ - sizeof(struct nfsd4_##argt##args), \ - sizeof(struct nfsd4_##rest##res), \ - 0, \ - cache, \ - respsize, \ - } - /* * TODO: At the present time, the NFSv4 server does not do XID caching * of requests. Implementing XID caching would not be a serious problem, @@ -1255,8 +1327,23 @@ struct nfsd4_voidargs { int dummy; }; * better XID's. 
*/ static struct svc_procedure nfsd_procedures4[2] = { - PROC(null, void, void, void, RC_NOCACHE, 1), - PROC(compound, compound, compound, compound, RC_NOCACHE, NFSD_BUFSIZE/4) + [NFSPROC4_NULL] = { + .pc_func = (svc_procfunc) nfsd4_proc_null, + .pc_encode = (kxdrproc_t) nfs4svc_encode_voidres, + .pc_argsize = sizeof(struct nfsd4_voidargs), + .pc_ressize = sizeof(struct nfsd4_voidres), + .pc_cachetype = RC_NOCACHE, + .pc_xdrressize = 1, + }, + [NFSPROC4_COMPOUND] = { + .pc_func = (svc_procfunc) nfsd4_proc_compound, + .pc_decode = (kxdrproc_t) nfs4svc_decode_compoundargs, + .pc_encode = (kxdrproc_t) nfs4svc_encode_compoundres, + .pc_argsize = sizeof(struct nfsd4_compoundargs), + .pc_ressize = sizeof(struct nfsd4_compoundres), + .pc_cachetype = RC_NOCACHE, + .pc_xdrressize = NFSD_BUFSIZE/4, + }, }; struct svc_version nfsd_version4 = { diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index 3b711f5..980a216 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -182,7 +182,7 @@ alloc_init_deleg(struct nfs4_client *clp, struct nfs4_stateid *stp, struct svc_f { struct nfs4_delegation *dp; struct nfs4_file *fp = stp->st_file; - struct nfs4_callback *cb = &stp->st_stateowner->so_client->cl_callback; + struct nfs4_cb_conn *cb = &stp->st_stateowner->so_client->cl_cb_conn; dprintk("NFSD alloc_init_deleg\n"); if (fp->fi_had_conflict) @@ -203,10 +203,8 @@ alloc_init_deleg(struct nfs4_client *clp, struct nfs4_stateid *stp, struct svc_f get_file(stp->st_vfs_file); dp->dl_vfs_file = stp->st_vfs_file; dp->dl_type = type; - dp->dl_recall.cbr_dp = NULL; - dp->dl_recall.cbr_ident = cb->cb_ident; - dp->dl_recall.cbr_trunc = 0; - dp->dl_stateid.si_boot = boot_time; + dp->dl_ident = cb->cb_ident; + dp->dl_stateid.si_boot = get_seconds(); dp->dl_stateid.si_stateownerid = current_delegid++; dp->dl_stateid.si_fileid = 0; dp->dl_stateid.si_generation = 0; @@ -427,6 +425,11 @@ static int set_forechannel_maxreqs(struct nfsd4_channel_attrs *fchan) { int status = 0, np = 
fchan->maxreqs * NFSD_PAGES_PER_SLOT; + if (fchan->maxreqs < 1) + return nfserr_inval; + else if (fchan->maxreqs > NFSD_MAX_SLOTS_PER_SESSION) + fchan->maxreqs = NFSD_MAX_SLOTS_PER_SESSION; + spin_lock(&nfsd_serv->sv_lock); if (np + nfsd_serv->sv_drc_pages_used > nfsd_serv->sv_drc_max_pages) np = nfsd_serv->sv_drc_max_pages - nfsd_serv->sv_drc_pages_used; @@ -446,8 +449,8 @@ static int set_forechannel_maxreqs(struct nfsd4_channel_attrs *fchan) * fchan holds the client values on input, and the server values on output */ static int init_forechannel_attrs(struct svc_rqst *rqstp, - struct nfsd4_session *session, - struct nfsd4_channel_attrs *fchan) + struct nfsd4_channel_attrs *session_fchan, + struct nfsd4_channel_attrs *fchan) { int status = 0; __u32 maxcount = svc_max_payload(rqstp); @@ -457,21 +460,21 @@ static int init_forechannel_attrs(struct svc_rqst *rqstp, /* Use the client's max request and max response size if possible */ if (fchan->maxreq_sz > maxcount) fchan->maxreq_sz = maxcount; - session->se_fmaxreq_sz = fchan->maxreq_sz; + session_fchan->maxreq_sz = fchan->maxreq_sz; if (fchan->maxresp_sz > maxcount) fchan->maxresp_sz = maxcount; - session->se_fmaxresp_sz = fchan->maxresp_sz; + session_fchan->maxresp_sz = fchan->maxresp_sz; /* Set the max response cached size our default which is * a multiple of PAGE_SIZE and small */ - session->se_fmaxresp_cached = NFSD_PAGES_PER_SLOT * PAGE_SIZE; - fchan->maxresp_cached = session->se_fmaxresp_cached; + session_fchan->maxresp_cached = NFSD_PAGES_PER_SLOT * PAGE_SIZE; + fchan->maxresp_cached = session_fchan->maxresp_cached; /* Use the client's maxops if possible */ if (fchan->maxops > NFSD_MAX_OPS_PER_COMPOUND) fchan->maxops = NFSD_MAX_OPS_PER_COMPOUND; - session->se_fmaxops = fchan->maxops; + session_fchan->maxops = fchan->maxops; /* try to use the client requested number of slots */ if (fchan->maxreqs > NFSD_MAX_SLOTS_PER_SESSION) @@ -483,7 +486,7 @@ static int init_forechannel_attrs(struct svc_rqst *rqstp, */ status 
= set_forechannel_maxreqs(fchan); - session->se_fnumslots = fchan->maxreqs; + session_fchan->maxreqs = fchan->maxreqs; return status; } @@ -497,12 +500,14 @@ alloc_init_session(struct svc_rqst *rqstp, struct nfs4_client *clp, memset(&tmp, 0, sizeof(tmp)); /* FIXME: For now, we just accept the client back channel attributes. */ - status = init_forechannel_attrs(rqstp, &tmp, &cses->fore_channel); + tmp.se_bchannel = cses->back_channel; + status = init_forechannel_attrs(rqstp, &tmp.se_fchannel, + &cses->fore_channel); if (status) goto out; /* allocate struct nfsd4_session and slot table in one piece */ - slotsize = tmp.se_fnumslots * sizeof(struct nfsd4_slot); + slotsize = tmp.se_fchannel.maxreqs * sizeof(struct nfsd4_slot); new = kzalloc(sizeof(*new) + slotsize, GFP_KERNEL); if (!new) goto out; @@ -576,7 +581,7 @@ free_session(struct kref *kref) int i; ses = container_of(kref, struct nfsd4_session, se_ref); - for (i = 0; i < ses->se_fnumslots; i++) { + for (i = 0; i < ses->se_fchannel.maxreqs; i++) { struct nfsd4_cache_entry *e = &ses->se_slots[i].sl_cache_entry; nfsd4_release_respages(e->ce_respages, e->ce_resused); } @@ -632,16 +637,20 @@ static struct nfs4_client *alloc_client(struct xdr_netobj name) static void shutdown_callback_client(struct nfs4_client *clp) { - struct rpc_clnt *clnt = clp->cl_callback.cb_client; + struct rpc_clnt *clnt = clp->cl_cb_conn.cb_client; if (clnt) { /* * Callback threads take a reference on the client, so there * should be no outstanding callbacks at this point. 
*/ - clp->cl_callback.cb_client = NULL; + clp->cl_cb_conn.cb_client = NULL; rpc_shutdown_client(clnt); } + if (clp->cl_cb_conn.cb_cred) { + put_rpccred(clp->cl_cb_conn.cb_cred); + clp->cl_cb_conn.cb_cred = NULL; + } } static inline void @@ -714,7 +723,7 @@ static struct nfs4_client *create_client(struct xdr_netobj name, char *recdir) return NULL; memcpy(clp->cl_recdir, recdir, HEXDIR_LEN); atomic_set(&clp->cl_count, 1); - atomic_set(&clp->cl_callback.cb_set, 0); + atomic_set(&clp->cl_cb_conn.cb_set, 0); INIT_LIST_HEAD(&clp->cl_idhash); INIT_LIST_HEAD(&clp->cl_strhash); INIT_LIST_HEAD(&clp->cl_openowners); @@ -966,7 +975,7 @@ parse_ipv4(unsigned int addr_len, char *addr_val, unsigned int *cbaddrp, unsigne static void gen_callback(struct nfs4_client *clp, struct nfsd4_setclientid *se) { - struct nfs4_callback *cb = &clp->cl_callback; + struct nfs4_cb_conn *cb = &clp->cl_cb_conn; /* Currently, we only support tcp for the callback channel */ if ((se->se_callback_netid_len != 3) || memcmp((char *)se->se_callback_netid_val, "tcp", 3)) @@ -975,6 +984,7 @@ gen_callback(struct nfs4_client *clp, struct nfsd4_setclientid *se) if ( !(parse_ipv4(se->se_callback_addr_len, se->se_callback_addr_val, &cb->cb_addr, &cb->cb_port))) goto out_err; + cb->cb_minorversion = 0; cb->cb_prog = se->se_callback_prog; cb->cb_ident = se->se_callback_ident; return; @@ -1128,7 +1138,7 @@ nfsd4_replay_cache_entry(struct nfsd4_compoundres *resp, * is sent (lease renewal). */ if (seq && nfsd4_not_cached(resp)) { - seq->maxslots = resp->cstate.session->se_fnumslots; + seq->maxslots = resp->cstate.session->se_fchannel.maxreqs; return nfs_ok; } @@ -1238,12 +1248,6 @@ nfsd4_exchange_id(struct svc_rqst *rqstp, expire_client(conf); goto out_new; } - if (ip_addr != conf->cl_addr && - !(exid->flags & EXCHGID4_FLAG_UPD_CONFIRMED_REC_A)) { - /* Client collision. 
18.35.4 case 3 */ - status = nfserr_clid_inuse; - goto out; - } /* * Set bit when the owner id and verifier map to an already * confirmed client id (18.35.3). @@ -1257,12 +1261,12 @@ nfsd4_exchange_id(struct svc_rqst *rqstp, copy_verf(conf, &verf); new = conf; goto out_copy; - } else { - /* 18.35.4 case 7 */ - if (exid->flags & EXCHGID4_FLAG_UPD_CONFIRMED_REC_A) { - status = nfserr_noent; - goto out; - } + } + + /* 18.35.4 case 7 */ + if (exid->flags & EXCHGID4_FLAG_UPD_CONFIRMED_REC_A) { + status = nfserr_noent; + goto out; } unconf = find_unconfirmed_client_by_str(dname, strhashval, true); @@ -1471,7 +1475,7 @@ nfsd4_sequence(struct svc_rqst *rqstp, goto out; status = nfserr_badslot; - if (seq->slotid >= session->se_fnumslots) + if (seq->slotid >= session->se_fchannel.maxreqs) goto out; slot = &session->se_slots[seq->slotid]; @@ -1686,9 +1690,7 @@ nfsd4_setclientid_confirm(struct svc_rqst *rqstp, else { /* XXX: We just turn off callbacks until we can handle * change request correctly. 
*/ - atomic_set(&conf->cl_callback.cb_set, 0); - gen_confirm(conf); - nfsd4_remove_clid_dir(unconf); + atomic_set(&conf->cl_cb_conn.cb_set, 0); expire_client(unconf); status = nfs_ok; @@ -1882,7 +1884,7 @@ init_stateid(struct nfs4_stateid *stp, struct nfs4_file *fp, struct nfsd4_open * stp->st_stateowner = sop; get_nfs4_file(fp); stp->st_file = fp; - stp->st_stateid.si_boot = boot_time; + stp->st_stateid.si_boot = get_seconds(); stp->st_stateid.si_stateownerid = sop->so_id; stp->st_stateid.si_fileid = fp->fi_id; stp->st_stateid.si_generation = 0; @@ -2059,19 +2061,6 @@ nfs4_file_downgrade(struct file *filp, unsigned int share_access) } /* - * Recall a delegation - */ -static int -do_recall(void *__dp) -{ - struct nfs4_delegation *dp = __dp; - - dp->dl_file->fi_had_conflict = true; - nfsd4_cb_recall(dp); - return 0; -} - -/* * Spawn a thread to perform a recall on the delegation represented * by the lease (file_lock) * @@ -2082,8 +2071,7 @@ do_recall(void *__dp) static void nfsd_break_deleg_cb(struct file_lock *fl) { - struct nfs4_delegation *dp= (struct nfs4_delegation *)fl->fl_owner; - struct task_struct *t; + struct nfs4_delegation *dp = (struct nfs4_delegation *)fl->fl_owner; dprintk("NFSD nfsd_break_deleg_cb: dp %p fl %p\n",dp,fl); if (!dp) @@ -2111,16 +2099,8 @@ void nfsd_break_deleg_cb(struct file_lock *fl) */ fl->fl_break_time = 0; - t = kthread_run(do_recall, dp, "%s", "nfs4_cb_recall"); - if (IS_ERR(t)) { - struct nfs4_client *clp = dp->dl_client; - - printk(KERN_INFO "NFSD: Callback thread failed for " - "for client (clientid %08x/%08x)\n", - clp->cl_clientid.cl_boot, clp->cl_clientid.cl_id); - put_nfs4_client(dp->dl_client); - nfs4_put_delegation(dp); - } + dp->dl_file->fi_had_conflict = true; + nfsd4_cb_recall(dp); } /* @@ -2422,7 +2402,7 @@ nfs4_open_delegation(struct svc_fh *fh, struct nfsd4_open *open, struct nfs4_sta { struct nfs4_delegation *dp; struct nfs4_stateowner *sop = stp->st_stateowner; - struct nfs4_callback *cb = 
&sop->so_client->cl_callback; + struct nfs4_cb_conn *cb = &sop->so_client->cl_cb_conn; struct file_lock fl, *flp = &fl; int status, flag = 0; @@ -2614,7 +2594,7 @@ nfsd4_renew(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, renew_client(clp); status = nfserr_cb_path_down; if (!list_empty(&clp->cl_delegations) - && !atomic_read(&clp->cl_callback.cb_set)) + && !atomic_read(&clp->cl_cb_conn.cb_set)) goto out; status = nfs_ok; out: @@ -2738,12 +2718,42 @@ nfs4_check_fh(struct svc_fh *fhp, struct nfs4_stateid *stp) static int STALE_STATEID(stateid_t *stateid) { - if (stateid->si_boot == boot_time) - return 0; - dprintk("NFSD: stale stateid (%08x/%08x/%08x/%08x)!\n", - stateid->si_boot, stateid->si_stateownerid, stateid->si_fileid, - stateid->si_generation); - return 1; + if (time_after((unsigned long)boot_time, + (unsigned long)stateid->si_boot)) { + dprintk("NFSD: stale stateid (%08x/%08x/%08x/%08x)!\n", + stateid->si_boot, stateid->si_stateownerid, + stateid->si_fileid, stateid->si_generation); + return 1; + } + return 0; +} + +static int +EXPIRED_STATEID(stateid_t *stateid) +{ + if (time_before((unsigned long)boot_time, + ((unsigned long)stateid->si_boot)) && + time_before((unsigned long)(stateid->si_boot + lease_time), get_seconds())) { + dprintk("NFSD: expired stateid (%08x/%08x/%08x/%08x)!\n", + stateid->si_boot, stateid->si_stateownerid, + stateid->si_fileid, stateid->si_generation); + return 1; + } + return 0; +} + +static __be32 +stateid_error_map(stateid_t *stateid) +{ + if (STALE_STATEID(stateid)) + return nfserr_stale_stateid; + if (EXPIRED_STATEID(stateid)) + return nfserr_expired; + + dprintk("NFSD: bad stateid (%08x/%08x/%08x/%08x)!\n", + stateid->si_boot, stateid->si_stateownerid, + stateid->si_fileid, stateid->si_generation); + return nfserr_bad_stateid; } static inline int @@ -2867,8 +2877,10 @@ nfs4_preprocess_stateid_op(struct nfsd4_compound_state *cstate, status = nfserr_bad_stateid; if (is_delegation_stateid(stateid)) { dp = 
find_delegation_stateid(ino, stateid); - if (!dp) + if (!dp) { + status = stateid_error_map(stateid); goto out; + } status = check_stateid_generation(stateid, &dp->dl_stateid, flags); if (status) @@ -2881,8 +2893,10 @@ nfs4_preprocess_stateid_op(struct nfsd4_compound_state *cstate, *filpp = dp->dl_vfs_file; } else { /* open or lock stateid */ stp = find_stateid(stateid, flags); - if (!stp) + if (!stp) { + status = stateid_error_map(stateid); goto out; + } if (nfs4_check_fh(current_fh, stp)) goto out; if (!stp->st_stateowner->so_confirmed) @@ -2956,7 +2970,7 @@ nfs4_preprocess_seqid_op(struct nfsd4_compound_state *cstate, u32 seqid, */ sop = search_close_lru(stateid->si_stateownerid, flags); if (sop == NULL) - return nfserr_bad_stateid; + return stateid_error_map(stateid); *sopp = sop; goto check_replay; } @@ -3227,8 +3241,10 @@ nfsd4_delegreturn(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, if (!is_delegation_stateid(stateid)) goto out; dp = find_delegation_stateid(inode, stateid); - if (!dp) + if (!dp) { + status = stateid_error_map(stateid); goto out; + } status = check_stateid_generation(stateid, &dp->dl_stateid, flags); if (status) goto out; @@ -3455,7 +3471,7 @@ alloc_init_lock_stateid(struct nfs4_stateowner *sop, struct nfs4_file *fp, struc stp->st_stateowner = sop; get_nfs4_file(fp); stp->st_file = fp; - stp->st_stateid.si_boot = boot_time; + stp->st_stateid.si_boot = get_seconds(); stp->st_stateid.si_stateownerid = sop->so_id; stp->st_stateid.si_fileid = fp->fi_id; stp->st_stateid.si_generation = 0; @@ -3987,6 +4003,7 @@ nfs4_state_init(void) INIT_LIST_HEAD(&conf_str_hashtbl[i]); INIT_LIST_HEAD(&unconf_str_hashtbl[i]); INIT_LIST_HEAD(&unconf_id_hashtbl[i]); + INIT_LIST_HEAD(&reclaim_str_hashtbl[i]); } for (i = 0; i < SESSION_HASH_SIZE; i++) INIT_LIST_HEAD(&sessionid_hashtbl[i]); @@ -4009,8 +4026,6 @@ nfs4_state_init(void) INIT_LIST_HEAD(&close_lru); INIT_LIST_HEAD(&client_lru); INIT_LIST_HEAD(&del_recall_lru); - for (i = 0; i < 
CLIENT_HASH_SIZE; i++) - INIT_LIST_HEAD(&reclaim_str_hashtbl[i]); reclaim_str_hashtbl_size = 0; return 0; } diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c index b73549d..2dcc7fe 100644 --- a/fs/nfsd/nfs4xdr.c +++ b/fs/nfsd/nfs4xdr.c @@ -83,16 +83,6 @@ check_filename(char *str, int len, __be32 err) return 0; } -/* - * START OF "GENERIC" DECODE ROUTINES. - * These may look a little ugly since they are imported from a "generic" - * set of XDR encode/decode routines which are intended to be shared by - * all of our NFSv4 implementations (OpenBSD, MacOS X...). - * - * If the pain of reading these is too great, it should be a straightforward - * task to translate them into Linux-specific versions which are more - * consistent with the style used in NFSv2/v3... - */ #define DECODE_HEAD \ __be32 *p; \ __be32 status @@ -254,20 +244,8 @@ nfsd4_decode_bitmap(struct nfsd4_compoundargs *argp, u32 *bmval) DECODE_TAIL; } -static u32 nfsd_attrmask[] = { - NFSD_WRITEABLE_ATTRS_WORD0, - NFSD_WRITEABLE_ATTRS_WORD1, - NFSD_WRITEABLE_ATTRS_WORD2 -}; - -static u32 nfsd41_ex_attrmask[] = { - NFSD_SUPPATTR_EXCLCREAT_WORD0, - NFSD_SUPPATTR_EXCLCREAT_WORD1, - NFSD_SUPPATTR_EXCLCREAT_WORD2 -}; - static __be32 -nfsd4_decode_fattr(struct nfsd4_compoundargs *argp, u32 *bmval, u32 *writable, +nfsd4_decode_fattr(struct nfsd4_compoundargs *argp, u32 *bmval, struct iattr *iattr, struct nfs4_acl **acl) { int expected_len, len = 0; @@ -280,18 +258,6 @@ nfsd4_decode_fattr(struct nfsd4_compoundargs *argp, u32 *bmval, u32 *writable, if ((status = nfsd4_decode_bitmap(argp, bmval))) return status; - /* - * According to spec, unsupported attributes return ERR_ATTRNOTSUPP; - * read-only attributes return ERR_INVAL. 
- */ - if ((bmval[0] & ~nfsd_suppattrs0(argp->minorversion)) || - (bmval[1] & ~nfsd_suppattrs1(argp->minorversion)) || - (bmval[2] & ~nfsd_suppattrs2(argp->minorversion))) - return nfserr_attrnotsupp; - if ((bmval[0] & ~writable[0]) || (bmval[1] & ~writable[1]) || - (bmval[2] & ~writable[2])) - return nfserr_inval; - READ_BUF(4); READ32(expected_len); @@ -424,8 +390,11 @@ nfsd4_decode_fattr(struct nfsd4_compoundargs *argp, u32 *bmval, u32 *writable, goto xdr_error; } } - BUG_ON(bmval[2]); /* no such writeable attr supported yet */ - if (len != expected_len) + if (bmval[0] & ~NFSD_WRITEABLE_ATTRS_WORD0 + || bmval[1] & ~NFSD_WRITEABLE_ATTRS_WORD1 + || bmval[2] & ~NFSD_WRITEABLE_ATTRS_WORD2) + READ_BUF(expected_len - len); + else if (len != expected_len) goto xdr_error; DECODE_TAIL; @@ -518,8 +487,8 @@ nfsd4_decode_create(struct nfsd4_compoundargs *argp, struct nfsd4_create *create if ((status = check_filename(create->cr_name, create->cr_namelen, nfserr_inval))) return status; - status = nfsd4_decode_fattr(argp, create->cr_bmval, nfsd_attrmask, - &create->cr_iattr, &create->cr_acl); + status = nfsd4_decode_fattr(argp, create->cr_bmval, &create->cr_iattr, + &create->cr_acl); if (status) goto out; @@ -682,7 +651,7 @@ nfsd4_decode_open(struct nfsd4_compoundargs *argp, struct nfsd4_open *open) case NFS4_CREATE_UNCHECKED: case NFS4_CREATE_GUARDED: status = nfsd4_decode_fattr(argp, open->op_bmval, - nfsd_attrmask, &open->op_iattr, &open->op_acl); + &open->op_iattr, &open->op_acl); if (status) goto out; break; @@ -696,8 +665,7 @@ nfsd4_decode_open(struct nfsd4_compoundargs *argp, struct nfsd4_open *open) READ_BUF(8); COPYMEM(open->op_verf.data, 8); status = nfsd4_decode_fattr(argp, open->op_bmval, - nfsd41_ex_attrmask, &open->op_iattr, - &open->op_acl); + &open->op_iattr, &open->op_acl); if (status) goto out; break; @@ -893,8 +861,8 @@ nfsd4_decode_setattr(struct nfsd4_compoundargs *argp, struct nfsd4_setattr *seta status = nfsd4_decode_stateid(argp, &setattr->sa_stateid); 
if (status) return status; - return nfsd4_decode_fattr(argp, setattr->sa_bmval, nfsd_attrmask, - &setattr->sa_iattr, &setattr->sa_acl); + return nfsd4_decode_fattr(argp, setattr->sa_bmval, &setattr->sa_iattr, + &setattr->sa_acl); } static __be32 @@ -1328,64 +1296,64 @@ static nfsd4_dec nfsd4_dec_ops[] = { }; static nfsd4_dec nfsd41_dec_ops[] = { - [OP_ACCESS] (nfsd4_dec)nfsd4_decode_access, - [OP_CLOSE] (nfsd4_dec)nfsd4_decode_close, - [OP_COMMIT] (nfsd4_dec)nfsd4_decode_commit, - [OP_CREATE] (nfsd4_dec)nfsd4_decode_create, - [OP_DELEGPURGE] (nfsd4_dec)nfsd4_decode_notsupp, - [OP_DELEGRETURN] (nfsd4_dec)nfsd4_decode_delegreturn, - [OP_GETATTR] (nfsd4_dec)nfsd4_decode_getattr, - [OP_GETFH] (nfsd4_dec)nfsd4_decode_noop, - [OP_LINK] (nfsd4_dec)nfsd4_decode_link, - [OP_LOCK] (nfsd4_dec)nfsd4_decode_lock, - [OP_LOCKT] (nfsd4_dec)nfsd4_decode_lockt, - [OP_LOCKU] (nfsd4_dec)nfsd4_decode_locku, - [OP_LOOKUP] (nfsd4_dec)nfsd4_decode_lookup, - [OP_LOOKUPP] (nfsd4_dec)nfsd4_decode_noop, - [OP_NVERIFY] (nfsd4_dec)nfsd4_decode_verify, - [OP_OPEN] (nfsd4_dec)nfsd4_decode_open, - [OP_OPENATTR] (nfsd4_dec)nfsd4_decode_notsupp, - [OP_OPEN_CONFIRM] (nfsd4_dec)nfsd4_decode_notsupp, - [OP_OPEN_DOWNGRADE] (nfsd4_dec)nfsd4_decode_open_downgrade, - [OP_PUTFH] (nfsd4_dec)nfsd4_decode_putfh, - [OP_PUTPUBFH] (nfsd4_dec)nfsd4_decode_notsupp, - [OP_PUTROOTFH] (nfsd4_dec)nfsd4_decode_noop, - [OP_READ] (nfsd4_dec)nfsd4_decode_read, - [OP_READDIR] (nfsd4_dec)nfsd4_decode_readdir, - [OP_READLINK] (nfsd4_dec)nfsd4_decode_noop, - [OP_REMOVE] (nfsd4_dec)nfsd4_decode_remove, - [OP_RENAME] (nfsd4_dec)nfsd4_decode_rename, - [OP_RENEW] (nfsd4_dec)nfsd4_decode_notsupp, - [OP_RESTOREFH] (nfsd4_dec)nfsd4_decode_noop, - [OP_SAVEFH] (nfsd4_dec)nfsd4_decode_noop, - [OP_SECINFO] (nfsd4_dec)nfsd4_decode_secinfo, - [OP_SETATTR] (nfsd4_dec)nfsd4_decode_setattr, - [OP_SETCLIENTID] (nfsd4_dec)nfsd4_decode_notsupp, - [OP_SETCLIENTID_CONFIRM](nfsd4_dec)nfsd4_decode_notsupp, - [OP_VERIFY] 
(nfsd4_dec)nfsd4_decode_verify, - [OP_WRITE] (nfsd4_dec)nfsd4_decode_write, - [OP_RELEASE_LOCKOWNER] (nfsd4_dec)nfsd4_decode_notsupp, + [OP_ACCESS] = (nfsd4_dec)nfsd4_decode_access, + [OP_CLOSE] = (nfsd4_dec)nfsd4_decode_close, + [OP_COMMIT] = (nfsd4_dec)nfsd4_decode_commit, + [OP_CREATE] = (nfsd4_dec)nfsd4_decode_create, + [OP_DELEGPURGE] = (nfsd4_dec)nfsd4_decode_notsupp, + [OP_DELEGRETURN] = (nfsd4_dec)nfsd4_decode_delegreturn, + [OP_GETATTR] = (nfsd4_dec)nfsd4_decode_getattr, + [OP_GETFH] = (nfsd4_dec)nfsd4_decode_noop, + [OP_LINK] = (nfsd4_dec)nfsd4_decode_link, + [OP_LOCK] = (nfsd4_dec)nfsd4_decode_lock, + [OP_LOCKT] = (nfsd4_dec)nfsd4_decode_lockt, + [OP_LOCKU] = (nfsd4_dec)nfsd4_decode_locku, + [OP_LOOKUP] = (nfsd4_dec)nfsd4_decode_lookup, + [OP_LOOKUPP] = (nfsd4_dec)nfsd4_decode_noop, + [OP_NVERIFY] = (nfsd4_dec)nfsd4_decode_verify, + [OP_OPEN] = (nfsd4_dec)nfsd4_decode_open, + [OP_OPENATTR] = (nfsd4_dec)nfsd4_decode_notsupp, + [OP_OPEN_CONFIRM] = (nfsd4_dec)nfsd4_decode_notsupp, + [OP_OPEN_DOWNGRADE] = (nfsd4_dec)nfsd4_decode_open_downgrade, + [OP_PUTFH] = (nfsd4_dec)nfsd4_decode_putfh, + [OP_PUTPUBFH] = (nfsd4_dec)nfsd4_decode_notsupp, + [OP_PUTROOTFH] = (nfsd4_dec)nfsd4_decode_noop, + [OP_READ] = (nfsd4_dec)nfsd4_decode_read, + [OP_READDIR] = (nfsd4_dec)nfsd4_decode_readdir, + [OP_READLINK] = (nfsd4_dec)nfsd4_decode_noop, + [OP_REMOVE] = (nfsd4_dec)nfsd4_decode_remove, + [OP_RENAME] = (nfsd4_dec)nfsd4_decode_rename, + [OP_RENEW] = (nfsd4_dec)nfsd4_decode_notsupp, + [OP_RESTOREFH] = (nfsd4_dec)nfsd4_decode_noop, + [OP_SAVEFH] = (nfsd4_dec)nfsd4_decode_noop, + [OP_SECINFO] = (nfsd4_dec)nfsd4_decode_secinfo, + [OP_SETATTR] = (nfsd4_dec)nfsd4_decode_setattr, + [OP_SETCLIENTID] = (nfsd4_dec)nfsd4_decode_notsupp, + [OP_SETCLIENTID_CONFIRM]= (nfsd4_dec)nfsd4_decode_notsupp, + [OP_VERIFY] = (nfsd4_dec)nfsd4_decode_verify, + [OP_WRITE] = (nfsd4_dec)nfsd4_decode_write, + [OP_RELEASE_LOCKOWNER] = (nfsd4_dec)nfsd4_decode_notsupp, /* new operations for NFSv4.1 */ - 
[OP_BACKCHANNEL_CTL] (nfsd4_dec)nfsd4_decode_notsupp, - [OP_BIND_CONN_TO_SESSION](nfsd4_dec)nfsd4_decode_notsupp, - [OP_EXCHANGE_ID] (nfsd4_dec)nfsd4_decode_exchange_id, - [OP_CREATE_SESSION] (nfsd4_dec)nfsd4_decode_create_session, - [OP_DESTROY_SESSION] (nfsd4_dec)nfsd4_decode_destroy_session, - [OP_FREE_STATEID] (nfsd4_dec)nfsd4_decode_notsupp, - [OP_GET_DIR_DELEGATION] (nfsd4_dec)nfsd4_decode_notsupp, - [OP_GETDEVICEINFO] (nfsd4_dec)nfsd4_decode_notsupp, - [OP_GETDEVICELIST] (nfsd4_dec)nfsd4_decode_notsupp, - [OP_LAYOUTCOMMIT] (nfsd4_dec)nfsd4_decode_notsupp, - [OP_LAYOUTGET] (nfsd4_dec)nfsd4_decode_notsupp, - [OP_LAYOUTRETURN] (nfsd4_dec)nfsd4_decode_notsupp, - [OP_SECINFO_NO_NAME] (nfsd4_dec)nfsd4_decode_notsupp, - [OP_SEQUENCE] (nfsd4_dec)nfsd4_decode_sequence, - [OP_SET_SSV] (nfsd4_dec)nfsd4_decode_notsupp, - [OP_TEST_STATEID] (nfsd4_dec)nfsd4_decode_notsupp, - [OP_WANT_DELEGATION] (nfsd4_dec)nfsd4_decode_notsupp, - [OP_DESTROY_CLIENTID] (nfsd4_dec)nfsd4_decode_notsupp, - [OP_RECLAIM_COMPLETE] (nfsd4_dec)nfsd4_decode_notsupp, + [OP_BACKCHANNEL_CTL] = (nfsd4_dec)nfsd4_decode_notsupp, + [OP_BIND_CONN_TO_SESSION]= (nfsd4_dec)nfsd4_decode_notsupp, + [OP_EXCHANGE_ID] = (nfsd4_dec)nfsd4_decode_exchange_id, + [OP_CREATE_SESSION] = (nfsd4_dec)nfsd4_decode_create_session, + [OP_DESTROY_SESSION] = (nfsd4_dec)nfsd4_decode_destroy_session, + [OP_FREE_STATEID] = (nfsd4_dec)nfsd4_decode_notsupp, + [OP_GET_DIR_DELEGATION] = (nfsd4_dec)nfsd4_decode_notsupp, + [OP_GETDEVICEINFO] = (nfsd4_dec)nfsd4_decode_notsupp, + [OP_GETDEVICELIST] = (nfsd4_dec)nfsd4_decode_notsupp, + [OP_LAYOUTCOMMIT] = (nfsd4_dec)nfsd4_decode_notsupp, + [OP_LAYOUTGET] = (nfsd4_dec)nfsd4_decode_notsupp, + [OP_LAYOUTRETURN] = (nfsd4_dec)nfsd4_decode_notsupp, + [OP_SECINFO_NO_NAME] = (nfsd4_dec)nfsd4_decode_notsupp, + [OP_SEQUENCE] = (nfsd4_dec)nfsd4_decode_sequence, + [OP_SET_SSV] = (nfsd4_dec)nfsd4_decode_notsupp, + [OP_TEST_STATEID] = (nfsd4_dec)nfsd4_decode_notsupp, + [OP_WANT_DELEGATION] = 
(nfsd4_dec)nfsd4_decode_notsupp, + [OP_DESTROY_CLIENTID] = (nfsd4_dec)nfsd4_decode_notsupp, + [OP_RECLAIM_COMPLETE] = (nfsd4_dec)nfsd4_decode_notsupp, }; struct nfsd4_minorversion_ops { @@ -1489,21 +1457,6 @@ nfsd4_decode_compound(struct nfsd4_compoundargs *argp) DECODE_TAIL; } -/* - * END OF "GENERIC" DECODE ROUTINES. - */ - -/* - * START OF "GENERIC" ENCODE ROUTINES. - * These may look a little ugly since they are imported from a "generic" - * set of XDR encode/decode routines which are intended to be shared by - * all of our NFSv4 implementations (OpenBSD, MacOS X...). - * - * If the pain of reading these is too great, it should be a straightforward - * task to translate them into Linux-specific versions which are more - * consistent with the style used in NFSv2/v3... - */ -#define ENCODE_HEAD __be32 *p #define WRITE32(n) *p++ = htonl(n) #define WRITE64(n) do { \ @@ -1515,13 +1468,41 @@ nfsd4_decode_compound(struct nfsd4_compoundargs *argp) memcpy(p, ptr, nbytes); \ p += XDR_QUADLEN(nbytes); \ }} while (0) -#define WRITECINFO(c) do { \ - *p++ = htonl(c.atomic); \ - *p++ = htonl(c.before_ctime_sec); \ - *p++ = htonl(c.before_ctime_nsec); \ - *p++ = htonl(c.after_ctime_sec); \ - *p++ = htonl(c.after_ctime_nsec); \ -} while (0) + +static void write32(__be32 **p, u32 n) +{ + *(*p)++ = n; +} + +static void write64(__be32 **p, u64 n) +{ + write32(p, (u32)(n >> 32)); + write32(p, (u32)n); +} + +static void write_change(__be32 **p, struct kstat *stat, struct inode *inode) +{ + if (IS_I_VERSION(inode)) { + write64(p, inode->i_version); + } else { + write32(p, stat->ctime.tv_sec); + write32(p, stat->ctime.tv_nsec); + } +} + +static void write_cinfo(__be32 **p, struct nfsd4_change_info *c) +{ + write32(p, c->atomic); + if (c->change_supported) { + write64(p, c->before_change); + write64(p, c->after_change); + } else { + write32(p, c->before_ctime_sec); + write32(p, c->before_ctime_nsec); + write32(p, c->after_ctime_sec); + write32(p, c->after_ctime_nsec); + } +} #define 
RESERVE_SPACE(nbytes) do { \ p = resp->p; \ @@ -1874,16 +1855,9 @@ nfsd4_encode_fattr(struct svc_fh *fhp, struct svc_export *exp, WRITE32(NFS4_FH_PERSISTENT|NFS4_FH_VOL_RENAME); } if (bmval0 & FATTR4_WORD0_CHANGE) { - /* - * Note: This _must_ be consistent with the scheme for writing - * change_info, so any changes made here must be reflected there - * as well. (See xdr4.h:set_change_info() and the WRITECINFO() - * macro above.) - */ if ((buflen -= 8) < 0) goto out_resource; - WRITE32(stat.ctime.tv_sec); - WRITE32(stat.ctime.tv_nsec); + write_change(&p, &stat, dentry->d_inode); } if (bmval0 & FATTR4_WORD0_SIZE) { if ((buflen -= 8) < 0) @@ -2348,7 +2322,7 @@ fail: static void nfsd4_encode_stateid(struct nfsd4_compoundres *resp, stateid_t *sid) { - ENCODE_HEAD; + __be32 *p; RESERVE_SPACE(sizeof(stateid_t)); WRITE32(sid->si_generation); @@ -2359,7 +2333,7 @@ nfsd4_encode_stateid(struct nfsd4_compoundres *resp, stateid_t *sid) static __be32 nfsd4_encode_access(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_access *access) { - ENCODE_HEAD; + __be32 *p; if (!nfserr) { RESERVE_SPACE(8); @@ -2386,7 +2360,7 @@ nfsd4_encode_close(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_c static __be32 nfsd4_encode_commit(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_commit *commit) { - ENCODE_HEAD; + __be32 *p; if (!nfserr) { RESERVE_SPACE(8); @@ -2399,11 +2373,11 @@ nfsd4_encode_commit(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_ static __be32 nfsd4_encode_create(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_create *create) { - ENCODE_HEAD; + __be32 *p; if (!nfserr) { RESERVE_SPACE(32); - WRITECINFO(create->cr_cinfo); + write_cinfo(&p, &create->cr_cinfo); WRITE32(2); WRITE32(create->cr_bmval[0]); WRITE32(create->cr_bmval[1]); @@ -2435,7 +2409,7 @@ nfsd4_encode_getfh(struct nfsd4_compoundres *resp, __be32 nfserr, struct svc_fh { struct svc_fh *fhp = *fhpp; unsigned int len; - ENCODE_HEAD; + __be32 *p; if (!nfserr) { 
len = fhp->fh_handle.fh_size; @@ -2454,7 +2428,7 @@ nfsd4_encode_getfh(struct nfsd4_compoundres *resp, __be32 nfserr, struct svc_fh static void nfsd4_encode_lock_denied(struct nfsd4_compoundres *resp, struct nfsd4_lock_denied *ld) { - ENCODE_HEAD; + __be32 *p; RESERVE_SPACE(32 + XDR_LEN(ld->ld_sop ? ld->ld_sop->so_owner.len : 0)); WRITE64(ld->ld_start); @@ -2510,11 +2484,11 @@ nfsd4_encode_locku(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_l static __be32 nfsd4_encode_link(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_link *link) { - ENCODE_HEAD; + __be32 *p; if (!nfserr) { RESERVE_SPACE(20); - WRITECINFO(link->li_cinfo); + write_cinfo(&p, &link->li_cinfo); ADJUST_ARGS(); } return nfserr; @@ -2524,7 +2498,7 @@ nfsd4_encode_link(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_li static __be32 nfsd4_encode_open(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_open *open) { - ENCODE_HEAD; + __be32 *p; ENCODE_SEQID_OP_HEAD; if (nfserr) @@ -2532,7 +2506,7 @@ nfsd4_encode_open(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_op nfsd4_encode_stateid(resp, &open->op_stateid); RESERVE_SPACE(40); - WRITECINFO(open->op_cinfo); + write_cinfo(&p, &open->op_cinfo); WRITE32(open->op_rflags); WRITE32(2); WRITE32(open->op_bmval[0]); @@ -2619,7 +2593,7 @@ nfsd4_encode_read(struct nfsd4_compoundres *resp, __be32 nfserr, int v, pn; unsigned long maxcount; long len; - ENCODE_HEAD; + __be32 *p; if (nfserr) return nfserr; @@ -2681,7 +2655,7 @@ nfsd4_encode_readlink(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd { int maxcount; char *page; - ENCODE_HEAD; + __be32 *p; if (nfserr) return nfserr; @@ -2730,7 +2704,7 @@ nfsd4_encode_readdir(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4 int maxcount; loff_t offset; __be32 *page, *savep, *tailbase; - ENCODE_HEAD; + __be32 *p; if (nfserr) return nfserr; @@ -2806,11 +2780,11 @@ err_no_verf: static __be32 nfsd4_encode_remove(struct nfsd4_compoundres *resp, 
__be32 nfserr, struct nfsd4_remove *remove) { - ENCODE_HEAD; + __be32 *p; if (!nfserr) { RESERVE_SPACE(20); - WRITECINFO(remove->rm_cinfo); + write_cinfo(&p, &remove->rm_cinfo); ADJUST_ARGS(); } return nfserr; @@ -2819,12 +2793,12 @@ nfsd4_encode_remove(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_ static __be32 nfsd4_encode_rename(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_rename *rename) { - ENCODE_HEAD; + __be32 *p; if (!nfserr) { RESERVE_SPACE(40); - WRITECINFO(rename->rn_sinfo); - WRITECINFO(rename->rn_tinfo); + write_cinfo(&p, &rename->rn_sinfo); + write_cinfo(&p, &rename->rn_tinfo); ADJUST_ARGS(); } return nfserr; @@ -2839,7 +2813,7 @@ nfsd4_encode_secinfo(struct nfsd4_compoundres *resp, __be32 nfserr, u32 nflavs; struct exp_flavor_info *flavs; struct exp_flavor_info def_flavs[2]; - ENCODE_HEAD; + __be32 *p; if (nfserr) goto out; @@ -2904,7 +2878,7 @@ out: static __be32 nfsd4_encode_setattr(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_setattr *setattr) { - ENCODE_HEAD; + __be32 *p; RESERVE_SPACE(12); if (nfserr) { @@ -2924,7 +2898,7 @@ nfsd4_encode_setattr(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4 static __be32 nfsd4_encode_setclientid(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_setclientid *scd) { - ENCODE_HEAD; + __be32 *p; if (!nfserr) { RESERVE_SPACE(8 + sizeof(nfs4_verifier)); @@ -2944,7 +2918,7 @@ nfsd4_encode_setclientid(struct nfsd4_compoundres *resp, __be32 nfserr, struct n static __be32 nfsd4_encode_write(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_write *write) { - ENCODE_HEAD; + __be32 *p; if (!nfserr) { RESERVE_SPACE(16); @@ -2960,7 +2934,7 @@ static __be32 nfsd4_encode_exchange_id(struct nfsd4_compoundres *resp, int nfserr, struct nfsd4_exchange_id *exid) { - ENCODE_HEAD; + __be32 *p; char *major_id; char *server_scope; int major_id_sz; @@ -3015,7 +2989,7 @@ static __be32 nfsd4_encode_create_session(struct nfsd4_compoundres *resp, int nfserr, struct 
nfsd4_create_session *sess) { - ENCODE_HEAD; + __be32 *p; if (nfserr) return nfserr; @@ -3071,7 +3045,7 @@ __be32 nfsd4_encode_sequence(struct nfsd4_compoundres *resp, int nfserr, struct nfsd4_sequence *seq) { - ENCODE_HEAD; + __be32 *p; if (nfserr) return nfserr; @@ -3209,7 +3183,7 @@ static int nfsd4_check_drc_limit(struct nfsd4_compoundres *resp) dprintk("%s length %u, xb->page_len %u tlen %u pad %u\n", __func__, length, xb->page_len, tlen, pad); - if (length <= session->se_fmaxresp_cached) + if (length <= session->se_fchannel.maxresp_cached) return status; else return nfserr_rep_too_big_to_cache; @@ -3219,7 +3193,7 @@ void nfsd4_encode_operation(struct nfsd4_compoundres *resp, struct nfsd4_op *op) { __be32 *statp; - ENCODE_HEAD; + __be32 *p; RESERVE_SPACE(8); WRITE32(op->opnum); @@ -3253,7 +3227,7 @@ status: void nfsd4_encode_replay(struct nfsd4_compoundres *resp, struct nfsd4_op *op) { - ENCODE_HEAD; + __be32 *p; struct nfs4_replay *rp = op->replay; BUG_ON(!rp); @@ -3268,10 +3242,6 @@ nfsd4_encode_replay(struct nfsd4_compoundres *resp, struct nfsd4_op *op) ADJUST_ARGS(); } -/* - * END OF "GENERIC" ENCODE ROUTINES. - */ - int nfs4svc_encode_voidres(struct svc_rqst *rqstp, __be32 *p, void *dummy) { diff --git a/fs/nfsd/nfscache.c b/fs/nfsd/nfscache.c index 5bfc2ac..4638635 100644 --- a/fs/nfsd/nfscache.c +++ b/fs/nfsd/nfscache.c @@ -29,15 +29,24 @@ */ #define CACHESIZE 1024 #define HASHSIZE 64 -#define REQHASH(xid) (((((__force __u32)xid) >> 24) ^ ((__force __u32)xid)) & (HASHSIZE-1)) -static struct hlist_head * hash_list; +static struct hlist_head * cache_hash; static struct list_head lru_head; static int cache_disabled = 1; +/* + * Calculate the hash index from an XID. 
+ */ +static inline u32 request_hash(u32 xid) +{ + u32 h = xid; + h ^= (xid >> 24); + return h & (HASHSIZE-1); +} + static int nfsd_cache_append(struct svc_rqst *rqstp, struct kvec *vec); -/* +/* * locking for the reply cache: * A cache entry is "single use" if c_state == RC_INPROG * Otherwise, it when accessing _prev or _next, the lock must be held. @@ -62,8 +71,8 @@ int nfsd_reply_cache_init(void) i--; } - hash_list = kcalloc (HASHSIZE, sizeof(struct hlist_head), GFP_KERNEL); - if (!hash_list) + cache_hash = kcalloc (HASHSIZE, sizeof(struct hlist_head), GFP_KERNEL); + if (!cache_hash) goto out_nomem; cache_disabled = 0; @@ -88,8 +97,8 @@ void nfsd_reply_cache_shutdown(void) cache_disabled = 1; - kfree (hash_list); - hash_list = NULL; + kfree (cache_hash); + cache_hash = NULL; } /* @@ -108,7 +117,7 @@ static void hash_refile(struct svc_cacherep *rp) { hlist_del_init(&rp->c_hash); - hlist_add_head(&rp->c_hash, hash_list + REQHASH(rp->c_xid)); + hlist_add_head(&rp->c_hash, cache_hash + request_hash(rp->c_xid)); } /* @@ -138,7 +147,7 @@ nfsd_cache_lookup(struct svc_rqst *rqstp, int type) spin_lock(&cache_lock); rtn = RC_DOIT; - rh = &hash_list[REQHASH(xid)]; + rh = &cache_hash[request_hash(xid)]; hlist_for_each_entry(rp, hn, rh, c_hash) { if (rp->c_state != RC_UNUSED && xid == rp->c_xid && proc == rp->c_proc && @@ -165,8 +174,8 @@ nfsd_cache_lookup(struct svc_rqst *rqstp, int type) } } - /* This should not happen */ - if (rp == NULL) { + /* All entries on the LRU are in-progress. 
This should not happen */ + if (&rp->c_lru == &lru_head) { static int complaints; printk(KERN_WARNING "nfsd: all repcache entries locked!\n"); @@ -264,7 +273,7 @@ nfsd_cache_update(struct svc_rqst *rqstp, int cachetype, __be32 *statp) len = resv->iov_len - ((char*)statp - (char*)resv->iov_base); len >>= 2; - + /* Don't cache excessive amounts of data and XDR failures */ if (!statp || len > (256 >> 2)) { rp->c_state = RC_UNUSED; diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c index af16849..1250fb9 100644 --- a/fs/nfsd/nfsctl.c +++ b/fs/nfsd/nfsctl.c @@ -207,10 +207,14 @@ static struct file_operations pool_stats_operations = { static ssize_t write_svc(struct file *file, char *buf, size_t size) { struct nfsctl_svc *data; + int err; if (size < sizeof(*data)) return -EINVAL; data = (struct nfsctl_svc*) buf; - return nfsd_svc(data->svc_port, data->svc_nthreads); + err = nfsd_svc(data->svc_port, data->svc_nthreads); + if (err < 0) + return err; + return 0; } /** @@ -692,11 +696,12 @@ static ssize_t write_threads(struct file *file, char *buf, size_t size) if (newthreads < 0) return -EINVAL; rv = nfsd_svc(NFS_PORT, newthreads); - if (rv) + if (rv < 0) return rv; - } - sprintf(buf, "%d\n", nfsd_nrthreads()); - return strlen(buf); + } else + rv = nfsd_nrthreads(); + + return scnprintf(buf, SIMPLE_TRANSACTION_LIMIT, "%d\n", rv); } /** @@ -793,7 +798,7 @@ static ssize_t __write_versions(struct file *file, char *buf, size_t size) { char *mesg = buf; char *vers, *minorp, sign; - int len, num; + int len, num, remaining; unsigned minor; ssize_t tlen = 0; char *sep; @@ -840,32 +845,50 @@ static ssize_t __write_versions(struct file *file, char *buf, size_t size) } next: vers += len + 1; - tlen += len; } while ((len = qword_get(&mesg, vers, size)) > 0); /* If all get turned off, turn them back on, as * having no versions is BAD */ nfsd_reset_versions(); } + /* Now write current state into reply buffer */ len = 0; sep = ""; + remaining = SIMPLE_TRANSACTION_LIMIT; for (num=2 ; num <= 
4 ; num++) if (nfsd_vers(num, NFSD_AVAIL)) { - len += sprintf(buf+len, "%s%c%d", sep, + len = snprintf(buf, remaining, "%s%c%d", sep, nfsd_vers(num, NFSD_TEST)?'+':'-', num); sep = " "; + + if (len > remaining) + break; + remaining -= len; + buf += len; + tlen += len; } if (nfsd_vers(4, NFSD_AVAIL)) - for (minor = 1; minor <= NFSD_SUPPORTED_MINOR_VERSION; minor++) - len += sprintf(buf+len, " %c4.%u", + for (minor = 1; minor <= NFSD_SUPPORTED_MINOR_VERSION; + minor++) { + len = snprintf(buf, remaining, " %c4.%u", (nfsd_vers(4, NFSD_TEST) && nfsd_minorversion(minor, NFSD_TEST)) ? '+' : '-', minor); - len += sprintf(buf+len, "\n"); - return len; + + if (len > remaining) + break; + remaining -= len; + buf += len; + tlen += len; + } + + len = snprintf(buf, remaining, "\n"); + if (len > remaining) + return -EINVAL; + return tlen + len; } /** @@ -910,104 +933,143 @@ static ssize_t write_versions(struct file *file, char *buf, size_t size) return rv; } -static ssize_t __write_ports(struct file *file, char *buf, size_t size) +/* + * Zero-length write. Return a list of NFSD's current listener + * transports. + */ +static ssize_t __write_ports_names(char *buf) { - if (size == 0) { - int len = 0; + if (nfsd_serv == NULL) + return 0; + return svc_xprt_names(nfsd_serv, buf, SIMPLE_TRANSACTION_LIMIT); +} - if (nfsd_serv) - len = svc_xprt_names(nfsd_serv, buf, 0); - return len; - } - /* Either a single 'fd' number is written, in which - * case it must be for a socket of a supported family/protocol, - * and we use it as an nfsd socket, or - * A '-' followed by the 'name' of a socket in which case - * we close the socket. 
- */ - if (isdigit(buf[0])) { - char *mesg = buf; - int fd; - int err; - err = get_int(&mesg, &fd); - if (err) - return -EINVAL; - if (fd < 0) - return -EINVAL; - err = nfsd_create_serv(); - if (!err) { - err = svc_addsock(nfsd_serv, fd, buf); - if (err >= 0) { - err = lockd_up(); - if (err < 0) - svc_sock_names(buf+strlen(buf)+1, nfsd_serv, buf); - } - /* Decrease the count, but don't shutdown the - * the service - */ - nfsd_serv->sv_nrthreads--; - } - return err < 0 ? err : 0; - } - if (buf[0] == '-' && isdigit(buf[1])) { - char *toclose = kstrdup(buf+1, GFP_KERNEL); - int len = 0; - if (!toclose) - return -ENOMEM; - if (nfsd_serv) - len = svc_sock_names(buf, nfsd_serv, toclose); - if (len >= 0) - lockd_down(); - kfree(toclose); - return len; - } - /* - * Add a transport listener by writing it's transport name - */ - if (isalpha(buf[0])) { - int err; - char transport[16]; - int port; - if (sscanf(buf, "%15s %4d", transport, &port) == 2) { - if (port < 1 || port > 65535) - return -EINVAL; - err = nfsd_create_serv(); - if (!err) { - err = svc_create_xprt(nfsd_serv, - transport, PF_INET, port, - SVC_SOCK_ANONYMOUS); - if (err == -ENOENT) - /* Give a reasonable perror msg for - * bad transport string */ - err = -EPROTONOSUPPORT; - } - return err < 0 ? err : 0; - } - } - /* - * Remove a transport by writing it's transport name and port number - */ - if (buf[0] == '-' && isalpha(buf[1])) { - struct svc_xprt *xprt; - int err = -EINVAL; - char transport[16]; - int port; - if (sscanf(&buf[1], "%15s %4d", transport, &port) == 2) { - if (port < 1 || port > 65535) - return -EINVAL; - if (nfsd_serv) { - xprt = svc_find_xprt(nfsd_serv, transport, - AF_UNSPEC, port); - if (xprt) { - svc_close_xprt(xprt); - svc_xprt_put(xprt); - err = 0; - } else - err = -ENOTCONN; - } - return err < 0 ? err : 0; - } +/* + * A single 'fd' number was written, in which case it must be for + * a socket of a supported family/protocol, and we use it as an + * nfsd listener. 
+ */ +static ssize_t __write_ports_addfd(char *buf) +{ + char *mesg = buf; + int fd, err; + + err = get_int(&mesg, &fd); + if (err != 0 || fd < 0) + return -EINVAL; + + err = nfsd_create_serv(); + if (err != 0) + return err; + + err = lockd_up(); + if (err != 0) + goto out; + + err = svc_addsock(nfsd_serv, fd, buf, SIMPLE_TRANSACTION_LIMIT); + if (err < 0) + lockd_down(); + +out: + /* Decrease the count, but don't shut down the service */ + nfsd_serv->sv_nrthreads--; + return err; +} + +/* + * A '-' followed by the 'name' of a socket means we close the socket. + */ +static ssize_t __write_ports_delfd(char *buf) +{ + char *toclose; + int len = 0; + + toclose = kstrdup(buf + 1, GFP_KERNEL); + if (toclose == NULL) + return -ENOMEM; + + if (nfsd_serv != NULL) + len = svc_sock_names(nfsd_serv, buf, + SIMPLE_TRANSACTION_LIMIT, toclose); + if (len >= 0) + lockd_down(); + + kfree(toclose); + return len; +} + +/* + * A transport listener is added by writing it's transport name and + * a port number. + */ +static ssize_t __write_ports_addxprt(char *buf) +{ + char transport[16]; + int port, err; + + if (sscanf(buf, "%15s %4u", transport, &port) != 2) + return -EINVAL; + + if (port < 1 || port > USHORT_MAX) + return -EINVAL; + + err = nfsd_create_serv(); + if (err != 0) + return err; + + err = svc_create_xprt(nfsd_serv, transport, + PF_INET, port, SVC_SOCK_ANONYMOUS); + if (err < 0) { + /* Give a reasonable perror msg for bad transport string */ + if (err == -ENOENT) + err = -EPROTONOSUPPORT; + return err; } + return 0; +} + +/* + * A transport listener is removed by writing a "-", it's transport + * name, and it's port number. 
+ */ +static ssize_t __write_ports_delxprt(char *buf) +{ + struct svc_xprt *xprt; + char transport[16]; + int port; + + if (sscanf(&buf[1], "%15s %4u", transport, &port) != 2) + return -EINVAL; + + if (port < 1 || port > USHORT_MAX || nfsd_serv == NULL) + return -EINVAL; + + xprt = svc_find_xprt(nfsd_serv, transport, AF_UNSPEC, port); + if (xprt == NULL) + return -ENOTCONN; + + svc_close_xprt(xprt); + svc_xprt_put(xprt); + return 0; +} + +static ssize_t __write_ports(struct file *file, char *buf, size_t size) +{ + if (size == 0) + return __write_ports_names(buf); + + if (isdigit(buf[0])) + return __write_ports_addfd(buf); + + if (buf[0] == '-' && isdigit(buf[1])) + return __write_ports_delfd(buf); + + if (isalpha(buf[0])) + return __write_ports_addxprt(buf); + + if (buf[0] == '-' && isalpha(buf[1])) + return __write_ports_delxprt(buf); + return -EINVAL; } @@ -1030,7 +1092,9 @@ static ssize_t __write_ports(struct file *file, char *buf, size_t size) * buf: C string containing an unsigned * integer value representing a bound * but unconnected socket that is to be - * used as an NFSD listener + * used as an NFSD listener; listen(3) + * must be called for a SOCK_STREAM + * socket, otherwise it is ignored * size: non-zero length of C string in @buf * Output: * On success: NFS service is started; @@ -1138,7 +1202,9 @@ static ssize_t write_maxblksize(struct file *file, char *buf, size_t size) nfsd_max_blksize = bsize; mutex_unlock(&nfsd_mutex); } - return sprintf(buf, "%d\n", nfsd_max_blksize); + + return scnprintf(buf, SIMPLE_TRANSACTION_LIMIT, "%d\n", + nfsd_max_blksize); } #ifdef CONFIG_NFSD_V4 @@ -1162,8 +1228,9 @@ static ssize_t __write_leasetime(struct file *file, char *buf, size_t size) return -EINVAL; nfs4_reset_lease(lease); } - sprintf(buf, "%ld\n", nfs4_lease_time()); - return strlen(buf); + + return scnprintf(buf, SIMPLE_TRANSACTION_LIMIT, "%ld\n", + nfs4_lease_time()); } /** @@ -1219,8 +1286,9 @@ static ssize_t __write_recoverydir(struct file *file, char *buf, 
size_t size) status = nfs4_reset_recoverydir(recdir); } - sprintf(buf, "%s\n", nfs4_recoverydir()); - return strlen(buf); + + return scnprintf(buf, SIMPLE_TRANSACTION_LIMIT, "%s\n", + nfs4_recoverydir()); } /** diff --git a/fs/nfsd/nfsfh.c b/fs/nfsd/nfsfh.c index 9f1ca17..8847f3f 100644 --- a/fs/nfsd/nfsfh.c +++ b/fs/nfsd/nfsfh.c @@ -27,9 +27,6 @@ #define NFSDDBG_FACILITY NFSDDBG_FH -static int nfsd_nr_verified; -static int nfsd_nr_put; - /* * our acceptability function. * if NOSUBTREECHECK, accept anything @@ -251,7 +248,6 @@ static __be32 nfsd_set_fh_dentry(struct svc_rqst *rqstp, struct svc_fh *fhp) fhp->fh_dentry = dentry; fhp->fh_export = exp; - nfsd_nr_verified++; return 0; out: exp_put(exp); @@ -552,7 +548,6 @@ fh_compose(struct svc_fh *fhp, struct svc_export *exp, struct dentry *dentry, return nfserr_opnotsupp; } - nfsd_nr_verified++; return 0; } @@ -609,7 +604,6 @@ fh_put(struct svc_fh *fhp) fhp->fh_pre_saved = 0; fhp->fh_post_saved = 0; #endif - nfsd_nr_put++; } if (exp) { cache_put(&exp->h, &svc_export_cache); diff --git a/fs/nfsd/nfsproc.c b/fs/nfsd/nfsproc.c index e298e26..0eb9c82 100644 --- a/fs/nfsd/nfsproc.c +++ b/fs/nfsd/nfsproc.c @@ -533,45 +533,179 @@ nfsd_proc_statfs(struct svc_rqst * rqstp, struct nfsd_fhandle *argp, * NFSv2 Server procedures. * Only the results of non-idempotent operations are cached. 
*/ -#define nfsd_proc_none NULL -#define nfssvc_release_none NULL struct nfsd_void { int dummy; }; -#define PROC(name, argt, rest, relt, cache, respsize) \ - { (svc_procfunc) nfsd_proc_##name, \ - (kxdrproc_t) nfssvc_decode_##argt, \ - (kxdrproc_t) nfssvc_encode_##rest, \ - (kxdrproc_t) nfssvc_release_##relt, \ - sizeof(struct nfsd_##argt), \ - sizeof(struct nfsd_##rest), \ - 0, \ - cache, \ - respsize, \ - } - #define ST 1 /* status */ #define FH 8 /* filehandle */ #define AT 18 /* attributes */ static struct svc_procedure nfsd_procedures2[18] = { - PROC(null, void, void, none, RC_NOCACHE, ST), - PROC(getattr, fhandle, attrstat, fhandle, RC_NOCACHE, ST+AT), - PROC(setattr, sattrargs, attrstat, fhandle, RC_REPLBUFF, ST+AT), - PROC(none, void, void, none, RC_NOCACHE, ST), - PROC(lookup, diropargs, diropres, fhandle, RC_NOCACHE, ST+FH+AT), - PROC(readlink, readlinkargs, readlinkres, none, RC_NOCACHE, ST+1+NFS_MAXPATHLEN/4), - PROC(read, readargs, readres, fhandle, RC_NOCACHE, ST+AT+1+NFSSVC_MAXBLKSIZE_V2/4), - PROC(none, void, void, none, RC_NOCACHE, ST), - PROC(write, writeargs, attrstat, fhandle, RC_REPLBUFF, ST+AT), - PROC(create, createargs, diropres, fhandle, RC_REPLBUFF, ST+FH+AT), - PROC(remove, diropargs, void, none, RC_REPLSTAT, ST), - PROC(rename, renameargs, void, none, RC_REPLSTAT, ST), - PROC(link, linkargs, void, none, RC_REPLSTAT, ST), - PROC(symlink, symlinkargs, void, none, RC_REPLSTAT, ST), - PROC(mkdir, createargs, diropres, fhandle, RC_REPLBUFF, ST+FH+AT), - PROC(rmdir, diropargs, void, none, RC_REPLSTAT, ST), - PROC(readdir, readdirargs, readdirres, none, RC_NOCACHE, 0), - PROC(statfs, fhandle, statfsres, none, RC_NOCACHE, ST+5), + [NFSPROC_NULL] = { + .pc_func = (svc_procfunc) nfsd_proc_null, + .pc_decode = (kxdrproc_t) nfssvc_decode_void, + .pc_encode = (kxdrproc_t) nfssvc_encode_void, + .pc_argsize = sizeof(struct nfsd_void), + .pc_ressize = sizeof(struct nfsd_void), + .pc_cachetype = RC_NOCACHE, + .pc_xdrressize = ST, + }, + [NFSPROC_GETATTR] 
= { + .pc_func = (svc_procfunc) nfsd_proc_getattr, + .pc_decode = (kxdrproc_t) nfssvc_decode_fhandle, + .pc_encode = (kxdrproc_t) nfssvc_encode_attrstat, + .pc_release = (kxdrproc_t) nfssvc_release_fhandle, + .pc_argsize = sizeof(struct nfsd_fhandle), + .pc_ressize = sizeof(struct nfsd_attrstat), + .pc_cachetype = RC_NOCACHE, + .pc_xdrressize = ST+AT, + }, + [NFSPROC_SETATTR] = { + .pc_func = (svc_procfunc) nfsd_proc_setattr, + .pc_decode = (kxdrproc_t) nfssvc_decode_sattrargs, + .pc_encode = (kxdrproc_t) nfssvc_encode_attrstat, + .pc_release = (kxdrproc_t) nfssvc_release_fhandle, + .pc_argsize = sizeof(struct nfsd_sattrargs), + .pc_ressize = sizeof(struct nfsd_attrstat), + .pc_cachetype = RC_REPLBUFF, + .pc_xdrressize = ST+AT, + }, + [NFSPROC_ROOT] = { + .pc_decode = (kxdrproc_t) nfssvc_decode_void, + .pc_encode = (kxdrproc_t) nfssvc_encode_void, + .pc_argsize = sizeof(struct nfsd_void), + .pc_ressize = sizeof(struct nfsd_void), + .pc_cachetype = RC_NOCACHE, + .pc_xdrressize = ST, + }, + [NFSPROC_LOOKUP] = { + .pc_func = (svc_procfunc) nfsd_proc_lookup, + .pc_decode = (kxdrproc_t) nfssvc_decode_diropargs, + .pc_encode = (kxdrproc_t) nfssvc_encode_diropres, + .pc_release = (kxdrproc_t) nfssvc_release_fhandle, + .pc_argsize = sizeof(struct nfsd_diropargs), + .pc_ressize = sizeof(struct nfsd_diropres), + .pc_cachetype = RC_NOCACHE, + .pc_xdrressize = ST+FH+AT, + }, + [NFSPROC_READLINK] = { + .pc_func = (svc_procfunc) nfsd_proc_readlink, + .pc_decode = (kxdrproc_t) nfssvc_decode_readlinkargs, + .pc_encode = (kxdrproc_t) nfssvc_encode_readlinkres, + .pc_argsize = sizeof(struct nfsd_readlinkargs), + .pc_ressize = sizeof(struct nfsd_readlinkres), + .pc_cachetype = RC_NOCACHE, + .pc_xdrressize = ST+1+NFS_MAXPATHLEN/4, + }, + [NFSPROC_READ] = { + .pc_func = (svc_procfunc) nfsd_proc_read, + .pc_decode = (kxdrproc_t) nfssvc_decode_readargs, + .pc_encode = (kxdrproc_t) nfssvc_encode_readres, + .pc_release = (kxdrproc_t) nfssvc_release_fhandle, + .pc_argsize = sizeof(struct 
nfsd_readargs), + .pc_ressize = sizeof(struct nfsd_readres), + .pc_cachetype = RC_NOCACHE, + .pc_xdrressize = ST+AT+1+NFSSVC_MAXBLKSIZE_V2/4, + }, + [NFSPROC_WRITECACHE] = { + .pc_decode = (kxdrproc_t) nfssvc_decode_void, + .pc_encode = (kxdrproc_t) nfssvc_encode_void, + .pc_argsize = sizeof(struct nfsd_void), + .pc_ressize = sizeof(struct nfsd_void), + .pc_cachetype = RC_NOCACHE, + .pc_xdrressize = ST, + }, + [NFSPROC_WRITE] = { + .pc_func = (svc_procfunc) nfsd_proc_write, + .pc_decode = (kxdrproc_t) nfssvc_decode_writeargs, + .pc_encode = (kxdrproc_t) nfssvc_encode_attrstat, + .pc_release = (kxdrproc_t) nfssvc_release_fhandle, + .pc_argsize = sizeof(struct nfsd_writeargs), + .pc_ressize = sizeof(struct nfsd_attrstat), + .pc_cachetype = RC_REPLBUFF, + .pc_xdrressize = ST+AT, + }, + [NFSPROC_CREATE] = { + .pc_func = (svc_procfunc) nfsd_proc_create, + .pc_decode = (kxdrproc_t) nfssvc_decode_createargs, + .pc_encode = (kxdrproc_t) nfssvc_encode_diropres, + .pc_release = (kxdrproc_t) nfssvc_release_fhandle, + .pc_argsize = sizeof(struct nfsd_createargs), + .pc_ressize = sizeof(struct nfsd_diropres), + .pc_cachetype = RC_REPLBUFF, + .pc_xdrressize = ST+FH+AT, + }, + [NFSPROC_REMOVE] = { + .pc_func = (svc_procfunc) nfsd_proc_remove, + .pc_decode = (kxdrproc_t) nfssvc_decode_diropargs, + .pc_encode = (kxdrproc_t) nfssvc_encode_void, + .pc_argsize = sizeof(struct nfsd_diropargs), + .pc_ressize = sizeof(struct nfsd_void), + .pc_cachetype = RC_REPLSTAT, + .pc_xdrressize = ST, + }, + [NFSPROC_RENAME] = { + .pc_func = (svc_procfunc) nfsd_proc_rename, + .pc_decode = (kxdrproc_t) nfssvc_decode_renameargs, + .pc_encode = (kxdrproc_t) nfssvc_encode_void, + .pc_argsize = sizeof(struct nfsd_renameargs), + .pc_ressize = sizeof(struct nfsd_void), + .pc_cachetype = RC_REPLSTAT, + .pc_xdrressize = ST, + }, + [NFSPROC_LINK] = { + .pc_func = (svc_procfunc) nfsd_proc_link, + .pc_decode = (kxdrproc_t) nfssvc_decode_linkargs, + .pc_encode = (kxdrproc_t) nfssvc_encode_void, + .pc_argsize = 
sizeof(struct nfsd_linkargs), + .pc_ressize = sizeof(struct nfsd_void), + .pc_cachetype = RC_REPLSTAT, + .pc_xdrressize = ST, + }, + [NFSPROC_SYMLINK] = { + .pc_func = (svc_procfunc) nfsd_proc_symlink, + .pc_decode = (kxdrproc_t) nfssvc_decode_symlinkargs, + .pc_encode = (kxdrproc_t) nfssvc_encode_void, + .pc_argsize = sizeof(struct nfsd_symlinkargs), + .pc_ressize = sizeof(struct nfsd_void), + .pc_cachetype = RC_REPLSTAT, + .pc_xdrressize = ST, + }, + [NFSPROC_MKDIR] = { + .pc_func = (svc_procfunc) nfsd_proc_mkdir, + .pc_decode = (kxdrproc_t) nfssvc_decode_createargs, + .pc_encode = (kxdrproc_t) nfssvc_encode_diropres, + .pc_release = (kxdrproc_t) nfssvc_release_fhandle, + .pc_argsize = sizeof(struct nfsd_createargs), + .pc_ressize = sizeof(struct nfsd_diropres), + .pc_cachetype = RC_REPLBUFF, + .pc_xdrressize = ST+FH+AT, + }, + [NFSPROC_RMDIR] = { + .pc_func = (svc_procfunc) nfsd_proc_rmdir, + .pc_decode = (kxdrproc_t) nfssvc_decode_diropargs, + .pc_encode = (kxdrproc_t) nfssvc_encode_void, + .pc_argsize = sizeof(struct nfsd_diropargs), + .pc_ressize = sizeof(struct nfsd_void), + .pc_cachetype = RC_REPLSTAT, + .pc_xdrressize = ST, + }, + [NFSPROC_READDIR] = { + .pc_func = (svc_procfunc) nfsd_proc_readdir, + .pc_decode = (kxdrproc_t) nfssvc_decode_readdirargs, + .pc_encode = (kxdrproc_t) nfssvc_encode_readdirres, + .pc_argsize = sizeof(struct nfsd_readdirargs), + .pc_ressize = sizeof(struct nfsd_readdirres), + .pc_cachetype = RC_NOCACHE, + }, + [NFSPROC_STATFS] = { + .pc_func = (svc_procfunc) nfsd_proc_statfs, + .pc_decode = (kxdrproc_t) nfssvc_decode_fhandle, + .pc_encode = (kxdrproc_t) nfssvc_encode_statfsres, + .pc_argsize = sizeof(struct nfsd_fhandle), + .pc_ressize = sizeof(struct nfsd_statfsres), + .pc_cachetype = RC_NOCACHE, + .pc_xdrressize = ST+5, + }, }; diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c index cbba4a9..d4c9884 100644 --- a/fs/nfsd/nfssvc.c +++ b/fs/nfsd/nfssvc.c @@ -390,12 +390,14 @@ nfsd_svc(unsigned short port, int nrservs) 
mutex_lock(&nfsd_mutex); dprintk("nfsd: creating service\n"); - error = -EINVAL; if (nrservs <= 0) nrservs = 0; if (nrservs > NFSD_MAXSERVS) nrservs = NFSD_MAXSERVS; - + error = 0; + if (nrservs == 0 && nfsd_serv == NULL) + goto out; + /* Readahead param cache - will no-op if it already exists */ error = nfsd_racache_init(2*nrservs); if (error<0) @@ -413,6 +415,12 @@ nfsd_svc(unsigned short port, int nrservs) goto failure; error = svc_set_num_threads(nfsd_serv, NULL, nrservs); + if (error == 0) + /* We are holding a reference to nfsd_serv which + * we don't want to count in the return value, + * so subtract 1 + */ + error = nfsd_serv->sv_nrthreads - 1; failure: svc_destroy(nfsd_serv); /* Release server */ out: diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c index 99f8357..4145083 100644 --- a/fs/nfsd/vfs.c +++ b/fs/nfsd/vfs.c @@ -966,6 +966,43 @@ static void kill_suid(struct dentry *dentry) mutex_unlock(&dentry->d_inode->i_mutex); } +/* + * Gathered writes: If another process is currently writing to the file, + * there's a high chance this is another nfsd (triggered by a bulk write + * from a client's biod). Rather than syncing the file with each write + * request, we sleep for 10 msec. + * + * I don't know if this roughly approximates C. Juszak's idea of + * gathered writes, but it's a nice and simple solution (IMHO), and it + * seems to work:-) + * + * Note: we do this only in the NFSv2 case, since v3 and higher have a + * better tool (separate unstable writes and commits) for solving this + * problem. 
+ */ +static int wait_for_concurrent_writes(struct file *file) +{ + struct inode *inode = file->f_path.dentry->d_inode; + static ino_t last_ino; + static dev_t last_dev; + int err = 0; + + if (atomic_read(&inode->i_writecount) > 1 + || (last_ino == inode->i_ino && last_dev == inode->i_sb->s_dev)) { + dprintk("nfsd: write defer %d\n", task_pid_nr(current)); + msleep(10); + dprintk("nfsd: write resume %d\n", task_pid_nr(current)); + } + + if (inode->i_state & I_DIRTY) { + dprintk("nfsd: write sync %d\n", task_pid_nr(current)); + err = nfsd_sync(file); + } + last_ino = inode->i_ino; + last_dev = inode->i_sb->s_dev; + return err; +} + static __be32 nfsd_vfs_write(struct svc_rqst *rqstp, struct svc_fh *fhp, struct file *file, loff_t offset, struct kvec *vec, int vlen, @@ -978,6 +1015,7 @@ nfsd_vfs_write(struct svc_rqst *rqstp, struct svc_fh *fhp, struct file *file, __be32 err = 0; int host_err; int stable = *stablep; + int use_wgather; #ifdef MSNFS err = nfserr_perm; @@ -996,9 +1034,10 @@ nfsd_vfs_write(struct svc_rqst *rqstp, struct svc_fh *fhp, struct file *file, * - the sync export option has been set, or * - the client requested O_SYNC behavior (NFSv3 feature). * - The file system doesn't support fsync(). - * When gathered writes have been configured for this volume, + * When NFSv2 gathered writes have been configured for this volume, * flushing the data to disk is handled separately below. 
*/ + use_wgather = (rqstp->rq_vers == 2) && EX_WGATHER(exp); if (!file->f_op->fsync) {/* COMMIT3 cannot work */ stable = 2; @@ -1007,7 +1046,7 @@ nfsd_vfs_write(struct svc_rqst *rqstp, struct svc_fh *fhp, struct file *file, if (!EX_ISSYNC(exp)) stable = 0; - if (stable && !EX_WGATHER(exp)) { + if (stable && !use_wgather) { spin_lock(&file->f_lock); file->f_flags |= O_SYNC; spin_unlock(&file->f_lock); @@ -1017,52 +1056,20 @@ nfsd_vfs_write(struct svc_rqst *rqstp, struct svc_fh *fhp, struct file *file, oldfs = get_fs(); set_fs(KERNEL_DS); host_err = vfs_writev(file, (struct iovec __user *)vec, vlen, &offset); set_fs(oldfs); - if (host_err >= 0) { - *cnt = host_err; - nfsdstats.io_write += host_err; - fsnotify_modify(file->f_path.dentry); - } + if (host_err < 0) + goto out_nfserr; + *cnt = host_err; + nfsdstats.io_write += host_err; + fsnotify_modify(file->f_path.dentry); /* clear setuid/setgid flag after write */ - if (host_err >= 0 && (inode->i_mode & (S_ISUID | S_ISGID))) + if (inode->i_mode & (S_ISUID | S_ISGID)) kill_suid(dentry); - if (host_err >= 0 && stable) { - static ino_t last_ino; - static dev_t last_dev; - - /* - * Gathered writes: If another process is currently - * writing to the file, there's a high chance - * this is another nfsd (triggered by a bulk write - * from a client's biod). Rather than syncing the - * file with each write request, we sleep for 10 msec. - * - * I don't know if this roughly approximates - * C. 
Juszak's idea of gathered writes, but it's a - * nice and simple solution (IMHO), and it seems to - * work:-) - */ - if (EX_WGATHER(exp)) { - if (atomic_read(&inode->i_writecount) > 1 - || (last_ino == inode->i_ino && last_dev == inode->i_sb->s_dev)) { - dprintk("nfsd: write defer %d\n", task_pid_nr(current)); - msleep(10); - dprintk("nfsd: write resume %d\n", task_pid_nr(current)); - } - - if (inode->i_state & I_DIRTY) { - dprintk("nfsd: write sync %d\n", task_pid_nr(current)); - host_err=nfsd_sync(file); - } -#if 0 - wake_up(&inode->i_wait); -#endif - } - last_ino = inode->i_ino; - last_dev = inode->i_sb->s_dev; - } + if (stable && use_wgather) + host_err = wait_for_concurrent_writes(file); +out_nfserr: dprintk("nfsd: write complete host_err=%d\n", host_err); if (host_err >= 0) err = 0; diff --git a/fs/nilfs2/inode.c b/fs/nilfs2/inode.c index 2696d6b..fe9d8f2 100644 --- a/fs/nilfs2/inode.c +++ b/fs/nilfs2/inode.c @@ -309,10 +309,6 @@ struct inode *nilfs_new_inode(struct inode *dir, int mode) /* ii->i_file_acl = 0; */ /* ii->i_dir_acl = 0; */ ii->i_dir_start_lookup = 0; -#ifdef CONFIG_NILFS_FS_POSIX_ACL - ii->i_acl = NULL; - ii->i_default_acl = NULL; -#endif ii->i_cno = 0; nilfs_set_inode_flags(inode); spin_lock(&sbi->s_next_gen_lock); @@ -434,10 +430,6 @@ static int __nilfs_read_inode(struct super_block *sb, unsigned long ino, raw_inode = nilfs_ifile_map_inode(sbi->s_ifile, ino, bh); -#ifdef CONFIG_NILFS_FS_POSIX_ACL - ii->i_acl = NILFS_ACL_NOT_CACHED; - ii->i_default_acl = NILFS_ACL_NOT_CACHED; -#endif if (nilfs_read_inode_common(inode, raw_inode)) goto failed_unmap; diff --git a/fs/nilfs2/nilfs.h b/fs/nilfs2/nilfs.h index edf6a59..724c637 100644 --- a/fs/nilfs2/nilfs.h +++ b/fs/nilfs2/nilfs.h @@ -58,10 +58,6 @@ struct nilfs_inode_info { */ struct rw_semaphore xattr_sem; #endif -#ifdef CONFIG_NILFS_POSIX_ACL - struct posix_acl *i_acl; - struct posix_acl *i_default_acl; -#endif struct buffer_head *i_bh; /* i_bh contains a new or dirty disk inode */ struct inode 
vfs_inode; diff --git a/fs/nilfs2/super.c b/fs/nilfs2/super.c index ab785f8..8e2ec43 100644 --- a/fs/nilfs2/super.c +++ b/fs/nilfs2/super.c @@ -189,16 +189,6 @@ static void nilfs_clear_inode(struct inode *inode) { struct nilfs_inode_info *ii = NILFS_I(inode); -#ifdef CONFIG_NILFS_POSIX_ACL - if (ii->i_acl && ii->i_acl != NILFS_ACL_NOT_CACHED) { - posix_acl_release(ii->i_acl); - ii->i_acl = NILFS_ACL_NOT_CACHED; - } - if (ii->i_default_acl && ii->i_default_acl != NILFS_ACL_NOT_CACHED) { - posix_acl_release(ii->i_default_acl); - ii->i_default_acl = NILFS_ACL_NOT_CACHED; - } -#endif /* * Free resources allocated in nilfs_read_inode(), here. */ diff --git a/fs/notify/inotify/inotify_user.c b/fs/notify/inotify/inotify_user.c index ff231ad..ff27a29 100644 --- a/fs/notify/inotify/inotify_user.c +++ b/fs/notify/inotify/inotify_user.c @@ -296,12 +296,15 @@ static int inotify_fasync(int fd, struct file *file, int on) static int inotify_release(struct inode *ignored, struct file *file) { struct fsnotify_group *group = file->private_data; + struct user_struct *user = group->inotify_data.user; fsnotify_clear_marks_by_group(group); /* free this group, matching get was inotify_init->fsnotify_obtain_group */ fsnotify_put_group(group); + atomic_dec(&user->inotify_devs); + return 0; } diff --git a/fs/ocfs2/dlmglue.c b/fs/ocfs2/dlmglue.c index 6cdeaa7..110bb57 100644 --- a/fs/ocfs2/dlmglue.c +++ b/fs/ocfs2/dlmglue.c @@ -92,6 +92,9 @@ struct ocfs2_unblock_ctl { enum ocfs2_unblock_action unblock_action; }; +/* Lockdep class keys */ +struct lock_class_key lockdep_keys[OCFS2_NUM_LOCK_TYPES]; + static int ocfs2_check_meta_downconvert(struct ocfs2_lock_res *lockres, int new_level); static void ocfs2_set_meta_lvb(struct ocfs2_lock_res *lockres); @@ -317,9 +320,16 @@ static int ocfs2_lock_create(struct ocfs2_super *osb, u32 dlm_flags); static inline int ocfs2_may_continue_on_blocked_lock(struct ocfs2_lock_res *lockres, int wanted); -static void ocfs2_cluster_unlock(struct ocfs2_super *osb, - 
struct ocfs2_lock_res *lockres, - int level); +static void __ocfs2_cluster_unlock(struct ocfs2_super *osb, + struct ocfs2_lock_res *lockres, + int level, unsigned long caller_ip); +static inline void ocfs2_cluster_unlock(struct ocfs2_super *osb, + struct ocfs2_lock_res *lockres, + int level) +{ + __ocfs2_cluster_unlock(osb, lockres, level, _RET_IP_); +} + static inline void ocfs2_generic_handle_downconvert_action(struct ocfs2_lock_res *lockres); static inline void ocfs2_generic_handle_convert_action(struct ocfs2_lock_res *lockres); static inline void ocfs2_generic_handle_attach_action(struct ocfs2_lock_res *lockres); @@ -489,6 +499,13 @@ static void ocfs2_lock_res_init_common(struct ocfs2_super *osb, ocfs2_add_lockres_tracking(res, osb->osb_dlm_debug); ocfs2_init_lock_stats(res); +#ifdef CONFIG_DEBUG_LOCK_ALLOC + if (type != OCFS2_LOCK_TYPE_OPEN) + lockdep_init_map(&res->l_lockdep_map, ocfs2_lock_type_strings[type], + &lockdep_keys[type], 0); + else + res->l_lockdep_map.key = NULL; +#endif } void ocfs2_lock_res_init_once(struct ocfs2_lock_res *res) @@ -644,14 +661,10 @@ static void ocfs2_nfs_sync_lock_res_init(struct ocfs2_lock_res *res, static void ocfs2_orphan_scan_lock_res_init(struct ocfs2_lock_res *res, struct ocfs2_super *osb) { - struct ocfs2_orphan_scan_lvb *lvb; - ocfs2_lock_res_init_once(res); ocfs2_build_lock_name(OCFS2_LOCK_TYPE_ORPHAN_SCAN, 0, 0, res->l_name); ocfs2_lock_res_init_common(osb, res, OCFS2_LOCK_TYPE_ORPHAN_SCAN, &ocfs2_orphan_scan_lops, osb); - lvb = ocfs2_dlm_lvb(&res->l_lksb); - lvb->lvb_version = OCFS2_ORPHAN_LVB_VERSION; } void ocfs2_file_lock_res_init(struct ocfs2_lock_res *lockres, @@ -1256,11 +1269,13 @@ static int ocfs2_wait_for_mask_interruptible(struct ocfs2_mask_waiter *mw, return ret; } -static int ocfs2_cluster_lock(struct ocfs2_super *osb, - struct ocfs2_lock_res *lockres, - int level, - u32 lkm_flags, - int arg_flags) +static int __ocfs2_cluster_lock(struct ocfs2_super *osb, + struct ocfs2_lock_res *lockres, + int level, + 
u32 lkm_flags, + int arg_flags, + int l_subclass, + unsigned long caller_ip) { struct ocfs2_mask_waiter mw; int wait, catch_signals = !(osb->s_mount_opt & OCFS2_MOUNT_NOINTR); @@ -1403,13 +1418,37 @@ out: } ocfs2_update_lock_stats(lockres, level, &mw, ret); +#ifdef CONFIG_DEBUG_LOCK_ALLOC + if (!ret && lockres->l_lockdep_map.key != NULL) { + if (level == DLM_LOCK_PR) + rwsem_acquire_read(&lockres->l_lockdep_map, l_subclass, + !!(arg_flags & OCFS2_META_LOCK_NOQUEUE), + caller_ip); + else + rwsem_acquire(&lockres->l_lockdep_map, l_subclass, + !!(arg_flags & OCFS2_META_LOCK_NOQUEUE), + caller_ip); + } +#endif mlog_exit(ret); return ret; } -static void ocfs2_cluster_unlock(struct ocfs2_super *osb, - struct ocfs2_lock_res *lockres, - int level) +static inline int ocfs2_cluster_lock(struct ocfs2_super *osb, + struct ocfs2_lock_res *lockres, + int level, + u32 lkm_flags, + int arg_flags) +{ + return __ocfs2_cluster_lock(osb, lockres, level, lkm_flags, arg_flags, + 0, _RET_IP_); +} + + +static void __ocfs2_cluster_unlock(struct ocfs2_super *osb, + struct ocfs2_lock_res *lockres, + int level, + unsigned long caller_ip) { unsigned long flags; @@ -1418,6 +1457,10 @@ static void ocfs2_cluster_unlock(struct ocfs2_super *osb, ocfs2_dec_holders(lockres, level); ocfs2_downconvert_on_unlock(osb, lockres); spin_unlock_irqrestore(&lockres->l_lock, flags); +#ifdef CONFIG_DEBUG_LOCK_ALLOC + if (lockres->l_lockdep_map.key != NULL) + rwsem_release(&lockres->l_lockdep_map, 1, caller_ip); +#endif mlog_exit_void(); } @@ -1989,7 +2032,8 @@ static inline int ocfs2_meta_lvb_is_trustable(struct inode *inode, { struct ocfs2_meta_lvb *lvb = ocfs2_dlm_lvb(&lockres->l_lksb); - if (lvb->lvb_version == OCFS2_LVB_VERSION + if (ocfs2_dlm_lvb_valid(&lockres->l_lksb) + && lvb->lvb_version == OCFS2_LVB_VERSION && be32_to_cpu(lvb->lvb_igeneration) == inode->i_generation) return 1; return 0; @@ -2162,10 +2206,11 @@ static int ocfs2_assign_bh(struct inode *inode, * returns < 0 error if the callback will 
never be called, otherwise * the result of the lock will be communicated via the callback. */ -int ocfs2_inode_lock_full(struct inode *inode, - struct buffer_head **ret_bh, - int ex, - int arg_flags) +int ocfs2_inode_lock_full_nested(struct inode *inode, + struct buffer_head **ret_bh, + int ex, + int arg_flags, + int subclass) { int status, level, acquired; u32 dlm_flags; @@ -2203,7 +2248,8 @@ int ocfs2_inode_lock_full(struct inode *inode, if (arg_flags & OCFS2_META_LOCK_NOQUEUE) dlm_flags |= DLM_LKF_NOQUEUE; - status = ocfs2_cluster_lock(osb, lockres, level, dlm_flags, arg_flags); + status = __ocfs2_cluster_lock(osb, lockres, level, dlm_flags, + arg_flags, subclass, _RET_IP_); if (status < 0) { if (status != -EAGAIN && status != -EIOCBRETRY) mlog_errno(status); @@ -2369,35 +2415,45 @@ void ocfs2_inode_unlock(struct inode *inode, mlog_exit_void(); } -int ocfs2_orphan_scan_lock(struct ocfs2_super *osb, u32 *seqno, int ex) +int ocfs2_orphan_scan_lock(struct ocfs2_super *osb, u32 *seqno) { struct ocfs2_lock_res *lockres; struct ocfs2_orphan_scan_lvb *lvb; - int level = ex ? DLM_LOCK_EX : DLM_LOCK_PR; int status = 0; + if (ocfs2_is_hard_readonly(osb)) + return -EROFS; + + if (ocfs2_mount_local(osb)) + return 0; + lockres = &osb->osb_orphan_scan.os_lockres; - status = ocfs2_cluster_lock(osb, lockres, level, 0, 0); + status = ocfs2_cluster_lock(osb, lockres, DLM_LOCK_EX, 0, 0); if (status < 0) return status; lvb = ocfs2_dlm_lvb(&lockres->l_lksb); - if (lvb->lvb_version == OCFS2_ORPHAN_LVB_VERSION) + if (ocfs2_dlm_lvb_valid(&lockres->l_lksb) && + lvb->lvb_version == OCFS2_ORPHAN_LVB_VERSION) *seqno = be32_to_cpu(lvb->lvb_os_seqno); + else + *seqno = osb->osb_orphan_scan.os_seqno + 1; + return status; } -void ocfs2_orphan_scan_unlock(struct ocfs2_super *osb, u32 seqno, int ex) +void ocfs2_orphan_scan_unlock(struct ocfs2_super *osb, u32 seqno) { struct ocfs2_lock_res *lockres; struct ocfs2_orphan_scan_lvb *lvb; - int level = ex ? 
DLM_LOCK_EX : DLM_LOCK_PR; - lockres = &osb->osb_orphan_scan.os_lockres; - lvb = ocfs2_dlm_lvb(&lockres->l_lksb); - lvb->lvb_version = OCFS2_ORPHAN_LVB_VERSION; - lvb->lvb_os_seqno = cpu_to_be32(seqno); - ocfs2_cluster_unlock(osb, lockres, level); + if (!ocfs2_is_hard_readonly(osb) && !ocfs2_mount_local(osb)) { + lockres = &osb->osb_orphan_scan.os_lockres; + lvb = ocfs2_dlm_lvb(&lockres->l_lksb); + lvb->lvb_version = OCFS2_ORPHAN_LVB_VERSION; + lvb->lvb_os_seqno = cpu_to_be32(seqno); + ocfs2_cluster_unlock(osb, lockres, DLM_LOCK_EX); + } } int ocfs2_super_lock(struct ocfs2_super *osb, @@ -3627,7 +3683,8 @@ static int ocfs2_refresh_qinfo(struct ocfs2_mem_dqinfo *oinfo) struct ocfs2_global_disk_dqinfo *gdinfo; int status = 0; - if (lvb->lvb_version == OCFS2_QINFO_LVB_VERSION) { + if (ocfs2_dlm_lvb_valid(&lockres->l_lksb) && + lvb->lvb_version == OCFS2_QINFO_LVB_VERSION) { info->dqi_bgrace = be32_to_cpu(lvb->lvb_bgrace); info->dqi_igrace = be32_to_cpu(lvb->lvb_igrace); oinfo->dqi_syncms = be32_to_cpu(lvb->lvb_syncms); diff --git a/fs/ocfs2/dlmglue.h b/fs/ocfs2/dlmglue.h index 31b90d7..7553836 100644 --- a/fs/ocfs2/dlmglue.h +++ b/fs/ocfs2/dlmglue.h @@ -78,6 +78,14 @@ struct ocfs2_orphan_scan_lvb { /* don't block waiting for the downconvert thread, instead return -EAGAIN */ #define OCFS2_LOCK_NONBLOCK (0x04) +/* Locking subclasses of inode cluster lock */ +enum { + OI_LS_NORMAL = 0, + OI_LS_PARENT, + OI_LS_RENAME1, + OI_LS_RENAME2, +}; + int ocfs2_dlm_init(struct ocfs2_super *osb); void ocfs2_dlm_shutdown(struct ocfs2_super *osb, int hangup_pending); void ocfs2_lock_res_init_once(struct ocfs2_lock_res *res); @@ -104,25 +112,31 @@ void ocfs2_open_unlock(struct inode *inode); int ocfs2_inode_lock_atime(struct inode *inode, struct vfsmount *vfsmnt, int *level); -int ocfs2_inode_lock_full(struct inode *inode, +int ocfs2_inode_lock_full_nested(struct inode *inode, struct buffer_head **ret_bh, int ex, - int arg_flags); + int arg_flags, + int subclass); int 
ocfs2_inode_lock_with_page(struct inode *inode, struct buffer_head **ret_bh, int ex, struct page *page); +/* Variants without special locking class or flags */ +#define ocfs2_inode_lock_full(i, r, e, f)\ + ocfs2_inode_lock_full_nested(i, r, e, f, OI_LS_NORMAL) +#define ocfs2_inode_lock_nested(i, b, e, s)\ + ocfs2_inode_lock_full_nested(i, b, e, 0, s) /* 99% of the time we don't want to supply any additional flags -- * those are for very specific cases only. */ -#define ocfs2_inode_lock(i, b, e) ocfs2_inode_lock_full(i, b, e, 0) +#define ocfs2_inode_lock(i, b, e) ocfs2_inode_lock_full_nested(i, b, e, 0, OI_LS_NORMAL) void ocfs2_inode_unlock(struct inode *inode, int ex); int ocfs2_super_lock(struct ocfs2_super *osb, int ex); void ocfs2_super_unlock(struct ocfs2_super *osb, int ex); -int ocfs2_orphan_scan_lock(struct ocfs2_super *osb, u32 *seqno, int ex); -void ocfs2_orphan_scan_unlock(struct ocfs2_super *osb, u32 seqno, int ex); +int ocfs2_orphan_scan_lock(struct ocfs2_super *osb, u32 *seqno); +void ocfs2_orphan_scan_unlock(struct ocfs2_super *osb, u32 seqno); int ocfs2_rename_lock(struct ocfs2_super *osb); void ocfs2_rename_unlock(struct ocfs2_super *osb); diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c index 07267e0..62442e4 100644 --- a/fs/ocfs2/file.c +++ b/fs/ocfs2/file.c @@ -2026,7 +2026,7 @@ static ssize_t ocfs2_file_splice_read(struct file *in, size_t len, unsigned int flags) { - int ret = 0; + int ret = 0, lock_level = 0; struct inode *inode = in->f_path.dentry->d_inode; mlog_entry("(0x%p, 0x%p, %u, '%.*s')\n", in, pipe, @@ -2037,12 +2037,12 @@ static ssize_t ocfs2_file_splice_read(struct file *in, /* * See the comment in ocfs2_file_aio_read() */ - ret = ocfs2_inode_lock(inode, NULL, 0); + ret = ocfs2_inode_lock_atime(inode, in->f_vfsmnt, &lock_level); if (ret < 0) { mlog_errno(ret); goto bail; } - ocfs2_inode_unlock(inode, 0); + ocfs2_inode_unlock(inode, lock_level); ret = generic_file_splice_read(in, ppos, pipe, len, flags); diff --git a/fs/ocfs2/inode.c 
b/fs/ocfs2/inode.c index 10e1fa87..4dc8890 100644 --- a/fs/ocfs2/inode.c +++ b/fs/ocfs2/inode.c @@ -215,6 +215,8 @@ bail: static int ocfs2_init_locked_inode(struct inode *inode, void *opaque) { struct ocfs2_find_inode_args *args = opaque; + static struct lock_class_key ocfs2_quota_ip_alloc_sem_key, + ocfs2_file_ip_alloc_sem_key; mlog_entry("inode = %p, opaque = %p\n", inode, opaque); @@ -223,6 +225,15 @@ static int ocfs2_init_locked_inode(struct inode *inode, void *opaque) if (args->fi_sysfile_type != 0) lockdep_set_class(&inode->i_mutex, &ocfs2_sysfile_lock_key[args->fi_sysfile_type]); + if (args->fi_sysfile_type == USER_QUOTA_SYSTEM_INODE || + args->fi_sysfile_type == GROUP_QUOTA_SYSTEM_INODE || + args->fi_sysfile_type == LOCAL_USER_QUOTA_SYSTEM_INODE || + args->fi_sysfile_type == LOCAL_GROUP_QUOTA_SYSTEM_INODE) + lockdep_set_class(&OCFS2_I(inode)->ip_alloc_sem, + &ocfs2_quota_ip_alloc_sem_key); + else + lockdep_set_class(&OCFS2_I(inode)->ip_alloc_sem, + &ocfs2_file_ip_alloc_sem_key); mlog_exit(0); return 0; diff --git a/fs/ocfs2/journal.c b/fs/ocfs2/journal.c index 4a3b9e6..f033760 100644 --- a/fs/ocfs2/journal.c +++ b/fs/ocfs2/journal.c @@ -1880,13 +1880,20 @@ void ocfs2_queue_orphan_scan(struct ocfs2_super *osb) os = &osb->osb_orphan_scan; - status = ocfs2_orphan_scan_lock(osb, &seqno, DLM_LOCK_EX); + if (atomic_read(&os->os_state) == ORPHAN_SCAN_INACTIVE) + goto out; + + status = ocfs2_orphan_scan_lock(osb, &seqno); if (status < 0) { if (status != -EAGAIN) mlog_errno(status); goto out; } + /* Do no queue the tasks if the volume is being umounted */ + if (atomic_read(&os->os_state) == ORPHAN_SCAN_INACTIVE) + goto unlock; + if (os->os_seqno != seqno) { os->os_seqno = seqno; goto unlock; @@ -1903,7 +1910,7 @@ void ocfs2_queue_orphan_scan(struct ocfs2_super *osb) os->os_count++; os->os_scantime = CURRENT_TIME; unlock: - ocfs2_orphan_scan_unlock(osb, seqno, DLM_LOCK_EX); + ocfs2_orphan_scan_unlock(osb, seqno); out: return; } @@ -1920,8 +1927,9 @@ void 
ocfs2_orphan_scan_work(struct work_struct *work) mutex_lock(&os->os_lock); ocfs2_queue_orphan_scan(osb); - schedule_delayed_work(&os->os_orphan_scan_work, - ocfs2_orphan_scan_timeout()); + if (atomic_read(&os->os_state) == ORPHAN_SCAN_ACTIVE) + schedule_delayed_work(&os->os_orphan_scan_work, + ocfs2_orphan_scan_timeout()); mutex_unlock(&os->os_lock); } @@ -1930,26 +1938,33 @@ void ocfs2_orphan_scan_stop(struct ocfs2_super *osb) struct ocfs2_orphan_scan *os; os = &osb->osb_orphan_scan; - mutex_lock(&os->os_lock); - cancel_delayed_work(&os->os_orphan_scan_work); - mutex_unlock(&os->os_lock); + if (atomic_read(&os->os_state) == ORPHAN_SCAN_ACTIVE) { + atomic_set(&os->os_state, ORPHAN_SCAN_INACTIVE); + mutex_lock(&os->os_lock); + cancel_delayed_work(&os->os_orphan_scan_work); + mutex_unlock(&os->os_lock); + } } -int ocfs2_orphan_scan_init(struct ocfs2_super *osb) +void ocfs2_orphan_scan_init(struct ocfs2_super *osb) { struct ocfs2_orphan_scan *os; os = &osb->osb_orphan_scan; os->os_osb = osb; os->os_count = 0; + os->os_seqno = 0; os->os_scantime = CURRENT_TIME; mutex_init(&os->os_lock); - - INIT_DELAYED_WORK(&os->os_orphan_scan_work, - ocfs2_orphan_scan_work); - schedule_delayed_work(&os->os_orphan_scan_work, - ocfs2_orphan_scan_timeout()); - return 0; + INIT_DELAYED_WORK(&os->os_orphan_scan_work, ocfs2_orphan_scan_work); + + if (ocfs2_is_hard_readonly(osb) || ocfs2_mount_local(osb)) + atomic_set(&os->os_state, ORPHAN_SCAN_INACTIVE); + else { + atomic_set(&os->os_state, ORPHAN_SCAN_ACTIVE); + schedule_delayed_work(&os->os_orphan_scan_work, + ocfs2_orphan_scan_timeout()); + } } struct ocfs2_orphan_filldir_priv { diff --git a/fs/ocfs2/journal.h b/fs/ocfs2/journal.h index 61045ee..5432c7f 100644 --- a/fs/ocfs2/journal.h +++ b/fs/ocfs2/journal.h @@ -144,7 +144,7 @@ static inline void ocfs2_inode_set_new(struct ocfs2_super *osb, } /* Exported only for the journal struct init code in super.c. Do not call. 
*/ -int ocfs2_orphan_scan_init(struct ocfs2_super *osb); +void ocfs2_orphan_scan_init(struct ocfs2_super *osb); void ocfs2_orphan_scan_stop(struct ocfs2_super *osb); void ocfs2_orphan_scan_exit(struct ocfs2_super *osb); diff --git a/fs/ocfs2/namei.c b/fs/ocfs2/namei.c index 33464c6..8601f93 100644 --- a/fs/ocfs2/namei.c +++ b/fs/ocfs2/namei.c @@ -118,7 +118,7 @@ static struct dentry *ocfs2_lookup(struct inode *dir, struct dentry *dentry, mlog(0, "find name %.*s in directory %llu\n", dentry->d_name.len, dentry->d_name.name, (unsigned long long)OCFS2_I(dir)->ip_blkno); - status = ocfs2_inode_lock(dir, NULL, 0); + status = ocfs2_inode_lock_nested(dir, NULL, 0, OI_LS_PARENT); if (status < 0) { if (status != -ENOENT) mlog_errno(status); @@ -636,7 +636,7 @@ static int ocfs2_link(struct dentry *old_dentry, if (S_ISDIR(inode->i_mode)) return -EPERM; - err = ocfs2_inode_lock(dir, &parent_fe_bh, 1); + err = ocfs2_inode_lock_nested(dir, &parent_fe_bh, 1, OI_LS_PARENT); if (err < 0) { if (err != -ENOENT) mlog_errno(err); @@ -800,7 +800,8 @@ static int ocfs2_unlink(struct inode *dir, return -EPERM; } - status = ocfs2_inode_lock(dir, &parent_node_bh, 1); + status = ocfs2_inode_lock_nested(dir, &parent_node_bh, 1, + OI_LS_PARENT); if (status < 0) { if (status != -ENOENT) mlog_errno(status); @@ -978,7 +979,8 @@ static int ocfs2_double_lock(struct ocfs2_super *osb, inode1 = tmpinode; } /* lock id2 */ - status = ocfs2_inode_lock(inode2, bh2, 1); + status = ocfs2_inode_lock_nested(inode2, bh2, 1, + OI_LS_RENAME1); if (status < 0) { if (status != -ENOENT) mlog_errno(status); @@ -987,7 +989,7 @@ static int ocfs2_double_lock(struct ocfs2_super *osb, } /* lock id1 */ - status = ocfs2_inode_lock(inode1, bh1, 1); + status = ocfs2_inode_lock_nested(inode1, bh1, 1, OI_LS_RENAME2); if (status < 0) { /* * An error return must mean that no cluster locks @@ -1103,7 +1105,8 @@ static int ocfs2_rename(struct inode *old_dir, * won't have to concurrently downconvert the inode and the * dentry locks. 
*/ - status = ocfs2_inode_lock(old_inode, &old_inode_bh, 1); + status = ocfs2_inode_lock_nested(old_inode, &old_inode_bh, 1, + OI_LS_PARENT); if (status < 0) { if (status != -ENOENT) mlog_errno(status); diff --git a/fs/ocfs2/ocfs2.h b/fs/ocfs2/ocfs2.h index 18c1d9e..c9345eb 100644 --- a/fs/ocfs2/ocfs2.h +++ b/fs/ocfs2/ocfs2.h @@ -34,6 +34,7 @@ #include <linux/workqueue.h> #include <linux/kref.h> #include <linux/mutex.h> +#include <linux/lockdep.h> #ifndef CONFIG_OCFS2_COMPAT_JBD # include <linux/jbd2.h> #else @@ -152,6 +153,14 @@ struct ocfs2_lock_res { unsigned int l_lock_max_exmode; /* Max wait for EX */ unsigned int l_lock_refresh; /* Disk refreshes */ #endif +#ifdef CONFIG_DEBUG_LOCK_ALLOC + struct lockdep_map l_lockdep_map; +#endif +}; + +enum ocfs2_orphan_scan_state { + ORPHAN_SCAN_ACTIVE, + ORPHAN_SCAN_INACTIVE }; struct ocfs2_orphan_scan { @@ -162,6 +171,7 @@ struct ocfs2_orphan_scan { struct timespec os_scantime; /* time this node ran the scan */ u32 os_count; /* tracks node specific scans */ u32 os_seqno; /* tracks cluster wide scans */ + atomic_t os_state; /* ACTIVE or INACTIVE */ }; struct ocfs2_dlm_debug { diff --git a/fs/ocfs2/stack_o2cb.c b/fs/ocfs2/stack_o2cb.c index fcd120f..3f66137 100644 --- a/fs/ocfs2/stack_o2cb.c +++ b/fs/ocfs2/stack_o2cb.c @@ -236,6 +236,16 @@ static int o2cb_dlm_lock_status(union ocfs2_dlm_lksb *lksb) return dlm_status_to_errno(lksb->lksb_o2dlm.status); } +/* + * o2dlm aways has a "valid" LVB. If the dlm loses track of the LVB + * contents, it will zero out the LVB. Thus the caller can always trust + * the contents. 
+ */ +static int o2cb_dlm_lvb_valid(union ocfs2_dlm_lksb *lksb) +{ + return 1; +} + static void *o2cb_dlm_lvb(union ocfs2_dlm_lksb *lksb) { return (void *)(lksb->lksb_o2dlm.lvb); @@ -354,6 +364,7 @@ static struct ocfs2_stack_operations o2cb_stack_ops = { .dlm_lock = o2cb_dlm_lock, .dlm_unlock = o2cb_dlm_unlock, .lock_status = o2cb_dlm_lock_status, + .lvb_valid = o2cb_dlm_lvb_valid, .lock_lvb = o2cb_dlm_lvb, .dump_lksb = o2cb_dump_lksb, }; diff --git a/fs/ocfs2/stack_user.c b/fs/ocfs2/stack_user.c index 9b76d41..ff4c798 100644 --- a/fs/ocfs2/stack_user.c +++ b/fs/ocfs2/stack_user.c @@ -738,6 +738,13 @@ static int user_dlm_lock_status(union ocfs2_dlm_lksb *lksb) return lksb->lksb_fsdlm.sb_status; } +static int user_dlm_lvb_valid(union ocfs2_dlm_lksb *lksb) +{ + int invalid = lksb->lksb_fsdlm.sb_flags & DLM_SBF_VALNOTVALID; + + return !invalid; +} + static void *user_dlm_lvb(union ocfs2_dlm_lksb *lksb) { if (!lksb->lksb_fsdlm.sb_lvbptr) @@ -873,6 +880,7 @@ static struct ocfs2_stack_operations ocfs2_user_plugin_ops = { .dlm_lock = user_dlm_lock, .dlm_unlock = user_dlm_unlock, .lock_status = user_dlm_lock_status, + .lvb_valid = user_dlm_lvb_valid, .lock_lvb = user_dlm_lvb, .plock = user_plock, .dump_lksb = user_dlm_dump_lksb, diff --git a/fs/ocfs2/stackglue.c b/fs/ocfs2/stackglue.c index 68b668b..3f2f1c4 100644 --- a/fs/ocfs2/stackglue.c +++ b/fs/ocfs2/stackglue.c @@ -6,7 +6,7 @@ * Code which implements an OCFS2 specific interface to underlying * cluster stacks. * - * Copyright (C) 2007 Oracle. All rights reserved. + * Copyright (C) 2007, 2009 Oracle. All rights reserved. * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public @@ -271,11 +271,12 @@ int ocfs2_dlm_lock_status(union ocfs2_dlm_lksb *lksb) } EXPORT_SYMBOL_GPL(ocfs2_dlm_lock_status); -/* - * Why don't we cast to ocfs2_meta_lvb? The "clean" answer is that we - * don't cast at the glue level. 
The real answer is that the header - * ordering is nigh impossible. - */ +int ocfs2_dlm_lvb_valid(union ocfs2_dlm_lksb *lksb) +{ + return active_stack->sp_ops->lvb_valid(lksb); +} +EXPORT_SYMBOL_GPL(ocfs2_dlm_lvb_valid); + void *ocfs2_dlm_lvb(union ocfs2_dlm_lksb *lksb) { return active_stack->sp_ops->lock_lvb(lksb); diff --git a/fs/ocfs2/stackglue.h b/fs/ocfs2/stackglue.h index c571af3..03a44d6 100644 --- a/fs/ocfs2/stackglue.h +++ b/fs/ocfs2/stackglue.h @@ -186,6 +186,11 @@ struct ocfs2_stack_operations { int (*lock_status)(union ocfs2_dlm_lksb *lksb); /* + * Return non-zero if the LVB is valid. + */ + int (*lvb_valid)(union ocfs2_dlm_lksb *lksb); + + /* * Pull the lvb pointer off of the stack-specific lksb. */ void *(*lock_lvb)(union ocfs2_dlm_lksb *lksb); @@ -252,6 +257,7 @@ int ocfs2_dlm_unlock(struct ocfs2_cluster_connection *conn, struct ocfs2_lock_res *astarg); int ocfs2_dlm_lock_status(union ocfs2_dlm_lksb *lksb); +int ocfs2_dlm_lvb_valid(union ocfs2_dlm_lksb *lksb); void *ocfs2_dlm_lvb(union ocfs2_dlm_lksb *lksb); void ocfs2_dlm_dump_lksb(union ocfs2_dlm_lksb *lksb); diff --git a/fs/ocfs2/suballoc.c b/fs/ocfs2/suballoc.c index 8439f6b..73a16d4 100644 --- a/fs/ocfs2/suballoc.c +++ b/fs/ocfs2/suballoc.c @@ -923,14 +923,23 @@ static int ocfs2_test_bg_bit_allocatable(struct buffer_head *bg_bh, int nr) { struct ocfs2_group_desc *bg = (struct ocfs2_group_desc *) bg_bh->b_data; + int ret; if (ocfs2_test_bit(nr, (unsigned long *)bg->bg_bitmap)) return 0; - if (!buffer_jbd(bg_bh) || !bh2jh(bg_bh)->b_committed_data) + + if (!buffer_jbd(bg_bh)) return 1; + jbd_lock_bh_state(bg_bh); bg = (struct ocfs2_group_desc *) bh2jh(bg_bh)->b_committed_data; - return !ocfs2_test_bit(nr, (unsigned long *)bg->bg_bitmap); + if (bg) + ret = !ocfs2_test_bit(nr, (unsigned long *)bg->bg_bitmap); + else + ret = 1; + jbd_unlock_bh_state(bg_bh); + + return ret; } static int ocfs2_block_group_find_clear_bits(struct ocfs2_super *osb, @@ -1885,6 +1894,7 @@ static inline int 
ocfs2_block_group_clear_bits(handle_t *handle, unsigned int tmp; int journal_type = OCFS2_JOURNAL_ACCESS_WRITE; struct ocfs2_group_desc *undo_bg = NULL; + int cluster_bitmap = 0; mlog_entry_void(); @@ -1905,18 +1915,28 @@ static inline int ocfs2_block_group_clear_bits(handle_t *handle, } if (ocfs2_is_cluster_bitmap(alloc_inode)) - undo_bg = (struct ocfs2_group_desc *) bh2jh(group_bh)->b_committed_data; + cluster_bitmap = 1; + + if (cluster_bitmap) { + jbd_lock_bh_state(group_bh); + undo_bg = (struct ocfs2_group_desc *) + bh2jh(group_bh)->b_committed_data; + BUG_ON(!undo_bg); + } tmp = num_bits; while(tmp--) { ocfs2_clear_bit((bit_off + tmp), (unsigned long *) bg->bg_bitmap); - if (ocfs2_is_cluster_bitmap(alloc_inode)) + if (cluster_bitmap) ocfs2_set_bit(bit_off + tmp, (unsigned long *) undo_bg->bg_bitmap); } le16_add_cpu(&bg->bg_free_bits_count, num_bits); + if (cluster_bitmap) + jbd_unlock_bh_state(group_bh); + status = ocfs2_journal_dirty(handle, group_bh); if (status < 0) mlog_errno(status); diff --git a/fs/ocfs2/super.c b/fs/ocfs2/super.c index 0d3ed74..7efb349 100644 --- a/fs/ocfs2/super.c +++ b/fs/ocfs2/super.c @@ -205,11 +205,10 @@ static const match_table_t tokens = { #ifdef CONFIG_DEBUG_FS static int ocfs2_osb_dump(struct ocfs2_super *osb, char *buf, int len) { - int out = 0; - int i; struct ocfs2_cluster_connection *cconn = osb->cconn; struct ocfs2_recovery_map *rm = osb->recovery_map; - struct ocfs2_orphan_scan *os; + struct ocfs2_orphan_scan *os = &osb->osb_orphan_scan; + int i, out = 0; out += snprintf(buf + out, len - out, "%10s => Id: %-s Uuid: %-s Gen: 0x%X Label: %-s\n", @@ -234,20 +233,24 @@ static int ocfs2_osb_dump(struct ocfs2_super *osb, char *buf, int len) "%10s => Opts: 0x%lX AtimeQuanta: %u\n", "Mount", osb->s_mount_opt, osb->s_atime_quantum); - out += snprintf(buf + out, len - out, - "%10s => Stack: %s Name: %*s Version: %d.%d\n", - "Cluster", - (*osb->osb_cluster_stack == '\0' ? 
- "o2cb" : osb->osb_cluster_stack), - cconn->cc_namelen, cconn->cc_name, - cconn->cc_version.pv_major, cconn->cc_version.pv_minor); + if (cconn) { + out += snprintf(buf + out, len - out, + "%10s => Stack: %s Name: %*s " + "Version: %d.%d\n", "Cluster", + (*osb->osb_cluster_stack == '\0' ? + "o2cb" : osb->osb_cluster_stack), + cconn->cc_namelen, cconn->cc_name, + cconn->cc_version.pv_major, + cconn->cc_version.pv_minor); + } spin_lock(&osb->dc_task_lock); out += snprintf(buf + out, len - out, "%10s => Pid: %d Count: %lu WakeSeq: %lu " "WorkSeq: %lu\n", "DownCnvt", - task_pid_nr(osb->dc_task), osb->blocked_lock_count, - osb->dc_wake_sequence, osb->dc_work_sequence); + (osb->dc_task ? task_pid_nr(osb->dc_task) : -1), + osb->blocked_lock_count, osb->dc_wake_sequence, + osb->dc_work_sequence); spin_unlock(&osb->dc_task_lock); spin_lock(&osb->osb_lock); @@ -267,14 +270,15 @@ static int ocfs2_osb_dump(struct ocfs2_super *osb, char *buf, int len) out += snprintf(buf + out, len - out, "%10s => Pid: %d Interval: %lu Needs: %d\n", "Commit", - task_pid_nr(osb->commit_task), osb->osb_commit_interval, + (osb->commit_task ? 
task_pid_nr(osb->commit_task) : -1), + osb->osb_commit_interval, atomic_read(&osb->needs_checkpoint)); out += snprintf(buf + out, len - out, - "%10s => State: %d NumTxns: %d TxnId: %lu\n", + "%10s => State: %d TxnId: %lu NumTxns: %d\n", "Journal", osb->journal->j_state, - atomic_read(&osb->journal->j_num_trans), - osb->journal->j_trans_id); + osb->journal->j_trans_id, + atomic_read(&osb->journal->j_num_trans)); out += snprintf(buf + out, len - out, "%10s => GlobalAllocs: %d LocalAllocs: %d " @@ -300,9 +304,18 @@ static int ocfs2_osb_dump(struct ocfs2_super *osb, char *buf, int len) atomic_read(&osb->s_num_inodes_stolen)); spin_unlock(&osb->osb_lock); + out += snprintf(buf + out, len - out, "OrphanScan => "); + out += snprintf(buf + out, len - out, "Local: %u Global: %u ", + os->os_count, os->os_seqno); + out += snprintf(buf + out, len - out, " Last Scan: "); + if (atomic_read(&os->os_state) == ORPHAN_SCAN_INACTIVE) + out += snprintf(buf + out, len - out, "Disabled\n"); + else + out += snprintf(buf + out, len - out, "%lu seconds ago\n", + (get_seconds() - os->os_scantime.tv_sec)); + out += snprintf(buf + out, len - out, "%10s => %3s %10s\n", "Slots", "Num", "RecoGen"); - for (i = 0; i < osb->max_slots; ++i) { out += snprintf(buf + out, len - out, "%10s %c %3d %10d\n", @@ -311,13 +324,6 @@ static int ocfs2_osb_dump(struct ocfs2_super *osb, char *buf, int len) i, osb->slot_recovery_generations[i]); } - os = &osb->osb_orphan_scan; - out += snprintf(buf + out, len - out, "Orphan Scan=> "); - out += snprintf(buf + out, len - out, "Local: %u Global: %u ", - os->os_count, os->os_seqno); - out += snprintf(buf + out, len - out, " Last Scan: %lu seconds ago\n", - (get_seconds() - os->os_scantime.tv_sec)); - return out; } @@ -1175,6 +1181,9 @@ static int ocfs2_fill_super(struct super_block *sb, void *data, int silent) atomic_set(&osb->vol_state, VOLUME_MOUNTED_QUOTAS); wake_up(&osb->osb_mount_event); + /* Start this when the mount is almost sure of being successful */ + 
ocfs2_orphan_scan_init(osb); + mlog_exit(status); return status; @@ -1810,14 +1819,15 @@ static void ocfs2_dismount_volume(struct super_block *sb, int mnt_err) debugfs_remove(osb->osb_ctxt); + /* Orphan scan should be stopped as early as possible */ + ocfs2_orphan_scan_stop(osb); + ocfs2_disable_quotas(osb); ocfs2_shutdown_local_alloc(osb); ocfs2_truncate_log_shutdown(osb); - ocfs2_orphan_scan_stop(osb); - /* This will disable recovery and flush any recovery work. */ ocfs2_recovery_exit(osb); @@ -1978,13 +1988,6 @@ static int ocfs2_initialize_super(struct super_block *sb, goto bail; } - status = ocfs2_orphan_scan_init(osb); - if (status) { - mlog(ML_ERROR, "Unable to initialize delayed orphan scan\n"); - mlog_errno(status); - goto bail; - } - init_waitqueue_head(&osb->checkpoint_event); atomic_set(&osb->needs_checkpoint, 0); diff --git a/fs/ocfs2/sysfile.c b/fs/ocfs2/sysfile.c index ab713eb..40e5370 100644 --- a/fs/ocfs2/sysfile.c +++ b/fs/ocfs2/sysfile.c @@ -50,6 +50,10 @@ static inline int is_in_system_inode_array(struct ocfs2_super *osb, int type, u32 slot); +#ifdef CONFIG_DEBUG_LOCK_ALLOC +static struct lock_class_key ocfs2_sysfile_cluster_lock_key[NUM_SYSTEM_INODES]; +#endif + static inline int is_global_system_inode(int type) { return type >= OCFS2_FIRST_ONLINE_SYSTEM_INODE && @@ -118,6 +122,21 @@ static struct inode * _ocfs2_get_system_file_inode(struct ocfs2_super *osb, inode = NULL; goto bail; } +#ifdef CONFIG_DEBUG_LOCK_ALLOC + if (type == LOCAL_USER_QUOTA_SYSTEM_INODE || + type == LOCAL_GROUP_QUOTA_SYSTEM_INODE || + type == JOURNAL_SYSTEM_INODE) { + /* Ignore inode lock on these inodes as the lock does not + * really belong to any process and lockdep cannot handle + * that */ + OCFS2_I(inode)->ip_inode_lockres.l_lockdep_map.key = NULL; + } else { + lockdep_init_map(&OCFS2_I(inode)->ip_inode_lockres. 
+ l_lockdep_map, + ocfs2_system_inodes[type].si_name, + &ocfs2_sysfile_cluster_lock_key[type], 0); + } +#endif bail: return inode; @@ -378,63 +378,63 @@ SYSCALL_ALIAS(sys_ftruncate64, SyS_ftruncate64); #endif #endif /* BITS_PER_LONG == 32 */ -SYSCALL_DEFINE(fallocate)(int fd, int mode, loff_t offset, loff_t len) + +int do_fallocate(struct file *file, int mode, loff_t offset, loff_t len) { - struct file *file; - struct inode *inode; - long ret = -EINVAL; + struct inode *inode = file->f_path.dentry->d_inode; + long ret; if (offset < 0 || len <= 0) - goto out; + return -EINVAL; /* Return error if mode is not supported */ - ret = -EOPNOTSUPP; if (mode && !(mode & FALLOC_FL_KEEP_SIZE)) - goto out; + return -EOPNOTSUPP; - ret = -EBADF; - file = fget(fd); - if (!file) - goto out; if (!(file->f_mode & FMODE_WRITE)) - goto out_fput; + return -EBADF; /* * Revalidate the write permissions, in case security policy has * changed since the files were opened. */ ret = security_file_permission(file, MAY_WRITE); if (ret) - goto out_fput; + return ret; - inode = file->f_path.dentry->d_inode; - - ret = -ESPIPE; if (S_ISFIFO(inode->i_mode)) - goto out_fput; + return -ESPIPE; - ret = -ENODEV; /* * Let individual file system decide if it supports preallocation * for directories or not. 
*/ if (!S_ISREG(inode->i_mode) && !S_ISDIR(inode->i_mode)) - goto out_fput; + return -ENODEV; - ret = -EFBIG; /* Check for wrap through zero too */ if (((offset + len) > inode->i_sb->s_maxbytes) || ((offset + len) < 0)) - goto out_fput; + return -EFBIG; - if (inode->i_op->fallocate) - ret = inode->i_op->fallocate(inode, mode, offset, len); - else - ret = -EOPNOTSUPP; + if (!inode->i_op->fallocate) + return -EOPNOTSUPP; -out_fput: - fput(file); -out: - return ret; + return inode->i_op->fallocate(inode, mode, offset, len); } + +SYSCALL_DEFINE(fallocate)(int fd, int mode, loff_t offset, loff_t len) +{ + struct file *file; + int error = -EBADF; + + file = fget(fd); + if (file) { + error = do_fallocate(file, mode, offset, len); + fput(file); + } + + return error; +} + #ifdef CONFIG_HAVE_SYSCALL_WRAPPERS asmlinkage long SyS_fallocate(long fd, long mode, loff_t offset, loff_t len) { diff --git a/fs/reiserfs/inode.c b/fs/reiserfs/inode.c index 6fd0f47..a14d6cd 100644 --- a/fs/reiserfs/inode.c +++ b/fs/reiserfs/inode.c @@ -1131,8 +1131,6 @@ static void init_inode(struct inode *inode, struct treepath *path) REISERFS_I(inode)->i_trans_id = 0; REISERFS_I(inode)->i_jl = NULL; mutex_init(&(REISERFS_I(inode)->i_mmap)); - reiserfs_init_acl_access(inode); - reiserfs_init_acl_default(inode); reiserfs_init_xattr_rwsem(inode); if (stat_data_v1(ih)) { @@ -1834,8 +1832,6 @@ int reiserfs_new_inode(struct reiserfs_transaction_handle *th, REISERFS_I(dir)->i_attrs & REISERFS_INHERIT_MASK; sd_attrs_to_i_attrs(REISERFS_I(inode)->i_attrs, inode); mutex_init(&(REISERFS_I(inode)->i_mmap)); - reiserfs_init_acl_access(inode); - reiserfs_init_acl_default(inode); reiserfs_init_xattr_rwsem(inode); /* key to search for correct place for new stat data */ diff --git a/fs/reiserfs/resize.c b/fs/reiserfs/resize.c index 238e9d9..18b315d 100644 --- a/fs/reiserfs/resize.c +++ b/fs/reiserfs/resize.c @@ -82,7 +82,6 @@ int reiserfs_resize(struct super_block *s, unsigned long block_count_new) if 
(reiserfs_allocate_list_bitmaps(s, jbitmap, bmap_nr_new) < 0) { printk ("reiserfs_resize: unable to allocate memory for journal bitmaps\n"); - unlock_super(s); return -ENOMEM; } /* the new journal bitmaps are zero filled, now we copy in the bitmap diff --git a/fs/reiserfs/super.c b/fs/reiserfs/super.c index 2969773..d3aeb06 100644 --- a/fs/reiserfs/super.c +++ b/fs/reiserfs/super.c @@ -529,10 +529,6 @@ static void init_once(void *foo) INIT_LIST_HEAD(&ei->i_prealloc_list); inode_init_once(&ei->vfs_inode); -#ifdef CONFIG_REISERFS_FS_POSIX_ACL - ei->i_acl_access = NULL; - ei->i_acl_default = NULL; -#endif } static int init_inodecache(void) @@ -580,25 +576,6 @@ static void reiserfs_dirty_inode(struct inode *inode) reiserfs_write_unlock(inode->i_sb); } -#ifdef CONFIG_REISERFS_FS_POSIX_ACL -static void reiserfs_clear_inode(struct inode *inode) -{ - struct posix_acl *acl; - - acl = REISERFS_I(inode)->i_acl_access; - if (acl && !IS_ERR(acl)) - posix_acl_release(acl); - REISERFS_I(inode)->i_acl_access = NULL; - - acl = REISERFS_I(inode)->i_acl_default; - if (acl && !IS_ERR(acl)) - posix_acl_release(acl); - REISERFS_I(inode)->i_acl_default = NULL; -} -#else -#define reiserfs_clear_inode NULL -#endif - #ifdef CONFIG_QUOTA static ssize_t reiserfs_quota_write(struct super_block *, int, const char *, size_t, loff_t); @@ -612,7 +589,6 @@ static const struct super_operations reiserfs_sops = { .write_inode = reiserfs_write_inode, .dirty_inode = reiserfs_dirty_inode, .delete_inode = reiserfs_delete_inode, - .clear_inode = reiserfs_clear_inode, .put_super = reiserfs_put_super, .write_super = reiserfs_write_super, .sync_fs = reiserfs_sync_fs, diff --git a/fs/reiserfs/xattr_acl.c b/fs/reiserfs/xattr_acl.c index c303c42..35d6e67 100644 --- a/fs/reiserfs/xattr_acl.c +++ b/fs/reiserfs/xattr_acl.c @@ -188,29 +188,6 @@ static void *posix_acl_to_disk(const struct posix_acl *acl, size_t * size) return ERR_PTR(-EINVAL); } -static inline void iset_acl(struct inode *inode, struct posix_acl 
**i_acl, - struct posix_acl *acl) -{ - spin_lock(&inode->i_lock); - if (*i_acl != ERR_PTR(-ENODATA)) - posix_acl_release(*i_acl); - *i_acl = posix_acl_dup(acl); - spin_unlock(&inode->i_lock); -} - -static inline struct posix_acl *iget_acl(struct inode *inode, - struct posix_acl **i_acl) -{ - struct posix_acl *acl = ERR_PTR(-ENODATA); - - spin_lock(&inode->i_lock); - if (*i_acl != ERR_PTR(-ENODATA)) - acl = posix_acl_dup(*i_acl); - spin_unlock(&inode->i_lock); - - return acl; -} - /* * Inode operation get_posix_acl(). * @@ -220,34 +197,29 @@ static inline struct posix_acl *iget_acl(struct inode *inode, struct posix_acl *reiserfs_get_acl(struct inode *inode, int type) { char *name, *value; - struct posix_acl *acl, **p_acl; + struct posix_acl *acl; int size; int retval; - struct reiserfs_inode_info *reiserfs_i = REISERFS_I(inode); + + acl = get_cached_acl(inode, type); + if (acl != ACL_NOT_CACHED) + return acl; switch (type) { case ACL_TYPE_ACCESS: name = POSIX_ACL_XATTR_ACCESS; - p_acl = &reiserfs_i->i_acl_access; break; case ACL_TYPE_DEFAULT: name = POSIX_ACL_XATTR_DEFAULT; - p_acl = &reiserfs_i->i_acl_default; break; default: - return ERR_PTR(-EINVAL); + BUG(); } - acl = iget_acl(inode, p_acl); - if (acl && !IS_ERR(acl)) - return acl; - else if (PTR_ERR(acl) == -ENODATA) - return NULL; - size = reiserfs_xattr_get(inode, name, NULL, 0); if (size < 0) { if (size == -ENODATA || size == -ENOSYS) { - *p_acl = ERR_PTR(-ENODATA); + set_cached_acl(inode, type, NULL); return NULL; } return ERR_PTR(size); @@ -262,14 +234,13 @@ struct posix_acl *reiserfs_get_acl(struct inode *inode, int type) /* This shouldn't actually happen as it should have been caught above.. 
but just in case */ acl = NULL; - *p_acl = ERR_PTR(-ENODATA); } else if (retval < 0) { acl = ERR_PTR(retval); } else { acl = posix_acl_from_disk(value, retval); - if (!IS_ERR(acl)) - iset_acl(inode, p_acl, acl); } + if (!IS_ERR(acl)) + set_cached_acl(inode, type, acl); kfree(value); return acl; @@ -287,10 +258,8 @@ reiserfs_set_acl(struct reiserfs_transaction_handle *th, struct inode *inode, { char *name; void *value = NULL; - struct posix_acl **p_acl; size_t size = 0; int error; - struct reiserfs_inode_info *reiserfs_i = REISERFS_I(inode); if (S_ISLNK(inode->i_mode)) return -EOPNOTSUPP; @@ -298,7 +267,6 @@ reiserfs_set_acl(struct reiserfs_transaction_handle *th, struct inode *inode, switch (type) { case ACL_TYPE_ACCESS: name = POSIX_ACL_XATTR_ACCESS; - p_acl = &reiserfs_i->i_acl_access; if (acl) { mode_t mode = inode->i_mode; error = posix_acl_equiv_mode(acl, &mode); @@ -313,7 +281,6 @@ reiserfs_set_acl(struct reiserfs_transaction_handle *th, struct inode *inode, break; case ACL_TYPE_DEFAULT: name = POSIX_ACL_XATTR_DEFAULT; - p_acl = &reiserfs_i->i_acl_default; if (!S_ISDIR(inode->i_mode)) return acl ? 
-EACCES : 0; break; @@ -346,7 +313,7 @@ reiserfs_set_acl(struct reiserfs_transaction_handle *th, struct inode *inode, kfree(value); if (!error) - iset_acl(inode, p_acl, acl); + set_cached_acl(inode, type, acl); return error; } @@ -379,11 +346,8 @@ reiserfs_inherit_default_acl(struct reiserfs_transaction_handle *th, } acl = reiserfs_get_acl(dir, ACL_TYPE_DEFAULT); - if (IS_ERR(acl)) { - if (PTR_ERR(acl) == -ENODATA) - goto apply_umask; + if (IS_ERR(acl)) return PTR_ERR(acl); - } if (acl) { struct posix_acl *acl_copy; @@ -608,6 +608,7 @@ void emergency_remount(void) static DEFINE_IDA(unnamed_dev_ida); static DEFINE_SPINLOCK(unnamed_dev_lock);/* protects the above */ +static int unnamed_dev_start = 0; /* don't bother trying below it */ int set_anon_super(struct super_block *s, void *data) { @@ -618,7 +619,9 @@ int set_anon_super(struct super_block *s, void *data) if (ida_pre_get(&unnamed_dev_ida, GFP_ATOMIC) == 0) return -ENOMEM; spin_lock(&unnamed_dev_lock); - error = ida_get_new(&unnamed_dev_ida, &dev); + error = ida_get_new_above(&unnamed_dev_ida, unnamed_dev_start, &dev); + if (!error) + unnamed_dev_start = dev + 1; spin_unlock(&unnamed_dev_lock); if (error == -EAGAIN) /* We raced and lost with another CPU. */ @@ -629,6 +632,8 @@ int set_anon_super(struct super_block *s, void *data) if ((dev & MAX_ID_MASK) == (1 << MINORBITS)) { spin_lock(&unnamed_dev_lock); ida_remove(&unnamed_dev_ida, dev); + if (unnamed_dev_start > dev) + unnamed_dev_start = dev; spin_unlock(&unnamed_dev_lock); return -EMFILE; } @@ -645,6 +650,8 @@ void kill_anon_super(struct super_block *sb) generic_shutdown_super(sb); spin_lock(&unnamed_dev_lock); ida_remove(&unnamed_dev_ida, slot); + if (slot < unnamed_dev_start) + unnamed_dev_start = slot; spin_unlock(&unnamed_dev_lock); } diff --git a/fs/ubifs/xattr.c b/fs/ubifs/xattr.c index cfd31e2..adafcf5 100644 --- a/fs/ubifs/xattr.c +++ b/fs/ubifs/xattr.c @@ -55,9 +55,9 @@ * ACL support is not implemented. 
*/ +#include "ubifs.h" #include <linux/xattr.h> #include <linux/posix_acl_xattr.h> -#include "ubifs.h" /* * Limit the number of extended attributes per inode so that the total size diff --git a/fs/udf/balloc.c b/fs/udf/balloc.c index e48e9a3..1e06853 100644 --- a/fs/udf/balloc.c +++ b/fs/udf/balloc.c @@ -238,7 +238,7 @@ static int udf_bitmap_prealloc_blocks(struct super_block *sb, mutex_lock(&sbi->s_alloc_mutex); part_len = sbi->s_partmaps[partition].s_partition_len; - if (first_block < 0 || first_block >= part_len) + if (first_block >= part_len) goto out; if (first_block + block_count > part_len) @@ -297,7 +297,7 @@ static int udf_bitmap_new_block(struct super_block *sb, mutex_lock(&sbi->s_alloc_mutex); repeat: - if (goal < 0 || goal >= sbi->s_partmaps[partition].s_partition_len) + if (goal >= sbi->s_partmaps[partition].s_partition_len) goal = 0; nr_groups = bitmap->s_nr_groups; @@ -666,8 +666,7 @@ static int udf_table_prealloc_blocks(struct super_block *sb, int8_t etype = -1; struct udf_inode_info *iinfo; - if (first_block < 0 || - first_block >= sbi->s_partmaps[partition].s_partition_len) + if (first_block >= sbi->s_partmaps[partition].s_partition_len) return 0; iinfo = UDF_I(table); @@ -743,7 +742,7 @@ static int udf_table_new_block(struct super_block *sb, return newblock; mutex_lock(&sbi->s_alloc_mutex); - if (goal < 0 || goal >= sbi->s_partmaps[partition].s_partition_len) + if (goal >= sbi->s_partmaps[partition].s_partition_len) goal = 0; /* We search for the closest matching block to goal. If we find diff --git a/fs/udf/lowlevel.c b/fs/udf/lowlevel.c index 703843f..1b88fd5 100644 --- a/fs/udf/lowlevel.c +++ b/fs/udf/lowlevel.c @@ -56,7 +56,12 @@ unsigned long udf_get_last_block(struct super_block *sb) struct block_device *bdev = sb->s_bdev; unsigned long lblock = 0; - if (ioctl_by_bdev(bdev, CDROM_LAST_WRITTEN, (unsigned long) &lblock)) + /* + * ioctl failed or returned obviously bogus value? + * Try using the device size... 
+ */ + if (ioctl_by_bdev(bdev, CDROM_LAST_WRITTEN, (unsigned long) &lblock) || + lblock == 0) lblock = bdev->bd_inode->i_size >> sb->s_blocksize_bits; if (lblock) diff --git a/fs/xfs/linux-2.6/xfs_acl.c b/fs/xfs/linux-2.6/xfs_acl.c index 1e9d124..b23a545 100644 --- a/fs/xfs/linux-2.6/xfs_acl.c +++ b/fs/xfs/linux-2.6/xfs_acl.c @@ -25,14 +25,10 @@ #include <linux/posix_acl_xattr.h> -#define XFS_ACL_NOT_CACHED ((void *)-1) - /* * Locking scheme: * - all ACL updates are protected by inode->i_mutex, which is taken before * calling into this file. - * - access and updates to the ip->i_acl and ip->i_default_acl pointers are - * protected by inode->i_lock. */ STATIC struct posix_acl * @@ -102,59 +98,35 @@ xfs_acl_to_disk(struct xfs_acl *aclp, const struct posix_acl *acl) } } -/* - * Update the cached ACL pointer in the inode. - * - * Because we don't hold any locks while reading/writing the attribute - * from/to disk another thread could have raced and updated the cached - * ACL value before us. In that case we release the previous cached value - * and update it with our new value. 
- */ -STATIC void -xfs_update_cached_acl(struct inode *inode, struct posix_acl **p_acl, - struct posix_acl *acl) -{ - spin_lock(&inode->i_lock); - if (*p_acl && *p_acl != XFS_ACL_NOT_CACHED) - posix_acl_release(*p_acl); - *p_acl = posix_acl_dup(acl); - spin_unlock(&inode->i_lock); -} - struct posix_acl * xfs_get_acl(struct inode *inode, int type) { struct xfs_inode *ip = XFS_I(inode); - struct posix_acl *acl = NULL, **p_acl; + struct posix_acl *acl; struct xfs_acl *xfs_acl; int len = sizeof(struct xfs_acl); char *ea_name; int error; + acl = get_cached_acl(inode, type); + if (acl != ACL_NOT_CACHED) + return acl; + switch (type) { case ACL_TYPE_ACCESS: ea_name = SGI_ACL_FILE; - p_acl = &ip->i_acl; break; case ACL_TYPE_DEFAULT: ea_name = SGI_ACL_DEFAULT; - p_acl = &ip->i_default_acl; break; default: - return ERR_PTR(-EINVAL); + BUG(); } - spin_lock(&inode->i_lock); - if (*p_acl != XFS_ACL_NOT_CACHED) - acl = posix_acl_dup(*p_acl); - spin_unlock(&inode->i_lock); - /* * If we have a cached ACLs value just return it, not need to * go out to the disk. */ - if (acl) - return acl; xfs_acl = kzalloc(sizeof(struct xfs_acl), GFP_KERNEL); if (!xfs_acl) @@ -165,7 +137,7 @@ xfs_get_acl(struct inode *inode, int type) /* * If the attribute doesn't exist make sure we have a negative * cache entry, for any other error assume it is transient and - * leave the cache entry as XFS_ACL_NOT_CACHED. + * leave the cache entry as ACL_NOT_CACHED. 
*/ if (error == -ENOATTR) { acl = NULL; @@ -179,7 +151,7 @@ xfs_get_acl(struct inode *inode, int type) goto out; out_update_cache: - xfs_update_cached_acl(inode, p_acl, acl); + set_cached_acl(inode, type, acl); out: kfree(xfs_acl); return acl; @@ -189,7 +161,6 @@ STATIC int xfs_set_acl(struct inode *inode, int type, struct posix_acl *acl) { struct xfs_inode *ip = XFS_I(inode); - struct posix_acl **p_acl; char *ea_name; int error; @@ -199,13 +170,11 @@ xfs_set_acl(struct inode *inode, int type, struct posix_acl *acl) switch (type) { case ACL_TYPE_ACCESS: ea_name = SGI_ACL_FILE; - p_acl = &ip->i_acl; break; case ACL_TYPE_DEFAULT: if (!S_ISDIR(inode->i_mode)) return acl ? -EACCES : 0; ea_name = SGI_ACL_DEFAULT; - p_acl = &ip->i_default_acl; break; default: return -EINVAL; @@ -242,7 +211,7 @@ xfs_set_acl(struct inode *inode, int type, struct posix_acl *acl) } if (!error) - xfs_update_cached_acl(inode, p_acl, acl); + set_cached_acl(inode, type, acl); return error; } @@ -384,30 +353,6 @@ xfs_acl_chmod(struct inode *inode) return error; } -void -xfs_inode_init_acls(struct xfs_inode *ip) -{ - /* - * No need for locking, inode is not live yet. - */ - ip->i_acl = XFS_ACL_NOT_CACHED; - ip->i_default_acl = XFS_ACL_NOT_CACHED; -} - -void -xfs_inode_clear_acls(struct xfs_inode *ip) -{ - /* - * No need for locking here, the inode is not live anymore - * and just about to be freed. - */ - if (ip->i_acl != XFS_ACL_NOT_CACHED) - posix_acl_release(ip->i_acl); - if (ip->i_default_acl != XFS_ACL_NOT_CACHED) - posix_acl_release(ip->i_default_acl); -} - - /* * System xattr handlers. 
* diff --git a/fs/xfs/xfs_acl.h b/fs/xfs/xfs_acl.h index 63dc1f2..947b150 100644 --- a/fs/xfs/xfs_acl.h +++ b/fs/xfs/xfs_acl.h @@ -46,8 +46,6 @@ extern int xfs_check_acl(struct inode *inode, int mask); extern struct posix_acl *xfs_get_acl(struct inode *inode, int type); extern int xfs_inherit_acl(struct inode *inode, struct posix_acl *default_acl); extern int xfs_acl_chmod(struct inode *inode); -extern void xfs_inode_init_acls(struct xfs_inode *ip); -extern void xfs_inode_clear_acls(struct xfs_inode *ip); extern int posix_acl_access_exists(struct inode *inode); extern int posix_acl_default_exists(struct inode *inode); @@ -57,8 +55,6 @@ extern struct xattr_handler xfs_xattr_system_handler; # define xfs_get_acl(inode, type) NULL # define xfs_inherit_acl(inode, default_acl) 0 # define xfs_acl_chmod(inode) 0 -# define xfs_inode_init_acls(ip) -# define xfs_inode_clear_acls(ip) # define posix_acl_access_exists(inode) 0 # define posix_acl_default_exists(inode) 0 #endif /* CONFIG_XFS_POSIX_ACL */ diff --git a/fs/xfs/xfs_iget.c b/fs/xfs/xfs_iget.c index 76c540f..5fcec6f 100644 --- a/fs/xfs/xfs_iget.c +++ b/fs/xfs/xfs_iget.c @@ -83,7 +83,6 @@ xfs_inode_alloc( memset(&ip->i_d, 0, sizeof(xfs_icdinode_t)); ip->i_size = 0; ip->i_new_size = 0; - xfs_inode_init_acls(ip); /* * Initialize inode's trace buffers. @@ -560,7 +559,6 @@ xfs_ireclaim( ASSERT(atomic_read(&ip->i_pincount) == 0); ASSERT(!spin_is_locked(&ip->i_flags_lock)); ASSERT(completion_done(&ip->i_flush)); - xfs_inode_clear_acls(ip); kmem_zone_free(xfs_inode_zone, ip); } diff --git a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h index 7701670..1804f86 100644 --- a/fs/xfs/xfs_inode.h +++ b/fs/xfs/xfs_inode.h @@ -273,11 +273,6 @@ typedef struct xfs_inode { /* VFS inode */ struct inode i_vnode; /* embedded VFS inode */ -#ifdef CONFIG_XFS_POSIX_ACL - struct posix_acl *i_acl; - struct posix_acl *i_default_acl; -#endif - /* Trace buffers per inode. */ #ifdef XFS_INODE_TRACE struct ktrace *i_trace; /* general inode trace */ |