diff options
36 files changed, 1629 insertions, 411 deletions
diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt index e2f8c29..c1601e5 100644 --- a/Documentation/kernel-parameters.txt +++ b/Documentation/kernel-parameters.txt @@ -1699,6 +1699,12 @@ bytes respectively. Such letter suffixes can also be entirely omitted. The default is to send the implementation identification information. + nfsd.nfs4_disable_idmapping= + [NFSv4] When set to the default of '1', the NFSv4 + server will return only numeric uids and gids to + clients using auth_sys, and will accept numeric uids + and gids from such clients. This is intended to ease + migration from NFSv2/v3. objlayoutdriver.osd_login_prog= [NFS] [OBJLAYOUT] sets the pathname to the program which diff --git a/fs/ext4/dir.c b/fs/ext4/dir.c index ad56866..b867862 100644 --- a/fs/ext4/dir.c +++ b/fs/ext4/dir.c @@ -32,24 +32,8 @@ static unsigned char ext4_filetype_table[] = { DT_UNKNOWN, DT_REG, DT_DIR, DT_CHR, DT_BLK, DT_FIFO, DT_SOCK, DT_LNK }; -static int ext4_readdir(struct file *, void *, filldir_t); static int ext4_dx_readdir(struct file *filp, void *dirent, filldir_t filldir); -static int ext4_release_dir(struct inode *inode, - struct file *filp); - -const struct file_operations ext4_dir_operations = { - .llseek = ext4_llseek, - .read = generic_read_dir, - .readdir = ext4_readdir, /* we take BKL. needed?*/ - .unlocked_ioctl = ext4_ioctl, -#ifdef CONFIG_COMPAT - .compat_ioctl = ext4_compat_ioctl, -#endif - .fsync = ext4_sync_file, - .release = ext4_release_dir, -}; - static unsigned char get_dtype(struct super_block *sb, int filetype) { @@ -60,6 +44,26 @@ static unsigned char get_dtype(struct super_block *sb, int filetype) return (ext4_filetype_table[filetype]); } +/** + * Check if the given dir-inode refers to an htree-indexed directory + * (or a directory which chould potentially get coverted to use htree + * indexing). + * + * Return 1 if it is a dx dir, 0 if not + */ +static int is_dx_dir(struct inode *inode) +{ + struct super_block *sb = inode->i_sb; + + if (EXT4_HAS_COMPAT_FEATURE(inode->i_sb, + EXT4_FEATURE_COMPAT_DIR_INDEX) && + ((ext4_test_inode_flag(inode, EXT4_INODE_INDEX)) || + ((inode->i_size >> sb->s_blocksize_bits) == 1))) + return 1; + + return 0; +} + /* * Return 0 if the directory entry is OK, and 1 if there is a problem * @@ -115,18 +119,13 @@ static int ext4_readdir(struct file *filp, unsigned int offset; int i, stored; struct ext4_dir_entry_2 *de; - struct super_block *sb; int err; struct inode *inode = filp->f_path.dentry->d_inode; + struct super_block *sb = inode->i_sb; int ret = 0; int dir_has_error = 0; - sb = inode->i_sb; - - if (EXT4_HAS_COMPAT_FEATURE(inode->i_sb, - EXT4_FEATURE_COMPAT_DIR_INDEX) && - ((ext4_test_inode_flag(inode, EXT4_INODE_INDEX)) || - ((inode->i_size >> sb->s_blocksize_bits) == 1))) { + if (is_dx_dir(inode)) { err = ext4_dx_readdir(filp, dirent, filldir); if (err != ERR_BAD_DX_DIR) { ret = err; @@ -254,22 +253,134 @@ out: return ret; } +static inline int is_32bit_api(void) +{ +#ifdef CONFIG_COMPAT + return is_compat_task(); +#else + return (BITS_PER_LONG == 32); +#endif +} + /* * These functions convert from the major/minor hash to an f_pos - * value. + * value for dx directories + * + * Upper layer (for example NFS) should specify FMODE_32BITHASH or + * FMODE_64BITHASH explicitly. On the other hand, we allow ext4 to be mounted + * directly on both 32-bit and 64-bit nodes, under such case, neither + * FMODE_32BITHASH nor FMODE_64BITHASH is specified. + */ +static inline loff_t hash2pos(struct file *filp, __u32 major, __u32 minor) +{ + if ((filp->f_mode & FMODE_32BITHASH) || + (!(filp->f_mode & FMODE_64BITHASH) && is_32bit_api())) + return major >> 1; + else + return ((__u64)(major >> 1) << 32) | (__u64)minor; +} + +static inline __u32 pos2maj_hash(struct file *filp, loff_t pos) +{ + if ((filp->f_mode & FMODE_32BITHASH) || + (!(filp->f_mode & FMODE_64BITHASH) && is_32bit_api())) + return (pos << 1) & 0xffffffff; + else + return ((pos >> 32) << 1) & 0xffffffff; +} + +static inline __u32 pos2min_hash(struct file *filp, loff_t pos) +{ + if ((filp->f_mode & FMODE_32BITHASH) || + (!(filp->f_mode & FMODE_64BITHASH) && is_32bit_api())) + return 0; + else + return pos & 0xffffffff; +} + +/* + * Return 32- or 64-bit end-of-file for dx directories + */ +static inline loff_t ext4_get_htree_eof(struct file *filp) +{ + if ((filp->f_mode & FMODE_32BITHASH) || + (!(filp->f_mode & FMODE_64BITHASH) && is_32bit_api())) + return EXT4_HTREE_EOF_32BIT; + else + return EXT4_HTREE_EOF_64BIT; +} + + +/* + * ext4_dir_llseek() based on generic_file_llseek() to handle both + * non-htree and htree directories, where the "offset" is in terms + * of the filename hash value instead of the byte offset. * - * Currently we only use major hash numer. This is unfortunate, but - * on 32-bit machines, the same VFS interface is used for lseek and - * llseek, so if we use the 64 bit offset, then the 32-bit versions of - * lseek/telldir/seekdir will blow out spectacularly, and from within - * the ext2 low-level routine, we don't know if we're being called by - * a 64-bit version of the system call or the 32-bit version of the - * system call. Worse yet, NFSv2 only allows for a 32-bit readdir - * cookie. Sigh. + * NOTE: offsets obtained *before* ext4_set_inode_flag(dir, EXT4_INODE_INDEX) + * will be invalid once the directory was converted into a dx directory */ -#define hash2pos(major, minor) (major >> 1) -#define pos2maj_hash(pos) ((pos << 1) & 0xffffffff) -#define pos2min_hash(pos) (0) +loff_t ext4_dir_llseek(struct file *file, loff_t offset, int origin) +{ + struct inode *inode = file->f_mapping->host; + loff_t ret = -EINVAL; + int dx_dir = is_dx_dir(inode); + + mutex_lock(&inode->i_mutex); + + /* NOTE: relative offsets with dx directories might not work + * as expected, as it is difficult to figure out the + * correct offset between dx hashes */ + + switch (origin) { + case SEEK_END: + if (unlikely(offset > 0)) + goto out_err; /* not supported for directories */ + + /* so only negative offsets are left, does that have a + * meaning for directories at all? */ + if (dx_dir) + offset += ext4_get_htree_eof(file); + else + offset += inode->i_size; + break; + case SEEK_CUR: + /* + * Here we special-case the lseek(fd, 0, SEEK_CUR) + * position-querying operation. Avoid rewriting the "same" + * f_pos value back to the file because a concurrent read(), + * write() or lseek() might have altered it + */ + if (offset == 0) { + offset = file->f_pos; + goto out_ok; + } + + offset += file->f_pos; + break; + } + + if (unlikely(offset < 0)) + goto out_err; + + if (!dx_dir) { + if (offset > inode->i_sb->s_maxbytes) + goto out_err; + } else if (offset > ext4_get_htree_eof(file)) + goto out_err; + + /* Special lock needed here? */ + if (offset != file->f_pos) { + file->f_pos = offset; + file->f_version = 0; + } + +out_ok: + ret = offset; +out_err: + mutex_unlock(&inode->i_mutex); + + return ret; +} /* * This structure holds the nodes of the red-black tree used to store @@ -330,15 +441,16 @@ static void free_rb_tree_fname(struct rb_root *root) } -static struct dir_private_info *ext4_htree_create_dir_info(loff_t pos) +static struct dir_private_info *ext4_htree_create_dir_info(struct file *filp, + loff_t pos) { struct dir_private_info *p; p = kzalloc(sizeof(struct dir_private_info), GFP_KERNEL); if (!p) return NULL; - p->curr_hash = pos2maj_hash(pos); - p->curr_minor_hash = pos2min_hash(pos); + p->curr_hash = pos2maj_hash(filp, pos); + p->curr_minor_hash = pos2min_hash(filp, pos); return p; } @@ -430,7 +542,7 @@ static int call_filldir(struct file *filp, void *dirent, inode->i_ino, current->comm); return 0; } - curr_pos = hash2pos(fname->hash, fname->minor_hash); + curr_pos = hash2pos(filp, fname->hash, fname->minor_hash); while (fname) { error = filldir(dirent, fname->name, fname->name_len, curr_pos, @@ -455,13 +567,13 @@ static int ext4_dx_readdir(struct file *filp, int ret; if (!info) { - info = ext4_htree_create_dir_info(filp->f_pos); + info = ext4_htree_create_dir_info(filp, filp->f_pos); if (!info) return -ENOMEM; filp->private_data = info; } - if (filp->f_pos == EXT4_HTREE_EOF) + if (filp->f_pos == ext4_get_htree_eof(filp)) return 0; /* EOF */ /* Some one has messed with f_pos; reset the world */ @@ -469,8 +581,8 @@ static int ext4_dx_readdir(struct file *filp, free_rb_tree_fname(&info->root); info->curr_node = NULL; info->extra_fname = NULL; - info->curr_hash = pos2maj_hash(filp->f_pos); - info->curr_minor_hash = pos2min_hash(filp->f_pos); + info->curr_hash = pos2maj_hash(filp, filp->f_pos); + info->curr_minor_hash = pos2min_hash(filp, filp->f_pos); } /* @@ -502,7 +614,7 @@ static int ext4_dx_readdir(struct file *filp, if (ret < 0) return ret; if (ret == 0) { - filp->f_pos = EXT4_HTREE_EOF; + filp->f_pos = ext4_get_htree_eof(filp); break; } info->curr_node = rb_first(&info->root); @@ -522,7 +634,7 @@ static int ext4_dx_readdir(struct file *filp, info->curr_minor_hash = fname->minor_hash; } else { if (info->next_hash == ~0) { - filp->f_pos = EXT4_HTREE_EOF; + filp->f_pos = ext4_get_htree_eof(filp); break; } info->curr_hash = info->next_hash; @@ -541,3 +653,15 @@ static int ext4_release_dir(struct inode *inode, struct file *filp) return 0; } + +const struct file_operations ext4_dir_operations = { + .llseek = ext4_dir_llseek, + .read = generic_read_dir, + .readdir = ext4_readdir, + .unlocked_ioctl = ext4_ioctl, +#ifdef CONFIG_COMPAT + .compat_ioctl = ext4_compat_ioctl, +#endif + .fsync = ext4_sync_file, + .release = ext4_release_dir, +}; diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index ded731a..ab2594a 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h @@ -1623,7 +1623,11 @@ struct dx_hash_info u32 *seed; }; -#define EXT4_HTREE_EOF 0x7fffffff + +/* 32 and 64 bit signed EOF for dx directories */ +#define EXT4_HTREE_EOF_32BIT ((1UL << (32 - 1)) - 1) +#define EXT4_HTREE_EOF_64BIT ((1ULL << (64 - 1)) - 1) + /* * Control parameters used by ext4_htree_next_block diff --git a/fs/ext4/hash.c b/fs/ext4/hash.c index ac8f168..fa8e491 100644 --- a/fs/ext4/hash.c +++ b/fs/ext4/hash.c @@ -200,8 +200,8 @@ int ext4fs_dirhash(const char *name, int len, struct dx_hash_info *hinfo) return -1; } hash = hash & ~1; - if (hash == (EXT4_HTREE_EOF << 1)) - hash = (EXT4_HTREE_EOF-1) << 1; + if (hash == (EXT4_HTREE_EOF_32BIT << 1)) + hash = (EXT4_HTREE_EOF_32BIT - 1) << 1; hinfo->hash = hash; hinfo->minor_hash = minor_hash; return 0; diff --git a/fs/lockd/svc.c b/fs/lockd/svc.c index 2774e10..f49b9af 100644 --- a/fs/lockd/svc.c +++ b/fs/lockd/svc.c @@ -496,7 +496,7 @@ static int param_set_##name(const char *val, struct kernel_param *kp) \ __typeof__(type) num = which_strtol(val, &endp, 0); \ if (endp == val || *endp || num < (min) || num > (max)) \ return -EINVAL; \ - *((int *) kp->arg) = num; \ + *((type *) kp->arg) = num; \ return 0; \ } diff --git a/fs/nfsd/current_stateid.h b/fs/nfsd/current_stateid.h new file mode 100644 index 0000000..4123551 --- /dev/null +++ b/fs/nfsd/current_stateid.h @@ -0,0 +1,28 @@ +#ifndef _NFSD4_CURRENT_STATE_H +#define _NFSD4_CURRENT_STATE_H + +#include "state.h" +#include "xdr4.h" + +extern void clear_current_stateid(struct nfsd4_compound_state *cstate); +/* + * functions to set current state id + */ +extern void nfsd4_set_opendowngradestateid(struct nfsd4_compound_state *cstate, struct nfsd4_open_downgrade *); +extern void nfsd4_set_openstateid(struct nfsd4_compound_state *, struct nfsd4_open *); +extern void nfsd4_set_lockstateid(struct nfsd4_compound_state *, struct nfsd4_lock *); +extern void nfsd4_set_closestateid(struct nfsd4_compound_state *, struct nfsd4_close *); + +/* + * functions to consume current state id + */ +extern void nfsd4_get_opendowngradestateid(struct nfsd4_compound_state *cstate, struct nfsd4_open_downgrade *); +extern void nfsd4_get_delegreturnstateid(struct nfsd4_compound_state *, struct nfsd4_delegreturn *); +extern void nfsd4_get_freestateid(struct nfsd4_compound_state *, struct nfsd4_free_stateid *); +extern void nfsd4_get_setattrstateid(struct nfsd4_compound_state *, struct nfsd4_setattr *); +extern void nfsd4_get_closestateid(struct nfsd4_compound_state *, struct nfsd4_close *); +extern void nfsd4_get_lockustateid(struct nfsd4_compound_state *, struct nfsd4_locku *); +extern void nfsd4_get_readstateid(struct nfsd4_compound_state *, struct nfsd4_read *); +extern void nfsd4_get_writestateid(struct nfsd4_compound_state *, struct nfsd4_write *); + +#endif /* _NFSD4_CURRENT_STATE_H */ diff --git a/fs/nfsd/export.c b/fs/nfsd/export.c index cf8a6bd..8e9689a 100644 --- a/fs/nfsd/export.c +++ b/fs/nfsd/export.c @@ -87,7 +87,7 @@ static int expkey_parse(struct cache_detail *cd, char *mesg, int mlen) struct svc_expkey key; struct svc_expkey *ek = NULL; - if (mlen < 1 || mesg[mlen-1] != '\n') + if (mesg[mlen - 1] != '\n') return -EINVAL; mesg[mlen-1] = 0; diff --git a/fs/nfsd/netns.h b/fs/nfsd/netns.h new file mode 100644 index 0000000..12e0cff --- /dev/null +++ b/fs/nfsd/netns.h @@ -0,0 +1,34 @@ +/* + * per net namespace data structures for nfsd + * + * Copyright (C) 2012, Jeff Layton <jlayton@redhat.com> + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the Free + * Software Foundation; either version 2 of the License, or (at your option) + * any later version. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program; if not, write to the Free Software Foundation, Inc., 51 + * Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + */ + +#ifndef __NFSD_NETNS_H__ +#define __NFSD_NETNS_H__ + +#include <net/net_namespace.h> +#include <net/netns/generic.h> + +struct cld_net; + +struct nfsd_net { + struct cld_net *cld_net; +}; + +extern int nfsd_net_id; +#endif /* __NFSD_NETNS_H__ */ diff --git a/fs/nfsd/nfs4callback.c b/fs/nfsd/nfs4callback.c index 0e262f3..c8e9f63 100644 --- a/fs/nfsd/nfs4callback.c +++ b/fs/nfsd/nfs4callback.c @@ -645,7 +645,6 @@ static int setup_callback_client(struct nfs4_client *clp, struct nfs4_cb_conn *c .timeout = &timeparms, .program = &cb_program, .version = 0, - .authflavor = clp->cl_flavor, .flags = (RPC_CLNT_CREATE_NOPING | RPC_CLNT_CREATE_QUIET), }; struct rpc_clnt *client; @@ -656,6 +655,7 @@ static int setup_callback_client(struct nfs4_client *clp, struct nfs4_cb_conn *c args.client_name = clp->cl_principal; args.prognumber = conn->cb_prog, args.protocol = XPRT_TRANSPORT_TCP; + args.authflavor = clp->cl_flavor; clp->cl_cb_ident = conn->cb_ident; } else { if (!conn->cb_xprt) @@ -665,6 +665,7 @@ static int setup_callback_client(struct nfs4_client *clp, struct nfs4_cb_conn *c args.bc_xprt = conn->cb_xprt; args.prognumber = clp->cl_cb_session->se_cb_prog; args.protocol = XPRT_TRANSPORT_BC_TCP; + args.authflavor = RPC_AUTH_UNIX; } /* Create RPC client */ client = rpc_create(&args); @@ -754,9 +755,9 @@ static void do_probe_callback(struct nfs4_client *clp) */ void nfsd4_probe_callback(struct nfs4_client *clp) { - /* XXX: atomicity? Also, should we be using cl_cb_flags? */ + /* XXX: atomicity? Also, should we be using cl_flags? */ clp->cl_cb_state = NFSD4_CB_UNKNOWN; - set_bit(NFSD4_CLIENT_CB_UPDATE, &clp->cl_cb_flags); + set_bit(NFSD4_CLIENT_CB_UPDATE, &clp->cl_flags); do_probe_callback(clp); } @@ -915,7 +916,7 @@ void nfsd4_destroy_callback_queue(void) /* must be called under the state lock */ void nfsd4_shutdown_callback(struct nfs4_client *clp) { - set_bit(NFSD4_CLIENT_KILL, &clp->cl_cb_flags); + set_bit(NFSD4_CLIENT_CB_KILL, &clp->cl_flags); /* * Note this won't actually result in a null callback; * instead, nfsd4_do_callback_rpc() will detect the killed @@ -966,15 +967,15 @@ static void nfsd4_process_cb_update(struct nfsd4_callback *cb) svc_xprt_put(clp->cl_cb_conn.cb_xprt); clp->cl_cb_conn.cb_xprt = NULL; } - if (test_bit(NFSD4_CLIENT_KILL, &clp->cl_cb_flags)) + if (test_bit(NFSD4_CLIENT_CB_KILL, &clp->cl_flags)) return; spin_lock(&clp->cl_lock); /* * Only serialized callback code is allowed to clear these * flags; main nfsd code can only set them: */ - BUG_ON(!clp->cl_cb_flags); - clear_bit(NFSD4_CLIENT_CB_UPDATE, &clp->cl_cb_flags); + BUG_ON(!(clp->cl_flags & NFSD4_CLIENT_CB_FLAG_MASK)); + clear_bit(NFSD4_CLIENT_CB_UPDATE, &clp->cl_flags); memcpy(&conn, &cb->cb_clp->cl_cb_conn, sizeof(struct nfs4_cb_conn)); c = __nfsd4_find_backchannel(clp); if (c) { @@ -986,7 +987,7 @@ static void nfsd4_process_cb_update(struct nfsd4_callback *cb) err = setup_callback_client(clp, &conn, ses); if (err) { - warn_no_callback_path(clp, err); + nfsd4_mark_cb_down(clp, err); return; } /* Yay, the callback channel's back! Restart any callbacks: */ @@ -1000,7 +1001,7 @@ void nfsd4_do_callback_rpc(struct work_struct *w) struct nfs4_client *clp = cb->cb_clp; struct rpc_clnt *clnt; - if (clp->cl_cb_flags) + if (clp->cl_flags & NFSD4_CLIENT_CB_FLAG_MASK) nfsd4_process_cb_update(cb); clnt = clp->cl_cb_client; diff --git a/fs/nfsd/nfs4idmap.c b/fs/nfsd/nfs4idmap.c index 9409627..322d11c 100644 --- a/fs/nfsd/nfs4idmap.c +++ b/fs/nfsd/nfs4idmap.c @@ -41,6 +41,14 @@ #include "nfsd.h" /* + * Turn off idmapping when using AUTH_SYS. + */ +static bool nfs4_disable_idmapping = true; +module_param(nfs4_disable_idmapping, bool, 0644); +MODULE_PARM_DESC(nfs4_disable_idmapping, + "Turn off server's NFSv4 idmapping when using 'sec=sys'"); + +/* * Cache entry */ @@ -561,28 +569,65 @@ idmap_id_to_name(struct svc_rqst *rqstp, int type, uid_t id, char *name) return ret; } +static bool +numeric_name_to_id(struct svc_rqst *rqstp, int type, const char *name, u32 namelen, uid_t *id) +{ + int ret; + char buf[11]; + + if (namelen + 1 > sizeof(buf)) + /* too long to represent a 32-bit id: */ + return false; + /* Just to make sure it's null-terminated: */ + memcpy(buf, name, namelen); + buf[namelen] = '\0'; + ret = kstrtouint(name, 10, id); + return ret == 0; +} + +static __be32 +do_name_to_id(struct svc_rqst *rqstp, int type, const char *name, u32 namelen, uid_t *id) +{ + if (nfs4_disable_idmapping && rqstp->rq_flavor < RPC_AUTH_GSS) + if (numeric_name_to_id(rqstp, type, name, namelen, id)) + return 0; + /* + * otherwise, fall through and try idmapping, for + * backwards compatibility with clients sending names: + */ + return idmap_name_to_id(rqstp, type, name, namelen, id); +} + +static int +do_id_to_name(struct svc_rqst *rqstp, int type, uid_t id, char *name) +{ + if (nfs4_disable_idmapping && rqstp->rq_flavor < RPC_AUTH_GSS) + return sprintf(name, "%u", id); + return idmap_id_to_name(rqstp, type, id, name); +} + __be32 nfsd_map_name_to_uid(struct svc_rqst *rqstp, const char *name, size_t namelen, __u32 *id) { - return idmap_name_to_id(rqstp, IDMAP_TYPE_USER, name, namelen, id); + return do_name_to_id(rqstp, IDMAP_TYPE_USER, name, namelen, id); } __be32 nfsd_map_name_to_gid(struct svc_rqst *rqstp, const char *name, size_t namelen, __u32 *id) { - return idmap_name_to_id(rqstp, IDMAP_TYPE_GROUP, name, namelen, id); + return do_name_to_id(rqstp, IDMAP_TYPE_GROUP, name, namelen, id); } int nfsd_map_uid_to_name(struct svc_rqst *rqstp, __u32 id, char *name) { - return idmap_id_to_name(rqstp, IDMAP_TYPE_USER, id, name); + return do_id_to_name(rqstp, IDMAP_TYPE_USER, id, name); } int nfsd_map_gid_to_name(struct svc_rqst *rqstp, __u32 id, char *name) { - return idmap_id_to_name(rqstp, IDMAP_TYPE_GROUP, id, name); + return do_id_to_name(rqstp, IDMAP_TYPE_GROUP, id, name); } diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c index 896da74..2ed14df 100644 --- a/fs/nfsd/nfs4proc.c +++ b/fs/nfsd/nfs4proc.c @@ -39,6 +39,7 @@ #include "cache.h" #include "xdr4.h" #include "vfs.h" +#include "current_stateid.h" #define NFSDDBG_FACILITY NFSDDBG_PROC @@ -192,10 +193,13 @@ static __be32 nfsd_check_obj_isreg(struct svc_fh *fh) static __be32 do_open_lookup(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_open *open) { - struct svc_fh resfh; + struct svc_fh *resfh; __be32 status; - fh_init(&resfh, NFS4_FHSIZE); + resfh = kmalloc(sizeof(struct svc_fh), GFP_KERNEL); + if (!resfh) + return nfserr_jukebox; + fh_init(resfh, NFS4_FHSIZE); open->op_truncate = 0; if (open->op_create) { @@ -220,7 +224,7 @@ do_open_lookup(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_o */ status = do_nfsd_create(rqstp, current_fh, open->op_fname.data, open->op_fname.len, &open->op_iattr, - &resfh, open->op_createmode, + resfh, open->op_createmode, (u32 *)open->op_verf.data, &open->op_truncate, &open->op_created); @@ -234,30 +238,29 @@ do_open_lookup(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_o FATTR4_WORD1_TIME_MODIFY); } else { status = nfsd_lookup(rqstp, current_fh, - open->op_fname.data, open->op_fname.len, &resfh); + open->op_fname.data, open->op_fname.len, resfh); fh_unlock(current_fh); if (status) goto out; - status = nfsd_check_obj_isreg(&resfh); + status = nfsd_check_obj_isreg(resfh); } if (status) goto out; if (is_create_with_attrs(open) && open->op_acl != NULL) - do_set_nfs4_acl(rqstp, &resfh, open->op_acl, open->op_bmval); - - set_change_info(&open->op_cinfo, current_fh); - fh_dup2(current_fh, &resfh); + do_set_nfs4_acl(rqstp, resfh, open->op_acl, open->op_bmval); /* set reply cache */ fh_copy_shallow(&open->op_openowner->oo_owner.so_replay.rp_openfh, - &resfh.fh_handle); + &resfh->fh_handle); if (!open->op_created) - status = do_open_permission(rqstp, current_fh, open, + status = do_open_permission(rqstp, resfh, open, NFSD_MAY_NOP); - + set_change_info(&open->op_cinfo, current_fh); + fh_dup2(current_fh, resfh); out: - fh_put(&resfh); + fh_put(resfh); + kfree(resfh); return status; } @@ -310,16 +313,14 @@ nfsd4_open(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, if (open->op_create && open->op_claim_type != NFS4_OPEN_CLAIM_NULL) return nfserr_inval; - /* We don't yet support WANT bits: */ - open->op_share_access &= NFS4_SHARE_ACCESS_MASK; - open->op_created = 0; /* * RFC5661 18.51.3 * Before RECLAIM_COMPLETE done, server should deny new lock */ if (nfsd4_has_session(cstate) && - !cstate->session->se_client->cl_firststate && + !test_bit(NFSD4_CLIENT_RECLAIM_COMPLETE, + &cstate->session->se_client->cl_flags) && open->op_claim_type != NFS4_OPEN_CLAIM_PREVIOUS) return nfserr_grace; @@ -452,6 +453,10 @@ nfsd4_restorefh(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, return nfserr_restorefh; fh_dup2(&cstate->current_fh, &cstate->save_fh); + if (HAS_STATE_ID(cstate, SAVED_STATE_ID_FLAG)) { + memcpy(&cstate->current_stateid, &cstate->save_stateid, sizeof(stateid_t)); + SET_STATE_ID(cstate, CURRENT_STATE_ID_FLAG); + } return nfs_ok; } @@ -463,6 +468,10 @@ nfsd4_savefh(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, return nfserr_nofilehandle; fh_dup2(&cstate->save_fh, &cstate->current_fh); + if (HAS_STATE_ID(cstate, CURRENT_STATE_ID_FLAG)) { + memcpy(&cstate->save_stateid, &cstate->current_stateid, sizeof(stateid_t)); + SET_STATE_ID(cstate, SAVED_STATE_ID_FLAG); + } return nfs_ok; } @@ -481,14 +490,20 @@ nfsd4_access(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, &access->ac_supported); } +static void gen_boot_verifier(nfs4_verifier *verifier) +{ + __be32 verf[2]; + + verf[0] = (__be32)nfssvc_boot.tv_sec; + verf[1] = (__be32)nfssvc_boot.tv_usec; + memcpy(verifier->data, verf, sizeof(verifier->data)); +} + static __be32 nfsd4_commit(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, struct nfsd4_commit *commit) { - u32 *p = (u32 *)commit->co_verf.data; - *p++ = nfssvc_boot.tv_sec; - *p++ = nfssvc_boot.tv_usec; - + gen_boot_verifier(&commit->co_verf); return nfsd_commit(rqstp, &cstate->current_fh, commit->co_offset, commit->co_count); } @@ -865,7 +880,6 @@ nfsd4_write(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, { stateid_t *stateid = &write->wr_stateid; struct file *filp = NULL; - u32 *p; __be32 status = nfs_ok; unsigned long cnt; @@ -887,9 +901,7 @@ nfsd4_write(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, cnt = write->wr_buflen; write->wr_how_written = write->wr_stable_how; - p = (u32 *)write->wr_verifier.data; - *p++ = nfssvc_boot.tv_sec; - *p++ = nfssvc_boot.tv_usec; + gen_boot_verifier(&write->wr_verifier); status = nfsd_write(rqstp, &cstate->current_fh, filp, write->wr_offset, rqstp->rq_vec, write->wr_vlen, @@ -1000,6 +1012,8 @@ static inline void nfsd4_increment_op_stats(u32 opnum) typedef __be32(*nfsd4op_func)(struct svc_rqst *, struct nfsd4_compound_state *, void *); typedef u32(*nfsd4op_rsize)(struct svc_rqst *, struct nfsd4_op *op); +typedef void(*stateid_setter)(struct nfsd4_compound_state *, void *); +typedef void(*stateid_getter)(struct nfsd4_compound_state *, void *); enum nfsd4_op_flags { ALLOWED_WITHOUT_FH = 1 << 0, /* No current filehandle required */ @@ -1025,6 +1039,10 @@ enum nfsd4_op_flags { * the v4.0 case). */ OP_CACHEME = 1 << 6, + /* + * These are ops which clear current state id. + */ + OP_CLEAR_STATEID = 1 << 7, }; struct nfsd4_operation { @@ -1033,11 +1051,15 @@ struct nfsd4_operation { char *op_name; /* Try to get response size before operation */ nfsd4op_rsize op_rsize_bop; + stateid_setter op_get_currentstateid; + stateid_getter op_set_currentstateid; }; static struct nfsd4_operation nfsd4_ops[]; +#ifdef NFSD_DEBUG static const char *nfsd4_op_name(unsigned opnum); +#endif /* * Enforce NFSv4.1 COMPOUND ordering rules: @@ -1215,13 +1237,23 @@ nfsd4_proc_compound(struct svc_rqst *rqstp, if (op->status) goto encode_op; - if (opdesc->op_func) + if (opdesc->op_func) { + if (opdesc->op_get_currentstateid) + opdesc->op_get_currentstateid(cstate, &op->u); op->status = opdesc->op_func(rqstp, cstate, &op->u); - else + } else BUG_ON(op->status == nfs_ok); - if (!op->status && need_wrongsec_check(rqstp)) - op->status = check_nfsd_access(cstate->current_fh.fh_export, rqstp); + if (!op->status) { + if (opdesc->op_set_currentstateid) + opdesc->op_set_currentstateid(cstate, &op->u); + + if (opdesc->op_flags & OP_CLEAR_STATEID) + clear_current_stateid(cstate); + + if (need_wrongsec_check(rqstp)) + op->status = check_nfsd_access(cstate->current_fh.fh_export, rqstp); + } encode_op: /* Only from SEQUENCE */ @@ -1413,6 +1445,8 @@ static struct nfsd4_operation nfsd4_ops[] = { .op_flags = OP_MODIFIES_SOMETHING, .op_name = "OP_CLOSE", .op_rsize_bop = (nfsd4op_rsize)nfsd4_status_stateid_rsize, + .op_get_currentstateid = (stateid_getter)nfsd4_get_closestateid, + .op_set_currentstateid = (stateid_setter)nfsd4_set_closestateid, }, [OP_COMMIT] = { .op_func = (nfsd4op_func)nfsd4_commit, @@ -1422,7 +1456,7 @@ static struct nfsd4_operation nfsd4_ops[] = { }, [OP_CREATE] = { .op_func = (nfsd4op_func)nfsd4_create, - .op_flags = OP_MODIFIES_SOMETHING | OP_CACHEME, + .op_flags = OP_MODIFIES_SOMETHING | OP_CACHEME | OP_CLEAR_STATEID, .op_name = "OP_CREATE", .op_rsize_bop = (nfsd4op_rsize)nfsd4_create_rsize, }, @@ -1431,6 +1465,7 @@ static struct nfsd4_operation nfsd4_ops[] = { .op_flags = OP_MODIFIES_SOMETHING, .op_name = "OP_DELEGRETURN", .op_rsize_bop = nfsd4_only_status_rsize, + .op_get_currentstateid = (stateid_getter)nfsd4_get_delegreturnstateid, }, [OP_GETATTR] = { .op_func = (nfsd4op_func)nfsd4_getattr, @@ -1453,6 +1488,7 @@ static struct nfsd4_operation nfsd4_ops[] = { .op_flags = OP_MODIFIES_SOMETHING, .op_name = "OP_LOCK", .op_rsize_bop = (nfsd4op_rsize)nfsd4_lock_rsize, + .op_set_currentstateid = (stateid_setter)nfsd4_set_lockstateid, }, [OP_LOCKT] = { .op_func = (nfsd4op_func)nfsd4_lockt, @@ -1463,15 +1499,16 @@ static struct nfsd4_operation nfsd4_ops[] = { .op_flags = OP_MODIFIES_SOMETHING, .op_name = "OP_LOCKU", .op_rsize_bop = (nfsd4op_rsize)nfsd4_status_stateid_rsize, + .op_get_currentstateid = (stateid_getter)nfsd4_get_lockustateid, }, [OP_LOOKUP] = { .op_func = (nfsd4op_func)nfsd4_lookup, - .op_flags = OP_HANDLES_WRONGSEC, + .op_flags = OP_HANDLES_WRONGSEC | OP_CLEAR_STATEID, .op_name = "OP_LOOKUP", }, [OP_LOOKUPP] = { .op_func = (nfsd4op_func)nfsd4_lookupp, - .op_flags = OP_HANDLES_WRONGSEC, + .op_flags = OP_HANDLES_WRONGSEC | OP_CLEAR_STATEID, .op_name = "OP_LOOKUPP", }, [OP_NVERIFY] = { @@ -1483,6 +1520,7 @@ static struct nfsd4_operation nfsd4_ops[] = { .op_flags = OP_HANDLES_WRONGSEC | OP_MODIFIES_SOMETHING, .op_name = "OP_OPEN", .op_rsize_bop = (nfsd4op_rsize)nfsd4_open_rsize, + .op_set_currentstateid = (stateid_setter)nfsd4_set_openstateid, }, [OP_OPEN_CONFIRM] = { .op_func = (nfsd4op_func)nfsd4_open_confirm, @@ -1495,25 +1533,30 @@ static struct nfsd4_operation nfsd4_ops[] = { .op_flags = OP_MODIFIES_SOMETHING, .op_name = "OP_OPEN_DOWNGRADE", .op_rsize_bop = (nfsd4op_rsize)nfsd4_status_stateid_rsize, + .op_get_currentstateid = (stateid_getter)nfsd4_get_opendowngradestateid, + .op_set_currentstateid = (stateid_setter)nfsd4_set_opendowngradestateid, }, [OP_PUTFH] = { .op_func = (nfsd4op_func)nfsd4_putfh, .op_flags = ALLOWED_WITHOUT_FH | ALLOWED_ON_ABSENT_FS - | OP_IS_PUTFH_LIKE | OP_MODIFIES_SOMETHING, + | OP_IS_PUTFH_LIKE | OP_MODIFIES_SOMETHING + | OP_CLEAR_STATEID, .op_name = "OP_PUTFH", .op_rsize_bop = (nfsd4op_rsize)nfsd4_only_status_rsize, }, [OP_PUTPUBFH] = { .op_func = (nfsd4op_func)nfsd4_putrootfh, .op_flags = ALLOWED_WITHOUT_FH | ALLOWED_ON_ABSENT_FS - | OP_IS_PUTFH_LIKE | OP_MODIFIES_SOMETHING, + | OP_IS_PUTFH_LIKE | OP_MODIFIES_SOMETHING + | OP_CLEAR_STATEID, .op_name = "OP_PUTPUBFH", .op_rsize_bop = (nfsd4op_rsize)nfsd4_only_status_rsize, }, [OP_PUTROOTFH] = { .op_func = (nfsd4op_func)nfsd4_putrootfh, .op_flags = ALLOWED_WITHOUT_FH | ALLOWED_ON_ABSENT_FS - | OP_IS_PUTFH_LIKE | OP_MODIFIES_SOMETHING, + | OP_IS_PUTFH_LIKE | OP_MODIFIES_SOMETHING + | OP_CLEAR_STATEID, .op_name = "OP_PUTROOTFH", .op_rsize_bop = (nfsd4op_rsize)nfsd4_only_status_rsize, }, @@ -1522,6 +1565,7 @@ static struct nfsd4_operation nfsd4_ops[] = { .op_flags = OP_MODIFIES_SOMETHING, .op_name = "OP_READ", .op_rsize_bop = (nfsd4op_rsize)nfsd4_read_rsize, + .op_get_currentstateid = (stateid_getter)nfsd4_get_readstateid, }, [OP_READDIR] = { .op_func = (nfsd4op_func)nfsd4_readdir, @@ -1576,6 +1620,7 @@ static struct nfsd4_operation nfsd4_ops[] = { .op_name = "OP_SETATTR", .op_flags = OP_MODIFIES_SOMETHING | OP_CACHEME, .op_rsize_bop = (nfsd4op_rsize)nfsd4_setattr_rsize, + .op_get_currentstateid = (stateid_getter)nfsd4_get_setattrstateid, }, [OP_SETCLIENTID] = { .op_func = (nfsd4op_func)nfsd4_setclientid, @@ -1600,6 +1645,7 @@ static struct nfsd4_operation nfsd4_ops[] = { .op_flags = OP_MODIFIES_SOMETHING | OP_CACHEME, .op_name = "OP_WRITE", .op_rsize_bop = (nfsd4op_rsize)nfsd4_write_rsize, + .op_get_currentstateid = (stateid_getter)nfsd4_get_writestateid, }, [OP_RELEASE_LOCKOWNER] = { .op_func = (nfsd4op_func)nfsd4_release_lockowner, @@ -1674,12 +1720,14 @@ static struct nfsd4_operation nfsd4_ops[] = { }, }; +#ifdef NFSD_DEBUG static const char *nfsd4_op_name(unsigned opnum) { if (opnum < ARRAY_SIZE(nfsd4_ops)) return nfsd4_ops[opnum].op_name; return "unknown_operation"; } +#endif #define nfsd4_voidres nfsd4_voidargs struct nfsd4_voidargs { int dummy; }; diff --git a/fs/nfsd/nfs4recover.c b/fs/nfsd/nfs4recover.c index 0b3e875..4767429 100644 --- a/fs/nfsd/nfs4recover.c +++ b/fs/nfsd/nfs4recover.c @@ -1,5 +1,6 @@ /* * Copyright (c) 2004 The Regents of the University of Michigan. +* Copyright (c) 2012 Jeff Layton <jlayton@redhat.com> * All rights reserved. * * Andy Adamson <andros@citi.umich.edu> @@ -36,16 +37,34 @@ #include <linux/namei.h> #include <linux/crypto.h> #include <linux/sched.h> +#include <linux/fs.h> +#include <linux/module.h> +#include <net/net_namespace.h> +#include <linux/sunrpc/rpc_pipe_fs.h> +#include <linux/sunrpc/clnt.h> +#include <linux/nfsd/cld.h> #include "nfsd.h" #include "state.h" #include "vfs.h" +#include "netns.h" #define NFSDDBG_FACILITY NFSDDBG_PROC +/* Declarations */ +struct nfsd4_client_tracking_ops { + int (*init)(struct net *); + void (*exit)(struct net *); + void (*create)(struct nfs4_client *); + void (*remove)(struct nfs4_client *); + int (*check)(struct nfs4_client *); + void (*grace_done)(struct net *, time_t); +}; + /* Globals */ static struct file *rec_file; static char user_recovery_dirname[PATH_MAX] = "/var/lib/nfs/v4recovery"; +static struct nfsd4_client_tracking_ops *client_tracking_ops; static int nfs4_save_creds(const struct cred **original_creds) @@ -117,7 +136,8 @@ out_no_tfm: return status; } -void nfsd4_create_clid_dir(struct nfs4_client *clp) +static void +nfsd4_create_clid_dir(struct nfs4_client *clp) { const struct cred *original_cred; char *dname = clp->cl_recdir; @@ -126,9 +146,8 @@ void nfsd4_create_clid_dir(struct nfs4_client *clp) dprintk("NFSD: nfsd4_create_clid_dir for \"%s\"\n", dname); - if (clp->cl_firststate) + if (test_and_set_bit(NFSD4_CLIENT_STABLE, &clp->cl_flags)) return; - clp->cl_firststate = 1; if (!rec_file) return; status = nfs4_save_creds(&original_cred); @@ -265,19 +284,19 @@ out_unlock: return status; } -void +static void nfsd4_remove_clid_dir(struct nfs4_client *clp) { const struct cred *original_cred; int status; - if (!rec_file || !clp->cl_firststate) + if (!rec_file || !test_bit(NFSD4_CLIENT_STABLE, &clp->cl_flags)) return; status = mnt_want_write_file(rec_file); if (status) goto out; - clp->cl_firststate = 0; + clear_bit(NFSD4_CLIENT_STABLE, &clp->cl_flags); status = nfs4_save_creds(&original_cred); if (status < 0) @@ -292,7 +311,6 @@ out: if (status) printk("NFSD: Failed to remove expired client state directory" " %.*s\n", HEXDIR_LEN, clp->cl_recdir); - return; } static int @@ -311,8 +329,9 @@ purge_old(struct dentry *parent, struct dentry *child) return 0; } -void -nfsd4_recdir_purge_old(void) { +static void +nfsd4_recdir_purge_old(struct net *net, time_t boot_time) +{ int status; if (!rec_file) @@ -343,7 +362,7 @@ load_recdir(struct dentry *parent, struct dentry *child) return 0; } -int +static int nfsd4_recdir_load(void) { int status; @@ -361,8 +380,8 @@ nfsd4_recdir_load(void) { * Hold reference to the recovery directory. */ -void -nfsd4_init_recdir() +static int +nfsd4_init_recdir(void) { const struct cred *original_cred; int status; @@ -377,20 +396,44 @@ nfsd4_init_recdir() printk("NFSD: Unable to change credentials to find recovery" " directory: error %d\n", status); - return; + return status; } rec_file = filp_open(user_recovery_dirname, O_RDONLY | O_DIRECTORY, 0); if (IS_ERR(rec_file)) { printk("NFSD: unable to find recovery directory %s\n", user_recovery_dirname); + status = PTR_ERR(rec_file); rec_file = NULL; } nfs4_reset_creds(original_cred); + return status; } -void +static int +nfsd4_load_reboot_recovery_data(struct net *net) +{ + int status; + + /* XXX: The legacy code won't work in a container */ + if (net != &init_net) { + WARN(1, KERN_ERR "NFSD: attempt to initialize legacy client " + "tracking in a container!\n"); + return -EINVAL; + } + + nfs4_lock_state(); + status = nfsd4_init_recdir(); + if (!status) + status = nfsd4_recdir_load(); + nfs4_unlock_state(); + if (status) + printk(KERN_ERR "NFSD: Failure reading reboot recovery data\n"); + return status; +} + +static void nfsd4_shutdown_recdir(void) { if (!rec_file) @@ -399,6 +442,13 @@ nfsd4_shutdown_recdir(void) rec_file = NULL; } +static void +nfsd4_legacy_tracking_exit(struct net *net) +{ + nfs4_release_reclaim(); + nfsd4_shutdown_recdir(); +} + /* * Change the NFSv4 recovery directory to recdir. */ @@ -425,3 +475,572 @@ nfs4_recoverydir(void) { return user_recovery_dirname; } + +static int +nfsd4_check_legacy_client(struct nfs4_client *clp) +{ + /* did we already find that this client is stable? */ + if (test_bit(NFSD4_CLIENT_STABLE, &clp->cl_flags)) + return 0; + + /* look for it in the reclaim hashtable otherwise */ + if (nfsd4_find_reclaim_client(clp)) { + set_bit(NFSD4_CLIENT_STABLE, &clp->cl_flags); + return 0; + } + + return -ENOENT; +} + +static struct nfsd4_client_tracking_ops nfsd4_legacy_tracking_ops = { + .init = nfsd4_load_reboot_recovery_data, + .exit = nfsd4_legacy_tracking_exit, + .create = nfsd4_create_clid_dir, + .remove = nfsd4_remove_clid_dir, + .check = nfsd4_check_legacy_client, + .grace_done = nfsd4_recdir_purge_old, +}; + +/* Globals */ +#define NFSD_PIPE_DIR "nfsd" +#define NFSD_CLD_PIPE "cld" + +/* per-net-ns structure for holding cld upcall info */ +struct cld_net { + struct rpc_pipe *cn_pipe; + spinlock_t cn_lock; + struct list_head cn_list; + unsigned int cn_xid; +}; + +struct cld_upcall { + struct list_head cu_list; + struct cld_net *cu_net; + struct task_struct *cu_task; + struct cld_msg cu_msg; +}; + +static int +__cld_pipe_upcall(struct rpc_pipe *pipe, struct cld_msg *cmsg) +{ + int ret; + struct rpc_pipe_msg msg; + + memset(&msg, 0, sizeof(msg)); + msg.data = cmsg; + msg.len = sizeof(*cmsg); + + /* + * Set task state before we queue the upcall. That prevents + * wake_up_process in the downcall from racing with schedule. + */ + set_current_state(TASK_UNINTERRUPTIBLE); + ret = rpc_queue_upcall(pipe, &msg); + if (ret < 0) { + set_current_state(TASK_RUNNING); + goto out; + } + + schedule(); + set_current_state(TASK_RUNNING); + + if (msg.errno < 0) + ret = msg.errno; +out: + return ret; +} + +static int +cld_pipe_upcall(struct rpc_pipe *pipe, struct cld_msg *cmsg) +{ + int ret; + + /* + * -EAGAIN occurs when pipe is closed and reopened while there are + * upcalls queued. + */ + do { + ret = __cld_pipe_upcall(pipe, cmsg); + } while (ret == -EAGAIN); + + return ret; +} + +static ssize_t +cld_pipe_downcall(struct file *filp, const char __user *src, size_t mlen) +{ + struct cld_upcall *tmp, *cup; + struct cld_msg *cmsg = (struct cld_msg *)src; + uint32_t xid; + struct nfsd_net *nn = net_generic(filp->f_dentry->d_sb->s_fs_info, + nfsd_net_id); + struct cld_net *cn = nn->cld_net; + + if (mlen != sizeof(*cmsg)) { + dprintk("%s: got %lu bytes, expected %lu\n", __func__, mlen, + sizeof(*cmsg)); + return -EINVAL; + } + + /* copy just the xid so we can try to find that */ + if (copy_from_user(&xid, &cmsg->cm_xid, sizeof(xid)) != 0) { + dprintk("%s: error when copying xid from userspace", __func__); + return -EFAULT; + } + + /* walk the list and find corresponding xid */ + cup = NULL; + spin_lock(&cn->cn_lock); + list_for_each_entry(tmp, &cn->cn_list, cu_list) { + if (get_unaligned(&tmp->cu_msg.cm_xid) == xid) { + cup = tmp; + list_del_init(&cup->cu_list); + break; + } + } + spin_unlock(&cn->cn_lock); + + /* couldn't find upcall? */ + if (!cup) { + dprintk("%s: couldn't find upcall -- xid=%u\n", __func__, xid); + return -EINVAL; + } + + if (copy_from_user(&cup->cu_msg, src, mlen) != 0) + return -EFAULT; + + wake_up_process(cup->cu_task); + return mlen; +} + +static void +cld_pipe_destroy_msg(struct rpc_pipe_msg *msg) +{ + struct cld_msg *cmsg = msg->data; + struct cld_upcall *cup = container_of(cmsg, struct cld_upcall, + cu_msg); + + /* errno >= 0 means we got a downcall */ + if (msg->errno >= 0) + return; + + wake_up_process(cup->cu_task); +} + +static const struct rpc_pipe_ops cld_upcall_ops = { + .upcall = rpc_pipe_generic_upcall, + .downcall = cld_pipe_downcall, + .destroy_msg = cld_pipe_destroy_msg, +}; + +static struct dentry * +nfsd4_cld_register_sb(struct super_block *sb, struct rpc_pipe *pipe) +{ + struct dentry *dir, *dentry; + + dir = rpc_d_lookup_sb(sb, NFSD_PIPE_DIR); + if (dir == NULL) + return ERR_PTR(-ENOENT); + dentry = rpc_mkpipe_dentry(dir, NFSD_CLD_PIPE, NULL, pipe); + dput(dir); + return dentry; +} + +static void +nfsd4_cld_unregister_sb(struct rpc_pipe *pipe) +{ + if (pipe->dentry) + rpc_unlink(pipe->dentry); +} + +static struct dentry * +nfsd4_cld_register_net(struct net *net, struct rpc_pipe *pipe) +{ + struct super_block *sb; + struct dentry *dentry; + + sb = rpc_get_sb_net(net); + if (!sb) + return NULL; + dentry = nfsd4_cld_register_sb(sb, pipe); + rpc_put_sb_net(net); + return dentry; +} + +static void +nfsd4_cld_unregister_net(struct net *net, struct rpc_pipe *pipe) +{ + struct super_block *sb; + + sb = rpc_get_sb_net(net); + if (sb) { + nfsd4_cld_unregister_sb(pipe); + rpc_put_sb_net(net); + } +} + +/* Initialize rpc_pipefs pipe for communication with client tracking daemon */ +static int +nfsd4_init_cld_pipe(struct net *net) +{ + int ret; + struct dentry *dentry; + struct nfsd_net *nn = net_generic(net, nfsd_net_id); + struct cld_net *cn; + + if (nn->cld_net) + return 0; + + cn = kzalloc(sizeof(*cn), GFP_KERNEL); + if (!cn) { + ret = -ENOMEM; + goto err; + } + + cn->cn_pipe = rpc_mkpipe_data(&cld_upcall_ops, RPC_PIPE_WAIT_FOR_OPEN); + if (IS_ERR(cn->cn_pipe)) { + ret = PTR_ERR(cn->cn_pipe); + goto err; + } + spin_lock_init(&cn->cn_lock); + INIT_LIST_HEAD(&cn->cn_list); + + dentry = nfsd4_cld_register_net(net, cn->cn_pipe); + if (IS_ERR(dentry)) { + ret = PTR_ERR(dentry); + goto err_destroy_data; + } + + cn->cn_pipe->dentry = dentry; + nn->cld_net = cn; + return 0; + +err_destroy_data: + rpc_destroy_pipe_data(cn->cn_pipe); +err: + kfree(cn); + printk(KERN_ERR "NFSD: unable to create nfsdcld upcall pipe (%d)\n", + ret); + return ret; +} + +static void +nfsd4_remove_cld_pipe(struct net *net) +{ + struct nfsd_net *nn = net_generic(net, nfsd_net_id); + struct cld_net *cn = nn->cld_net; + + nfsd4_cld_unregister_net(net, cn->cn_pipe); + rpc_destroy_pipe_data(cn->cn_pipe); + kfree(nn->cld_net); + nn->cld_net = NULL; +} + +static struct cld_upcall * +alloc_cld_upcall(struct cld_net *cn) +{ + struct cld_upcall *new, *tmp; + + new = kzalloc(sizeof(*new), GFP_KERNEL); + if (!new) + return new; + + /* FIXME: hard cap on number in flight? */ +restart_search: + spin_lock(&cn->cn_lock); + list_for_each_entry(tmp, &cn->cn_list, cu_list) { + if (tmp->cu_msg.cm_xid == cn->cn_xid) { + cn->cn_xid++; + spin_unlock(&cn->cn_lock); + goto restart_search; + } + } + new->cu_task = current; + new->cu_msg.cm_vers = CLD_UPCALL_VERSION; + put_unaligned(cn->cn_xid++, &new->cu_msg.cm_xid); + new->cu_net = cn; + list_add(&new->cu_list, &cn->cn_list); + spin_unlock(&cn->cn_lock); + + dprintk("%s: allocated xid %u\n", __func__, new->cu_msg.cm_xid); + + return new; +} + +static void +free_cld_upcall(struct cld_upcall *victim) +{ + struct cld_net *cn = victim->cu_net; + + spin_lock(&cn->cn_lock); + list_del(&victim->cu_list); + spin_unlock(&cn->cn_lock); + kfree(victim); +} + +/* Ask daemon to create a new record */ +static void +nfsd4_cld_create(struct nfs4_client *clp) +{ + int ret; + struct cld_upcall *cup; + /* FIXME: determine net from clp */ + struct nfsd_net *nn = net_generic(&init_net, nfsd_net_id); + struct cld_net *cn = nn->cld_net; + + /* Don't upcall if it's already stored */ + if (test_bit(NFSD4_CLIENT_STABLE, &clp->cl_flags)) + return; + + cup = alloc_cld_upcall(cn); + if (!cup) { + ret = -ENOMEM; + goto out_err; + } + + cup->cu_msg.cm_cmd = Cld_Create; + cup->cu_msg.cm_u.cm_name.cn_len = clp->cl_name.len; + memcpy(cup->cu_msg.cm_u.cm_name.cn_id, clp->cl_name.data, + clp->cl_name.len); + + ret = cld_pipe_upcall(cn->cn_pipe, &cup->cu_msg); + if (!ret) { + ret = cup->cu_msg.cm_status; + set_bit(NFSD4_CLIENT_STABLE, &clp->cl_flags); + } + + free_cld_upcall(cup); +out_err: + if (ret) + printk(KERN_ERR "NFSD: Unable to create client " + "record on stable storage: %d\n", ret); +} + +/* Ask daemon to create a new record */ +static void +nfsd4_cld_remove(struct nfs4_client *clp) +{ + int ret; + struct cld_upcall *cup; + /* FIXME: determine net from clp */ + struct nfsd_net *nn = net_generic(&init_net, nfsd_net_id); + struct cld_net *cn = nn->cld_net; + + /* Don't upcall if it's already removed */ + if (!test_bit(NFSD4_CLIENT_STABLE, &clp->cl_flags)) + return; + + cup = alloc_cld_upcall(cn); + if (!cup) { + ret = -ENOMEM; + goto out_err; + } + + cup->cu_msg.cm_cmd = Cld_Remove; + cup->cu_msg.cm_u.cm_name.cn_len = clp->cl_name.len; + memcpy(cup->cu_msg.cm_u.cm_name.cn_id, clp->cl_name.data, + clp->cl_name.len); + + ret = cld_pipe_upcall(cn->cn_pipe, &cup->cu_msg); + if (!ret) { + ret = cup->cu_msg.cm_status; + clear_bit(NFSD4_CLIENT_STABLE, &clp->cl_flags); + } + + free_cld_upcall(cup); +out_err: + if (ret) + printk(KERN_ERR "NFSD: Unable to remove client " + "record from stable storage: %d\n", ret); +} + +/* Check for presence of a record, and update its timestamp */ +static int +nfsd4_cld_check(struct nfs4_client *clp) +{ + int ret; + struct cld_upcall *cup; + /* FIXME: determine net from clp */ + struct nfsd_net *nn = net_generic(&init_net, nfsd_net_id); + struct cld_net *cn = nn->cld_net; + + /* Don't upcall if one was already stored during this grace pd */ + if (test_bit(NFSD4_CLIENT_STABLE, &clp->cl_flags)) + return 0; + + cup = alloc_cld_upcall(cn); + if (!cup) { + printk(KERN_ERR "NFSD: Unable to check client record on " + "stable storage: %d\n", -ENOMEM); + return -ENOMEM; + } + + cup->cu_msg.cm_cmd = Cld_Check; + cup->cu_msg.cm_u.cm_name.cn_len = clp->cl_name.len; + memcpy(cup->cu_msg.cm_u.cm_name.cn_id, clp->cl_name.data, + clp->cl_name.len); + + ret = cld_pipe_upcall(cn->cn_pipe, &cup->cu_msg); + if (!ret) { + ret = cup->cu_msg.cm_status; + set_bit(NFSD4_CLIENT_STABLE, &clp->cl_flags); + } + + free_cld_upcall(cup); + return ret; +} + +static void +nfsd4_cld_grace_done(struct net *net, time_t boot_time) +{ + int ret; + struct cld_upcall *cup; + struct nfsd_net *nn = net_generic(net, nfsd_net_id); + struct cld_net *cn = nn->cld_net; + + cup = alloc_cld_upcall(cn); + if (!cup) { + ret = -ENOMEM; + goto out_err; + } + + cup->cu_msg.cm_cmd = Cld_GraceDone; + cup->cu_msg.cm_u.cm_gracetime = (int64_t)boot_time; + ret = cld_pipe_upcall(cn->cn_pipe, &cup->cu_msg); + if (!ret) + ret = cup->cu_msg.cm_status; + + free_cld_upcall(cup); +out_err: + if (ret) + printk(KERN_ERR "NFSD: Unable to end grace period: %d\n", ret); +} + +static struct nfsd4_client_tracking_ops nfsd4_cld_tracking_ops = { + .init = nfsd4_init_cld_pipe, + .exit = nfsd4_remove_cld_pipe, + .create = nfsd4_cld_create, + .remove = nfsd4_cld_remove, + .check = nfsd4_cld_check, + .grace_done = nfsd4_cld_grace_done, +}; + +int +nfsd4_client_tracking_init(struct net *net) +{ + int status; + struct path path; + + if (!client_tracking_ops) { + client_tracking_ops = &nfsd4_cld_tracking_ops; + status = kern_path(nfs4_recoverydir(), LOOKUP_FOLLOW, &path); + if (!status) { + if (S_ISDIR(path.dentry->d_inode->i_mode)) + client_tracking_ops = + &nfsd4_legacy_tracking_ops; + path_put(&path); + } + } + + status = client_tracking_ops->init(net); + if (status) { + printk(KERN_WARNING "NFSD: Unable to initialize client " + "recovery tracking! (%d)\n", status); + client_tracking_ops = NULL; + } + return status; +} + +void +nfsd4_client_tracking_exit(struct net *net) +{ + if (client_tracking_ops) { + client_tracking_ops->exit(net); + client_tracking_ops = NULL; + } +} + +void +nfsd4_client_record_create(struct nfs4_client *clp) +{ + if (client_tracking_ops) + client_tracking_ops->create(clp); +} + +void +nfsd4_client_record_remove(struct nfs4_client *clp) +{ + if (client_tracking_ops) + client_tracking_ops->remove(clp); +} + +int +nfsd4_client_record_check(struct nfs4_client *clp) +{ + if (client_tracking_ops) + return client_tracking_ops->check(clp); + + return -EOPNOTSUPP; +} + +void +nfsd4_record_grace_done(struct net *net, time_t boot_time) +{ + if (client_tracking_ops) + client_tracking_ops->grace_done(net, boot_time); +} + +static int +rpc_pipefs_event(struct notifier_block *nb, unsigned long event, void *ptr) +{ + struct super_block *sb = ptr; + struct net *net = sb->s_fs_info; + struct nfsd_net *nn = net_generic(net, nfsd_net_id); + struct cld_net *cn = nn->cld_net; + struct dentry *dentry; + int ret = 0; + + if (!try_module_get(THIS_MODULE)) + return 0; + + if (!cn) { + module_put(THIS_MODULE); + return 0; + } + + switch (event) { + case RPC_PIPEFS_MOUNT: + dentry = nfsd4_cld_register_sb(sb, cn->cn_pipe); + if (IS_ERR(dentry)) { + ret = PTR_ERR(dentry); + break; + } + cn->cn_pipe->dentry = dentry; + break; + case RPC_PIPEFS_UMOUNT: + if (cn->cn_pipe->dentry) + nfsd4_cld_unregister_sb(cn->cn_pipe); + break; + default: + ret = -ENOTSUPP; + break; + } + module_put(THIS_MODULE); + return ret; +} + +struct notifier_block nfsd4_cld_block = { + .notifier_call = rpc_pipefs_event, +}; + +int +register_cld_notifier(void) +{ + return rpc_pipefs_notifier_register(&nfsd4_cld_block); +} + +void +unregister_cld_notifier(void) +{ + rpc_pipefs_notifier_unregister(&nfsd4_cld_block); +} diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index c5cddd6..1841f8b 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -58,11 +58,15 @@ static const stateid_t one_stateid = { static const stateid_t zero_stateid = { /* all fields zero */ }; +static const stateid_t currentstateid = { + .si_generation = 1, +}; static u64 current_sessionid = 1; #define ZERO_STATEID(stateid) (!memcmp((stateid), &zero_stateid, sizeof(stateid_t))) #define ONE_STATEID(stateid) (!memcmp((stateid), &one_stateid, sizeof(stateid_t))) +#define CURRENT_STATEID(stateid) (!memcmp((stateid), ¤tstateid, sizeof(stateid_t))) /* forward declarations */ static int check_for_locks(struct nfs4_file *filp, struct nfs4_lockowner *lowner); @@ -91,6 +95,19 @@ nfs4_lock_state(void) mutex_lock(&client_mutex); } +static void free_session(struct kref *); + +/* Must be called under the client_lock */ +static void nfsd4_put_session_locked(struct nfsd4_session *ses) +{ + kref_put(&ses->se_ref, free_session); +} + +static void nfsd4_get_session(struct nfsd4_session *ses) +{ + kref_get(&ses->se_ref); +} + void nfs4_unlock_state(void) { @@ -605,12 +622,20 @@ hash_sessionid(struct nfs4_sessionid *sessionid) return sid->sequence % SESSION_HASH_SIZE; } +#ifdef NFSD_DEBUG static inline void dump_sessionid(const char *fn, struct nfs4_sessionid *sessionid) { u32 *ptr = (u32 *)(&sessionid->data[0]); dprintk("%s: %u:%u:%u:%u\n", fn, ptr[0], ptr[1], ptr[2], ptr[3]); } +#else +static inline void +dump_sessionid(const char *fn, struct nfs4_sessionid *sessionid) +{ +} +#endif + static void gen_sessionid(struct nfsd4_session *ses) @@ -832,11 +857,12 @@ static void nfsd4_del_conns(struct nfsd4_session *s) spin_unlock(&clp->cl_lock); } -void free_session(struct kref *kref) +static void free_session(struct kref *kref) { struct nfsd4_session *ses; int mem; + BUG_ON(!spin_is_locked(&client_lock)); ses = container_of(kref, struct nfsd4_session, se_ref); nfsd4_del_conns(ses); spin_lock(&nfsd_drc_lock); @@ -847,6 +873,13 @@ void free_session(struct kref *kref) kfree(ses); } +void nfsd4_put_session(struct nfsd4_session *ses) +{ + spin_lock(&client_lock); + nfsd4_put_session_locked(ses); + spin_unlock(&client_lock); +} + static struct nfsd4_session *alloc_init_session(struct svc_rqst *rqstp, struct nfs4_client *clp, struct nfsd4_create_session *cses) { struct nfsd4_session *new; @@ -894,7 +927,9 @@ static struct nfsd4_session *alloc_init_session(struct svc_rqst *rqstp, struct n status = nfsd4_new_conn_from_crses(rqstp, new); /* whoops: benny points out, status is ignored! (err, or bogus) */ if (status) { + spin_lock(&client_lock); free_session(&new->se_ref); + spin_unlock(&client_lock); return NULL; } if (cses->flags & SESSION4_BACK_CHAN) { @@ -1006,12 +1041,13 @@ static struct nfs4_client *alloc_client(struct xdr_netobj name) static inline void free_client(struct nfs4_client *clp) { + BUG_ON(!spin_is_locked(&client_lock)); while (!list_empty(&clp->cl_sessions)) { struct nfsd4_session *ses; ses = list_entry(clp->cl_sessions.next, struct nfsd4_session, se_perclnt); list_del(&ses->se_perclnt); - nfsd4_put_session(ses); + nfsd4_put_session_locked(ses); } if (clp->cl_cred.cr_group_info) put_group_info(clp->cl_cred.cr_group_info); @@ -1138,12 +1174,12 @@ static void gen_clid(struct nfs4_client *clp) static void gen_confirm(struct nfs4_client *clp) { + __be32 verf[2]; static u32 i; - u32 *p; - p = (u32 *)clp->cl_confirm.data; - *p++ = get_seconds(); - *p++ = i++; + verf[0] = (__be32)get_seconds(); + verf[1] = (__be32)i++; + memcpy(clp->cl_confirm.data, verf, sizeof(clp->cl_confirm.data)); } static struct nfs4_stid *find_stateid(struct nfs4_client *cl, stateid_t *t) @@ -1180,7 +1216,9 @@ static struct nfs4_client *create_client(struct xdr_netobj name, char *recdir, if (princ) { clp->cl_principal = kstrdup(princ, GFP_KERNEL); if (clp->cl_principal == NULL) { + spin_lock(&client_lock); free_client(clp); + spin_unlock(&client_lock); return NULL; } } @@ -1347,6 +1385,7 @@ nfsd4_store_cache_entry(struct nfsd4_compoundres *resp) slot->sl_opcnt = resp->opcnt; slot->sl_status = resp->cstate.status; + slot->sl_flags |= NFSD4_SLOT_INITIALIZED; if (nfsd4_not_cached(resp)) { slot->sl_datalen = 0; return; @@ -1374,15 +1413,12 @@ nfsd4_enc_sequence_replay(struct nfsd4_compoundargs *args, struct nfsd4_op *op; struct nfsd4_slot *slot = resp->cstate.slot; - dprintk("--> %s resp->opcnt %d cachethis %u \n", __func__, - resp->opcnt, resp->cstate.slot->sl_cachethis); - /* Encode the replayed sequence operation */ op = &args->ops[resp->opcnt - 1]; nfsd4_encode_operation(resp, op); /* Return nfserr_retry_uncached_rep in next operation. */ - if (args->opcnt > 1 && slot->sl_cachethis == 0) { + if (args->opcnt > 1 && !(slot->sl_flags & NFSD4_SLOT_CACHETHIS)) { op = &args->ops[resp->opcnt++]; op->status = nfserr_retry_uncached_rep; nfsd4_encode_operation(resp, op); @@ -1575,16 +1611,11 @@ check_slot_seqid(u32 seqid, u32 slot_seqid, int slot_inuse) else return nfserr_seq_misordered; } - /* Normal */ + /* Note unsigned 32-bit arithmetic handles wraparound: */ if (likely(seqid == slot_seqid + 1)) return nfs_ok; - /* Replay */ if (seqid == slot_seqid) return nfserr_replay_cache; - /* Wraparound */ - if (seqid == 1 && (slot_seqid + 1) == 0) - return nfs_ok; - /* Misordered replay or misordered new request */ return nfserr_seq_misordered; } @@ -1815,9 +1846,10 @@ nfsd4_destroy_session(struct svc_rqst *r, nfsd4_probe_callback_sync(ses->se_client); nfs4_unlock_state(); + spin_lock(&client_lock); nfsd4_del_conns(ses); - - nfsd4_put_session(ses); + nfsd4_put_session_locked(ses); + spin_unlock(&client_lock); status = nfs_ok; out: dprintk("%s returns %d\n", __func__, ntohl(status)); @@ -1921,8 +1953,12 @@ nfsd4_sequence(struct svc_rqst *rqstp, * sr_highest_slotid and the sr_target_slot id to maxslots */ seq->maxslots = session->se_fchannel.maxreqs; - status = check_slot_seqid(seq->seqid, slot->sl_seqid, slot->sl_inuse); + status = check_slot_seqid(seq->seqid, slot->sl_seqid, + slot->sl_flags & NFSD4_SLOT_INUSE); if (status == nfserr_replay_cache) { + status = nfserr_seq_misordered; + if (!(slot->sl_flags & NFSD4_SLOT_INITIALIZED)) + goto out; cstate->slot = slot; cstate->session = session; /* Return the cached reply status and set cstate->status @@ -1938,9 +1974,12 @@ nfsd4_sequence(struct svc_rqst *rqstp, conn = NULL; /* Success! bump slot seqid */ - slot->sl_inuse = true; slot->sl_seqid = seq->seqid; - slot->sl_cachethis = seq->cachethis; + slot->sl_flags |= NFSD4_SLOT_INUSE; + if (seq->cachethis) + slot->sl_flags |= NFSD4_SLOT_CACHETHIS; + else + slot->sl_flags &= ~NFSD4_SLOT_CACHETHIS; cstate->slot = slot; cstate->session = session; @@ -2030,7 +2069,8 @@ nfsd4_reclaim_complete(struct svc_rqst *rqstp, struct nfsd4_compound_state *csta nfs4_lock_state(); status = nfserr_complete_already; - if (cstate->session->se_client->cl_firststate) + if (test_and_set_bit(NFSD4_CLIENT_RECLAIM_COMPLETE, + &cstate->session->se_client->cl_flags)) goto out; status = nfserr_stale_clientid; @@ -2045,7 +2085,7 @@ nfsd4_reclaim_complete(struct svc_rqst *rqstp, struct nfsd4_compound_state *csta goto out; status = nfs_ok; - nfsd4_create_clid_dir(cstate->session->se_client); + nfsd4_client_record_create(cstate->session->se_client); out: nfs4_unlock_state(); return status; @@ -2240,7 +2280,7 @@ nfsd4_setclientid_confirm(struct svc_rqst *rqstp, conf = find_confirmed_client_by_str(unconf->cl_recdir, hash); if (conf) { - nfsd4_remove_clid_dir(conf); + nfsd4_client_record_remove(conf); expire_client(conf); } move_to_confirmed(unconf); @@ -2633,8 +2673,6 @@ nfs4_check_delegmode(struct nfs4_delegation *dp, int flags) static int share_access_to_flags(u32 share_access) { - share_access &= ~NFS4_SHARE_WANT_MASK; - return share_access == NFS4_SHARE_ACCESS_READ ? RD_STATE : WR_STATE; } @@ -2776,10 +2814,9 @@ nfs4_upgrade_open(struct svc_rqst *rqstp, struct nfs4_file *fp, struct svc_fh *c static void -nfs4_set_claim_prev(struct nfsd4_open *open) +nfs4_set_claim_prev(struct nfsd4_open *open, bool has_session) { open->op_openowner->oo_flags |= NFS4_OO_CONFIRMED; - open->op_openowner->oo_owner.so_client->cl_firststate = 1; } /* Should we give out recallable state?: */ @@ -2855,6 +2892,27 @@ static int nfs4_set_delegation(struct nfs4_delegation *dp, int flag) return 0; } +static void nfsd4_open_deleg_none_ext(struct nfsd4_open *open, int status) +{ + open->op_delegate_type = NFS4_OPEN_DELEGATE_NONE_EXT; + if (status == -EAGAIN) + open->op_why_no_deleg = WND4_CONTENTION; + else { + open->op_why_no_deleg = WND4_RESOURCE; + switch (open->op_deleg_want) { + case NFS4_SHARE_WANT_READ_DELEG: + case NFS4_SHARE_WANT_WRITE_DELEG: + case NFS4_SHARE_WANT_ANY_DELEG: + break; + case NFS4_SHARE_WANT_CANCEL: + open->op_why_no_deleg = WND4_CANCELLED; + break; + case NFS4_SHARE_WANT_NO_DELEG: + BUG(); /* not supposed to get here */ + } + } +} + /* * Attempt to hand out a delegation. */ @@ -2864,7 +2922,7 @@ nfs4_open_delegation(struct svc_fh *fh, struct nfsd4_open *open, struct nfs4_ol_ struct nfs4_delegation *dp; struct nfs4_openowner *oo = container_of(stp->st_stateowner, struct nfs4_openowner, oo_owner); int cb_up; - int status, flag = 0; + int status = 0, flag = 0; cb_up = nfsd4_cb_channel_good(oo->oo_owner.so_client); flag = NFS4_OPEN_DELEGATE_NONE; @@ -2905,11 +2963,16 @@ nfs4_open_delegation(struct svc_fh *fh, struct nfsd4_open *open, struct nfs4_ol_ dprintk("NFSD: delegation stateid=" STATEID_FMT "\n", STATEID_VAL(&dp->dl_stid.sc_stateid)); out: - if (open->op_claim_type == NFS4_OPEN_CLAIM_PREVIOUS - && flag == NFS4_OPEN_DELEGATE_NONE - && open->op_delegate_type != NFS4_OPEN_DELEGATE_NONE) - dprintk("NFSD: WARNING: refusing delegation reclaim\n"); open->op_delegate_type = flag; + if (flag == NFS4_OPEN_DELEGATE_NONE) { + if (open->op_claim_type == NFS4_OPEN_CLAIM_PREVIOUS && + open->op_delegate_type != NFS4_OPEN_DELEGATE_NONE) + dprintk("NFSD: WARNING: refusing delegation reclaim\n"); + + /* 4.1 client asking for a delegation? */ + if (open->op_deleg_want) + nfsd4_open_deleg_none_ext(open, status); + } return; out_free: nfs4_put_delegation(dp); @@ -2918,6 +2981,24 @@ out_no_deleg: goto out; } +static void nfsd4_deleg_xgrade_none_ext(struct nfsd4_open *open, + struct nfs4_delegation *dp) +{ + if (open->op_deleg_want == NFS4_SHARE_WANT_READ_DELEG && + dp->dl_type == NFS4_OPEN_DELEGATE_WRITE) { + open->op_delegate_type = NFS4_OPEN_DELEGATE_NONE_EXT; + open->op_why_no_deleg = WND4_NOT_SUPP_DOWNGRADE; + } else if (open->op_deleg_want == NFS4_SHARE_WANT_WRITE_DELEG && + dp->dl_type == NFS4_OPEN_DELEGATE_WRITE) { + open->op_delegate_type = NFS4_OPEN_DELEGATE_NONE_EXT; + open->op_why_no_deleg = WND4_NOT_SUPP_UPGRADE; + } + /* Otherwise the client must be confused wanting a delegation + * it already has, therefore we don't return + * NFS4_OPEN_DELEGATE_NONE_EXT and reason. + */ +} + /* * called with nfs4_lock_state() held. */ @@ -2979,24 +3060,36 @@ nfsd4_process_open2(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nf update_stateid(&stp->st_stid.sc_stateid); memcpy(&open->op_stateid, &stp->st_stid.sc_stateid, sizeof(stateid_t)); - if (nfsd4_has_session(&resp->cstate)) + if (nfsd4_has_session(&resp->cstate)) { open->op_openowner->oo_flags |= NFS4_OO_CONFIRMED; + if (open->op_deleg_want & NFS4_SHARE_WANT_NO_DELEG) { + open->op_delegate_type = NFS4_OPEN_DELEGATE_NONE_EXT; + open->op_why_no_deleg = WND4_NOT_WANTED; + goto nodeleg; + } + } + /* * Attempt to hand out a delegation. No error return, because the * OPEN succeeds even if we fail. */ nfs4_open_delegation(current_fh, open, stp); - +nodeleg: status = nfs_ok; dprintk("%s: stateid=" STATEID_FMT "\n", __func__, STATEID_VAL(&stp->st_stid.sc_stateid)); out: + /* 4.1 client trying to upgrade/downgrade delegation? */ + if (open->op_delegate_type == NFS4_OPEN_DELEGATE_NONE && dp && + open->op_deleg_want) + nfsd4_deleg_xgrade_none_ext(open, dp); + if (fp) put_nfs4_file(fp); if (status == 0 && open->op_claim_type == NFS4_OPEN_CLAIM_PREVIOUS) - nfs4_set_claim_prev(open); + nfs4_set_claim_prev(open, nfsd4_has_session(&resp->cstate)); /* * To finish the open response, we just need to set the rflags. */ @@ -3066,7 +3159,7 @@ static void nfsd4_end_grace(void) { dprintk("NFSD: end of grace period\n"); - nfsd4_recdir_purge_old(); + nfsd4_record_grace_done(&init_net, boot_time); locks_end_grace(&nfsd4_manager); /* * Now that every NFSv4 client has had the chance to recover and @@ -3115,7 +3208,7 @@ nfs4_laundromat(void) clp = list_entry(pos, struct nfs4_client, cl_lru); dprintk("NFSD: purging unused client (clientid %08x)\n", clp->cl_clientid.cl_id); - nfsd4_remove_clid_dir(clp); + nfsd4_client_record_remove(clp); expire_client(clp); } spin_lock(&recall_lock); @@ -3400,7 +3493,14 @@ __be32 nfsd4_test_stateid(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, struct nfsd4_test_stateid *test_stateid) { - /* real work is done during encoding */ + struct nfsd4_test_stateid_id *stateid; + struct nfs4_client *cl = cstate->session->se_client; + + nfs4_lock_state(); + list_for_each_entry(stateid, &test_stateid->ts_stateid_list, ts_id_list) + stateid->ts_id_status = nfs4_validate_stateid(cl, &stateid->ts_id_stateid); + nfs4_unlock_state(); + return nfs_ok; } @@ -3539,7 +3639,7 @@ nfsd4_open_confirm(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, dprintk("NFSD: %s: success, seqid=%d stateid=" STATEID_FMT "\n", __func__, oc->oc_seqid, STATEID_VAL(&stp->st_stid.sc_stateid)); - nfsd4_create_clid_dir(oo->oo_owner.so_client); + nfsd4_client_record_create(oo->oo_owner.so_client); status = nfs_ok; out: if (!cstate->replay_owner) @@ -3596,7 +3696,9 @@ nfsd4_open_downgrade(struct svc_rqst *rqstp, cstate->current_fh.fh_dentry->d_name.name); /* We don't yet support WANT bits: */ - od->od_share_access &= NFS4_SHARE_ACCESS_MASK; + if (od->od_deleg_want) + dprintk("NFSD: %s: od_deleg_want=0x%x ignored\n", __func__, + od->od_deleg_want); nfs4_lock_state(); status = nfs4_preprocess_confirmed_seqid_op(cstate, od->od_seqid, @@ -4353,7 +4455,9 @@ nfs4_has_reclaimed_state(const char *name, bool use_exchange_id) struct nfs4_client *clp; clp = find_confirmed_client_by_str(name, strhashval); - return clp ? 1 : 0; + if (!clp) + return 0; + return test_bit(NFSD4_CLIENT_STABLE, &clp->cl_flags); } /* @@ -4377,7 +4481,7 @@ nfs4_client_to_reclaim(const char *name) return 1; } -static void +void nfs4_release_reclaim(void) { struct nfs4_client_reclaim *crp = NULL; @@ -4397,19 +4501,12 @@ nfs4_release_reclaim(void) /* * called from OPEN, CLAIM_PREVIOUS with a new clientid. */ -static struct nfs4_client_reclaim * -nfs4_find_reclaim_client(clientid_t *clid) +struct nfs4_client_reclaim * +nfsd4_find_reclaim_client(struct nfs4_client *clp) { unsigned int strhashval; - struct nfs4_client *clp; struct nfs4_client_reclaim *crp = NULL; - - /* find clientid in conf_id_hashtbl */ - clp = find_confirmed_client(clid); - if (clp == NULL) - return NULL; - dprintk("NFSD: nfs4_find_reclaim_client for %.*s with recdir %s\n", clp->cl_name.len, clp->cl_name.data, clp->cl_recdir); @@ -4430,7 +4527,14 @@ nfs4_find_reclaim_client(clientid_t *clid) __be32 nfs4_check_open_reclaim(clientid_t *clid) { - return nfs4_find_reclaim_client(clid) ? nfs_ok : nfserr_reclaim_bad; + struct nfs4_client *clp; + + /* find clientid in conf_id_hashtbl */ + clp = find_confirmed_client(clid); + if (clp == NULL) + return nfserr_reclaim_bad; + + return nfsd4_client_record_check(clp) ? nfserr_reclaim_bad : nfs_ok; } #ifdef CONFIG_NFSD_FAULT_INJECTION @@ -4442,7 +4546,7 @@ void nfsd_forget_clients(u64 num) nfs4_lock_state(); list_for_each_entry_safe(clp, next, &client_lru, cl_lru) { - nfsd4_remove_clid_dir(clp); + nfsd4_client_record_remove(clp); expire_client(clp); if (++count == num) break; @@ -4577,19 +4681,6 @@ nfs4_state_init(void) reclaim_str_hashtbl_size = 0; } -static void -nfsd4_load_reboot_recovery_data(void) -{ - int status; - - nfs4_lock_state(); - nfsd4_init_recdir(); - status = nfsd4_recdir_load(); - nfs4_unlock_state(); - if (status) - printk("NFSD: Failure reading reboot recovery data\n"); -} - /* * Since the lifetime of a delegation isn't limited to that of an open, a * client may quite reasonably hang on to a delegation as long as it has @@ -4613,21 +4704,34 @@ set_max_delegations(void) /* initialization to perform when the nfsd service is started: */ -static int -__nfs4_state_start(void) +int +nfs4_state_start(void) { int ret; + /* + * FIXME: For now, we hang most of the pernet global stuff off of + * init_net until nfsd is fully containerized. Eventually, we'll + * need to pass a net pointer into this function, take a reference + * to that instead and then do most of the rest of this on a per-net + * basis. + */ + get_net(&init_net); + nfsd4_client_tracking_init(&init_net); boot_time = get_seconds(); locks_start_grace(&nfsd4_manager); printk(KERN_INFO "NFSD: starting %ld-second grace period\n", nfsd4_grace); ret = set_callback_cred(); - if (ret) - return -ENOMEM; + if (ret) { + ret = -ENOMEM; + goto out_recovery; + } laundry_wq = create_singlethread_workqueue("nfsd4"); - if (laundry_wq == NULL) - return -ENOMEM; + if (laundry_wq == NULL) { + ret = -ENOMEM; + goto out_recovery; + } ret = nfsd4_create_callback_queue(); if (ret) goto out_free_laundry; @@ -4636,16 +4740,12 @@ __nfs4_state_start(void) return 0; out_free_laundry: destroy_workqueue(laundry_wq); +out_recovery: + nfsd4_client_tracking_exit(&init_net); + put_net(&init_net); return ret; } -int -nfs4_state_start(void) -{ - nfsd4_load_reboot_recovery_data(); - return __nfs4_state_start(); -} - static void __nfs4_state_shutdown(void) { @@ -4676,7 +4776,8 @@ __nfs4_state_shutdown(void) unhash_delegation(dp); } - nfsd4_shutdown_recdir(); + nfsd4_client_tracking_exit(&init_net); + put_net(&init_net); } void @@ -4686,8 +4787,108 @@ nfs4_state_shutdown(void) destroy_workqueue(laundry_wq); locks_end_grace(&nfsd4_manager); nfs4_lock_state(); - nfs4_release_reclaim(); __nfs4_state_shutdown(); nfs4_unlock_state(); nfsd4_destroy_callback_queue(); } + +static void +get_stateid(struct nfsd4_compound_state *cstate, stateid_t *stateid) +{ + if (HAS_STATE_ID(cstate, CURRENT_STATE_ID_FLAG) && CURRENT_STATEID(stateid)) + memcpy(stateid, &cstate->current_stateid, sizeof(stateid_t)); +} + +static void +put_stateid(struct nfsd4_compound_state *cstate, stateid_t *stateid) +{ + if (cstate->minorversion) { + memcpy(&cstate->current_stateid, stateid, sizeof(stateid_t)); + SET_STATE_ID(cstate, CURRENT_STATE_ID_FLAG); + } +} + +void +clear_current_stateid(struct nfsd4_compound_state *cstate) +{ + CLEAR_STATE_ID(cstate, CURRENT_STATE_ID_FLAG); +} + +/* + * functions to set current state id + */ +void +nfsd4_set_opendowngradestateid(struct nfsd4_compound_state *cstate, struct nfsd4_open_downgrade *odp) +{ + put_stateid(cstate, &odp->od_stateid); +} + +void +nfsd4_set_openstateid(struct nfsd4_compound_state *cstate, struct nfsd4_open *open) +{ + put_stateid(cstate, &open->op_stateid); +} + +void +nfsd4_set_closestateid(struct nfsd4_compound_state *cstate, struct nfsd4_close *close) +{ + put_stateid(cstate, &close->cl_stateid); +} + +void +nfsd4_set_lockstateid(struct nfsd4_compound_state *cstate, struct nfsd4_lock *lock) +{ + put_stateid(cstate, &lock->lk_resp_stateid); +} + +/* + * functions to consume current state id + */ + +void +nfsd4_get_opendowngradestateid(struct nfsd4_compound_state *cstate, struct nfsd4_open_downgrade *odp) +{ + get_stateid(cstate, &odp->od_stateid); +} + +void +nfsd4_get_delegreturnstateid(struct nfsd4_compound_state *cstate, struct nfsd4_delegreturn *drp) +{ + get_stateid(cstate, &drp->dr_stateid); +} + +void +nfsd4_get_freestateid(struct nfsd4_compound_state *cstate, struct nfsd4_free_stateid *fsp) +{ + get_stateid(cstate, &fsp->fr_stateid); +} + +void +nfsd4_get_setattrstateid(struct nfsd4_compound_state *cstate, struct nfsd4_setattr *setattr) +{ + get_stateid(cstate, &setattr->sa_stateid); +} + +void +nfsd4_get_closestateid(struct nfsd4_compound_state *cstate, struct nfsd4_close *close) +{ + get_stateid(cstate, &close->cl_stateid); +} + +void +nfsd4_get_lockustateid(struct nfsd4_compound_state *cstate, struct nfsd4_locku *locku) +{ + get_stateid(cstate, &locku->lu_stateid); +} + +void +nfsd4_get_readstateid(struct nfsd4_compound_state *cstate, struct nfsd4_read *read) +{ + get_stateid(cstate, &read->rd_stateid); +} + +void +nfsd4_get_writestateid(struct nfsd4_compound_state *cstate, struct nfsd4_write *write) +{ + get_stateid(cstate, &write->wr_stateid); +} diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c index 0ec5a1b..bcd8904 100644 --- a/fs/nfsd/nfs4xdr.c +++ b/fs/nfsd/nfs4xdr.c @@ -133,22 +133,6 @@ xdr_error: \ } \ } while (0) -static void save_buf(struct nfsd4_compoundargs *argp, struct nfsd4_saved_compoundargs *savep) -{ - savep->p = argp->p; - savep->end = argp->end; - savep->pagelen = argp->pagelen; - savep->pagelist = argp->pagelist; -} - -static void restore_buf(struct nfsd4_compoundargs *argp, struct nfsd4_saved_compoundargs *savep) -{ - argp->p = savep->p; - argp->end = savep->end; - argp->pagelen = savep->pagelen; - argp->pagelist = savep->pagelist; -} - static __be32 *read_buf(struct nfsd4_compoundargs *argp, u32 nbytes) { /* We want more bytes than seem to be available. @@ -638,14 +622,18 @@ nfsd4_decode_lookup(struct nfsd4_compoundargs *argp, struct nfsd4_lookup *lookup DECODE_TAIL; } -static __be32 nfsd4_decode_share_access(struct nfsd4_compoundargs *argp, u32 *x) +static __be32 nfsd4_decode_share_access(struct nfsd4_compoundargs *argp, u32 *share_access, u32 *deleg_want, u32 *deleg_when) { __be32 *p; u32 w; READ_BUF(4); READ32(w); - *x = w; + *share_access = w & NFS4_SHARE_ACCESS_MASK; + *deleg_want = w & NFS4_SHARE_WANT_MASK; + if (deleg_when) + *deleg_when = w & NFS4_SHARE_WHEN_MASK; + switch (w & NFS4_SHARE_ACCESS_MASK) { case NFS4_SHARE_ACCESS_READ: case NFS4_SHARE_ACCESS_WRITE: @@ -673,6 +661,9 @@ static __be32 nfsd4_decode_share_access(struct nfsd4_compoundargs *argp, u32 *x) w &= ~NFS4_SHARE_WANT_MASK; if (!w) return nfs_ok; + + if (!deleg_when) /* open_downgrade */ + return nfserr_inval; switch (w) { case NFS4_SHARE_SIGNAL_DELEG_WHEN_RESRC_AVAIL: case NFS4_SHARE_PUSH_DELEG_WHEN_UNCONTENDED: @@ -719,6 +710,7 @@ static __be32 nfsd4_decode_open(struct nfsd4_compoundargs *argp, struct nfsd4_open *open) { DECODE_HEAD; + u32 dummy; memset(open->op_bmval, 0, sizeof(open->op_bmval)); open->op_iattr.ia_valid = 0; @@ -727,7 +719,9 @@ nfsd4_decode_open(struct nfsd4_compoundargs *argp, struct nfsd4_open *open) /* seqid, share_access, share_deny, clientid, ownerlen */ READ_BUF(4); READ32(open->op_seqid); - status = nfsd4_decode_share_access(argp, &open->op_share_access); + /* decode, yet ignore deleg_when until supported */ + status = nfsd4_decode_share_access(argp, &open->op_share_access, + &open->op_deleg_want, &dummy); if (status) goto xdr_error; status = nfsd4_decode_share_deny(argp, &open->op_share_deny); @@ -755,14 +749,14 @@ nfsd4_decode_open(struct nfsd4_compoundargs *argp, struct nfsd4_open *open) goto out; break; case NFS4_CREATE_EXCLUSIVE: - READ_BUF(8); - COPYMEM(open->op_verf.data, 8); + READ_BUF(NFS4_VERIFIER_SIZE); + COPYMEM(open->op_verf.data, NFS4_VERIFIER_SIZE); break; case NFS4_CREATE_EXCLUSIVE4_1: if (argp->minorversion < 1) goto xdr_error; - READ_BUF(8); - COPYMEM(open->op_verf.data, 8); + READ_BUF(NFS4_VERIFIER_SIZE); + COPYMEM(open->op_verf.data, NFS4_VERIFIER_SIZE); status = nfsd4_decode_fattr(argp, open->op_bmval, &open->op_iattr, &open->op_acl); if (status) @@ -848,7 +842,8 @@ nfsd4_decode_open_downgrade(struct nfsd4_compoundargs *argp, struct nfsd4_open_d return status; READ_BUF(4); READ32(open_down->od_seqid); - status = nfsd4_decode_share_access(argp, &open_down->od_share_access); + status = nfsd4_decode_share_access(argp, &open_down->od_share_access, + &open_down->od_deleg_want, NULL); if (status) return status; status = nfsd4_decode_share_deny(argp, &open_down->od_share_deny); @@ -994,8 +989,8 @@ nfsd4_decode_setclientid(struct nfsd4_compoundargs *argp, struct nfsd4_setclient { DECODE_HEAD; - READ_BUF(8); - COPYMEM(setclientid->se_verf.data, 8); + READ_BUF(NFS4_VERIFIER_SIZE); + COPYMEM(setclientid->se_verf.data, NFS4_VERIFIER_SIZE); status = nfsd4_decode_opaque(argp, &setclientid->se_name); if (status) @@ -1020,9 +1015,9 @@ nfsd4_decode_setclientid_confirm(struct nfsd4_compoundargs *argp, struct nfsd4_s { DECODE_HEAD; - READ_BUF(8 + sizeof(nfs4_verifier)); + READ_BUF(8 + NFS4_VERIFIER_SIZE); COPYMEM(&scd_c->sc_clientid, 8); - COPYMEM(&scd_c->sc_confirm, sizeof(nfs4_verifier)); + COPYMEM(&scd_c->sc_confirm, NFS4_VERIFIER_SIZE); DECODE_TAIL; } @@ -1385,26 +1380,29 @@ nfsd4_decode_sequence(struct nfsd4_compoundargs *argp, static __be32 nfsd4_decode_test_stateid(struct nfsd4_compoundargs *argp, struct nfsd4_test_stateid *test_stateid) { - unsigned int nbytes; - stateid_t si; int i; - __be32 *p; - __be32 status; + __be32 *p, status; + struct nfsd4_test_stateid_id *stateid; READ_BUF(4); test_stateid->ts_num_ids = ntohl(*p++); - nbytes = test_stateid->ts_num_ids * sizeof(stateid_t); - if (nbytes > (u32)((char *)argp->end - (char *)argp->p)) - goto xdr_error; - - test_stateid->ts_saved_args = argp; - save_buf(argp, &test_stateid->ts_savedp); + INIT_LIST_HEAD(&test_stateid->ts_stateid_list); for (i = 0; i < test_stateid->ts_num_ids; i++) { - status = nfsd4_decode_stateid(argp, &si); + stateid = kmalloc(sizeof(struct nfsd4_test_stateid_id), GFP_KERNEL); + if (!stateid) { + status = PTR_ERR(stateid); + goto out; + } + + defer_free(argp, kfree, stateid); + INIT_LIST_HEAD(&stateid->ts_id_list); + list_add_tail(&stateid->ts_id_list, &test_stateid->ts_stateid_list); + + status = nfsd4_decode_stateid(argp, &stateid->ts_id_stateid); if (status) - return status; + goto out; } status = 0; @@ -2661,8 +2659,8 @@ nfsd4_encode_commit(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_ __be32 *p; if (!nfserr) { - RESERVE_SPACE(8); - WRITEMEM(commit->co_verf.data, 8); + RESERVE_SPACE(NFS4_VERIFIER_SIZE); + WRITEMEM(commit->co_verf.data, NFS4_VERIFIER_SIZE); ADJUST_ARGS(); } return nfserr; @@ -2851,6 +2849,20 @@ nfsd4_encode_open(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_op WRITE32(0); /* XXX: is NULL principal ok? */ ADJUST_ARGS(); break; + case NFS4_OPEN_DELEGATE_NONE_EXT: /* 4.1 */ + switch (open->op_why_no_deleg) { + case WND4_CONTENTION: + case WND4_RESOURCE: + RESERVE_SPACE(8); + WRITE32(open->op_why_no_deleg); + WRITE32(0); /* deleg signaling not supported yet */ + break; + default: + RESERVE_SPACE(4); + WRITE32(open->op_why_no_deleg); + } + ADJUST_ARGS(); + break; default: BUG(); } @@ -3008,7 +3020,7 @@ nfsd4_encode_readdir(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4 if (resp->xbuf->page_len) return nfserr_resource; - RESERVE_SPACE(8); /* verifier */ + RESERVE_SPACE(NFS4_VERIFIER_SIZE); savep = p; /* XXX: Following NFSv3, we ignore the READDIR verifier for now. */ @@ -3209,9 +3221,9 @@ nfsd4_encode_setclientid(struct nfsd4_compoundres *resp, __be32 nfserr, struct n __be32 *p; if (!nfserr) { - RESERVE_SPACE(8 + sizeof(nfs4_verifier)); + RESERVE_SPACE(8 + NFS4_VERIFIER_SIZE); WRITEMEM(&scd->se_clientid, 8); - WRITEMEM(&scd->se_confirm, sizeof(nfs4_verifier)); + WRITEMEM(&scd->se_confirm, NFS4_VERIFIER_SIZE); ADJUST_ARGS(); } else if (nfserr == nfserr_clid_inuse) { @@ -3232,7 +3244,7 @@ nfsd4_encode_write(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_w RESERVE_SPACE(16); WRITE32(write->wr_bytes_written); WRITE32(write->wr_how_written); - WRITEMEM(write->wr_verifier.data, 8); + WRITEMEM(write->wr_verifier.data, NFS4_VERIFIER_SIZE); ADJUST_ARGS(); } return nfserr; @@ -3391,30 +3403,17 @@ __be32 nfsd4_encode_test_stateid(struct nfsd4_compoundres *resp, int nfserr, struct nfsd4_test_stateid *test_stateid) { - struct nfsd4_compoundargs *argp; - struct nfs4_client *cl = resp->cstate.session->se_client; - stateid_t si; + struct nfsd4_test_stateid_id *stateid, *next; __be32 *p; - int i; - int valid; - - restore_buf(test_stateid->ts_saved_args, &test_stateid->ts_savedp); - argp = test_stateid->ts_saved_args; - RESERVE_SPACE(4); + RESERVE_SPACE(4 + (4 * test_stateid->ts_num_ids)); *p++ = htonl(test_stateid->ts_num_ids); - resp->p = p; - nfs4_lock_state(); - for (i = 0; i < test_stateid->ts_num_ids; i++) { - nfsd4_decode_stateid(argp, &si); - valid = nfs4_validate_stateid(cl, &si); - RESERVE_SPACE(4); - *p++ = htonl(valid); - resp->p = p; + list_for_each_entry_safe(stateid, next, &test_stateid->ts_stateid_list, ts_id_list) { + *p++ = htonl(stateid->ts_id_status); } - nfs4_unlock_state(); + ADJUST_ARGS(); return nfserr; } @@ -3532,7 +3531,7 @@ int nfsd4_check_resp_size(struct nfsd4_compoundres *resp, u32 pad) if (length > session->se_fchannel.maxresp_sz) return nfserr_rep_too_big; - if (slot->sl_cachethis == 1 && + if ((slot->sl_flags & NFSD4_SLOT_CACHETHIS) && length > session->se_fchannel.maxresp_cached) return nfserr_rep_too_big_to_cache; @@ -3656,8 +3655,7 @@ nfs4svc_encode_compoundres(struct svc_rqst *rqstp, __be32 *p, struct nfsd4_compo if (nfsd4_has_session(cs)) { if (cs->status != nfserr_replay_cache) { nfsd4_store_cache_entry(resp); - dprintk("%s: SET SLOT STATE TO AVAILABLE\n", __func__); - cs->slot->sl_inuse = false; + cs->slot->sl_flags &= ~NFSD4_SLOT_INUSE; } /* Renew the clientid on success and on replay */ release_session_client(cs->session); diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c index 64c24af..2c53be6 100644 --- a/fs/nfsd/nfsctl.c +++ b/fs/nfsd/nfsctl.c @@ -13,12 +13,14 @@ #include <linux/sunrpc/clnt.h> #include <linux/sunrpc/gss_api.h> #include <linux/sunrpc/gss_krb5_enctypes.h> +#include <linux/sunrpc/rpc_pipe_fs.h> #include <linux/module.h> #include "idmap.h" #include "nfsd.h" #include "cache.h" #include "fault_inject.h" +#include "netns.h" /* * We have a single directory with several nodes in it. @@ -1124,14 +1126,26 @@ static int create_proc_exports_entry(void) } #endif +int nfsd_net_id; +static struct pernet_operations nfsd_net_ops = { + .id = &nfsd_net_id, + .size = sizeof(struct nfsd_net), +}; + static int __init init_nfsd(void) { int retval; printk(KERN_INFO "Installing knfsd (copyright (C) 1996 okir@monad.swb.de).\n"); - retval = nfsd4_init_slabs(); + retval = register_cld_notifier(); if (retval) return retval; + retval = register_pernet_subsys(&nfsd_net_ops); + if (retval < 0) + goto out_unregister_notifier; + retval = nfsd4_init_slabs(); + if (retval) + goto out_unregister_pernet; nfs4_state_init(); retval = nfsd_fault_inject_init(); /* nfsd fault injection controls */ if (retval) @@ -1169,6 +1183,10 @@ out_free_stat: nfsd_fault_inject_cleanup(); out_free_slabs: nfsd4_free_slabs(); +out_unregister_pernet: + unregister_pernet_subsys(&nfsd_net_ops); +out_unregister_notifier: + unregister_cld_notifier(); return retval; } @@ -1184,6 +1202,8 @@ static void __exit exit_nfsd(void) nfsd4_free_slabs(); nfsd_fault_inject_cleanup(); unregister_filesystem(&nfsd_fs_type); + unregister_pernet_subsys(&nfsd_net_ops); + unregister_cld_notifier(); } MODULE_AUTHOR("Olaf Kirch <okir@monad.swb.de>"); diff --git a/fs/nfsd/nfsd.h b/fs/nfsd/nfsd.h index 1d1e858..1671429 100644 --- a/fs/nfsd/nfsd.h +++ b/fs/nfsd/nfsd.h @@ -364,12 +364,17 @@ static inline u32 nfsd_suppattrs2(u32 minorversion) NFSD_WRITEABLE_ATTRS_WORD2 extern int nfsd4_is_junction(struct dentry *dentry); -#else +extern int register_cld_notifier(void); +extern void unregister_cld_notifier(void); +#else /* CONFIG_NFSD_V4 */ static inline int nfsd4_is_junction(struct dentry *dentry) { return 0; } +#define register_cld_notifier() 0 +#define unregister_cld_notifier() do { } while(0) + #endif /* CONFIG_NFSD_V4 */ #endif /* LINUX_NFSD_NFSD_H */ diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c index fce472f..28dfad3 100644 --- a/fs/nfsd/nfssvc.c +++ b/fs/nfsd/nfssvc.c @@ -307,33 +307,37 @@ static void set_max_drc(void) dprintk("%s nfsd_drc_max_mem %u \n", __func__, nfsd_drc_max_mem); } -int nfsd_create_serv(void) +static int nfsd_get_default_max_blksize(void) { - int err = 0; + struct sysinfo i; + unsigned long long target; + unsigned long ret; + + si_meminfo(&i); + target = (i.totalram - i.totalhigh) << PAGE_SHIFT; + /* + * Aim for 1/4096 of memory per thread This gives 1MB on 4Gig + * machines, but only uses 32K on 128M machines. Bottom out at + * 8K on 32M and smaller. Of course, this is only a default. + */ + target >>= 12; + + ret = NFSSVC_MAXBLKSIZE; + while (ret > target && ret >= 8*1024*2) + ret /= 2; + return ret; +} +int nfsd_create_serv(void) +{ WARN_ON(!mutex_is_locked(&nfsd_mutex)); if (nfsd_serv) { svc_get(nfsd_serv); return 0; } - if (nfsd_max_blksize == 0) { - /* choose a suitable default */ - struct sysinfo i; - si_meminfo(&i); - /* Aim for 1/4096 of memory per thread - * This gives 1MB on 4Gig machines - * But only uses 32K on 128M machines. - * Bottom out at 8K on 32M and smaller. - * Of course, this is only a default. - */ - nfsd_max_blksize = NFSSVC_MAXBLKSIZE; - i.totalram <<= PAGE_SHIFT - 12; - while (nfsd_max_blksize > i.totalram && - nfsd_max_blksize >= 8*1024*2) - nfsd_max_blksize /= 2; - } + if (nfsd_max_blksize == 0) + nfsd_max_blksize = nfsd_get_default_max_blksize(); nfsd_reset_versions(); - nfsd_serv = svc_create_pooled(&nfsd_program, nfsd_max_blksize, nfsd_last_thread, nfsd, THIS_MODULE); if (nfsd_serv == NULL) @@ -341,7 +345,7 @@ int nfsd_create_serv(void) set_max_drc(); do_gettimeofday(&nfssvc_boot); /* record boot time */ - return err; + return 0; } int nfsd_nrpools(void) diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h index ffb5df1..89ab137 100644 --- a/fs/nfsd/state.h +++ b/fs/nfsd/state.h @@ -128,12 +128,14 @@ static inline struct nfs4_delegation *delegstateid(struct nfs4_stid *s) (NFSD_CACHE_SIZE_SLOTS_PER_SESSION * NFSD_SLOT_CACHE_SIZE) struct nfsd4_slot { - bool sl_inuse; - bool sl_cachethis; - u16 sl_opcnt; u32 sl_seqid; __be32 sl_status; u32 sl_datalen; + u16 sl_opcnt; +#define NFSD4_SLOT_INUSE (1 << 0) +#define NFSD4_SLOT_CACHETHIS (1 << 1) +#define NFSD4_SLOT_INITIALIZED (1 << 2) + u8 sl_flags; char sl_data[]; }; @@ -196,18 +198,7 @@ struct nfsd4_session { struct nfsd4_slot *se_slots[]; /* forward channel slots */ }; -static inline void -nfsd4_put_session(struct nfsd4_session *ses) -{ - extern void free_session(struct kref *kref); - kref_put(&ses->se_ref, free_session); -} - -static inline void -nfsd4_get_session(struct nfsd4_session *ses) -{ - kref_get(&ses->se_ref); -} +extern void nfsd4_put_session(struct nfsd4_session *ses); /* formatted contents of nfs4_sessionid */ struct nfsd4_sessionid { @@ -245,14 +236,17 @@ struct nfs4_client { struct svc_cred cl_cred; /* setclientid principal */ clientid_t cl_clientid; /* generated by server */ nfs4_verifier cl_confirm; /* generated by server */ - u32 cl_firststate; /* recovery dir creation */ u32 cl_minorversion; /* for v4.0 and v4.1 callbacks: */ struct nfs4_cb_conn cl_cb_conn; -#define NFSD4_CLIENT_CB_UPDATE 1 -#define NFSD4_CLIENT_KILL 2 - unsigned long cl_cb_flags; +#define NFSD4_CLIENT_CB_UPDATE (0) +#define NFSD4_CLIENT_CB_KILL (1) +#define NFSD4_CLIENT_STABLE (2) /* client on stable storage */ +#define NFSD4_CLIENT_RECLAIM_COMPLETE (3) /* reclaim_complete done */ +#define NFSD4_CLIENT_CB_FLAG_MASK (1 << NFSD4_CLIENT_CB_UPDATE | \ + 1 << NFSD4_CLIENT_CB_KILL) + unsigned long cl_flags; struct rpc_clnt *cl_cb_client; u32 cl_cb_ident; #define NFSD4_CB_UP 0 @@ -463,6 +457,8 @@ extern __be32 nfs4_preprocess_stateid_op(struct nfsd4_compound_state *cstate, extern void nfs4_lock_state(void); extern void nfs4_unlock_state(void); extern int nfs4_in_grace(void); +extern void nfs4_release_reclaim(void); +extern struct nfs4_client_reclaim *nfsd4_find_reclaim_client(struct nfs4_client *crp); extern __be32 nfs4_check_open_reclaim(clientid_t *clid); extern void nfs4_free_openowner(struct nfs4_openowner *); extern void nfs4_free_lockowner(struct nfs4_lockowner *); @@ -477,16 +473,17 @@ extern void nfsd4_destroy_callback_queue(void); extern void nfsd4_shutdown_callback(struct nfs4_client *); extern void nfs4_put_delegation(struct nfs4_delegation *dp); extern __be32 nfs4_make_rec_clidname(char *clidname, struct xdr_netobj *clname); -extern void nfsd4_init_recdir(void); -extern int nfsd4_recdir_load(void); -extern void nfsd4_shutdown_recdir(void); extern int nfs4_client_to_reclaim(const char *name); extern int nfs4_has_reclaimed_state(const char *name, bool use_exchange_id); -extern void nfsd4_recdir_purge_old(void); -extern void nfsd4_create_clid_dir(struct nfs4_client *clp); -extern void nfsd4_remove_clid_dir(struct nfs4_client *clp); extern void release_session_client(struct nfsd4_session *); extern __be32 nfs4_validate_stateid(struct nfs4_client *, stateid_t *); extern void nfsd4_purge_closed_stateid(struct nfs4_stateowner *); +/* nfs4recover operations */ +extern int nfsd4_client_tracking_init(struct net *net); +extern void nfsd4_client_tracking_exit(struct net *net); +extern void nfsd4_client_record_create(struct nfs4_client *clp); +extern void nfsd4_client_record_remove(struct nfs4_client *clp); +extern int nfsd4_client_record_check(struct nfs4_client *clp); +extern void nfsd4_record_grace_done(struct net *net, time_t boot_time); #endif /* NFSD4_STATE_H */ diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c index e59f71d..296d671 100644 --- a/fs/nfsd/vfs.c +++ b/fs/nfsd/vfs.c @@ -737,12 +737,13 @@ static int nfsd_open_break_lease(struct inode *inode, int access) /* * Open an existing file or directory. - * The access argument indicates the type of open (read/write/lock) + * The may_flags argument indicates the type of open (read/write/lock) + * and additional flags. * N.B. After this call fhp needs an fh_put */ __be32 nfsd_open(struct svc_rqst *rqstp, struct svc_fh *fhp, umode_t type, - int access, struct file **filp) + int may_flags, struct file **filp) { struct dentry *dentry; struct inode *inode; @@ -757,7 +758,7 @@ nfsd_open(struct svc_rqst *rqstp, struct svc_fh *fhp, umode_t type, * and (hopefully) checked permission - so allow OWNER_OVERRIDE * in case a chmod has now revoked permission. */ - err = fh_verify(rqstp, fhp, type, access | NFSD_MAY_OWNER_OVERRIDE); + err = fh_verify(rqstp, fhp, type, may_flags | NFSD_MAY_OWNER_OVERRIDE); if (err) goto out; @@ -768,7 +769,7 @@ nfsd_open(struct svc_rqst *rqstp, struct svc_fh *fhp, umode_t type, * or any access when mandatory locking enabled */ err = nfserr_perm; - if (IS_APPEND(inode) && (access & NFSD_MAY_WRITE)) + if (IS_APPEND(inode) && (may_flags & NFSD_MAY_WRITE)) goto out; /* * We must ignore files (but only files) which might have mandatory @@ -781,12 +782,12 @@ nfsd_open(struct svc_rqst *rqstp, struct svc_fh *fhp, umode_t type, if (!inode->i_fop) goto out; - host_err = nfsd_open_break_lease(inode, access); + host_err = nfsd_open_break_lease(inode, may_flags); if (host_err) /* NOMEM or WOULDBLOCK */ goto out_nfserr; - if (access & NFSD_MAY_WRITE) { - if (access & NFSD_MAY_READ) + if (may_flags & NFSD_MAY_WRITE) { + if (may_flags & NFSD_MAY_READ) flags = O_RDWR|O_LARGEFILE; else flags = O_WRONLY|O_LARGEFILE; @@ -795,8 +796,15 @@ nfsd_open(struct svc_rqst *rqstp, struct svc_fh *fhp, umode_t type, flags, current_cred()); if (IS_ERR(*filp)) host_err = PTR_ERR(*filp); - else - host_err = ima_file_check(*filp, access); + else { + host_err = ima_file_check(*filp, may_flags); + + if (may_flags & NFSD_MAY_64BIT_COOKIE) + (*filp)->f_mode |= FMODE_64BITHASH; + else + (*filp)->f_mode |= FMODE_32BITHASH; + } + out_nfserr: err = nfserrno(host_err); out: @@ -2021,8 +2029,13 @@ nfsd_readdir(struct svc_rqst *rqstp, struct svc_fh *fhp, loff_t *offsetp, __be32 err; struct file *file; loff_t offset = *offsetp; + int may_flags = NFSD_MAY_READ; + + /* NFSv2 only supports 32 bit cookies */ + if (rqstp->rq_vers > 2) + may_flags |= NFSD_MAY_64BIT_COOKIE; - err = nfsd_open(rqstp, fhp, S_IFDIR, NFSD_MAY_READ, &file); + err = nfsd_open(rqstp, fhp, S_IFDIR, may_flags, &file); if (err) goto out; diff --git a/fs/nfsd/vfs.h b/fs/nfsd/vfs.h index 1dcd238..ec0611b 100644 --- a/fs/nfsd/vfs.h +++ b/fs/nfsd/vfs.h @@ -27,6 +27,8 @@ #define NFSD_MAY_BYPASS_GSS 0x400 #define NFSD_MAY_READ_IF_EXEC 0x800 +#define NFSD_MAY_64BIT_COOKIE 0x1000 /* 64 bit readdir cookies for >= NFSv3 */ + #define NFSD_MAY_CREATE (NFSD_MAY_EXEC|NFSD_MAY_WRITE) #define NFSD_MAY_REMOVE (NFSD_MAY_EXEC|NFSD_MAY_WRITE|NFSD_MAY_TRUNC) diff --git a/fs/nfsd/xdr4.h b/fs/nfsd/xdr4.h index 2364747..1b35015 100644 --- a/fs/nfsd/xdr4.h +++ b/fs/nfsd/xdr4.h @@ -43,6 +43,13 @@ #define NFSD4_MAX_TAGLEN 128 #define XDR_LEN(n) (((n) + 3) & ~3) +#define CURRENT_STATE_ID_FLAG (1<<0) +#define SAVED_STATE_ID_FLAG (1<<1) + +#define SET_STATE_ID(c, f) ((c)->sid_flags |= (f)) +#define HAS_STATE_ID(c, f) ((c)->sid_flags & (f)) +#define CLEAR_STATE_ID(c, f) ((c)->sid_flags &= ~(f)) + struct nfsd4_compound_state { struct svc_fh current_fh; struct svc_fh save_fh; @@ -54,6 +61,10 @@ struct nfsd4_compound_state { size_t iovlen; u32 minorversion; u32 status; + stateid_t current_stateid; + stateid_t save_stateid; + /* to indicate current and saved state id presents */ + u32 sid_flags; }; static inline bool nfsd4_has_session(struct nfsd4_compound_state *cs) @@ -212,16 +223,19 @@ struct nfsd4_open { struct xdr_netobj op_fname; /* request - everything but CLAIM_PREV */ u32 op_delegate_type; /* request - CLAIM_PREV only */ stateid_t op_delegate_stateid; /* request - response */ + u32 op_why_no_deleg; /* response - DELEG_NONE_EXT only */ u32 op_create; /* request */ u32 op_createmode; /* request */ u32 op_bmval[3]; /* request */ struct iattr iattr; /* UNCHECKED4, GUARDED4, EXCLUSIVE4_1 */ - nfs4_verifier verf; /* EXCLUSIVE4 */ + nfs4_verifier op_verf __attribute__((aligned(32))); + /* EXCLUSIVE4 */ clientid_t op_clientid; /* request */ struct xdr_netobj op_owner; /* request */ u32 op_seqid; /* request */ u32 op_share_access; /* request */ u32 op_share_deny; /* request */ + u32 op_deleg_want; /* request */ stateid_t op_stateid; /* response */ u32 op_recall; /* recall */ struct nfsd4_change_info op_cinfo; /* response */ @@ -234,7 +248,6 @@ struct nfsd4_open { struct nfs4_acl *op_acl; }; #define op_iattr iattr -#define op_verf verf struct nfsd4_open_confirm { stateid_t oc_req_stateid /* request */; @@ -245,8 +258,9 @@ struct nfsd4_open_confirm { struct nfsd4_open_downgrade { stateid_t od_stateid; u32 od_seqid; - u32 od_share_access; - u32 od_share_deny; + u32 od_share_access; /* request */ + u32 od_deleg_want; /* request */ + u32 od_share_deny; /* request */ }; @@ -343,10 +357,15 @@ struct nfsd4_saved_compoundargs { struct page **pagelist; }; +struct nfsd4_test_stateid_id { + __be32 ts_id_status; + stateid_t ts_id_stateid; + struct list_head ts_id_list; +}; + struct nfsd4_test_stateid { __be32 ts_num_ids; - struct nfsd4_compoundargs *ts_saved_args; - struct nfsd4_saved_compoundargs ts_savedp; + struct list_head ts_stateid_list; }; struct nfsd4_free_stateid { @@ -503,7 +522,8 @@ static inline bool nfsd4_is_solo_sequence(struct nfsd4_compoundres *resp) static inline bool nfsd4_not_cached(struct nfsd4_compoundres *resp) { - return !resp->cstate.slot->sl_cachethis || nfsd4_is_solo_sequence(resp); + return !(resp->cstate.slot->sl_flags & NFSD4_SLOT_CACHETHIS) + || nfsd4_is_solo_sequence(resp); } #define NFS4_SVC_XDRSIZE sizeof(struct nfsd4_compoundargs) diff --git a/include/linux/fs.h b/include/linux/fs.h index c437f91..135693e 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -92,6 +92,10 @@ struct inodes_stat_t { /* File is opened using open(.., 3, ..) and is writeable only for ioctls (specialy hack for floppy.c) */ #define FMODE_WRITE_IOCTL ((__force fmode_t)0x100) +/* 32bit hashes as llseek() offset (for directories) */ +#define FMODE_32BITHASH ((__force fmode_t)0x200) +/* 64bit hashes as llseek() offset (for directories) */ +#define FMODE_64BITHASH ((__force fmode_t)0x400) /* * Don't update ctime and mtime. diff --git a/include/linux/nfs4.h b/include/linux/nfs4.h index 834df8b..0987146 100644 --- a/include/linux/nfs4.h +++ b/include/linux/nfs4.h @@ -438,7 +438,20 @@ enum limit_by4 { enum open_delegation_type4 { NFS4_OPEN_DELEGATE_NONE = 0, NFS4_OPEN_DELEGATE_READ = 1, - NFS4_OPEN_DELEGATE_WRITE = 2 + NFS4_OPEN_DELEGATE_WRITE = 2, + NFS4_OPEN_DELEGATE_NONE_EXT = 3, /* 4.1 */ +}; + +enum why_no_delegation4 { /* new to v4.1 */ + WND4_NOT_WANTED = 0, + WND4_CONTENTION = 1, + WND4_RESOURCE = 2, + WND4_NOT_SUPP_FTYPE = 3, + WND4_WRITE_DELEG_NOT_SUPP_FTYPE = 4, + WND4_NOT_SUPP_UPGRADE = 5, + WND4_NOT_SUPP_DOWNGRADE = 6, + WND4_CANCELLED = 7, + WND4_IS_DIR = 8, }; enum lock_type4 { diff --git a/include/linux/nfsd/cld.h b/include/linux/nfsd/cld.h new file mode 100644 index 0000000..f14a9ab --- /dev/null +++ b/include/linux/nfsd/cld.h @@ -0,0 +1,56 @@ +/* + * Upcall description for nfsdcld communication + * + * Copyright (c) 2012 Red Hat, Inc. + * Author(s): Jeff Layton <jlayton@redhat.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + */ + +#ifndef _NFSD_CLD_H +#define _NFSD_CLD_H + +/* latest upcall version available */ +#define CLD_UPCALL_VERSION 1 + +/* defined by RFC3530 */ +#define NFS4_OPAQUE_LIMIT 1024 + +enum cld_command { + Cld_Create, /* create a record for this cm_id */ + Cld_Remove, /* remove record of this cm_id */ + Cld_Check, /* is this cm_id allowed? */ + Cld_GraceDone, /* grace period is complete */ +}; + +/* representation of long-form NFSv4 client ID */ +struct cld_name { + uint16_t cn_len; /* length of cm_id */ + unsigned char cn_id[NFS4_OPAQUE_LIMIT]; /* client-provided */ +} __attribute__((packed)); + +/* message struct for communication with userspace */ +struct cld_msg { + uint8_t cm_vers; /* upcall version */ + uint8_t cm_cmd; /* upcall command */ + int16_t cm_status; /* return code */ + uint32_t cm_xid; /* transaction id */ + union { + int64_t cm_gracetime; /* grace period start time */ + struct cld_name cm_name; + } __attribute__((packed)) cm_u; +} __attribute__((packed)); + +#endif /* !_NFSD_CLD_H */ diff --git a/include/linux/sunrpc/svc_rdma.h b/include/linux/sunrpc/svc_rdma.h index c14fe86..0b8e3e6 100644 --- a/include/linux/sunrpc/svc_rdma.h +++ b/include/linux/sunrpc/svc_rdma.h @@ -190,7 +190,7 @@ extern int svc_rdma_xdr_encode_error(struct svcxprt_rdma *, extern void svc_rdma_xdr_encode_write_list(struct rpcrdma_msg *, int); extern void svc_rdma_xdr_encode_reply_array(struct rpcrdma_write_array *, int); extern void svc_rdma_xdr_encode_array_chunk(struct rpcrdma_write_array *, int, - u32, u64, u32); + __be32, __be64, u32); extern void svc_rdma_xdr_encode_reply_header(struct svcxprt_rdma *, struct rpcrdma_msg *, struct rpcrdma_msg *, @@ -292,7 +292,7 @@ svc_rdma_get_reply_array(struct rpcrdma_msg *rmsgp) if (wr_ary) { rp_ary = (struct rpcrdma_write_array *) &wr_ary-> - wc_array[wr_ary->wc_nchunks].wc_target.rs_length; + wc_array[ntohl(wr_ary->wc_nchunks)].wc_target.rs_length; goto found_it; } diff --git a/net/sunrpc/cache.c b/net/sunrpc/cache.c index f21ece0..de0b0f3 100644 --- a/net/sunrpc/cache.c +++ b/net/sunrpc/cache.c @@ -830,6 +830,8 @@ static ssize_t cache_do_downcall(char *kaddr, const char __user *buf, { ssize_t ret; + if (count == 0) + return -EINVAL; if (copy_from_user(kaddr, buf, count)) return -EFAULT; kaddr[count] = '\0'; diff --git a/net/sunrpc/rpc_pipe.c b/net/sunrpc/rpc_pipe.c index c84c0e0..0af37fc 100644 --- a/net/sunrpc/rpc_pipe.c +++ b/net/sunrpc/rpc_pipe.c @@ -1014,6 +1014,7 @@ enum { RPCAUTH_statd, RPCAUTH_nfsd4_cb, RPCAUTH_cache, + RPCAUTH_nfsd, RPCAUTH_RootEOF }; @@ -1046,6 +1047,10 @@ static const struct rpc_filelist files[] = { .name = "cache", .mode = S_IFDIR | S_IRUGO | S_IXUGO, }, + [RPCAUTH_nfsd] = { + .name = "nfsd", + .mode = S_IFDIR | S_IRUGO | S_IXUGO, + }, }; /* diff --git a/net/sunrpc/svcauth_unix.c b/net/sunrpc/svcauth_unix.c index bcd574f..521d8f7 100644 --- a/net/sunrpc/svcauth_unix.c +++ b/net/sunrpc/svcauth_unix.c @@ -507,7 +507,7 @@ static int unix_gid_parse(struct cache_detail *cd, time_t expiry; struct unix_gid ug, *ugp; - if (mlen <= 0 || mesg[mlen-1] != '\n') + if (mesg[mlen - 1] != '\n') return -EINVAL; mesg[mlen-1] = 0; diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c index 40ae884..824d32f 100644 --- a/net/sunrpc/svcsock.c +++ b/net/sunrpc/svcsock.c @@ -1381,8 +1381,6 @@ void svc_sock_update_bufs(struct svc_serv *serv) spin_lock_bh(&serv->sv_lock); list_for_each_entry(svsk, &serv->sv_permsocks, sk_xprt.xpt_list) set_bit(XPT_CHNGBUF, &svsk->sk_xprt.xpt_flags); - list_for_each_entry(svsk, &serv->sv_tempsocks, sk_xprt.xpt_list) - set_bit(XPT_CHNGBUF, &svsk->sk_xprt.xpt_flags); spin_unlock_bh(&serv->sv_lock); } EXPORT_SYMBOL_GPL(svc_sock_update_bufs); diff --git a/net/sunrpc/xprtrdma/svc_rdma.c b/net/sunrpc/xprtrdma/svc_rdma.c index 09af4fa..8343737 100644 --- a/net/sunrpc/xprtrdma/svc_rdma.c +++ b/net/sunrpc/xprtrdma/svc_rdma.c @@ -47,6 +47,7 @@ #include <linux/sunrpc/clnt.h> #include <linux/sunrpc/sched.h> #include <linux/sunrpc/svc_rdma.h> +#include "xprt_rdma.h" #define RPCDBG_FACILITY RPCDBG_SVCXPRT diff --git a/net/sunrpc/xprtrdma/svc_rdma_marshal.c b/net/sunrpc/xprtrdma/svc_rdma_marshal.c index 9530ef2..8d2eddd 100644 --- a/net/sunrpc/xprtrdma/svc_rdma_marshal.c +++ b/net/sunrpc/xprtrdma/svc_rdma_marshal.c @@ -60,21 +60,11 @@ static u32 *decode_read_list(u32 *va, u32 *vaend) struct rpcrdma_read_chunk *ch = (struct rpcrdma_read_chunk *)va; while (ch->rc_discrim != xdr_zero) { - u64 ch_offset; - if (((unsigned long)ch + sizeof(struct rpcrdma_read_chunk)) > (unsigned long)vaend) { dprintk("svcrdma: vaend=%p, ch=%p\n", vaend, ch); return NULL; } - - ch->rc_discrim = ntohl(ch->rc_discrim); - ch->rc_position = ntohl(ch->rc_position); - ch->rc_target.rs_handle = ntohl(ch->rc_target.rs_handle); - ch->rc_target.rs_length = ntohl(ch->rc_target.rs_length); - va = (u32 *)&ch->rc_target.rs_offset; - xdr_decode_hyper(va, &ch_offset); - put_unaligned(ch_offset, (u64 *)va); ch++; } return (u32 *)&ch->rc_position; @@ -91,7 +81,7 @@ void svc_rdma_rcl_chunk_counts(struct rpcrdma_read_chunk *ch, *byte_count = 0; *ch_count = 0; for (; ch->rc_discrim != 0; ch++) { - *byte_count = *byte_count + ch->rc_target.rs_length; + *byte_count = *byte_count + ntohl(ch->rc_target.rs_length); *ch_count = *ch_count + 1; } } @@ -108,7 +98,8 @@ void svc_rdma_rcl_chunk_counts(struct rpcrdma_read_chunk *ch, */ static u32 *decode_write_list(u32 *va, u32 *vaend) { - int ch_no; + int nchunks; + struct rpcrdma_write_array *ary = (struct rpcrdma_write_array *)va; @@ -121,37 +112,24 @@ static u32 *decode_write_list(u32 *va, u32 *vaend) dprintk("svcrdma: ary=%p, vaend=%p\n", ary, vaend); return NULL; } - ary->wc_discrim = ntohl(ary->wc_discrim); - ary->wc_nchunks = ntohl(ary->wc_nchunks); + nchunks = ntohl(ary->wc_nchunks); if (((unsigned long)&ary->wc_array[0] + - (sizeof(struct rpcrdma_write_chunk) * ary->wc_nchunks)) > + (sizeof(struct rpcrdma_write_chunk) * nchunks)) > (unsigned long)vaend) { dprintk("svcrdma: ary=%p, wc_nchunks=%d, vaend=%p\n", - ary, ary->wc_nchunks, vaend); + ary, nchunks, vaend); return NULL; } - for (ch_no = 0; ch_no < ary->wc_nchunks; ch_no++) { - u64 ch_offset; - - ary->wc_array[ch_no].wc_target.rs_handle = - ntohl(ary->wc_array[ch_no].wc_target.rs_handle); - ary->wc_array[ch_no].wc_target.rs_length = - ntohl(ary->wc_array[ch_no].wc_target.rs_length); - va = (u32 *)&ary->wc_array[ch_no].wc_target.rs_offset; - xdr_decode_hyper(va, &ch_offset); - put_unaligned(ch_offset, (u64 *)va); - } - /* * rs_length is the 2nd 4B field in wc_target and taking its * address skips the list terminator */ - return (u32 *)&ary->wc_array[ch_no].wc_target.rs_length; + return (u32 *)&ary->wc_array[nchunks].wc_target.rs_length; } static u32 *decode_reply_array(u32 *va, u32 *vaend) { - int ch_no; + int nchunks; struct rpcrdma_write_array *ary = (struct rpcrdma_write_array *)va; @@ -164,28 +142,15 @@ static u32 *decode_reply_array(u32 *va, u32 *vaend) dprintk("svcrdma: ary=%p, vaend=%p\n", ary, vaend); return NULL; } - ary->wc_discrim = ntohl(ary->wc_discrim); - ary->wc_nchunks = ntohl(ary->wc_nchunks); + nchunks = ntohl(ary->wc_nchunks); if (((unsigned long)&ary->wc_array[0] + - (sizeof(struct rpcrdma_write_chunk) * ary->wc_nchunks)) > + (sizeof(struct rpcrdma_write_chunk) * nchunks)) > (unsigned long)vaend) { dprintk("svcrdma: ary=%p, wc_nchunks=%d, vaend=%p\n", - ary, ary->wc_nchunks, vaend); + ary, nchunks, vaend); return NULL; } - for (ch_no = 0; ch_no < ary->wc_nchunks; ch_no++) { - u64 ch_offset; - - ary->wc_array[ch_no].wc_target.rs_handle = - ntohl(ary->wc_array[ch_no].wc_target.rs_handle); - ary->wc_array[ch_no].wc_target.rs_length = - ntohl(ary->wc_array[ch_no].wc_target.rs_length); - va = (u32 *)&ary->wc_array[ch_no].wc_target.rs_offset; - xdr_decode_hyper(va, &ch_offset); - put_unaligned(ch_offset, (u64 *)va); - } - - return (u32 *)&ary->wc_array[ch_no]; + return (u32 *)&ary->wc_array[nchunks]; } int svc_rdma_xdr_decode_req(struct rpcrdma_msg **rdma_req, @@ -386,13 +351,14 @@ void svc_rdma_xdr_encode_reply_array(struct rpcrdma_write_array *ary, void svc_rdma_xdr_encode_array_chunk(struct rpcrdma_write_array *ary, int chunk_no, - u32 rs_handle, u64 rs_offset, + __be32 rs_handle, + __be64 rs_offset, u32 write_len) { struct rpcrdma_segment *seg = &ary->wc_array[chunk_no].wc_target; - seg->rs_handle = htonl(rs_handle); + seg->rs_handle = rs_handle; + seg->rs_offset = rs_offset; seg->rs_length = htonl(write_len); - xdr_encode_hyper((u32 *) &seg->rs_offset, rs_offset); } void svc_rdma_xdr_encode_reply_header(struct svcxprt_rdma *xprt, diff --git a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c index df67211..41cb63b 100644 --- a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c +++ b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c @@ -147,7 +147,7 @@ static int map_read_chunks(struct svcxprt_rdma *xprt, page_off = 0; ch = (struct rpcrdma_read_chunk *)&rmsgp->rm_body.rm_chunks[0]; ch_no = 0; - ch_bytes = ch->rc_target.rs_length; + ch_bytes = ntohl(ch->rc_target.rs_length); head->arg.head[0] = rqstp->rq_arg.head[0]; head->arg.tail[0] = rqstp->rq_arg.tail[0]; head->arg.pages = &head->pages[head->count]; @@ -183,7 +183,7 @@ static int map_read_chunks(struct svcxprt_rdma *xprt, ch_no++; ch++; chl_map->ch[ch_no].start = sge_no; - ch_bytes = ch->rc_target.rs_length; + ch_bytes = ntohl(ch->rc_target.rs_length); /* If bytes remaining account for next chunk */ if (byte_count) { head->arg.page_len += ch_bytes; @@ -281,11 +281,12 @@ static int fast_reg_read_chunks(struct svcxprt_rdma *xprt, offset = 0; ch = (struct rpcrdma_read_chunk *)&rmsgp->rm_body.rm_chunks[0]; for (ch_no = 0; ch_no < ch_count; ch_no++) { + int len = ntohl(ch->rc_target.rs_length); rpl_map->sge[ch_no].iov_base = frmr->kva + offset; - rpl_map->sge[ch_no].iov_len = ch->rc_target.rs_length; + rpl_map->sge[ch_no].iov_len = len; chl_map->ch[ch_no].count = 1; chl_map->ch[ch_no].start = ch_no; - offset += ch->rc_target.rs_length; + offset += len; ch++; } @@ -316,7 +317,7 @@ static int rdma_set_ctxt_sge(struct svcxprt_rdma *xprt, for (i = 0; i < count; i++) { ctxt->sge[i].length = 0; /* in case map fails */ if (!frmr) { - BUG_ON(0 == virt_to_page(vec[i].iov_base)); + BUG_ON(!virt_to_page(vec[i].iov_base)); off = (unsigned long)vec[i].iov_base & ~PAGE_MASK; ctxt->sge[i].addr = ib_dma_map_page(xprt->sc_cm_id->device, @@ -426,6 +427,7 @@ static int rdma_read_xdr(struct svcxprt_rdma *xprt, for (ch = (struct rpcrdma_read_chunk *)&rmsgp->rm_body.rm_chunks[0]; ch->rc_discrim != 0; ch++, ch_no++) { + u64 rs_offset; next_sge: ctxt = svc_rdma_get_context(xprt); ctxt->direction = DMA_FROM_DEVICE; @@ -440,10 +442,10 @@ next_sge: read_wr.opcode = IB_WR_RDMA_READ; ctxt->wr_op = read_wr.opcode; read_wr.send_flags = IB_SEND_SIGNALED; - read_wr.wr.rdma.rkey = ch->rc_target.rs_handle; - read_wr.wr.rdma.remote_addr = - get_unaligned(&(ch->rc_target.rs_offset)) + - sgl_offset; + read_wr.wr.rdma.rkey = ntohl(ch->rc_target.rs_handle); + xdr_decode_hyper((__be32 *)&ch->rc_target.rs_offset, + &rs_offset); + read_wr.wr.rdma.remote_addr = rs_offset + sgl_offset; read_wr.sg_list = ctxt->sge; read_wr.num_sge = rdma_read_max_sge(xprt, chl_map->ch[ch_no].count); diff --git a/net/sunrpc/xprtrdma/svc_rdma_sendto.c b/net/sunrpc/xprtrdma/svc_rdma_sendto.c index 249a835..42eb7ba 100644 --- a/net/sunrpc/xprtrdma/svc_rdma_sendto.c +++ b/net/sunrpc/xprtrdma/svc_rdma_sendto.c @@ -409,21 +409,21 @@ static int send_write_chunks(struct svcxprt_rdma *xprt, u64 rs_offset; arg_ch = &arg_ary->wc_array[chunk_no].wc_target; - write_len = min(xfer_len, arg_ch->rs_length); + write_len = min(xfer_len, ntohl(arg_ch->rs_length)); /* Prepare the response chunk given the length actually * written */ - rs_offset = get_unaligned(&(arg_ch->rs_offset)); + xdr_decode_hyper((__be32 *)&arg_ch->rs_offset, &rs_offset); svc_rdma_xdr_encode_array_chunk(res_ary, chunk_no, - arg_ch->rs_handle, - rs_offset, - write_len); + arg_ch->rs_handle, + arg_ch->rs_offset, + write_len); chunk_off = 0; while (write_len) { int this_write; this_write = min(write_len, max_write); ret = send_write(xprt, rqstp, - arg_ch->rs_handle, + ntohl(arg_ch->rs_handle), rs_offset + chunk_off, xdr_off, this_write, @@ -457,6 +457,7 @@ static int send_reply_chunks(struct svcxprt_rdma *xprt, u32 xdr_off; int chunk_no; int chunk_off; + int nchunks; struct rpcrdma_segment *ch; struct rpcrdma_write_array *arg_ary; struct rpcrdma_write_array *res_ary; @@ -476,26 +477,27 @@ static int send_reply_chunks(struct svcxprt_rdma *xprt, max_write = xprt->sc_max_sge * PAGE_SIZE; /* xdr offset starts at RPC message */ + nchunks = ntohl(arg_ary->wc_nchunks); for (xdr_off = 0, chunk_no = 0; - xfer_len && chunk_no < arg_ary->wc_nchunks; + xfer_len && chunk_no < nchunks; chunk_no++) { u64 rs_offset; ch = &arg_ary->wc_array[chunk_no].wc_target; - write_len = min(xfer_len, ch->rs_length); + write_len = min(xfer_len, htonl(ch->rs_length)); /* Prepare the reply chunk given the length actually * written */ - rs_offset = get_unaligned(&(ch->rs_offset)); + xdr_decode_hyper((__be32 *)&ch->rs_offset, &rs_offset); svc_rdma_xdr_encode_array_chunk(res_ary, chunk_no, - ch->rs_handle, rs_offset, - write_len); + ch->rs_handle, ch->rs_offset, + write_len); chunk_off = 0; while (write_len) { int this_write; this_write = min(write_len, max_write); ret = send_write(xprt, rqstp, - ch->rs_handle, + ntohl(ch->rs_handle), rs_offset + chunk_off, xdr_off, this_write, diff --git a/net/sunrpc/xprtrdma/svc_rdma_transport.c b/net/sunrpc/xprtrdma/svc_rdma_transport.c index 894cb42..73b428b 100644 --- a/net/sunrpc/xprtrdma/svc_rdma_transport.c +++ b/net/sunrpc/xprtrdma/svc_rdma_transport.c @@ -51,6 +51,7 @@ #include <rdma/rdma_cm.h> #include <linux/sunrpc/svc_rdma.h> #include <linux/export.h> +#include "xprt_rdma.h" #define RPCDBG_FACILITY RPCDBG_SVCXPRT @@ -90,12 +91,6 @@ struct svc_xprt_class svc_rdma_class = { .xcl_max_payload = RPCSVC_MAXPAYLOAD_TCP, }; -/* WR context cache. Created in svc_rdma.c */ -extern struct kmem_cache *svc_rdma_ctxt_cachep; - -/* Workqueue created in svc_rdma.c */ -extern struct workqueue_struct *svc_rdma_wq; - struct svc_rdma_op_ctxt *svc_rdma_get_context(struct svcxprt_rdma *xprt) { struct svc_rdma_op_ctxt *ctxt; @@ -150,9 +145,6 @@ void svc_rdma_put_context(struct svc_rdma_op_ctxt *ctxt, int free_pages) atomic_dec(&xprt->sc_ctxt_used); } -/* Temporary NFS request map cache. Created in svc_rdma.c */ -extern struct kmem_cache *svc_rdma_map_cachep; - /* * Temporary NFS req mappings are shared across all transport * instances. These are short lived and should be bounded by the number diff --git a/net/sunrpc/xprtrdma/xprt_rdma.h b/net/sunrpc/xprtrdma/xprt_rdma.h index 08c5d5a..9a66c95 100644 --- a/net/sunrpc/xprtrdma/xprt_rdma.h +++ b/net/sunrpc/xprtrdma/xprt_rdma.h @@ -343,4 +343,11 @@ void rpcrdma_reply_handler(struct rpcrdma_rep *); */ int rpcrdma_marshal_req(struct rpc_rqst *); +/* Temporary NFS request map cache. Created in svc_rdma.c */ +extern struct kmem_cache *svc_rdma_map_cachep; +/* WR context cache. Created in svc_rdma.c */ +extern struct kmem_cache *svc_rdma_ctxt_cachep; +/* Workqueue created in svc_rdma.c */ +extern struct workqueue_struct *svc_rdma_wq; + #endif /* _LINUX_SUNRPC_XPRT_RDMA_H */ diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c index 92bc518..890b03f 100644 --- a/net/sunrpc/xprtsock.c +++ b/net/sunrpc/xprtsock.c @@ -2475,6 +2475,7 @@ static struct rpc_xprt_ops xs_tcp_ops = { static struct rpc_xprt_ops bc_tcp_ops = { .reserve_xprt = xprt_reserve_xprt, .release_xprt = xprt_release_xprt, + .rpcbind = xs_local_rpcbind, .buf_alloc = bc_malloc, .buf_free = bc_free, .send_request = bc_send_request, |