diff options
Diffstat (limited to 'fs')
66 files changed, 3109 insertions, 2074 deletions
diff --git a/fs/lockd/clnt4xdr.c b/fs/lockd/clnt4xdr.c index f848b52..3ddcbb1 100644 --- a/fs/lockd/clnt4xdr.c +++ b/fs/lockd/clnt4xdr.c @@ -598,7 +598,7 @@ static struct rpc_procinfo nlm4_procedures[] = { PROC(GRANTED_RES, res, norep), }; -struct rpc_version nlm_version4 = { +const struct rpc_version nlm_version4 = { .number = 4, .nrprocs = ARRAY_SIZE(nlm4_procedures), .procs = nlm4_procedures, diff --git a/fs/lockd/clntlock.c b/fs/lockd/clntlock.c index 8d4ea83..ba1dc2e 100644 --- a/fs/lockd/clntlock.c +++ b/fs/lockd/clntlock.c @@ -62,7 +62,8 @@ struct nlm_host *nlmclnt_init(const struct nlmclnt_initdata *nlm_init) host = nlmclnt_lookup_host(nlm_init->address, nlm_init->addrlen, nlm_init->protocol, nlm_version, - nlm_init->hostname, nlm_init->noresvport); + nlm_init->hostname, nlm_init->noresvport, + nlm_init->net); if (host == NULL) { lockd_down(); return ERR_PTR(-ENOLCK); diff --git a/fs/lockd/clntxdr.c b/fs/lockd/clntxdr.c index 180ac34..3d35e3e 100644 --- a/fs/lockd/clntxdr.c +++ b/fs/lockd/clntxdr.c @@ -596,19 +596,19 @@ static struct rpc_procinfo nlm_procedures[] = { PROC(GRANTED_RES, res, norep), }; -static struct rpc_version nlm_version1 = { +static const struct rpc_version nlm_version1 = { .number = 1, .nrprocs = ARRAY_SIZE(nlm_procedures), .procs = nlm_procedures, }; -static struct rpc_version nlm_version3 = { +static const struct rpc_version nlm_version3 = { .number = 3, .nrprocs = ARRAY_SIZE(nlm_procedures), .procs = nlm_procedures, }; -static struct rpc_version *nlm_versions[] = { +static const struct rpc_version *nlm_versions[] = { [1] = &nlm_version1, [3] = &nlm_version3, #ifdef CONFIG_LOCKD_V4 @@ -618,7 +618,7 @@ static struct rpc_version *nlm_versions[] = { static struct rpc_stat nlm_rpc_stats; -struct rpc_program nlm_program = { +const struct rpc_program nlm_program = { .name = "lockd", .number = NLM_PROGRAM, .nrvers = ARRAY_SIZE(nlm_versions), diff --git a/fs/lockd/host.c b/fs/lockd/host.c index 6f29836..eb75ca7 100644 --- a/fs/lockd/host.c +++ b/fs/lockd/host.c @@ -17,6 +17,8 @@ #include <linux/lockd/lockd.h> #include <linux/mutex.h> +#include <linux/sunrpc/svc_xprt.h> + #include <net/ipv6.h> #define NLMDBG_FACILITY NLMDBG_HOSTCACHE @@ -54,6 +56,7 @@ struct nlm_lookup_host_info { const char *hostname; /* remote's hostname */ const size_t hostname_len; /* it's length */ const int noresvport; /* use non-priv port */ + struct net *net; /* network namespace to bind */ }; /* @@ -155,6 +158,7 @@ static struct nlm_host *nlm_alloc_host(struct nlm_lookup_host_info *ni, INIT_LIST_HEAD(&host->h_reclaim); host->h_nsmhandle = nsm; host->h_addrbuf = nsm->sm_addrbuf; + host->net = ni->net; out: return host; @@ -206,7 +210,8 @@ struct nlm_host *nlmclnt_lookup_host(const struct sockaddr *sap, const unsigned short protocol, const u32 version, const char *hostname, - int noresvport) + int noresvport, + struct net *net) { struct nlm_lookup_host_info ni = { .server = 0, @@ -217,6 +222,7 @@ struct nlm_host *nlmclnt_lookup_host(const struct sockaddr *sap, .hostname = hostname, .hostname_len = strlen(hostname), .noresvport = noresvport, + .net = net, }; struct hlist_head *chain; struct hlist_node *pos; @@ -231,6 +237,8 @@ struct nlm_host *nlmclnt_lookup_host(const struct sockaddr *sap, chain = &nlm_client_hosts[nlm_hash_address(sap)]; hlist_for_each_entry(host, pos, chain, h_hash) { + if (host->net != net) + continue; if (!rpc_cmp_addr(nlm_addr(host), sap)) continue; @@ -318,6 +326,7 @@ struct nlm_host *nlmsvc_lookup_host(const struct svc_rqst *rqstp, struct nsm_handle *nsm = NULL; struct sockaddr *src_sap = svc_daddr(rqstp); size_t src_len = rqstp->rq_daddrlen; + struct net *net = rqstp->rq_xprt->xpt_net; struct nlm_lookup_host_info ni = { .server = 1, .sap = svc_addr(rqstp), @@ -326,6 +335,7 @@ struct nlm_host *nlmsvc_lookup_host(const struct svc_rqst *rqstp, .version = rqstp->rq_vers, .hostname = hostname, .hostname_len = hostname_len, + .net = net, }; dprintk("lockd: %s(host='%*s', vers=%u, proto=%s)\n", __func__, @@ -339,6 +349,8 @@ struct nlm_host *nlmsvc_lookup_host(const struct svc_rqst *rqstp, chain = &nlm_server_hosts[nlm_hash_address(ni.sap)]; hlist_for_each_entry(host, pos, chain, h_hash) { + if (host->net != net) + continue; if (!rpc_cmp_addr(nlm_addr(host), ni.sap)) continue; @@ -431,7 +443,7 @@ nlm_bind_host(struct nlm_host *host) .to_retries = 5U, }; struct rpc_create_args args = { - .net = &init_net, + .net = host->net, .protocol = host->h_proto, .address = nlm_addr(host), .addrsize = host->h_addrlen, @@ -553,12 +565,8 @@ void nlm_host_rebooted(const struct nlm_reboot *info) nsm_release(nsm); } -/* - * Shut down the hosts module. - * Note that this routine is called only at server shutdown time. - */ void -nlm_shutdown_hosts(void) +nlm_shutdown_hosts_net(struct net *net) { struct hlist_head *chain; struct hlist_node *pos; @@ -570,6 +578,8 @@ nlm_shutdown_hosts(void) /* First, make all hosts eligible for gc */ dprintk("lockd: nuking all hosts...\n"); for_each_host(host, pos, chain, nlm_server_hosts) { + if (net && host->net != net) + continue; host->h_expires = jiffies - 1; if (host->h_rpcclnt) { rpc_shutdown_client(host->h_rpcclnt); @@ -580,15 +590,29 @@ nlm_shutdown_hosts(void) /* Then, perform a garbage collection pass */ nlm_gc_hosts(); mutex_unlock(&nlm_host_mutex); +} + +/* + * Shut down the hosts module. + * Note that this routine is called only at server shutdown time. + */ +void +nlm_shutdown_hosts(void) +{ + struct hlist_head *chain; + struct hlist_node *pos; + struct nlm_host *host; + + nlm_shutdown_hosts_net(NULL); /* complain if any hosts are left */ if (nrhosts != 0) { printk(KERN_WARNING "lockd: couldn't shutdown host module!\n"); dprintk("lockd: %lu hosts left:\n", nrhosts); for_each_host(host, pos, chain, nlm_server_hosts) { - dprintk(" %s (cnt %d use %d exp %ld)\n", + dprintk(" %s (cnt %d use %d exp %ld net %p)\n", host->h_name, atomic_read(&host->h_count), - host->h_inuse, host->h_expires); + host->h_inuse, host->h_expires, host->net); } } } diff --git a/fs/lockd/mon.c b/fs/lockd/mon.c index 65ba36b..7ef14b3 100644 --- a/fs/lockd/mon.c +++ b/fs/lockd/mon.c @@ -47,7 +47,7 @@ struct nsm_res { u32 state; }; -static struct rpc_program nsm_program; +static const struct rpc_program nsm_program; static LIST_HEAD(nsm_handles); static DEFINE_SPINLOCK(nsm_lock); @@ -62,14 +62,14 @@ static inline struct sockaddr *nsm_addr(const struct nsm_handle *nsm) return (struct sockaddr *)&nsm->sm_addr; } -static struct rpc_clnt *nsm_create(void) +static struct rpc_clnt *nsm_create(struct net *net) { struct sockaddr_in sin = { .sin_family = AF_INET, .sin_addr.s_addr = htonl(INADDR_LOOPBACK), }; struct rpc_create_args args = { - .net = &init_net, + .net = net, .protocol = XPRT_TRANSPORT_UDP, .address = (struct sockaddr *)&sin, .addrsize = sizeof(sin), @@ -83,7 +83,8 @@ static struct rpc_clnt *nsm_create(void) return rpc_create(&args); } -static int nsm_mon_unmon(struct nsm_handle *nsm, u32 proc, struct nsm_res *res) +static int nsm_mon_unmon(struct nsm_handle *nsm, u32 proc, struct nsm_res *res, + struct net *net) { struct rpc_clnt *clnt; int status; @@ -99,7 +100,7 @@ static int nsm_mon_unmon(struct nsm_handle *nsm, u32 proc, struct nsm_res *res) .rpc_resp = res, }; - clnt = nsm_create(); + clnt = nsm_create(net); if (IS_ERR(clnt)) { status = PTR_ERR(clnt); dprintk("lockd: failed to create NSM upcall transport, " @@ -149,7 +150,7 @@ int nsm_monitor(const struct nlm_host *host) */ nsm->sm_mon_name = nsm_use_hostnames ? nsm->sm_name : nsm->sm_addrbuf; - status = nsm_mon_unmon(nsm, NSMPROC_MON, &res); + status = nsm_mon_unmon(nsm, NSMPROC_MON, &res, host->net); if (unlikely(res.status != 0)) status = -EIO; if (unlikely(status < 0)) { @@ -183,7 +184,7 @@ void nsm_unmonitor(const struct nlm_host *host) && nsm->sm_monitored && !nsm->sm_sticky) { dprintk("lockd: nsm_unmonitor(%s)\n", nsm->sm_name); - status = nsm_mon_unmon(nsm, NSMPROC_UNMON, &res); + status = nsm_mon_unmon(nsm, NSMPROC_UNMON, &res, host->net); if (res.status != 0) status = -EIO; if (status < 0) @@ -534,19 +535,19 @@ static struct rpc_procinfo nsm_procedures[] = { }, }; -static struct rpc_version nsm_version1 = { +static const struct rpc_version nsm_version1 = { .number = 1, .nrprocs = ARRAY_SIZE(nsm_procedures), .procs = nsm_procedures }; -static struct rpc_version * nsm_version[] = { +static const struct rpc_version *nsm_version[] = { [1] = &nsm_version1, }; static struct rpc_stat nsm_stats; -static struct rpc_program nsm_program = { +static const struct rpc_program nsm_program = { .name = "statd", .number = NSM_PROGRAM, .nrvers = ARRAY_SIZE(nsm_version), diff --git a/fs/lockd/netns.h b/fs/lockd/netns.h new file mode 100644 index 0000000..ce227e0 --- /dev/null +++ b/fs/lockd/netns.h @@ -0,0 +1,12 @@ +#ifndef __LOCKD_NETNS_H__ +#define __LOCKD_NETNS_H__ + +#include <net/netns/generic.h> + +struct lockd_net { + unsigned int nlmsvc_users; +}; + +extern int lockd_net_id; + +#endif diff --git a/fs/lockd/svc.c b/fs/lockd/svc.c index c061b9a..2774e10 100644 --- a/fs/lockd/svc.c +++ b/fs/lockd/svc.c @@ -35,6 +35,8 @@ #include <linux/lockd/lockd.h> #include <linux/nfs.h> +#include "netns.h" + #define NLMDBG_FACILITY NLMDBG_SVC #define LOCKD_BUFSIZE (1024 + NLMSVC_XDRSIZE) #define ALLOWED_SIGS (sigmask(SIGKILL)) @@ -50,6 +52,8 @@ static struct task_struct *nlmsvc_task; static struct svc_rqst *nlmsvc_rqst; unsigned long nlmsvc_timeout; +int lockd_net_id; + /* * These can be set at insmod time (useful for NFS as root filesystem), * and also changed through the sysctl interface. -- Jamie Lokier, Aug 2003 @@ -189,27 +193,29 @@ lockd(void *vrqstp) } static int create_lockd_listener(struct svc_serv *serv, const char *name, - const int family, const unsigned short port) + struct net *net, const int family, + const unsigned short port) { struct svc_xprt *xprt; - xprt = svc_find_xprt(serv, name, family, 0); + xprt = svc_find_xprt(serv, name, net, family, 0); if (xprt == NULL) - return svc_create_xprt(serv, name, &init_net, family, port, + return svc_create_xprt(serv, name, net, family, port, SVC_SOCK_DEFAULTS); svc_xprt_put(xprt); return 0; } -static int create_lockd_family(struct svc_serv *serv, const int family) +static int create_lockd_family(struct svc_serv *serv, struct net *net, + const int family) { int err; - err = create_lockd_listener(serv, "udp", family, nlm_udpport); + err = create_lockd_listener(serv, "udp", net, family, nlm_udpport); if (err < 0) return err; - return create_lockd_listener(serv, "tcp", family, nlm_tcpport); + return create_lockd_listener(serv, "tcp", net, family, nlm_tcpport); } /* @@ -222,16 +228,16 @@ static int create_lockd_family(struct svc_serv *serv, const int family) * Returns zero if all listeners are available; otherwise a * negative errno value is returned. */ -static int make_socks(struct svc_serv *serv) +static int make_socks(struct svc_serv *serv, struct net *net) { static int warned; int err; - err = create_lockd_family(serv, PF_INET); + err = create_lockd_family(serv, net, PF_INET); if (err < 0) goto out_err; - err = create_lockd_family(serv, PF_INET6); + err = create_lockd_family(serv, net, PF_INET6); if (err < 0 && err != -EAFNOSUPPORT) goto out_err; @@ -245,6 +251,47 @@ out_err: return err; } +static int lockd_up_net(struct net *net) +{ + struct lockd_net *ln = net_generic(net, lockd_net_id); + struct svc_serv *serv = nlmsvc_rqst->rq_server; + int error; + + if (ln->nlmsvc_users) + return 0; + + error = svc_rpcb_setup(serv, net); + if (error) + goto err_rpcb; + + error = make_socks(serv, net); + if (error < 0) + goto err_socks; + return 0; + +err_socks: + svc_rpcb_cleanup(serv, net); +err_rpcb: + return error; +} + +static void lockd_down_net(struct net *net) +{ + struct lockd_net *ln = net_generic(net, lockd_net_id); + struct svc_serv *serv = nlmsvc_rqst->rq_server; + + if (ln->nlmsvc_users) { + if (--ln->nlmsvc_users == 0) { + nlm_shutdown_hosts_net(net); + svc_shutdown_net(serv, net); + } + } else { + printk(KERN_ERR "lockd_down_net: no users! task=%p, net=%p\n", + nlmsvc_task, net); + BUG(); + } +} + /* * Bring up the lockd process if it's not already up. */ @@ -252,13 +299,16 @@ int lockd_up(void) { struct svc_serv *serv; int error = 0; + struct net *net = current->nsproxy->net_ns; mutex_lock(&nlmsvc_mutex); /* * Check whether we're already up and running. */ - if (nlmsvc_rqst) + if (nlmsvc_rqst) { + error = lockd_up_net(net); goto out; + } /* * Sanity check: if there's no pid, @@ -275,7 +325,7 @@ int lockd_up(void) goto out; } - error = make_socks(serv); + error = make_socks(serv, net); if (error < 0) goto destroy_and_out; @@ -313,8 +363,12 @@ int lockd_up(void) destroy_and_out: svc_destroy(serv); out: - if (!error) + if (!error) { + struct lockd_net *ln = net_generic(net, lockd_net_id); + + ln->nlmsvc_users++; nlmsvc_users++; + } mutex_unlock(&nlmsvc_mutex); return error; } @@ -328,8 +382,10 @@ lockd_down(void) { mutex_lock(&nlmsvc_mutex); if (nlmsvc_users) { - if (--nlmsvc_users) + if (--nlmsvc_users) { + lockd_down_net(current->nsproxy->net_ns); goto out; + } } else { printk(KERN_ERR "lockd_down: no users! task=%p\n", nlmsvc_task); @@ -497,24 +553,55 @@ module_param_call(nlm_tcpport, param_set_port, param_get_int, module_param(nsm_use_hostnames, bool, 0644); module_param(nlm_max_connections, uint, 0644); +static int lockd_init_net(struct net *net) +{ + return 0; +} + +static void lockd_exit_net(struct net *net) +{ +} + +static struct pernet_operations lockd_net_ops = { + .init = lockd_init_net, + .exit = lockd_exit_net, + .id = &lockd_net_id, + .size = sizeof(struct lockd_net), +}; + + /* * Initialising and terminating the module. */ static int __init init_nlm(void) { + int err; + #ifdef CONFIG_SYSCTL + err = -ENOMEM; nlm_sysctl_table = register_sysctl_table(nlm_sysctl_root); - return nlm_sysctl_table ? 0 : -ENOMEM; -#else + if (nlm_sysctl_table == NULL) + goto err_sysctl; +#endif + err = register_pernet_subsys(&lockd_net_ops); + if (err) + goto err_pernet; return 0; + +err_pernet: +#ifdef CONFIG_SYSCTL + unregister_sysctl_table(nlm_sysctl_table); #endif +err_sysctl: + return err; } static void __exit exit_nlm(void) { /* FIXME: delete all NLM clients */ nlm_shutdown_hosts(); + unregister_pernet_subsys(&lockd_net_ops); #ifdef CONFIG_SYSCTL unregister_sysctl_table(nlm_sysctl_table); #endif diff --git a/fs/lockd/svclock.c b/fs/lockd/svclock.c index f0179c3..e46353f 100644 --- a/fs/lockd/svclock.c +++ b/fs/lockd/svclock.c @@ -46,7 +46,6 @@ static void nlmsvc_remove_block(struct nlm_block *block); static int nlmsvc_setgrantargs(struct nlm_rqst *call, struct nlm_lock *lock); static void nlmsvc_freegrantargs(struct nlm_rqst *call); static const struct rpc_call_ops nlmsvc_grant_ops; -static const char *nlmdbg_cookie2a(const struct nlm_cookie *cookie); /* * The list of blocked locks to retry @@ -54,6 +53,35 @@ static const char *nlmdbg_cookie2a(const struct nlm_cookie *cookie); static LIST_HEAD(nlm_blocked); static DEFINE_SPINLOCK(nlm_blocked_lock); +#ifdef LOCKD_DEBUG +static const char *nlmdbg_cookie2a(const struct nlm_cookie *cookie) +{ + /* + * We can get away with a static buffer because we're only + * called with BKL held. + */ + static char buf[2*NLM_MAXCOOKIELEN+1]; + unsigned int i, len = sizeof(buf); + char *p = buf; + + len--; /* allow for trailing \0 */ + if (len < 3) + return "???"; + for (i = 0 ; i < cookie->len ; i++) { + if (len < 2) { + strcpy(p-3, "..."); + break; + } + sprintf(p, "%02x", cookie->data[i]); + p += 2; + len -= 2; + } + *p = '\0'; + + return buf; +} +#endif + /* * Insert a blocked lock into the global list */ @@ -935,32 +963,3 @@ nlmsvc_retry_blocked(void) return timeout; } - -#ifdef RPC_DEBUG -static const char *nlmdbg_cookie2a(const struct nlm_cookie *cookie) -{ - /* - * We can get away with a static buffer because we're only - * called with BKL held. - */ - static char buf[2*NLM_MAXCOOKIELEN+1]; - unsigned int i, len = sizeof(buf); - char *p = buf; - - len--; /* allow for trailing \0 */ - if (len < 3) - return "???"; - for (i = 0 ; i < cookie->len ; i++) { - if (len < 2) { - strcpy(p-3, "..."); - break; - } - sprintf(p, "%02x", cookie->data[i]); - p += 2; - len -= 2; - } - *p = '\0'; - - return buf; -} -#endif diff --git a/fs/nfs/Kconfig b/fs/nfs/Kconfig index dbcd821..2a0e6c5 100644 --- a/fs/nfs/Kconfig +++ b/fs/nfs/Kconfig @@ -64,6 +64,7 @@ config NFS_V4 bool "NFS client support for NFS version 4" depends on NFS_FS select SUNRPC_GSS + select KEYS help This option enables support for version 4 of the NFS protocol (RFC 3530) in the kernel's NFS client. @@ -98,6 +99,18 @@ config PNFS_OBJLAYOUT depends on NFS_FS && NFS_V4_1 && SCSI_OSD_ULD default m +config NFS_V4_1_IMPLEMENTATION_ID_DOMAIN + string "NFSv4.1 Implementation ID Domain" + depends on NFS_V4_1 + default "kernel.org" + help + This option defines the domain portion of the implementation ID that + may be sent in the NFS exchange_id operation. The value must be in + the format of a DNS domain name and should be set to the DNS domain + name of the distribution. + If the NFS client is unchanged from the upstream kernel, this + option should be set to the default "kernel.org". + config ROOT_NFS bool "Root file system on NFS" depends on NFS_FS=y && IP_PNP @@ -130,16 +143,10 @@ config NFS_USE_KERNEL_DNS bool depends on NFS_V4 && !NFS_USE_LEGACY_DNS select DNS_RESOLVER - select KEYS default y -config NFS_USE_NEW_IDMAPPER - bool "Use the new idmapper upcall routine" - depends on NFS_V4 && KEYS - help - Say Y here if you want NFS to use the new idmapper upcall functions. - You will need /sbin/request-key (usually provided by the keyutils - package). For details, read - <file:Documentation/filesystems/nfs/idmapper.txt>. - - If you are unsure, say N. +config NFS_DEBUG + bool + depends on NFS_FS && SUNRPC_DEBUG + select CRC32 + default y diff --git a/fs/nfs/blocklayout/blocklayout.c b/fs/nfs/blocklayout/blocklayout.c index 48cfac3..9c94297 100644 --- a/fs/nfs/blocklayout/blocklayout.c +++ b/fs/nfs/blocklayout/blocklayout.c @@ -46,9 +46,6 @@ MODULE_LICENSE("GPL"); MODULE_AUTHOR("Andy Adamson <andros@citi.umich.edu>"); MODULE_DESCRIPTION("The NFSv4.1 pNFS Block layout driver"); -struct dentry *bl_device_pipe; -wait_queue_head_t bl_wq; - static void print_page(struct page *page) { dprintk("PRINTPAGE page %p\n", page); @@ -236,12 +233,11 @@ bl_read_pagelist(struct nfs_read_data *rdata) sector_t isect, extent_length = 0; struct parallel_io *par; loff_t f_offset = rdata->args.offset; - size_t count = rdata->args.count; struct page **pages = rdata->args.pages; int pg_index = rdata->args.pgbase >> PAGE_CACHE_SHIFT; - dprintk("%s enter nr_pages %u offset %lld count %Zd\n", __func__, - rdata->npages, f_offset, count); + dprintk("%s enter nr_pages %u offset %lld count %u\n", __func__, + rdata->npages, f_offset, (unsigned int)rdata->args.count); par = alloc_parallel(rdata); if (!par) @@ -1025,10 +1021,128 @@ static const struct rpc_pipe_ops bl_upcall_ops = { .destroy_msg = bl_pipe_destroy_msg, }; +static struct dentry *nfs4blocklayout_register_sb(struct super_block *sb, + struct rpc_pipe *pipe) +{ + struct dentry *dir, *dentry; + + dir = rpc_d_lookup_sb(sb, NFS_PIPE_DIRNAME); + if (dir == NULL) + return ERR_PTR(-ENOENT); + dentry = rpc_mkpipe_dentry(dir, "blocklayout", NULL, pipe); + dput(dir); + return dentry; +} + +static void nfs4blocklayout_unregister_sb(struct super_block *sb, + struct rpc_pipe *pipe) +{ + if (pipe->dentry) + rpc_unlink(pipe->dentry); +} + +static int rpc_pipefs_event(struct notifier_block *nb, unsigned long event, + void *ptr) +{ + struct super_block *sb = ptr; + struct net *net = sb->s_fs_info; + struct nfs_net *nn = net_generic(net, nfs_net_id); + struct dentry *dentry; + int ret = 0; + + if (!try_module_get(THIS_MODULE)) + return 0; + + if (nn->bl_device_pipe == NULL) { + module_put(THIS_MODULE); + return 0; + } + + switch (event) { + case RPC_PIPEFS_MOUNT: + dentry = nfs4blocklayout_register_sb(sb, nn->bl_device_pipe); + if (IS_ERR(dentry)) { + ret = PTR_ERR(dentry); + break; + } + nn->bl_device_pipe->dentry = dentry; + break; + case RPC_PIPEFS_UMOUNT: + if (nn->bl_device_pipe->dentry) + nfs4blocklayout_unregister_sb(sb, nn->bl_device_pipe); + break; + default: + ret = -ENOTSUPP; + break; + } + module_put(THIS_MODULE); + return ret; +} + +static struct notifier_block nfs4blocklayout_block = { + .notifier_call = rpc_pipefs_event, +}; + +static struct dentry *nfs4blocklayout_register_net(struct net *net, + struct rpc_pipe *pipe) +{ + struct super_block *pipefs_sb; + struct dentry *dentry; + + pipefs_sb = rpc_get_sb_net(net); + if (!pipefs_sb) + return NULL; + dentry = nfs4blocklayout_register_sb(pipefs_sb, pipe); + rpc_put_sb_net(net); + return dentry; +} + +static void nfs4blocklayout_unregister_net(struct net *net, + struct rpc_pipe *pipe) +{ + struct super_block *pipefs_sb; + + pipefs_sb = rpc_get_sb_net(net); + if (pipefs_sb) { + nfs4blocklayout_unregister_sb(pipefs_sb, pipe); + rpc_put_sb_net(net); + } +} + +static int nfs4blocklayout_net_init(struct net *net) +{ + struct nfs_net *nn = net_generic(net, nfs_net_id); + struct dentry *dentry; + + init_waitqueue_head(&nn->bl_wq); + nn->bl_device_pipe = rpc_mkpipe_data(&bl_upcall_ops, 0); + if (IS_ERR(nn->bl_device_pipe)) + return PTR_ERR(nn->bl_device_pipe); + dentry = nfs4blocklayout_register_net(net, nn->bl_device_pipe); + if (IS_ERR(dentry)) { + rpc_destroy_pipe_data(nn->bl_device_pipe); + return PTR_ERR(dentry); + } + nn->bl_device_pipe->dentry = dentry; + return 0; +} + +static void nfs4blocklayout_net_exit(struct net *net) +{ + struct nfs_net *nn = net_generic(net, nfs_net_id); + + nfs4blocklayout_unregister_net(net, nn->bl_device_pipe); + rpc_destroy_pipe_data(nn->bl_device_pipe); + nn->bl_device_pipe = NULL; +} + +static struct pernet_operations nfs4blocklayout_net_ops = { + .init = nfs4blocklayout_net_init, + .exit = nfs4blocklayout_net_exit, +}; + static int __init nfs4blocklayout_init(void) { - struct vfsmount *mnt; - struct path path; int ret; dprintk("%s: NFSv4 Block Layout Driver Registering...\n", __func__); @@ -1037,32 +1151,17 @@ static int __init nfs4blocklayout_init(void) if (ret) goto out; - init_waitqueue_head(&bl_wq); - - mnt = rpc_get_mount(); - if (IS_ERR(mnt)) { - ret = PTR_ERR(mnt); + ret = rpc_pipefs_notifier_register(&nfs4blocklayout_block); + if (ret) goto out_remove; - } - - ret = vfs_path_lookup(mnt->mnt_root, - mnt, - NFS_PIPE_DIRNAME, 0, &path); + ret = register_pernet_subsys(&nfs4blocklayout_net_ops); if (ret) - goto out_putrpc; - - bl_device_pipe = rpc_mkpipe(path.dentry, "blocklayout", NULL, - &bl_upcall_ops, 0); - path_put(&path); - if (IS_ERR(bl_device_pipe)) { - ret = PTR_ERR(bl_device_pipe); - goto out_putrpc; - } + goto out_notifier; out: return ret; -out_putrpc: - rpc_put_mount(); +out_notifier: + rpc_pipefs_notifier_unregister(&nfs4blocklayout_block); out_remove: pnfs_unregister_layoutdriver(&blocklayout_type); return ret; @@ -1073,9 +1172,9 @@ static void __exit nfs4blocklayout_exit(void) dprintk("%s: NFSv4 Block Layout Driver Unregistering...\n", __func__); + rpc_pipefs_notifier_unregister(&nfs4blocklayout_block); + unregister_pernet_subsys(&nfs4blocklayout_net_ops); pnfs_unregister_layoutdriver(&blocklayout_type); - rpc_unlink(bl_device_pipe); - rpc_put_mount(); } MODULE_ALIAS("nfs-layouttype4-3"); diff --git a/fs/nfs/blocklayout/blocklayout.h b/fs/nfs/blocklayout/blocklayout.h index e31a2df..0335069 100644 --- a/fs/nfs/blocklayout/blocklayout.h +++ b/fs/nfs/blocklayout/blocklayout.h @@ -37,6 +37,7 @@ #include <linux/sunrpc/rpc_pipe_fs.h> #include "../pnfs.h" +#include "../netns.h" #define PAGE_CACHE_SECTORS (PAGE_CACHE_SIZE >> SECTOR_SHIFT) #define PAGE_CACHE_SECTOR_SHIFT (PAGE_CACHE_SHIFT - SECTOR_SHIFT) @@ -50,6 +51,7 @@ struct pnfs_block_dev { struct list_head bm_node; struct nfs4_deviceid bm_mdevid; /* associated devid */ struct block_device *bm_mdev; /* meta device itself */ + struct net *net; }; enum exstate4 { @@ -151,9 +153,9 @@ BLK_LSEG2EXT(struct pnfs_layout_segment *lseg) return BLK_LO2EXT(lseg->pls_layout); } -struct bl_dev_msg { - int32_t status; - uint32_t major, minor; +struct bl_pipe_msg { + struct rpc_pipe_msg msg; + wait_queue_head_t *bl_wq; }; struct bl_msg_hdr { @@ -161,9 +163,6 @@ struct bl_msg_hdr { u16 totallen; /* length of entire message, including hdr itself */ }; -extern struct dentry *bl_device_pipe; -extern wait_queue_head_t bl_wq; - #define BL_DEVICE_UMOUNT 0x0 /* Umount--delete devices */ #define BL_DEVICE_MOUNT 0x1 /* Mount--create devices*/ #define BL_DEVICE_REQUEST_INIT 0x0 /* Start request */ diff --git a/fs/nfs/blocklayout/blocklayoutdev.c b/fs/nfs/blocklayout/blocklayoutdev.c index d08ba91..a5c88a5 100644 --- a/fs/nfs/blocklayout/blocklayoutdev.c +++ b/fs/nfs/blocklayout/blocklayoutdev.c @@ -46,7 +46,7 @@ static int decode_sector_number(__be32 **rp, sector_t *sp) *rp = xdr_decode_hyper(*rp, &s); if (s & 0x1ff) { - printk(KERN_WARNING "%s: sector not aligned\n", __func__); + printk(KERN_WARNING "NFS: %s: sector not aligned\n", __func__); return -1; } *sp = s >> SECTOR_SHIFT; @@ -79,27 +79,30 @@ int nfs4_blkdev_put(struct block_device *bdev) return blkdev_put(bdev, FMODE_READ); } -static struct bl_dev_msg bl_mount_reply; - ssize_t bl_pipe_downcall(struct file *filp, const char __user *src, size_t mlen) { + struct nfs_net *nn = net_generic(filp->f_dentry->d_sb->s_fs_info, + nfs_net_id); + if (mlen != sizeof (struct bl_dev_msg)) return -EINVAL; - if (copy_from_user(&bl_mount_reply, src, mlen) != 0) + if (copy_from_user(&nn->bl_mount_reply, src, mlen) != 0) return -EFAULT; - wake_up(&bl_wq); + wake_up(&nn->bl_wq); return mlen; } void bl_pipe_destroy_msg(struct rpc_pipe_msg *msg) { + struct bl_pipe_msg *bl_pipe_msg = container_of(msg, struct bl_pipe_msg, msg); + if (msg->errno >= 0) return; - wake_up(&bl_wq); + wake_up(bl_pipe_msg->bl_wq); } /* @@ -111,29 +114,33 @@ nfs4_blk_decode_device(struct nfs_server *server, { struct pnfs_block_dev *rv; struct block_device *bd = NULL; - struct rpc_pipe_msg msg; + struct bl_pipe_msg bl_pipe_msg; + struct rpc_pipe_msg *msg = &bl_pipe_msg.msg; struct bl_msg_hdr bl_msg = { .type = BL_DEVICE_MOUNT, .totallen = dev->mincount, }; uint8_t *dataptr; DECLARE_WAITQUEUE(wq, current); - struct bl_dev_msg *reply = &bl_mount_reply; int offset, len, i, rc; + struct net *net = server->nfs_client->net; + struct nfs_net *nn = net_generic(net, nfs_net_id); + struct bl_dev_msg *reply = &nn->bl_mount_reply; dprintk("%s CREATING PIPEFS MESSAGE\n", __func__); dprintk("%s: deviceid: %s, mincount: %d\n", __func__, dev->dev_id.data, dev->mincount); - memset(&msg, 0, sizeof(msg)); - msg.data = kzalloc(sizeof(bl_msg) + dev->mincount, GFP_NOFS); - if (!msg.data) { + bl_pipe_msg.bl_wq = &nn->bl_wq; + memset(msg, 0, sizeof(*msg)); + msg->data = kzalloc(sizeof(bl_msg) + dev->mincount, GFP_NOFS); + if (!msg->data) { rv = ERR_PTR(-ENOMEM); goto out; } - memcpy(msg.data, &bl_msg, sizeof(bl_msg)); - dataptr = (uint8_t *) msg.data; + memcpy(msg->data, &bl_msg, sizeof(bl_msg)); + dataptr = (uint8_t *) msg->data; len = dev->mincount; offset = sizeof(bl_msg); for (i = 0; len > 0; i++) { @@ -142,13 +149,13 @@ nfs4_blk_decode_device(struct nfs_server *server, len -= PAGE_CACHE_SIZE; offset += PAGE_CACHE_SIZE; } - msg.len = sizeof(bl_msg) + dev->mincount; + msg->len = sizeof(bl_msg) + dev->mincount; dprintk("%s CALLING USERSPACE DAEMON\n", __func__); - add_wait_queue(&bl_wq, &wq); - rc = rpc_queue_upcall(bl_device_pipe->d_inode, &msg); + add_wait_queue(&nn->bl_wq, &wq); + rc = rpc_queue_upcall(nn->bl_device_pipe, msg); if (rc < 0) { - remove_wait_queue(&bl_wq, &wq); + remove_wait_queue(&nn->bl_wq, &wq); rv = ERR_PTR(rc); goto out; } @@ -156,7 +163,7 @@ nfs4_blk_decode_device(struct nfs_server *server, set_current_state(TASK_UNINTERRUPTIBLE); schedule(); __set_current_state(TASK_RUNNING); - remove_wait_queue(&bl_wq, &wq); + remove_wait_queue(&nn->bl_wq, &wq); if (reply->status != BL_DEVICE_REQUEST_PROC) { dprintk("%s failed to open device: %d\n", @@ -181,13 +188,14 @@ nfs4_blk_decode_device(struct nfs_server *server, rv->bm_mdev = bd; memcpy(&rv->bm_mdevid, &dev->dev_id, sizeof(struct nfs4_deviceid)); + rv->net = net; dprintk("%s Created device %s with bd_block_size %u\n", __func__, bd->bd_disk->disk_name, bd->bd_block_size); out: - kfree(msg.data); + kfree(msg->data); return rv; } diff --git a/fs/nfs/blocklayout/blocklayoutdm.c b/fs/nfs/blocklayout/blocklayoutdm.c index d055c75..737d839 100644 --- a/fs/nfs/blocklayout/blocklayoutdm.c +++ b/fs/nfs/blocklayout/blocklayoutdm.c @@ -38,9 +38,10 @@ #define NFSDBG_FACILITY NFSDBG_PNFS_LD -static void dev_remove(dev_t dev) +static void dev_remove(struct net *net, dev_t dev) { - struct rpc_pipe_msg msg; + struct bl_pipe_msg bl_pipe_msg; + struct rpc_pipe_msg *msg = &bl_pipe_msg.msg; struct bl_dev_msg bl_umount_request; struct bl_msg_hdr bl_msg = { .type = BL_DEVICE_UMOUNT, @@ -48,36 +49,38 @@ static void dev_remove(dev_t dev) }; uint8_t *dataptr; DECLARE_WAITQUEUE(wq, current); + struct nfs_net *nn = net_generic(net, nfs_net_id); dprintk("Entering %s\n", __func__); - memset(&msg, 0, sizeof(msg)); - msg.data = kzalloc(1 + sizeof(bl_umount_request), GFP_NOFS); - if (!msg.data) + bl_pipe_msg.bl_wq = &nn->bl_wq; + memset(msg, 0, sizeof(*msg)); + msg->data = kzalloc(1 + sizeof(bl_umount_request), GFP_NOFS); + if (!msg->data) goto out; memset(&bl_umount_request, 0, sizeof(bl_umount_request)); bl_umount_request.major = MAJOR(dev); bl_umount_request.minor = MINOR(dev); - memcpy(msg.data, &bl_msg, sizeof(bl_msg)); - dataptr = (uint8_t *) msg.data; + memcpy(msg->data, &bl_msg, sizeof(bl_msg)); + dataptr = (uint8_t *) msg->data; memcpy(&dataptr[sizeof(bl_msg)], &bl_umount_request, sizeof(bl_umount_request)); - msg.len = sizeof(bl_msg) + bl_msg.totallen; + msg->len = sizeof(bl_msg) + bl_msg.totallen; - add_wait_queue(&bl_wq, &wq); - if (rpc_queue_upcall(bl_device_pipe->d_inode, &msg) < 0) { - remove_wait_queue(&bl_wq, &wq); + add_wait_queue(&nn->bl_wq, &wq); + if (rpc_queue_upcall(nn->bl_device_pipe, msg) < 0) { + remove_wait_queue(&nn->bl_wq, &wq); goto out; } set_current_state(TASK_UNINTERRUPTIBLE); schedule(); __set_current_state(TASK_RUNNING); - remove_wait_queue(&bl_wq, &wq); + remove_wait_queue(&nn->bl_wq, &wq); out: - kfree(msg.data); + kfree(msg->data); } /* @@ -90,10 +93,10 @@ static void nfs4_blk_metadev_release(struct pnfs_block_dev *bdev) dprintk("%s Releasing\n", __func__); rv = nfs4_blkdev_put(bdev->bm_mdev); if (rv) - printk(KERN_ERR "%s nfs4_blkdev_put returns %d\n", + printk(KERN_ERR "NFS: %s nfs4_blkdev_put returns %d\n", __func__, rv); - dev_remove(bdev->bm_mdev->bd_dev); + dev_remove(bdev->net, bdev->bm_mdev->bd_dev); } void bl_free_block_dev(struct pnfs_block_dev *bdev) diff --git a/fs/nfs/blocklayout/extents.c b/fs/nfs/blocklayout/extents.c index 1abac09..1f9a603 100644 --- a/fs/nfs/blocklayout/extents.c +++ b/fs/nfs/blocklayout/extents.c @@ -147,7 +147,7 @@ static int _preload_range(struct pnfs_inval_markings *marks, count = (int)(end - start) / (int)tree->mtt_step_size; /* Pre-malloc what memory we might need */ - storage = kmalloc(sizeof(*storage) * count, GFP_NOFS); + storage = kcalloc(count, sizeof(*storage), GFP_NOFS); if (!storage) return -ENOMEM; for (i = 0; i < count; i++) { diff --git a/fs/nfs/cache_lib.c b/fs/nfs/cache_lib.c index c98b439..dded263 100644 --- a/fs/nfs/cache_lib.c +++ b/fs/nfs/cache_lib.c @@ -13,6 +13,7 @@ #include <linux/slab.h> #include <linux/sunrpc/cache.h> #include <linux/sunrpc/rpc_pipe_fs.h> +#include <net/net_namespace.h> #include "cache_lib.h" @@ -111,30 +112,54 @@ int nfs_cache_wait_for_upcall(struct nfs_cache_defer_req *dreq) return 0; } -int nfs_cache_register(struct cache_detail *cd) +int nfs_cache_register_sb(struct super_block *sb, struct cache_detail *cd) { - struct vfsmount *mnt; - struct path path; int ret; + struct dentry *dir; - mnt = rpc_get_mount(); - if (IS_ERR(mnt)) - return PTR_ERR(mnt); - ret = vfs_path_lookup(mnt->mnt_root, mnt, "/cache", 0, &path); - if (ret) - goto err; - ret = sunrpc_cache_register_pipefs(path.dentry, cd->name, 0600, cd); - path_put(&path); - if (!ret) - return ret; -err: - rpc_put_mount(); + dir = rpc_d_lookup_sb(sb, "cache"); + BUG_ON(dir == NULL); + ret = sunrpc_cache_register_pipefs(dir, cd->name, 0600, cd); + dput(dir); return ret; } -void nfs_cache_unregister(struct cache_detail *cd) +int nfs_cache_register_net(struct net *net, struct cache_detail *cd) { - sunrpc_cache_unregister_pipefs(cd); - rpc_put_mount(); + struct super_block *pipefs_sb; + int ret = 0; + + pipefs_sb = rpc_get_sb_net(net); + if (pipefs_sb) { + ret = nfs_cache_register_sb(pipefs_sb, cd); + rpc_put_sb_net(net); + } + return ret; +} + +void nfs_cache_unregister_sb(struct super_block *sb, struct cache_detail *cd) +{ + if (cd->u.pipefs.dir) + sunrpc_cache_unregister_pipefs(cd); +} + +void nfs_cache_unregister_net(struct net *net, struct cache_detail *cd) +{ + struct super_block *pipefs_sb; + + pipefs_sb = rpc_get_sb_net(net); + if (pipefs_sb) { + nfs_cache_unregister_sb(pipefs_sb, cd); + rpc_put_sb_net(net); + } +} + +void nfs_cache_init(struct cache_detail *cd) +{ + sunrpc_init_cache_detail(cd); } +void nfs_cache_destroy(struct cache_detail *cd) +{ + sunrpc_destroy_cache_detail(cd); +} diff --git a/fs/nfs/cache_lib.h b/fs/nfs/cache_lib.h index 7cf6caf..317db95 100644 --- a/fs/nfs/cache_lib.h +++ b/fs/nfs/cache_lib.h @@ -23,5 +23,11 @@ extern struct nfs_cache_defer_req *nfs_cache_defer_req_alloc(void); extern void nfs_cache_defer_req_put(struct nfs_cache_defer_req *dreq); extern int nfs_cache_wait_for_upcall(struct nfs_cache_defer_req *dreq); -extern int nfs_cache_register(struct cache_detail *cd); -extern void nfs_cache_unregister(struct cache_detail *cd); +extern void nfs_cache_init(struct cache_detail *cd); +extern void nfs_cache_destroy(struct cache_detail *cd); +extern int nfs_cache_register_net(struct net *net, struct cache_detail *cd); +extern void nfs_cache_unregister_net(struct net *net, struct cache_detail *cd); +extern int nfs_cache_register_sb(struct super_block *sb, + struct cache_detail *cd); +extern void nfs_cache_unregister_sb(struct super_block *sb, + struct cache_detail *cd); diff --git a/fs/nfs/callback.c b/fs/nfs/callback.c index 516f337..eb95f50 100644 --- a/fs/nfs/callback.c +++ b/fs/nfs/callback.c @@ -85,7 +85,7 @@ nfs4_callback_svc(void *vrqstp) } if (err < 0) { if (err != preverr) { - printk(KERN_WARNING "%s: unexpected error " + printk(KERN_WARNING "NFS: %s: unexpected error " "from svc_recv (%d)\n", __func__, err); preverr = err; } @@ -101,12 +101,12 @@ nfs4_callback_svc(void *vrqstp) /* * Prepare to bring up the NFSv4 callback service */ -struct svc_rqst * -nfs4_callback_up(struct svc_serv *serv) +static struct svc_rqst * +nfs4_callback_up(struct svc_serv *serv, struct rpc_xprt *xprt) { int ret; - ret = svc_create_xprt(serv, "tcp", &init_net, PF_INET, + ret = svc_create_xprt(serv, "tcp", xprt->xprt_net, PF_INET, nfs_callback_set_tcpport, SVC_SOCK_ANONYMOUS); if (ret <= 0) goto out_err; @@ -114,7 +114,7 @@ nfs4_callback_up(struct svc_serv *serv) dprintk("NFS: Callback listener port = %u (af %u)\n", nfs_callback_tcpport, PF_INET); - ret = svc_create_xprt(serv, "tcp", &init_net, PF_INET6, + ret = svc_create_xprt(serv, "tcp", xprt->xprt_net, PF_INET6, nfs_callback_set_tcpport, SVC_SOCK_ANONYMOUS); if (ret > 0) { nfs_callback_tcpport6 = ret; @@ -172,7 +172,7 @@ nfs41_callback_svc(void *vrqstp) /* * Bring up the NFSv4.1 callback service */ -struct svc_rqst * +static struct svc_rqst * nfs41_callback_up(struct svc_serv *serv, struct rpc_xprt *xprt) { struct svc_rqst *rqstp; @@ -183,7 +183,7 @@ nfs41_callback_up(struct svc_serv *serv, struct rpc_xprt *xprt) * fore channel connection. * Returns the input port (0) and sets the svc_serv bc_xprt on success */ - ret = svc_create_xprt(serv, "tcp-bc", &init_net, PF_INET, 0, + ret = svc_create_xprt(serv, "tcp-bc", xprt->xprt_net, PF_INET, 0, SVC_SOCK_ANONYMOUS); if (ret < 0) { rqstp = ERR_PTR(ret); @@ -269,7 +269,7 @@ int nfs_callback_up(u32 minorversion, struct rpc_xprt *xprt) serv, xprt, &rqstp, &callback_svc); if (!minorversion_setup) { /* v4.0 callback setup */ - rqstp = nfs4_callback_up(serv); + rqstp = nfs4_callback_up(serv, xprt); callback_svc = nfs4_callback_svc; } @@ -332,7 +332,6 @@ void nfs_callback_down(int minorversion) int check_gss_callback_principal(struct nfs_client *clp, struct svc_rqst *rqstp) { - struct rpc_clnt *r = clp->cl_rpcclient; char *p = svc_gss_principal(rqstp); if (rqstp->rq_authop->flavour != RPC_AUTH_GSS) @@ -353,7 +352,7 @@ check_gss_callback_principal(struct nfs_client *clp, struct svc_rqst *rqstp) if (memcmp(p, "nfs@", 4) != 0) return 0; p += 4; - if (strcmp(p, r->cl_server) != 0) + if (strcmp(p, clp->cl_hostname) != 0) return 0; return 1; } diff --git a/fs/nfs/callback.h b/fs/nfs/callback.h index c89d3b9..a5527c9 100644 --- a/fs/nfs/callback.h +++ b/fs/nfs/callback.h @@ -38,7 +38,8 @@ enum nfs4_callback_opnum { struct cb_process_state { __be32 drc_status; struct nfs_client *clp; - int slotid; + u32 slotid; + struct net *net; }; struct cb_compound_hdr_arg { diff --git a/fs/nfs/callback_proc.c b/fs/nfs/callback_proc.c index 54cea8a..1b5d809 100644 --- a/fs/nfs/callback_proc.c +++ b/fs/nfs/callback_proc.c @@ -8,6 +8,7 @@ #include <linux/nfs4.h> #include <linux/nfs_fs.h> #include <linux/slab.h> +#include <linux/rcupdate.h> #include "nfs4_fs.h" #include "callback.h" #include "delegation.h" @@ -33,7 +34,7 @@ __be32 nfs4_callback_getattr(struct cb_getattrargs *args, res->bitmap[0] = res->bitmap[1] = 0; res->status = htonl(NFS4ERR_BADHANDLE); - dprintk("NFS: GETATTR callback request from %s\n", + dprintk_rcu("NFS: GETATTR callback request from %s\n", rpc_peeraddr2str(cps->clp->cl_rpcclient, RPC_DISPLAY_ADDR)); inode = nfs_delegation_find_inode(cps->clp, &args->fh); @@ -73,7 +74,7 @@ __be32 nfs4_callback_recall(struct cb_recallargs *args, void *dummy, if (!cps->clp) /* Always set for v4.0. Set in cb_sequence for v4.1 */ goto out; - dprintk("NFS: RECALL callback request from %s\n", + dprintk_rcu("NFS: RECALL callback request from %s\n", rpc_peeraddr2str(cps->clp->cl_rpcclient, RPC_DISPLAY_ADDR)); res = htonl(NFS4ERR_BADHANDLE); @@ -86,8 +87,7 @@ __be32 nfs4_callback_recall(struct cb_recallargs *args, void *dummy, res = 0; break; case -ENOENT: - if (res != 0) - res = htonl(NFS4ERR_BAD_STATEID); + res = htonl(NFS4ERR_BAD_STATEID); break; default: res = htonl(NFS4ERR_RESOURCE); @@ -98,52 +98,64 @@ out: return res; } -int nfs4_validate_delegation_stateid(struct nfs_delegation *delegation, const nfs4_stateid *stateid) -{ - if (delegation == NULL || memcmp(delegation->stateid.data, stateid->data, - sizeof(delegation->stateid.data)) != 0) - return 0; - return 1; -} - #if defined(CONFIG_NFS_V4_1) -static u32 initiate_file_draining(struct nfs_client *clp, - struct cb_layoutrecallargs *args) +/* + * Lookup a layout by filehandle. + * + * Note: gets a refcount on the layout hdr and on its respective inode. + * Caller must put the layout hdr and the inode. + * + * TODO: keep track of all layouts (and delegations) in a hash table + * hashed by filehandle. + */ +static struct pnfs_layout_hdr * get_layout_by_fh_locked(struct nfs_client *clp, struct nfs_fh *fh) { struct nfs_server *server; - struct pnfs_layout_hdr *lo; struct inode *ino; - bool found = false; - u32 rv = NFS4ERR_NOMATCHING_LAYOUT; - LIST_HEAD(free_me_list); + struct pnfs_layout_hdr *lo; - spin_lock(&clp->cl_lock); - rcu_read_lock(); list_for_each_entry_rcu(server, &clp->cl_superblocks, client_link) { list_for_each_entry(lo, &server->layouts, plh_layouts) { - if (nfs_compare_fh(&args->cbl_fh, - &NFS_I(lo->plh_inode)->fh)) + if (nfs_compare_fh(fh, &NFS_I(lo->plh_inode)->fh)) continue; ino = igrab(lo->plh_inode); if (!ino) continue; - found = true; - /* Without this, layout can be freed as soon - * as we release cl_lock. - */ get_layout_hdr(lo); - break; + return lo; } - if (found) - break; } + + return NULL; +} + +static struct pnfs_layout_hdr * get_layout_by_fh(struct nfs_client *clp, struct nfs_fh *fh) +{ + struct pnfs_layout_hdr *lo; + + spin_lock(&clp->cl_lock); + rcu_read_lock(); + lo = get_layout_by_fh_locked(clp, fh); rcu_read_unlock(); spin_unlock(&clp->cl_lock); - if (!found) + return lo; +} + +static u32 initiate_file_draining(struct nfs_client *clp, + struct cb_layoutrecallargs *args) +{ + struct inode *ino; + struct pnfs_layout_hdr *lo; + u32 rv = NFS4ERR_NOMATCHING_LAYOUT; + LIST_HEAD(free_me_list); + + lo = get_layout_by_fh(clp, &args->cbl_fh); + if (!lo) return NFS4ERR_NOMATCHING_LAYOUT; + ino = lo->plh_inode; spin_lock(&ino->i_lock); if (test_bit(NFS_LAYOUT_BULK_RECALL, &lo->plh_flags) || mark_matching_lsegs_invalid(lo, &free_me_list, @@ -213,17 +225,13 @@ static u32 initiate_bulk_draining(struct nfs_client *clp, static u32 do_callback_layoutrecall(struct nfs_client *clp, struct cb_layoutrecallargs *args) { - u32 res = NFS4ERR_DELAY; + u32 res; dprintk("%s enter, type=%i\n", __func__, args->cbl_recall_type); - if (test_and_set_bit(NFS4CLNT_LAYOUTRECALL, &clp->cl_state)) - goto out; if (args->cbl_recall_type == RETURN_FILE) res = initiate_file_draining(clp, args); else res = initiate_bulk_draining(clp, args); - clear_bit(NFS4CLNT_LAYOUTRECALL, &clp->cl_state); -out: dprintk("%s returning %i\n", __func__, res); return res; @@ -303,21 +311,6 @@ out: return res; } -int nfs41_validate_delegation_stateid(struct nfs_delegation *delegation, const nfs4_stateid *stateid) -{ - if (delegation == NULL) - return 0; - - if (stateid->stateid.seqid != 0) - return 0; - if (memcmp(&delegation->stateid.stateid.other, - &stateid->stateid.other, - NFS4_STATEID_OTHER_SIZE)) - return 0; - - return 1; -} - /* * Validate the sequenceID sent by the server. * Return success if the sequenceID is one more than what we last saw on @@ -441,7 +434,7 @@ __be32 nfs4_callback_sequence(struct cb_sequenceargs *args, int i; __be32 status = htonl(NFS4ERR_BADSESSION); - clp = nfs4_find_client_sessionid(args->csa_addr, &args->csa_sessionid); + clp = nfs4_find_client_sessionid(cps->net, args->csa_addr, &args->csa_sessionid); if (clp == NULL) goto out; @@ -517,7 +510,7 @@ __be32 nfs4_callback_recallany(struct cb_recallanyargs *args, void *dummy, if (!cps->clp) /* set in cb_sequence */ goto out; - dprintk("NFS: RECALL_ANY callback request from %s\n", + dprintk_rcu("NFS: RECALL_ANY callback request from %s\n", rpc_peeraddr2str(cps->clp->cl_rpcclient, RPC_DISPLAY_ADDR)); status = cpu_to_be32(NFS4ERR_INVAL); @@ -552,7 +545,7 @@ __be32 nfs4_callback_recallslot(struct cb_recallslotargs *args, void *dummy, if (!cps->clp) /* set in cb_sequence */ goto out; - dprintk("NFS: CB_RECALL_SLOT request from %s target max slots %d\n", + dprintk_rcu("NFS: CB_RECALL_SLOT request from %s target max slots %d\n", rpc_peeraddr2str(cps->clp->cl_rpcclient, RPC_DISPLAY_ADDR), args->crsa_target_max_slots); diff --git a/fs/nfs/callback_xdr.c b/fs/nfs/callback_xdr.c index d50b274..95bfc24 100644 --- a/fs/nfs/callback_xdr.c +++ b/fs/nfs/callback_xdr.c @@ -9,6 +9,8 @@ #include <linux/sunrpc/svc.h> #include <linux/nfs4.h> #include <linux/nfs_fs.h> +#include <linux/ratelimit.h> +#include <linux/printk.h> #include <linux/slab.h> #include <linux/sunrpc/bc_xprt.h> #include "nfs4_fs.h" @@ -73,7 +75,7 @@ static __be32 *read_buf(struct xdr_stream *xdr, int nbytes) p = xdr_inline_decode(xdr, nbytes); if (unlikely(p == NULL)) - printk(KERN_WARNING "NFSv4 callback reply buffer overflowed!\n"); + printk(KERN_WARNING "NFS: NFSv4 callback reply buffer overflowed!\n"); return p; } @@ -138,10 +140,10 @@ static __be32 decode_stateid(struct xdr_stream *xdr, nfs4_stateid *stateid) { __be32 *p; - p = read_buf(xdr, 16); + p = read_buf(xdr, NFS4_STATEID_SIZE); if (unlikely(p == NULL)) return htonl(NFS4ERR_RESOURCE); - memcpy(stateid->data, p, 16); + memcpy(stateid, p, NFS4_STATEID_SIZE); return 0; } @@ -155,7 +157,7 @@ static __be32 decode_compound_hdr_arg(struct xdr_stream *xdr, struct cb_compound return status; /* We do not like overly long tags! */ if (hdr->taglen > CB_OP_TAGLEN_MAXSZ - 12) { - printk("NFSv4 CALLBACK %s: client sent tag of length %u\n", + printk("NFS: NFSv4 CALLBACK %s: client sent tag of length %u\n", __func__, hdr->taglen); return htonl(NFS4ERR_RESOURCE); } @@ -167,7 +169,7 @@ static __be32 decode_compound_hdr_arg(struct xdr_stream *xdr, struct cb_compound if (hdr->minorversion <= 1) { hdr->cb_ident = ntohl(*p++); /* ignored by v4.1 */ } else { - printk(KERN_WARNING "%s: NFSv4 server callback with " + pr_warn_ratelimited("NFS: %s: NFSv4 server callback with " "illegal minor version %u!\n", __func__, hdr->minorversion); return htonl(NFS4ERR_MINOR_VERS_MISMATCH); @@ -759,14 +761,14 @@ static void nfs4_callback_free_slot(struct nfs4_session *session) * Let the state manager know callback processing done. * A single slot, so highest used slotid is either 0 or -1 */ - tbl->highest_used_slotid = -1; + tbl->highest_used_slotid = NFS4_NO_SLOT; nfs4_check_drain_bc_complete(session); spin_unlock(&tbl->slot_tbl_lock); } static void nfs4_cb_free_slot(struct cb_process_state *cps) { - if (cps->slotid != -1) + if (cps->slotid != NFS4_NO_SLOT) nfs4_callback_free_slot(cps->clp->cl_session); } @@ -860,7 +862,8 @@ static __be32 nfs4_callback_compound(struct svc_rqst *rqstp, void *argp, void *r struct cb_process_state cps = { .drc_status = 0, .clp = NULL, - .slotid = -1, + .slotid = NFS4_NO_SLOT, + .net = rqstp->rq_xprt->xpt_net, }; unsigned int nops = 0; @@ -876,7 +879,7 @@ static __be32 nfs4_callback_compound(struct svc_rqst *rqstp, void *argp, void *r return rpc_garbage_args; if (hdr_arg.minorversion == 0) { - cps.clp = nfs4_find_client_ident(hdr_arg.cb_ident); + cps.clp = nfs4_find_client_ident(rqstp->rq_xprt->xpt_net, hdr_arg.cb_ident); if (!cps.clp || !check_gss_callback_principal(cps.clp, rqstp)) return rpc_drop_reply; } diff --git a/fs/nfs/client.c b/fs/nfs/client.c index d4f772e..4a108a0 100644 --- a/fs/nfs/client.c +++ b/fs/nfs/client.c @@ -40,6 +40,8 @@ #include <net/ipv6.h> #include <linux/nfs_xdr.h> #include <linux/sunrpc/bc_xprt.h> +#include <linux/nsproxy.h> +#include <linux/pid_namespace.h> #include <asm/system.h> @@ -50,15 +52,12 @@ #include "internal.h" #include "fscache.h" #include "pnfs.h" +#include "netns.h" #define NFSDBG_FACILITY NFSDBG_CLIENT -static DEFINE_SPINLOCK(nfs_client_lock); -static LIST_HEAD(nfs_client_list); -static LIST_HEAD(nfs_volume_list); static DECLARE_WAIT_QUEUE_HEAD(nfs_client_active_wq); #ifdef CONFIG_NFS_V4 -static DEFINE_IDR(cb_ident_idr); /* Protected by nfs_client_lock */ /* * Get a unique NFSv4.0 callback identifier which will be used @@ -67,15 +66,16 @@ static DEFINE_IDR(cb_ident_idr); /* Protected by nfs_client_lock */ static int nfs_get_cb_ident_idr(struct nfs_client *clp, int minorversion) { int ret = 0; + struct nfs_net *nn = net_generic(clp->net, nfs_net_id); if (clp->rpc_ops->version != 4 || minorversion != 0) return ret; retry: - if (!idr_pre_get(&cb_ident_idr, GFP_KERNEL)) + if (!idr_pre_get(&nn->cb_ident_idr, GFP_KERNEL)) return -ENOMEM; - spin_lock(&nfs_client_lock); - ret = idr_get_new(&cb_ident_idr, clp, &clp->cl_cb_ident); - spin_unlock(&nfs_client_lock); + spin_lock(&nn->nfs_client_lock); + ret = idr_get_new(&nn->cb_ident_idr, clp, &clp->cl_cb_ident); + spin_unlock(&nn->nfs_client_lock); if (ret == -EAGAIN) goto retry; return ret; @@ -90,7 +90,7 @@ static bool nfs4_disable_idmapping = true; /* * RPC cruft for NFS */ -static struct rpc_version *nfs_version[5] = { +static const struct rpc_version *nfs_version[5] = { [2] = &nfs_version2, #ifdef CONFIG_NFS_V3 [3] = &nfs_version3, @@ -100,7 +100,7 @@ static struct rpc_version *nfs_version[5] = { #endif }; -struct rpc_program nfs_program = { +const struct rpc_program nfs_program = { .name = "nfs", .number = NFS_PROGRAM, .nrvers = ARRAY_SIZE(nfs_version), @@ -116,11 +116,11 @@ struct rpc_stat nfs_rpcstat = { #ifdef CONFIG_NFS_V3_ACL static struct rpc_stat nfsacl_rpcstat = { &nfsacl_program }; -static struct rpc_version * nfsacl_version[] = { +static const struct rpc_version *nfsacl_version[] = { [3] = &nfsacl_version3, }; -struct rpc_program nfsacl_program = { +const struct rpc_program nfsacl_program = { .name = "nfsacl", .number = NFS_ACL_PROGRAM, .nrvers = ARRAY_SIZE(nfsacl_version), @@ -136,6 +136,7 @@ struct nfs_client_initdata { const struct nfs_rpc_ops *rpc_ops; int proto; u32 minorversion; + struct net *net; }; /* @@ -172,6 +173,7 @@ static struct nfs_client *nfs_alloc_client(const struct nfs_client_initdata *cl_ clp->cl_rpcclient = ERR_PTR(-EINVAL); clp->cl_proto = cl_init->proto; + clp->net = get_net(cl_init->net); #ifdef CONFIG_NFS_V4 err = nfs_get_cb_ident_idr(clp, cl_init->minorversion); @@ -203,8 +205,11 @@ error_0: #ifdef CONFIG_NFS_V4_1 static void nfs4_shutdown_session(struct nfs_client *clp) { - if (nfs4_has_session(clp)) + if (nfs4_has_session(clp)) { + nfs4_deviceid_purge_client(clp); nfs4_destroy_session(clp->cl_session); + } + } #else /* CONFIG_NFS_V4_1 */ static void nfs4_shutdown_session(struct nfs_client *clp) @@ -234,16 +239,20 @@ static void nfs4_shutdown_client(struct nfs_client *clp) } /* idr_remove_all is not needed as all id's are removed by nfs_put_client */ -void nfs_cleanup_cb_ident_idr(void) +void nfs_cleanup_cb_ident_idr(struct net *net) { - idr_destroy(&cb_ident_idr); + struct nfs_net *nn = net_generic(net, nfs_net_id); + + idr_destroy(&nn->cb_ident_idr); } /* nfs_client_lock held */ static void nfs_cb_idr_remove_locked(struct nfs_client *clp) { + struct nfs_net *nn = net_generic(clp->net, nfs_net_id); + if (clp->cl_cb_ident) - idr_remove(&cb_ident_idr, clp->cl_cb_ident); + idr_remove(&nn->cb_ident_idr, clp->cl_cb_ident); } static void pnfs_init_server(struct nfs_server *server) @@ -261,7 +270,7 @@ static void nfs4_shutdown_client(struct nfs_client *clp) { } -void nfs_cleanup_cb_ident_idr(void) +void nfs_cleanup_cb_ident_idr(struct net *net) { } @@ -293,10 +302,10 @@ static void nfs_free_client(struct nfs_client *clp) if (clp->cl_machine_cred != NULL) put_rpccred(clp->cl_machine_cred); - nfs4_deviceid_purge_client(clp); - + put_net(clp->net); kfree(clp->cl_hostname); kfree(clp->server_scope); + kfree(clp->impl_id); kfree(clp); dprintk("<-- nfs_free_client()\n"); @@ -307,15 +316,18 @@ static void nfs_free_client(struct nfs_client *clp) */ void nfs_put_client(struct nfs_client *clp) { + struct nfs_net *nn; + if (!clp) return; dprintk("--> nfs_put_client({%d})\n", atomic_read(&clp->cl_count)); + nn = net_generic(clp->net, nfs_net_id); - if (atomic_dec_and_lock(&clp->cl_count, &nfs_client_lock)) { + if (atomic_dec_and_lock(&clp->cl_count, &nn->nfs_client_lock)) { list_del(&clp->cl_share_link); nfs_cb_idr_remove_locked(clp); - spin_unlock(&nfs_client_lock); + spin_unlock(&nn->nfs_client_lock); BUG_ON(!list_empty(&clp->cl_superblocks)); @@ -393,6 +405,7 @@ static int nfs_sockaddr_cmp_ip4(const struct sockaddr *sa1, (sin1->sin_port == sin2->sin_port); } +#if defined(CONFIG_NFS_V4_1) /* * Test if two socket addresses represent the same actual socket, * by comparing (only) relevant fields, excluding the port number. @@ -411,6 +424,7 @@ static int nfs_sockaddr_match_ipaddr(const struct sockaddr *sa1, } return 0; } +#endif /* CONFIG_NFS_V4_1 */ /* * Test if two socket addresses represent the same actual socket, @@ -431,10 +445,10 @@ static int nfs_sockaddr_cmp(const struct sockaddr *sa1, return 0; } +#if defined(CONFIG_NFS_V4_1) /* Common match routine for v4.0 and v4.1 callback services */ -bool -nfs4_cb_match_client(const struct sockaddr *addr, struct nfs_client *clp, - u32 minorversion) +static bool nfs4_cb_match_client(const struct sockaddr *addr, + struct nfs_client *clp, u32 minorversion) { struct sockaddr *clap = (struct sockaddr *)&clp->cl_addr; @@ -454,6 +468,7 @@ nfs4_cb_match_client(const struct sockaddr *addr, struct nfs_client *clp, return true; } +#endif /* CONFIG_NFS_V4_1 */ /* * Find an nfs_client on the list that matches the initialisation data @@ -463,8 +478,9 @@ static struct nfs_client *nfs_match_client(const struct nfs_client_initdata *dat { struct nfs_client *clp; const struct sockaddr *sap = data->addr; + struct nfs_net *nn = net_generic(data->net, nfs_net_id); - list_for_each_entry(clp, &nfs_client_list, cl_share_link) { + list_for_each_entry(clp, &nn->nfs_client_list, cl_share_link) { const struct sockaddr *clap = (struct sockaddr *)&clp->cl_addr; /* Don't match clients that failed to initialise properly */ if (clp->cl_cons_state < 0) @@ -502,13 +518,14 @@ nfs_get_client(const struct nfs_client_initdata *cl_init, { struct nfs_client *clp, *new = NULL; int error; + struct nfs_net *nn = net_generic(cl_init->net, nfs_net_id); dprintk("--> nfs_get_client(%s,v%u)\n", cl_init->hostname ?: "", cl_init->rpc_ops->version); /* see if the client already exists */ do { - spin_lock(&nfs_client_lock); + spin_lock(&nn->nfs_client_lock); clp = nfs_match_client(cl_init); if (clp) @@ -516,7 +533,7 @@ nfs_get_client(const struct nfs_client_initdata *cl_init, if (new) goto install_client; - spin_unlock(&nfs_client_lock); + spin_unlock(&nn->nfs_client_lock); new = nfs_alloc_client(cl_init); } while (!IS_ERR(new)); @@ -527,8 +544,8 @@ nfs_get_client(const struct nfs_client_initdata *cl_init, /* install a new client and return with it unready */ install_client: clp = new; - list_add(&clp->cl_share_link, &nfs_client_list); - spin_unlock(&nfs_client_lock); + list_add(&clp->cl_share_link, &nn->nfs_client_list); + spin_unlock(&nn->nfs_client_lock); error = cl_init->rpc_ops->init_client(clp, timeparms, ip_addr, authflavour, noresvport); @@ -543,7 +560,7 @@ install_client: * - make sure it's ready before returning */ found_client: - spin_unlock(&nfs_client_lock); + spin_unlock(&nn->nfs_client_lock); if (new) nfs_free_client(new); @@ -643,7 +660,7 @@ static int nfs_create_rpc_client(struct nfs_client *clp, { struct rpc_clnt *clnt = NULL; struct rpc_create_args args = { - .net = &init_net, + .net = clp->net, .protocol = clp->cl_proto, .address = (struct sockaddr *)&clp->cl_addr, .addrsize = clp->cl_addrlen, @@ -697,6 +714,7 @@ static int nfs_start_lockd(struct nfs_server *server) .nfs_version = clp->rpc_ops->version, .noresvport = server->flags & NFS_MOUNT_NORESVPORT ? 1 : 0, + .net = clp->net, }; if (nlm_init.nfs_version > 3) @@ -832,6 +850,7 @@ static int nfs_init_server(struct nfs_server *server, .addrlen = data->nfs_server.addrlen, .rpc_ops = &nfs_v2_clientops, .proto = data->nfs_server.protocol, + .net = data->net, }; struct rpc_timeout timeparms; struct nfs_client *clp; @@ -1030,25 +1049,30 @@ static void nfs_server_copy_userdata(struct nfs_server *target, struct nfs_serve static void nfs_server_insert_lists(struct nfs_server *server) { struct nfs_client *clp = server->nfs_client; + struct nfs_net *nn = net_generic(clp->net, nfs_net_id); - spin_lock(&nfs_client_lock); + spin_lock(&nn->nfs_client_lock); list_add_tail_rcu(&server->client_link, &clp->cl_superblocks); - list_add_tail(&server->master_link, &nfs_volume_list); + list_add_tail(&server->master_link, &nn->nfs_volume_list); clear_bit(NFS_CS_STOP_RENEW, &clp->cl_res_state); - spin_unlock(&nfs_client_lock); + spin_unlock(&nn->nfs_client_lock); } static void nfs_server_remove_lists(struct nfs_server *server) { struct nfs_client *clp = server->nfs_client; + struct nfs_net *nn; - spin_lock(&nfs_client_lock); + if (clp == NULL) + return; + nn = net_generic(clp->net, nfs_net_id); + spin_lock(&nn->nfs_client_lock); list_del_rcu(&server->client_link); - if (clp && list_empty(&clp->cl_superblocks)) + if (list_empty(&clp->cl_superblocks)) set_bit(NFS_CS_STOP_RENEW, &clp->cl_res_state); list_del(&server->master_link); - spin_unlock(&nfs_client_lock); + spin_unlock(&nn->nfs_client_lock); synchronize_rcu(); } @@ -1087,6 +1111,8 @@ static struct nfs_server *nfs_alloc_server(void) return NULL; } + ida_init(&server->openowner_id); + ida_init(&server->lockowner_id); pnfs_init_server(server); return server; @@ -1112,6 +1138,8 @@ void nfs_free_server(struct nfs_server *server) nfs_put_client(server->nfs_client); + ida_destroy(&server->lockowner_id); + ida_destroy(&server->openowner_id); nfs_free_iostats(server->io_stats); bdi_destroy(&server->backing_dev_info); kfree(server); @@ -1190,45 +1218,19 @@ error: /* * NFSv4.0 callback thread helper * - * Find a client by IP address, protocol version, and minorversion - * - * Called from the pg_authenticate method. The callback identifier - * is not used as it has not been decoded. - * - * Returns NULL if no such client - */ -struct nfs_client * -nfs4_find_client_no_ident(const struct sockaddr *addr) -{ - struct nfs_client *clp; - - spin_lock(&nfs_client_lock); - list_for_each_entry(clp, &nfs_client_list, cl_share_link) { - if (nfs4_cb_match_client(addr, clp, 0) == false) - continue; - atomic_inc(&clp->cl_count); - spin_unlock(&nfs_client_lock); - return clp; - } - spin_unlock(&nfs_client_lock); - return NULL; -} - -/* - * NFSv4.0 callback thread helper - * * Find a client by callback identifier */ struct nfs_client * -nfs4_find_client_ident(int cb_ident) +nfs4_find_client_ident(struct net *net, int cb_ident) { struct nfs_client *clp; + struct nfs_net *nn = net_generic(net, nfs_net_id); - spin_lock(&nfs_client_lock); - clp = idr_find(&cb_ident_idr, cb_ident); + spin_lock(&nn->nfs_client_lock); + clp = idr_find(&nn->cb_ident_idr, cb_ident); if (clp) atomic_inc(&clp->cl_count); - spin_unlock(&nfs_client_lock); + spin_unlock(&nn->nfs_client_lock); return clp; } @@ -1241,13 +1243,14 @@ nfs4_find_client_ident(int cb_ident) * Returns NULL if no such client */ struct nfs_client * -nfs4_find_client_sessionid(const struct sockaddr *addr, +nfs4_find_client_sessionid(struct net *net, const struct sockaddr *addr, struct nfs4_sessionid *sid) { struct nfs_client *clp; + struct nfs_net *nn = net_generic(net, nfs_net_id); - spin_lock(&nfs_client_lock); - list_for_each_entry(clp, &nfs_client_list, cl_share_link) { + spin_lock(&nn->nfs_client_lock); + list_for_each_entry(clp, &nn->nfs_client_list, cl_share_link) { if (nfs4_cb_match_client(addr, clp, 1) == false) continue; @@ -1260,17 +1263,17 @@ nfs4_find_client_sessionid(const struct sockaddr *addr, continue; atomic_inc(&clp->cl_count); - spin_unlock(&nfs_client_lock); + spin_unlock(&nn->nfs_client_lock); return clp; } - spin_unlock(&nfs_client_lock); + spin_unlock(&nn->nfs_client_lock); return NULL; } #else /* CONFIG_NFS_V4_1 */ struct nfs_client * -nfs4_find_client_sessionid(const struct sockaddr *addr, +nfs4_find_client_sessionid(struct net *net, const struct sockaddr *addr, struct nfs4_sessionid *sid) { return NULL; @@ -1285,16 +1288,18 @@ static int nfs4_init_callback(struct nfs_client *clp) int error; if (clp->rpc_ops->version == 4) { + struct rpc_xprt *xprt; + + xprt = rcu_dereference_raw(clp->cl_rpcclient->cl_xprt); + if (nfs4_has_session(clp)) { - error = xprt_setup_backchannel( - clp->cl_rpcclient->cl_xprt, + error = xprt_setup_backchannel(xprt, NFS41_BC_MIN_CALLBACKS); if (error < 0) return error; } - error = nfs_callback_up(clp->cl_mvops->minor_version, - clp->cl_rpcclient->cl_xprt); + error = nfs_callback_up(clp->cl_mvops->minor_version, xprt); if (error < 0) { dprintk("%s: failed to start callback. Error = %d\n", __func__, error); @@ -1345,6 +1350,7 @@ int nfs4_init_client(struct nfs_client *clp, rpc_authflavor_t authflavour, int noresvport) { + char buf[INET6_ADDRSTRLEN + 1]; int error; if (clp->cl_cons_state == NFS_CS_READY) { @@ -1360,6 +1366,20 @@ int nfs4_init_client(struct nfs_client *clp, 1, noresvport); if (error < 0) goto error; + + /* If no clientaddr= option was specified, find a usable cb address */ + if (ip_addr == NULL) { + struct sockaddr_storage cb_addr; + struct sockaddr *sap = (struct sockaddr *)&cb_addr; + + error = rpc_localaddr(clp->cl_rpcclient, sap, sizeof(cb_addr)); + if (error < 0) + goto error; + error = rpc_ntop(sap, buf, sizeof(buf)); + if (error < 0) + goto error; + ip_addr = (const char *)buf; + } strlcpy(clp->cl_ipaddr, ip_addr, sizeof(clp->cl_ipaddr)); error = nfs_idmap_new(clp); @@ -1394,7 +1414,7 @@ static int nfs4_set_client(struct nfs_server *server, const char *ip_addr, rpc_authflavor_t authflavour, int proto, const struct rpc_timeout *timeparms, - u32 minorversion) + u32 minorversion, struct net *net) { struct nfs_client_initdata cl_init = { .hostname = hostname, @@ -1403,6 +1423,7 @@ static int nfs4_set_client(struct nfs_server *server, .rpc_ops = &nfs_v4_clientops, .proto = proto, .minorversion = minorversion, + .net = net, }; struct nfs_client *clp; int error; @@ -1454,6 +1475,7 @@ struct nfs_client *nfs4_set_ds_client(struct nfs_client* mds_clp, .rpc_ops = &nfs_v4_clientops, .proto = ds_proto, .minorversion = mds_clp->cl_minorversion, + .net = mds_clp->net, }; struct rpc_timeout ds_timeout = { .to_initval = 15 * HZ, @@ -1581,7 +1603,8 @@ static int nfs4_init_server(struct nfs_server *server, data->auth_flavors[0], data->nfs_server.protocol, &timeparms, - data->minorversion); + data->minorversion, + data->net); if (error < 0) goto error; @@ -1676,9 +1699,10 @@ struct nfs_server *nfs4_create_referral_server(struct nfs_clone_mount *data, data->addrlen, parent_client->cl_ipaddr, data->authflavor, - parent_server->client->cl_xprt->prot, + rpc_protocol(parent_server->client), parent_server->client->cl_timeout, - parent_client->cl_mvops->minor_version); + parent_client->cl_mvops->minor_version, + parent_client->net); if (error < 0) goto error; @@ -1771,6 +1795,18 @@ out_free_server: return ERR_PTR(error); } +void nfs_clients_init(struct net *net) +{ + struct nfs_net *nn = net_generic(net, nfs_net_id); + + INIT_LIST_HEAD(&nn->nfs_client_list); + INIT_LIST_HEAD(&nn->nfs_volume_list); +#ifdef CONFIG_NFS_V4 + idr_init(&nn->cb_ident_idr); +#endif + spin_lock_init(&nn->nfs_client_lock); +} + #ifdef CONFIG_PROC_FS static struct proc_dir_entry *proc_fs_nfs; @@ -1824,13 +1860,15 @@ static int nfs_server_list_open(struct inode *inode, struct file *file) { struct seq_file *m; int ret; + struct pid_namespace *pid_ns = file->f_dentry->d_sb->s_fs_info; + struct net *net = pid_ns->child_reaper->nsproxy->net_ns; ret = seq_open(file, &nfs_server_list_ops); if (ret < 0) return ret; m = file->private_data; - m->private = PDE(inode)->data; + m->private = net; return 0; } @@ -1840,9 +1878,11 @@ static int nfs_server_list_open(struct inode *inode, struct file *file) */ static void *nfs_server_list_start(struct seq_file *m, loff_t *_pos) { + struct nfs_net *nn = net_generic(m->private, nfs_net_id); + /* lock the list against modification */ - spin_lock(&nfs_client_lock); - return seq_list_start_head(&nfs_client_list, *_pos); + spin_lock(&nn->nfs_client_lock); + return seq_list_start_head(&nn->nfs_client_list, *_pos); } /* @@ -1850,7 +1890,9 @@ static void *nfs_server_list_start(struct seq_file *m, loff_t *_pos) */ static void *nfs_server_list_next(struct seq_file *p, void *v, loff_t *pos) { - return seq_list_next(v, &nfs_client_list, pos); + struct nfs_net *nn = net_generic(p->private, nfs_net_id); + + return seq_list_next(v, &nn->nfs_client_list, pos); } /* @@ -1858,7 +1900,9 @@ static void *nfs_server_list_next(struct seq_file *p, void *v, loff_t *pos) */ static void nfs_server_list_stop(struct seq_file *p, void *v) { - spin_unlock(&nfs_client_lock); + struct nfs_net *nn = net_generic(p->private, nfs_net_id); + + spin_unlock(&nn->nfs_client_lock); } /* @@ -1867,9 +1911,10 @@ static void nfs_server_list_stop(struct seq_file *p, void *v) static int nfs_server_list_show(struct seq_file *m, void *v) { struct nfs_client *clp; + struct nfs_net *nn = net_generic(m->private, nfs_net_id); /* display header on line 1 */ - if (v == &nfs_client_list) { + if (v == &nn->nfs_client_list) { seq_puts(m, "NV SERVER PORT USE HOSTNAME\n"); return 0; } @@ -1881,12 +1926,14 @@ static int nfs_server_list_show(struct seq_file *m, void *v) if (clp->cl_cons_state != NFS_CS_READY) return 0; + rcu_read_lock(); seq_printf(m, "v%u %s %s %3d %s\n", clp->rpc_ops->version, rpc_peeraddr2str(clp->cl_rpcclient, RPC_DISPLAY_HEX_ADDR), rpc_peeraddr2str(clp->cl_rpcclient, RPC_DISPLAY_HEX_PORT), atomic_read(&clp->cl_count), clp->cl_hostname); + rcu_read_unlock(); return 0; } @@ -1898,13 +1945,15 @@ static int nfs_volume_list_open(struct inode *inode, struct file *file) { struct seq_file *m; int ret; + struct pid_namespace *pid_ns = file->f_dentry->d_sb->s_fs_info; + struct net *net = pid_ns->child_reaper->nsproxy->net_ns; ret = seq_open(file, &nfs_volume_list_ops); if (ret < 0) return ret; m = file->private_data; - m->private = PDE(inode)->data; + m->private = net; return 0; } @@ -1914,9 +1963,11 @@ static int nfs_volume_list_open(struct inode *inode, struct file *file) */ static void *nfs_volume_list_start(struct seq_file *m, loff_t *_pos) { + struct nfs_net *nn = net_generic(m->private, nfs_net_id); + /* lock the list against modification */ - spin_lock(&nfs_client_lock); - return seq_list_start_head(&nfs_volume_list, *_pos); + spin_lock(&nn->nfs_client_lock); + return seq_list_start_head(&nn->nfs_volume_list, *_pos); } /* @@ -1924,7 +1975,9 @@ static void *nfs_volume_list_start(struct seq_file *m, loff_t *_pos) */ static void *nfs_volume_list_next(struct seq_file *p, void *v, loff_t *pos) { - return seq_list_next(v, &nfs_volume_list, pos); + struct nfs_net *nn = net_generic(p->private, nfs_net_id); + + return seq_list_next(v, &nn->nfs_volume_list, pos); } /* @@ -1932,7 +1985,9 @@ static void *nfs_volume_list_next(struct seq_file *p, void *v, loff_t *pos) */ static void nfs_volume_list_stop(struct seq_file *p, void *v) { - spin_unlock(&nfs_client_lock); + struct nfs_net *nn = net_generic(p->private, nfs_net_id); + + spin_unlock(&nn->nfs_client_lock); } /* @@ -1943,9 +1998,10 @@ static int nfs_volume_list_show(struct seq_file *m, void *v) struct nfs_server *server; struct nfs_client *clp; char dev[8], fsid[17]; + struct nfs_net *nn = net_generic(m->private, nfs_net_id); /* display header on line 1 */ - if (v == &nfs_volume_list) { + if (v == &nn->nfs_volume_list) { seq_puts(m, "NV SERVER PORT DEV FSID FSC\n"); return 0; } @@ -1960,6 +2016,7 @@ static int nfs_volume_list_show(struct seq_file *m, void *v) (unsigned long long) server->fsid.major, (unsigned long long) server->fsid.minor); + rcu_read_lock(); seq_printf(m, "v%u %s %s %-7s %-17s %s\n", clp->rpc_ops->version, rpc_peeraddr2str(clp->cl_rpcclient, RPC_DISPLAY_HEX_ADDR), @@ -1967,6 +2024,7 @@ static int nfs_volume_list_show(struct seq_file *m, void *v) dev, fsid, nfs_server_fscache_state(server)); + rcu_read_unlock(); return 0; } diff --git a/fs/nfs/delegation.c b/fs/nfs/delegation.c index 7f26540..89af1d2 100644 --- a/fs/nfs/delegation.c +++ b/fs/nfs/delegation.c @@ -105,7 +105,7 @@ again: continue; if (!test_bit(NFS_DELEGATED_STATE, &state->flags)) continue; - if (memcmp(state->stateid.data, stateid->data, sizeof(state->stateid.data)) != 0) + if (!nfs4_stateid_match(&state->stateid, stateid)) continue; get_nfs_open_context(ctx); spin_unlock(&inode->i_lock); @@ -139,8 +139,7 @@ void nfs_inode_reclaim_delegation(struct inode *inode, struct rpc_cred *cred, if (delegation != NULL) { spin_lock(&delegation->lock); if (delegation->inode != NULL) { - memcpy(delegation->stateid.data, res->delegation.data, - sizeof(delegation->stateid.data)); + nfs4_stateid_copy(&delegation->stateid, &res->delegation); delegation->type = res->delegation_type; delegation->maxsize = res->maxsize; oldcred = delegation->cred; @@ -236,8 +235,7 @@ int nfs_inode_set_delegation(struct inode *inode, struct rpc_cred *cred, struct delegation = kmalloc(sizeof(*delegation), GFP_NOFS); if (delegation == NULL) return -ENOMEM; - memcpy(delegation->stateid.data, res->delegation.data, - sizeof(delegation->stateid.data)); + nfs4_stateid_copy(&delegation->stateid, &res->delegation); delegation->type = res->delegation_type; delegation->maxsize = res->maxsize; delegation->change_attr = inode->i_version; @@ -250,19 +248,22 @@ int nfs_inode_set_delegation(struct inode *inode, struct rpc_cred *cred, struct old_delegation = rcu_dereference_protected(nfsi->delegation, lockdep_is_held(&clp->cl_lock)); if (old_delegation != NULL) { - if (memcmp(&delegation->stateid, &old_delegation->stateid, - sizeof(old_delegation->stateid)) == 0 && + if (nfs4_stateid_match(&delegation->stateid, + &old_delegation->stateid) && delegation->type == old_delegation->type) { goto out; } /* * Deal with broken servers that hand out two * delegations for the same file. + * Allow for upgrades to a WRITE delegation, but + * nothing else. */ dfprintk(FILE, "%s: server %s handed out " "a duplicate delegation!\n", __func__, clp->cl_hostname); - if (delegation->type <= old_delegation->type) { + if (delegation->type == old_delegation->type || + !(delegation->type & FMODE_WRITE)) { freeme = delegation; delegation = NULL; goto out; @@ -455,17 +456,24 @@ static void nfs_client_mark_return_all_delegation_types(struct nfs_client *clp, rcu_read_unlock(); } -static void nfs_client_mark_return_all_delegations(struct nfs_client *clp) -{ - nfs_client_mark_return_all_delegation_types(clp, FMODE_READ|FMODE_WRITE); -} - static void nfs_delegation_run_state_manager(struct nfs_client *clp) { if (test_bit(NFS4CLNT_DELEGRETURN, &clp->cl_state)) nfs4_schedule_state_manager(clp); } +void nfs_remove_bad_delegation(struct inode *inode) +{ + struct nfs_delegation *delegation; + + delegation = nfs_detach_delegation(NFS_I(inode), NFS_SERVER(inode)); + if (delegation) { + nfs_inode_find_state_and_recover(inode, &delegation->stateid); + nfs_free_delegation(delegation); + } +} +EXPORT_SYMBOL_GPL(nfs_remove_bad_delegation); + /** * nfs_expire_all_delegation_types * @clp: client to process @@ -488,18 +496,6 @@ void nfs_expire_all_delegations(struct nfs_client *clp) nfs_expire_all_delegation_types(clp, FMODE_READ|FMODE_WRITE); } -/** - * nfs_handle_cb_pathdown - return all delegations after NFS4ERR_CB_PATH_DOWN - * @clp: client to process - * - */ -void nfs_handle_cb_pathdown(struct nfs_client *clp) -{ - if (clp == NULL) - return; - nfs_client_mark_return_all_delegations(clp); -} - static void nfs_mark_return_unreferenced_delegations(struct nfs_server *server) { struct nfs_delegation *delegation; @@ -531,7 +527,7 @@ void nfs_expire_unreferenced_delegations(struct nfs_client *clp) /** * nfs_async_inode_return_delegation - asynchronously return a delegation * @inode: inode to process - * @stateid: state ID information from CB_RECALL arguments + * @stateid: state ID information * * Returns zero on success, or a negative errno value. */ @@ -545,7 +541,7 @@ int nfs_async_inode_return_delegation(struct inode *inode, rcu_read_lock(); delegation = rcu_dereference(NFS_I(inode)->delegation); - if (!clp->cl_mvops->validate_stateid(delegation, stateid)) { + if (!clp->cl_mvops->match_stateid(&delegation->stateid, stateid)) { rcu_read_unlock(); return -ENOENT; } @@ -684,21 +680,25 @@ int nfs_delegations_present(struct nfs_client *clp) * nfs4_copy_delegation_stateid - Copy inode's state ID information * @dst: stateid data structure to fill in * @inode: inode to check + * @flags: delegation type requirement * - * Returns one and fills in "dst->data" * if inode had a delegation, - * otherwise zero is returned. + * Returns "true" and fills in "dst->data" * if inode had a delegation, + * otherwise "false" is returned. */ -int nfs4_copy_delegation_stateid(nfs4_stateid *dst, struct inode *inode) +bool nfs4_copy_delegation_stateid(nfs4_stateid *dst, struct inode *inode, + fmode_t flags) { struct nfs_inode *nfsi = NFS_I(inode); struct nfs_delegation *delegation; - int ret = 0; + bool ret; + flags &= FMODE_READ|FMODE_WRITE; rcu_read_lock(); delegation = rcu_dereference(nfsi->delegation); - if (delegation != NULL) { - memcpy(dst->data, delegation->stateid.data, sizeof(dst->data)); - ret = 1; + ret = (delegation != NULL && (delegation->type & flags) == flags); + if (ret) { + nfs4_stateid_copy(dst, &delegation->stateid); + nfs_mark_delegation_referenced(delegation); } rcu_read_unlock(); return ret; diff --git a/fs/nfs/delegation.h b/fs/nfs/delegation.h index d9322e4..cd6a7a8 100644 --- a/fs/nfs/delegation.h +++ b/fs/nfs/delegation.h @@ -42,9 +42,9 @@ void nfs_super_return_all_delegations(struct super_block *sb); void nfs_expire_all_delegations(struct nfs_client *clp); void nfs_expire_all_delegation_types(struct nfs_client *clp, fmode_t flags); void nfs_expire_unreferenced_delegations(struct nfs_client *clp); -void nfs_handle_cb_pathdown(struct nfs_client *clp); int nfs_client_return_marked_delegations(struct nfs_client *clp); int nfs_delegations_present(struct nfs_client *clp); +void nfs_remove_bad_delegation(struct inode *inode); void nfs_delegation_mark_reclaim(struct nfs_client *clp); void nfs_delegation_reap_unclaimed(struct nfs_client *clp); @@ -53,7 +53,7 @@ void nfs_delegation_reap_unclaimed(struct nfs_client *clp); int nfs4_proc_delegreturn(struct inode *inode, struct rpc_cred *cred, const nfs4_stateid *stateid, int issync); int nfs4_open_delegation_recall(struct nfs_open_context *ctx, struct nfs4_state *state, const nfs4_stateid *stateid); int nfs4_lock_delegation_recall(struct nfs4_state *state, struct file_lock *fl); -int nfs4_copy_delegation_stateid(nfs4_stateid *dst, struct inode *inode); +bool nfs4_copy_delegation_stateid(nfs4_stateid *dst, struct inode *inode, fmode_t flags); void nfs_mark_delegation_referenced(struct nfs_delegation *delegation); int nfs_have_delegation(struct inode *inode, fmode_t flags); diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index 32aa691..4aaf031 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -207,7 +207,7 @@ struct nfs_cache_array_entry { }; struct nfs_cache_array { - unsigned int size; + int size; int eof_index; u64 last_cookie; struct nfs_cache_array_entry array[0]; @@ -1429,6 +1429,7 @@ static struct dentry *nfs_atomic_lookup(struct inode *dir, struct dentry *dentry } open_flags = nd->intent.open.flags; + attr.ia_valid = 0; ctx = create_nfs_open_context(dentry, open_flags); res = ERR_CAST(ctx); @@ -1437,11 +1438,14 @@ static struct dentry *nfs_atomic_lookup(struct inode *dir, struct dentry *dentry if (nd->flags & LOOKUP_CREATE) { attr.ia_mode = nd->intent.open.create_mode; - attr.ia_valid = ATTR_MODE; + attr.ia_valid |= ATTR_MODE; attr.ia_mode &= ~current_umask(); - } else { + } else open_flags &= ~(O_EXCL | O_CREAT); - attr.ia_valid = 0; + + if (open_flags & O_TRUNC) { + attr.ia_valid |= ATTR_SIZE; + attr.ia_size = 0; } /* Open the file on the server */ @@ -1495,6 +1499,7 @@ static int nfs_open_revalidate(struct dentry *dentry, struct nameidata *nd) struct inode *inode; struct inode *dir; struct nfs_open_context *ctx; + struct iattr attr; int openflags, ret = 0; if (nd->flags & LOOKUP_RCU) @@ -1523,19 +1528,27 @@ static int nfs_open_revalidate(struct dentry *dentry, struct nameidata *nd) /* We cannot do exclusive creation on a positive dentry */ if ((openflags & (O_CREAT|O_EXCL)) == (O_CREAT|O_EXCL)) goto no_open_dput; - /* We can't create new files, or truncate existing ones here */ - openflags &= ~(O_CREAT|O_EXCL|O_TRUNC); + /* We can't create new files here */ + openflags &= ~(O_CREAT|O_EXCL); ctx = create_nfs_open_context(dentry, openflags); ret = PTR_ERR(ctx); if (IS_ERR(ctx)) goto out; + + attr.ia_valid = 0; + if (openflags & O_TRUNC) { + attr.ia_valid |= ATTR_SIZE; + attr.ia_size = 0; + nfs_wb_all(inode); + } + /* * Note: we're not holding inode->i_mutex and so may be racing with * operations that change the directory. We therefore save the * change attribute *before* we do the RPC call. */ - inode = NFS_PROTO(dir)->open_context(dir, ctx, openflags, NULL); + inode = NFS_PROTO(dir)->open_context(dir, ctx, openflags, &attr); if (IS_ERR(inode)) { ret = PTR_ERR(inode); switch (ret) { diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c index 1940f1a..9c7f66a 100644 --- a/fs/nfs/direct.c +++ b/fs/nfs/direct.c @@ -265,9 +265,7 @@ static void nfs_direct_read_release(void *calldata) } static const struct rpc_call_ops nfs_read_direct_ops = { -#if defined(CONFIG_NFS_V4_1) .rpc_call_prepare = nfs_read_prepare, -#endif /* CONFIG_NFS_V4_1 */ .rpc_call_done = nfs_direct_read_result, .rpc_release = nfs_direct_read_release, }; @@ -554,9 +552,7 @@ static void nfs_direct_commit_release(void *calldata) } static const struct rpc_call_ops nfs_commit_direct_ops = { -#if defined(CONFIG_NFS_V4_1) .rpc_call_prepare = nfs_write_prepare, -#endif /* CONFIG_NFS_V4_1 */ .rpc_call_done = nfs_direct_commit_result, .rpc_release = nfs_direct_commit_release, }; @@ -696,9 +692,7 @@ out_unlock: } static const struct rpc_call_ops nfs_write_direct_ops = { -#if defined(CONFIG_NFS_V4_1) .rpc_call_prepare = nfs_write_prepare, -#endif /* CONFIG_NFS_V4_1 */ .rpc_call_done = nfs_direct_write_result, .rpc_release = nfs_direct_write_release, }; diff --git a/fs/nfs/dns_resolve.c b/fs/nfs/dns_resolve.c index a6e711a..b3924b8 100644 --- a/fs/nfs/dns_resolve.c +++ b/fs/nfs/dns_resolve.c @@ -10,8 +10,9 @@ #include <linux/sunrpc/clnt.h> #include <linux/dns_resolver.h> +#include "dns_resolve.h" -ssize_t nfs_dns_resolve_name(char *name, size_t namelen, +ssize_t nfs_dns_resolve_name(struct net *net, char *name, size_t namelen, struct sockaddr *sa, size_t salen) { ssize_t ret; @@ -20,7 +21,7 @@ ssize_t nfs_dns_resolve_name(char *name, size_t namelen, ip_len = dns_query(NULL, name, namelen, NULL, &ip_addr, NULL); if (ip_len > 0) - ret = rpc_pton(ip_addr, ip_len, sa, salen); + ret = rpc_pton(net, ip_addr, ip_len, sa, salen); else ret = -ESRCH; kfree(ip_addr); @@ -40,15 +41,15 @@ ssize_t nfs_dns_resolve_name(char *name, size_t namelen, #include <linux/sunrpc/clnt.h> #include <linux/sunrpc/cache.h> #include <linux/sunrpc/svcauth.h> +#include <linux/sunrpc/rpc_pipe_fs.h> #include "dns_resolve.h" #include "cache_lib.h" +#include "netns.h" #define NFS_DNS_HASHBITS 4 #define NFS_DNS_HASHTBL_SIZE (1 << NFS_DNS_HASHBITS) -static struct cache_head *nfs_dns_table[NFS_DNS_HASHTBL_SIZE]; - struct nfs_dns_ent { struct cache_head h; @@ -224,7 +225,7 @@ static int nfs_dns_parse(struct cache_detail *cd, char *buf, int buflen) len = qword_get(&buf, buf1, sizeof(buf1)); if (len <= 0) goto out; - key.addrlen = rpc_pton(buf1, len, + key.addrlen = rpc_pton(cd->net, buf1, len, (struct sockaddr *)&key.addr, sizeof(key.addr)); @@ -259,21 +260,6 @@ out: return ret; } -static struct cache_detail nfs_dns_resolve = { - .owner = THIS_MODULE, - .hash_size = NFS_DNS_HASHTBL_SIZE, - .hash_table = nfs_dns_table, - .name = "dns_resolve", - .cache_put = nfs_dns_ent_put, - .cache_upcall = nfs_dns_upcall, - .cache_parse = nfs_dns_parse, - .cache_show = nfs_dns_show, - .match = nfs_dns_match, - .init = nfs_dns_ent_init, - .update = nfs_dns_ent_update, - .alloc = nfs_dns_ent_alloc, -}; - static int do_cache_lookup(struct cache_detail *cd, struct nfs_dns_ent *key, struct nfs_dns_ent **item, @@ -336,8 +322,8 @@ out: return ret; } -ssize_t nfs_dns_resolve_name(char *name, size_t namelen, - struct sockaddr *sa, size_t salen) +ssize_t nfs_dns_resolve_name(struct net *net, char *name, + size_t namelen, struct sockaddr *sa, size_t salen) { struct nfs_dns_ent key = { .hostname = name, @@ -345,28 +331,118 @@ ssize_t nfs_dns_resolve_name(char *name, size_t namelen, }; struct nfs_dns_ent *item = NULL; ssize_t ret; + struct nfs_net *nn = net_generic(net, nfs_net_id); - ret = do_cache_lookup_wait(&nfs_dns_resolve, &key, &item); + ret = do_cache_lookup_wait(nn->nfs_dns_resolve, &key, &item); if (ret == 0) { if (salen >= item->addrlen) { memcpy(sa, &item->addr, item->addrlen); ret = item->addrlen; } else ret = -EOVERFLOW; - cache_put(&item->h, &nfs_dns_resolve); + cache_put(&item->h, nn->nfs_dns_resolve); } else if (ret == -ENOENT) ret = -ESRCH; return ret; } +int nfs_dns_resolver_cache_init(struct net *net) +{ + int err = -ENOMEM; + struct nfs_net *nn = net_generic(net, nfs_net_id); + struct cache_detail *cd; + struct cache_head **tbl; + + cd = kzalloc(sizeof(struct cache_detail), GFP_KERNEL); + if (cd == NULL) + goto err_cd; + + tbl = kzalloc(NFS_DNS_HASHTBL_SIZE * sizeof(struct cache_head *), + GFP_KERNEL); + if (tbl == NULL) + goto err_tbl; + + cd->owner = THIS_MODULE, + cd->hash_size = NFS_DNS_HASHTBL_SIZE, + cd->hash_table = tbl, + cd->name = "dns_resolve", + cd->cache_put = nfs_dns_ent_put, + cd->cache_upcall = nfs_dns_upcall, + cd->cache_parse = nfs_dns_parse, + cd->cache_show = nfs_dns_show, + cd->match = nfs_dns_match, + cd->init = nfs_dns_ent_init, + cd->update = nfs_dns_ent_update, + cd->alloc = nfs_dns_ent_alloc, + + nfs_cache_init(cd); + err = nfs_cache_register_net(net, cd); + if (err) + goto err_reg; + nn->nfs_dns_resolve = cd; + return 0; + +err_reg: + nfs_cache_destroy(cd); + kfree(cd->hash_table); +err_tbl: + kfree(cd); +err_cd: + return err; +} + +void nfs_dns_resolver_cache_destroy(struct net *net) +{ + struct nfs_net *nn = net_generic(net, nfs_net_id); + struct cache_detail *cd = nn->nfs_dns_resolve; + + nfs_cache_unregister_net(net, cd); + nfs_cache_destroy(cd); + kfree(cd->hash_table); + kfree(cd); +} + +static int rpc_pipefs_event(struct notifier_block *nb, unsigned long event, + void *ptr) +{ + struct super_block *sb = ptr; + struct net *net = sb->s_fs_info; + struct nfs_net *nn = net_generic(net, nfs_net_id); + struct cache_detail *cd = nn->nfs_dns_resolve; + int ret = 0; + + if (cd == NULL) + return 0; + + if (!try_module_get(THIS_MODULE)) + return 0; + + switch (event) { + case RPC_PIPEFS_MOUNT: + ret = nfs_cache_register_sb(sb, cd); + break; + case RPC_PIPEFS_UMOUNT: + nfs_cache_unregister_sb(sb, cd); + break; + default: + ret = -ENOTSUPP; + break; + } + module_put(THIS_MODULE); + return ret; +} + +static struct notifier_block nfs_dns_resolver_block = { + .notifier_call = rpc_pipefs_event, +}; + int nfs_dns_resolver_init(void) { - return nfs_cache_register(&nfs_dns_resolve); + return rpc_pipefs_notifier_register(&nfs_dns_resolver_block); } void nfs_dns_resolver_destroy(void) { - nfs_cache_unregister(&nfs_dns_resolve); + rpc_pipefs_notifier_unregister(&nfs_dns_resolver_block); } - #endif diff --git a/fs/nfs/dns_resolve.h b/fs/nfs/dns_resolve.h index 199bb55..2e4f596 100644 --- a/fs/nfs/dns_resolve.h +++ b/fs/nfs/dns_resolve.h @@ -15,12 +15,22 @@ static inline int nfs_dns_resolver_init(void) static inline void nfs_dns_resolver_destroy(void) {} + +static inline int nfs_dns_resolver_cache_init(struct net *net) +{ + return 0; +} + +static inline void nfs_dns_resolver_cache_destroy(struct net *net) +{} #else extern int nfs_dns_resolver_init(void); extern void nfs_dns_resolver_destroy(void); +extern int nfs_dns_resolver_cache_init(struct net *net); +extern void nfs_dns_resolver_cache_destroy(struct net *net); #endif -extern ssize_t nfs_dns_resolve_name(char *name, size_t namelen, - struct sockaddr *sa, size_t salen); +extern ssize_t nfs_dns_resolve_name(struct net *net, char *name, + size_t namelen, struct sockaddr *sa, size_t salen); #endif diff --git a/fs/nfs/file.c b/fs/nfs/file.c index c43a452..4fdaaa6 100644 --- a/fs/nfs/file.c +++ b/fs/nfs/file.c @@ -530,6 +530,8 @@ static int nfs_vm_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf) if (mapping != dentry->d_inode->i_mapping) goto out_unlock; + wait_on_page_writeback(page); + pagelen = nfs_page_length(page); if (pagelen == 0) goto out_unlock; diff --git a/fs/nfs/fscache.c b/fs/nfs/fscache.c index 419119c..ae65c16 100644 --- a/fs/nfs/fscache.c +++ b/fs/nfs/fscache.c @@ -327,7 +327,7 @@ void nfs_fscache_reset_inode_cookie(struct inode *inode) { struct nfs_inode *nfsi = NFS_I(inode); struct nfs_server *nfss = NFS_SERVER(inode); - struct fscache_cookie *old = nfsi->fscache; + NFS_IFDEBUG(struct fscache_cookie *old = nfsi->fscache); nfs_fscache_inode_lock(inode); if (nfsi->fscache) { diff --git a/fs/nfs/idmap.c b/fs/nfs/idmap.c index a1bbf77..b7f348b 100644 --- a/fs/nfs/idmap.c +++ b/fs/nfs/idmap.c @@ -34,11 +34,29 @@ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include <linux/types.h> -#include <linux/string.h> -#include <linux/kernel.h> -#include <linux/slab.h> +#include <linux/parser.h> +#include <linux/fs.h> #include <linux/nfs_idmap.h> +#include <net/net_namespace.h> +#include <linux/sunrpc/rpc_pipe_fs.h> #include <linux/nfs_fs.h> +#include <linux/nfs_fs_sb.h> +#include <linux/key.h> +#include <linux/keyctl.h> +#include <linux/key-type.h> +#include <keys/user-type.h> +#include <linux/module.h> + +#include "internal.h" +#include "netns.h" + +#define NFS_UINT_MAXLEN 11 + +/* Default cache timeout is 10 minutes */ +unsigned int nfs_idmap_cache_timeout = 600; +static const struct cred *id_resolver_cache; +static struct key_type key_type_id_resolver_legacy; + /** * nfs_fattr_init_names - initialise the nfs_fattr owner_name/group_name fields @@ -142,24 +160,7 @@ static int nfs_map_numeric_to_string(__u32 id, char *buf, size_t buflen) return snprintf(buf, buflen, "%u", id); } -#ifdef CONFIG_NFS_USE_NEW_IDMAPPER - -#include <linux/cred.h> -#include <linux/sunrpc/sched.h> -#include <linux/nfs4.h> -#include <linux/nfs_fs_sb.h> -#include <linux/keyctl.h> -#include <linux/key-type.h> -#include <linux/rcupdate.h> -#include <linux/err.h> - -#include <keys/user-type.h> - -#define NFS_UINT_MAXLEN 11 - -const struct cred *id_resolver_cache; - -struct key_type key_type_id_resolver = { +static struct key_type key_type_id_resolver = { .name = "id_resolver", .instantiate = user_instantiate, .match = user_match, @@ -169,13 +170,14 @@ struct key_type key_type_id_resolver = { .read = user_read, }; -int nfs_idmap_init(void) +static int nfs_idmap_init_keyring(void) { struct cred *cred; struct key *keyring; int ret = 0; - printk(KERN_NOTICE "Registering the %s key type\n", key_type_id_resolver.name); + printk(KERN_NOTICE "NFS: Registering the %s key type\n", + key_type_id_resolver.name); cred = prepare_kernel_cred(NULL); if (!cred) @@ -211,7 +213,7 @@ failed_put_cred: return ret; } -void nfs_idmap_quit(void) +static void nfs_idmap_quit_keyring(void) { key_revoke(id_resolver_cache->thread_keyring); unregister_key_type(&key_type_id_resolver); @@ -246,8 +248,10 @@ static ssize_t nfs_idmap_get_desc(const char *name, size_t namelen, return desclen; } -static ssize_t nfs_idmap_request_key(const char *name, size_t namelen, - const char *type, void *data, size_t data_size) +static ssize_t nfs_idmap_request_key(struct key_type *key_type, + const char *name, size_t namelen, + const char *type, void *data, + size_t data_size, struct idmap *idmap) { const struct cred *saved_cred; struct key *rkey; @@ -260,8 +264,12 @@ static ssize_t nfs_idmap_request_key(const char *name, size_t namelen, goto out; saved_cred = override_creds(id_resolver_cache); - rkey = request_key(&key_type_id_resolver, desc, ""); + if (idmap) + rkey = request_key_with_auxdata(key_type, desc, "", 0, idmap); + else + rkey = request_key(&key_type_id_resolver, desc, ""); revert_creds(saved_cred); + kfree(desc); if (IS_ERR(rkey)) { ret = PTR_ERR(rkey); @@ -294,31 +302,46 @@ out: return ret; } +static ssize_t nfs_idmap_get_key(const char *name, size_t namelen, + const char *type, void *data, + size_t data_size, struct idmap *idmap) +{ + ssize_t ret = nfs_idmap_request_key(&key_type_id_resolver, + name, namelen, type, data, + data_size, NULL); + if (ret < 0) { + ret = nfs_idmap_request_key(&key_type_id_resolver_legacy, + name, namelen, type, data, + data_size, idmap); + } + return ret; +} /* ID -> Name */ -static ssize_t nfs_idmap_lookup_name(__u32 id, const char *type, char *buf, size_t buflen) +static ssize_t nfs_idmap_lookup_name(__u32 id, const char *type, char *buf, + size_t buflen, struct idmap *idmap) { char id_str[NFS_UINT_MAXLEN]; int id_len; ssize_t ret; id_len = snprintf(id_str, sizeof(id_str), "%u", id); - ret = nfs_idmap_request_key(id_str, id_len, type, buf, buflen); + ret = nfs_idmap_get_key(id_str, id_len, type, buf, buflen, idmap); if (ret < 0) return -EINVAL; return ret; } /* Name -> ID */ -static int nfs_idmap_lookup_id(const char *name, size_t namelen, - const char *type, __u32 *id) +static int nfs_idmap_lookup_id(const char *name, size_t namelen, const char *type, + __u32 *id, struct idmap *idmap) { char id_str[NFS_UINT_MAXLEN]; long id_long; ssize_t data_size; int ret = 0; - data_size = nfs_idmap_request_key(name, namelen, type, id_str, NFS_UINT_MAXLEN); + data_size = nfs_idmap_get_key(name, namelen, type, id_str, NFS_UINT_MAXLEN, idmap); if (data_size <= 0) { ret = -EINVAL; } else { @@ -328,114 +351,103 @@ static int nfs_idmap_lookup_id(const char *name, size_t namelen, return ret; } -int nfs_map_name_to_uid(const struct nfs_server *server, const char *name, size_t namelen, __u32 *uid) -{ - if (nfs_map_string_to_numeric(name, namelen, uid)) - return 0; - return nfs_idmap_lookup_id(name, namelen, "uid", uid); -} - -int nfs_map_group_to_gid(const struct nfs_server *server, const char *name, size_t namelen, __u32 *gid) -{ - if (nfs_map_string_to_numeric(name, namelen, gid)) - return 0; - return nfs_idmap_lookup_id(name, namelen, "gid", gid); -} - -int nfs_map_uid_to_name(const struct nfs_server *server, __u32 uid, char *buf, size_t buflen) -{ - int ret = -EINVAL; - - if (!(server->caps & NFS_CAP_UIDGID_NOMAP)) - ret = nfs_idmap_lookup_name(uid, "user", buf, buflen); - if (ret < 0) - ret = nfs_map_numeric_to_string(uid, buf, buflen); - return ret; -} -int nfs_map_gid_to_group(const struct nfs_server *server, __u32 gid, char *buf, size_t buflen) -{ - int ret = -EINVAL; +/* idmap classic begins here */ +module_param(nfs_idmap_cache_timeout, int, 0644); - if (!(server->caps & NFS_CAP_UIDGID_NOMAP)) - ret = nfs_idmap_lookup_name(gid, "group", buf, buflen); - if (ret < 0) - ret = nfs_map_numeric_to_string(gid, buf, buflen); - return ret; -} - -#else /* CONFIG_NFS_USE_NEW_IDMAPPER not defined */ - -#include <linux/module.h> -#include <linux/mutex.h> -#include <linux/init.h> -#include <linux/socket.h> -#include <linux/in.h> -#include <linux/sched.h> -#include <linux/sunrpc/clnt.h> -#include <linux/workqueue.h> -#include <linux/sunrpc/rpc_pipe_fs.h> - -#include <linux/nfs_fs.h> - -#include "nfs4_fs.h" - -#define IDMAP_HASH_SZ 128 - -/* Default cache timeout is 10 minutes */ -unsigned int nfs_idmap_cache_timeout = 600 * HZ; - -static int param_set_idmap_timeout(const char *val, struct kernel_param *kp) -{ - char *endp; - int num = simple_strtol(val, &endp, 0); - int jif = num * HZ; - if (endp == val || *endp || num < 0 || jif < num) - return -EINVAL; - *((int *)kp->arg) = jif; - return 0; -} - -module_param_call(idmap_cache_timeout, param_set_idmap_timeout, param_get_int, - &nfs_idmap_cache_timeout, 0644); - -struct idmap_hashent { - unsigned long ih_expires; - __u32 ih_id; - size_t ih_namelen; - char ih_name[IDMAP_NAMESZ]; +struct idmap { + struct rpc_pipe *idmap_pipe; + struct key_construction *idmap_key_cons; }; -struct idmap_hashtable { - __u8 h_type; - struct idmap_hashent h_entries[IDMAP_HASH_SZ]; +enum { + Opt_find_uid, Opt_find_gid, Opt_find_user, Opt_find_group, Opt_find_err }; -struct idmap { - struct dentry *idmap_dentry; - wait_queue_head_t idmap_wq; - struct idmap_msg idmap_im; - struct mutex idmap_lock; /* Serializes upcalls */ - struct mutex idmap_im_lock; /* Protects the hashtable */ - struct idmap_hashtable idmap_user_hash; - struct idmap_hashtable idmap_group_hash; +static const match_table_t nfs_idmap_tokens = { + { Opt_find_uid, "uid:%s" }, + { Opt_find_gid, "gid:%s" }, + { Opt_find_user, "user:%s" }, + { Opt_find_group, "group:%s" }, + { Opt_find_err, NULL } }; +static int nfs_idmap_legacy_upcall(struct key_construction *, const char *, void *); static ssize_t idmap_pipe_downcall(struct file *, const char __user *, size_t); static void idmap_pipe_destroy_msg(struct rpc_pipe_msg *); -static unsigned int fnvhash32(const void *, size_t); - static const struct rpc_pipe_ops idmap_upcall_ops = { .upcall = rpc_pipe_generic_upcall, .downcall = idmap_pipe_downcall, .destroy_msg = idmap_pipe_destroy_msg, }; +static struct key_type key_type_id_resolver_legacy = { + .name = "id_resolver", + .instantiate = user_instantiate, + .match = user_match, + .revoke = user_revoke, + .destroy = user_destroy, + .describe = user_describe, + .read = user_read, + .request_key = nfs_idmap_legacy_upcall, +}; + +static void __nfs_idmap_unregister(struct rpc_pipe *pipe) +{ + if (pipe->dentry) + rpc_unlink(pipe->dentry); +} + +static int __nfs_idmap_register(struct dentry *dir, + struct idmap *idmap, + struct rpc_pipe *pipe) +{ + struct dentry *dentry; + + dentry = rpc_mkpipe_dentry(dir, "idmap", idmap, pipe); + if (IS_ERR(dentry)) + return PTR_ERR(dentry); + pipe->dentry = dentry; + return 0; +} + +static void nfs_idmap_unregister(struct nfs_client *clp, + struct rpc_pipe *pipe) +{ + struct net *net = clp->net; + struct super_block *pipefs_sb; + + pipefs_sb = rpc_get_sb_net(net); + if (pipefs_sb) { + __nfs_idmap_unregister(pipe); + rpc_put_sb_net(net); + } +} + +static int nfs_idmap_register(struct nfs_client *clp, + struct idmap *idmap, + struct rpc_pipe *pipe) +{ + struct net *net = clp->net; + struct super_block *pipefs_sb; + int err = 0; + + pipefs_sb = rpc_get_sb_net(net); + if (pipefs_sb) { + if (clp->cl_rpcclient->cl_dentry) + err = __nfs_idmap_register(clp->cl_rpcclient->cl_dentry, + idmap, pipe); + rpc_put_sb_net(net); + } + return err; +} + int nfs_idmap_new(struct nfs_client *clp) { struct idmap *idmap; + struct rpc_pipe *pipe; int error; BUG_ON(clp->cl_idmap != NULL); @@ -444,19 +456,19 @@ nfs_idmap_new(struct nfs_client *clp) if (idmap == NULL) return -ENOMEM; - idmap->idmap_dentry = rpc_mkpipe(clp->cl_rpcclient->cl_path.dentry, - "idmap", idmap, &idmap_upcall_ops, 0); - if (IS_ERR(idmap->idmap_dentry)) { - error = PTR_ERR(idmap->idmap_dentry); + pipe = rpc_mkpipe_data(&idmap_upcall_ops, 0); + if (IS_ERR(pipe)) { + error = PTR_ERR(pipe); kfree(idmap); return error; } - - mutex_init(&idmap->idmap_lock); - mutex_init(&idmap->idmap_im_lock); - init_waitqueue_head(&idmap->idmap_wq); - idmap->idmap_user_hash.h_type = IDMAP_TYPE_USER; - idmap->idmap_group_hash.h_type = IDMAP_TYPE_GROUP; + error = nfs_idmap_register(clp, idmap, pipe); + if (error) { + rpc_destroy_pipe_data(pipe); + kfree(idmap); + return error; + } + idmap->idmap_pipe = pipe; clp->cl_idmap = idmap; return 0; @@ -469,211 +481,220 @@ nfs_idmap_delete(struct nfs_client *clp) if (!idmap) return; - rpc_unlink(idmap->idmap_dentry); + nfs_idmap_unregister(clp, idmap->idmap_pipe); + rpc_destroy_pipe_data(idmap->idmap_pipe); clp->cl_idmap = NULL; kfree(idmap); } -/* - * Helper routines for manipulating the hashtable - */ -static inline struct idmap_hashent * -idmap_name_hash(struct idmap_hashtable* h, const char *name, size_t len) -{ - return &h->h_entries[fnvhash32(name, len) % IDMAP_HASH_SZ]; -} - -static struct idmap_hashent * -idmap_lookup_name(struct idmap_hashtable *h, const char *name, size_t len) +static int __rpc_pipefs_event(struct nfs_client *clp, unsigned long event, + struct super_block *sb) { - struct idmap_hashent *he = idmap_name_hash(h, name, len); + int err = 0; - if (he->ih_namelen != len || memcmp(he->ih_name, name, len) != 0) - return NULL; - if (time_after(jiffies, he->ih_expires)) - return NULL; - return he; + switch (event) { + case RPC_PIPEFS_MOUNT: + BUG_ON(clp->cl_rpcclient->cl_dentry == NULL); + err = __nfs_idmap_register(clp->cl_rpcclient->cl_dentry, + clp->cl_idmap, + clp->cl_idmap->idmap_pipe); + break; + case RPC_PIPEFS_UMOUNT: + if (clp->cl_idmap->idmap_pipe) { + struct dentry *parent; + + parent = clp->cl_idmap->idmap_pipe->dentry->d_parent; + __nfs_idmap_unregister(clp->cl_idmap->idmap_pipe); + /* + * Note: This is a dirty hack. SUNRPC hook has been + * called already but simple_rmdir() call for the + * directory returned with error because of idmap pipe + * inside. Thus now we have to remove this directory + * here. + */ + if (rpc_rmdir(parent)) + printk(KERN_ERR "NFS: %s: failed to remove " + "clnt dir!\n", __func__); + } + break; + default: + printk(KERN_ERR "NFS: %s: unknown event: %ld\n", __func__, + event); + return -ENOTSUPP; + } + return err; +} + +static struct nfs_client *nfs_get_client_for_event(struct net *net, int event) +{ + struct nfs_net *nn = net_generic(net, nfs_net_id); + struct dentry *cl_dentry; + struct nfs_client *clp; + + spin_lock(&nn->nfs_client_lock); + list_for_each_entry(clp, &nn->nfs_client_list, cl_share_link) { + if (clp->rpc_ops != &nfs_v4_clientops) + continue; + cl_dentry = clp->cl_idmap->idmap_pipe->dentry; + if (((event == RPC_PIPEFS_MOUNT) && cl_dentry) || + ((event == RPC_PIPEFS_UMOUNT) && !cl_dentry)) + continue; + atomic_inc(&clp->cl_count); + spin_unlock(&nn->nfs_client_lock); + return clp; + } + spin_unlock(&nn->nfs_client_lock); + return NULL; } -static inline struct idmap_hashent * -idmap_id_hash(struct idmap_hashtable* h, __u32 id) +static int rpc_pipefs_event(struct notifier_block *nb, unsigned long event, + void *ptr) { - return &h->h_entries[fnvhash32(&id, sizeof(id)) % IDMAP_HASH_SZ]; -} + struct super_block *sb = ptr; + struct nfs_client *clp; + int error = 0; -static struct idmap_hashent * -idmap_lookup_id(struct idmap_hashtable *h, __u32 id) -{ - struct idmap_hashent *he = idmap_id_hash(h, id); - if (he->ih_id != id || he->ih_namelen == 0) - return NULL; - if (time_after(jiffies, he->ih_expires)) - return NULL; - return he; + while ((clp = nfs_get_client_for_event(sb->s_fs_info, event))) { + error = __rpc_pipefs_event(clp, event, sb); + nfs_put_client(clp); + if (error) + break; + } + return error; } -/* - * Routines for allocating new entries in the hashtable. - * For now, we just have 1 entry per bucket, so it's all - * pretty trivial. - */ -static inline struct idmap_hashent * -idmap_alloc_name(struct idmap_hashtable *h, char *name, size_t len) -{ - return idmap_name_hash(h, name, len); -} +#define PIPEFS_NFS_PRIO 1 + +static struct notifier_block nfs_idmap_block = { + .notifier_call = rpc_pipefs_event, + .priority = SUNRPC_PIPEFS_NFS_PRIO, +}; -static inline struct idmap_hashent * -idmap_alloc_id(struct idmap_hashtable *h, __u32 id) +int nfs_idmap_init(void) { - return idmap_id_hash(h, id); + int ret; + ret = nfs_idmap_init_keyring(); + if (ret != 0) + goto out; + ret = rpc_pipefs_notifier_register(&nfs_idmap_block); + if (ret != 0) + nfs_idmap_quit_keyring(); +out: + return ret; } -static void -idmap_update_entry(struct idmap_hashent *he, const char *name, - size_t namelen, __u32 id) +void nfs_idmap_quit(void) { - he->ih_id = id; - memcpy(he->ih_name, name, namelen); - he->ih_name[namelen] = '\0'; - he->ih_namelen = namelen; - he->ih_expires = jiffies + nfs_idmap_cache_timeout; + rpc_pipefs_notifier_unregister(&nfs_idmap_block); + nfs_idmap_quit_keyring(); } -/* - * Name -> ID - */ -static int -nfs_idmap_id(struct idmap *idmap, struct idmap_hashtable *h, - const char *name, size_t namelen, __u32 *id) +static int nfs_idmap_prepare_message(char *desc, struct idmap_msg *im, + struct rpc_pipe_msg *msg) { - struct rpc_pipe_msg msg; - struct idmap_msg *im; - struct idmap_hashent *he; - DECLARE_WAITQUEUE(wq, current); - int ret = -EIO; - - im = &idmap->idmap_im; - - /* - * String sanity checks - * Note that the userland daemon expects NUL terminated strings - */ - for (;;) { - if (namelen == 0) - return -EINVAL; - if (name[namelen-1] != '\0') - break; - namelen--; - } - if (namelen >= IDMAP_NAMESZ) - return -EINVAL; + substring_t substr; + int token, ret; - mutex_lock(&idmap->idmap_lock); - mutex_lock(&idmap->idmap_im_lock); - - he = idmap_lookup_name(h, name, namelen); - if (he != NULL) { - *id = he->ih_id; - ret = 0; - goto out; - } + memset(im, 0, sizeof(*im)); + memset(msg, 0, sizeof(*msg)); - memset(im, 0, sizeof(*im)); - memcpy(im->im_name, name, namelen); + im->im_type = IDMAP_TYPE_GROUP; + token = match_token(desc, nfs_idmap_tokens, &substr); - im->im_type = h->h_type; - im->im_conv = IDMAP_CONV_NAMETOID; + switch (token) { + case Opt_find_uid: + im->im_type = IDMAP_TYPE_USER; + case Opt_find_gid: + im->im_conv = IDMAP_CONV_NAMETOID; + ret = match_strlcpy(im->im_name, &substr, IDMAP_NAMESZ); + break; - memset(&msg, 0, sizeof(msg)); - msg.data = im; - msg.len = sizeof(*im); + case Opt_find_user: + im->im_type = IDMAP_TYPE_USER; + case Opt_find_group: + im->im_conv = IDMAP_CONV_IDTONAME; + ret = match_int(&substr, &im->im_id); + break; - add_wait_queue(&idmap->idmap_wq, &wq); - if (rpc_queue_upcall(idmap->idmap_dentry->d_inode, &msg) < 0) { - remove_wait_queue(&idmap->idmap_wq, &wq); + default: + ret = -EINVAL; goto out; } - set_current_state(TASK_UNINTERRUPTIBLE); - mutex_unlock(&idmap->idmap_im_lock); - schedule(); - __set_current_state(TASK_RUNNING); - remove_wait_queue(&idmap->idmap_wq, &wq); - mutex_lock(&idmap->idmap_im_lock); + msg->data = im; + msg->len = sizeof(struct idmap_msg); - if (im->im_status & IDMAP_STATUS_SUCCESS) { - *id = im->im_id; - ret = 0; - } - - out: - memset(im, 0, sizeof(*im)); - mutex_unlock(&idmap->idmap_im_lock); - mutex_unlock(&idmap->idmap_lock); +out: return ret; } -/* - * ID -> Name - */ -static int -nfs_idmap_name(struct idmap *idmap, struct idmap_hashtable *h, - __u32 id, char *name) +static int nfs_idmap_legacy_upcall(struct key_construction *cons, + const char *op, + void *aux) { - struct rpc_pipe_msg msg; + struct rpc_pipe_msg *msg; struct idmap_msg *im; - struct idmap_hashent *he; - DECLARE_WAITQUEUE(wq, current); - int ret = -EIO; - unsigned int len; - - im = &idmap->idmap_im; + struct idmap *idmap = (struct idmap *)aux; + struct key *key = cons->key; + int ret; - mutex_lock(&idmap->idmap_lock); - mutex_lock(&idmap->idmap_im_lock); + /* msg and im are freed in idmap_pipe_destroy_msg */ + msg = kmalloc(sizeof(*msg), GFP_KERNEL); + if (IS_ERR(msg)) { + ret = PTR_ERR(msg); + goto out0; + } - he = idmap_lookup_id(h, id); - if (he) { - memcpy(name, he->ih_name, he->ih_namelen); - ret = he->ih_namelen; - goto out; + im = kmalloc(sizeof(*im), GFP_KERNEL); + if (IS_ERR(im)) { + ret = PTR_ERR(im); + goto out1; } - memset(im, 0, sizeof(*im)); - im->im_type = h->h_type; - im->im_conv = IDMAP_CONV_IDTONAME; - im->im_id = id; + ret = nfs_idmap_prepare_message(key->description, im, msg); + if (ret < 0) + goto out2; - memset(&msg, 0, sizeof(msg)); - msg.data = im; - msg.len = sizeof(*im); + idmap->idmap_key_cons = cons; - add_wait_queue(&idmap->idmap_wq, &wq); + ret = rpc_queue_upcall(idmap->idmap_pipe, msg); + if (ret < 0) + goto out2; - if (rpc_queue_upcall(idmap->idmap_dentry->d_inode, &msg) < 0) { - remove_wait_queue(&idmap->idmap_wq, &wq); - goto out; - } + return ret; + +out2: + kfree(im); +out1: + kfree(msg); +out0: + key_revoke(cons->key); + key_revoke(cons->authkey); + return ret; +} + +static int nfs_idmap_instantiate(struct key *key, struct key *authkey, char *data) +{ + return key_instantiate_and_link(key, data, strlen(data) + 1, + id_resolver_cache->thread_keyring, + authkey); +} - set_current_state(TASK_UNINTERRUPTIBLE); - mutex_unlock(&idmap->idmap_im_lock); - schedule(); - __set_current_state(TASK_RUNNING); - remove_wait_queue(&idmap->idmap_wq, &wq); - mutex_lock(&idmap->idmap_im_lock); - - if (im->im_status & IDMAP_STATUS_SUCCESS) { - if ((len = strnlen(im->im_name, IDMAP_NAMESZ)) == 0) - goto out; - memcpy(name, im->im_name, len); - ret = len; +static int nfs_idmap_read_message(struct idmap_msg *im, struct key *key, struct key *authkey) +{ + char id_str[NFS_UINT_MAXLEN]; + int ret = -EINVAL; + + switch (im->im_conv) { + case IDMAP_CONV_NAMETOID: + sprintf(id_str, "%d", im->im_id); + ret = nfs_idmap_instantiate(key, authkey, id_str); + break; + case IDMAP_CONV_IDTONAME: + ret = nfs_idmap_instantiate(key, authkey, im->im_name); + break; } - out: - memset(im, 0, sizeof(*im)); - mutex_unlock(&idmap->idmap_im_lock); - mutex_unlock(&idmap->idmap_lock); return ret; } @@ -682,115 +703,51 @@ idmap_pipe_downcall(struct file *filp, const char __user *src, size_t mlen) { struct rpc_inode *rpci = RPC_I(filp->f_path.dentry->d_inode); struct idmap *idmap = (struct idmap *)rpci->private; - struct idmap_msg im_in, *im = &idmap->idmap_im; - struct idmap_hashtable *h; - struct idmap_hashent *he = NULL; + struct key_construction *cons = idmap->idmap_key_cons; + struct idmap_msg im; size_t namelen_in; int ret; - if (mlen != sizeof(im_in)) - return -ENOSPC; - - if (copy_from_user(&im_in, src, mlen) != 0) - return -EFAULT; - - mutex_lock(&idmap->idmap_im_lock); - - ret = mlen; - im->im_status = im_in.im_status; - /* If we got an error, terminate now, and wake up pending upcalls */ - if (!(im_in.im_status & IDMAP_STATUS_SUCCESS)) { - wake_up(&idmap->idmap_wq); + if (mlen != sizeof(im)) { + ret = -ENOSPC; goto out; } - /* Sanity checking of strings */ - ret = -EINVAL; - namelen_in = strnlen(im_in.im_name, IDMAP_NAMESZ); - if (namelen_in == 0 || namelen_in == IDMAP_NAMESZ) + if (copy_from_user(&im, src, mlen) != 0) { + ret = -EFAULT; goto out; + } - switch (im_in.im_type) { - case IDMAP_TYPE_USER: - h = &idmap->idmap_user_hash; - break; - case IDMAP_TYPE_GROUP: - h = &idmap->idmap_group_hash; - break; - default: - goto out; + if (!(im.im_status & IDMAP_STATUS_SUCCESS)) { + ret = mlen; + complete_request_key(idmap->idmap_key_cons, -ENOKEY); + goto out_incomplete; } - switch (im_in.im_conv) { - case IDMAP_CONV_IDTONAME: - /* Did we match the current upcall? */ - if (im->im_conv == IDMAP_CONV_IDTONAME - && im->im_type == im_in.im_type - && im->im_id == im_in.im_id) { - /* Yes: copy string, including the terminating '\0' */ - memcpy(im->im_name, im_in.im_name, namelen_in); - im->im_name[namelen_in] = '\0'; - wake_up(&idmap->idmap_wq); - } - he = idmap_alloc_id(h, im_in.im_id); - break; - case IDMAP_CONV_NAMETOID: - /* Did we match the current upcall? */ - if (im->im_conv == IDMAP_CONV_NAMETOID - && im->im_type == im_in.im_type - && strnlen(im->im_name, IDMAP_NAMESZ) == namelen_in - && memcmp(im->im_name, im_in.im_name, namelen_in) == 0) { - im->im_id = im_in.im_id; - wake_up(&idmap->idmap_wq); - } - he = idmap_alloc_name(h, im_in.im_name, namelen_in); - break; - default: + namelen_in = strnlen(im.im_name, IDMAP_NAMESZ); + if (namelen_in == 0 || namelen_in == IDMAP_NAMESZ) { + ret = -EINVAL; goto out; } - /* If the entry is valid, also copy it to the cache */ - if (he != NULL) - idmap_update_entry(he, im_in.im_name, namelen_in, im_in.im_id); - ret = mlen; + ret = nfs_idmap_read_message(&im, cons->key, cons->authkey); + if (ret >= 0) { + key_set_timeout(cons->key, nfs_idmap_cache_timeout); + ret = mlen; + } + out: - mutex_unlock(&idmap->idmap_im_lock); + complete_request_key(idmap->idmap_key_cons, ret); +out_incomplete: return ret; } static void idmap_pipe_destroy_msg(struct rpc_pipe_msg *msg) { - struct idmap_msg *im = msg->data; - struct idmap *idmap = container_of(im, struct idmap, idmap_im); - - if (msg->errno >= 0) - return; - mutex_lock(&idmap->idmap_im_lock); - im->im_status = IDMAP_STATUS_LOOKUPFAIL; - wake_up(&idmap->idmap_wq); - mutex_unlock(&idmap->idmap_im_lock); -} - -/* - * Fowler/Noll/Vo hash - * http://www.isthe.com/chongo/tech/comp/fnv/ - */ - -#define FNV_P_32 ((unsigned int)0x01000193) /* 16777619 */ -#define FNV_1_32 ((unsigned int)0x811c9dc5) /* 2166136261 */ - -static unsigned int fnvhash32(const void *buf, size_t buflen) -{ - const unsigned char *p, *end = (const unsigned char *)buf + buflen; - unsigned int hash = FNV_1_32; - - for (p = buf; p < end; p++) { - hash *= FNV_P_32; - hash ^= (unsigned int)*p; - } - - return hash; + /* Free memory allocated in nfs_idmap_legacy_upcall() */ + kfree(msg->data); + kfree(msg); } int nfs_map_name_to_uid(const struct nfs_server *server, const char *name, size_t namelen, __u32 *uid) @@ -799,16 +756,16 @@ int nfs_map_name_to_uid(const struct nfs_server *server, const char *name, size_ if (nfs_map_string_to_numeric(name, namelen, uid)) return 0; - return nfs_idmap_id(idmap, &idmap->idmap_user_hash, name, namelen, uid); + return nfs_idmap_lookup_id(name, namelen, "uid", uid, idmap); } -int nfs_map_group_to_gid(const struct nfs_server *server, const char *name, size_t namelen, __u32 *uid) +int nfs_map_group_to_gid(const struct nfs_server *server, const char *name, size_t namelen, __u32 *gid) { struct idmap *idmap = server->nfs_client->cl_idmap; - if (nfs_map_string_to_numeric(name, namelen, uid)) + if (nfs_map_string_to_numeric(name, namelen, gid)) return 0; - return nfs_idmap_id(idmap, &idmap->idmap_group_hash, name, namelen, uid); + return nfs_idmap_lookup_id(name, namelen, "gid", gid, idmap); } int nfs_map_uid_to_name(const struct nfs_server *server, __u32 uid, char *buf, size_t buflen) @@ -817,21 +774,19 @@ int nfs_map_uid_to_name(const struct nfs_server *server, __u32 uid, char *buf, s int ret = -EINVAL; if (!(server->caps & NFS_CAP_UIDGID_NOMAP)) - ret = nfs_idmap_name(idmap, &idmap->idmap_user_hash, uid, buf); + ret = nfs_idmap_lookup_name(uid, "user", buf, buflen, idmap); if (ret < 0) ret = nfs_map_numeric_to_string(uid, buf, buflen); return ret; } -int nfs_map_gid_to_group(const struct nfs_server *server, __u32 uid, char *buf, size_t buflen) +int nfs_map_gid_to_group(const struct nfs_server *server, __u32 gid, char *buf, size_t buflen) { struct idmap *idmap = server->nfs_client->cl_idmap; int ret = -EINVAL; if (!(server->caps & NFS_CAP_UIDGID_NOMAP)) - ret = nfs_idmap_name(idmap, &idmap->idmap_group_hash, uid, buf); + ret = nfs_idmap_lookup_name(gid, "group", buf, buflen, idmap); if (ret < 0) - ret = nfs_map_numeric_to_string(uid, buf, buflen); + ret = nfs_map_numeric_to_string(gid, buf, buflen); return ret; } - -#endif /* CONFIG_NFS_USE_NEW_IDMAPPER */ diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index f649fba..7bb4d13 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -39,6 +39,7 @@ #include <linux/slab.h> #include <linux/compat.h> #include <linux/freezer.h> +#include <linux/crc32.h> #include <asm/system.h> #include <asm/uaccess.h> @@ -51,6 +52,7 @@ #include "fscache.h" #include "dns_resolve.h" #include "pnfs.h" +#include "netns.h" #define NFSDBG_FACILITY NFSDBG_VFS @@ -388,9 +390,10 @@ nfs_fhget(struct super_block *sb, struct nfs_fh *fh, struct nfs_fattr *fattr) unlock_new_inode(inode); } else nfs_refresh_inode(inode, fattr); - dprintk("NFS: nfs_fhget(%s/%Ld ct=%d)\n", + dprintk("NFS: nfs_fhget(%s/%Ld fh_crc=0x%08x ct=%d)\n", inode->i_sb->s_id, (long long)NFS_FILEID(inode), + nfs_display_fhandle_hash(fh), atomic_read(&inode->i_count)); out: @@ -401,7 +404,7 @@ out_no_inode: goto out; } -#define NFS_VALID_ATTRS (ATTR_MODE|ATTR_UID|ATTR_GID|ATTR_SIZE|ATTR_ATIME|ATTR_ATIME_SET|ATTR_MTIME|ATTR_MTIME_SET|ATTR_FILE) +#define NFS_VALID_ATTRS (ATTR_MODE|ATTR_UID|ATTR_GID|ATTR_SIZE|ATTR_ATIME|ATTR_ATIME_SET|ATTR_MTIME|ATTR_MTIME_SET|ATTR_FILE|ATTR_OPEN) int nfs_setattr(struct dentry *dentry, struct iattr *attr) @@ -423,7 +426,7 @@ nfs_setattr(struct dentry *dentry, struct iattr *attr) /* Optimization: if the end result is no change, don't RPC */ attr->ia_valid &= NFS_VALID_ATTRS; - if ((attr->ia_valid & ~ATTR_FILE) == 0) + if ((attr->ia_valid & ~(ATTR_FILE|ATTR_OPEN)) == 0) return 0; /* Write all dirty data */ @@ -1044,6 +1047,67 @@ struct nfs_fh *nfs_alloc_fhandle(void) return fh; } +#ifdef NFS_DEBUG +/* + * _nfs_display_fhandle_hash - calculate the crc32 hash for the filehandle + * in the same way that wireshark does + * + * @fh: file handle + * + * For debugging only. + */ +u32 _nfs_display_fhandle_hash(const struct nfs_fh *fh) +{ + /* wireshark uses 32-bit AUTODIN crc and does a bitwise + * not on the result */ + return ~crc32(0xFFFFFFFF, &fh->data[0], fh->size); +} + +/* + * _nfs_display_fhandle - display an NFS file handle on the console + * + * @fh: file handle to display + * @caption: display caption + * + * For debugging only. + */ +void _nfs_display_fhandle(const struct nfs_fh *fh, const char *caption) +{ + unsigned short i; + + if (fh == NULL || fh->size == 0) { + printk(KERN_DEFAULT "%s at %p is empty\n", caption, fh); + return; + } + + printk(KERN_DEFAULT "%s at %p is %u bytes, crc: 0x%08x:\n", + caption, fh, fh->size, _nfs_display_fhandle_hash(fh)); + for (i = 0; i < fh->size; i += 16) { + __be32 *pos = (__be32 *)&fh->data[i]; + + switch ((fh->size - i - 1) >> 2) { + case 0: + printk(KERN_DEFAULT " %08x\n", + be32_to_cpup(pos)); + break; + case 1: + printk(KERN_DEFAULT " %08x %08x\n", + be32_to_cpup(pos), be32_to_cpup(pos + 1)); + break; + case 2: + printk(KERN_DEFAULT " %08x %08x %08x\n", + be32_to_cpup(pos), be32_to_cpup(pos + 1), + be32_to_cpup(pos + 2)); + break; + default: + printk(KERN_DEFAULT " %08x %08x %08x %08x\n", + be32_to_cpup(pos), be32_to_cpup(pos + 1), + be32_to_cpup(pos + 2), be32_to_cpup(pos + 3)); + } + } +} +#endif + /** * nfs_inode_attrs_need_update - check if the inode attributes need updating * @inode - pointer to inode @@ -1211,8 +1275,9 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr) unsigned long now = jiffies; unsigned long save_cache_validity; - dfprintk(VFS, "NFS: %s(%s/%ld ct=%d info=0x%x)\n", + dfprintk(VFS, "NFS: %s(%s/%ld fh_crc=0x%08x ct=%d info=0x%x)\n", __func__, inode->i_sb->s_id, inode->i_ino, + nfs_display_fhandle_hash(NFS_FH(inode)), atomic_read(&inode->i_count), fattr->valid); if ((fattr->valid & NFS_ATTR_FATTR_FILEID) && nfsi->fileid != fattr->fileid) @@ -1406,7 +1471,7 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr) /* * Big trouble! The inode has become a different object. */ - printk(KERN_DEBUG "%s: inode %ld mode changed, %07o to %07o\n", + printk(KERN_DEBUG "NFS: %s: inode %ld mode changed, %07o to %07o\n", __func__, inode->i_ino, inode->i_mode, fattr->mode); out_err: /* @@ -1495,7 +1560,7 @@ static void init_once(void *foo) INIT_LIST_HEAD(&nfsi->open_files); INIT_LIST_HEAD(&nfsi->access_cache_entry_lru); INIT_LIST_HEAD(&nfsi->access_cache_inode_lru); - INIT_RADIX_TREE(&nfsi->nfs_page_tree, GFP_ATOMIC); + INIT_LIST_HEAD(&nfsi->commit_list); nfsi->npages = 0; nfsi->ncommit = 0; atomic_set(&nfsi->silly_count, 1); @@ -1552,6 +1617,28 @@ static void nfsiod_stop(void) destroy_workqueue(wq); } +int nfs_net_id; +EXPORT_SYMBOL_GPL(nfs_net_id); + +static int nfs_net_init(struct net *net) +{ + nfs_clients_init(net); + return nfs_dns_resolver_cache_init(net); +} + +static void nfs_net_exit(struct net *net) +{ + nfs_dns_resolver_cache_destroy(net); + nfs_cleanup_cb_ident_idr(net); +} + +static struct pernet_operations nfs_net_ops = { + .init = nfs_net_init, + .exit = nfs_net_exit, + .id = &nfs_net_id, + .size = sizeof(struct nfs_net), +}; + /* * Initialize NFS */ @@ -1561,10 +1648,14 @@ static int __init init_nfs_fs(void) err = nfs_idmap_init(); if (err < 0) - goto out9; + goto out10; err = nfs_dns_resolver_init(); if (err < 0) + goto out9; + + err = register_pernet_subsys(&nfs_net_ops); + if (err < 0) goto out8; err = nfs_fscache_register(); @@ -1600,14 +1691,14 @@ static int __init init_nfs_fs(void) goto out0; #ifdef CONFIG_PROC_FS - rpc_proc_register(&nfs_rpcstat); + rpc_proc_register(&init_net, &nfs_rpcstat); #endif if ((err = register_nfs_fs()) != 0) goto out; return 0; out: #ifdef CONFIG_PROC_FS - rpc_proc_unregister("nfs"); + rpc_proc_unregister(&init_net, "nfs"); #endif nfs_destroy_directcache(); out0: @@ -1625,10 +1716,12 @@ out5: out6: nfs_fscache_unregister(); out7: - nfs_dns_resolver_destroy(); + unregister_pernet_subsys(&nfs_net_ops); out8: - nfs_idmap_quit(); + nfs_dns_resolver_destroy(); out9: + nfs_idmap_quit(); +out10: return err; } @@ -1640,12 +1733,12 @@ static void __exit exit_nfs_fs(void) nfs_destroy_inodecache(); nfs_destroy_nfspagecache(); nfs_fscache_unregister(); + unregister_pernet_subsys(&nfs_net_ops); nfs_dns_resolver_destroy(); nfs_idmap_quit(); #ifdef CONFIG_PROC_FS - rpc_proc_unregister("nfs"); + rpc_proc_unregister(&init_net, "nfs"); #endif - nfs_cleanup_cb_ident_idr(); unregister_nfs_fs(); nfs_fs_proc_exit(); nfsiod_stop(); diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h index 8102db9..2476dc6 100644 --- a/fs/nfs/internal.h +++ b/fs/nfs/internal.h @@ -123,6 +123,7 @@ struct nfs_parsed_mount_data { } nfs_server; struct security_mnt_opts lsm_opts; + struct net *net; }; /* mount_clnt.c */ @@ -137,20 +138,22 @@ struct nfs_mount_request { int noresvport; unsigned int *auth_flav_len; rpc_authflavor_t *auth_flavs; + struct net *net; }; extern int nfs_mount(struct nfs_mount_request *info); extern void nfs_umount(const struct nfs_mount_request *info); /* client.c */ -extern struct rpc_program nfs_program; +extern const struct rpc_program nfs_program; +extern void nfs_clients_init(struct net *net); -extern void nfs_cleanup_cb_ident_idr(void); +extern void nfs_cleanup_cb_ident_idr(struct net *); extern void nfs_put_client(struct nfs_client *); -extern struct nfs_client *nfs4_find_client_no_ident(const struct sockaddr *); -extern struct nfs_client *nfs4_find_client_ident(int); +extern struct nfs_client *nfs4_find_client_ident(struct net *, int); extern struct nfs_client * -nfs4_find_client_sessionid(const struct sockaddr *, struct nfs4_sessionid *); +nfs4_find_client_sessionid(struct net *, const struct sockaddr *, + struct nfs4_sessionid *); extern struct nfs_server *nfs_create_server( const struct nfs_parsed_mount_data *, struct nfs_fh *); @@ -329,6 +332,8 @@ void nfs_retry_commit(struct list_head *page_list, void nfs_commit_clear_lock(struct nfs_inode *nfsi); void nfs_commitdata_release(void *data); void nfs_commit_release_pages(struct nfs_write_data *data); +void nfs_request_add_commit_list(struct nfs_page *req, struct list_head *head); +void nfs_request_remove_commit_list(struct nfs_page *req); #ifdef CONFIG_MIGRATION extern int nfs_migrate_page(struct address_space *, diff --git a/fs/nfs/mount_clnt.c b/fs/nfs/mount_clnt.c index d4c2d6b..8e65c7f 100644 --- a/fs/nfs/mount_clnt.c +++ b/fs/nfs/mount_clnt.c @@ -16,7 +16,7 @@ #include <linux/nfs_fs.h> #include "internal.h" -#ifdef RPC_DEBUG +#ifdef NFS_DEBUG # define NFSDBG_FACILITY NFSDBG_MOUNT #endif @@ -67,7 +67,7 @@ enum { MOUNTPROC3_EXPORT = 5, }; -static struct rpc_program mnt_program; +static const struct rpc_program mnt_program; /* * Defined by OpenGroup XNFS Version 3W, chapter 8 @@ -153,7 +153,7 @@ int nfs_mount(struct nfs_mount_request *info) .rpc_resp = &result, }; struct rpc_create_args args = { - .net = &init_net, + .net = info->net, .protocol = info->protocol, .address = info->sap, .addrsize = info->salen, @@ -225,7 +225,7 @@ void nfs_umount(const struct nfs_mount_request *info) .to_retries = 2, }; struct rpc_create_args args = { - .net = &init_net, + .net = info->net, .protocol = IPPROTO_UDP, .address = info->sap, .addrsize = info->salen, @@ -488,19 +488,19 @@ static struct rpc_procinfo mnt3_procedures[] = { }; -static struct rpc_version mnt_version1 = { +static const struct rpc_version mnt_version1 = { .number = 1, .nrprocs = ARRAY_SIZE(mnt_procedures), .procs = mnt_procedures, }; -static struct rpc_version mnt_version3 = { +static const struct rpc_version mnt_version3 = { .number = 3, .nrprocs = ARRAY_SIZE(mnt3_procedures), .procs = mnt3_procedures, }; -static struct rpc_version *mnt_version[] = { +static const struct rpc_version *mnt_version[] = { NULL, &mnt_version1, NULL, @@ -509,7 +509,7 @@ static struct rpc_version *mnt_version[] = { static struct rpc_stat mnt_stats; -static struct rpc_program mnt_program = { +static const struct rpc_program mnt_program = { .name = "mount", .number = NFS_MNT_PROGRAM, .nrvers = ARRAY_SIZE(mnt_version), diff --git a/fs/nfs/namespace.c b/fs/nfs/namespace.c index 8102391..1807866 100644 --- a/fs/nfs/namespace.c +++ b/fs/nfs/namespace.c @@ -276,7 +276,10 @@ out: nfs_free_fattr(fattr); nfs_free_fhandle(fh); out_nofree: - dprintk("<-- nfs_follow_mountpoint() = %p\n", mnt); + if (IS_ERR(mnt)) + dprintk("<-- %s(): error %ld\n", __func__, PTR_ERR(mnt)); + else + dprintk("<-- %s() = %p\n", __func__, mnt); return mnt; } diff --git a/fs/nfs/netns.h b/fs/nfs/netns.h new file mode 100644 index 0000000..aa14ec3 --- /dev/null +++ b/fs/nfs/netns.h @@ -0,0 +1,27 @@ +#ifndef __NFS_NETNS_H__ +#define __NFS_NETNS_H__ + +#include <net/net_namespace.h> +#include <net/netns/generic.h> + +struct bl_dev_msg { + int32_t status; + uint32_t major, minor; +}; + +struct nfs_net { + struct cache_detail *nfs_dns_resolve; + struct rpc_pipe *bl_device_pipe; + struct bl_dev_msg bl_mount_reply; + wait_queue_head_t bl_wq; + struct list_head nfs_client_list; + struct list_head nfs_volume_list; +#ifdef CONFIG_NFS_V4 + struct idr cb_ident_idr; /* Protected by nfs_client_lock */ +#endif + spinlock_t nfs_client_lock; +}; + +extern int nfs_net_id; + +#endif diff --git a/fs/nfs/nfs2xdr.c b/fs/nfs/nfs2xdr.c index 792cb13..1f56000 100644 --- a/fs/nfs/nfs2xdr.c +++ b/fs/nfs/nfs2xdr.c @@ -1150,7 +1150,7 @@ struct rpc_procinfo nfs_procedures[] = { PROC(STATFS, fhandle, statfsres, 0), }; -struct rpc_version nfs_version2 = { +const struct rpc_version nfs_version2 = { .number = 2, .nrprocs = ARRAY_SIZE(nfs_procedures), .procs = nfs_procedures diff --git a/fs/nfs/nfs3acl.c b/fs/nfs/nfs3acl.c index 7ef2397..e4498dc 100644 --- a/fs/nfs/nfs3acl.c +++ b/fs/nfs/nfs3acl.c @@ -192,7 +192,7 @@ struct posix_acl *nfs3_proc_getacl(struct inode *inode, int type) .pages = pages, }; struct nfs3_getaclres res = { - 0 + NULL, }; struct rpc_message msg = { .rpc_argp = &args, diff --git a/fs/nfs/nfs3proc.c b/fs/nfs/nfs3proc.c index 91943953..5242eae 100644 --- a/fs/nfs/nfs3proc.c +++ b/fs/nfs/nfs3proc.c @@ -428,6 +428,11 @@ nfs3_proc_unlink_setup(struct rpc_message *msg, struct inode *dir) msg->rpc_proc = &nfs3_procedures[NFS3PROC_REMOVE]; } +static void nfs3_proc_unlink_rpc_prepare(struct rpc_task *task, struct nfs_unlinkdata *data) +{ + rpc_call_start(task); +} + static int nfs3_proc_unlink_done(struct rpc_task *task, struct inode *dir) { @@ -445,6 +450,11 @@ nfs3_proc_rename_setup(struct rpc_message *msg, struct inode *dir) msg->rpc_proc = &nfs3_procedures[NFS3PROC_RENAME]; } +static void nfs3_proc_rename_rpc_prepare(struct rpc_task *task, struct nfs_renamedata *data) +{ + rpc_call_start(task); +} + static int nfs3_proc_rename_done(struct rpc_task *task, struct inode *old_dir, struct inode *new_dir) @@ -814,6 +824,11 @@ static void nfs3_proc_read_setup(struct nfs_read_data *data, struct rpc_message msg->rpc_proc = &nfs3_procedures[NFS3PROC_READ]; } +static void nfs3_proc_read_rpc_prepare(struct rpc_task *task, struct nfs_read_data *data) +{ + rpc_call_start(task); +} + static int nfs3_write_done(struct rpc_task *task, struct nfs_write_data *data) { if (nfs3_async_handle_jukebox(task, data->inode)) @@ -828,6 +843,11 @@ static void nfs3_proc_write_setup(struct nfs_write_data *data, struct rpc_messag msg->rpc_proc = &nfs3_procedures[NFS3PROC_WRITE]; } +static void nfs3_proc_write_rpc_prepare(struct rpc_task *task, struct nfs_write_data *data) +{ + rpc_call_start(task); +} + static int nfs3_commit_done(struct rpc_task *task, struct nfs_write_data *data) { if (nfs3_async_handle_jukebox(task, data->inode)) @@ -864,9 +884,11 @@ const struct nfs_rpc_ops nfs_v3_clientops = { .create = nfs3_proc_create, .remove = nfs3_proc_remove, .unlink_setup = nfs3_proc_unlink_setup, + .unlink_rpc_prepare = nfs3_proc_unlink_rpc_prepare, .unlink_done = nfs3_proc_unlink_done, .rename = nfs3_proc_rename, .rename_setup = nfs3_proc_rename_setup, + .rename_rpc_prepare = nfs3_proc_rename_rpc_prepare, .rename_done = nfs3_proc_rename_done, .link = nfs3_proc_link, .symlink = nfs3_proc_symlink, @@ -879,8 +901,10 @@ const struct nfs_rpc_ops nfs_v3_clientops = { .pathconf = nfs3_proc_pathconf, .decode_dirent = nfs3_decode_dirent, .read_setup = nfs3_proc_read_setup, + .read_rpc_prepare = nfs3_proc_read_rpc_prepare, .read_done = nfs3_read_done, .write_setup = nfs3_proc_write_setup, + .write_rpc_prepare = nfs3_proc_write_rpc_prepare, .write_done = nfs3_write_done, .commit_setup = nfs3_proc_commit_setup, .commit_done = nfs3_commit_done, diff --git a/fs/nfs/nfs3xdr.c b/fs/nfs/nfs3xdr.c index 183c6b1..a77cc9a 100644 --- a/fs/nfs/nfs3xdr.c +++ b/fs/nfs/nfs3xdr.c @@ -2461,7 +2461,7 @@ struct rpc_procinfo nfs3_procedures[] = { PROC(COMMIT, commit, commit, 5), }; -struct rpc_version nfs_version3 = { +const struct rpc_version nfs_version3 = { .number = 3, .nrprocs = ARRAY_SIZE(nfs3_procedures), .procs = nfs3_procedures @@ -2489,7 +2489,7 @@ static struct rpc_procinfo nfs3_acl_procedures[] = { }, }; -struct rpc_version nfsacl_version3 = { +const struct rpc_version nfsacl_version3 = { .number = 3, .nrprocs = sizeof(nfs3_acl_procedures)/ sizeof(nfs3_acl_procedures[0]), diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h index 4d7d0ae..97ecc86 100644 --- a/fs/nfs/nfs4_fs.h +++ b/fs/nfs/nfs4_fs.h @@ -20,7 +20,6 @@ enum nfs4_client_state { NFS4CLNT_RECLAIM_REBOOT, NFS4CLNT_RECLAIM_NOGRACE, NFS4CLNT_DELEGRETURN, - NFS4CLNT_LAYOUTRECALL, NFS4CLNT_SESSION_RESET, NFS4CLNT_RECALL_SLOT, NFS4CLNT_LEASE_CONFIRM, @@ -44,7 +43,7 @@ struct nfs4_minor_version_ops { struct nfs4_sequence_args *args, struct nfs4_sequence_res *res, int cache_reply); - int (*validate_stateid)(struct nfs_delegation *, + bool (*match_stateid)(const nfs4_stateid *, const nfs4_stateid *); int (*find_root_sec)(struct nfs_server *, struct nfs_fh *, struct nfs_fsinfo *); @@ -53,26 +52,25 @@ struct nfs4_minor_version_ops { const struct nfs4_state_maintenance_ops *state_renewal_ops; }; -/* - * struct rpc_sequence ensures that RPC calls are sent in the exact - * order that they appear on the list. - */ -struct rpc_sequence { - struct rpc_wait_queue wait; /* RPC call delay queue */ - spinlock_t lock; /* Protects the list */ - struct list_head list; /* Defines sequence of RPC calls */ +struct nfs_unique_id { + struct rb_node rb_node; + __u64 id; }; #define NFS_SEQID_CONFIRMED 1 struct nfs_seqid_counter { - struct rpc_sequence *sequence; + int owner_id; int flags; u32 counter; + spinlock_t lock; /* Protects the list */ + struct list_head list; /* Defines sequence of RPC calls */ + struct rpc_wait_queue wait; /* RPC call delay queue */ }; struct nfs_seqid { struct nfs_seqid_counter *sequence; struct list_head list; + struct rpc_task *task; }; static inline void nfs_confirm_seqid(struct nfs_seqid_counter *seqid, int status) @@ -81,18 +79,12 @@ static inline void nfs_confirm_seqid(struct nfs_seqid_counter *seqid, int status seqid->flags |= NFS_SEQID_CONFIRMED; } -struct nfs_unique_id { - struct rb_node rb_node; - __u64 id; -}; - /* * NFS4 state_owners and lock_owners are simply labels for ordered * sequences of RPC calls. Their sole purpose is to provide once-only * semantics by allowing the server to identify replayed requests. */ struct nfs4_state_owner { - struct nfs_unique_id so_owner_id; struct nfs_server *so_server; struct list_head so_lru; unsigned long so_expires; @@ -105,7 +97,6 @@ struct nfs4_state_owner { unsigned long so_flags; struct list_head so_states; struct nfs_seqid_counter so_seqid; - struct rpc_sequence so_sequence; }; enum { @@ -146,8 +137,6 @@ struct nfs4_lock_state { #define NFS_LOCK_INITIALIZED 1 int ls_flags; struct nfs_seqid_counter ls_seqid; - struct rpc_sequence ls_sequence; - struct nfs_unique_id ls_id; nfs4_stateid ls_stateid; atomic_t ls_count; struct nfs4_lock_owner ls_owner; @@ -193,6 +182,7 @@ struct nfs4_exception { long timeout; int retry; struct nfs4_state *state; + struct inode *inode; }; struct nfs4_state_recovery_ops { @@ -224,7 +214,7 @@ extern int nfs4_do_close(struct nfs4_state *state, gfp_t gfp_mask, int wait, boo extern int nfs4_server_capabilities(struct nfs_server *server, struct nfs_fh *fhandle); extern int nfs4_proc_fs_locations(struct inode *dir, const struct qstr *name, struct nfs4_fs_locations *fs_locations, struct page *page); -extern void nfs4_release_lockowner(const struct nfs4_lock_state *); +extern int nfs4_release_lockowner(struct nfs4_lock_state *); extern const struct xattr_handler *nfs4_xattr_handlers[]; #if defined(CONFIG_NFS_V4_1) @@ -233,12 +223,13 @@ static inline struct nfs4_session *nfs4_get_session(const struct nfs_server *ser return server->nfs_client->cl_session; } +extern bool nfs4_set_task_privileged(struct rpc_task *task, void *dummy); extern int nfs4_setup_sequence(const struct nfs_server *server, struct nfs4_sequence_args *args, struct nfs4_sequence_res *res, - int cache_reply, struct rpc_task *task); + struct rpc_task *task); extern int nfs41_setup_sequence(struct nfs4_session *session, struct nfs4_sequence_args *args, struct nfs4_sequence_res *res, - int cache_reply, struct rpc_task *task); + struct rpc_task *task); extern void nfs4_destroy_session(struct nfs4_session *session); extern struct nfs4_session *nfs4_alloc_session(struct nfs_client *clp); extern int nfs4_proc_create_session(struct nfs_client *); @@ -269,7 +260,7 @@ static inline struct nfs4_session *nfs4_get_session(const struct nfs_server *ser static inline int nfs4_setup_sequence(const struct nfs_server *server, struct nfs4_sequence_args *args, struct nfs4_sequence_res *res, - int cache_reply, struct rpc_task *task) + struct rpc_task *task) { return 0; } @@ -319,7 +310,7 @@ static inline void nfs4_schedule_session_recovery(struct nfs4_session *session) } #endif /* CONFIG_NFS_V4_1 */ -extern struct nfs4_state_owner * nfs4_get_state_owner(struct nfs_server *, struct rpc_cred *); +extern struct nfs4_state_owner *nfs4_get_state_owner(struct nfs_server *, struct rpc_cred *, gfp_t); extern void nfs4_put_state_owner(struct nfs4_state_owner *); extern void nfs4_purge_state_owners(struct nfs_server *); extern struct nfs4_state * nfs4_get_open_state(struct inode *, struct nfs4_state_owner *); @@ -327,6 +318,8 @@ extern void nfs4_put_open_state(struct nfs4_state *); extern void nfs4_close_state(struct nfs4_state *, fmode_t); extern void nfs4_close_sync(struct nfs4_state *, fmode_t); extern void nfs4_state_set_mode_locked(struct nfs4_state *, fmode_t); +extern void nfs_inode_find_state_and_recover(struct inode *inode, + const nfs4_stateid *stateid); extern void nfs4_schedule_lease_recovery(struct nfs_client *); extern void nfs4_schedule_state_manager(struct nfs_client *); extern void nfs4_schedule_path_down_recovery(struct nfs_client *clp); @@ -337,7 +330,8 @@ extern void nfs41_handle_server_scope(struct nfs_client *, struct server_scope **); extern void nfs4_put_lock_state(struct nfs4_lock_state *lsp); extern int nfs4_set_lock_state(struct nfs4_state *state, struct file_lock *fl); -extern void nfs4_copy_stateid(nfs4_stateid *, struct nfs4_state *, fl_owner_t, pid_t); +extern void nfs4_select_rw_stateid(nfs4_stateid *, struct nfs4_state *, + fmode_t, fl_owner_t, pid_t); extern struct nfs_seqid *nfs_alloc_seqid(struct nfs_seqid_counter *counter, gfp_t gfp_mask); extern int nfs_wait_on_sequence(struct nfs_seqid *seqid, struct rpc_task *task); @@ -346,6 +340,8 @@ extern void nfs_increment_lock_seqid(int status, struct nfs_seqid *seqid); extern void nfs_release_seqid(struct nfs_seqid *seqid); extern void nfs_free_seqid(struct nfs_seqid *seqid); +extern void nfs4_free_lock_state(struct nfs_server *server, struct nfs4_lock_state *lsp); + extern const nfs4_stateid zero_stateid; /* nfs4xdr.c */ @@ -357,6 +353,16 @@ struct nfs4_mount_data; extern struct svc_version nfs4_callback_version1; extern struct svc_version nfs4_callback_version4; +static inline void nfs4_stateid_copy(nfs4_stateid *dst, const nfs4_stateid *src) +{ + memcpy(dst, src, sizeof(*dst)); +} + +static inline bool nfs4_stateid_match(const nfs4_stateid *dst, const nfs4_stateid *src) +{ + return memcmp(dst, src, sizeof(*dst)) == 0; +} + #else #define nfs4_close_state(a, b) do { } while (0) diff --git a/fs/nfs/nfs4filelayout.c b/fs/nfs/nfs4filelayout.c index 71ec086..634c0bc 100644 --- a/fs/nfs/nfs4filelayout.c +++ b/fs/nfs/nfs4filelayout.c @@ -33,7 +33,10 @@ #include <linux/nfs_page.h> #include <linux/module.h> +#include <linux/sunrpc/metrics.h> + #include "internal.h" +#include "delegation.h" #include "nfs4filelayout.h" #define NFSDBG_FACILITY NFSDBG_PNFS_LD @@ -84,12 +87,27 @@ static int filelayout_async_handle_error(struct rpc_task *task, struct nfs_client *clp, int *reset) { + struct nfs_server *mds_server = NFS_SERVER(state->inode); + struct nfs_client *mds_client = mds_server->nfs_client; + if (task->tk_status >= 0) return 0; - *reset = 0; switch (task->tk_status) { + /* MDS state errors */ + case -NFS4ERR_DELEG_REVOKED: + case -NFS4ERR_ADMIN_REVOKED: + case -NFS4ERR_BAD_STATEID: + nfs_remove_bad_delegation(state->inode); + case -NFS4ERR_OPENMODE: + nfs4_schedule_stateid_recovery(mds_server, state); + goto wait_on_recovery; + case -NFS4ERR_EXPIRED: + nfs4_schedule_stateid_recovery(mds_server, state); + nfs4_schedule_lease_recovery(mds_client); + goto wait_on_recovery; + /* DS session errors */ case -NFS4ERR_BADSESSION: case -NFS4ERR_BADSLOT: case -NFS4ERR_BAD_HIGH_SLOT: @@ -115,8 +133,14 @@ static int filelayout_async_handle_error(struct rpc_task *task, *reset = 1; break; } +out: task->tk_status = 0; return -EAGAIN; +wait_on_recovery: + rpc_sleep_on(&mds_client->cl_rpcwaitq, task, NULL); + if (test_bit(NFS4CLNT_MANAGER_RUNNING, &mds_client->cl_state) == 0) + rpc_wake_up_queued_task(&mds_client->cl_rpcwaitq, task); + goto out; } /* NFS_PROTO call done callback routines */ @@ -173,7 +197,7 @@ static void filelayout_read_prepare(struct rpc_task *task, void *data) if (nfs41_setup_sequence(rdata->ds_clp->cl_session, &rdata->args.seq_args, &rdata->res.seq_res, - 0, task)) + task)) return; rpc_call_start(task); @@ -189,10 +213,18 @@ static void filelayout_read_call_done(struct rpc_task *task, void *data) rdata->mds_ops->rpc_call_done(task, data); } +static void filelayout_read_count_stats(struct rpc_task *task, void *data) +{ + struct nfs_read_data *rdata = (struct nfs_read_data *)data; + + rpc_count_iostats(task, NFS_SERVER(rdata->inode)->client->cl_metrics); +} + static void filelayout_read_release(void *data) { struct nfs_read_data *rdata = (struct nfs_read_data *)data; + put_lseg(rdata->lseg); rdata->mds_ops->rpc_release(data); } @@ -254,7 +286,7 @@ static void filelayout_write_prepare(struct rpc_task *task, void *data) if (nfs41_setup_sequence(wdata->ds_clp->cl_session, &wdata->args.seq_args, &wdata->res.seq_res, - 0, task)) + task)) return; rpc_call_start(task); @@ -268,10 +300,18 @@ static void filelayout_write_call_done(struct rpc_task *task, void *data) wdata->mds_ops->rpc_call_done(task, data); } +static void filelayout_write_count_stats(struct rpc_task *task, void *data) +{ + struct nfs_write_data *wdata = (struct nfs_write_data *)data; + + rpc_count_iostats(task, NFS_SERVER(wdata->inode)->client->cl_metrics); +} + static void filelayout_write_release(void *data) { struct nfs_write_data *wdata = (struct nfs_write_data *)data; + put_lseg(wdata->lseg); wdata->mds_ops->rpc_release(data); } @@ -282,24 +322,28 @@ static void filelayout_commit_release(void *data) nfs_commit_release_pages(wdata); if (atomic_dec_and_test(&NFS_I(wdata->inode)->commits_outstanding)) nfs_commit_clear_lock(NFS_I(wdata->inode)); + put_lseg(wdata->lseg); nfs_commitdata_release(wdata); } -struct rpc_call_ops filelayout_read_call_ops = { +static const struct rpc_call_ops filelayout_read_call_ops = { .rpc_call_prepare = filelayout_read_prepare, .rpc_call_done = filelayout_read_call_done, + .rpc_count_stats = filelayout_read_count_stats, .rpc_release = filelayout_read_release, }; -struct rpc_call_ops filelayout_write_call_ops = { +static const struct rpc_call_ops filelayout_write_call_ops = { .rpc_call_prepare = filelayout_write_prepare, .rpc_call_done = filelayout_write_call_done, + .rpc_count_stats = filelayout_write_count_stats, .rpc_release = filelayout_write_release, }; -struct rpc_call_ops filelayout_commit_call_ops = { +static const struct rpc_call_ops filelayout_commit_call_ops = { .rpc_call_prepare = filelayout_write_prepare, .rpc_call_done = filelayout_write_call_done, + .rpc_count_stats = filelayout_write_count_stats, .rpc_release = filelayout_commit_release, }; @@ -367,7 +411,8 @@ filelayout_write_pagelist(struct nfs_write_data *data, int sync) idx = nfs4_fl_calc_ds_index(lseg, j); ds = nfs4_fl_prepare_ds(lseg, idx); if (!ds) { - printk(KERN_ERR "%s: prepare_ds failed, use MDS\n", __func__); + printk(KERN_ERR "NFS: %s: prepare_ds failed, use MDS\n", + __func__); set_bit(lo_fail_bit(IOMODE_RW), &lseg->pls_layout->plh_flags); set_bit(lo_fail_bit(IOMODE_READ), &lseg->pls_layout->plh_flags); return PNFS_NOT_ATTEMPTED; @@ -575,7 +620,7 @@ filelayout_decode_layout(struct pnfs_layout_hdr *flo, goto out_err_free; fl->fh_array[i]->size = be32_to_cpup(p++); if (sizeof(struct nfs_fh) < fl->fh_array[i]->size) { - printk(KERN_ERR "Too big fh %d received %d\n", + printk(KERN_ERR "NFS: Too big fh %d received %d\n", i, fl->fh_array[i]->size); goto out_err_free; } @@ -640,14 +685,16 @@ filelayout_alloc_lseg(struct pnfs_layout_hdr *layoutid, int size = (fl->stripe_type == STRIPE_SPARSE) ? fl->dsaddr->ds_num : fl->dsaddr->stripe_count; - fl->commit_buckets = kcalloc(size, sizeof(struct list_head), gfp_flags); + fl->commit_buckets = kcalloc(size, sizeof(struct nfs4_fl_commit_bucket), gfp_flags); if (!fl->commit_buckets) { filelayout_free_lseg(&fl->generic_hdr); return NULL; } fl->number_of_buckets = size; - for (i = 0; i < size; i++) - INIT_LIST_HEAD(&fl->commit_buckets[i]); + for (i = 0; i < size; i++) { + INIT_LIST_HEAD(&fl->commit_buckets[i].written); + INIT_LIST_HEAD(&fl->commit_buckets[i].committing); + } } return &fl->generic_hdr; } @@ -679,7 +726,7 @@ filelayout_pg_test(struct nfs_pageio_descriptor *pgio, struct nfs_page *prev, return (p_stripe == r_stripe); } -void +static void filelayout_pg_init_read(struct nfs_pageio_descriptor *pgio, struct nfs_page *req) { @@ -696,7 +743,7 @@ filelayout_pg_init_read(struct nfs_pageio_descriptor *pgio, nfs_pageio_reset_read_mds(pgio); } -void +static void filelayout_pg_init_write(struct nfs_pageio_descriptor *pgio, struct nfs_page *req) { @@ -725,11 +772,6 @@ static const struct nfs_pageio_ops filelayout_pg_write_ops = { .pg_doio = pnfs_generic_pg_writepages, }; -static bool filelayout_mark_pnfs_commit(struct pnfs_layout_segment *lseg) -{ - return !FILELAYOUT_LSEG(lseg)->commit_through_mds; -} - static u32 select_bucket_index(struct nfs4_filelayout_segment *fl, u32 j) { if (fl->stripe_type == STRIPE_SPARSE) @@ -738,13 +780,49 @@ static u32 select_bucket_index(struct nfs4_filelayout_segment *fl, u32 j) return j; } -struct list_head *filelayout_choose_commit_list(struct nfs_page *req) +/* The generic layer is about to remove the req from the commit list. + * If this will make the bucket empty, it will need to put the lseg reference. + */ +static void +filelayout_clear_request_commit(struct nfs_page *req) +{ + struct pnfs_layout_segment *freeme = NULL; + struct inode *inode = req->wb_context->dentry->d_inode; + + spin_lock(&inode->i_lock); + if (!test_and_clear_bit(PG_COMMIT_TO_DS, &req->wb_flags)) + goto out; + if (list_is_singular(&req->wb_list)) { + struct inode *inode = req->wb_context->dentry->d_inode; + struct pnfs_layout_segment *lseg; + + /* From here we can find the bucket, but for the moment, + * since there is only one relevant lseg... + */ + list_for_each_entry(lseg, &NFS_I(inode)->layout->plh_segs, pls_list) { + if (lseg->pls_range.iomode == IOMODE_RW) { + freeme = lseg; + break; + } + } + } +out: + nfs_request_remove_commit_list(req); + spin_unlock(&inode->i_lock); + put_lseg(freeme); +} + +static struct list_head * +filelayout_choose_commit_list(struct nfs_page *req, + struct pnfs_layout_segment *lseg) { - struct pnfs_layout_segment *lseg = req->wb_commit_lseg; struct nfs4_filelayout_segment *fl = FILELAYOUT_LSEG(lseg); u32 i, j; struct list_head *list; + if (fl->commit_through_mds) + return &NFS_I(req->wb_context->dentry->d_inode)->commit_list; + /* Note that we are calling nfs4_fl_calc_j_index on each page * that ends up being committed to a data server. An attractive * alternative is to add a field to nfs_write_data and nfs_page @@ -754,14 +832,30 @@ struct list_head *filelayout_choose_commit_list(struct nfs_page *req) j = nfs4_fl_calc_j_index(lseg, (loff_t)req->wb_index << PAGE_CACHE_SHIFT); i = select_bucket_index(fl, j); - list = &fl->commit_buckets[i]; + list = &fl->commit_buckets[i].written; if (list_empty(list)) { - /* Non-empty buckets hold a reference on the lseg */ + /* Non-empty buckets hold a reference on the lseg. That ref + * is normally transferred to the COMMIT call and released + * there. It could also be released if the last req is pulled + * off due to a rewrite, in which case it will be done in + * filelayout_remove_commit_req + */ get_lseg(lseg); } + set_bit(PG_COMMIT_TO_DS, &req->wb_flags); return list; } +static void +filelayout_mark_request_commit(struct nfs_page *req, + struct pnfs_layout_segment *lseg) +{ + struct list_head *list; + + list = filelayout_choose_commit_list(req, lseg); + nfs_request_add_commit_list(req, list); +} + static u32 calc_ds_index_from_commit(struct pnfs_layout_segment *lseg, u32 i) { struct nfs4_filelayout_segment *flseg = FILELAYOUT_LSEG(lseg); @@ -797,11 +891,12 @@ static int filelayout_initiate_commit(struct nfs_write_data *data, int how) idx = calc_ds_index_from_commit(lseg, data->ds_commit_index); ds = nfs4_fl_prepare_ds(lseg, idx); if (!ds) { - printk(KERN_ERR "%s: prepare_ds failed, use MDS\n", __func__); + printk(KERN_ERR "NFS: %s: prepare_ds failed, use MDS\n", + __func__); set_bit(lo_fail_bit(IOMODE_RW), &lseg->pls_layout->plh_flags); set_bit(lo_fail_bit(IOMODE_READ), &lseg->pls_layout->plh_flags); prepare_to_resend_writes(data); - data->mds_ops->rpc_release(data); + filelayout_commit_release(data); return -EAGAIN; } dprintk("%s ino %lu, how %d\n", __func__, data->inode->i_ino, how); @@ -817,24 +912,87 @@ static int filelayout_initiate_commit(struct nfs_write_data *data, int how) /* * This is only useful while we are using whole file layouts. */ -static struct pnfs_layout_segment *find_only_write_lseg(struct inode *inode) +static struct pnfs_layout_segment * +find_only_write_lseg_locked(struct inode *inode) { - struct pnfs_layout_segment *lseg, *rv = NULL; + struct pnfs_layout_segment *lseg; - spin_lock(&inode->i_lock); list_for_each_entry(lseg, &NFS_I(inode)->layout->plh_segs, pls_list) if (lseg->pls_range.iomode == IOMODE_RW) - rv = get_lseg(lseg); + return lseg; + return NULL; +} + +static struct pnfs_layout_segment *find_only_write_lseg(struct inode *inode) +{ + struct pnfs_layout_segment *rv; + + spin_lock(&inode->i_lock); + rv = find_only_write_lseg_locked(inode); + if (rv) + get_lseg(rv); spin_unlock(&inode->i_lock); return rv; } -static int alloc_ds_commits(struct inode *inode, struct list_head *list) +static int +filelayout_scan_ds_commit_list(struct nfs4_fl_commit_bucket *bucket, int max, + spinlock_t *lock) +{ + struct list_head *src = &bucket->written; + struct list_head *dst = &bucket->committing; + struct nfs_page *req, *tmp; + int ret = 0; + + list_for_each_entry_safe(req, tmp, src, wb_list) { + if (!nfs_lock_request(req)) + continue; + if (cond_resched_lock(lock)) + list_safe_reset_next(req, tmp, wb_list); + nfs_request_remove_commit_list(req); + clear_bit(PG_COMMIT_TO_DS, &req->wb_flags); + nfs_list_add_request(req, dst); + ret++; + if (ret == max) + break; + } + return ret; +} + +/* Move reqs from written to committing lists, returning count of number moved. + * Note called with i_lock held. + */ +static int filelayout_scan_commit_lists(struct inode *inode, int max, + spinlock_t *lock) +{ + struct pnfs_layout_segment *lseg; + struct nfs4_filelayout_segment *fl; + int i, rv = 0, cnt; + + lseg = find_only_write_lseg_locked(inode); + if (!lseg) + goto out_done; + fl = FILELAYOUT_LSEG(lseg); + if (fl->commit_through_mds) + goto out_done; + for (i = 0; i < fl->number_of_buckets && max != 0; i++) { + cnt = filelayout_scan_ds_commit_list(&fl->commit_buckets[i], + max, lock); + max -= cnt; + rv += cnt; + } +out_done: + return rv; +} + +static unsigned int +alloc_ds_commits(struct inode *inode, struct list_head *list) { struct pnfs_layout_segment *lseg; struct nfs4_filelayout_segment *fl; struct nfs_write_data *data; int i, j; + unsigned int nreq = 0; /* Won't need this when non-whole file layout segments are supported * instead we will use a pnfs_layout_hdr structure */ @@ -843,28 +1001,27 @@ static int alloc_ds_commits(struct inode *inode, struct list_head *list) return 0; fl = FILELAYOUT_LSEG(lseg); for (i = 0; i < fl->number_of_buckets; i++) { - if (list_empty(&fl->commit_buckets[i])) + if (list_empty(&fl->commit_buckets[i].committing)) continue; data = nfs_commitdata_alloc(); if (!data) - goto out_bad; + break; data->ds_commit_index = i; data->lseg = lseg; list_add(&data->pages, list); + nreq++; } - put_lseg(lseg); - return 0; -out_bad: + /* Clean up on error */ for (j = i; j < fl->number_of_buckets; j++) { - if (list_empty(&fl->commit_buckets[i])) + if (list_empty(&fl->commit_buckets[i].committing)) continue; - nfs_retry_commit(&fl->commit_buckets[i], lseg); + nfs_retry_commit(&fl->commit_buckets[i].committing, lseg); put_lseg(lseg); /* associated with emptying bucket */ } put_lseg(lseg); /* Caller will clean up entries put on list */ - return -ENOMEM; + return nreq; } /* This follows nfs_commit_list pretty closely */ @@ -874,40 +1031,40 @@ filelayout_commit_pagelist(struct inode *inode, struct list_head *mds_pages, { struct nfs_write_data *data, *tmp; LIST_HEAD(list); + unsigned int nreq = 0; if (!list_empty(mds_pages)) { data = nfs_commitdata_alloc(); - if (!data) - goto out_bad; - data->lseg = NULL; - list_add(&data->pages, &list); + if (data != NULL) { + data->lseg = NULL; + list_add(&data->pages, &list); + nreq++; + } else + nfs_retry_commit(mds_pages, NULL); } - if (alloc_ds_commits(inode, &list)) - goto out_bad; + nreq += alloc_ds_commits(inode, &list); + + if (nreq == 0) { + nfs_commit_clear_lock(NFS_I(inode)); + goto out; + } + + atomic_add(nreq, &NFS_I(inode)->commits_outstanding); list_for_each_entry_safe(data, tmp, &list, pages) { list_del_init(&data->pages); - atomic_inc(&NFS_I(inode)->commits_outstanding); if (!data->lseg) { nfs_init_commit(data, mds_pages, NULL); nfs_initiate_commit(data, NFS_CLIENT(inode), data->mds_ops, how); } else { - nfs_init_commit(data, &FILELAYOUT_LSEG(data->lseg)->commit_buckets[data->ds_commit_index], data->lseg); + nfs_init_commit(data, &FILELAYOUT_LSEG(data->lseg)->commit_buckets[data->ds_commit_index].committing, data->lseg); filelayout_initiate_commit(data, how); } } - return 0; - out_bad: - list_for_each_entry_safe(data, tmp, &list, pages) { - nfs_retry_commit(&data->pages, data->lseg); - list_del_init(&data->pages); - nfs_commit_free(data); - } - nfs_retry_commit(mds_pages, NULL); - nfs_commit_clear_lock(NFS_I(inode)); - return -ENOMEM; +out: + return PNFS_ATTEMPTED; } static void @@ -924,8 +1081,9 @@ static struct pnfs_layoutdriver_type filelayout_type = { .free_lseg = filelayout_free_lseg, .pg_read_ops = &filelayout_pg_read_ops, .pg_write_ops = &filelayout_pg_write_ops, - .mark_pnfs_commit = filelayout_mark_pnfs_commit, - .choose_commit_list = filelayout_choose_commit_list, + .mark_request_commit = filelayout_mark_request_commit, + .clear_request_commit = filelayout_clear_request_commit, + .scan_commit_lists = filelayout_scan_commit_lists, .commit_pagelist = filelayout_commit_pagelist, .read_pagelist = filelayout_read_pagelist, .write_pagelist = filelayout_write_pagelist, diff --git a/fs/nfs/nfs4filelayout.h b/fs/nfs/nfs4filelayout.h index 2e42284..21190bb 100644 --- a/fs/nfs/nfs4filelayout.h +++ b/fs/nfs/nfs4filelayout.h @@ -74,6 +74,11 @@ struct nfs4_file_layout_dsaddr { struct nfs4_pnfs_ds *ds_list[1]; }; +struct nfs4_fl_commit_bucket { + struct list_head written; + struct list_head committing; +}; + struct nfs4_filelayout_segment { struct pnfs_layout_segment generic_hdr; u32 stripe_type; @@ -84,7 +89,7 @@ struct nfs4_filelayout_segment { struct nfs4_file_layout_dsaddr *dsaddr; /* Point to GETDEVINFO data */ unsigned int num_fh; struct nfs_fh **fh_array; - struct list_head *commit_buckets; /* Sort commits to ds */ + struct nfs4_fl_commit_bucket *commit_buckets; /* Sort commits to ds */ int number_of_buckets; }; diff --git a/fs/nfs/nfs4filelayoutdev.c b/fs/nfs/nfs4filelayoutdev.c index 8ae9190..a866bbd 100644 --- a/fs/nfs/nfs4filelayoutdev.c +++ b/fs/nfs/nfs4filelayoutdev.c @@ -45,7 +45,7 @@ * - incremented when a device id maps a data server already in the cache. * - decremented when deviceid is removed from the cache. */ -DEFINE_SPINLOCK(nfs4_ds_cache_lock); +static DEFINE_SPINLOCK(nfs4_ds_cache_lock); static LIST_HEAD(nfs4_data_server_cache); /* Debug routines */ @@ -108,58 +108,40 @@ same_sockaddr(struct sockaddr *addr1, struct sockaddr *addr2) return false; } -/* - * Lookup DS by addresses. The first matching address returns true. - * nfs4_ds_cache_lock is held - */ -static struct nfs4_pnfs_ds * -_data_server_lookup_locked(struct list_head *dsaddrs) +static bool +_same_data_server_addrs_locked(const struct list_head *dsaddrs1, + const struct list_head *dsaddrs2) { - struct nfs4_pnfs_ds *ds; struct nfs4_pnfs_ds_addr *da1, *da2; - list_for_each_entry(da1, dsaddrs, da_node) { - list_for_each_entry(ds, &nfs4_data_server_cache, ds_node) { - list_for_each_entry(da2, &ds->ds_addrs, da_node) { - if (same_sockaddr( - (struct sockaddr *)&da1->da_addr, - (struct sockaddr *)&da2->da_addr)) - return ds; - } - } + /* step through both lists, comparing as we go */ + for (da1 = list_first_entry(dsaddrs1, typeof(*da1), da_node), + da2 = list_first_entry(dsaddrs2, typeof(*da2), da_node); + da1 != NULL && da2 != NULL; + da1 = list_entry(da1->da_node.next, typeof(*da1), da_node), + da2 = list_entry(da2->da_node.next, typeof(*da2), da_node)) { + if (!same_sockaddr((struct sockaddr *)&da1->da_addr, + (struct sockaddr *)&da2->da_addr)) + return false; } - return NULL; + if (da1 == NULL && da2 == NULL) + return true; + + return false; } /* - * Compare two lists of addresses. + * Lookup DS by addresses. nfs4_ds_cache_lock is held */ -static bool -_data_server_match_all_addrs_locked(struct list_head *dsaddrs1, - struct list_head *dsaddrs2) +static struct nfs4_pnfs_ds * +_data_server_lookup_locked(const struct list_head *dsaddrs) { - struct nfs4_pnfs_ds_addr *da1, *da2; - size_t count1 = 0, - count2 = 0; - - list_for_each_entry(da1, dsaddrs1, da_node) - count1++; - - list_for_each_entry(da2, dsaddrs2, da_node) { - bool found = false; - count2++; - list_for_each_entry(da1, dsaddrs1, da_node) { - if (same_sockaddr((struct sockaddr *)&da1->da_addr, - (struct sockaddr *)&da2->da_addr)) { - found = true; - break; - } - } - if (!found) - return false; - } + struct nfs4_pnfs_ds *ds; - return (count1 == count2); + list_for_each_entry(ds, &nfs4_data_server_cache, ds_node) + if (_same_data_server_addrs_locked(&ds->ds_addrs, dsaddrs)) + return ds; + return NULL; } /* @@ -356,11 +338,6 @@ nfs4_pnfs_ds_add(struct list_head *dsaddrs, gfp_t gfp_flags) dprintk("%s add new data server %s\n", __func__, ds->ds_remotestr); } else { - if (!_data_server_match_all_addrs_locked(&tmp_ds->ds_addrs, - dsaddrs)) { - dprintk("%s: multipath address mismatch: %s != %s", - __func__, tmp_ds->ds_remotestr, remotestr); - } kfree(remotestr); kfree(ds); atomic_inc(&tmp_ds->ds_count); @@ -378,7 +355,7 @@ out: * Currently only supports ipv4, ipv6 and one multi-path address. */ static struct nfs4_pnfs_ds_addr * -decode_ds_addr(struct xdr_stream *streamp, gfp_t gfp_flags) +decode_ds_addr(struct net *net, struct xdr_stream *streamp, gfp_t gfp_flags) { struct nfs4_pnfs_ds_addr *da = NULL; char *buf, *portstr; @@ -457,7 +434,7 @@ decode_ds_addr(struct xdr_stream *streamp, gfp_t gfp_flags) INIT_LIST_HEAD(&da->da_node); - if (!rpc_pton(buf, portstr-buf, (struct sockaddr *)&da->da_addr, + if (!rpc_pton(net, buf, portstr-buf, (struct sockaddr *)&da->da_addr, sizeof(da->da_addr))) { dprintk("%s: error parsing address %s\n", __func__, buf); goto out_free_da; @@ -554,7 +531,7 @@ decode_device(struct inode *ino, struct pnfs_device *pdev, gfp_t gfp_flags) cnt = be32_to_cpup(p); dprintk("%s stripe count %d\n", __func__, cnt); if (cnt > NFS4_PNFS_MAX_STRIPE_CNT) { - printk(KERN_WARNING "%s: stripe count %d greater than " + printk(KERN_WARNING "NFS: %s: stripe count %d greater than " "supported maximum %d\n", __func__, cnt, NFS4_PNFS_MAX_STRIPE_CNT); goto out_err_free_scratch; @@ -585,7 +562,7 @@ decode_device(struct inode *ino, struct pnfs_device *pdev, gfp_t gfp_flags) num = be32_to_cpup(p); dprintk("%s ds_num %u\n", __func__, num); if (num > NFS4_PNFS_MAX_MULTI_CNT) { - printk(KERN_WARNING "%s: multipath count %d greater than " + printk(KERN_WARNING "NFS: %s: multipath count %d greater than " "supported maximum %d\n", __func__, num, NFS4_PNFS_MAX_MULTI_CNT); goto out_err_free_stripe_indices; @@ -593,7 +570,7 @@ decode_device(struct inode *ino, struct pnfs_device *pdev, gfp_t gfp_flags) /* validate stripe indices are all < num */ if (max_stripe_index >= num) { - printk(KERN_WARNING "%s: stripe index %u >= num ds %u\n", + printk(KERN_WARNING "NFS: %s: stripe index %u >= num ds %u\n", __func__, max_stripe_index, num); goto out_err_free_stripe_indices; } @@ -625,7 +602,8 @@ decode_device(struct inode *ino, struct pnfs_device *pdev, gfp_t gfp_flags) mp_count = be32_to_cpup(p); /* multipath count */ for (j = 0; j < mp_count; j++) { - da = decode_ds_addr(&stream, gfp_flags); + da = decode_ds_addr(NFS_SERVER(ino)->nfs_client->net, + &stream, gfp_flags); if (da) list_add_tail(&da->da_node, &dsaddrs); } @@ -686,7 +664,7 @@ decode_and_add_device(struct inode *inode, struct pnfs_device *dev, gfp_t gfp_fl new = decode_device(inode, dev, gfp_flags); if (!new) { - printk(KERN_WARNING "%s: Could not decode or add device\n", + printk(KERN_WARNING "NFS: %s: Could not decode or add device\n", __func__); return NULL; } @@ -835,7 +813,7 @@ nfs4_fl_prepare_ds(struct pnfs_layout_segment *lseg, u32 ds_idx) struct nfs4_pnfs_ds *ds = dsaddr->ds_list[ds_idx]; if (ds == NULL) { - printk(KERN_ERR "%s: No data server for offset index %d\n", + printk(KERN_ERR "NFS: %s: No data server for offset index %d\n", __func__, ds_idx); return NULL; } diff --git a/fs/nfs/nfs4namespace.c b/fs/nfs/nfs4namespace.c index bb80c49..9c8eca31 100644 --- a/fs/nfs/nfs4namespace.c +++ b/fs/nfs/nfs4namespace.c @@ -94,13 +94,14 @@ static int nfs4_validate_fspath(struct dentry *dentry, } static size_t nfs_parse_server_name(char *string, size_t len, - struct sockaddr *sa, size_t salen) + struct sockaddr *sa, size_t salen, struct nfs_server *server) { + struct net *net = rpc_net_ns(server->client); ssize_t ret; - ret = rpc_pton(string, len, sa, salen); + ret = rpc_pton(net, string, len, sa, salen); if (ret == 0) { - ret = nfs_dns_resolve_name(string, len, sa, salen); + ret = nfs_dns_resolve_name(net, string, len, sa, salen); if (ret < 0) ret = 0; } @@ -137,7 +138,8 @@ static struct vfsmount *try_location(struct nfs_clone_mount *mountdata, continue; mountdata->addrlen = nfs_parse_server_name(buf->data, buf->len, - mountdata->addr, addr_bufsize); + mountdata->addr, addr_bufsize, + NFS_SB(mountdata->sb)); if (mountdata->addrlen == 0) continue; diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index caf92d0..e809d23 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -72,18 +72,21 @@ #define NFS4_MAX_LOOP_ON_RECOVER (10) +static unsigned short max_session_slots = NFS4_DEF_SLOT_TABLE_SIZE; + struct nfs4_opendata; static int _nfs4_proc_open(struct nfs4_opendata *data); static int _nfs4_recover_proc_open(struct nfs4_opendata *data); static int nfs4_do_fsinfo(struct nfs_server *, struct nfs_fh *, struct nfs_fsinfo *); static int nfs4_async_handle_error(struct rpc_task *, const struct nfs_server *, struct nfs4_state *); +static void nfs_fixup_referral_attributes(struct nfs_fattr *fattr); static int _nfs4_proc_getattr(struct nfs_server *server, struct nfs_fh *fhandle, struct nfs_fattr *fattr); static int nfs4_do_setattr(struct inode *inode, struct rpc_cred *cred, struct nfs_fattr *fattr, struct iattr *sattr, struct nfs4_state *state); #ifdef CONFIG_NFS_V4_1 -static int nfs41_test_stateid(struct nfs_server *, struct nfs4_state *); -static int nfs41_free_stateid(struct nfs_server *, struct nfs4_state *); +static int nfs41_test_stateid(struct nfs_server *, nfs4_stateid *); +static int nfs41_free_stateid(struct nfs_server *, nfs4_stateid *); #endif /* Prevent leaks of NFSv4 errors into userland */ static int nfs4_map_errors(int err) @@ -259,15 +262,28 @@ static int nfs4_handle_exception(struct nfs_server *server, int errorcode, struc { struct nfs_client *clp = server->nfs_client; struct nfs4_state *state = exception->state; + struct inode *inode = exception->inode; int ret = errorcode; exception->retry = 0; switch(errorcode) { case 0: return 0; + case -NFS4ERR_OPENMODE: + if (nfs_have_delegation(inode, FMODE_READ)) { + nfs_inode_return_delegation(inode); + exception->retry = 1; + return 0; + } + if (state == NULL) + break; + nfs4_schedule_stateid_recovery(server, state); + goto wait_on_recovery; + case -NFS4ERR_DELEG_REVOKED: case -NFS4ERR_ADMIN_REVOKED: case -NFS4ERR_BAD_STATEID: - case -NFS4ERR_OPENMODE: + if (state != NULL) + nfs_remove_bad_delegation(state->inode); if (state == NULL) break; nfs4_schedule_stateid_recovery(server, state); @@ -360,16 +376,14 @@ static void renew_lease(const struct nfs_server *server, unsigned long timestamp * When updating highest_used_slotid there may be "holes" in the bitmap * so we need to scan down from highest_used_slotid to 0 looking for the now * highest slotid in use. - * If none found, highest_used_slotid is set to -1. + * If none found, highest_used_slotid is set to NFS4_NO_SLOT. * * Must be called while holding tbl->slot_tbl_lock */ static void -nfs4_free_slot(struct nfs4_slot_table *tbl, u8 free_slotid) +nfs4_free_slot(struct nfs4_slot_table *tbl, u32 slotid) { - int slotid = free_slotid; - - BUG_ON(slotid < 0 || slotid >= NFS4_MAX_SLOT_TABLE); + BUG_ON(slotid >= NFS4_MAX_SLOT_TABLE); /* clear used bit in bitmap */ __clear_bit(slotid, tbl->used_slots); @@ -379,10 +393,16 @@ nfs4_free_slot(struct nfs4_slot_table *tbl, u8 free_slotid) if (slotid < tbl->max_slots) tbl->highest_used_slotid = slotid; else - tbl->highest_used_slotid = -1; + tbl->highest_used_slotid = NFS4_NO_SLOT; } - dprintk("%s: free_slotid %u highest_used_slotid %d\n", __func__, - free_slotid, tbl->highest_used_slotid); + dprintk("%s: slotid %u highest_used_slotid %d\n", __func__, + slotid, tbl->highest_used_slotid); +} + +bool nfs4_set_task_privileged(struct rpc_task *task, void *dummy) +{ + rpc_task_set_priority(task, RPC_PRIORITY_PRIVILEGED); + return true; } /* @@ -390,16 +410,13 @@ nfs4_free_slot(struct nfs4_slot_table *tbl, u8 free_slotid) */ static void nfs4_check_drain_fc_complete(struct nfs4_session *ses) { - struct rpc_task *task; - if (!test_bit(NFS4_SESSION_DRAINING, &ses->session_state)) { - task = rpc_wake_up_next(&ses->fc_slot_table.slot_tbl_waitq); - if (task) - rpc_task_set_priority(task, RPC_PRIORITY_PRIVILEGED); + rpc_wake_up_first(&ses->fc_slot_table.slot_tbl_waitq, + nfs4_set_task_privileged, NULL); return; } - if (ses->fc_slot_table.highest_used_slotid != -1) + if (ses->fc_slot_table.highest_used_slotid != NFS4_NO_SLOT) return; dprintk("%s COMPLETE: Session Fore Channel Drained\n", __func__); @@ -412,7 +429,7 @@ static void nfs4_check_drain_fc_complete(struct nfs4_session *ses) void nfs4_check_drain_bc_complete(struct nfs4_session *ses) { if (!test_bit(NFS4_SESSION_DRAINING, &ses->session_state) || - ses->bc_slot_table.highest_used_slotid != -1) + ses->bc_slot_table.highest_used_slotid != NFS4_NO_SLOT) return; dprintk("%s COMPLETE: Session Back Channel Drained\n", __func__); complete(&ses->bc_slot_table.complete); @@ -507,25 +524,25 @@ static int nfs4_sequence_done(struct rpc_task *task, * nfs4_find_slot looks for an unset bit in the used_slots bitmap. * If found, we mark the slot as used, update the highest_used_slotid, * and respectively set up the sequence operation args. - * The slot number is returned if found, or NFS4_MAX_SLOT_TABLE otherwise. + * The slot number is returned if found, or NFS4_NO_SLOT otherwise. * * Note: must be called with under the slot_tbl_lock. */ -static u8 +static u32 nfs4_find_slot(struct nfs4_slot_table *tbl) { - int slotid; - u8 ret_id = NFS4_MAX_SLOT_TABLE; - BUILD_BUG_ON((u8)NFS4_MAX_SLOT_TABLE != (int)NFS4_MAX_SLOT_TABLE); + u32 slotid; + u32 ret_id = NFS4_NO_SLOT; - dprintk("--> %s used_slots=%04lx highest_used=%d max_slots=%d\n", + dprintk("--> %s used_slots=%04lx highest_used=%u max_slots=%u\n", __func__, tbl->used_slots[0], tbl->highest_used_slotid, tbl->max_slots); slotid = find_first_zero_bit(tbl->used_slots, tbl->max_slots); if (slotid >= tbl->max_slots) goto out; __set_bit(slotid, tbl->used_slots); - if (slotid > tbl->highest_used_slotid) + if (slotid > tbl->highest_used_slotid || + tbl->highest_used_slotid == NFS4_NO_SLOT) tbl->highest_used_slotid = slotid; ret_id = slotid; out: @@ -534,15 +551,25 @@ out: return ret_id; } +static void nfs41_init_sequence(struct nfs4_sequence_args *args, + struct nfs4_sequence_res *res, int cache_reply) +{ + args->sa_session = NULL; + args->sa_cache_this = 0; + if (cache_reply) + args->sa_cache_this = 1; + res->sr_session = NULL; + res->sr_slot = NULL; +} + int nfs41_setup_sequence(struct nfs4_session *session, struct nfs4_sequence_args *args, struct nfs4_sequence_res *res, - int cache_reply, struct rpc_task *task) { struct nfs4_slot *slot; struct nfs4_slot_table *tbl; - u8 slotid; + u32 slotid; dprintk("--> %s\n", __func__); /* slot already allocated? */ @@ -570,7 +597,7 @@ int nfs41_setup_sequence(struct nfs4_session *session, } slotid = nfs4_find_slot(tbl); - if (slotid == NFS4_MAX_SLOT_TABLE) { + if (slotid == NFS4_NO_SLOT) { rpc_sleep_on(&tbl->slot_tbl_waitq, task, NULL); spin_unlock(&tbl->slot_tbl_lock); dprintk("<-- %s: no free slots\n", __func__); @@ -582,7 +609,6 @@ int nfs41_setup_sequence(struct nfs4_session *session, slot = tbl->slots + slotid; args->sa_session = session; args->sa_slotid = slotid; - args->sa_cache_this = cache_reply; dprintk("<-- %s slotid=%d seqid=%d\n", __func__, slotid, slot->seq_nr); @@ -602,24 +628,19 @@ EXPORT_SYMBOL_GPL(nfs41_setup_sequence); int nfs4_setup_sequence(const struct nfs_server *server, struct nfs4_sequence_args *args, struct nfs4_sequence_res *res, - int cache_reply, struct rpc_task *task) { struct nfs4_session *session = nfs4_get_session(server); int ret = 0; - if (session == NULL) { - args->sa_session = NULL; - res->sr_session = NULL; + if (session == NULL) goto out; - } dprintk("--> %s clp %p session %p sr_slot %td\n", __func__, session->clp, session, res->sr_slot ? res->sr_slot - session->fc_slot_table.slots : -1); - ret = nfs41_setup_sequence(session, args, res, cache_reply, - task); + ret = nfs41_setup_sequence(session, args, res, task); out: dprintk("<-- %s status=%d\n", __func__, ret); return ret; @@ -629,7 +650,6 @@ struct nfs41_call_sync_data { const struct nfs_server *seq_server; struct nfs4_sequence_args *seq_args; struct nfs4_sequence_res *seq_res; - int cache_reply; }; static void nfs41_call_sync_prepare(struct rpc_task *task, void *calldata) @@ -639,7 +659,7 @@ static void nfs41_call_sync_prepare(struct rpc_task *task, void *calldata) dprintk("--> %s data->seq_server %p\n", __func__, data->seq_server); if (nfs4_setup_sequence(data->seq_server, data->seq_args, - data->seq_res, data->cache_reply, task)) + data->seq_res, task)) return; rpc_call_start(task); } @@ -657,12 +677,12 @@ static void nfs41_call_sync_done(struct rpc_task *task, void *calldata) nfs41_sequence_done(task, data->seq_res); } -struct rpc_call_ops nfs41_call_sync_ops = { +static const struct rpc_call_ops nfs41_call_sync_ops = { .rpc_call_prepare = nfs41_call_sync_prepare, .rpc_call_done = nfs41_call_sync_done, }; -struct rpc_call_ops nfs41_call_priv_sync_ops = { +static const struct rpc_call_ops nfs41_call_priv_sync_ops = { .rpc_call_prepare = nfs41_call_priv_sync_prepare, .rpc_call_done = nfs41_call_sync_done, }; @@ -672,7 +692,6 @@ static int nfs4_call_sync_sequence(struct rpc_clnt *clnt, struct rpc_message *msg, struct nfs4_sequence_args *args, struct nfs4_sequence_res *res, - int cache_reply, int privileged) { int ret; @@ -681,7 +700,6 @@ static int nfs4_call_sync_sequence(struct rpc_clnt *clnt, .seq_server = server, .seq_args = args, .seq_res = res, - .cache_reply = cache_reply, }; struct rpc_task_setup task_setup = { .rpc_client = clnt, @@ -690,7 +708,6 @@ static int nfs4_call_sync_sequence(struct rpc_clnt *clnt, .callback_data = &data }; - res->sr_slot = NULL; if (privileged) task_setup.callback_ops = &nfs41_call_priv_sync_ops; task = rpc_run_task(&task_setup); @@ -710,10 +727,17 @@ int _nfs4_call_sync_session(struct rpc_clnt *clnt, struct nfs4_sequence_res *res, int cache_reply) { - return nfs4_call_sync_sequence(clnt, server, msg, args, res, cache_reply, 0); + nfs41_init_sequence(args, res, cache_reply); + return nfs4_call_sync_sequence(clnt, server, msg, args, res, 0); } #else +static inline +void nfs41_init_sequence(struct nfs4_sequence_args *args, + struct nfs4_sequence_res *res, int cache_reply) +{ +} + static int nfs4_sequence_done(struct rpc_task *task, struct nfs4_sequence_res *res) { @@ -728,7 +752,7 @@ int _nfs4_call_sync(struct rpc_clnt *clnt, struct nfs4_sequence_res *res, int cache_reply) { - args->sa_session = res->sr_session = NULL; + nfs41_init_sequence(args, res, cache_reply); return rpc_call_sync(clnt, msg, 0); } @@ -815,20 +839,22 @@ static struct nfs4_opendata *nfs4_opendata_alloc(struct dentry *dentry, p->o_arg.open_flags = flags; p->o_arg.fmode = fmode & (FMODE_READ|FMODE_WRITE); p->o_arg.clientid = server->nfs_client->cl_clientid; - p->o_arg.id = sp->so_owner_id.id; + p->o_arg.id = sp->so_seqid.owner_id; p->o_arg.name = &dentry->d_name; p->o_arg.server = server; p->o_arg.bitmask = server->attr_bitmask; p->o_arg.dir_bitmask = server->cache_consistency_bitmask; p->o_arg.claim = NFS4_OPEN_CLAIM_NULL; - if (flags & O_CREAT) { - u32 *s; + if (attrs != NULL && attrs->ia_valid != 0) { + __be32 verf[2]; p->o_arg.u.attrs = &p->attrs; memcpy(&p->attrs, attrs, sizeof(p->attrs)); - s = (u32 *) p->o_arg.u.verifier.data; - s[0] = jiffies; - s[1] = current->pid; + + verf[0] = jiffies; + verf[1] = current->pid; + memcpy(p->o_arg.u.verifier.data, verf, + sizeof(p->o_arg.u.verifier.data)); } p->c_arg.fh = &p->o_res.fh; p->c_arg.stateid = &p->o_res.stateid; @@ -878,7 +904,7 @@ static int can_open_cached(struct nfs4_state *state, fmode_t mode, int open_mode { int ret = 0; - if (open_mode & O_EXCL) + if (open_mode & (O_EXCL|O_TRUNC)) goto out; switch (mode & (FMODE_READ|FMODE_WRITE)) { case FMODE_READ: @@ -927,8 +953,8 @@ static void update_open_stateflags(struct nfs4_state *state, fmode_t fmode) static void nfs_set_open_stateid_locked(struct nfs4_state *state, nfs4_stateid *stateid, fmode_t fmode) { if (test_bit(NFS_DELEGATED_STATE, &state->flags) == 0) - memcpy(state->stateid.data, stateid->data, sizeof(state->stateid.data)); - memcpy(state->open_stateid.data, stateid->data, sizeof(state->open_stateid.data)); + nfs4_stateid_copy(&state->stateid, stateid); + nfs4_stateid_copy(&state->open_stateid, stateid); switch (fmode) { case FMODE_READ: set_bit(NFS_O_RDONLY_STATE, &state->flags); @@ -956,7 +982,7 @@ static void __update_open_stateid(struct nfs4_state *state, nfs4_stateid *open_s */ write_seqlock(&state->seqlock); if (deleg_stateid != NULL) { - memcpy(state->stateid.data, deleg_stateid->data, sizeof(state->stateid.data)); + nfs4_stateid_copy(&state->stateid, deleg_stateid); set_bit(NFS_DELEGATED_STATE, &state->flags); } if (open_stateid != NULL) @@ -987,7 +1013,7 @@ static int update_open_stateid(struct nfs4_state *state, nfs4_stateid *open_stat if (delegation == NULL) delegation = &deleg_cur->stateid; - else if (memcmp(deleg_cur->stateid.data, delegation->data, NFS4_STATEID_SIZE) != 0) + else if (!nfs4_stateid_match(&deleg_cur->stateid, delegation)) goto no_delegation_unlock; nfs_mark_delegation_referenced(deleg_cur); @@ -1026,7 +1052,7 @@ static struct nfs4_state *nfs4_try_open_cached(struct nfs4_opendata *opendata) struct nfs4_state *state = opendata->state; struct nfs_inode *nfsi = NFS_I(state->inode); struct nfs_delegation *delegation; - int open_mode = opendata->o_arg.open_flags & O_EXCL; + int open_mode = opendata->o_arg.open_flags & (O_EXCL|O_TRUNC); fmode_t fmode = opendata->o_arg.fmode; nfs4_stateid stateid; int ret = -EAGAIN; @@ -1048,7 +1074,7 @@ static struct nfs4_state *nfs4_try_open_cached(struct nfs4_opendata *opendata) break; } /* Save the delegation */ - memcpy(stateid.data, delegation->stateid.data, sizeof(stateid.data)); + nfs4_stateid_copy(&stateid, &delegation->stateid); rcu_read_unlock(); ret = nfs_may_open(state->inode, state->owner->so_cred, open_mode); if (ret != 0) @@ -1090,6 +1116,7 @@ static struct nfs4_state *nfs4_opendata_to_nfs4_state(struct nfs4_opendata *data if (state == NULL) goto err_put_inode; if (data->o_res.delegation_type != 0) { + struct nfs_client *clp = NFS_SERVER(inode)->nfs_client; int delegation_flags = 0; rcu_read_lock(); @@ -1101,7 +1128,7 @@ static struct nfs4_state *nfs4_opendata_to_nfs4_state(struct nfs4_opendata *data pr_err_ratelimited("NFS: Broken NFSv4 server %s is " "returning a delegation for " "OPEN(CLAIM_DELEGATE_CUR)\n", - NFS_CLIENT(inode)->cl_server); + clp->cl_hostname); } else if ((delegation_flags & 1UL<<NFS_DELEGATION_NEED_RECLAIM) == 0) nfs_inode_set_delegation(state->inode, data->owner->so_cred, @@ -1210,10 +1237,10 @@ static int nfs4_open_recover(struct nfs4_opendata *opendata, struct nfs4_state * * Check if we need to update the current stateid. */ if (test_bit(NFS_DELEGATED_STATE, &state->flags) == 0 && - memcmp(state->stateid.data, state->open_stateid.data, sizeof(state->stateid.data)) != 0) { + !nfs4_stateid_match(&state->stateid, &state->open_stateid)) { write_seqlock(&state->seqlock); if (test_bit(NFS_DELEGATED_STATE, &state->flags) == 0) - memcpy(state->stateid.data, state->open_stateid.data, sizeof(state->stateid.data)); + nfs4_stateid_copy(&state->stateid, &state->open_stateid); write_sequnlock(&state->seqlock); } return 0; @@ -1282,8 +1309,7 @@ static int _nfs4_open_delegation_recall(struct nfs_open_context *ctx, struct nfs if (IS_ERR(opendata)) return PTR_ERR(opendata); opendata->o_arg.claim = NFS4_OPEN_CLAIM_DELEGATE_CUR; - memcpy(opendata->o_arg.u.delegation.data, stateid->data, - sizeof(opendata->o_arg.u.delegation.data)); + nfs4_stateid_copy(&opendata->o_arg.u.delegation, stateid); ret = nfs4_open_recover(opendata, state); nfs4_opendata_put(opendata); return ret; @@ -1319,8 +1345,11 @@ int nfs4_open_delegation_recall(struct nfs_open_context *ctx, struct nfs4_state * The show must go on: exit, but mark the * stateid as needing recovery. */ + case -NFS4ERR_DELEG_REVOKED: case -NFS4ERR_ADMIN_REVOKED: case -NFS4ERR_BAD_STATEID: + nfs_inode_find_state_and_recover(state->inode, + stateid); nfs4_schedule_stateid_recovery(server, state); case -EKEYEXPIRED: /* @@ -1345,8 +1374,7 @@ static void nfs4_open_confirm_done(struct rpc_task *task, void *calldata) data->rpc_status = task->tk_status; if (data->rpc_status == 0) { - memcpy(data->o_res.stateid.data, data->c_res.stateid.data, - sizeof(data->o_res.stateid.data)); + nfs4_stateid_copy(&data->o_res.stateid, &data->c_res.stateid); nfs_confirm_seqid(&data->owner->so_seqid, 0); renew_lease(data->o_res.server, data->timestamp); data->rpc_done = 1; @@ -1440,7 +1468,7 @@ static void nfs4_open_prepare(struct rpc_task *task, void *calldata) rcu_read_unlock(); } /* Update sequence id. */ - data->o_arg.id = sp->so_owner_id.id; + data->o_arg.id = sp->so_seqid.owner_id; data->o_arg.clientid = sp->so_server->nfs_client->cl_clientid; if (data->o_arg.claim == NFS4_OPEN_CLAIM_PREVIOUS) { task->tk_msg.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_OPEN_NOATTR]; @@ -1449,7 +1477,7 @@ static void nfs4_open_prepare(struct rpc_task *task, void *calldata) data->timestamp = jiffies; if (nfs4_setup_sequence(data->o_arg.server, &data->o_arg.seq_args, - &data->o_res.seq_res, 1, task)) + &data->o_res.seq_res, task)) return; rpc_call_start(task); return; @@ -1551,6 +1579,7 @@ static int nfs4_run_open_task(struct nfs4_opendata *data, int isrecover) }; int status; + nfs41_init_sequence(&o_arg->seq_args, &o_res->seq_res, 1); kref_get(&data->kref); data->rpc_done = 0; data->rpc_status = 0; @@ -1712,15 +1741,32 @@ static int nfs4_open_expired(struct nfs4_state_owner *sp, struct nfs4_state *sta } #if defined(CONFIG_NFS_V4_1) -static int nfs41_open_expired(struct nfs4_state_owner *sp, struct nfs4_state *state) +static int nfs41_check_expired_stateid(struct nfs4_state *state, nfs4_stateid *stateid, unsigned int flags) { - int status; + int status = NFS_OK; struct nfs_server *server = NFS_SERVER(state->inode); - status = nfs41_test_stateid(server, state); - if (status == NFS_OK) - return 0; - nfs41_free_stateid(server, state); + if (state->flags & flags) { + status = nfs41_test_stateid(server, stateid); + if (status != NFS_OK) { + nfs41_free_stateid(server, stateid); + state->flags &= ~flags; + } + } + return status; +} + +static int nfs41_open_expired(struct nfs4_state_owner *sp, struct nfs4_state *state) +{ + int deleg_status, open_status; + int deleg_flags = 1 << NFS_DELEGATED_STATE; + int open_flags = (1 << NFS_O_RDONLY_STATE) | (1 << NFS_O_WRONLY_STATE) | (1 << NFS_O_RDWR_STATE); + + deleg_status = nfs41_check_expired_stateid(state, &state->stateid, deleg_flags); + open_status = nfs41_check_expired_stateid(state, &state->open_stateid, open_flags); + + if ((deleg_status == NFS_OK) && (open_status == NFS_OK)) + return NFS_OK; return nfs4_open_expired(sp, state); } #endif @@ -1754,7 +1800,8 @@ static int _nfs4_do_open(struct inode *dir, struct dentry *dentry, fmode_t fmode /* Protect against reboot recovery conflicts */ status = -ENOMEM; - if (!(sp = nfs4_get_state_owner(server, cred))) { + sp = nfs4_get_state_owner(server, cred, GFP_KERNEL); + if (sp == NULL) { dprintk("nfs4_do_open: nfs4_get_state_owner failed!\n"); goto out_err; } @@ -1829,7 +1876,7 @@ static struct nfs4_state *nfs4_do_open(struct inode *dir, struct dentry *dentry, * the user though... */ if (status == -NFS4ERR_BAD_SEQID) { - printk(KERN_WARNING "NFS: v4 server %s " + pr_warn_ratelimited("NFS: v4 server %s " " returned a bad sequence-id error!\n", NFS_SERVER(dir)->nfs_client->cl_hostname); exception.retry = 1; @@ -1882,12 +1929,14 @@ static int _nfs4_do_setattr(struct inode *inode, struct rpc_cred *cred, nfs_fattr_init(fattr); - if (nfs4_copy_delegation_stateid(&arg.stateid, inode)) { + if (state != NULL) { + nfs4_select_rw_stateid(&arg.stateid, state, FMODE_WRITE, + current->files, current->tgid); + } else if (nfs4_copy_delegation_stateid(&arg.stateid, inode, + FMODE_WRITE)) { /* Use that stateid */ - } else if (state != NULL) { - nfs4_copy_stateid(&arg.stateid, state, current->files, current->tgid); } else - memcpy(&arg.stateid, &zero_stateid, sizeof(arg.stateid)); + nfs4_stateid_copy(&arg.stateid, &zero_stateid); status = nfs4_call_sync(server->client, server, &msg, &arg.seq_args, &res.seq_res, 1); if (status == 0 && state != NULL) @@ -1900,7 +1949,10 @@ static int nfs4_do_setattr(struct inode *inode, struct rpc_cred *cred, struct nfs4_state *state) { struct nfs_server *server = NFS_SERVER(inode); - struct nfs4_exception exception = { }; + struct nfs4_exception exception = { + .state = state, + .inode = inode, + }; int err; do { err = nfs4_handle_exception(server, @@ -1954,6 +2006,7 @@ static void nfs4_close_done(struct rpc_task *task, void *data) struct nfs4_state *state = calldata->state; struct nfs_server *server = NFS_SERVER(calldata->inode); + dprintk("%s: begin!\n", __func__); if (!nfs4_sequence_done(task, &calldata->res.seq_res)) return; /* hmm. we are done with the inode, and in the process of freeing @@ -1981,6 +2034,7 @@ static void nfs4_close_done(struct rpc_task *task, void *data) } nfs_release_seqid(calldata->arg.seqid); nfs_refresh_inode(calldata->inode, calldata->res.fattr); + dprintk("%s: done, ret = %d!\n", __func__, task->tk_status); } static void nfs4_close_prepare(struct rpc_task *task, void *data) @@ -1989,6 +2043,7 @@ static void nfs4_close_prepare(struct rpc_task *task, void *data) struct nfs4_state *state = calldata->state; int call_close = 0; + dprintk("%s: begin!\n", __func__); if (nfs_wait_on_sequence(calldata->arg.seqid, task) != 0) return; @@ -2013,7 +2068,7 @@ static void nfs4_close_prepare(struct rpc_task *task, void *data) if (!call_close) { /* Note: exit _without_ calling nfs4_close_done */ task->tk_action = NULL; - return; + goto out; } if (calldata->arg.fmode == 0) { @@ -2022,17 +2077,20 @@ static void nfs4_close_prepare(struct rpc_task *task, void *data) pnfs_roc_drain(calldata->inode, &calldata->roc_barrier)) { rpc_sleep_on(&NFS_SERVER(calldata->inode)->roc_rpcwaitq, task, NULL); - return; + goto out; } } nfs_fattr_init(calldata->res.fattr); calldata->timestamp = jiffies; if (nfs4_setup_sequence(NFS_SERVER(calldata->inode), - &calldata->arg.seq_args, &calldata->res.seq_res, - 1, task)) - return; + &calldata->arg.seq_args, + &calldata->res.seq_res, + task)) + goto out; rpc_call_start(task); +out: + dprintk("%s: done!\n", __func__); } static const struct rpc_call_ops nfs4_close_ops = { @@ -2074,6 +2132,7 @@ int nfs4_do_close(struct nfs4_state *state, gfp_t gfp_mask, int wait, bool roc) calldata = kzalloc(sizeof(*calldata), gfp_mask); if (calldata == NULL) goto out; + nfs41_init_sequence(&calldata->arg.seq_args, &calldata->res.seq_res, 1); calldata->inode = state->inode; calldata->state = state; calldata->arg.fh = NFS_FH(state->inode); @@ -2182,6 +2241,7 @@ static int _nfs4_server_capabilities(struct nfs_server *server, struct nfs_fh *f server->cache_consistency_bitmask[0] &= FATTR4_WORD0_CHANGE|FATTR4_WORD0_SIZE; server->cache_consistency_bitmask[1] &= FATTR4_WORD1_TIME_METADATA|FATTR4_WORD1_TIME_MODIFY; server->acl_bitmask = res.acl_bitmask; + server->fh_expire_type = res.fh_expire_type; } return status; @@ -2303,7 +2363,6 @@ static int nfs4_proc_get_root(struct nfs_server *server, struct nfs_fh *fhandle, return nfs4_map_errors(status); } -static void nfs_fixup_referral_attributes(struct nfs_fattr *fattr); /* * Get locations and (maybe) other attributes of a referral. * Note that we'll actually follow the referral later when @@ -2420,6 +2479,10 @@ nfs4_proc_setattr(struct dentry *dentry, struct nfs_fattr *fattr, } } + /* Deal with open(O_TRUNC) */ + if (sattr->ia_valid & ATTR_OPEN) + sattr->ia_valid &= ~(ATTR_MTIME|ATTR_CTIME|ATTR_OPEN); + status = nfs4_do_setattr(inode, cred, fattr, sattr, state); if (status == 0) nfs_setattr_update_inode(inode, sattr); @@ -2494,7 +2557,7 @@ static int _nfs4_proc_access(struct inode *inode, struct nfs_access_entry *entry struct nfs_server *server = NFS_SERVER(inode); struct nfs4_accessargs args = { .fh = NFS_FH(inode), - .bitmask = server->attr_bitmask, + .bitmask = server->cache_consistency_bitmask, }; struct nfs4_accessres res = { .server = server, @@ -2712,8 +2775,18 @@ static void nfs4_proc_unlink_setup(struct rpc_message *msg, struct inode *dir) args->bitmask = server->cache_consistency_bitmask; res->server = server; - res->seq_res.sr_slot = NULL; msg->rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_REMOVE]; + nfs41_init_sequence(&args->seq_args, &res->seq_res, 1); +} + +static void nfs4_proc_unlink_rpc_prepare(struct rpc_task *task, struct nfs_unlinkdata *data) +{ + if (nfs4_setup_sequence(NFS_SERVER(data->dir), + &data->args.seq_args, + &data->res.seq_res, + task)) + return; + rpc_call_start(task); } static int nfs4_proc_unlink_done(struct rpc_task *task, struct inode *dir) @@ -2738,6 +2811,17 @@ static void nfs4_proc_rename_setup(struct rpc_message *msg, struct inode *dir) msg->rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_RENAME]; arg->bitmask = server->attr_bitmask; res->server = server; + nfs41_init_sequence(&arg->seq_args, &res->seq_res, 1); +} + +static void nfs4_proc_rename_rpc_prepare(struct rpc_task *task, struct nfs_renamedata *data) +{ + if (nfs4_setup_sequence(NFS_SERVER(data->old_dir), + &data->args.seq_args, + &data->res.seq_res, + task)) + return; + rpc_call_start(task); } static int nfs4_proc_rename_done(struct rpc_task *task, struct inode *old_dir, @@ -3232,6 +3316,17 @@ static void nfs4_proc_read_setup(struct nfs_read_data *data, struct rpc_message data->timestamp = jiffies; data->read_done_cb = nfs4_read_done_cb; msg->rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_READ]; + nfs41_init_sequence(&data->args.seq_args, &data->res.seq_res, 0); +} + +static void nfs4_proc_read_rpc_prepare(struct rpc_task *task, struct nfs_read_data *data) +{ + if (nfs4_setup_sequence(NFS_SERVER(data->inode), + &data->args.seq_args, + &data->res.seq_res, + task)) + return; + rpc_call_start(task); } /* Reset the the nfs_read_data to send the read to the MDS. */ @@ -3305,6 +3400,17 @@ static void nfs4_proc_write_setup(struct nfs_write_data *data, struct rpc_messag data->timestamp = jiffies; msg->rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_WRITE]; + nfs41_init_sequence(&data->args.seq_args, &data->res.seq_res, 1); +} + +static void nfs4_proc_write_rpc_prepare(struct rpc_task *task, struct nfs_write_data *data) +{ + if (nfs4_setup_sequence(NFS_SERVER(data->inode), + &data->args.seq_args, + &data->res.seq_res, + task)) + return; + rpc_call_start(task); } static int nfs4_commit_done_cb(struct rpc_task *task, struct nfs_write_data *data) @@ -3339,6 +3445,7 @@ static void nfs4_proc_commit_setup(struct nfs_write_data *data, struct rpc_messa data->write_done_cb = nfs4_commit_done_cb; data->res.server = server; msg->rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_COMMIT]; + nfs41_init_sequence(&data->args.seq_args, &data->res.seq_res, 1); } struct nfs4_renewdata { @@ -3714,8 +3821,11 @@ nfs4_async_handle_error(struct rpc_task *task, const struct nfs_server *server, if (task->tk_status >= 0) return 0; switch(task->tk_status) { + case -NFS4ERR_DELEG_REVOKED: case -NFS4ERR_ADMIN_REVOKED: case -NFS4ERR_BAD_STATEID: + if (state != NULL) + nfs_remove_bad_delegation(state->inode); case -NFS4ERR_OPENMODE: if (state == NULL) break; @@ -3764,6 +3874,16 @@ wait_on_recovery: return -EAGAIN; } +static void nfs4_construct_boot_verifier(struct nfs_client *clp, + nfs4_verifier *bootverf) +{ + __be32 verf[2]; + + verf[0] = htonl((u32)clp->cl_boot_time.tv_sec); + verf[1] = htonl((u32)clp->cl_boot_time.tv_nsec); + memcpy(bootverf->data, verf, sizeof(bootverf->data)); +} + int nfs4_proc_setclientid(struct nfs_client *clp, u32 program, unsigned short port, struct rpc_cred *cred, struct nfs4_setclientid_res *res) @@ -3780,15 +3900,13 @@ int nfs4_proc_setclientid(struct nfs_client *clp, u32 program, .rpc_resp = res, .rpc_cred = cred, }; - __be32 *p; int loop = 0; int status; - p = (__be32*)sc_verifier.data; - *p++ = htonl((u32)clp->cl_boot_time.tv_sec); - *p = htonl((u32)clp->cl_boot_time.tv_nsec); + nfs4_construct_boot_verifier(clp, &sc_verifier); for(;;) { + rcu_read_lock(); setclientid.sc_name_len = scnprintf(setclientid.sc_name, sizeof(setclientid.sc_name), "%s/%s %s %s %u", clp->cl_ipaddr, @@ -3805,6 +3923,7 @@ int nfs4_proc_setclientid(struct nfs_client *clp, u32 program, setclientid.sc_uaddr_len = scnprintf(setclientid.sc_uaddr, sizeof(setclientid.sc_uaddr), "%s.%u.%u", clp->cl_ipaddr, port >> 8, port & 255); + rcu_read_unlock(); status = rpc_call_sync(clp->cl_rpcclient, &msg, RPC_TASK_TIMEOUT); if (status != -NFS4ERR_CLID_INUSE) @@ -3891,7 +4010,7 @@ static void nfs4_delegreturn_prepare(struct rpc_task *task, void *data) if (nfs4_setup_sequence(d_data->res.server, &d_data->args.seq_args, - &d_data->res.seq_res, 1, task)) + &d_data->res.seq_res, task)) return; rpc_call_start(task); } @@ -3925,11 +4044,12 @@ static int _nfs4_proc_delegreturn(struct inode *inode, struct rpc_cred *cred, co data = kzalloc(sizeof(*data), GFP_NOFS); if (data == NULL) return -ENOMEM; + nfs41_init_sequence(&data->args.seq_args, &data->res.seq_res, 1); data->args.fhandle = &data->fh; data->args.stateid = &data->stateid; data->args.bitmask = server->attr_bitmask; nfs_copy_fh(&data->fh, NFS_FH(inode)); - memcpy(&data->stateid, stateid, sizeof(data->stateid)); + nfs4_stateid_copy(&data->stateid, stateid); data->res.fattr = &data->fattr; data->res.server = server; nfs_fattr_init(data->res.fattr); @@ -4016,7 +4136,7 @@ static int _nfs4_proc_getlk(struct nfs4_state *state, int cmd, struct file_lock if (status != 0) goto out; lsp = request->fl_u.nfs4_fl.owner; - arg.lock_owner.id = lsp->ls_id.id; + arg.lock_owner.id = lsp->ls_seqid.owner_id; arg.lock_owner.s_dev = server->s_dev; status = nfs4_call_sync(server->client, server, &msg, &arg.seq_args, &res.seq_res, 1); switch (status) { @@ -4112,9 +4232,8 @@ static void nfs4_locku_done(struct rpc_task *task, void *data) return; switch (task->tk_status) { case 0: - memcpy(calldata->lsp->ls_stateid.data, - calldata->res.stateid.data, - sizeof(calldata->lsp->ls_stateid.data)); + nfs4_stateid_copy(&calldata->lsp->ls_stateid, + &calldata->res.stateid); renew_lease(calldata->server, calldata->timestamp); break; case -NFS4ERR_BAD_STATEID: @@ -4142,7 +4261,7 @@ static void nfs4_locku_prepare(struct rpc_task *task, void *data) calldata->timestamp = jiffies; if (nfs4_setup_sequence(calldata->server, &calldata->arg.seq_args, - &calldata->res.seq_res, 1, task)) + &calldata->res.seq_res, task)) return; rpc_call_start(task); } @@ -4182,6 +4301,7 @@ static struct rpc_task *nfs4_do_unlck(struct file_lock *fl, return ERR_PTR(-ENOMEM); } + nfs41_init_sequence(&data->arg.seq_args, &data->res.seq_res, 1); msg.rpc_argp = &data->arg; msg.rpc_resp = &data->res; task_setup_data.callback_data = data; @@ -4261,7 +4381,7 @@ static struct nfs4_lockdata *nfs4_alloc_lockdata(struct file_lock *fl, goto out_free_seqid; p->arg.lock_stateid = &lsp->ls_stateid; p->arg.lock_owner.clientid = server->nfs_client->cl_clientid; - p->arg.lock_owner.id = lsp->ls_id.id; + p->arg.lock_owner.id = lsp->ls_seqid.owner_id; p->arg.lock_owner.s_dev = server->s_dev; p->res.lock_seqid = p->arg.lock_seqid; p->lsp = lsp; @@ -4297,7 +4417,7 @@ static void nfs4_lock_prepare(struct rpc_task *task, void *calldata) data->timestamp = jiffies; if (nfs4_setup_sequence(data->server, &data->arg.seq_args, - &data->res.seq_res, 1, task)) + &data->res.seq_res, task)) return; rpc_call_start(task); dprintk("%s: done!, ret = %d\n", __func__, data->rpc_status); @@ -4326,8 +4446,7 @@ static void nfs4_lock_done(struct rpc_task *task, void *calldata) goto out; } if (data->rpc_status == 0) { - memcpy(data->lsp->ls_stateid.data, data->res.stateid.data, - sizeof(data->lsp->ls_stateid.data)); + nfs4_stateid_copy(&data->lsp->ls_stateid, &data->res.stateid); data->lsp->ls_flags |= NFS_LOCK_INITIALIZED; renew_lease(NFS_SERVER(data->ctx->dentry->d_inode), data->timestamp); } @@ -4415,6 +4534,7 @@ static int _nfs4_do_setlk(struct nfs4_state *state, int cmd, struct file_lock *f data->arg.reclaim = NFS_LOCK_RECLAIM; task_setup_data.callback_ops = &nfs4_recover_lock_ops; } + nfs41_init_sequence(&data->arg.seq_args, &data->res.seq_res, 1); msg.rpc_argp = &data->arg; msg.rpc_resp = &data->res; task_setup_data.callback_data = data; @@ -4479,15 +4599,34 @@ out: } #if defined(CONFIG_NFS_V4_1) -static int nfs41_lock_expired(struct nfs4_state *state, struct file_lock *request) +static int nfs41_check_expired_locks(struct nfs4_state *state) { - int status; + int status, ret = NFS_OK; + struct nfs4_lock_state *lsp; struct nfs_server *server = NFS_SERVER(state->inode); - status = nfs41_test_stateid(server, state); + list_for_each_entry(lsp, &state->lock_states, ls_locks) { + if (lsp->ls_flags & NFS_LOCK_INITIALIZED) { + status = nfs41_test_stateid(server, &lsp->ls_stateid); + if (status != NFS_OK) { + nfs41_free_stateid(server, &lsp->ls_stateid); + lsp->ls_flags &= ~NFS_LOCK_INITIALIZED; + ret = status; + } + } + }; + + return ret; +} + +static int nfs41_lock_expired(struct nfs4_state *state, struct file_lock *request) +{ + int status = NFS_OK; + + if (test_bit(LK_STATE_IN_USE, &state->flags)) + status = nfs41_check_expired_locks(state); if (status == NFS_OK) - return 0; - nfs41_free_stateid(server, state); + return status; return nfs4_lock_expired(state, request); } #endif @@ -4523,7 +4662,8 @@ static int _nfs4_proc_setlk(struct nfs4_state *state, int cmd, struct file_lock /* Note: we always want to sleep here! */ request->fl_flags = fl_flags | FL_SLEEP; if (do_vfs_lock(request->fl_file, request) < 0) - printk(KERN_WARNING "%s: VFS is out of sync with lock manager!\n", __func__); + printk(KERN_WARNING "NFS: %s: VFS is out of sync with lock " + "manager!\n", __func__); out_unlock: up_read(&nfsi->rwsem); out: @@ -4533,7 +4673,9 @@ out: static int nfs4_proc_setlk(struct nfs4_state *state, int cmd, struct file_lock *request) { - struct nfs4_exception exception = { }; + struct nfs4_exception exception = { + .state = state, + }; int err; do { @@ -4603,8 +4745,8 @@ int nfs4_lock_delegation_recall(struct nfs4_state *state, struct file_lock *fl) err = _nfs4_do_setlk(state, F_SETLK, fl, NFS_LOCK_NEW); switch (err) { default: - printk(KERN_ERR "%s: unhandled error %d.\n", - __func__, err); + printk(KERN_ERR "NFS: %s: unhandled error " + "%d.\n", __func__, err); case 0: case -ESTALE: goto out; @@ -4626,6 +4768,7 @@ int nfs4_lock_delegation_recall(struct nfs4_state *state, struct file_lock *fl) * The show must go on: exit, but mark the * stateid as needing recovery. */ + case -NFS4ERR_DELEG_REVOKED: case -NFS4ERR_ADMIN_REVOKED: case -NFS4ERR_BAD_STATEID: case -NFS4ERR_OPENMODE: @@ -4655,33 +4798,44 @@ out: return err; } +struct nfs_release_lockowner_data { + struct nfs4_lock_state *lsp; + struct nfs_server *server; + struct nfs_release_lockowner_args args; +}; + static void nfs4_release_lockowner_release(void *calldata) { + struct nfs_release_lockowner_data *data = calldata; + nfs4_free_lock_state(data->server, data->lsp); kfree(calldata); } -const struct rpc_call_ops nfs4_release_lockowner_ops = { +static const struct rpc_call_ops nfs4_release_lockowner_ops = { .rpc_release = nfs4_release_lockowner_release, }; -void nfs4_release_lockowner(const struct nfs4_lock_state *lsp) +int nfs4_release_lockowner(struct nfs4_lock_state *lsp) { struct nfs_server *server = lsp->ls_state->owner->so_server; - struct nfs_release_lockowner_args *args; + struct nfs_release_lockowner_data *data; struct rpc_message msg = { .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_RELEASE_LOCKOWNER], }; if (server->nfs_client->cl_mvops->minor_version != 0) - return; - args = kmalloc(sizeof(*args), GFP_NOFS); - if (!args) - return; - args->lock_owner.clientid = server->nfs_client->cl_clientid; - args->lock_owner.id = lsp->ls_id.id; - args->lock_owner.s_dev = server->s_dev; - msg.rpc_argp = args; - rpc_call_async(server->client, &msg, 0, &nfs4_release_lockowner_ops, args); + return -EINVAL; + data = kmalloc(sizeof(*data), GFP_NOFS); + if (!data) + return -ENOMEM; + data->lsp = lsp; + data->server = server; + data->args.lock_owner.clientid = server->nfs_client->cl_clientid; + data->args.lock_owner.id = lsp->ls_seqid.owner_id; + data->args.lock_owner.s_dev = server->s_dev; + msg.rpc_argp = &data->args; + rpc_call_async(server->client, &msg, 0, &nfs4_release_lockowner_ops, data); + return 0; } #define XATTR_NAME_NFSV4_ACL "system.nfs4_acl" @@ -4727,11 +4881,11 @@ static void nfs_fixup_referral_attributes(struct nfs_fattr *fattr) if (!(((fattr->valid & NFS_ATTR_FATTR_MOUNTED_ON_FILEID) || (fattr->valid & NFS_ATTR_FATTR_FILEID)) && (fattr->valid & NFS_ATTR_FATTR_FSID) && - (fattr->valid & NFS_ATTR_FATTR_V4_REFERRAL))) + (fattr->valid & NFS_ATTR_FATTR_V4_LOCATIONS))) return; fattr->valid |= NFS_ATTR_FATTR_TYPE | NFS_ATTR_FATTR_MODE | - NFS_ATTR_FATTR_NLINK; + NFS_ATTR_FATTR_NLINK | NFS_ATTR_FATTR_V4_REFERRAL; fattr->mode = S_IFDIR | S_IRUGO | S_IXUGO; fattr->nlink = 2; } @@ -4798,7 +4952,8 @@ static int _nfs4_proc_secinfo(struct inode *dir, const struct qstr *name, struct return status; } -int nfs4_proc_secinfo(struct inode *dir, const struct qstr *name, struct nfs4_secinfo_flavors *flavors) +static int nfs4_proc_secinfo(struct inode *dir, const struct qstr *name, + struct nfs4_secinfo_flavors *flavors) { struct nfs4_exception exception = { }; int err; @@ -4852,6 +5007,7 @@ int nfs4_proc_exchange_id(struct nfs_client *clp, struct rpc_cred *cred) { nfs4_verifier verifier; struct nfs41_exchange_id_args args = { + .verifier = &verifier, .client = clp, .flags = EXCHGID4_FLAG_SUPP_MOVED_REFER, }; @@ -4865,15 +5021,11 @@ int nfs4_proc_exchange_id(struct nfs_client *clp, struct rpc_cred *cred) .rpc_resp = &res, .rpc_cred = cred, }; - __be32 *p; dprintk("--> %s\n", __func__); BUG_ON(clp == NULL); - p = (u32 *)verifier.data; - *p++ = htonl((u32)clp->cl_boot_time.tv_sec); - *p = htonl((u32)clp->cl_boot_time.tv_nsec); - args.verifier = &verifier; + nfs4_construct_boot_verifier(clp, &verifier); args.id_len = scnprintf(args.id, sizeof(args.id), "%s/%s.%s/%u", @@ -4888,11 +5040,24 @@ int nfs4_proc_exchange_id(struct nfs_client *clp, struct rpc_cred *cred) goto out; } + res.impl_id = kzalloc(sizeof(struct nfs41_impl_id), GFP_KERNEL); + if (unlikely(!res.impl_id)) { + status = -ENOMEM; + goto out_server_scope; + } + status = rpc_call_sync(clp->cl_rpcclient, &msg, RPC_TASK_TIMEOUT); if (!status) status = nfs4_check_cl_exchange_flags(clp->cl_exchange_flags); if (!status) { + /* use the most recent implementation id */ + kfree(clp->impl_id); + clp->impl_id = res.impl_id; + } else + kfree(res.impl_id); + + if (!status) { if (clp->server_scope && !nfs41_same_server_scope(clp->server_scope, res.server_scope)) { @@ -4908,8 +5073,16 @@ int nfs4_proc_exchange_id(struct nfs_client *clp, struct rpc_cred *cred) goto out; } } + +out_server_scope: kfree(res.server_scope); out: + if (clp->impl_id) + dprintk("%s: Server Implementation ID: " + "domain: %s, name: %s, date: %llu,%u\n", + __func__, clp->impl_id->domain, clp->impl_id->name, + clp->impl_id->date.seconds, + clp->impl_id->date.nseconds); dprintk("<-- %s status= %d\n", __func__, status); return status; } @@ -4933,7 +5106,7 @@ static void nfs4_get_lease_time_prepare(struct rpc_task *task, since we're invoked within one */ ret = nfs41_setup_sequence(data->clp->cl_session, &data->args->la_seq_args, - &data->res->lr_seq_res, 0, task); + &data->res->lr_seq_res, task); BUG_ON(ret == -EAGAIN); rpc_call_start(task); @@ -4966,7 +5139,7 @@ static void nfs4_get_lease_time_done(struct rpc_task *task, void *calldata) dprintk("<-- %s\n", __func__); } -struct rpc_call_ops nfs4_get_lease_time_ops = { +static const struct rpc_call_ops nfs4_get_lease_time_ops = { .rpc_call_prepare = nfs4_get_lease_time_prepare, .rpc_call_done = nfs4_get_lease_time_done, }; @@ -4997,6 +5170,7 @@ int nfs4_proc_get_lease_time(struct nfs_client *clp, struct nfs_fsinfo *fsinfo) }; int status; + nfs41_init_sequence(&args.la_seq_args, &res.lr_seq_res, 0); dprintk("--> %s\n", __func__); task = rpc_run_task(&task_setup); @@ -5113,13 +5287,13 @@ struct nfs4_session *nfs4_alloc_session(struct nfs_client *clp) return NULL; tbl = &session->fc_slot_table; - tbl->highest_used_slotid = -1; + tbl->highest_used_slotid = NFS4_NO_SLOT; spin_lock_init(&tbl->slot_tbl_lock); rpc_init_priority_wait_queue(&tbl->slot_tbl_waitq, "ForeChannel Slot table"); init_completion(&tbl->complete); tbl = &session->bc_slot_table; - tbl->highest_used_slotid = -1; + tbl->highest_used_slotid = NFS4_NO_SLOT; spin_lock_init(&tbl->slot_tbl_lock); rpc_init_wait_queue(&tbl->slot_tbl_waitq, "BackChannel Slot table"); init_completion(&tbl->complete); @@ -5132,11 +5306,16 @@ struct nfs4_session *nfs4_alloc_session(struct nfs_client *clp) void nfs4_destroy_session(struct nfs4_session *session) { + struct rpc_xprt *xprt; + nfs4_proc_destroy_session(session); + + rcu_read_lock(); + xprt = rcu_dereference(session->clp->cl_rpcclient->cl_xprt); + rcu_read_unlock(); dprintk("%s Destroy backchannel for xprt %p\n", - __func__, session->clp->cl_rpcclient->cl_xprt); - xprt_destroy_backchannel(session->clp->cl_rpcclient->cl_xprt, - NFS41_BC_MIN_CALLBACKS); + __func__, xprt); + xprt_destroy_backchannel(xprt, NFS41_BC_MIN_CALLBACKS); nfs4_destroy_slot_tables(session); kfree(session); } @@ -5164,7 +5343,7 @@ static void nfs4_init_channel_attrs(struct nfs41_create_session_args *args) args->fc_attrs.max_rqst_sz = mxrqst_sz; args->fc_attrs.max_resp_sz = mxresp_sz; args->fc_attrs.max_ops = NFS4_MAX_OPS; - args->fc_attrs.max_reqs = session->clp->cl_rpcclient->cl_xprt->max_reqs; + args->fc_attrs.max_reqs = max_session_slots; dprintk("%s: Fore Channel : max_rqst_sz=%u max_resp_sz=%u " "max_ops=%u max_reqs=%u\n", @@ -5204,6 +5383,8 @@ static int nfs4_verify_fore_channel_attrs(struct nfs41_create_session_args *args return -EINVAL; if (rcvd->max_reqs == 0) return -EINVAL; + if (rcvd->max_reqs > NFS4_MAX_SLOT_TABLE) + rcvd->max_reqs = NFS4_MAX_SLOT_TABLE; return 0; } @@ -5219,9 +5400,9 @@ static int nfs4_verify_back_channel_attrs(struct nfs41_create_session_args *args if (rcvd->max_resp_sz_cached > sent->max_resp_sz_cached) return -EINVAL; /* These would render the backchannel useless: */ - if (rcvd->max_ops == 0) + if (rcvd->max_ops != sent->max_ops) return -EINVAL; - if (rcvd->max_reqs == 0) + if (rcvd->max_reqs != sent->max_reqs) return -EINVAL; return 0; } @@ -5324,7 +5505,7 @@ int nfs4_proc_destroy_session(struct nfs4_session *session) if (status) printk(KERN_WARNING - "Got error %d from the server on DESTROY_SESSION. " + "NFS: Got error %d from the server on DESTROY_SESSION. " "Session has been destroyed regardless...\n", status); dprintk("<-- nfs4_proc_destroy_session\n"); @@ -5447,7 +5628,7 @@ static void nfs41_sequence_prepare(struct rpc_task *task, void *data) args = task->tk_msg.rpc_argp; res = task->tk_msg.rpc_resp; - if (nfs41_setup_sequence(clp->cl_session, args, res, 0, task)) + if (nfs41_setup_sequence(clp->cl_session, args, res, task)) return; rpc_call_start(task); } @@ -5479,6 +5660,7 @@ static struct rpc_task *_nfs41_proc_sequence(struct nfs_client *clp, struct rpc_ nfs_put_client(clp); return ERR_PTR(-ENOMEM); } + nfs41_init_sequence(&calldata->args, &calldata->res, 0); msg.rpc_argp = &calldata->args; msg.rpc_resp = &calldata->res; calldata->clp = clp; @@ -5540,7 +5722,7 @@ static void nfs4_reclaim_complete_prepare(struct rpc_task *task, void *data) rpc_task_set_priority(task, RPC_PRIORITY_PRIVILEGED); if (nfs41_setup_sequence(calldata->clp->cl_session, &calldata->arg.seq_args, - &calldata->res.seq_res, 0, task)) + &calldata->res.seq_res, task)) return; rpc_call_start(task); @@ -5619,6 +5801,7 @@ static int nfs41_proc_reclaim_complete(struct nfs_client *clp) calldata->clp = clp; calldata->arg.one_fs = 0; + nfs41_init_sequence(&calldata->arg.seq_args, &calldata->res.seq_res, 0); msg.rpc_argp = &calldata->arg; msg.rpc_resp = &calldata->res; task_setup_data.callback_data = calldata; @@ -5650,7 +5833,7 @@ nfs4_layoutget_prepare(struct rpc_task *task, void *calldata) * to be no way to prevent it completely. */ if (nfs4_setup_sequence(server, &lgp->args.seq_args, - &lgp->res.seq_res, 0, task)) + &lgp->res.seq_res, task)) return; if (pnfs_choose_layoutget_stateid(&lgp->args.stateid, NFS_I(lgp->args.inode)->layout, @@ -5725,6 +5908,7 @@ int nfs4_proc_layoutget(struct nfs4_layoutget *lgp) lgp->res.layoutp = &lgp->args.layout; lgp->res.seq_res.sr_slot = NULL; + nfs41_init_sequence(&lgp->args.seq_args, &lgp->res.seq_res, 0); task = rpc_run_task(&task_setup_data); if (IS_ERR(task)) return PTR_ERR(task); @@ -5745,7 +5929,7 @@ nfs4_layoutreturn_prepare(struct rpc_task *task, void *calldata) dprintk("--> %s\n", __func__); if (nfs41_setup_sequence(lrp->clp->cl_session, &lrp->args.seq_args, - &lrp->res.seq_res, 0, task)) + &lrp->res.seq_res, task)) return; rpc_call_start(task); } @@ -5811,6 +5995,7 @@ int nfs4_proc_layoutreturn(struct nfs4_layoutreturn *lrp) int status; dprintk("--> %s\n", __func__); + nfs41_init_sequence(&lrp->args.seq_args, &lrp->res.seq_res, 1); task = rpc_run_task(&task_setup_data); if (IS_ERR(task)) return PTR_ERR(task); @@ -5911,7 +6096,7 @@ static void nfs4_layoutcommit_prepare(struct rpc_task *task, void *calldata) struct nfs_server *server = NFS_SERVER(data->args.inode); if (nfs4_setup_sequence(server, &data->args.seq_args, - &data->res.seq_res, 1, task)) + &data->res.seq_res, task)) return; rpc_call_start(task); } @@ -5998,6 +6183,7 @@ nfs4_proc_layoutcommit(struct nfs4_layoutcommit_data *data, bool sync) data->args.lastbytewritten, data->args.inode->i_ino); + nfs41_init_sequence(&data->args.seq_args, &data->res.seq_res, 1); task = rpc_run_task(&task_setup_data); if (IS_ERR(task)) return PTR_ERR(task); @@ -6091,11 +6277,12 @@ out_freepage: out: return err; } -static int _nfs41_test_stateid(struct nfs_server *server, struct nfs4_state *state) + +static int _nfs41_test_stateid(struct nfs_server *server, nfs4_stateid *stateid) { int status; struct nfs41_test_stateid_args args = { - .stateid = &state->stateid, + .stateid = stateid, }; struct nfs41_test_stateid_res res; struct rpc_message msg = { @@ -6103,28 +6290,31 @@ static int _nfs41_test_stateid(struct nfs_server *server, struct nfs4_state *sta .rpc_argp = &args, .rpc_resp = &res, }; - args.seq_args.sa_session = res.seq_res.sr_session = NULL; - status = nfs4_call_sync_sequence(server->client, server, &msg, &args.seq_args, &res.seq_res, 0, 1); + + nfs41_init_sequence(&args.seq_args, &res.seq_res, 0); + status = nfs4_call_sync_sequence(server->client, server, &msg, &args.seq_args, &res.seq_res, 1); + + if (status == NFS_OK) + return res.status; return status; } -static int nfs41_test_stateid(struct nfs_server *server, struct nfs4_state *state) +static int nfs41_test_stateid(struct nfs_server *server, nfs4_stateid *stateid) { struct nfs4_exception exception = { }; int err; do { err = nfs4_handle_exception(server, - _nfs41_test_stateid(server, state), + _nfs41_test_stateid(server, stateid), &exception); } while (exception.retry); return err; } -static int _nfs4_free_stateid(struct nfs_server *server, struct nfs4_state *state) +static int _nfs4_free_stateid(struct nfs_server *server, nfs4_stateid *stateid) { - int status; struct nfs41_free_stateid_args args = { - .stateid = &state->stateid, + .stateid = stateid, }; struct nfs41_free_stateid_res res; struct rpc_message msg = { @@ -6133,25 +6323,46 @@ static int _nfs4_free_stateid(struct nfs_server *server, struct nfs4_state *stat .rpc_resp = &res, }; - args.seq_args.sa_session = res.seq_res.sr_session = NULL; - status = nfs4_call_sync_sequence(server->client, server, &msg, &args.seq_args, &res.seq_res, 0, 1); - return status; + nfs41_init_sequence(&args.seq_args, &res.seq_res, 0); + return nfs4_call_sync_sequence(server->client, server, &msg, &args.seq_args, &res.seq_res, 1); } -static int nfs41_free_stateid(struct nfs_server *server, struct nfs4_state *state) +static int nfs41_free_stateid(struct nfs_server *server, nfs4_stateid *stateid) { struct nfs4_exception exception = { }; int err; do { err = nfs4_handle_exception(server, - _nfs4_free_stateid(server, state), + _nfs4_free_stateid(server, stateid), &exception); } while (exception.retry); return err; } + +static bool nfs41_match_stateid(const nfs4_stateid *s1, + const nfs4_stateid *s2) +{ + if (memcmp(s1->other, s2->other, sizeof(s1->other)) != 0) + return false; + + if (s1->seqid == s2->seqid) + return true; + if (s1->seqid == 0 || s2->seqid == 0) + return true; + + return false; +} + #endif /* CONFIG_NFS_V4_1 */ -struct nfs4_state_recovery_ops nfs40_reboot_recovery_ops = { +static bool nfs4_match_stateid(const nfs4_stateid *s1, + const nfs4_stateid *s2) +{ + return nfs4_stateid_match(s1, s2); +} + + +static const struct nfs4_state_recovery_ops nfs40_reboot_recovery_ops = { .owner_flag_bit = NFS_OWNER_RECLAIM_REBOOT, .state_flag_bit = NFS_STATE_RECLAIM_REBOOT, .recover_open = nfs4_open_reclaim, @@ -6161,7 +6372,7 @@ struct nfs4_state_recovery_ops nfs40_reboot_recovery_ops = { }; #if defined(CONFIG_NFS_V4_1) -struct nfs4_state_recovery_ops nfs41_reboot_recovery_ops = { +static const struct nfs4_state_recovery_ops nfs41_reboot_recovery_ops = { .owner_flag_bit = NFS_OWNER_RECLAIM_REBOOT, .state_flag_bit = NFS_STATE_RECLAIM_REBOOT, .recover_open = nfs4_open_reclaim, @@ -6172,7 +6383,7 @@ struct nfs4_state_recovery_ops nfs41_reboot_recovery_ops = { }; #endif /* CONFIG_NFS_V4_1 */ -struct nfs4_state_recovery_ops nfs40_nograce_recovery_ops = { +static const struct nfs4_state_recovery_ops nfs40_nograce_recovery_ops = { .owner_flag_bit = NFS_OWNER_RECLAIM_NOGRACE, .state_flag_bit = NFS_STATE_RECLAIM_NOGRACE, .recover_open = nfs4_open_expired, @@ -6182,7 +6393,7 @@ struct nfs4_state_recovery_ops nfs40_nograce_recovery_ops = { }; #if defined(CONFIG_NFS_V4_1) -struct nfs4_state_recovery_ops nfs41_nograce_recovery_ops = { +static const struct nfs4_state_recovery_ops nfs41_nograce_recovery_ops = { .owner_flag_bit = NFS_OWNER_RECLAIM_NOGRACE, .state_flag_bit = NFS_STATE_RECLAIM_NOGRACE, .recover_open = nfs41_open_expired, @@ -6192,14 +6403,14 @@ struct nfs4_state_recovery_ops nfs41_nograce_recovery_ops = { }; #endif /* CONFIG_NFS_V4_1 */ -struct nfs4_state_maintenance_ops nfs40_state_renewal_ops = { +static const struct nfs4_state_maintenance_ops nfs40_state_renewal_ops = { .sched_state_renewal = nfs4_proc_async_renew, .get_state_renewal_cred_locked = nfs4_get_renew_cred_locked, .renew_lease = nfs4_proc_renew, }; #if defined(CONFIG_NFS_V4_1) -struct nfs4_state_maintenance_ops nfs41_state_renewal_ops = { +static const struct nfs4_state_maintenance_ops nfs41_state_renewal_ops = { .sched_state_renewal = nfs41_proc_async_sequence, .get_state_renewal_cred_locked = nfs4_get_machine_cred_locked, .renew_lease = nfs4_proc_sequence, @@ -6209,7 +6420,7 @@ struct nfs4_state_maintenance_ops nfs41_state_renewal_ops = { static const struct nfs4_minor_version_ops nfs_v4_0_minor_ops = { .minor_version = 0, .call_sync = _nfs4_call_sync, - .validate_stateid = nfs4_validate_delegation_stateid, + .match_stateid = nfs4_match_stateid, .find_root_sec = nfs4_find_root_sec, .reboot_recovery_ops = &nfs40_reboot_recovery_ops, .nograce_recovery_ops = &nfs40_nograce_recovery_ops, @@ -6220,7 +6431,7 @@ static const struct nfs4_minor_version_ops nfs_v4_0_minor_ops = { static const struct nfs4_minor_version_ops nfs_v4_1_minor_ops = { .minor_version = 1, .call_sync = _nfs4_call_sync_session, - .validate_stateid = nfs41_validate_delegation_stateid, + .match_stateid = nfs41_match_stateid, .find_root_sec = nfs41_find_root_sec, .reboot_recovery_ops = &nfs41_reboot_recovery_ops, .nograce_recovery_ops = &nfs41_nograce_recovery_ops, @@ -6260,9 +6471,11 @@ const struct nfs_rpc_ops nfs_v4_clientops = { .create = nfs4_proc_create, .remove = nfs4_proc_remove, .unlink_setup = nfs4_proc_unlink_setup, + .unlink_rpc_prepare = nfs4_proc_unlink_rpc_prepare, .unlink_done = nfs4_proc_unlink_done, .rename = nfs4_proc_rename, .rename_setup = nfs4_proc_rename_setup, + .rename_rpc_prepare = nfs4_proc_rename_rpc_prepare, .rename_done = nfs4_proc_rename_done, .link = nfs4_proc_link, .symlink = nfs4_proc_symlink, @@ -6276,8 +6489,10 @@ const struct nfs_rpc_ops nfs_v4_clientops = { .set_capabilities = nfs4_server_capabilities, .decode_dirent = nfs4_decode_dirent, .read_setup = nfs4_proc_read_setup, + .read_rpc_prepare = nfs4_proc_read_rpc_prepare, .read_done = nfs4_read_done, .write_setup = nfs4_proc_write_setup, + .write_rpc_prepare = nfs4_proc_write_rpc_prepare, .write_done = nfs4_write_done, .commit_setup = nfs4_proc_commit_setup, .commit_done = nfs4_commit_done, @@ -6301,6 +6516,10 @@ const struct xattr_handler *nfs4_xattr_handlers[] = { NULL }; +module_param(max_session_slots, ushort, 0644); +MODULE_PARM_DESC(max_session_slots, "Maximum number of outstanding NFSv4.1 " + "requests the client will negotiate"); + /* * Local variables: * c-basic-offset: 8 diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c index 4539203..0f43414 100644 --- a/fs/nfs/nfs4state.c +++ b/fs/nfs/nfs4state.c @@ -146,6 +146,11 @@ struct rpc_cred *nfs4_get_renew_cred_locked(struct nfs_client *clp) struct rpc_cred *cred = NULL; struct nfs_server *server; + /* Use machine credentials if available */ + cred = nfs4_get_machine_cred_locked(clp); + if (cred != NULL) + goto out; + rcu_read_lock(); list_for_each_entry_rcu(server, &clp->cl_superblocks, client_link) { cred = nfs4_get_renew_cred_server_locked(server); @@ -153,6 +158,8 @@ struct rpc_cred *nfs4_get_renew_cred_locked(struct nfs_client *clp) break; } rcu_read_unlock(); + +out: return cred; } @@ -190,30 +197,29 @@ static int nfs41_setup_state_renewal(struct nfs_client *clp) static void nfs4_end_drain_session(struct nfs_client *clp) { struct nfs4_session *ses = clp->cl_session; + struct nfs4_slot_table *tbl; int max_slots; if (ses == NULL) return; + tbl = &ses->fc_slot_table; if (test_and_clear_bit(NFS4_SESSION_DRAINING, &ses->session_state)) { - spin_lock(&ses->fc_slot_table.slot_tbl_lock); - max_slots = ses->fc_slot_table.max_slots; + spin_lock(&tbl->slot_tbl_lock); + max_slots = tbl->max_slots; while (max_slots--) { - struct rpc_task *task; - - task = rpc_wake_up_next(&ses->fc_slot_table. - slot_tbl_waitq); - if (!task) + if (rpc_wake_up_first(&tbl->slot_tbl_waitq, + nfs4_set_task_privileged, + NULL) == NULL) break; - rpc_task_set_priority(task, RPC_PRIORITY_PRIVILEGED); } - spin_unlock(&ses->fc_slot_table.slot_tbl_lock); + spin_unlock(&tbl->slot_tbl_lock); } } static int nfs4_wait_on_slot_tbl(struct nfs4_slot_table *tbl) { spin_lock(&tbl->slot_tbl_lock); - if (tbl->highest_used_slotid != -1) { + if (tbl->highest_used_slotid != NFS4_NO_SLOT) { INIT_COMPLETION(tbl->complete); spin_unlock(&tbl->slot_tbl_lock); return wait_for_completion_interruptible(&tbl->complete); @@ -317,62 +323,6 @@ out: return cred; } -static void nfs_alloc_unique_id_locked(struct rb_root *root, - struct nfs_unique_id *new, - __u64 minval, int maxbits) -{ - struct rb_node **p, *parent; - struct nfs_unique_id *pos; - __u64 mask = ~0ULL; - - if (maxbits < 64) - mask = (1ULL << maxbits) - 1ULL; - - /* Ensure distribution is more or less flat */ - get_random_bytes(&new->id, sizeof(new->id)); - new->id &= mask; - if (new->id < minval) - new->id += minval; -retry: - p = &root->rb_node; - parent = NULL; - - while (*p != NULL) { - parent = *p; - pos = rb_entry(parent, struct nfs_unique_id, rb_node); - - if (new->id < pos->id) - p = &(*p)->rb_left; - else if (new->id > pos->id) - p = &(*p)->rb_right; - else - goto id_exists; - } - rb_link_node(&new->rb_node, parent, p); - rb_insert_color(&new->rb_node, root); - return; -id_exists: - for (;;) { - new->id++; - if (new->id < minval || (new->id & mask) != new->id) { - new->id = minval; - break; - } - parent = rb_next(parent); - if (parent == NULL) - break; - pos = rb_entry(parent, struct nfs_unique_id, rb_node); - if (new->id < pos->id) - break; - } - goto retry; -} - -static void nfs_free_unique_id(struct rb_root *root, struct nfs_unique_id *id) -{ - rb_erase(&id->rb_node, root); -} - static struct nfs4_state_owner * nfs4_find_state_owner_locked(struct nfs_server *server, struct rpc_cred *cred) { @@ -405,6 +355,7 @@ nfs4_insert_state_owner_locked(struct nfs4_state_owner *new) struct rb_node **p = &server->state_owners.rb_node, *parent = NULL; struct nfs4_state_owner *sp; + int err; while (*p != NULL) { parent = *p; @@ -421,8 +372,9 @@ nfs4_insert_state_owner_locked(struct nfs4_state_owner *new) return sp; } } - nfs_alloc_unique_id_locked(&server->openowner_id, - &new->so_owner_id, 1, 64); + err = ida_get_new(&server->openowner_id, &new->so_seqid.owner_id); + if (err) + return ERR_PTR(err); rb_link_node(&new->so_server_node, parent, p); rb_insert_color(&new->so_server_node, &server->state_owners); return new; @@ -435,7 +387,23 @@ nfs4_remove_state_owner_locked(struct nfs4_state_owner *sp) if (!RB_EMPTY_NODE(&sp->so_server_node)) rb_erase(&sp->so_server_node, &server->state_owners); - nfs_free_unique_id(&server->openowner_id, &sp->so_owner_id); + ida_remove(&server->openowner_id, sp->so_seqid.owner_id); +} + +static void +nfs4_init_seqid_counter(struct nfs_seqid_counter *sc) +{ + sc->flags = 0; + sc->counter = 0; + spin_lock_init(&sc->lock); + INIT_LIST_HEAD(&sc->list); + rpc_init_wait_queue(&sc->wait, "Seqid_waitqueue"); +} + +static void +nfs4_destroy_seqid_counter(struct nfs_seqid_counter *sc) +{ + rpc_destroy_wait_queue(&sc->wait); } /* @@ -444,19 +412,20 @@ nfs4_remove_state_owner_locked(struct nfs4_state_owner *sp) * */ static struct nfs4_state_owner * -nfs4_alloc_state_owner(void) +nfs4_alloc_state_owner(struct nfs_server *server, + struct rpc_cred *cred, + gfp_t gfp_flags) { struct nfs4_state_owner *sp; - sp = kzalloc(sizeof(*sp),GFP_NOFS); + sp = kzalloc(sizeof(*sp), gfp_flags); if (!sp) return NULL; + sp->so_server = server; + sp->so_cred = get_rpccred(cred); spin_lock_init(&sp->so_lock); INIT_LIST_HEAD(&sp->so_states); - rpc_init_wait_queue(&sp->so_sequence.wait, "Seqid_waitqueue"); - sp->so_seqid.sequence = &sp->so_sequence; - spin_lock_init(&sp->so_sequence.lock); - INIT_LIST_HEAD(&sp->so_sequence.list); + nfs4_init_seqid_counter(&sp->so_seqid); atomic_set(&sp->so_count, 1); INIT_LIST_HEAD(&sp->so_lru); return sp; @@ -478,7 +447,7 @@ nfs4_drop_state_owner(struct nfs4_state_owner *sp) static void nfs4_free_state_owner(struct nfs4_state_owner *sp) { - rpc_destroy_wait_queue(&sp->so_sequence.wait); + nfs4_destroy_seqid_counter(&sp->so_seqid); put_rpccred(sp->so_cred); kfree(sp); } @@ -516,7 +485,8 @@ static void nfs4_gc_state_owners(struct nfs_server *server) * Returns a pointer to an instantiated nfs4_state_owner struct, or NULL. */ struct nfs4_state_owner *nfs4_get_state_owner(struct nfs_server *server, - struct rpc_cred *cred) + struct rpc_cred *cred, + gfp_t gfp_flags) { struct nfs_client *clp = server->nfs_client; struct nfs4_state_owner *sp, *new; @@ -526,20 +496,18 @@ struct nfs4_state_owner *nfs4_get_state_owner(struct nfs_server *server, spin_unlock(&clp->cl_lock); if (sp != NULL) goto out; - new = nfs4_alloc_state_owner(); + new = nfs4_alloc_state_owner(server, cred, gfp_flags); if (new == NULL) goto out; - new->so_server = server; - new->so_cred = cred; - spin_lock(&clp->cl_lock); - sp = nfs4_insert_state_owner_locked(new); - spin_unlock(&clp->cl_lock); - if (sp == new) - get_rpccred(cred); - else { - rpc_destroy_wait_queue(&new->so_sequence.wait); - kfree(new); - } + do { + if (ida_pre_get(&server->openowner_id, gfp_flags) == 0) + break; + spin_lock(&clp->cl_lock); + sp = nfs4_insert_state_owner_locked(new); + spin_unlock(&clp->cl_lock); + } while (sp == ERR_PTR(-EAGAIN)); + if (sp != new) + nfs4_free_state_owner(new); out: nfs4_gc_state_owners(server); return sp; @@ -795,15 +763,11 @@ static struct nfs4_lock_state *nfs4_alloc_lock_state(struct nfs4_state *state, f { struct nfs4_lock_state *lsp; struct nfs_server *server = state->owner->so_server; - struct nfs_client *clp = server->nfs_client; lsp = kzalloc(sizeof(*lsp), GFP_NOFS); if (lsp == NULL) return NULL; - rpc_init_wait_queue(&lsp->ls_sequence.wait, "lock_seqid_waitqueue"); - spin_lock_init(&lsp->ls_sequence.lock); - INIT_LIST_HEAD(&lsp->ls_sequence.list); - lsp->ls_seqid.sequence = &lsp->ls_sequence; + nfs4_init_seqid_counter(&lsp->ls_seqid); atomic_set(&lsp->ls_count, 1); lsp->ls_state = state; lsp->ls_owner.lo_type = type; @@ -815,25 +779,22 @@ static struct nfs4_lock_state *nfs4_alloc_lock_state(struct nfs4_state *state, f lsp->ls_owner.lo_u.posix_owner = fl_owner; break; default: - kfree(lsp); - return NULL; + goto out_free; } - spin_lock(&clp->cl_lock); - nfs_alloc_unique_id_locked(&server->lockowner_id, &lsp->ls_id, 1, 64); - spin_unlock(&clp->cl_lock); + lsp->ls_seqid.owner_id = ida_simple_get(&server->lockowner_id, 0, 0, GFP_NOFS); + if (lsp->ls_seqid.owner_id < 0) + goto out_free; INIT_LIST_HEAD(&lsp->ls_locks); return lsp; +out_free: + kfree(lsp); + return NULL; } -static void nfs4_free_lock_state(struct nfs4_lock_state *lsp) +void nfs4_free_lock_state(struct nfs_server *server, struct nfs4_lock_state *lsp) { - struct nfs_server *server = lsp->ls_state->owner->so_server; - struct nfs_client *clp = server->nfs_client; - - spin_lock(&clp->cl_lock); - nfs_free_unique_id(&server->lockowner_id, &lsp->ls_id); - spin_unlock(&clp->cl_lock); - rpc_destroy_wait_queue(&lsp->ls_sequence.wait); + ida_simple_remove(&server->lockowner_id, lsp->ls_seqid.owner_id); + nfs4_destroy_seqid_counter(&lsp->ls_seqid); kfree(lsp); } @@ -865,7 +826,7 @@ static struct nfs4_lock_state *nfs4_get_lock_state(struct nfs4_state *state, fl_ } spin_unlock(&state->state_lock); if (new != NULL) - nfs4_free_lock_state(new); + nfs4_free_lock_state(state->owner->so_server, new); return lsp; } @@ -886,9 +847,11 @@ void nfs4_put_lock_state(struct nfs4_lock_state *lsp) if (list_empty(&state->lock_states)) clear_bit(LK_STATE_IN_USE, &state->flags); spin_unlock(&state->state_lock); - if (lsp->ls_flags & NFS_LOCK_INITIALIZED) - nfs4_release_lockowner(lsp); - nfs4_free_lock_state(lsp); + if (lsp->ls_flags & NFS_LOCK_INITIALIZED) { + if (nfs4_release_lockowner(lsp) == 0) + return; + } + nfs4_free_lock_state(lsp->ls_state->owner->so_server, lsp); } static void nfs4_fl_copy_lock(struct file_lock *dst, struct file_lock *src) @@ -918,7 +881,8 @@ int nfs4_set_lock_state(struct nfs4_state *state, struct file_lock *fl) if (fl->fl_flags & FL_POSIX) lsp = nfs4_get_lock_state(state, fl->fl_owner, 0, NFS4_POSIX_LOCK_TYPE); else if (fl->fl_flags & FL_FLOCK) - lsp = nfs4_get_lock_state(state, 0, fl->fl_pid, NFS4_FLOCK_LOCK_TYPE); + lsp = nfs4_get_lock_state(state, NULL, fl->fl_pid, + NFS4_FLOCK_LOCK_TYPE); else return -EINVAL; if (lsp == NULL) @@ -928,28 +892,49 @@ int nfs4_set_lock_state(struct nfs4_state *state, struct file_lock *fl) return 0; } -/* - * Byte-range lock aware utility to initialize the stateid of read/write - * requests. - */ -void nfs4_copy_stateid(nfs4_stateid *dst, struct nfs4_state *state, fl_owner_t fl_owner, pid_t fl_pid) +static bool nfs4_copy_lock_stateid(nfs4_stateid *dst, struct nfs4_state *state, + fl_owner_t fl_owner, pid_t fl_pid) { struct nfs4_lock_state *lsp; - int seq; + bool ret = false; - do { - seq = read_seqbegin(&state->seqlock); - memcpy(dst, &state->stateid, sizeof(*dst)); - } while (read_seqretry(&state->seqlock, seq)); if (test_bit(LK_STATE_IN_USE, &state->flags) == 0) - return; + goto out; spin_lock(&state->state_lock); lsp = __nfs4_find_lock_state(state, fl_owner, fl_pid, NFS4_ANY_LOCK_TYPE); - if (lsp != NULL && (lsp->ls_flags & NFS_LOCK_INITIALIZED) != 0) - memcpy(dst, &lsp->ls_stateid, sizeof(*dst)); + if (lsp != NULL && (lsp->ls_flags & NFS_LOCK_INITIALIZED) != 0) { + nfs4_stateid_copy(dst, &lsp->ls_stateid); + ret = true; + } spin_unlock(&state->state_lock); nfs4_put_lock_state(lsp); +out: + return ret; +} + +static void nfs4_copy_open_stateid(nfs4_stateid *dst, struct nfs4_state *state) +{ + int seq; + + do { + seq = read_seqbegin(&state->seqlock); + nfs4_stateid_copy(dst, &state->stateid); + } while (read_seqretry(&state->seqlock, seq)); +} + +/* + * Byte-range lock aware utility to initialize the stateid of read/write + * requests. + */ +void nfs4_select_rw_stateid(nfs4_stateid *dst, struct nfs4_state *state, + fmode_t fmode, fl_owner_t fl_owner, pid_t fl_pid) +{ + if (nfs4_copy_delegation_stateid(dst, state->inode, fmode)) + return; + if (nfs4_copy_lock_stateid(dst, state, fl_owner, fl_pid)) + return; + nfs4_copy_open_stateid(dst, state); } struct nfs_seqid *nfs_alloc_seqid(struct nfs_seqid_counter *counter, gfp_t gfp_mask) @@ -960,20 +945,28 @@ struct nfs_seqid *nfs_alloc_seqid(struct nfs_seqid_counter *counter, gfp_t gfp_m if (new != NULL) { new->sequence = counter; INIT_LIST_HEAD(&new->list); + new->task = NULL; } return new; } void nfs_release_seqid(struct nfs_seqid *seqid) { - if (!list_empty(&seqid->list)) { - struct rpc_sequence *sequence = seqid->sequence->sequence; + struct nfs_seqid_counter *sequence; - spin_lock(&sequence->lock); - list_del_init(&seqid->list); - spin_unlock(&sequence->lock); - rpc_wake_up(&sequence->wait); + if (list_empty(&seqid->list)) + return; + sequence = seqid->sequence; + spin_lock(&sequence->lock); + list_del_init(&seqid->list); + if (!list_empty(&sequence->list)) { + struct nfs_seqid *next; + + next = list_first_entry(&sequence->list, + struct nfs_seqid, list); + rpc_wake_up_queued_task(&sequence->wait, next->task); } + spin_unlock(&sequence->lock); } void nfs_free_seqid(struct nfs_seqid *seqid) @@ -989,14 +982,14 @@ void nfs_free_seqid(struct nfs_seqid *seqid) */ static void nfs_increment_seqid(int status, struct nfs_seqid *seqid) { - BUG_ON(list_first_entry(&seqid->sequence->sequence->list, struct nfs_seqid, list) != seqid); + BUG_ON(list_first_entry(&seqid->sequence->list, struct nfs_seqid, list) != seqid); switch (status) { case 0: break; case -NFS4ERR_BAD_SEQID: if (seqid->sequence->flags & NFS_SEQID_CONFIRMED) return; - printk(KERN_WARNING "NFS: v4 server returned a bad" + pr_warn_ratelimited("NFS: v4 server returned a bad" " sequence-id error on an" " unconfirmed sequence %p!\n", seqid->sequence); @@ -1040,10 +1033,11 @@ void nfs_increment_lock_seqid(int status, struct nfs_seqid *seqid) int nfs_wait_on_sequence(struct nfs_seqid *seqid, struct rpc_task *task) { - struct rpc_sequence *sequence = seqid->sequence->sequence; + struct nfs_seqid_counter *sequence = seqid->sequence; int status = 0; spin_lock(&sequence->lock); + seqid->task = task; if (list_empty(&seqid->list)) list_add_tail(&seqid->list, &sequence->list); if (list_first_entry(&sequence->list, struct nfs_seqid, list) == seqid) @@ -1072,19 +1066,28 @@ static void nfs4_clear_state_manager_bit(struct nfs_client *clp) void nfs4_schedule_state_manager(struct nfs_client *clp) { struct task_struct *task; + char buf[INET6_ADDRSTRLEN + sizeof("-manager") + 1]; if (test_and_set_bit(NFS4CLNT_MANAGER_RUNNING, &clp->cl_state) != 0) return; __module_get(THIS_MODULE); atomic_inc(&clp->cl_count); - task = kthread_run(nfs4_run_state_manager, clp, "%s-manager", - rpc_peeraddr2str(clp->cl_rpcclient, - RPC_DISPLAY_ADDR)); - if (!IS_ERR(task)) - return; - nfs4_clear_state_manager_bit(clp); - nfs_put_client(clp); - module_put(THIS_MODULE); + + /* The rcu_read_lock() is not strictly necessary, as the state + * manager is the only thread that ever changes the rpc_xprt + * after it's initialized. At this point, we're single threaded. */ + rcu_read_lock(); + snprintf(buf, sizeof(buf), "%s-manager", + rpc_peeraddr2str(clp->cl_rpcclient, RPC_DISPLAY_ADDR)); + rcu_read_unlock(); + task = kthread_run(nfs4_run_state_manager, clp, buf); + if (IS_ERR(task)) { + printk(KERN_ERR "%s: kthread_run: %ld\n", + __func__, PTR_ERR(task)); + nfs4_clear_state_manager_bit(clp); + nfs_put_client(clp); + module_put(THIS_MODULE); + } } /* @@ -1098,10 +1101,25 @@ void nfs4_schedule_lease_recovery(struct nfs_client *clp) set_bit(NFS4CLNT_CHECK_LEASE, &clp->cl_state); nfs4_schedule_state_manager(clp); } +EXPORT_SYMBOL_GPL(nfs4_schedule_lease_recovery); + +/* + * nfs40_handle_cb_pathdown - return all delegations after NFS4ERR_CB_PATH_DOWN + * @clp: client to process + * + * Set the NFS4CLNT_LEASE_EXPIRED state in order to force a + * resend of the SETCLIENTID and hence re-establish the + * callback channel. Then return all existing delegations. + */ +static void nfs40_handle_cb_pathdown(struct nfs_client *clp) +{ + set_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state); + nfs_expire_all_delegations(clp); +} void nfs4_schedule_path_down_recovery(struct nfs_client *clp) { - nfs_handle_cb_pathdown(clp); + nfs40_handle_cb_pathdown(clp); nfs4_schedule_state_manager(clp); } @@ -1132,11 +1150,37 @@ void nfs4_schedule_stateid_recovery(const struct nfs_server *server, struct nfs4 { struct nfs_client *clp = server->nfs_client; - if (test_and_clear_bit(NFS_DELEGATED_STATE, &state->flags)) - nfs_async_inode_return_delegation(state->inode, &state->stateid); nfs4_state_mark_reclaim_nograce(clp, state); nfs4_schedule_state_manager(clp); } +EXPORT_SYMBOL_GPL(nfs4_schedule_stateid_recovery); + +void nfs_inode_find_state_and_recover(struct inode *inode, + const nfs4_stateid *stateid) +{ + struct nfs_client *clp = NFS_SERVER(inode)->nfs_client; + struct nfs_inode *nfsi = NFS_I(inode); + struct nfs_open_context *ctx; + struct nfs4_state *state; + bool found = false; + + spin_lock(&inode->i_lock); + list_for_each_entry(ctx, &nfsi->open_files, list) { + state = ctx->state; + if (state == NULL) + continue; + if (!test_bit(NFS_DELEGATED_STATE, &state->flags)) + continue; + if (!nfs4_stateid_match(&state->stateid, stateid)) + continue; + nfs4_state_mark_reclaim_nograce(clp, state); + found = true; + } + spin_unlock(&inode->i_lock); + if (found) + nfs4_schedule_state_manager(clp); +} + static int nfs4_reclaim_locks(struct nfs4_state *state, const struct nfs4_state_recovery_ops *ops) { @@ -1175,8 +1219,8 @@ static int nfs4_reclaim_locks(struct nfs4_state *state, const struct nfs4_state_ case -NFS4ERR_CONN_NOT_BOUND_TO_SESSION: goto out; default: - printk(KERN_ERR "%s: unhandled error %d. Zeroing state\n", - __func__, status); + printk(KERN_ERR "NFS: %s: unhandled error %d. " + "Zeroing state\n", __func__, status); case -ENOMEM: case -NFS4ERR_DENIED: case -NFS4ERR_RECLAIM_BAD: @@ -1222,8 +1266,9 @@ restart: spin_lock(&state->state_lock); list_for_each_entry(lock, &state->lock_states, ls_locks) { if (!(lock->ls_flags & NFS_LOCK_INITIALIZED)) - printk("%s: Lock reclaim failed!\n", - __func__); + pr_warn_ratelimited("NFS: " + "%s: Lock reclaim " + "failed!\n", __func__); } spin_unlock(&state->state_lock); nfs4_put_open_state(state); @@ -1232,8 +1277,8 @@ restart: } switch (status) { default: - printk(KERN_ERR "%s: unhandled error %d. Zeroing state\n", - __func__, status); + printk(KERN_ERR "NFS: %s: unhandled error %d. " + "Zeroing state\n", __func__, status); case -ENOENT: case -ENOMEM: case -ESTALE: @@ -1241,8 +1286,8 @@ restart: * Open state on this file cannot be recovered * All we can do is revert to using the zero stateid. */ - memset(state->stateid.data, 0, - sizeof(state->stateid.data)); + memset(&state->stateid, 0, + sizeof(state->stateid)); /* Mark the file as being 'closed' */ state->state = 0; break; @@ -1420,7 +1465,7 @@ static int nfs4_recovery_handle_error(struct nfs_client *clp, int error) case 0: break; case -NFS4ERR_CB_PATH_DOWN: - nfs_handle_cb_pathdown(clp); + nfs40_handle_cb_pathdown(clp); break; case -NFS4ERR_NO_GRACE: nfs4_state_end_reclaim_reboot(clp); @@ -1801,7 +1846,7 @@ static void nfs4_state_manager(struct nfs_client *clp) } while (atomic_read(&clp->cl_count) > 1); return; out_error: - printk(KERN_WARNING "Error: state manager failed on NFSv4 server %s" + pr_warn_ratelimited("NFS: state manager failed on NFSv4 server %s" " with error %d\n", clp->cl_hostname, -status); nfs4_end_drain_session(clp); nfs4_clear_state_manager_bit(clp); diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c index 33bd8d0..c74fdb1 100644 --- a/fs/nfs/nfs4xdr.c +++ b/fs/nfs/nfs4xdr.c @@ -44,6 +44,8 @@ #include <linux/pagemap.h> #include <linux/proc_fs.h> #include <linux/kdev_t.h> +#include <linux/module.h> +#include <linux/utsname.h> #include <linux/sunrpc/clnt.h> #include <linux/sunrpc/msg_prot.h> #include <linux/sunrpc/gss_api.h> @@ -271,7 +273,12 @@ static int nfs4_stat_to_errno(int); 1 /* flags */ + \ 1 /* spa_how */ + \ 0 /* SP4_NONE (for now) */ + \ - 1 /* zero implemetation id array */) + 1 /* implementation id array of size 1 */ + \ + 1 /* nii_domain */ + \ + XDR_QUADLEN(NFS4_OPAQUE_LIMIT) + \ + 1 /* nii_name */ + \ + XDR_QUADLEN(NFS4_OPAQUE_LIMIT) + \ + 3 /* nii_date */) #define decode_exchange_id_maxsz (op_decode_hdr_maxsz + \ 2 /* eir_clientid */ + \ 1 /* eir_sequenceid */ + \ @@ -284,7 +291,11 @@ static int nfs4_stat_to_errno(int); /* eir_server_scope<> */ \ XDR_QUADLEN(NFS4_OPAQUE_LIMIT) + 1 + \ 1 /* eir_server_impl_id array length */ + \ - 0 /* ignored eir_server_impl_id contents */) + 1 /* nii_domain */ + \ + XDR_QUADLEN(NFS4_OPAQUE_LIMIT) + \ + 1 /* nii_name */ + \ + XDR_QUADLEN(NFS4_OPAQUE_LIMIT) + \ + 3 /* nii_date */) #define encode_channel_attrs_maxsz (6 + 1 /* ca_rdma_ird.len (0) */) #define decode_channel_attrs_maxsz (6 + \ 1 /* ca_rdma_ird.len */ + \ @@ -838,6 +849,12 @@ const u32 nfs41_maxread_overhead = ((RPC_MAX_HEADER_WITH_AUTH + XDR_UNIT); #endif /* CONFIG_NFS_V4_1 */ +static unsigned short send_implementation_id = 1; + +module_param(send_implementation_id, ushort, 0644); +MODULE_PARM_DESC(send_implementation_id, + "Send implementation ID with NFSv4.1 exchange_id"); + static const umode_t nfs_type2fmt[] = { [NF4BAD] = 0, [NF4REG] = S_IFREG, @@ -868,15 +885,44 @@ static __be32 *reserve_space(struct xdr_stream *xdr, size_t nbytes) return p; } +static void encode_opaque_fixed(struct xdr_stream *xdr, const void *buf, size_t len) +{ + __be32 *p; + + p = xdr_reserve_space(xdr, len); + xdr_encode_opaque_fixed(p, buf, len); +} + static void encode_string(struct xdr_stream *xdr, unsigned int len, const char *str) { __be32 *p; - p = xdr_reserve_space(xdr, 4 + len); - BUG_ON(p == NULL); + p = reserve_space(xdr, 4 + len); xdr_encode_opaque(p, str, len); } +static void encode_uint32(struct xdr_stream *xdr, u32 n) +{ + __be32 *p; + + p = reserve_space(xdr, 4); + *p = cpu_to_be32(n); +} + +static void encode_uint64(struct xdr_stream *xdr, u64 n) +{ + __be32 *p; + + p = reserve_space(xdr, 8); + xdr_encode_hyper(p, n); +} + +static void encode_nfs4_seqid(struct xdr_stream *xdr, + const struct nfs_seqid *seqid) +{ + encode_uint32(xdr, seqid->sequence->counter); +} + static void encode_compound_hdr(struct xdr_stream *xdr, struct rpc_rqst *req, struct compound_hdr *hdr) @@ -889,28 +935,37 @@ static void encode_compound_hdr(struct xdr_stream *xdr, * but this is not required as a MUST for the server to do so. */ hdr->replen = RPC_REPHDRSIZE + auth->au_rslack + 3 + hdr->taglen; - dprintk("encode_compound: tag=%.*s\n", (int)hdr->taglen, hdr->tag); BUG_ON(hdr->taglen > NFS4_MAXTAGLEN); - p = reserve_space(xdr, 4 + hdr->taglen + 8); - p = xdr_encode_opaque(p, hdr->tag, hdr->taglen); + encode_string(xdr, hdr->taglen, hdr->tag); + p = reserve_space(xdr, 8); *p++ = cpu_to_be32(hdr->minorversion); hdr->nops_p = p; *p = cpu_to_be32(hdr->nops); } +static void encode_op_hdr(struct xdr_stream *xdr, enum nfs_opnum4 op, + uint32_t replen, + struct compound_hdr *hdr) +{ + encode_uint32(xdr, op); + hdr->nops++; + hdr->replen += replen; +} + static void encode_nops(struct compound_hdr *hdr) { BUG_ON(hdr->nops > NFS4_MAX_OPS); *hdr->nops_p = htonl(hdr->nops); } -static void encode_nfs4_verifier(struct xdr_stream *xdr, const nfs4_verifier *verf) +static void encode_nfs4_stateid(struct xdr_stream *xdr, const nfs4_stateid *stateid) { - __be32 *p; + encode_opaque_fixed(xdr, stateid, NFS4_STATEID_SIZE); +} - p = xdr_reserve_space(xdr, NFS4_VERIFIER_SIZE); - BUG_ON(p == NULL); - xdr_encode_opaque_fixed(p, verf->data, NFS4_VERIFIER_SIZE); +static void encode_nfs4_verifier(struct xdr_stream *xdr, const nfs4_verifier *verf) +{ + encode_opaque_fixed(xdr, verf->data, NFS4_VERIFIER_SIZE); } static void encode_attrs(struct xdr_stream *xdr, const struct iattr *iap, const struct nfs_server *server) @@ -1023,7 +1078,7 @@ static void encode_attrs(struct xdr_stream *xdr, const struct iattr *iap, const * Now we backfill the bitmap and the attribute buffer length. */ if (len != ((char *)p - (char *)q) + 4) { - printk(KERN_ERR "nfs: Attr length error, %u != %Zu\n", + printk(KERN_ERR "NFS: Attr length error, %u != %Zu\n", len, ((char *)p - (char *)q) + 4); BUG(); } @@ -1037,46 +1092,33 @@ static void encode_attrs(struct xdr_stream *xdr, const struct iattr *iap, const static void encode_access(struct xdr_stream *xdr, u32 access, struct compound_hdr *hdr) { - __be32 *p; - - p = reserve_space(xdr, 8); - *p++ = cpu_to_be32(OP_ACCESS); - *p = cpu_to_be32(access); - hdr->nops++; - hdr->replen += decode_access_maxsz; + encode_op_hdr(xdr, OP_ACCESS, decode_access_maxsz, hdr); + encode_uint32(xdr, access); } static void encode_close(struct xdr_stream *xdr, const struct nfs_closeargs *arg, struct compound_hdr *hdr) { - __be32 *p; - - p = reserve_space(xdr, 8+NFS4_STATEID_SIZE); - *p++ = cpu_to_be32(OP_CLOSE); - *p++ = cpu_to_be32(arg->seqid->sequence->counter); - xdr_encode_opaque_fixed(p, arg->stateid->data, NFS4_STATEID_SIZE); - hdr->nops++; - hdr->replen += decode_close_maxsz; + encode_op_hdr(xdr, OP_CLOSE, decode_close_maxsz, hdr); + encode_nfs4_seqid(xdr, arg->seqid); + encode_nfs4_stateid(xdr, arg->stateid); } static void encode_commit(struct xdr_stream *xdr, const struct nfs_writeargs *args, struct compound_hdr *hdr) { __be32 *p; - p = reserve_space(xdr, 16); - *p++ = cpu_to_be32(OP_COMMIT); + encode_op_hdr(xdr, OP_COMMIT, decode_commit_maxsz, hdr); + p = reserve_space(xdr, 12); p = xdr_encode_hyper(p, args->offset); *p = cpu_to_be32(args->count); - hdr->nops++; - hdr->replen += decode_commit_maxsz; } static void encode_create(struct xdr_stream *xdr, const struct nfs4_create_arg *create, struct compound_hdr *hdr) { __be32 *p; - p = reserve_space(xdr, 8); - *p++ = cpu_to_be32(OP_CREATE); - *p = cpu_to_be32(create->ftype); + encode_op_hdr(xdr, OP_CREATE, decode_create_maxsz, hdr); + encode_uint32(xdr, create->ftype); switch (create->ftype) { case NF4LNK: @@ -1096,9 +1138,6 @@ static void encode_create(struct xdr_stream *xdr, const struct nfs4_create_arg * } encode_string(xdr, create->name->len, create->name->name); - hdr->nops++; - hdr->replen += decode_create_maxsz; - encode_attrs(xdr, create->attrs, create->server); } @@ -1106,25 +1145,21 @@ static void encode_getattr_one(struct xdr_stream *xdr, uint32_t bitmap, struct c { __be32 *p; - p = reserve_space(xdr, 12); - *p++ = cpu_to_be32(OP_GETATTR); + encode_op_hdr(xdr, OP_GETATTR, decode_getattr_maxsz, hdr); + p = reserve_space(xdr, 8); *p++ = cpu_to_be32(1); *p = cpu_to_be32(bitmap); - hdr->nops++; - hdr->replen += decode_getattr_maxsz; } static void encode_getattr_two(struct xdr_stream *xdr, uint32_t bm0, uint32_t bm1, struct compound_hdr *hdr) { __be32 *p; - p = reserve_space(xdr, 16); - *p++ = cpu_to_be32(OP_GETATTR); + encode_op_hdr(xdr, OP_GETATTR, decode_getattr_maxsz, hdr); + p = reserve_space(xdr, 12); *p++ = cpu_to_be32(2); *p++ = cpu_to_be32(bm0); *p = cpu_to_be32(bm1); - hdr->nops++; - hdr->replen += decode_getattr_maxsz; } static void @@ -1134,8 +1169,7 @@ encode_getattr_three(struct xdr_stream *xdr, { __be32 *p; - p = reserve_space(xdr, 4); - *p = cpu_to_be32(OP_GETATTR); + encode_op_hdr(xdr, OP_GETATTR, decode_getattr_maxsz, hdr); if (bm2) { p = reserve_space(xdr, 16); *p++ = cpu_to_be32(3); @@ -1152,8 +1186,6 @@ encode_getattr_three(struct xdr_stream *xdr, *p++ = cpu_to_be32(1); *p = cpu_to_be32(bm0); } - hdr->nops++; - hdr->replen += decode_getattr_maxsz; } static void encode_getfattr(struct xdr_stream *xdr, const u32* bitmask, struct compound_hdr *hdr) @@ -1179,23 +1211,13 @@ static void encode_fs_locations(struct xdr_stream *xdr, const u32* bitmask, stru static void encode_getfh(struct xdr_stream *xdr, struct compound_hdr *hdr) { - __be32 *p; - - p = reserve_space(xdr, 4); - *p = cpu_to_be32(OP_GETFH); - hdr->nops++; - hdr->replen += decode_getfh_maxsz; + encode_op_hdr(xdr, OP_GETFH, decode_getfh_maxsz, hdr); } static void encode_link(struct xdr_stream *xdr, const struct qstr *name, struct compound_hdr *hdr) { - __be32 *p; - - p = reserve_space(xdr, 8 + name->len); - *p++ = cpu_to_be32(OP_LINK); - xdr_encode_opaque(p, name->name, name->len); - hdr->nops++; - hdr->replen += decode_link_maxsz; + encode_op_hdr(xdr, OP_LINK, decode_link_maxsz, hdr); + encode_string(xdr, name->len, name->name); } static inline int nfs4_lock_type(struct file_lock *fl, int block) @@ -1232,79 +1254,60 @@ static void encode_lock(struct xdr_stream *xdr, const struct nfs_lock_args *args { __be32 *p; - p = reserve_space(xdr, 32); - *p++ = cpu_to_be32(OP_LOCK); + encode_op_hdr(xdr, OP_LOCK, decode_lock_maxsz, hdr); + p = reserve_space(xdr, 28); *p++ = cpu_to_be32(nfs4_lock_type(args->fl, args->block)); *p++ = cpu_to_be32(args->reclaim); p = xdr_encode_hyper(p, args->fl->fl_start); p = xdr_encode_hyper(p, nfs4_lock_length(args->fl)); *p = cpu_to_be32(args->new_lock_owner); if (args->new_lock_owner){ - p = reserve_space(xdr, 4+NFS4_STATEID_SIZE+4); - *p++ = cpu_to_be32(args->open_seqid->sequence->counter); - p = xdr_encode_opaque_fixed(p, args->open_stateid->data, NFS4_STATEID_SIZE); - *p++ = cpu_to_be32(args->lock_seqid->sequence->counter); + encode_nfs4_seqid(xdr, args->open_seqid); + encode_nfs4_stateid(xdr, args->open_stateid); + encode_nfs4_seqid(xdr, args->lock_seqid); encode_lockowner(xdr, &args->lock_owner); } else { - p = reserve_space(xdr, NFS4_STATEID_SIZE+4); - p = xdr_encode_opaque_fixed(p, args->lock_stateid->data, NFS4_STATEID_SIZE); - *p = cpu_to_be32(args->lock_seqid->sequence->counter); + encode_nfs4_stateid(xdr, args->lock_stateid); + encode_nfs4_seqid(xdr, args->lock_seqid); } - hdr->nops++; - hdr->replen += decode_lock_maxsz; } static void encode_lockt(struct xdr_stream *xdr, const struct nfs_lockt_args *args, struct compound_hdr *hdr) { __be32 *p; - p = reserve_space(xdr, 24); - *p++ = cpu_to_be32(OP_LOCKT); + encode_op_hdr(xdr, OP_LOCKT, decode_lockt_maxsz, hdr); + p = reserve_space(xdr, 20); *p++ = cpu_to_be32(nfs4_lock_type(args->fl, 0)); p = xdr_encode_hyper(p, args->fl->fl_start); p = xdr_encode_hyper(p, nfs4_lock_length(args->fl)); encode_lockowner(xdr, &args->lock_owner); - hdr->nops++; - hdr->replen += decode_lockt_maxsz; } static void encode_locku(struct xdr_stream *xdr, const struct nfs_locku_args *args, struct compound_hdr *hdr) { __be32 *p; - p = reserve_space(xdr, 12+NFS4_STATEID_SIZE+16); - *p++ = cpu_to_be32(OP_LOCKU); - *p++ = cpu_to_be32(nfs4_lock_type(args->fl, 0)); - *p++ = cpu_to_be32(args->seqid->sequence->counter); - p = xdr_encode_opaque_fixed(p, args->stateid->data, NFS4_STATEID_SIZE); + encode_op_hdr(xdr, OP_LOCKU, decode_locku_maxsz, hdr); + encode_uint32(xdr, nfs4_lock_type(args->fl, 0)); + encode_nfs4_seqid(xdr, args->seqid); + encode_nfs4_stateid(xdr, args->stateid); + p = reserve_space(xdr, 16); p = xdr_encode_hyper(p, args->fl->fl_start); xdr_encode_hyper(p, nfs4_lock_length(args->fl)); - hdr->nops++; - hdr->replen += decode_locku_maxsz; } static void encode_release_lockowner(struct xdr_stream *xdr, const struct nfs_lowner *lowner, struct compound_hdr *hdr) { - __be32 *p; - - p = reserve_space(xdr, 4); - *p = cpu_to_be32(OP_RELEASE_LOCKOWNER); + encode_op_hdr(xdr, OP_RELEASE_LOCKOWNER, decode_release_lockowner_maxsz, hdr); encode_lockowner(xdr, lowner); - hdr->nops++; - hdr->replen += decode_release_lockowner_maxsz; } static void encode_lookup(struct xdr_stream *xdr, const struct qstr *name, struct compound_hdr *hdr) { - int len = name->len; - __be32 *p; - - p = reserve_space(xdr, 8 + len); - *p++ = cpu_to_be32(OP_LOOKUP); - xdr_encode_opaque(p, name->name, len); - hdr->nops++; - hdr->replen += decode_lookup_maxsz; + encode_op_hdr(xdr, OP_LOOKUP, decode_lookup_maxsz, hdr); + encode_string(xdr, name->len, name->name); } static void encode_share_access(struct xdr_stream *xdr, fmode_t fmode) @@ -1335,9 +1338,7 @@ static inline void encode_openhdr(struct xdr_stream *xdr, const struct nfs_opena * opcode 4, seqid 4, share_access 4, share_deny 4, clientid 8, ownerlen 4, * owner 4 = 32 */ - p = reserve_space(xdr, 8); - *p++ = cpu_to_be32(OP_OPEN); - *p = cpu_to_be32(arg->seqid->sequence->counter); + encode_nfs4_seqid(xdr, arg->seqid); encode_share_access(xdr, arg->fmode); p = reserve_space(xdr, 32); p = xdr_encode_hyper(p, arg->clientid); @@ -1437,14 +1438,15 @@ static inline void encode_claim_delegate_cur(struct xdr_stream *xdr, const struc { __be32 *p; - p = reserve_space(xdr, 4+NFS4_STATEID_SIZE); - *p++ = cpu_to_be32(NFS4_OPEN_CLAIM_DELEGATE_CUR); - xdr_encode_opaque_fixed(p, stateid->data, NFS4_STATEID_SIZE); + p = reserve_space(xdr, 4); + *p = cpu_to_be32(NFS4_OPEN_CLAIM_DELEGATE_CUR); + encode_nfs4_stateid(xdr, stateid); encode_string(xdr, name->len, name->name); } static void encode_open(struct xdr_stream *xdr, const struct nfs_openargs *arg, struct compound_hdr *hdr) { + encode_op_hdr(xdr, OP_OPEN, decode_open_maxsz, hdr); encode_openhdr(xdr, arg); encode_opentype(xdr, arg); switch (arg->claim) { @@ -1460,88 +1462,64 @@ static void encode_open(struct xdr_stream *xdr, const struct nfs_openargs *arg, default: BUG(); } - hdr->nops++; - hdr->replen += decode_open_maxsz; } static void encode_open_confirm(struct xdr_stream *xdr, const struct nfs_open_confirmargs *arg, struct compound_hdr *hdr) { - __be32 *p; - - p = reserve_space(xdr, 4+NFS4_STATEID_SIZE+4); - *p++ = cpu_to_be32(OP_OPEN_CONFIRM); - p = xdr_encode_opaque_fixed(p, arg->stateid->data, NFS4_STATEID_SIZE); - *p = cpu_to_be32(arg->seqid->sequence->counter); - hdr->nops++; - hdr->replen += decode_open_confirm_maxsz; + encode_op_hdr(xdr, OP_OPEN_CONFIRM, decode_open_confirm_maxsz, hdr); + encode_nfs4_stateid(xdr, arg->stateid); + encode_nfs4_seqid(xdr, arg->seqid); } static void encode_open_downgrade(struct xdr_stream *xdr, const struct nfs_closeargs *arg, struct compound_hdr *hdr) { - __be32 *p; - - p = reserve_space(xdr, 4+NFS4_STATEID_SIZE+4); - *p++ = cpu_to_be32(OP_OPEN_DOWNGRADE); - p = xdr_encode_opaque_fixed(p, arg->stateid->data, NFS4_STATEID_SIZE); - *p = cpu_to_be32(arg->seqid->sequence->counter); + encode_op_hdr(xdr, OP_OPEN_DOWNGRADE, decode_open_downgrade_maxsz, hdr); + encode_nfs4_stateid(xdr, arg->stateid); + encode_nfs4_seqid(xdr, arg->seqid); encode_share_access(xdr, arg->fmode); - hdr->nops++; - hdr->replen += decode_open_downgrade_maxsz; } static void encode_putfh(struct xdr_stream *xdr, const struct nfs_fh *fh, struct compound_hdr *hdr) { - int len = fh->size; - __be32 *p; - - p = reserve_space(xdr, 8 + len); - *p++ = cpu_to_be32(OP_PUTFH); - xdr_encode_opaque(p, fh->data, len); - hdr->nops++; - hdr->replen += decode_putfh_maxsz; + encode_op_hdr(xdr, OP_PUTFH, decode_putfh_maxsz, hdr); + encode_string(xdr, fh->size, fh->data); } static void encode_putrootfh(struct xdr_stream *xdr, struct compound_hdr *hdr) { - __be32 *p; - - p = reserve_space(xdr, 4); - *p = cpu_to_be32(OP_PUTROOTFH); - hdr->nops++; - hdr->replen += decode_putrootfh_maxsz; + encode_op_hdr(xdr, OP_PUTROOTFH, decode_putrootfh_maxsz, hdr); } -static void encode_stateid(struct xdr_stream *xdr, const struct nfs_open_context *ctx, const struct nfs_lock_context *l_ctx, int zero_seqid) +static void encode_open_stateid(struct xdr_stream *xdr, + const struct nfs_open_context *ctx, + const struct nfs_lock_context *l_ctx, + fmode_t fmode, + int zero_seqid) { nfs4_stateid stateid; - __be32 *p; - p = reserve_space(xdr, NFS4_STATEID_SIZE); if (ctx->state != NULL) { - nfs4_copy_stateid(&stateid, ctx->state, l_ctx->lockowner, l_ctx->pid); + nfs4_select_rw_stateid(&stateid, ctx->state, + fmode, l_ctx->lockowner, l_ctx->pid); if (zero_seqid) - stateid.stateid.seqid = 0; - xdr_encode_opaque_fixed(p, stateid.data, NFS4_STATEID_SIZE); + stateid.seqid = 0; + encode_nfs4_stateid(xdr, &stateid); } else - xdr_encode_opaque_fixed(p, zero_stateid.data, NFS4_STATEID_SIZE); + encode_nfs4_stateid(xdr, &zero_stateid); } static void encode_read(struct xdr_stream *xdr, const struct nfs_readargs *args, struct compound_hdr *hdr) { __be32 *p; - p = reserve_space(xdr, 4); - *p = cpu_to_be32(OP_READ); - - encode_stateid(xdr, args->context, args->lock_context, - hdr->minorversion); + encode_op_hdr(xdr, OP_READ, decode_read_maxsz, hdr); + encode_open_stateid(xdr, args->context, args->lock_context, + FMODE_READ, hdr->minorversion); p = reserve_space(xdr, 12); p = xdr_encode_hyper(p, args->offset); *p = cpu_to_be32(args->count); - hdr->nops++; - hdr->replen += decode_read_maxsz; } static void encode_readdir(struct xdr_stream *xdr, const struct nfs4_readdir_arg *readdir, struct rpc_rqst *req, struct compound_hdr *hdr) @@ -1551,7 +1529,7 @@ static void encode_readdir(struct xdr_stream *xdr, const struct nfs4_readdir_arg FATTR4_WORD1_MOUNTED_ON_FILEID, }; uint32_t dircount = readdir->count >> 1; - __be32 *p; + __be32 *p, verf[2]; if (readdir->plus) { attrs[0] |= FATTR4_WORD0_TYPE|FATTR4_WORD0_CHANGE|FATTR4_WORD0_SIZE| @@ -1566,80 +1544,54 @@ static void encode_readdir(struct xdr_stream *xdr, const struct nfs4_readdir_arg if (!(readdir->bitmask[1] & FATTR4_WORD1_MOUNTED_ON_FILEID)) attrs[0] |= FATTR4_WORD0_FILEID; - p = reserve_space(xdr, 12+NFS4_VERIFIER_SIZE+20); - *p++ = cpu_to_be32(OP_READDIR); - p = xdr_encode_hyper(p, readdir->cookie); - p = xdr_encode_opaque_fixed(p, readdir->verifier.data, NFS4_VERIFIER_SIZE); + encode_op_hdr(xdr, OP_READDIR, decode_readdir_maxsz, hdr); + encode_uint64(xdr, readdir->cookie); + encode_nfs4_verifier(xdr, &readdir->verifier); + p = reserve_space(xdr, 20); *p++ = cpu_to_be32(dircount); *p++ = cpu_to_be32(readdir->count); *p++ = cpu_to_be32(2); *p++ = cpu_to_be32(attrs[0] & readdir->bitmask[0]); *p = cpu_to_be32(attrs[1] & readdir->bitmask[1]); - hdr->nops++; - hdr->replen += decode_readdir_maxsz; + memcpy(verf, readdir->verifier.data, sizeof(verf)); dprintk("%s: cookie = %Lu, verifier = %08x:%08x, bitmap = %08x:%08x\n", __func__, (unsigned long long)readdir->cookie, - ((u32 *)readdir->verifier.data)[0], - ((u32 *)readdir->verifier.data)[1], + verf[0], verf[1], attrs[0] & readdir->bitmask[0], attrs[1] & readdir->bitmask[1]); } static void encode_readlink(struct xdr_stream *xdr, const struct nfs4_readlink *readlink, struct rpc_rqst *req, struct compound_hdr *hdr) { - __be32 *p; - - p = reserve_space(xdr, 4); - *p = cpu_to_be32(OP_READLINK); - hdr->nops++; - hdr->replen += decode_readlink_maxsz; + encode_op_hdr(xdr, OP_READLINK, decode_readlink_maxsz, hdr); } static void encode_remove(struct xdr_stream *xdr, const struct qstr *name, struct compound_hdr *hdr) { - __be32 *p; - - p = reserve_space(xdr, 8 + name->len); - *p++ = cpu_to_be32(OP_REMOVE); - xdr_encode_opaque(p, name->name, name->len); - hdr->nops++; - hdr->replen += decode_remove_maxsz; + encode_op_hdr(xdr, OP_REMOVE, decode_remove_maxsz, hdr); + encode_string(xdr, name->len, name->name); } static void encode_rename(struct xdr_stream *xdr, const struct qstr *oldname, const struct qstr *newname, struct compound_hdr *hdr) { - __be32 *p; - - p = reserve_space(xdr, 4); - *p = cpu_to_be32(OP_RENAME); + encode_op_hdr(xdr, OP_RENAME, decode_rename_maxsz, hdr); encode_string(xdr, oldname->len, oldname->name); encode_string(xdr, newname->len, newname->name); - hdr->nops++; - hdr->replen += decode_rename_maxsz; } -static void encode_renew(struct xdr_stream *xdr, const struct nfs_client *client_stateid, struct compound_hdr *hdr) +static void encode_renew(struct xdr_stream *xdr, clientid4 clid, + struct compound_hdr *hdr) { - __be32 *p; - - p = reserve_space(xdr, 12); - *p++ = cpu_to_be32(OP_RENEW); - xdr_encode_hyper(p, client_stateid->cl_clientid); - hdr->nops++; - hdr->replen += decode_renew_maxsz; + encode_op_hdr(xdr, OP_RENEW, decode_renew_maxsz, hdr); + encode_uint64(xdr, clid); } static void encode_restorefh(struct xdr_stream *xdr, struct compound_hdr *hdr) { - __be32 *p; - - p = reserve_space(xdr, 4); - *p = cpu_to_be32(OP_RESTOREFH); - hdr->nops++; - hdr->replen += decode_restorefh_maxsz; + encode_op_hdr(xdr, OP_RESTOREFH, decode_restorefh_maxsz, hdr); } static void @@ -1647,9 +1599,8 @@ encode_setacl(struct xdr_stream *xdr, struct nfs_setaclargs *arg, struct compoun { __be32 *p; - p = reserve_space(xdr, 4+NFS4_STATEID_SIZE); - *p++ = cpu_to_be32(OP_SETATTR); - xdr_encode_opaque_fixed(p, zero_stateid.data, NFS4_STATEID_SIZE); + encode_op_hdr(xdr, OP_SETATTR, decode_setacl_maxsz, hdr); + encode_nfs4_stateid(xdr, &zero_stateid); p = reserve_space(xdr, 2*4); *p++ = cpu_to_be32(1); *p = cpu_to_be32(FATTR4_WORD0_ACL); @@ -1657,30 +1608,18 @@ encode_setacl(struct xdr_stream *xdr, struct nfs_setaclargs *arg, struct compoun p = reserve_space(xdr, 4); *p = cpu_to_be32(arg->acl_len); xdr_write_pages(xdr, arg->acl_pages, arg->acl_pgbase, arg->acl_len); - hdr->nops++; - hdr->replen += decode_setacl_maxsz; } static void encode_savefh(struct xdr_stream *xdr, struct compound_hdr *hdr) { - __be32 *p; - - p = reserve_space(xdr, 4); - *p = cpu_to_be32(OP_SAVEFH); - hdr->nops++; - hdr->replen += decode_savefh_maxsz; + encode_op_hdr(xdr, OP_SAVEFH, decode_savefh_maxsz, hdr); } static void encode_setattr(struct xdr_stream *xdr, const struct nfs_setattrargs *arg, const struct nfs_server *server, struct compound_hdr *hdr) { - __be32 *p; - - p = reserve_space(xdr, 4+NFS4_STATEID_SIZE); - *p++ = cpu_to_be32(OP_SETATTR); - xdr_encode_opaque_fixed(p, arg->stateid.data, NFS4_STATEID_SIZE); - hdr->nops++; - hdr->replen += decode_setattr_maxsz; + encode_op_hdr(xdr, OP_SETATTR, decode_setattr_maxsz, hdr); + encode_nfs4_stateid(xdr, &arg->stateid); encode_attrs(xdr, arg->iap, server); } @@ -1688,9 +1627,8 @@ static void encode_setclientid(struct xdr_stream *xdr, const struct nfs4_setclie { __be32 *p; - p = reserve_space(xdr, 4 + NFS4_VERIFIER_SIZE); - *p++ = cpu_to_be32(OP_SETCLIENTID); - xdr_encode_opaque_fixed(p, setclientid->sc_verifier->data, NFS4_VERIFIER_SIZE); + encode_op_hdr(xdr, OP_SETCLIENTID, decode_setclientid_maxsz, hdr); + encode_nfs4_verifier(xdr, setclientid->sc_verifier); encode_string(xdr, setclientid->sc_name_len, setclientid->sc_name); p = reserve_space(xdr, 4); @@ -1699,31 +1637,23 @@ static void encode_setclientid(struct xdr_stream *xdr, const struct nfs4_setclie encode_string(xdr, setclientid->sc_uaddr_len, setclientid->sc_uaddr); p = reserve_space(xdr, 4); *p = cpu_to_be32(setclientid->sc_cb_ident); - hdr->nops++; - hdr->replen += decode_setclientid_maxsz; } static void encode_setclientid_confirm(struct xdr_stream *xdr, const struct nfs4_setclientid_res *arg, struct compound_hdr *hdr) { - __be32 *p; - - p = reserve_space(xdr, 12 + NFS4_VERIFIER_SIZE); - *p++ = cpu_to_be32(OP_SETCLIENTID_CONFIRM); - p = xdr_encode_hyper(p, arg->clientid); - xdr_encode_opaque_fixed(p, arg->confirm.data, NFS4_VERIFIER_SIZE); - hdr->nops++; - hdr->replen += decode_setclientid_confirm_maxsz; + encode_op_hdr(xdr, OP_SETCLIENTID_CONFIRM, + decode_setclientid_confirm_maxsz, hdr); + encode_uint64(xdr, arg->clientid); + encode_nfs4_verifier(xdr, &arg->confirm); } static void encode_write(struct xdr_stream *xdr, const struct nfs_writeargs *args, struct compound_hdr *hdr) { __be32 *p; - p = reserve_space(xdr, 4); - *p = cpu_to_be32(OP_WRITE); - - encode_stateid(xdr, args->context, args->lock_context, - hdr->minorversion); + encode_op_hdr(xdr, OP_WRITE, decode_write_maxsz, hdr); + encode_open_stateid(xdr, args->context, args->lock_context, + FMODE_WRITE, hdr->minorversion); p = reserve_space(xdr, 16); p = xdr_encode_hyper(p, args->offset); @@ -1731,32 +1661,18 @@ static void encode_write(struct xdr_stream *xdr, const struct nfs_writeargs *arg *p = cpu_to_be32(args->count); xdr_write_pages(xdr, args->pages, args->pgbase, args->count); - hdr->nops++; - hdr->replen += decode_write_maxsz; } static void encode_delegreturn(struct xdr_stream *xdr, const nfs4_stateid *stateid, struct compound_hdr *hdr) { - __be32 *p; - - p = reserve_space(xdr, 4+NFS4_STATEID_SIZE); - - *p++ = cpu_to_be32(OP_DELEGRETURN); - xdr_encode_opaque_fixed(p, stateid->data, NFS4_STATEID_SIZE); - hdr->nops++; - hdr->replen += decode_delegreturn_maxsz; + encode_op_hdr(xdr, OP_DELEGRETURN, decode_delegreturn_maxsz, hdr); + encode_nfs4_stateid(xdr, stateid); } static void encode_secinfo(struct xdr_stream *xdr, const struct qstr *name, struct compound_hdr *hdr) { - int len = name->len; - __be32 *p; - - p = reserve_space(xdr, 8 + len); - *p++ = cpu_to_be32(OP_SECINFO); - xdr_encode_opaque(p, name->name, len); - hdr->nops++; - hdr->replen += decode_secinfo_maxsz; + encode_op_hdr(xdr, OP_SECINFO, decode_secinfo_maxsz, hdr); + encode_string(xdr, name->len, name->name); } #if defined(CONFIG_NFS_V4_1) @@ -1766,19 +1682,39 @@ static void encode_exchange_id(struct xdr_stream *xdr, struct compound_hdr *hdr) { __be32 *p; + char impl_name[NFS4_OPAQUE_LIMIT]; + int len = 0; - p = reserve_space(xdr, 4 + sizeof(args->verifier->data)); - *p++ = cpu_to_be32(OP_EXCHANGE_ID); - xdr_encode_opaque_fixed(p, args->verifier->data, sizeof(args->verifier->data)); + encode_op_hdr(xdr, OP_EXCHANGE_ID, decode_exchange_id_maxsz, hdr); + encode_nfs4_verifier(xdr, args->verifier); encode_string(xdr, args->id_len, args->id); p = reserve_space(xdr, 12); *p++ = cpu_to_be32(args->flags); *p++ = cpu_to_be32(0); /* zero length state_protect4_a */ - *p = cpu_to_be32(0); /* zero length implementation id array */ - hdr->nops++; - hdr->replen += decode_exchange_id_maxsz; + + if (send_implementation_id && + sizeof(CONFIG_NFS_V4_1_IMPLEMENTATION_ID_DOMAIN) > 1 && + sizeof(CONFIG_NFS_V4_1_IMPLEMENTATION_ID_DOMAIN) + <= NFS4_OPAQUE_LIMIT + 1) + len = snprintf(impl_name, sizeof(impl_name), "%s %s %s %s", + utsname()->sysname, utsname()->release, + utsname()->version, utsname()->machine); + + if (len > 0) { + *p = cpu_to_be32(1); /* implementation id array length=1 */ + + encode_string(xdr, + sizeof(CONFIG_NFS_V4_1_IMPLEMENTATION_ID_DOMAIN) - 1, + CONFIG_NFS_V4_1_IMPLEMENTATION_ID_DOMAIN); + encode_string(xdr, len, impl_name); + /* just send zeros for nii_date - the date is in nii_name */ + p = reserve_space(xdr, 12); + p = xdr_encode_hyper(p, 0); + *p = cpu_to_be32(0); + } else + *p = cpu_to_be32(0); /* implementation id array length=0 */ } static void encode_create_session(struct xdr_stream *xdr, @@ -1801,8 +1737,8 @@ static void encode_create_session(struct xdr_stream *xdr, len = scnprintf(machine_name, sizeof(machine_name), "%s", clp->cl_ipaddr); - p = reserve_space(xdr, 20 + 2*28 + 20 + len + 12); - *p++ = cpu_to_be32(OP_CREATE_SESSION); + encode_op_hdr(xdr, OP_CREATE_SESSION, decode_create_session_maxsz, hdr); + p = reserve_space(xdr, 16 + 2*28 + 20 + len + 12); p = xdr_encode_hyper(p, clp->cl_clientid); *p++ = cpu_to_be32(clp->cl_seqid); /*Sequence id */ *p++ = cpu_to_be32(args->flags); /*flags */ @@ -1835,33 +1771,22 @@ static void encode_create_session(struct xdr_stream *xdr, *p++ = cpu_to_be32(0); /* UID */ *p++ = cpu_to_be32(0); /* GID */ *p = cpu_to_be32(0); /* No more gids */ - hdr->nops++; - hdr->replen += decode_create_session_maxsz; } static void encode_destroy_session(struct xdr_stream *xdr, struct nfs4_session *session, struct compound_hdr *hdr) { - __be32 *p; - p = reserve_space(xdr, 4 + NFS4_MAX_SESSIONID_LEN); - *p++ = cpu_to_be32(OP_DESTROY_SESSION); - xdr_encode_opaque_fixed(p, session->sess_id.data, NFS4_MAX_SESSIONID_LEN); - hdr->nops++; - hdr->replen += decode_destroy_session_maxsz; + encode_op_hdr(xdr, OP_DESTROY_SESSION, decode_destroy_session_maxsz, hdr); + encode_opaque_fixed(xdr, session->sess_id.data, NFS4_MAX_SESSIONID_LEN); } static void encode_reclaim_complete(struct xdr_stream *xdr, struct nfs41_reclaim_complete_args *args, struct compound_hdr *hdr) { - __be32 *p; - - p = reserve_space(xdr, 8); - *p++ = cpu_to_be32(OP_RECLAIM_COMPLETE); - *p++ = cpu_to_be32(args->one_fs); - hdr->nops++; - hdr->replen += decode_reclaim_complete_maxsz; + encode_op_hdr(xdr, OP_RECLAIM_COMPLETE, decode_reclaim_complete_maxsz, hdr); + encode_uint32(xdr, args->one_fs); } #endif /* CONFIG_NFS_V4_1 */ @@ -1883,8 +1808,7 @@ static void encode_sequence(struct xdr_stream *xdr, WARN_ON(args->sa_slotid == NFS4_MAX_SLOT_TABLE); slot = tp->slots + args->sa_slotid; - p = reserve_space(xdr, 4 + NFS4_MAX_SESSIONID_LEN + 16); - *p++ = cpu_to_be32(OP_SEQUENCE); + encode_op_hdr(xdr, OP_SEQUENCE, decode_sequence_maxsz, hdr); /* * Sessionid + seqid + slotid + max slotid + cache_this @@ -1898,13 +1822,12 @@ static void encode_sequence(struct xdr_stream *xdr, ((u32 *)session->sess_id.data)[3], slot->seq_nr, args->sa_slotid, tp->highest_used_slotid, args->sa_cache_this); + p = reserve_space(xdr, NFS4_MAX_SESSIONID_LEN + 16); p = xdr_encode_opaque_fixed(p, session->sess_id.data, NFS4_MAX_SESSIONID_LEN); *p++ = cpu_to_be32(slot->seq_nr); *p++ = cpu_to_be32(args->sa_slotid); *p++ = cpu_to_be32(tp->highest_used_slotid); *p = cpu_to_be32(args->sa_cache_this); - hdr->nops++; - hdr->replen += decode_sequence_maxsz; #endif /* CONFIG_NFS_V4_1 */ } @@ -1919,14 +1842,12 @@ encode_getdevicelist(struct xdr_stream *xdr, .data = "dummmmmy", }; - p = reserve_space(xdr, 20); - *p++ = cpu_to_be32(OP_GETDEVICELIST); + encode_op_hdr(xdr, OP_GETDEVICELIST, decode_getdevicelist_maxsz, hdr); + p = reserve_space(xdr, 16); *p++ = cpu_to_be32(args->layoutclass); *p++ = cpu_to_be32(NFS4_PNFS_GETDEVLIST_MAXNUM); xdr_encode_hyper(p, 0ULL); /* cookie */ encode_nfs4_verifier(xdr, &dummy); - hdr->nops++; - hdr->replen += decode_getdevicelist_maxsz; } static void @@ -1936,15 +1857,13 @@ encode_getdeviceinfo(struct xdr_stream *xdr, { __be32 *p; - p = reserve_space(xdr, 16 + NFS4_DEVICEID4_SIZE); - *p++ = cpu_to_be32(OP_GETDEVICEINFO); + encode_op_hdr(xdr, OP_GETDEVICEINFO, decode_getdeviceinfo_maxsz, hdr); + p = reserve_space(xdr, 12 + NFS4_DEVICEID4_SIZE); p = xdr_encode_opaque_fixed(p, args->pdev->dev_id.data, NFS4_DEVICEID4_SIZE); *p++ = cpu_to_be32(args->pdev->layout_type); *p++ = cpu_to_be32(args->pdev->pglen); /* gdia_maxcount */ *p++ = cpu_to_be32(0); /* bitmap length 0 */ - hdr->nops++; - hdr->replen += decode_getdeviceinfo_maxsz; } static void @@ -1954,16 +1873,16 @@ encode_layoutget(struct xdr_stream *xdr, { __be32 *p; - p = reserve_space(xdr, 44 + NFS4_STATEID_SIZE); - *p++ = cpu_to_be32(OP_LAYOUTGET); + encode_op_hdr(xdr, OP_LAYOUTGET, decode_layoutget_maxsz, hdr); + p = reserve_space(xdr, 36); *p++ = cpu_to_be32(0); /* Signal layout available */ *p++ = cpu_to_be32(args->type); *p++ = cpu_to_be32(args->range.iomode); p = xdr_encode_hyper(p, args->range.offset); p = xdr_encode_hyper(p, args->range.length); p = xdr_encode_hyper(p, args->minlength); - p = xdr_encode_opaque_fixed(p, &args->stateid.data, NFS4_STATEID_SIZE); - *p = cpu_to_be32(args->maxcount); + encode_nfs4_stateid(xdr, &args->stateid); + encode_uint32(xdr, args->maxcount); dprintk("%s: 1st type:0x%x iomode:%d off:%lu len:%lu mc:%d\n", __func__, @@ -1972,8 +1891,6 @@ encode_layoutget(struct xdr_stream *xdr, (unsigned long)args->range.offset, (unsigned long)args->range.length, args->maxcount); - hdr->nops++; - hdr->replen += decode_layoutget_maxsz; } static int @@ -1987,13 +1904,14 @@ encode_layoutcommit(struct xdr_stream *xdr, dprintk("%s: lbw: %llu type: %d\n", __func__, args->lastbytewritten, NFS_SERVER(args->inode)->pnfs_curr_ld->id); - p = reserve_space(xdr, 44 + NFS4_STATEID_SIZE); - *p++ = cpu_to_be32(OP_LAYOUTCOMMIT); + encode_op_hdr(xdr, OP_LAYOUTCOMMIT, decode_layoutcommit_maxsz, hdr); + p = reserve_space(xdr, 20); /* Only whole file layouts */ p = xdr_encode_hyper(p, 0); /* offset */ p = xdr_encode_hyper(p, args->lastbytewritten + 1); /* length */ - *p++ = cpu_to_be32(0); /* reclaim */ - p = xdr_encode_opaque_fixed(p, args->stateid.data, NFS4_STATEID_SIZE); + *p = cpu_to_be32(0); /* reclaim */ + encode_nfs4_stateid(xdr, &args->stateid); + p = reserve_space(xdr, 20); *p++ = cpu_to_be32(1); /* newoffset = TRUE */ p = xdr_encode_hyper(p, args->lastbytewritten); *p++ = cpu_to_be32(0); /* Never send time_modify_changed */ @@ -2002,13 +1920,9 @@ encode_layoutcommit(struct xdr_stream *xdr, if (NFS_SERVER(inode)->pnfs_curr_ld->encode_layoutcommit) NFS_SERVER(inode)->pnfs_curr_ld->encode_layoutcommit( NFS_I(inode)->layout, xdr, args); - else { - p = reserve_space(xdr, 4); - *p = cpu_to_be32(0); /* no layout-type payload */ - } + else + encode_uint32(xdr, 0); /* no layout-type payload */ - hdr->nops++; - hdr->replen += decode_layoutcommit_maxsz; return 0; } @@ -2019,27 +1933,23 @@ encode_layoutreturn(struct xdr_stream *xdr, { __be32 *p; - p = reserve_space(xdr, 20); - *p++ = cpu_to_be32(OP_LAYOUTRETURN); + encode_op_hdr(xdr, OP_LAYOUTRETURN, decode_layoutreturn_maxsz, hdr); + p = reserve_space(xdr, 16); *p++ = cpu_to_be32(0); /* reclaim. always 0 for now */ *p++ = cpu_to_be32(args->layout_type); *p++ = cpu_to_be32(IOMODE_ANY); *p = cpu_to_be32(RETURN_FILE); - p = reserve_space(xdr, 16 + NFS4_STATEID_SIZE); + p = reserve_space(xdr, 16); p = xdr_encode_hyper(p, 0); p = xdr_encode_hyper(p, NFS4_MAX_UINT64); spin_lock(&args->inode->i_lock); - xdr_encode_opaque_fixed(p, &args->stateid.data, NFS4_STATEID_SIZE); + encode_nfs4_stateid(xdr, &args->stateid); spin_unlock(&args->inode->i_lock); if (NFS_SERVER(args->inode)->pnfs_curr_ld->encode_layoutreturn) { NFS_SERVER(args->inode)->pnfs_curr_ld->encode_layoutreturn( NFS_I(args->inode)->layout, xdr, args); - } else { - p = reserve_space(xdr, 4); - *p = cpu_to_be32(0); - } - hdr->nops++; - hdr->replen += decode_layoutreturn_maxsz; + } else + encode_uint32(xdr, 0); } static int @@ -2047,12 +1957,8 @@ encode_secinfo_no_name(struct xdr_stream *xdr, const struct nfs41_secinfo_no_name_args *args, struct compound_hdr *hdr) { - __be32 *p; - p = reserve_space(xdr, 8); - *p++ = cpu_to_be32(OP_SECINFO_NO_NAME); - *p++ = cpu_to_be32(args->style); - hdr->nops++; - hdr->replen += decode_secinfo_no_name_maxsz; + encode_op_hdr(xdr, OP_SECINFO_NO_NAME, decode_secinfo_no_name_maxsz, hdr); + encode_uint32(xdr, args->style); return 0; } @@ -2060,26 +1966,17 @@ static void encode_test_stateid(struct xdr_stream *xdr, struct nfs41_test_stateid_args *args, struct compound_hdr *hdr) { - __be32 *p; - - p = reserve_space(xdr, 8 + NFS4_STATEID_SIZE); - *p++ = cpu_to_be32(OP_TEST_STATEID); - *p++ = cpu_to_be32(1); - xdr_encode_opaque_fixed(p, args->stateid->data, NFS4_STATEID_SIZE); - hdr->nops++; - hdr->replen += decode_test_stateid_maxsz; + encode_op_hdr(xdr, OP_TEST_STATEID, decode_test_stateid_maxsz, hdr); + encode_uint32(xdr, 1); + encode_nfs4_stateid(xdr, args->stateid); } static void encode_free_stateid(struct xdr_stream *xdr, struct nfs41_free_stateid_args *args, struct compound_hdr *hdr) { - __be32 *p; - p = reserve_space(xdr, 4 + NFS4_STATEID_SIZE); - *p++ = cpu_to_be32(OP_FREE_STATEID); - xdr_encode_opaque_fixed(p, args->stateid->data, NFS4_STATEID_SIZE); - hdr->nops++; - hdr->replen += decode_free_stateid_maxsz; + encode_op_hdr(xdr, OP_FREE_STATEID, decode_free_stateid_maxsz, hdr); + encode_nfs4_stateid(xdr, args->stateid); } #endif /* CONFIG_NFS_V4_1 */ @@ -2633,6 +2530,7 @@ static void nfs4_xdr_enc_server_caps(struct rpc_rqst *req, encode_sequence(xdr, &args->seq_args, &hdr); encode_putfh(xdr, args->fhandle, &hdr); encode_getattr_one(xdr, FATTR4_WORD0_SUPPORTED_ATTRS| + FATTR4_WORD0_FH_EXPIRE_TYPE| FATTR4_WORD0_LINK_SUPPORT| FATTR4_WORD0_SYMLINK_SUPPORT| FATTR4_WORD0_ACLSUPPORT, &hdr); @@ -2650,7 +2548,7 @@ static void nfs4_xdr_enc_renew(struct rpc_rqst *req, struct xdr_stream *xdr, }; encode_compound_hdr(xdr, req, &hdr); - encode_renew(xdr, clp, &hdr); + encode_renew(xdr, clp->cl_clientid, &hdr); encode_nops(&hdr); } @@ -3180,6 +3078,28 @@ out_overflow: return -EIO; } +static int decode_attr_fh_expire_type(struct xdr_stream *xdr, + uint32_t *bitmap, uint32_t *type) +{ + __be32 *p; + + *type = 0; + if (unlikely(bitmap[0] & (FATTR4_WORD0_FH_EXPIRE_TYPE - 1U))) + return -EIO; + if (likely(bitmap[0] & FATTR4_WORD0_FH_EXPIRE_TYPE)) { + p = xdr_inline_decode(xdr, 4); + if (unlikely(!p)) + goto out_overflow; + *type = be32_to_cpup(p); + bitmap[0] &= ~FATTR4_WORD0_FH_EXPIRE_TYPE; + } + dprintk("%s: expire type=0x%x\n", __func__, *type); + return 0; +out_overflow: + print_overflow_msg(__func__, xdr); + return -EIO; +} + static int decode_attr_change(struct xdr_stream *xdr, uint32_t *bitmap, uint64_t *change) { __be32 *p; @@ -3513,16 +3433,17 @@ static int decode_pathname(struct xdr_stream *xdr, struct nfs4_pathname *path) n = be32_to_cpup(p); if (n == 0) goto root_path; - dprintk("path "); + dprintk("pathname4: "); path->ncomponents = 0; while (path->ncomponents < n) { struct nfs4_string *component = &path->components[path->ncomponents]; status = decode_opaque_inline(xdr, &component->len, &component->data); if (unlikely(status != 0)) goto out_eio; - if (path->ncomponents != n) - dprintk("/"); - dprintk("%s", component->data); + ifdebug (XDR) + pr_cont("%s%.*s ", + (path->ncomponents != n ? "/ " : ""), + component->len, component->data); if (path->ncomponents < NFS4_PATHNAME_MAXCOMPONENTS) path->ncomponents++; else { @@ -3531,14 +3452,13 @@ static int decode_pathname(struct xdr_stream *xdr, struct nfs4_pathname *path) } } out: - dprintk("\n"); return status; root_path: /* a root pathname is sent as a zero component4 */ path->ncomponents = 1; path->components[0].len=0; path->components[0].data=NULL; - dprintk("path /\n"); + dprintk("pathname4: /\n"); goto out; out_eio: dprintk(" status %d", status); @@ -3560,7 +3480,11 @@ static int decode_attr_fs_locations(struct xdr_stream *xdr, uint32_t *bitmap, st status = 0; if (unlikely(!(bitmap[0] & FATTR4_WORD0_FS_LOCATIONS))) goto out; - dprintk("%s: fsroot ", __func__); + status = -EIO; + /* Ignore borken servers that return unrequested attrs */ + if (unlikely(res == NULL)) + goto out; + dprintk("%s: fsroot:\n", __func__); status = decode_pathname(xdr, &res->fs_path); if (unlikely(status != 0)) goto out; @@ -3581,7 +3505,7 @@ static int decode_attr_fs_locations(struct xdr_stream *xdr, uint32_t *bitmap, st m = be32_to_cpup(p); loc->nservers = 0; - dprintk("%s: servers ", __func__); + dprintk("%s: servers:\n", __func__); while (loc->nservers < m) { struct nfs4_string *server = &loc->servers[loc->nservers]; status = decode_opaque_inline(xdr, &server->len, &server->data); @@ -3613,7 +3537,7 @@ static int decode_attr_fs_locations(struct xdr_stream *xdr, uint32_t *bitmap, st res->nlocations++; } if (res->nlocations != 0) - status = NFS_ATTR_FATTR_V4_REFERRAL; + status = NFS_ATTR_FATTR_V4_LOCATIONS; out: dprintk("%s: fs_locations done, error = %d\n", __func__, status); return status; @@ -4157,7 +4081,7 @@ static int decode_opaque_fixed(struct xdr_stream *xdr, void *buf, size_t len) static int decode_stateid(struct xdr_stream *xdr, nfs4_stateid *stateid) { - return decode_opaque_fixed(xdr, stateid->data, NFS4_STATEID_SIZE); + return decode_opaque_fixed(xdr, stateid, NFS4_STATEID_SIZE); } static int decode_close(struct xdr_stream *xdr, struct nfs_closeres *res) @@ -4174,7 +4098,7 @@ static int decode_close(struct xdr_stream *xdr, struct nfs_closeres *res) static int decode_verifier(struct xdr_stream *xdr, void *verifier) { - return decode_opaque_fixed(xdr, verifier, 8); + return decode_opaque_fixed(xdr, verifier, NFS4_VERIFIER_SIZE); } static int decode_commit(struct xdr_stream *xdr, struct nfs_writeres *res) @@ -4224,6 +4148,9 @@ static int decode_server_caps(struct xdr_stream *xdr, struct nfs4_server_caps_re goto xdr_error; if ((status = decode_attr_supported(xdr, bitmap, res->attr_bitmask)) != 0) goto xdr_error; + if ((status = decode_attr_fh_expire_type(xdr, bitmap, + &res->fh_expire_type)) != 0) + goto xdr_error; if ((status = decode_attr_link_support(xdr, bitmap, &res->has_links)) != 0) goto xdr_error; if ((status = decode_attr_symlink_support(xdr, bitmap, &res->has_symlinks)) != 0) @@ -4294,6 +4221,7 @@ xdr_error: static int decode_getfattr_attrs(struct xdr_stream *xdr, uint32_t *bitmap, struct nfs_fattr *fattr, struct nfs_fh *fh, + struct nfs4_fs_locations *fs_loc, const struct nfs_server *server) { int status; @@ -4341,9 +4269,7 @@ static int decode_getfattr_attrs(struct xdr_stream *xdr, uint32_t *bitmap, goto xdr_error; fattr->valid |= status; - status = decode_attr_fs_locations(xdr, bitmap, container_of(fattr, - struct nfs4_fs_locations, - fattr)); + status = decode_attr_fs_locations(xdr, bitmap, fs_loc); if (status < 0) goto xdr_error; fattr->valid |= status; @@ -4407,7 +4333,8 @@ xdr_error: } static int decode_getfattr_generic(struct xdr_stream *xdr, struct nfs_fattr *fattr, - struct nfs_fh *fh, const struct nfs_server *server) + struct nfs_fh *fh, struct nfs4_fs_locations *fs_loc, + const struct nfs_server *server) { __be32 *savep; uint32_t attrlen, @@ -4426,7 +4353,7 @@ static int decode_getfattr_generic(struct xdr_stream *xdr, struct nfs_fattr *fat if (status < 0) goto xdr_error; - status = decode_getfattr_attrs(xdr, bitmap, fattr, fh, server); + status = decode_getfattr_attrs(xdr, bitmap, fattr, fh, fs_loc, server); if (status < 0) goto xdr_error; @@ -4439,7 +4366,7 @@ xdr_error: static int decode_getfattr(struct xdr_stream *xdr, struct nfs_fattr *fattr, const struct nfs_server *server) { - return decode_getfattr_generic(xdr, fattr, NULL, server); + return decode_getfattr_generic(xdr, fattr, NULL, NULL, server); } /* @@ -4463,8 +4390,8 @@ static int decode_first_pnfs_layout_type(struct xdr_stream *xdr, return 0; } if (num > 1) - printk(KERN_INFO "%s: Warning: Multiple pNFS layout drivers " - "per filesystem not supported\n", __func__); + printk(KERN_INFO "NFS: %s: Warning: Multiple pNFS layout " + "drivers per filesystem not supported\n", __func__); /* Decode and set first layout type, move xdr->p past unused types */ p = xdr_inline_decode(xdr, num * 4); @@ -4863,17 +4790,16 @@ static int decode_readdir(struct xdr_stream *xdr, struct rpc_rqst *req, struct n size_t hdrlen; u32 recvd, pglen = rcvbuf->page_len; int status; + __be32 verf[2]; status = decode_op_hdr(xdr, OP_READDIR); if (!status) status = decode_verifier(xdr, readdir->verifier.data); if (unlikely(status)) return status; + memcpy(verf, readdir->verifier.data, sizeof(verf)); dprintk("%s: verifier = %08x:%08x\n", - __func__, - ((u32 *)readdir->verifier.data)[0], - ((u32 *)readdir->verifier.data)[1]); - + __func__, verf[0], verf[1]); hdrlen = (char *) xdr->p - (char *) iov->iov_base; recvd = rcvbuf->len - hdrlen; @@ -5120,7 +5046,7 @@ static int decode_write(struct xdr_stream *xdr, struct nfs_writeres *res) goto out_overflow; res->count = be32_to_cpup(p++); res->verf->committed = be32_to_cpup(p++); - memcpy(res->verf->verifier, p, 8); + memcpy(res->verf->verifier, p, NFS4_VERIFIER_SIZE); return 0; out_overflow: print_overflow_msg(__func__, xdr); @@ -5214,6 +5140,7 @@ static int decode_exchange_id(struct xdr_stream *xdr, char *dummy_str; int status; struct nfs_client *clp = res->client; + uint32_t impl_id_count; status = decode_op_hdr(xdr, OP_EXCHANGE_ID); if (status) @@ -5255,11 +5182,38 @@ static int decode_exchange_id(struct xdr_stream *xdr, memcpy(res->server_scope->server_scope, dummy_str, dummy); res->server_scope->server_scope_sz = dummy; - /* Throw away Implementation id array */ - status = decode_opaque_inline(xdr, &dummy, &dummy_str); - if (unlikely(status)) - return status; + /* Implementation Id */ + p = xdr_inline_decode(xdr, 4); + if (unlikely(!p)) + goto out_overflow; + impl_id_count = be32_to_cpup(p++); + + if (impl_id_count) { + /* nii_domain */ + status = decode_opaque_inline(xdr, &dummy, &dummy_str); + if (unlikely(status)) + return status; + if (unlikely(dummy > NFS4_OPAQUE_LIMIT)) + return -EIO; + memcpy(res->impl_id->domain, dummy_str, dummy); + /* nii_name */ + status = decode_opaque_inline(xdr, &dummy, &dummy_str); + if (unlikely(status)) + return status; + if (unlikely(dummy > NFS4_OPAQUE_LIMIT)) + return -EIO; + memcpy(res->impl_id->name, dummy_str, dummy); + + /* nii_date */ + p = xdr_inline_decode(xdr, 12); + if (unlikely(!p)) + goto out_overflow; + p = xdr_decode_hyper(p, &res->impl_id->date.seconds); + res->impl_id->date.nseconds = be32_to_cpup(p); + + /* if there's more than one entry, ignore the rest */ + } return 0; out_overflow: print_overflow_msg(__func__, xdr); @@ -5285,8 +5239,8 @@ static int decode_chan_attrs(struct xdr_stream *xdr, attrs->max_reqs = be32_to_cpup(p++); nr_attrs = be32_to_cpup(p); if (unlikely(nr_attrs > 1)) { - printk(KERN_WARNING "%s: Invalid rdma channel attrs count %u\n", - __func__, nr_attrs); + printk(KERN_WARNING "NFS: %s: Invalid rdma channel attrs " + "count %u\n", __func__, nr_attrs); return -EINVAL; } if (nr_attrs == 1) { @@ -5436,14 +5390,14 @@ static int decode_getdevicelist(struct xdr_stream *xdr, p += 2; /* Read verifier */ - p = xdr_decode_opaque_fixed(p, verftemp.verifier, 8); + p = xdr_decode_opaque_fixed(p, verftemp.verifier, NFS4_VERIFIER_SIZE); res->num_devs = be32_to_cpup(p); dprintk("%s: num_dev %d\n", __func__, res->num_devs); if (res->num_devs > NFS4_PNFS_GETDEVLIST_MAXNUM) { - printk(KERN_ERR "%s too many result dev_num %u\n", + printk(KERN_ERR "NFS: %s too many result dev_num %u\n", __func__, res->num_devs); return -EIO; } @@ -5537,11 +5491,14 @@ static int decode_layoutget(struct xdr_stream *xdr, struct rpc_rqst *req, status = decode_op_hdr(xdr, OP_LAYOUTGET); if (status) return status; - p = xdr_inline_decode(xdr, 8 + NFS4_STATEID_SIZE); + p = xdr_inline_decode(xdr, 4); + if (unlikely(!p)) + goto out_overflow; + res->return_on_close = be32_to_cpup(p); + decode_stateid(xdr, &res->stateid); + p = xdr_inline_decode(xdr, 4); if (unlikely(!p)) goto out_overflow; - res->return_on_close = be32_to_cpup(p++); - p = xdr_decode_opaque_fixed(p, res->stateid.data, NFS4_STATEID_SIZE); layout_count = be32_to_cpup(p); if (!layout_count) { dprintk("%s: server responded with empty layout array\n", @@ -5666,7 +5623,8 @@ static int decode_test_stateid(struct xdr_stream *xdr, if (unlikely(!p)) goto out_overflow; res->status = be32_to_cpup(p++); - return res->status; + + return status; out_overflow: print_overflow_msg(__func__, xdr); out: @@ -6583,8 +6541,9 @@ static int nfs4_xdr_dec_fs_locations(struct rpc_rqst *req, if (status) goto out; xdr_enter_page(xdr, PAGE_SIZE); - status = decode_getfattr(xdr, &res->fs_locations->fattr, - res->fs_locations->server); + status = decode_getfattr_generic(xdr, &res->fs_locations->fattr, + NULL, res->fs_locations, + res->fs_locations->server); out: return status; } @@ -6964,7 +6923,7 @@ int nfs4_decode_dirent(struct xdr_stream *xdr, struct nfs_entry *entry, goto out_overflow; if (decode_getfattr_attrs(xdr, bitmap, entry->fattr, entry->fh, - entry->server) < 0) + NULL, entry->server) < 0) goto out_overflow; if (entry->fattr->valid & NFS_ATTR_FATTR_MOUNTED_ON_FILEID) entry->ino = entry->fattr->mounted_on_fileid; @@ -7112,7 +7071,7 @@ struct rpc_procinfo nfs4_procedures[] = { #endif /* CONFIG_NFS_V4_1 */ }; -struct rpc_version nfs_version4 = { +const struct rpc_version nfs_version4 = { .number = 4, .nrprocs = ARRAY_SIZE(nfs4_procedures), .procs = nfs4_procedures diff --git a/fs/nfs/nfsroot.c b/fs/nfs/nfsroot.c index c4744e1..cd3c910 100644 --- a/fs/nfs/nfsroot.c +++ b/fs/nfs/nfsroot.c @@ -104,7 +104,7 @@ static char nfs_export_path[NFS_MAXPATHLEN + 1] __initdata = ""; /* server:export path string passed to super.c */ static char nfs_root_device[NFS_MAXPATHLEN + 1] __initdata = ""; -#ifdef RPC_DEBUG +#ifdef NFS_DEBUG /* * When the "nfsrootdebug" kernel command line option is specified, * enable debugging messages for NFSROOT. diff --git a/fs/nfs/objlayout/objio_osd.c b/fs/nfs/objlayout/objio_osd.c index 55d0128..4bff4a3 100644 --- a/fs/nfs/objlayout/objio_osd.c +++ b/fs/nfs/objlayout/objio_osd.c @@ -137,6 +137,7 @@ static int objio_devices_lookup(struct pnfs_layout_hdr *pnfslay, struct objio_dev_ent *ode; struct osd_dev *od; struct osd_dev_info odi; + bool retry_flag = true; int err; ode = _dev_list_find(NFS_SERVER(pnfslay->plh_inode), d_id); @@ -171,10 +172,18 @@ static int objio_devices_lookup(struct pnfs_layout_hdr *pnfslay, goto out; } +retry_lookup: od = osduld_info_lookup(&odi); if (unlikely(IS_ERR(od))) { err = PTR_ERR(od); dprintk("%s: osduld_info_lookup => %d\n", __func__, err); + if (err == -ENODEV && retry_flag) { + err = objlayout_autologin(deviceaddr); + if (likely(!err)) { + retry_flag = false; + goto retry_lookup; + } + } goto out; } @@ -205,25 +214,36 @@ static void copy_single_comp(struct ore_components *oc, unsigned c, int __alloc_objio_seg(unsigned numdevs, gfp_t gfp_flags, struct objio_segment **pseg) { - struct __alloc_objio_segment { - struct objio_segment olseg; - struct ore_dev *ods[numdevs]; - struct ore_comp comps[numdevs]; - } *aolseg; - - aolseg = kzalloc(sizeof(*aolseg), gfp_flags); - if (unlikely(!aolseg)) { +/* This is the in memory structure of the objio_segment + * + * struct __alloc_objio_segment { + * struct objio_segment olseg; + * struct ore_dev *ods[numdevs]; + * struct ore_comp comps[numdevs]; + * } *aolseg; + * NOTE: The code as above compiles and runs perfectly. It is elegant, + * type safe and compact. At some Past time Linus has decided he does not + * like variable length arrays, For the sake of this principal we uglify + * the code as below. + */ + struct objio_segment *lseg; + size_t lseg_size = sizeof(*lseg) + + numdevs * sizeof(lseg->oc.ods[0]) + + numdevs * sizeof(*lseg->oc.comps); + + lseg = kzalloc(lseg_size, gfp_flags); + if (unlikely(!lseg)) { dprintk("%s: Faild allocation numdevs=%d size=%zd\n", __func__, - numdevs, sizeof(*aolseg)); + numdevs, lseg_size); return -ENOMEM; } - aolseg->olseg.oc.numdevs = numdevs; - aolseg->olseg.oc.single_comp = EC_MULTPLE_COMPS; - aolseg->olseg.oc.comps = aolseg->comps; - aolseg->olseg.oc.ods = aolseg->ods; + lseg->oc.numdevs = numdevs; + lseg->oc.single_comp = EC_MULTPLE_COMPS; + lseg->oc.ods = (void *)(lseg + 1); + lseg->oc.comps = (void *)(lseg->oc.ods + numdevs); - *pseg = &aolseg->olseg; + *pseg = lseg; return 0; } @@ -582,10 +602,10 @@ objlayout_init(void) if (ret) printk(KERN_INFO - "%s: Registering OSD pNFS Layout Driver failed: error=%d\n", + "NFS: %s: Registering OSD pNFS Layout Driver failed: error=%d\n", __func__, ret); else - printk(KERN_INFO "%s: Registered OSD pNFS Layout Driver\n", + printk(KERN_INFO "NFS: %s: Registered OSD pNFS Layout Driver\n", __func__); return ret; } @@ -594,7 +614,7 @@ static void __exit objlayout_exit(void) { pnfs_unregister_layoutdriver(&objlayout_type); - printk(KERN_INFO "%s: Unregistered OSD pNFS Layout Driver\n", + printk(KERN_INFO "NFS: %s: Unregistered OSD pNFS Layout Driver\n", __func__); } diff --git a/fs/nfs/objlayout/objlayout.c b/fs/nfs/objlayout/objlayout.c index b3c2903..8d45f1c 100644 --- a/fs/nfs/objlayout/objlayout.c +++ b/fs/nfs/objlayout/objlayout.c @@ -37,6 +37,9 @@ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ +#include <linux/kmod.h> +#include <linux/moduleparam.h> +#include <linux/ratelimit.h> #include <scsi/osd_initiator.h> #include "objlayout.h" @@ -156,7 +159,7 @@ last_byte_offset(u64 start, u64 len) return end > start ? end - 1 : NFS4_MAX_UINT64; } -void _fix_verify_io_params(struct pnfs_layout_segment *lseg, +static void _fix_verify_io_params(struct pnfs_layout_segment *lseg, struct page ***p_pages, unsigned *p_pgbase, u64 offset, unsigned long count) { @@ -490,9 +493,9 @@ encode_accumulated_error(struct objlayout *objlay, __be32 *p) if (!ioerr->oer_errno) continue; - printk(KERN_ERR "%s: err[%d]: errno=%d is_write=%d " - "dev(%llx:%llx) par=0x%llx obj=0x%llx " - "offset=0x%llx length=0x%llx\n", + printk(KERN_ERR "NFS: %s: err[%d]: errno=%d " + "is_write=%d dev(%llx:%llx) par=0x%llx " + "obj=0x%llx offset=0x%llx length=0x%llx\n", __func__, i, ioerr->oer_errno, ioerr->oer_iswrite, _DEVID_LO(&ioerr->oer_component.oid_device_id), @@ -651,3 +654,134 @@ void objlayout_put_deviceinfo(struct pnfs_osd_deviceaddr *deviceaddr) __free_page(odi->page); kfree(odi); } + +enum { + OBJLAYOUT_MAX_URI_LEN = 256, OBJLAYOUT_MAX_OSDNAME_LEN = 64, + OBJLAYOUT_MAX_SYSID_HEX_LEN = OSD_SYSTEMID_LEN * 2 + 1, + OSD_LOGIN_UPCALL_PATHLEN = 256 +}; + +static char osd_login_prog[OSD_LOGIN_UPCALL_PATHLEN] = "/sbin/osd_login"; + +module_param_string(osd_login_prog, osd_login_prog, sizeof(osd_login_prog), + 0600); +MODULE_PARM_DESC(osd_login_prog, "Path to the osd_login upcall program"); + +struct __auto_login { + char uri[OBJLAYOUT_MAX_URI_LEN]; + char osdname[OBJLAYOUT_MAX_OSDNAME_LEN]; + char systemid_hex[OBJLAYOUT_MAX_SYSID_HEX_LEN]; +}; + +static int __objlayout_upcall(struct __auto_login *login) +{ + static char *envp[] = { "HOME=/", + "TERM=linux", + "PATH=/sbin:/usr/sbin:/bin:/usr/bin", + NULL + }; + char *argv[8]; + int ret; + + if (unlikely(!osd_login_prog[0])) { + dprintk("%s: osd_login_prog is disabled\n", __func__); + return -EACCES; + } + + dprintk("%s uri: %s\n", __func__, login->uri); + dprintk("%s osdname %s\n", __func__, login->osdname); + dprintk("%s systemid_hex %s\n", __func__, login->systemid_hex); + + argv[0] = (char *)osd_login_prog; + argv[1] = "-u"; + argv[2] = login->uri; + argv[3] = "-o"; + argv[4] = login->osdname; + argv[5] = "-s"; + argv[6] = login->systemid_hex; + argv[7] = NULL; + + ret = call_usermodehelper(argv[0], argv, envp, UMH_WAIT_PROC); + /* + * Disable the upcall mechanism if we're getting an ENOENT or + * EACCES error. The admin can re-enable it on the fly by using + * sysfs to set the objlayoutdriver.osd_login_prog module parameter once + * the problem has been fixed. + */ + if (ret == -ENOENT || ret == -EACCES) { + printk(KERN_ERR "PNFS-OBJ: %s was not found please set " + "objlayoutdriver.osd_login_prog kernel parameter!\n", + osd_login_prog); + osd_login_prog[0] = '\0'; + } + dprintk("%s %s return value: %d\n", __func__, osd_login_prog, ret); + + return ret; +} + +/* Assume dest is all zeros */ +static void __copy_nfsS_and_zero_terminate(struct nfs4_string s, + char *dest, int max_len, + const char *var_name) +{ + if (!s.len) + return; + + if (s.len >= max_len) { + pr_warn_ratelimited( + "objlayout_autologin: %s: s.len(%d) >= max_len(%d)", + var_name, s.len, max_len); + s.len = max_len - 1; /* space for null terminator */ + } + + memcpy(dest, s.data, s.len); +} + +/* Assume sysid is all zeros */ +static void _sysid_2_hex(struct nfs4_string s, + char sysid[OBJLAYOUT_MAX_SYSID_HEX_LEN]) +{ + int i; + char *cur; + + if (!s.len) + return; + + if (s.len != OSD_SYSTEMID_LEN) { + pr_warn_ratelimited( + "objlayout_autologin: systemid_len(%d) != OSD_SYSTEMID_LEN", + s.len); + if (s.len > OSD_SYSTEMID_LEN) + s.len = OSD_SYSTEMID_LEN; + } + + cur = sysid; + for (i = 0; i < s.len; i++) + cur = hex_byte_pack(cur, s.data[i]); +} + +int objlayout_autologin(struct pnfs_osd_deviceaddr *deviceaddr) +{ + int rc; + struct __auto_login login; + + if (!deviceaddr->oda_targetaddr.ota_netaddr.r_addr.len) + return -ENODEV; + + memset(&login, 0, sizeof(login)); + __copy_nfsS_and_zero_terminate( + deviceaddr->oda_targetaddr.ota_netaddr.r_addr, + login.uri, sizeof(login.uri), "URI"); + + __copy_nfsS_and_zero_terminate( + deviceaddr->oda_osdname, + login.osdname, sizeof(login.osdname), "OSDNAME"); + + _sysid_2_hex(deviceaddr->oda_systemid, login.systemid_hex); + + rc = __objlayout_upcall(&login); + if (rc > 0) /* script returns positive values */ + rc = -ENODEV; + + return rc; +} diff --git a/fs/nfs/objlayout/objlayout.h b/fs/nfs/objlayout/objlayout.h index 8ec3472..880ba08 100644 --- a/fs/nfs/objlayout/objlayout.h +++ b/fs/nfs/objlayout/objlayout.h @@ -184,4 +184,6 @@ extern void objlayout_encode_layoutreturn( struct xdr_stream *, const struct nfs4_layoutreturn_args *); +extern int objlayout_autologin(struct pnfs_osd_deviceaddr *deviceaddr); + #endif /* _OBJLAYOUT_H */ diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c index 5668f7c..d21fcea 100644 --- a/fs/nfs/pagelist.c +++ b/fs/nfs/pagelist.c @@ -13,6 +13,7 @@ #include <linux/file.h> #include <linux/sched.h> #include <linux/sunrpc/clnt.h> +#include <linux/nfs.h> #include <linux/nfs3.h> #include <linux/nfs4.h> #include <linux/nfs_page.h> @@ -106,36 +107,6 @@ void nfs_unlock_request(struct nfs_page *req) nfs_release_request(req); } -/** - * nfs_set_page_tag_locked - Tag a request as locked - * @req: - */ -int nfs_set_page_tag_locked(struct nfs_page *req) -{ - if (!nfs_lock_request_dontget(req)) - return 0; - if (test_bit(PG_MAPPED, &req->wb_flags)) - radix_tree_tag_set(&NFS_I(req->wb_context->dentry->d_inode)->nfs_page_tree, req->wb_index, NFS_PAGE_TAG_LOCKED); - return 1; -} - -/** - * nfs_clear_page_tag_locked - Clear request tag and wake up sleepers - */ -void nfs_clear_page_tag_locked(struct nfs_page *req) -{ - if (test_bit(PG_MAPPED, &req->wb_flags)) { - struct inode *inode = req->wb_context->dentry->d_inode; - struct nfs_inode *nfsi = NFS_I(inode); - - spin_lock(&inode->i_lock); - radix_tree_tag_clear(&nfsi->nfs_page_tree, req->wb_index, NFS_PAGE_TAG_LOCKED); - nfs_unlock_request(req); - spin_unlock(&inode->i_lock); - } else - nfs_unlock_request(req); -} - /* * nfs_clear_request - Free up all resources allocated to the request * @req: @@ -425,67 +396,6 @@ void nfs_pageio_cond_complete(struct nfs_pageio_descriptor *desc, pgoff_t index) } } -#define NFS_SCAN_MAXENTRIES 16 -/** - * nfs_scan_list - Scan a list for matching requests - * @nfsi: NFS inode - * @dst: Destination list - * @idx_start: lower bound of page->index to scan - * @npages: idx_start + npages sets the upper bound to scan. - * @tag: tag to scan for - * - * Moves elements from one of the inode request lists. - * If the number of requests is set to 0, the entire address_space - * starting at index idx_start, is scanned. - * The requests are *not* checked to ensure that they form a contiguous set. - * You must be holding the inode's i_lock when calling this function - */ -int nfs_scan_list(struct nfs_inode *nfsi, - struct list_head *dst, pgoff_t idx_start, - unsigned int npages, int tag) -{ - struct nfs_page *pgvec[NFS_SCAN_MAXENTRIES]; - struct nfs_page *req; - pgoff_t idx_end; - int found, i; - int res; - struct list_head *list; - - res = 0; - if (npages == 0) - idx_end = ~0; - else - idx_end = idx_start + npages - 1; - - for (;;) { - found = radix_tree_gang_lookup_tag(&nfsi->nfs_page_tree, - (void **)&pgvec[0], idx_start, - NFS_SCAN_MAXENTRIES, tag); - if (found <= 0) - break; - for (i = 0; i < found; i++) { - req = pgvec[i]; - if (req->wb_index > idx_end) - goto out; - idx_start = req->wb_index + 1; - if (nfs_set_page_tag_locked(req)) { - kref_get(&req->wb_kref); - radix_tree_tag_clear(&nfsi->nfs_page_tree, - req->wb_index, tag); - list = pnfs_choose_commit_list(req, dst); - nfs_list_add_request(req, list); - res++; - if (res == INT_MAX) - goto out; - } - } - /* for latency reduction */ - cond_resched_lock(&nfsi->vfs_inode.i_lock); - } -out: - return res; -} - int __init nfs_init_nfspagecache(void) { nfs_page_cachep = kmem_cache_create("nfs_page", diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index 17149a4..b5d4515 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c @@ -101,8 +101,8 @@ set_pnfs_layoutdriver(struct nfs_server *server, const struct nfs_fh *mntfh, goto out_no_driver; if (!(server->nfs_client->cl_exchange_flags & (EXCHGID4_FLAG_USE_NON_PNFS | EXCHGID4_FLAG_USE_PNFS_MDS))) { - printk(KERN_ERR "%s: id %u cl_exchange_flags 0x%x\n", __func__, - id, server->nfs_client->cl_exchange_flags); + printk(KERN_ERR "NFS: %s: id %u cl_exchange_flags 0x%x\n", + __func__, id, server->nfs_client->cl_exchange_flags); goto out_no_driver; } ld_type = find_pnfs_driver(id); @@ -122,8 +122,8 @@ set_pnfs_layoutdriver(struct nfs_server *server, const struct nfs_fh *mntfh, server->pnfs_curr_ld = ld_type; if (ld_type->set_layoutdriver && ld_type->set_layoutdriver(server, mntfh)) { - printk(KERN_ERR "%s: Error initializing pNFS layout driver %u.\n", - __func__, id); + printk(KERN_ERR "NFS: %s: Error initializing pNFS layout " + "driver %u.\n", __func__, id); module_put(ld_type->owner); goto out_no_driver; } @@ -143,11 +143,11 @@ pnfs_register_layoutdriver(struct pnfs_layoutdriver_type *ld_type) struct pnfs_layoutdriver_type *tmp; if (ld_type->id == 0) { - printk(KERN_ERR "%s id 0 is reserved\n", __func__); + printk(KERN_ERR "NFS: %s id 0 is reserved\n", __func__); return status; } if (!ld_type->alloc_lseg || !ld_type->free_lseg) { - printk(KERN_ERR "%s Layout driver must provide " + printk(KERN_ERR "NFS: %s Layout driver must provide " "alloc_lseg and free_lseg.\n", __func__); return status; } @@ -160,7 +160,7 @@ pnfs_register_layoutdriver(struct pnfs_layoutdriver_type *ld_type) dprintk("%s Registering id:%u name:%s\n", __func__, ld_type->id, ld_type->name); } else { - printk(KERN_ERR "%s Module with id %d already loaded!\n", + printk(KERN_ERR "NFS: %s Module with id %d already loaded!\n", __func__, ld_type->id); } spin_unlock(&pnfs_spinlock); @@ -496,12 +496,12 @@ pnfs_set_layout_stateid(struct pnfs_layout_hdr *lo, const nfs4_stateid *new, { u32 oldseq, newseq; - oldseq = be32_to_cpu(lo->plh_stateid.stateid.seqid); - newseq = be32_to_cpu(new->stateid.seqid); + oldseq = be32_to_cpu(lo->plh_stateid.seqid); + newseq = be32_to_cpu(new->seqid); if ((int)(newseq - oldseq) > 0) { - memcpy(&lo->plh_stateid, &new->stateid, sizeof(new->stateid)); + nfs4_stateid_copy(&lo->plh_stateid, new); if (update_barrier) { - u32 new_barrier = be32_to_cpu(new->stateid.seqid); + u32 new_barrier = be32_to_cpu(new->seqid); if ((int)(new_barrier - lo->plh_barrier)) lo->plh_barrier = new_barrier; @@ -525,7 +525,7 @@ pnfs_layoutgets_blocked(struct pnfs_layout_hdr *lo, nfs4_stateid *stateid, int lget) { if ((stateid) && - (int)(lo->plh_barrier - be32_to_cpu(stateid->stateid.seqid)) >= 0) + (int)(lo->plh_barrier - be32_to_cpu(stateid->seqid)) >= 0) return true; return lo->plh_block_lgets || test_bit(NFS_LAYOUT_DESTROYED, &lo->plh_flags) || @@ -549,11 +549,10 @@ pnfs_choose_layoutget_stateid(nfs4_stateid *dst, struct pnfs_layout_hdr *lo, do { seq = read_seqbegin(&open_state->seqlock); - memcpy(dst->data, open_state->stateid.data, - sizeof(open_state->stateid.data)); + nfs4_stateid_copy(dst, &open_state->stateid); } while (read_seqretry(&open_state->seqlock, seq)); } else - memcpy(dst->data, lo->plh_stateid.data, sizeof(lo->plh_stateid.data)); + nfs4_stateid_copy(dst, &lo->plh_stateid); spin_unlock(&lo->plh_inode->i_lock); dprintk("<-- %s\n", __func__); return status; @@ -590,7 +589,7 @@ send_layoutget(struct pnfs_layout_hdr *lo, max_resp_sz = server->nfs_client->cl_session->fc_attrs.max_resp_sz; max_pages = max_resp_sz >> PAGE_SHIFT; - pages = kzalloc(max_pages * sizeof(struct page *), gfp_flags); + pages = kcalloc(max_pages, sizeof(struct page *), gfp_flags); if (!pages) goto out_err_free; @@ -760,7 +759,7 @@ bool pnfs_roc_drain(struct inode *ino, u32 *barrier) } if (!found) { struct pnfs_layout_hdr *lo = nfsi->layout; - u32 current_seqid = be32_to_cpu(lo->plh_stateid.stateid.seqid); + u32 current_seqid = be32_to_cpu(lo->plh_stateid.seqid); /* Since close does not return a layout stateid for use as * a barrier, we choose the worst-case barrier. @@ -966,8 +965,7 @@ pnfs_update_layout(struct inode *ino, } /* Do we even need to bother with this? */ - if (test_bit(NFS4CLNT_LAYOUTRECALL, &clp->cl_state) || - test_bit(NFS_LAYOUT_BULK_RECALL, &lo->plh_flags)) { + if (test_bit(NFS_LAYOUT_BULK_RECALL, &lo->plh_flags)) { dprintk("%s matches recall, use MDS\n", __func__); goto out_unlock; } @@ -1032,7 +1030,6 @@ pnfs_layout_process(struct nfs4_layoutget *lgp) struct nfs4_layoutget_res *res = &lgp->res; struct pnfs_layout_segment *lseg; struct inode *ino = lo->plh_inode; - struct nfs_client *clp = NFS_SERVER(ino)->nfs_client; int status = 0; /* Inject layout blob into I/O device driver */ @@ -1048,8 +1045,7 @@ pnfs_layout_process(struct nfs4_layoutget *lgp) } spin_lock(&ino->i_lock); - if (test_bit(NFS4CLNT_LAYOUTRECALL, &clp->cl_state) || - test_bit(NFS_LAYOUT_BULK_RECALL, &lo->plh_flags)) { + if (test_bit(NFS_LAYOUT_BULK_RECALL, &lo->plh_flags)) { dprintk("%s forget reply due to recall\n", __func__); goto out_forget_reply; } @@ -1214,6 +1210,7 @@ void pnfs_ld_write_done(struct nfs_write_data *data) } data->task.tk_status = pnfs_write_done_resend_to_mds(data->inode, &data->pages); } + put_lseg(data->lseg); data->mds_ops->rpc_release(data); } EXPORT_SYMBOL_GPL(pnfs_ld_write_done); @@ -1227,6 +1224,7 @@ pnfs_write_through_mds(struct nfs_pageio_descriptor *desc, nfs_list_add_request(data->req, &desc->pg_list); nfs_pageio_reset_write_mds(desc); desc->pg_recoalesce = 1; + put_lseg(data->lseg); nfs_writedata_release(data); } @@ -1327,6 +1325,7 @@ void pnfs_ld_read_done(struct nfs_read_data *data) data->mds_ops->rpc_call_done(&data->task, data); } else pnfs_ld_handle_read_error(data); + put_lseg(data->lseg); data->mds_ops->rpc_release(data); } EXPORT_SYMBOL_GPL(pnfs_ld_read_done); @@ -1530,8 +1529,7 @@ pnfs_layoutcommit_inode(struct inode *inode, bool sync) end_pos = nfsi->layout->plh_lwb; nfsi->layout->plh_lwb = 0; - memcpy(&data->args.stateid.data, nfsi->layout->plh_stateid.data, - sizeof(nfsi->layout->plh_stateid.data)); + nfs4_stateid_copy(&data->args.stateid, &nfsi->layout->plh_stateid); spin_unlock(&inode->i_lock); data->args.inode = inode; diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h index 53d593a..442ebf6 100644 --- a/fs/nfs/pnfs.h +++ b/fs/nfs/pnfs.h @@ -94,11 +94,10 @@ struct pnfs_layoutdriver_type { const struct nfs_pageio_ops *pg_read_ops; const struct nfs_pageio_ops *pg_write_ops; - /* Returns true if layoutdriver wants to divert this request to - * driver's commit routine. - */ - bool (*mark_pnfs_commit)(struct pnfs_layout_segment *lseg); - struct list_head * (*choose_commit_list) (struct nfs_page *req); + void (*mark_request_commit) (struct nfs_page *req, + struct pnfs_layout_segment *lseg); + void (*clear_request_commit) (struct nfs_page *req); + int (*scan_commit_lists) (struct inode *inode, int max, spinlock_t *lock); int (*commit_pagelist)(struct inode *inode, struct list_head *mds_pages, int how); /* @@ -229,7 +228,6 @@ struct nfs4_deviceid_node { atomic_t ref; }; -void nfs4_print_deviceid(const struct nfs4_deviceid *dev_id); struct nfs4_deviceid_node *nfs4_find_get_deviceid(const struct pnfs_layoutdriver_type *, const struct nfs_client *, const struct nfs4_deviceid *); void nfs4_delete_deviceid(const struct pnfs_layoutdriver_type *, const struct nfs_client *, const struct nfs4_deviceid *); void nfs4_init_deviceid_node(struct nfs4_deviceid_node *, @@ -262,20 +260,6 @@ static inline int pnfs_enabled_sb(struct nfs_server *nfss) return nfss->pnfs_curr_ld != NULL; } -static inline void -pnfs_mark_request_commit(struct nfs_page *req, struct pnfs_layout_segment *lseg) -{ - if (lseg) { - struct pnfs_layoutdriver_type *ld; - - ld = NFS_SERVER(req->wb_page->mapping->host)->pnfs_curr_ld; - if (ld->mark_pnfs_commit && ld->mark_pnfs_commit(lseg)) { - set_bit(PG_PNFS_COMMIT, &req->wb_flags); - req->wb_commit_lseg = get_lseg(lseg); - } - } -} - static inline int pnfs_commit_list(struct inode *inode, struct list_head *mds_pages, int how) { @@ -284,27 +268,42 @@ pnfs_commit_list(struct inode *inode, struct list_head *mds_pages, int how) return NFS_SERVER(inode)->pnfs_curr_ld->commit_pagelist(inode, mds_pages, how); } -static inline struct list_head * -pnfs_choose_commit_list(struct nfs_page *req, struct list_head *mds) +static inline bool +pnfs_mark_request_commit(struct nfs_page *req, struct pnfs_layout_segment *lseg) { - struct list_head *rv; + struct inode *inode = req->wb_context->dentry->d_inode; + struct pnfs_layoutdriver_type *ld = NFS_SERVER(inode)->pnfs_curr_ld; - if (test_and_clear_bit(PG_PNFS_COMMIT, &req->wb_flags)) { - struct inode *inode = req->wb_commit_lseg->pls_layout->plh_inode; + if (lseg == NULL || ld->mark_request_commit == NULL) + return false; + ld->mark_request_commit(req, lseg); + return true; +} - set_bit(NFS_INO_PNFS_COMMIT, &NFS_I(inode)->flags); - rv = NFS_SERVER(inode)->pnfs_curr_ld->choose_commit_list(req); - /* matched by ref taken when PG_PNFS_COMMIT is set */ - put_lseg(req->wb_commit_lseg); - } else - rv = mds; - return rv; +static inline bool +pnfs_clear_request_commit(struct nfs_page *req) +{ + struct inode *inode = req->wb_context->dentry->d_inode; + struct pnfs_layoutdriver_type *ld = NFS_SERVER(inode)->pnfs_curr_ld; + + if (ld == NULL || ld->clear_request_commit == NULL) + return false; + ld->clear_request_commit(req); + return true; } -static inline void pnfs_clear_request_commit(struct nfs_page *req) +static inline int +pnfs_scan_commit_lists(struct inode *inode, int max, spinlock_t *lock) { - if (test_and_clear_bit(PG_PNFS_COMMIT, &req->wb_flags)) - put_lseg(req->wb_commit_lseg); + struct pnfs_layoutdriver_type *ld = NFS_SERVER(inode)->pnfs_curr_ld; + int ret; + + if (ld == NULL || ld->scan_commit_lists == NULL) + return 0; + ret = ld->scan_commit_lists(inode, max, lock); + if (ret != 0) + set_bit(NFS_INO_PNFS_COMMIT, &NFS_I(inode)->flags); + return ret; } /* Should the pNFS client commit and return the layout upon a setattr */ @@ -328,6 +327,13 @@ static inline int pnfs_return_layout(struct inode *ino) return 0; } +#ifdef NFS_DEBUG +void nfs4_print_deviceid(const struct nfs4_deviceid *dev_id); +#else +static inline void nfs4_print_deviceid(const struct nfs4_deviceid *dev_id) +{ +} +#endif /* NFS_DEBUG */ #else /* CONFIG_NFS_V4_1 */ static inline void pnfs_destroy_all_layouts(struct nfs_client *clp) @@ -400,35 +406,35 @@ static inline bool pnfs_pageio_init_write(struct nfs_pageio_descriptor *pgio, st return false; } -static inline void -pnfs_mark_request_commit(struct nfs_page *req, struct pnfs_layout_segment *lseg) -{ -} - static inline int pnfs_commit_list(struct inode *inode, struct list_head *mds_pages, int how) { return PNFS_NOT_ATTEMPTED; } -static inline struct list_head * -pnfs_choose_commit_list(struct nfs_page *req, struct list_head *mds) +static inline bool +pnfs_mark_request_commit(struct nfs_page *req, struct pnfs_layout_segment *lseg) { - return mds; + return false; } -static inline void pnfs_clear_request_commit(struct nfs_page *req) +static inline bool +pnfs_clear_request_commit(struct nfs_page *req) { + return false; } -static inline int pnfs_layoutcommit_inode(struct inode *inode, bool sync) +static inline int +pnfs_scan_commit_lists(struct inode *inode, int max, spinlock_t *lock) { return 0; } -static inline void nfs4_deviceid_purge_client(struct nfs_client *ncl) +static inline int pnfs_layoutcommit_inode(struct inode *inode, bool sync) { + return 0; } + #endif /* CONFIG_NFS_V4_1 */ #endif /* FS_NFS_PNFS_H */ diff --git a/fs/nfs/pnfs_dev.c b/fs/nfs/pnfs_dev.c index 4f359d2..73f701f 100644 --- a/fs/nfs/pnfs_dev.c +++ b/fs/nfs/pnfs_dev.c @@ -43,6 +43,7 @@ static struct hlist_head nfs4_deviceid_cache[NFS4_DEVICE_ID_HASH_SIZE]; static DEFINE_SPINLOCK(nfs4_deviceid_lock); +#ifdef NFS_DEBUG void nfs4_print_deviceid(const struct nfs4_deviceid *id) { @@ -52,6 +53,7 @@ nfs4_print_deviceid(const struct nfs4_deviceid *id) p[0], p[1], p[2], p[3]); } EXPORT_SYMBOL_GPL(nfs4_print_deviceid); +#endif static inline u32 nfs4_deviceid_hash(const struct nfs4_deviceid *id) @@ -92,7 +94,7 @@ _lookup_deviceid(const struct pnfs_layoutdriver_type *ld, * @clp nfs_client associated with deviceid * @id deviceid to look up */ -struct nfs4_deviceid_node * +static struct nfs4_deviceid_node * _find_get_deviceid(const struct pnfs_layoutdriver_type *ld, const struct nfs_client *clp, const struct nfs4_deviceid *id, long hash) diff --git a/fs/nfs/proc.c b/fs/nfs/proc.c index 0c672588..b63b6f4 100644 --- a/fs/nfs/proc.c +++ b/fs/nfs/proc.c @@ -358,6 +358,11 @@ nfs_proc_unlink_setup(struct rpc_message *msg, struct inode *dir) msg->rpc_proc = &nfs_procedures[NFSPROC_REMOVE]; } +static void nfs_proc_unlink_rpc_prepare(struct rpc_task *task, struct nfs_unlinkdata *data) +{ + rpc_call_start(task); +} + static int nfs_proc_unlink_done(struct rpc_task *task, struct inode *dir) { if (nfs_async_handle_expired_key(task)) @@ -372,6 +377,11 @@ nfs_proc_rename_setup(struct rpc_message *msg, struct inode *dir) msg->rpc_proc = &nfs_procedures[NFSPROC_RENAME]; } +static void nfs_proc_rename_rpc_prepare(struct rpc_task *task, struct nfs_renamedata *data) +{ + rpc_call_start(task); +} + static int nfs_proc_rename_done(struct rpc_task *task, struct inode *old_dir, struct inode *new_dir) @@ -651,6 +661,11 @@ static void nfs_proc_read_setup(struct nfs_read_data *data, struct rpc_message * msg->rpc_proc = &nfs_procedures[NFSPROC_READ]; } +static void nfs_proc_read_rpc_prepare(struct rpc_task *task, struct nfs_read_data *data) +{ + rpc_call_start(task); +} + static int nfs_write_done(struct rpc_task *task, struct nfs_write_data *data) { if (nfs_async_handle_expired_key(task)) @@ -668,6 +683,11 @@ static void nfs_proc_write_setup(struct nfs_write_data *data, struct rpc_message msg->rpc_proc = &nfs_procedures[NFSPROC_WRITE]; } +static void nfs_proc_write_rpc_prepare(struct rpc_task *task, struct nfs_write_data *data) +{ + rpc_call_start(task); +} + static void nfs_proc_commit_setup(struct nfs_write_data *data, struct rpc_message *msg) { @@ -721,9 +741,11 @@ const struct nfs_rpc_ops nfs_v2_clientops = { .create = nfs_proc_create, .remove = nfs_proc_remove, .unlink_setup = nfs_proc_unlink_setup, + .unlink_rpc_prepare = nfs_proc_unlink_rpc_prepare, .unlink_done = nfs_proc_unlink_done, .rename = nfs_proc_rename, .rename_setup = nfs_proc_rename_setup, + .rename_rpc_prepare = nfs_proc_rename_rpc_prepare, .rename_done = nfs_proc_rename_done, .link = nfs_proc_link, .symlink = nfs_proc_symlink, @@ -736,8 +758,10 @@ const struct nfs_rpc_ops nfs_v2_clientops = { .pathconf = nfs_proc_pathconf, .decode_dirent = nfs2_decode_dirent, .read_setup = nfs_proc_read_setup, + .read_rpc_prepare = nfs_proc_read_rpc_prepare, .read_done = nfs_read_done, .write_setup = nfs_proc_write_setup, + .write_rpc_prepare = nfs_proc_write_rpc_prepare, .write_done = nfs_write_done, .commit_setup = nfs_proc_commit_setup, .lock = nfs_proc_lock, diff --git a/fs/nfs/read.c b/fs/nfs/read.c index cfa175c..cc1f758 100644 --- a/fs/nfs/read.c +++ b/fs/nfs/read.c @@ -66,7 +66,6 @@ void nfs_readdata_free(struct nfs_read_data *p) void nfs_readdata_release(struct nfs_read_data *rdata) { - put_lseg(rdata->lseg); put_nfs_open_context(rdata->args.context); nfs_readdata_free(rdata); } @@ -465,23 +464,14 @@ static void nfs_readpage_release_partial(void *calldata) nfs_readdata_release(calldata); } -#if defined(CONFIG_NFS_V4_1) void nfs_read_prepare(struct rpc_task *task, void *calldata) { struct nfs_read_data *data = calldata; - - if (nfs4_setup_sequence(NFS_SERVER(data->inode), - &data->args.seq_args, &data->res.seq_res, - 0, task)) - return; - rpc_call_start(task); + NFS_PROTO(data->inode)->read_rpc_prepare(task, data); } -#endif /* CONFIG_NFS_V4_1 */ static const struct rpc_call_ops nfs_read_partial_ops = { -#if defined(CONFIG_NFS_V4_1) .rpc_call_prepare = nfs_read_prepare, -#endif /* CONFIG_NFS_V4_1 */ .rpc_call_done = nfs_readpage_result_partial, .rpc_release = nfs_readpage_release_partial, }; @@ -545,9 +535,7 @@ static void nfs_readpage_release_full(void *calldata) } static const struct rpc_call_ops nfs_read_full_ops = { -#if defined(CONFIG_NFS_V4_1) .rpc_call_prepare = nfs_read_prepare, -#endif /* CONFIG_NFS_V4_1 */ .rpc_call_done = nfs_readpage_result_full, .rpc_release = nfs_readpage_release_full, }; diff --git a/fs/nfs/super.c b/fs/nfs/super.c index 3dfa4f1..ccc4cdb 100644 --- a/fs/nfs/super.c +++ b/fs/nfs/super.c @@ -52,6 +52,8 @@ #include <linux/nfs_xdr.h> #include <linux/magic.h> #include <linux/parser.h> +#include <linux/nsproxy.h> +#include <linux/rcupdate.h> #include <asm/system.h> #include <asm/uaccess.h> @@ -79,7 +81,6 @@ enum { Opt_cto, Opt_nocto, Opt_ac, Opt_noac, Opt_lock, Opt_nolock, - Opt_v2, Opt_v3, Opt_v4, Opt_udp, Opt_tcp, Opt_rdma, Opt_acl, Opt_noacl, Opt_rdirplus, Opt_nordirplus, @@ -97,10 +98,10 @@ enum { Opt_namelen, Opt_mountport, Opt_mountvers, - Opt_nfsvers, Opt_minorversion, /* Mount options that take string arguments */ + Opt_nfsvers, Opt_sec, Opt_proto, Opt_mountproto, Opt_mounthost, Opt_addr, Opt_mountaddr, Opt_clientaddr, Opt_lookupcache, @@ -132,9 +133,6 @@ static const match_table_t nfs_mount_option_tokens = { { Opt_noac, "noac" }, { Opt_lock, "lock" }, { Opt_nolock, "nolock" }, - { Opt_v2, "v2" }, - { Opt_v3, "v3" }, - { Opt_v4, "v4" }, { Opt_udp, "udp" }, { Opt_tcp, "tcp" }, { Opt_rdma, "rdma" }, @@ -163,9 +161,10 @@ static const match_table_t nfs_mount_option_tokens = { { Opt_namelen, "namlen=%s" }, { Opt_mountport, "mountport=%s" }, { Opt_mountvers, "mountvers=%s" }, + { Opt_minorversion, "minorversion=%s" }, + { Opt_nfsvers, "nfsvers=%s" }, { Opt_nfsvers, "vers=%s" }, - { Opt_minorversion, "minorversion=%s" }, { Opt_sec, "sec=%s" }, { Opt_proto, "proto=%s" }, @@ -179,6 +178,9 @@ static const match_table_t nfs_mount_option_tokens = { { Opt_fscache_uniq, "fsc=%s" }, { Opt_local_lock, "local_lock=%s" }, + /* The following needs to be listed after all other options */ + { Opt_nfsvers, "v%s" }, + { Opt_err, NULL } }; @@ -259,6 +261,22 @@ static match_table_t nfs_local_lock_tokens = { { Opt_local_lock_err, NULL } }; +enum { + Opt_vers_2, Opt_vers_3, Opt_vers_4, Opt_vers_4_0, + Opt_vers_4_1, + + Opt_vers_err +}; + +static match_table_t nfs_vers_tokens = { + { Opt_vers_2, "2" }, + { Opt_vers_3, "3" }, + { Opt_vers_4, "4" }, + { Opt_vers_4_0, "4.0" }, + { Opt_vers_4_1, "4.1" }, + + { Opt_vers_err, NULL } +}; static void nfs_umount_begin(struct super_block *); static int nfs_statfs(struct dentry *, struct kstatfs *); @@ -620,7 +638,6 @@ static void nfs_show_nfsv4_options(struct seq_file *m, struct nfs_server *nfss, struct nfs_client *clp = nfss->nfs_client; seq_printf(m, ",clientaddr=%s", clp->cl_ipaddr); - seq_printf(m, ",minorversion=%u", clp->cl_minorversion); } #else static void nfs_show_nfsv4_options(struct seq_file *m, struct nfs_server *nfss, @@ -629,6 +646,15 @@ static void nfs_show_nfsv4_options(struct seq_file *m, struct nfs_server *nfss, } #endif +static void nfs_show_nfs_version(struct seq_file *m, + unsigned int version, + unsigned int minorversion) +{ + seq_printf(m, ",vers=%u", version); + if (version == 4) + seq_printf(m, ".%u", minorversion); +} + /* * Describe the mount options in force on this server representation */ @@ -656,7 +682,7 @@ static void nfs_show_mount_options(struct seq_file *m, struct nfs_server *nfss, u32 version = clp->rpc_ops->version; int local_flock, local_fcntl; - seq_printf(m, ",vers=%u", version); + nfs_show_nfs_version(m, version, clp->cl_minorversion); seq_printf(m, ",rsize=%u", nfss->rsize); seq_printf(m, ",wsize=%u", nfss->wsize); if (nfss->bsize != 0) @@ -676,8 +702,10 @@ static void nfs_show_mount_options(struct seq_file *m, struct nfs_server *nfss, else seq_puts(m, nfs_infop->nostr); } + rcu_read_lock(); seq_printf(m, ",proto=%s", rpc_peeraddr2str(nfss->client, RPC_DISPLAY_NETID)); + rcu_read_unlock(); if (version == 4) { if (nfss->port != NFS_PORT) seq_printf(m, ",port=%u", nfss->port); @@ -726,9 +754,11 @@ static int nfs_show_options(struct seq_file *m, struct dentry *root) nfs_show_mount_options(m, nfss, 0); + rcu_read_lock(); seq_printf(m, ",addr=%s", rpc_peeraddr2str(nfss->nfs_client->cl_rpcclient, RPC_DISPLAY_ADDR)); + rcu_read_unlock(); return 0; } @@ -745,7 +775,6 @@ static void show_sessions(struct seq_file *m, struct nfs_server *server) {} #endif #endif -#ifdef CONFIG_NFS_V4 #ifdef CONFIG_NFS_V4_1 static void show_pnfs(struct seq_file *m, struct nfs_server *server) { @@ -755,9 +784,26 @@ static void show_pnfs(struct seq_file *m, struct nfs_server *server) else seq_printf(m, "not configured"); } + +static void show_implementation_id(struct seq_file *m, struct nfs_server *nfss) +{ + if (nfss->nfs_client && nfss->nfs_client->impl_id) { + struct nfs41_impl_id *impl_id = nfss->nfs_client->impl_id; + seq_printf(m, "\n\timpl_id:\tname='%s',domain='%s'," + "date='%llu,%u'", + impl_id->name, impl_id->domain, + impl_id->date.seconds, impl_id->date.nseconds); + } +} #else -static void show_pnfs(struct seq_file *m, struct nfs_server *server) {} +#ifdef CONFIG_NFS_V4 +static void show_pnfs(struct seq_file *m, struct nfs_server *server) +{ +} #endif +static void show_implementation_id(struct seq_file *m, struct nfs_server *nfss) +{ +} #endif static int nfs_show_devname(struct seq_file *m, struct dentry *root) @@ -806,6 +852,8 @@ static int nfs_show_stats(struct seq_file *m, struct dentry *root) seq_printf(m, "\n\tage:\t%lu", (jiffies - nfss->mount_time) / HZ); + show_implementation_id(m, nfss); + seq_printf(m, "\n\tcaps:\t"); seq_printf(m, "caps=0x%x", nfss->caps); seq_printf(m, ",wtmult=%u", nfss->wtmult); @@ -908,6 +956,7 @@ static struct nfs_parsed_mount_data *nfs_alloc_parsed_mount_data(unsigned int ve data->auth_flavor_len = 1; data->version = version; data->minorversion = 0; + data->net = current->nsproxy->net_ns; security_init_mnt_opts(&data->lsm_opts); } return data; @@ -1052,6 +1101,40 @@ static int nfs_parse_security_flavors(char *value, return 1; } +static int nfs_parse_version_string(char *string, + struct nfs_parsed_mount_data *mnt, + substring_t *args) +{ + mnt->flags &= ~NFS_MOUNT_VER3; + switch (match_token(string, nfs_vers_tokens, args)) { + case Opt_vers_2: + mnt->version = 2; + break; + case Opt_vers_3: + mnt->flags |= NFS_MOUNT_VER3; + mnt->version = 3; + break; + case Opt_vers_4: + /* Backward compatibility option. In future, + * the mount program should always supply + * a NFSv4 minor version number. + */ + mnt->version = 4; + break; + case Opt_vers_4_0: + mnt->version = 4; + mnt->minorversion = 0; + break; + case Opt_vers_4_1: + mnt->version = 4; + mnt->minorversion = 1; + break; + default: + return 0; + } + return 1; +} + static int nfs_get_option_str(substring_t args[], char **option) { kfree(*option); @@ -1157,18 +1240,6 @@ static int nfs_parse_mount_options(char *raw, mnt->flags |= (NFS_MOUNT_LOCAL_FLOCK | NFS_MOUNT_LOCAL_FCNTL); break; - case Opt_v2: - mnt->flags &= ~NFS_MOUNT_VER3; - mnt->version = 2; - break; - case Opt_v3: - mnt->flags |= NFS_MOUNT_VER3; - mnt->version = 3; - break; - case Opt_v4: - mnt->flags &= ~NFS_MOUNT_VER3; - mnt->version = 4; - break; case Opt_udp: mnt->flags &= ~NFS_MOUNT_TCP; mnt->nfs_server.protocol = XPRT_TRANSPORT_UDP; @@ -1295,26 +1366,6 @@ static int nfs_parse_mount_options(char *raw, goto out_invalid_value; mnt->mount_server.version = option; break; - case Opt_nfsvers: - if (nfs_get_option_ul(args, &option)) - goto out_invalid_value; - switch (option) { - case NFS2_VERSION: - mnt->flags &= ~NFS_MOUNT_VER3; - mnt->version = 2; - break; - case NFS3_VERSION: - mnt->flags |= NFS_MOUNT_VER3; - mnt->version = 3; - break; - case NFS4_VERSION: - mnt->flags &= ~NFS_MOUNT_VER3; - mnt->version = 4; - break; - default: - goto out_invalid_value; - } - break; case Opt_minorversion: if (nfs_get_option_ul(args, &option)) goto out_invalid_value; @@ -1326,6 +1377,15 @@ static int nfs_parse_mount_options(char *raw, /* * options that take text values */ + case Opt_nfsvers: + string = match_strdup(args); + if (string == NULL) + goto out_nomem; + rc = nfs_parse_version_string(string, mnt, args); + kfree(string); + if (!rc) + goto out_invalid_value; + break; case Opt_sec: string = match_strdup(args); if (string == NULL) @@ -1405,7 +1465,7 @@ static int nfs_parse_mount_options(char *raw, if (string == NULL) goto out_nomem; mnt->nfs_server.addrlen = - rpc_pton(string, strlen(string), + rpc_pton(mnt->net, string, strlen(string), (struct sockaddr *) &mnt->nfs_server.address, sizeof(mnt->nfs_server.address)); @@ -1427,7 +1487,7 @@ static int nfs_parse_mount_options(char *raw, if (string == NULL) goto out_nomem; mnt->mount_server.addrlen = - rpc_pton(string, strlen(string), + rpc_pton(mnt->net, string, strlen(string), (struct sockaddr *) &mnt->mount_server.address, sizeof(mnt->mount_server.address)); @@ -1516,6 +1576,9 @@ static int nfs_parse_mount_options(char *raw, if (!sloppy && invalid_option) return 0; + if (mnt->minorversion && mnt->version != 4) + goto out_minorversion_mismatch; + /* * verify that any proto=/mountproto= options match the address * familiies in the addr=/mountaddr= options. @@ -1549,6 +1612,10 @@ out_invalid_address: out_invalid_value: printk(KERN_INFO "NFS: bad mount option value specified: %s\n", p); return 0; +out_minorversion_mismatch: + printk(KERN_INFO "NFS: mount option vers=%u does not support " + "minorversion=%u\n", mnt->version, mnt->minorversion); + return 0; out_nomem: printk(KERN_INFO "NFS: not enough memory to parse option\n"); return 0; @@ -1622,6 +1689,7 @@ static int nfs_try_mount(struct nfs_parsed_mount_data *args, .noresvport = args->flags & NFS_MOUNT_NORESVPORT, .auth_flav_len = &server_authlist_len, .auth_flavs = server_authlist, + .net = args->net, }; int status; @@ -2047,7 +2115,7 @@ static inline void nfs_initialise_sb(struct super_block *sb) /* We probably want something more informative here */ snprintf(sb->s_id, sizeof(sb->s_id), - "%x:%x", MAJOR(sb->s_dev), MINOR(sb->s_dev)); + "%u:%u", MAJOR(sb->s_dev), MINOR(sb->s_dev)); if (sb->s_blocksize == 0) sb->s_blocksize = nfs_block_bits(server->wsize, @@ -2499,12 +2567,6 @@ static int nfs4_validate_text_mount_data(void *options, return -EINVAL; } - if (args->client_address == NULL) { - dfprintk(MOUNT, - "NFS4: mount program didn't pass callback address\n"); - return -EINVAL; - } - return nfs_parse_devname(dev_name, &args->nfs_server.hostname, NFS4_MAXNAMLEN, @@ -2663,8 +2725,7 @@ nfs4_remote_mount(struct file_system_type *fs_type, int flags, if (!s->s_root) { /* initial superblock/root creation */ nfs4_fill_super(s); - nfs_fscache_get_super_cookie( - s, data ? data->fscache_uniq : NULL, NULL); + nfs_fscache_get_super_cookie(s, data->fscache_uniq, NULL); } mntroot = nfs4_get_root(s, mntfh, dev_name); diff --git a/fs/nfs/sysctl.c b/fs/nfs/sysctl.c index 978aaeb..ad4d2e7 100644 --- a/fs/nfs/sysctl.c +++ b/fs/nfs/sysctl.c @@ -32,7 +32,6 @@ static ctl_table nfs_cb_sysctls[] = { .extra1 = (int *)&nfs_set_port_min, .extra2 = (int *)&nfs_set_port_max, }, -#ifndef CONFIG_NFS_USE_NEW_IDMAPPER { .procname = "idmap_cache_timeout", .data = &nfs_idmap_cache_timeout, @@ -40,7 +39,6 @@ static ctl_table nfs_cb_sysctls[] = { .mode = 0644, .proc_handler = proc_dointvec_jiffies, }, -#endif /* CONFIG_NFS_USE_NEW_IDMAPPER */ #endif { .procname = "nfs_mountpoint_timeout", diff --git a/fs/nfs/unlink.c b/fs/nfs/unlink.c index 4f9319a..3210a03 100644 --- a/fs/nfs/unlink.c +++ b/fs/nfs/unlink.c @@ -20,15 +20,6 @@ #include "iostat.h" #include "delegation.h" -struct nfs_unlinkdata { - struct hlist_node list; - struct nfs_removeargs args; - struct nfs_removeres res; - struct inode *dir; - struct rpc_cred *cred; - struct nfs_fattr dir_attr; -}; - /** * nfs_free_unlinkdata - release data from a sillydelete operation. * @data: pointer to unlink structure. @@ -107,25 +98,16 @@ static void nfs_async_unlink_release(void *calldata) nfs_sb_deactive(sb); } -#if defined(CONFIG_NFS_V4_1) -void nfs_unlink_prepare(struct rpc_task *task, void *calldata) +static void nfs_unlink_prepare(struct rpc_task *task, void *calldata) { struct nfs_unlinkdata *data = calldata; - struct nfs_server *server = NFS_SERVER(data->dir); - - if (nfs4_setup_sequence(server, &data->args.seq_args, - &data->res.seq_res, 1, task)) - return; - rpc_call_start(task); + NFS_PROTO(data->dir)->unlink_rpc_prepare(task, data); } -#endif /* CONFIG_NFS_V4_1 */ static const struct rpc_call_ops nfs_unlink_ops = { .rpc_call_done = nfs_async_unlink_done, .rpc_release = nfs_async_unlink_release, -#if defined(CONFIG_NFS_V4_1) .rpc_call_prepare = nfs_unlink_prepare, -#endif /* CONFIG_NFS_V4_1 */ }; static int nfs_do_call_unlink(struct dentry *parent, struct inode *dir, struct nfs_unlinkdata *data) @@ -341,18 +323,6 @@ nfs_cancel_async_unlink(struct dentry *dentry) spin_unlock(&dentry->d_lock); } -struct nfs_renamedata { - struct nfs_renameargs args; - struct nfs_renameres res; - struct rpc_cred *cred; - struct inode *old_dir; - struct dentry *old_dentry; - struct nfs_fattr old_fattr; - struct inode *new_dir; - struct dentry *new_dentry; - struct nfs_fattr new_fattr; -}; - /** * nfs_async_rename_done - Sillyrename post-processing * @task: rpc_task of the sillyrename @@ -403,25 +373,16 @@ static void nfs_async_rename_release(void *calldata) kfree(data); } -#if defined(CONFIG_NFS_V4_1) static void nfs_rename_prepare(struct rpc_task *task, void *calldata) { struct nfs_renamedata *data = calldata; - struct nfs_server *server = NFS_SERVER(data->old_dir); - - if (nfs4_setup_sequence(server, &data->args.seq_args, - &data->res.seq_res, 1, task)) - return; - rpc_call_start(task); + NFS_PROTO(data->old_dir)->rename_rpc_prepare(task, data); } -#endif /* CONFIG_NFS_V4_1 */ static const struct rpc_call_ops nfs_rename_ops = { .rpc_call_done = nfs_async_rename_done, .rpc_release = nfs_async_rename_release, -#if defined(CONFIG_NFS_V4_1) .rpc_call_prepare = nfs_rename_prepare, -#endif /* CONFIG_NFS_V4_1 */ }; /** diff --git a/fs/nfs/write.c b/fs/nfs/write.c index 834f0fe..2c68818 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -100,7 +100,6 @@ void nfs_writedata_free(struct nfs_write_data *p) void nfs_writedata_release(struct nfs_write_data *wdata) { - put_lseg(wdata->lseg); put_nfs_open_context(wdata->args.context); nfs_writedata_free(wdata); } @@ -236,10 +235,10 @@ static struct nfs_page *nfs_find_and_lock_request(struct page *page, bool nonblo req = nfs_page_find_request_locked(page); if (req == NULL) break; - if (nfs_set_page_tag_locked(req)) + if (nfs_lock_request_dontget(req)) break; /* Note: If we hold the page lock, as is the case in nfs_writepage, - * then the call to nfs_set_page_tag_locked() will always + * then the call to nfs_lock_request_dontget() will always * succeed provided that someone hasn't already marked the * request as dirty (in which case we don't care). */ @@ -375,21 +374,14 @@ out_err: /* * Insert a write request into an inode */ -static int nfs_inode_add_request(struct inode *inode, struct nfs_page *req) +static void nfs_inode_add_request(struct inode *inode, struct nfs_page *req) { struct nfs_inode *nfsi = NFS_I(inode); - int error; - - error = radix_tree_preload(GFP_NOFS); - if (error != 0) - goto out; /* Lock the request! */ nfs_lock_request_dontget(req); spin_lock(&inode->i_lock); - error = radix_tree_insert(&nfsi->nfs_page_tree, req->wb_index, req); - BUG_ON(error); if (!nfsi->npages && nfs_have_delegation(inode, FMODE_WRITE)) inode->i_version++; set_bit(PG_MAPPED, &req->wb_flags); @@ -397,12 +389,7 @@ static int nfs_inode_add_request(struct inode *inode, struct nfs_page *req) set_page_private(req->wb_page, (unsigned long)req); nfsi->npages++; kref_get(&req->wb_kref); - radix_tree_tag_set(&nfsi->nfs_page_tree, req->wb_index, - NFS_PAGE_TAG_LOCKED); spin_unlock(&inode->i_lock); - radix_tree_preload_end(); -out: - return error; } /* @@ -419,7 +406,6 @@ static void nfs_inode_remove_request(struct nfs_page *req) set_page_private(req->wb_page, 0); ClearPagePrivate(req->wb_page); clear_bit(PG_MAPPED, &req->wb_flags); - radix_tree_delete(&nfsi->nfs_page_tree, req->wb_index); nfsi->npages--; spin_unlock(&inode->i_lock); nfs_release_request(req); @@ -432,39 +418,90 @@ nfs_mark_request_dirty(struct nfs_page *req) } #if defined(CONFIG_NFS_V3) || defined(CONFIG_NFS_V4) -/* - * Add a request to the inode's commit list. +/** + * nfs_request_add_commit_list - add request to a commit list + * @req: pointer to a struct nfs_page + * @head: commit list head + * + * This sets the PG_CLEAN bit, updates the inode global count of + * number of outstanding requests requiring a commit as well as + * the MM page stats. + * + * The caller must _not_ hold the inode->i_lock, but must be + * holding the nfs_page lock. */ -static void -nfs_mark_request_commit(struct nfs_page *req, struct pnfs_layout_segment *lseg) +void +nfs_request_add_commit_list(struct nfs_page *req, struct list_head *head) { struct inode *inode = req->wb_context->dentry->d_inode; - struct nfs_inode *nfsi = NFS_I(inode); - spin_lock(&inode->i_lock); set_bit(PG_CLEAN, &(req)->wb_flags); - radix_tree_tag_set(&nfsi->nfs_page_tree, - req->wb_index, - NFS_PAGE_TAG_COMMIT); - nfsi->ncommit++; + spin_lock(&inode->i_lock); + nfs_list_add_request(req, head); + NFS_I(inode)->ncommit++; spin_unlock(&inode->i_lock); - pnfs_mark_request_commit(req, lseg); inc_zone_page_state(req->wb_page, NR_UNSTABLE_NFS); inc_bdi_stat(req->wb_page->mapping->backing_dev_info, BDI_RECLAIMABLE); __mark_inode_dirty(inode, I_DIRTY_DATASYNC); } +EXPORT_SYMBOL_GPL(nfs_request_add_commit_list); -static int +/** + * nfs_request_remove_commit_list - Remove request from a commit list + * @req: pointer to a nfs_page + * + * This clears the PG_CLEAN bit, and updates the inode global count of + * number of outstanding requests requiring a commit + * It does not update the MM page stats. + * + * The caller _must_ hold the inode->i_lock and the nfs_page lock. + */ +void +nfs_request_remove_commit_list(struct nfs_page *req) +{ + struct inode *inode = req->wb_context->dentry->d_inode; + + if (!test_and_clear_bit(PG_CLEAN, &(req)->wb_flags)) + return; + nfs_list_remove_request(req); + NFS_I(inode)->ncommit--; +} +EXPORT_SYMBOL_GPL(nfs_request_remove_commit_list); + + +/* + * Add a request to the inode's commit list. + */ +static void +nfs_mark_request_commit(struct nfs_page *req, struct pnfs_layout_segment *lseg) +{ + struct inode *inode = req->wb_context->dentry->d_inode; + + if (pnfs_mark_request_commit(req, lseg)) + return; + nfs_request_add_commit_list(req, &NFS_I(inode)->commit_list); +} + +static void +nfs_clear_page_commit(struct page *page) +{ + dec_zone_page_state(page, NR_UNSTABLE_NFS); + dec_bdi_stat(page->mapping->backing_dev_info, BDI_RECLAIMABLE); +} + +static void nfs_clear_request_commit(struct nfs_page *req) { - struct page *page = req->wb_page; + if (test_bit(PG_CLEAN, &req->wb_flags)) { + struct inode *inode = req->wb_context->dentry->d_inode; - if (test_and_clear_bit(PG_CLEAN, &(req)->wb_flags)) { - dec_zone_page_state(page, NR_UNSTABLE_NFS); - dec_bdi_stat(page->mapping->backing_dev_info, BDI_RECLAIMABLE); - return 1; + if (!pnfs_clear_request_commit(req)) { + spin_lock(&inode->i_lock); + nfs_request_remove_commit_list(req); + spin_unlock(&inode->i_lock); + } + nfs_clear_page_commit(req->wb_page); } - return 0; } static inline @@ -491,15 +528,14 @@ int nfs_reschedule_unstable_write(struct nfs_page *req, return 0; } #else -static inline void +static void nfs_mark_request_commit(struct nfs_page *req, struct pnfs_layout_segment *lseg) { } -static inline int +static void nfs_clear_request_commit(struct nfs_page *req) { - return 0; } static inline @@ -520,46 +556,65 @@ int nfs_reschedule_unstable_write(struct nfs_page *req, static int nfs_need_commit(struct nfs_inode *nfsi) { - return radix_tree_tagged(&nfsi->nfs_page_tree, NFS_PAGE_TAG_COMMIT); + return nfsi->ncommit > 0; +} + +/* i_lock held by caller */ +static int +nfs_scan_commit_list(struct list_head *src, struct list_head *dst, int max, + spinlock_t *lock) +{ + struct nfs_page *req, *tmp; + int ret = 0; + + list_for_each_entry_safe(req, tmp, src, wb_list) { + if (!nfs_lock_request(req)) + continue; + if (cond_resched_lock(lock)) + list_safe_reset_next(req, tmp, wb_list); + nfs_request_remove_commit_list(req); + nfs_list_add_request(req, dst); + ret++; + if (ret == max) + break; + } + return ret; } /* * nfs_scan_commit - Scan an inode for commit requests * @inode: NFS inode to scan * @dst: destination list - * @idx_start: lower bound of page->index to scan. - * @npages: idx_start + npages sets the upper bound to scan. * * Moves requests from the inode's 'commit' request list. * The requests are *not* checked to ensure that they form a contiguous set. */ static int -nfs_scan_commit(struct inode *inode, struct list_head *dst, pgoff_t idx_start, unsigned int npages) +nfs_scan_commit(struct inode *inode, struct list_head *dst) { struct nfs_inode *nfsi = NFS_I(inode); - int ret; - - if (!nfs_need_commit(nfsi)) - return 0; + int ret = 0; spin_lock(&inode->i_lock); - ret = nfs_scan_list(nfsi, dst, idx_start, npages, NFS_PAGE_TAG_COMMIT); - if (ret > 0) - nfsi->ncommit -= ret; - spin_unlock(&inode->i_lock); - - if (nfs_need_commit(NFS_I(inode))) - __mark_inode_dirty(inode, I_DIRTY_DATASYNC); + if (nfsi->ncommit > 0) { + const int max = INT_MAX; + ret = nfs_scan_commit_list(&nfsi->commit_list, dst, max, + &inode->i_lock); + ret += pnfs_scan_commit_lists(inode, max - ret, + &inode->i_lock); + } + spin_unlock(&inode->i_lock); return ret; } + #else static inline int nfs_need_commit(struct nfs_inode *nfsi) { return 0; } -static inline int nfs_scan_commit(struct inode *inode, struct list_head *dst, pgoff_t idx_start, unsigned int npages) +static inline int nfs_scan_commit(struct inode *inode, struct list_head *dst) { return 0; } @@ -604,7 +659,7 @@ static struct nfs_page *nfs_try_to_update_request(struct inode *inode, || end < req->wb_offset) goto out_flushme; - if (nfs_set_page_tag_locked(req)) + if (nfs_lock_request_dontget(req)) break; /* The request is locked, so wait and then retry */ @@ -616,13 +671,6 @@ static struct nfs_page *nfs_try_to_update_request(struct inode *inode, spin_lock(&inode->i_lock); } - if (nfs_clear_request_commit(req) && - radix_tree_tag_clear(&NFS_I(inode)->nfs_page_tree, - req->wb_index, NFS_PAGE_TAG_COMMIT) != NULL) { - NFS_I(inode)->ncommit--; - pnfs_clear_request_commit(req); - } - /* Okay, the request matches. Update the region */ if (offset < req->wb_offset) { req->wb_offset = offset; @@ -634,6 +682,7 @@ static struct nfs_page *nfs_try_to_update_request(struct inode *inode, req->wb_bytes = rqend - req->wb_offset; out_unlock: spin_unlock(&inode->i_lock); + nfs_clear_request_commit(req); return req; out_flushme: spin_unlock(&inode->i_lock); @@ -655,7 +704,6 @@ static struct nfs_page * nfs_setup_write_request(struct nfs_open_context* ctx, { struct inode *inode = page->mapping->host; struct nfs_page *req; - int error; req = nfs_try_to_update_request(inode, page, offset, bytes); if (req != NULL) @@ -663,11 +711,7 @@ static struct nfs_page * nfs_setup_write_request(struct nfs_open_context* ctx, req = nfs_create_request(ctx, inode, page, offset, bytes); if (IS_ERR(req)) goto out; - error = nfs_inode_add_request(inode, req); - if (error != 0) { - nfs_release_request(req); - req = ERR_PTR(error); - } + nfs_inode_add_request(inode, req); out: return req; } @@ -684,7 +728,7 @@ static int nfs_writepage_setup(struct nfs_open_context *ctx, struct page *page, nfs_grow_file(page, offset, count); nfs_mark_uptodate(page, req->wb_pgbase, req->wb_bytes); nfs_mark_request_dirty(req); - nfs_clear_page_tag_locked(req); + nfs_unlock_request(req); return 0; } @@ -777,7 +821,7 @@ static void nfs_writepage_release(struct nfs_page *req, if (PageError(req->wb_page) || !nfs_reschedule_unstable_write(req, data)) nfs_inode_remove_request(req); - nfs_clear_page_tag_locked(req); + nfs_unlock_request(req); nfs_end_page_writeback(page); } @@ -925,7 +969,7 @@ static void nfs_redirty_request(struct nfs_page *req) struct page *page = req->wb_page; nfs_mark_request_dirty(req); - nfs_clear_page_tag_locked(req); + nfs_unlock_request(req); nfs_end_page_writeback(page); } @@ -1128,23 +1172,14 @@ out: nfs_writedata_release(calldata); } -#if defined(CONFIG_NFS_V4_1) void nfs_write_prepare(struct rpc_task *task, void *calldata) { struct nfs_write_data *data = calldata; - - if (nfs4_setup_sequence(NFS_SERVER(data->inode), - &data->args.seq_args, - &data->res.seq_res, 1, task)) - return; - rpc_call_start(task); + NFS_PROTO(data->inode)->write_rpc_prepare(task, data); } -#endif /* CONFIG_NFS_V4_1 */ static const struct rpc_call_ops nfs_write_partial_ops = { -#if defined(CONFIG_NFS_V4_1) .rpc_call_prepare = nfs_write_prepare, -#endif /* CONFIG_NFS_V4_1 */ .rpc_call_done = nfs_writeback_done_partial, .rpc_release = nfs_writeback_release_partial, }; @@ -1199,16 +1234,14 @@ static void nfs_writeback_release_full(void *calldata) remove_request: nfs_inode_remove_request(req); next: - nfs_clear_page_tag_locked(req); + nfs_unlock_request(req); nfs_end_page_writeback(page); } nfs_writedata_release(calldata); } static const struct rpc_call_ops nfs_write_full_ops = { -#if defined(CONFIG_NFS_V4_1) .rpc_call_prepare = nfs_write_prepare, -#endif /* CONFIG_NFS_V4_1 */ .rpc_call_done = nfs_writeback_done_full, .rpc_release = nfs_writeback_release_full, }; @@ -1325,7 +1358,6 @@ void nfs_commitdata_release(void *data) { struct nfs_write_data *wdata = data; - put_lseg(wdata->lseg); put_nfs_open_context(wdata->args.context); nfs_commit_free(wdata); } @@ -1411,7 +1443,7 @@ void nfs_retry_commit(struct list_head *page_list, dec_zone_page_state(req->wb_page, NR_UNSTABLE_NFS); dec_bdi_stat(req->wb_page->mapping->backing_dev_info, BDI_RECLAIMABLE); - nfs_clear_page_tag_locked(req); + nfs_unlock_request(req); } } EXPORT_SYMBOL_GPL(nfs_retry_commit); @@ -1460,7 +1492,7 @@ void nfs_commit_release_pages(struct nfs_write_data *data) while (!list_empty(&data->pages)) { req = nfs_list_entry(data->pages.next); nfs_list_remove_request(req); - nfs_clear_request_commit(req); + nfs_clear_page_commit(req->wb_page); dprintk("NFS: commit (%s/%lld %d@%lld)", req->wb_context->dentry->d_sb->s_id, @@ -1486,7 +1518,7 @@ void nfs_commit_release_pages(struct nfs_write_data *data) dprintk(" mismatch\n"); nfs_mark_request_dirty(req); next: - nfs_clear_page_tag_locked(req); + nfs_unlock_request(req); } } EXPORT_SYMBOL_GPL(nfs_commit_release_pages); @@ -1501,9 +1533,7 @@ static void nfs_commit_release(void *calldata) } static const struct rpc_call_ops nfs_commit_ops = { -#if defined(CONFIG_NFS_V4_1) .rpc_call_prepare = nfs_write_prepare, -#endif /* CONFIG_NFS_V4_1 */ .rpc_call_done = nfs_commit_done, .rpc_release = nfs_commit_release, }; @@ -1517,7 +1547,7 @@ int nfs_commit_inode(struct inode *inode, int how) res = nfs_commit_set_lock(NFS_I(inode), may_wait); if (res <= 0) goto out_mark_dirty; - res = nfs_scan_commit(inode, &head, 0, 0); + res = nfs_scan_commit(inode, &head); if (res) { int error; @@ -1635,6 +1665,7 @@ int nfs_wb_page_cancel(struct inode *inode, struct page *page) if (req == NULL) break; if (nfs_lock_request_dontget(req)) { + nfs_clear_request_commit(req); nfs_inode_remove_request(req); /* * In case nfs_inode_remove_request has marked the diff --git a/fs/nfsd/nfs4callback.c b/fs/nfsd/nfs4callback.c index 6f3ebb4..0e262f3 100644 --- a/fs/nfsd/nfs4callback.c +++ b/fs/nfsd/nfs4callback.c @@ -605,24 +605,24 @@ static struct rpc_version nfs_cb_version4 = { .procs = nfs4_cb_procedures }; -static struct rpc_version *nfs_cb_version[] = { +static const struct rpc_version *nfs_cb_version[] = { &nfs_cb_version4, }; -static struct rpc_program cb_program; +static const struct rpc_program cb_program; static struct rpc_stat cb_stats = { .program = &cb_program }; #define NFS4_CALLBACK 0x40000000 -static struct rpc_program cb_program = { +static const struct rpc_program cb_program = { .name = "nfs4_cb", .number = NFS4_CALLBACK, .nrvers = ARRAY_SIZE(nfs_cb_version), .version = nfs_cb_version, .stats = &cb_stats, - .pipe_dir_name = "/nfsd4_cb", + .pipe_dir_name = "nfsd4_cb", }; static int max_cb_time(void) diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index e8c98f0..c5cddd6 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -1308,7 +1308,7 @@ gen_callback(struct nfs4_client *clp, struct nfsd4_setclientid *se, struct svc_r else goto out_err; - conn->cb_addrlen = rpc_uaddr2sockaddr(se->se_callback_addr_val, + conn->cb_addrlen = rpc_uaddr2sockaddr(&init_net, se->se_callback_addr_val, se->se_callback_addr_len, (struct sockaddr *)&conn->cb_addr, sizeof(conn->cb_addr)); diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c index 748eda9..64c24af 100644 --- a/fs/nfsd/nfsctl.c +++ b/fs/nfsd/nfsctl.c @@ -223,7 +223,7 @@ static ssize_t write_unlock_ip(struct file *file, char *buf, size_t size) if (qword_get(&buf, fo_path, size) < 0) return -EINVAL; - if (rpc_pton(fo_path, size, sap, salen) == 0) + if (rpc_pton(&init_net, fo_path, size, sap, salen) == 0) return -EINVAL; return nlmsvc_unlock_all_by_ip(sap); @@ -722,7 +722,7 @@ static ssize_t __write_ports_addxprt(char *buf) nfsd_serv->sv_nrthreads--; return 0; out_close: - xprt = svc_find_xprt(nfsd_serv, transport, PF_INET, port); + xprt = svc_find_xprt(nfsd_serv, transport, &init_net, PF_INET, port); if (xprt != NULL) { svc_close_xprt(xprt); svc_xprt_put(xprt); @@ -748,7 +748,7 @@ static ssize_t __write_ports_delxprt(char *buf) if (port < 1 || port > USHRT_MAX || nfsd_serv == NULL) return -EINVAL; - xprt = svc_find_xprt(nfsd_serv, transport, AF_UNSPEC, port); + xprt = svc_find_xprt(nfsd_serv, transport, &init_net, AF_UNSPEC, port); if (xprt == NULL) return -ENOTCONN; diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c index eda7d7e..fce472f 100644 --- a/fs/nfsd/nfssvc.c +++ b/fs/nfsd/nfssvc.c @@ -251,13 +251,13 @@ static void nfsd_shutdown(void) nfsd_up = false; } -static void nfsd_last_thread(struct svc_serv *serv) +static void nfsd_last_thread(struct svc_serv *serv, struct net *net) { /* When last nfsd thread exits we need to do some clean-up */ nfsd_serv = NULL; nfsd_shutdown(); - svc_rpcb_cleanup(serv); + svc_rpcb_cleanup(serv, net); printk(KERN_WARNING "nfsd: last server has exited, flushing export " "cache\n"); diff --git a/fs/nfsd/stats.c b/fs/nfsd/stats.c index a2e2402..6d4521f 100644 --- a/fs/nfsd/stats.c +++ b/fs/nfsd/stats.c @@ -25,6 +25,7 @@ #include <linux/module.h> #include <linux/sunrpc/stats.h> #include <linux/nfsd/stats.h> +#include <net/net_namespace.h> #include "nfsd.h" @@ -94,11 +95,11 @@ static const struct file_operations nfsd_proc_fops = { void nfsd_stat_init(void) { - svc_proc_register(&nfsd_svcstats, &nfsd_proc_fops); + svc_proc_register(&init_net, &nfsd_svcstats, &nfsd_proc_fops); } void nfsd_stat_shutdown(void) { - svc_proc_unregister("nfsd"); + svc_proc_unregister(&init_net, "nfsd"); } |