diff options
43 files changed, 1583 insertions, 896 deletions
@@ -1664,30 +1664,6 @@ config NFS_V4 If unsure, say N. -config NFS_DIRECTIO - bool "Allow direct I/O on NFS files" - depends on NFS_FS - help - This option enables applications to perform uncached I/O on files - in NFS file systems using the O_DIRECT open() flag. When O_DIRECT - is set for a file, its data is not cached in the system's page - cache. Data is moved to and from user-level application buffers - directly. Unlike local disk-based file systems, NFS O_DIRECT has - no alignment restrictions. - - Unless your program is designed to use O_DIRECT properly, you are - much better off allowing the NFS client to manage data caching for - you. Misusing O_DIRECT can cause poor server performance or network - storms. This kernel build option defaults OFF to avoid exposing - system administrators unwittingly to a potentially hazardous - feature. - - For more details on NFS O_DIRECT, see fs/nfs/direct.c. - - If unsure, say N. This reduces the size of the NFS client, and - causes open() to return EINVAL if a file residing in NFS is - opened with the O_DIRECT flag. - config NFSD tristate "NFS server support" depends on INET @@ -1808,15 +1784,33 @@ config SUNRPC_XPRT_RDMA tristate depends on SUNRPC && INFINIBAND && EXPERIMENTAL default SUNRPC && INFINIBAND + help + This option enables an RPC client transport capability that + allows the NFS client to mount servers via an RDMA-enabled + transport. + + To compile RPC client RDMA transport support as a module, + choose M here: the module will be called xprtrdma. + + If unsure, say N. config SUNRPC_BIND34 bool "Support for rpcbind versions 3 & 4 (EXPERIMENTAL)" depends on SUNRPC && EXPERIMENTAL + default n help - Provides kernel support for querying rpcbind servers via versions 3 - and 4 of the rpcbind protocol. The kernel automatically falls back - to version 2 if a remote rpcbind service does not support versions - 3 or 4. + RPC requests over IPv6 networks require support for larger + addresses when performing an RPC bind. Sun added support for + IPv6 addressing by creating two new versions of the rpcbind + protocol (RFC 1833). + + This option enables support in the kernel RPC client for + querying rpcbind servers via versions 3 and 4 of the rpcbind + protocol. The kernel automatically falls back to version 2 + if a remote rpcbind service does not support versions 3 or 4. + By themselves, these new versions do not provide support for + RPC over IPv6, but the new protocol versions are necessary to + support it. If unsure, say N to get traditional behavior (version 2 rpcbind requests only). @@ -1830,12 +1824,13 @@ config RPCSEC_GSS_KRB5 select CRYPTO_DES select CRYPTO_CBC help - Provides for secure RPC calls by means of a gss-api - mechanism based on Kerberos V5. This is required for - NFSv4. + Choose Y here to enable Secure RPC using the Kerberos version 5 + GSS-API mechanism (RFC 1964). - Note: Requires an auxiliary userspace daemon which may be found on - http://www.citi.umich.edu/projects/nfsv4/ + Secure RPC calls with Kerberos require an auxiliary user-space + daemon which may be found in the Linux nfs-utils package + available from http://linux-nfs.org/. In addition, user-space + Kerberos support should be installed. If unsure, say N. @@ -1849,11 +1844,12 @@ config RPCSEC_GSS_SPKM3 select CRYPTO_CAST5 select CRYPTO_CBC help - Provides for secure RPC calls by means of a gss-api - mechanism based on the SPKM3 public-key mechanism. + Choose Y here to enable Secure RPC using the SPKM3 public key + GSS-API mechansim (RFC 2025). - Note: Requires an auxiliary userspace daemon which may be found on - http://www.citi.umich.edu/projects/nfsv4/ + Secure RPC calls with SPKM3 require an auxiliary userspace + daemon which may be found in the Linux nfs-utils package + available from http://linux-nfs.org/. If unsure, say N. diff --git a/fs/lockd/clntproc.c b/fs/lockd/clntproc.c index b6b74a6..40b16f2 100644 --- a/fs/lockd/clntproc.c +++ b/fs/lockd/clntproc.c @@ -155,8 +155,6 @@ static void nlmclnt_release_lockargs(struct nlm_rqst *req) int nlmclnt_proc(struct nlm_host *host, int cmd, struct file_lock *fl) { struct nlm_rqst *call; - sigset_t oldset; - unsigned long flags; int status; nlm_get_host(host); @@ -168,22 +166,6 @@ int nlmclnt_proc(struct nlm_host *host, int cmd, struct file_lock *fl) /* Set up the argument struct */ nlmclnt_setlockargs(call, fl); - /* Keep the old signal mask */ - spin_lock_irqsave(¤t->sighand->siglock, flags); - oldset = current->blocked; - - /* If we're cleaning up locks because the process is exiting, - * perform the RPC call asynchronously. */ - if ((IS_SETLK(cmd) || IS_SETLKW(cmd)) - && fl->fl_type == F_UNLCK - && (current->flags & PF_EXITING)) { - sigfillset(¤t->blocked); /* Mask all signals */ - recalc_sigpending(); - - call->a_flags = RPC_TASK_ASYNC; - } - spin_unlock_irqrestore(¤t->sighand->siglock, flags); - if (IS_SETLK(cmd) || IS_SETLKW(cmd)) { if (fl->fl_type != F_UNLCK) { call->a_args.block = IS_SETLKW(cmd) ? 1 : 0; @@ -198,11 +180,6 @@ int nlmclnt_proc(struct nlm_host *host, int cmd, struct file_lock *fl) fl->fl_ops->fl_release_private(fl); fl->fl_ops = NULL; - spin_lock_irqsave(¤t->sighand->siglock, flags); - current->blocked = oldset; - recalc_sigpending(); - spin_unlock_irqrestore(¤t->sighand->siglock, flags); - dprintk("lockd: clnt proc returns %d\n", status); return status; } @@ -221,6 +198,7 @@ struct nlm_rqst *nlm_alloc_call(struct nlm_host *host) for(;;) { call = kzalloc(sizeof(*call), GFP_KERNEL); if (call != NULL) { + atomic_set(&call->a_count, 1); locks_init_lock(&call->a_args.lock.fl); locks_init_lock(&call->a_res.lock.fl); call->a_host = host; @@ -237,6 +215,8 @@ struct nlm_rqst *nlm_alloc_call(struct nlm_host *host) void nlm_release_call(struct nlm_rqst *call) { + if (!atomic_dec_and_test(&call->a_count)) + return; nlm_release_host(call->a_host); nlmclnt_release_lockargs(call); kfree(call); @@ -267,7 +247,7 @@ static int nlm_wait_on_grace(wait_queue_head_t *queue) * Generic NLM call */ static int -nlmclnt_call(struct nlm_rqst *req, u32 proc) +nlmclnt_call(struct rpc_cred *cred, struct nlm_rqst *req, u32 proc) { struct nlm_host *host = req->a_host; struct rpc_clnt *clnt; @@ -276,6 +256,7 @@ nlmclnt_call(struct nlm_rqst *req, u32 proc) struct rpc_message msg = { .rpc_argp = argp, .rpc_resp = resp, + .rpc_cred = cred, }; int status; @@ -343,10 +324,16 @@ in_grace_period: /* * Generic NLM call, async version. */ -static int __nlm_async_call(struct nlm_rqst *req, u32 proc, struct rpc_message *msg, const struct rpc_call_ops *tk_ops) +static struct rpc_task *__nlm_async_call(struct nlm_rqst *req, u32 proc, struct rpc_message *msg, const struct rpc_call_ops *tk_ops) { struct nlm_host *host = req->a_host; struct rpc_clnt *clnt; + struct rpc_task_setup task_setup_data = { + .rpc_message = msg, + .callback_ops = tk_ops, + .callback_data = req, + .flags = RPC_TASK_ASYNC, + }; dprintk("lockd: call procedure %d on %s (async)\n", (int)proc, host->h_name); @@ -356,21 +343,36 @@ static int __nlm_async_call(struct nlm_rqst *req, u32 proc, struct rpc_message * if (clnt == NULL) goto out_err; msg->rpc_proc = &clnt->cl_procinfo[proc]; + task_setup_data.rpc_client = clnt; /* bootstrap and kick off the async RPC call */ - return rpc_call_async(clnt, msg, RPC_TASK_ASYNC, tk_ops, req); + return rpc_run_task(&task_setup_data); out_err: tk_ops->rpc_release(req); - return -ENOLCK; + return ERR_PTR(-ENOLCK); } +static int nlm_do_async_call(struct nlm_rqst *req, u32 proc, struct rpc_message *msg, const struct rpc_call_ops *tk_ops) +{ + struct rpc_task *task; + + task = __nlm_async_call(req, proc, msg, tk_ops); + if (IS_ERR(task)) + return PTR_ERR(task); + rpc_put_task(task); + return 0; +} + +/* + * NLM asynchronous call. + */ int nlm_async_call(struct nlm_rqst *req, u32 proc, const struct rpc_call_ops *tk_ops) { struct rpc_message msg = { .rpc_argp = &req->a_args, .rpc_resp = &req->a_res, }; - return __nlm_async_call(req, proc, &msg, tk_ops); + return nlm_do_async_call(req, proc, &msg, tk_ops); } int nlm_async_reply(struct nlm_rqst *req, u32 proc, const struct rpc_call_ops *tk_ops) @@ -378,7 +380,33 @@ int nlm_async_reply(struct nlm_rqst *req, u32 proc, const struct rpc_call_ops *t struct rpc_message msg = { .rpc_argp = &req->a_res, }; - return __nlm_async_call(req, proc, &msg, tk_ops); + return nlm_do_async_call(req, proc, &msg, tk_ops); +} + +/* + * NLM client asynchronous call. + * + * Note that although the calls are asynchronous, and are therefore + * guaranteed to complete, we still always attempt to wait for + * completion in order to be able to correctly track the lock + * state. + */ +static int nlmclnt_async_call(struct rpc_cred *cred, struct nlm_rqst *req, u32 proc, const struct rpc_call_ops *tk_ops) +{ + struct rpc_message msg = { + .rpc_argp = &req->a_args, + .rpc_resp = &req->a_res, + .rpc_cred = cred, + }; + struct rpc_task *task; + int err; + + task = __nlm_async_call(req, proc, &msg, tk_ops); + if (IS_ERR(task)) + return PTR_ERR(task); + err = rpc_wait_for_completion_task(task); + rpc_put_task(task); + return err; } /* @@ -389,7 +417,7 @@ nlmclnt_test(struct nlm_rqst *req, struct file_lock *fl) { int status; - status = nlmclnt_call(req, NLMPROC_TEST); + status = nlmclnt_call(nfs_file_cred(fl->fl_file), req, NLMPROC_TEST); if (status < 0) goto out; @@ -480,10 +508,12 @@ static int do_vfs_lock(struct file_lock *fl) static int nlmclnt_lock(struct nlm_rqst *req, struct file_lock *fl) { + struct rpc_cred *cred = nfs_file_cred(fl->fl_file); struct nlm_host *host = req->a_host; struct nlm_res *resp = &req->a_res; struct nlm_wait *block = NULL; unsigned char fl_flags = fl->fl_flags; + unsigned char fl_type; int status = -ENOLCK; if (nsm_monitor(host) < 0) { @@ -493,18 +523,22 @@ nlmclnt_lock(struct nlm_rqst *req, struct file_lock *fl) } fl->fl_flags |= FL_ACCESS; status = do_vfs_lock(fl); + fl->fl_flags = fl_flags; if (status < 0) goto out; block = nlmclnt_prepare_block(host, fl); again: + /* + * Initialise resp->status to a valid non-zero value, + * since 0 == nlm_lck_granted + */ + resp->status = nlm_lck_blocked; for(;;) { /* Reboot protection */ fl->fl_u.nfs_fl.state = host->h_state; - status = nlmclnt_call(req, NLMPROC_LOCK); + status = nlmclnt_call(cred, req, NLMPROC_LOCK); if (status < 0) - goto out_unblock; - if (!req->a_args.block) break; /* Did a reclaimer thread notify us of a server reboot? */ if (resp->status == nlm_lck_denied_grace_period) @@ -513,15 +547,22 @@ again: break; /* Wait on an NLM blocking lock */ status = nlmclnt_block(block, req, NLMCLNT_POLL_TIMEOUT); - /* if we were interrupted. Send a CANCEL request to the server - * and exit - */ if (status < 0) - goto out_unblock; + break; if (resp->status != nlm_lck_blocked) break; } + /* if we were interrupted while blocking, then cancel the lock request + * and exit + */ + if (resp->status == nlm_lck_blocked) { + if (!req->a_args.block) + goto out_unlock; + if (nlmclnt_cancel(host, req->a_args.block, fl) == 0) + goto out_unblock; + } + if (resp->status == nlm_granted) { down_read(&host->h_rwsem); /* Check whether or not the server has rebooted */ @@ -530,20 +571,34 @@ again: goto again; } /* Ensure the resulting lock will get added to granted list */ - fl->fl_flags = fl_flags | FL_SLEEP; + fl->fl_flags |= FL_SLEEP; if (do_vfs_lock(fl) < 0) printk(KERN_WARNING "%s: VFS is out of sync with lock manager!\n", __FUNCTION__); up_read(&host->h_rwsem); + fl->fl_flags = fl_flags; + status = 0; } + if (status < 0) + goto out_unlock; status = nlm_stat_to_errno(resp->status); out_unblock: nlmclnt_finish_block(block); - /* Cancel the blocked request if it is still pending */ - if (resp->status == nlm_lck_blocked) - nlmclnt_cancel(host, req->a_args.block, fl); out: nlm_release_call(req); + return status; +out_unlock: + /* Fatal error: ensure that we remove the lock altogether */ + dprintk("lockd: lock attempt ended in fatal error.\n" + " Attempting to unlock.\n"); + nlmclnt_finish_block(block); + fl_type = fl->fl_type; + fl->fl_type = F_UNLCK; + down_read(&host->h_rwsem); + do_vfs_lock(fl); + up_read(&host->h_rwsem); + fl->fl_type = fl_type; fl->fl_flags = fl_flags; + nlmclnt_async_call(cred, req, NLMPROC_UNLOCK, &nlmclnt_unlock_ops); return status; } @@ -567,8 +622,8 @@ nlmclnt_reclaim(struct nlm_host *host, struct file_lock *fl) nlmclnt_setlockargs(req, fl); req->a_args.reclaim = 1; - if ((status = nlmclnt_call(req, NLMPROC_LOCK)) >= 0 - && req->a_res.status == nlm_granted) + status = nlmclnt_call(nfs_file_cred(fl->fl_file), req, NLMPROC_LOCK); + if (status >= 0 && req->a_res.status == nlm_granted) return 0; printk(KERN_WARNING "lockd: failed to reclaim lock for pid %d " @@ -598,7 +653,8 @@ nlmclnt_unlock(struct nlm_rqst *req, struct file_lock *fl) { struct nlm_host *host = req->a_host; struct nlm_res *resp = &req->a_res; - int status = 0; + int status; + unsigned char fl_flags = fl->fl_flags; /* * Note: the server is supposed to either grant us the unlock @@ -607,16 +663,17 @@ nlmclnt_unlock(struct nlm_rqst *req, struct file_lock *fl) */ fl->fl_flags |= FL_EXISTS; down_read(&host->h_rwsem); - if (do_vfs_lock(fl) == -ENOENT) { - up_read(&host->h_rwsem); + status = do_vfs_lock(fl); + up_read(&host->h_rwsem); + fl->fl_flags = fl_flags; + if (status == -ENOENT) { + status = 0; goto out; } - up_read(&host->h_rwsem); - - if (req->a_flags & RPC_TASK_ASYNC) - return nlm_async_call(req, NLMPROC_UNLOCK, &nlmclnt_unlock_ops); - status = nlmclnt_call(req, NLMPROC_UNLOCK); + atomic_inc(&req->a_count); + status = nlmclnt_async_call(nfs_file_cred(fl->fl_file), req, + NLMPROC_UNLOCK, &nlmclnt_unlock_ops); if (status < 0) goto out; @@ -671,16 +728,10 @@ static const struct rpc_call_ops nlmclnt_unlock_ops = { static int nlmclnt_cancel(struct nlm_host *host, int block, struct file_lock *fl) { struct nlm_rqst *req; - unsigned long flags; - sigset_t oldset; - int status; + int status; - /* Block all signals while setting up call */ - spin_lock_irqsave(¤t->sighand->siglock, flags); - oldset = current->blocked; - sigfillset(¤t->blocked); - recalc_sigpending(); - spin_unlock_irqrestore(¤t->sighand->siglock, flags); + dprintk("lockd: blocking lock attempt was interrupted by a signal.\n" + " Attempting to cancel lock.\n"); req = nlm_alloc_call(nlm_get_host(host)); if (!req) @@ -690,13 +741,12 @@ static int nlmclnt_cancel(struct nlm_host *host, int block, struct file_lock *fl nlmclnt_setlockargs(req, fl); req->a_args.block = block; - status = nlm_async_call(req, NLMPROC_CANCEL, &nlmclnt_cancel_ops); - - spin_lock_irqsave(¤t->sighand->siglock, flags); - current->blocked = oldset; - recalc_sigpending(); - spin_unlock_irqrestore(¤t->sighand->siglock, flags); - + atomic_inc(&req->a_count); + status = nlmclnt_async_call(nfs_file_cred(fl->fl_file), req, + NLMPROC_CANCEL, &nlmclnt_cancel_ops); + if (status == 0 && req->a_res.status == nlm_lck_denied) + status = -ENOLCK; + nlm_release_call(req); return status; } diff --git a/fs/lockd/host.c b/fs/lockd/host.c index f1ef49f..f23750d 100644 --- a/fs/lockd/host.c +++ b/fs/lockd/host.c @@ -42,11 +42,12 @@ static struct nsm_handle * nsm_find(const struct sockaddr_in *sin, /* * Common host lookup routine for server & client */ -static struct nlm_host * -nlm_lookup_host(int server, const struct sockaddr_in *sin, - int proto, int version, const char *hostname, - unsigned int hostname_len, - const struct sockaddr_in *ssin) +static struct nlm_host *nlm_lookup_host(int server, + const struct sockaddr_in *sin, + int proto, u32 version, + const char *hostname, + unsigned int hostname_len, + const struct sockaddr_in *ssin) { struct hlist_head *chain; struct hlist_node *pos; @@ -55,7 +56,7 @@ nlm_lookup_host(int server, const struct sockaddr_in *sin, int hash; dprintk("lockd: nlm_lookup_host("NIPQUAD_FMT"->"NIPQUAD_FMT - ", p=%d, v=%d, my role=%s, name=%.*s)\n", + ", p=%d, v=%u, my role=%s, name=%.*s)\n", NIPQUAD(ssin->sin_addr.s_addr), NIPQUAD(sin->sin_addr.s_addr), proto, version, server? "server" : "client", @@ -175,9 +176,10 @@ nlm_destroy_host(struct nlm_host *host) /* * Find an NLM server handle in the cache. If there is none, create it. */ -struct nlm_host * -nlmclnt_lookup_host(const struct sockaddr_in *sin, int proto, int version, - const char *hostname, unsigned int hostname_len) +struct nlm_host *nlmclnt_lookup_host(const struct sockaddr_in *sin, + int proto, u32 version, + const char *hostname, + unsigned int hostname_len) { struct sockaddr_in ssin = {0}; diff --git a/fs/lockd/mon.c b/fs/lockd/mon.c index 908b23f..e4d5635 100644 --- a/fs/lockd/mon.c +++ b/fs/lockd/mon.c @@ -18,6 +18,8 @@ #define NLMDBG_FACILITY NLMDBG_MONITOR +#define XDR_ADDRBUF_LEN (20) + static struct rpc_clnt * nsm_create(void); static struct rpc_program nsm_program; @@ -147,28 +149,55 @@ nsm_create(void) /* * XDR functions for NSM. + * + * See http://www.opengroup.org/ for details on the Network + * Status Monitor wire protocol. */ -static __be32 * -xdr_encode_common(struct rpc_rqst *rqstp, __be32 *p, struct nsm_args *argp) +static __be32 *xdr_encode_nsm_string(__be32 *p, char *string) { - char buffer[20], *name; - - /* - * Use the dotted-quad IP address of the remote host as - * identifier. Linux statd always looks up the canonical - * hostname first for whatever remote hostname it receives, - * so this works alright. - */ - if (nsm_use_hostnames) { - name = argp->mon_name; - } else { - sprintf(buffer, "%u.%u.%u.%u", NIPQUAD(argp->addr)); + size_t len = strlen(string); + + if (len > SM_MAXSTRLEN) + len = SM_MAXSTRLEN; + return xdr_encode_opaque(p, string, len); +} + +/* + * "mon_name" specifies the host to be monitored. + * + * Linux uses a text version of the IP address of the remote + * host as the host identifier (the "mon_name" argument). + * + * Linux statd always looks up the canonical hostname first for + * whatever remote hostname it receives, so this works alright. + */ +static __be32 *xdr_encode_mon_name(__be32 *p, struct nsm_args *argp) +{ + char buffer[XDR_ADDRBUF_LEN + 1]; + char *name = argp->mon_name; + + if (!nsm_use_hostnames) { + snprintf(buffer, XDR_ADDRBUF_LEN, + NIPQUAD_FMT, NIPQUAD(argp->addr)); name = buffer; } - if (!(p = xdr_encode_string(p, name)) - || !(p = xdr_encode_string(p, utsname()->nodename))) + + return xdr_encode_nsm_string(p, name); +} + +/* + * The "my_id" argument specifies the hostname and RPC procedure + * to be called when the status manager receives notification + * (via the SM_NOTIFY call) that the state of host "mon_name" + * has changed. + */ +static __be32 *xdr_encode_my_id(__be32 *p, struct nsm_args *argp) +{ + p = xdr_encode_nsm_string(p, utsname()->nodename); + if (!p) return ERR_PTR(-EIO); + *p++ = htonl(argp->prog); *p++ = htonl(argp->vers); *p++ = htonl(argp->proc); @@ -176,18 +205,48 @@ xdr_encode_common(struct rpc_rqst *rqstp, __be32 *p, struct nsm_args *argp) return p; } -static int -xdr_encode_mon(struct rpc_rqst *rqstp, __be32 *p, struct nsm_args *argp) +/* + * The "mon_id" argument specifies the non-private arguments + * of an SM_MON or SM_UNMON call. + */ +static __be32 *xdr_encode_mon_id(__be32 *p, struct nsm_args *argp) { - p = xdr_encode_common(rqstp, p, argp); - if (IS_ERR(p)) - return PTR_ERR(p); + p = xdr_encode_mon_name(p, argp); + if (!p) + return ERR_PTR(-EIO); - /* Surprise - there may even be room for an IPv6 address now */ + return xdr_encode_my_id(p, argp); +} + +/* + * The "priv" argument may contain private information required + * by the SM_MON call. This information will be supplied in the + * SM_NOTIFY call. + * + * Linux provides the raw IP address of the monitored host, + * left in network byte order. + */ +static __be32 *xdr_encode_priv(__be32 *p, struct nsm_args *argp) +{ *p++ = argp->addr; *p++ = 0; *p++ = 0; *p++ = 0; + + return p; +} + +static int +xdr_encode_mon(struct rpc_rqst *rqstp, __be32 *p, struct nsm_args *argp) +{ + p = xdr_encode_mon_id(p, argp); + if (IS_ERR(p)) + return PTR_ERR(p); + + p = xdr_encode_priv(p, argp); + if (IS_ERR(p)) + return PTR_ERR(p); + rqstp->rq_slen = xdr_adjust_iovec(rqstp->rq_svec, p); return 0; } @@ -195,7 +254,7 @@ xdr_encode_mon(struct rpc_rqst *rqstp, __be32 *p, struct nsm_args *argp) static int xdr_encode_unmon(struct rpc_rqst *rqstp, __be32 *p, struct nsm_args *argp) { - p = xdr_encode_common(rqstp, p, argp); + p = xdr_encode_mon_id(p, argp); if (IS_ERR(p)) return PTR_ERR(p); rqstp->rq_slen = xdr_adjust_iovec(rqstp->rq_svec, p); @@ -220,9 +279,11 @@ xdr_decode_stat(struct rpc_rqst *rqstp, __be32 *p, struct nsm_res *resp) } #define SM_my_name_sz (1+XDR_QUADLEN(SM_MAXSTRLEN)) -#define SM_my_id_sz (3+1+SM_my_name_sz) -#define SM_mon_id_sz (1+XDR_QUADLEN(20)+SM_my_id_sz) -#define SM_mon_sz (SM_mon_id_sz+4) +#define SM_my_id_sz (SM_my_name_sz+3) +#define SM_mon_name_sz (1+XDR_QUADLEN(SM_MAXSTRLEN)) +#define SM_mon_id_sz (SM_mon_name_sz+SM_my_id_sz) +#define SM_priv_sz (XDR_QUADLEN(SM_PRIV_SIZE)) +#define SM_mon_sz (SM_mon_id_sz+SM_priv_sz) #define SM_monres_sz 2 #define SM_unmonres_sz 1 diff --git a/fs/lockd/svc.c b/fs/lockd/svc.c index 1ed8bd4..38c2f0b 100644 --- a/fs/lockd/svc.c +++ b/fs/lockd/svc.c @@ -74,7 +74,9 @@ static const unsigned long nlm_timeout_min = 3; static const unsigned long nlm_timeout_max = 20; static const int nlm_port_min = 0, nlm_port_max = 65535; +#ifdef CONFIG_SYSCTL static struct ctl_table_header * nlm_sysctl_table; +#endif static unsigned long get_lockd_grace_period(void) { @@ -359,6 +361,8 @@ out: } EXPORT_SYMBOL(lockd_down); +#ifdef CONFIG_SYSCTL + /* * Sysctl parameters (same as module parameters, different interface). */ @@ -443,6 +447,8 @@ static ctl_table nlm_sysctl_root[] = { { .ctl_name = 0 } }; +#endif /* CONFIG_SYSCTL */ + /* * Module (and sysfs) parameters. */ @@ -516,15 +522,21 @@ module_param(nsm_use_hostnames, bool, 0644); static int __init init_nlm(void) { +#ifdef CONFIG_SYSCTL nlm_sysctl_table = register_sysctl_table(nlm_sysctl_root); return nlm_sysctl_table ? 0 : -ENOMEM; +#else + return 0; +#endif } static void __exit exit_nlm(void) { /* FIXME: delete all NLM clients */ nlm_shutdown_hosts(); +#ifdef CONFIG_SYSCTL unregister_sysctl_table(nlm_sysctl_table); +#endif } module_init(init_nlm); diff --git a/fs/nfs/Makefile b/fs/nfs/Makefile index df0f41e..ac6170c 100644 --- a/fs/nfs/Makefile +++ b/fs/nfs/Makefile @@ -5,7 +5,7 @@ obj-$(CONFIG_NFS_FS) += nfs.o nfs-y := client.o dir.o file.o getroot.o inode.o super.o nfs2xdr.o \ - pagelist.o proc.o read.o symlink.o unlink.o \ + direct.o pagelist.o proc.o read.o symlink.o unlink.o \ write.o namespace.o mount_clnt.o nfs-$(CONFIG_ROOT_NFS) += nfsroot.o nfs-$(CONFIG_NFS_V3) += nfs3proc.o nfs3xdr.o @@ -14,5 +14,4 @@ nfs-$(CONFIG_NFS_V4) += nfs4proc.o nfs4xdr.o nfs4state.o nfs4renewd.o \ delegation.o idmap.o \ callback.o callback_xdr.o callback_proc.o \ nfs4namespace.o -nfs-$(CONFIG_NFS_DIRECTIO) += direct.o nfs-$(CONFIG_SYSCTL) += sysctl.o diff --git a/fs/nfs/client.c b/fs/nfs/client.c index c5c0175..f2f3b28 100644 --- a/fs/nfs/client.c +++ b/fs/nfs/client.c @@ -112,6 +112,7 @@ struct nfs_client_initdata { static struct nfs_client *nfs_alloc_client(const struct nfs_client_initdata *cl_init) { struct nfs_client *clp; + struct rpc_cred *cred; if ((clp = kzalloc(sizeof(*clp), GFP_KERNEL)) == NULL) goto error_0; @@ -150,6 +151,9 @@ static struct nfs_client *nfs_alloc_client(const struct nfs_client_initdata *cl_ clp->cl_boot_time = CURRENT_TIME; clp->cl_state = 1 << NFS4CLNT_LEASE_EXPIRED; #endif + cred = rpc_lookup_machine_cred(); + if (!IS_ERR(cred)) + clp->cl_machine_cred = cred; return clp; @@ -170,6 +174,8 @@ static void nfs4_shutdown_client(struct nfs_client *clp) BUG_ON(!RB_EMPTY_ROOT(&clp->cl_state_owners)); if (__test_and_clear_bit(NFS_CS_IDMAP, &clp->cl_res_state)) nfs_idmap_delete(clp); + + rpc_destroy_wait_queue(&clp->cl_rpcwaitq); #endif } @@ -189,6 +195,9 @@ static void nfs_free_client(struct nfs_client *clp) if (__test_and_clear_bit(NFS_CS_CALLBACK, &clp->cl_res_state)) nfs_callback_down(); + if (clp->cl_machine_cred != NULL) + put_rpccred(clp->cl_machine_cred); + kfree(clp->cl_hostname); kfree(clp); @@ -680,10 +689,22 @@ static int nfs_init_server(struct nfs_server *server, if (error < 0) goto error; + server->port = data->nfs_server.port; + error = nfs_init_server_rpcclient(server, &timeparms, data->auth_flavors[0]); if (error < 0) goto error; + /* Preserve the values of mount_server-related mount options */ + if (data->mount_server.addrlen) { + memcpy(&server->mountd_address, &data->mount_server.address, + data->mount_server.addrlen); + server->mountd_addrlen = data->mount_server.addrlen; + } + server->mountd_version = data->mount_server.version; + server->mountd_port = data->mount_server.port; + server->mountd_protocol = data->mount_server.protocol; + server->namelen = data->namlen; /* Create a client RPC handle for the NFSv3 ACL management interface */ nfs_init_server_aclclient(server); @@ -1062,6 +1083,8 @@ static int nfs4_init_server(struct nfs_server *server, server->acdirmin = data->acdirmin * HZ; server->acdirmax = data->acdirmax * HZ; + server->port = data->nfs_server.port; + error = nfs_init_server_rpcclient(server, &timeparms, data->auth_flavors[0]); error: diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index d9e30ac..f288b3e 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -1967,7 +1967,7 @@ force_lookup: if (!NFS_PROTO(inode)->access) goto out_notsup; - cred = rpcauth_lookupcred(NFS_CLIENT(inode)->cl_auth, 0); + cred = rpc_lookup_cred(); if (!IS_ERR(cred)) { res = nfs_do_access(inode, cred, mask); put_rpccred(cred); diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c index 16844f9..4757a2b 100644 --- a/fs/nfs/direct.c +++ b/fs/nfs/direct.c @@ -229,14 +229,20 @@ static void nfs_direct_complete(struct nfs_direct_req *dreq) static void nfs_direct_read_result(struct rpc_task *task, void *calldata) { struct nfs_read_data *data = calldata; - struct nfs_direct_req *dreq = (struct nfs_direct_req *) data->req; - if (nfs_readpage_result(task, data) != 0) - return; + nfs_readpage_result(task, data); +} + +static void nfs_direct_read_release(void *calldata) +{ + + struct nfs_read_data *data = calldata; + struct nfs_direct_req *dreq = (struct nfs_direct_req *) data->req; + int status = data->task.tk_status; spin_lock(&dreq->lock); - if (unlikely(task->tk_status < 0)) { - dreq->error = task->tk_status; + if (unlikely(status < 0)) { + dreq->error = status; spin_unlock(&dreq->lock); } else { dreq->count += data->res.count; @@ -249,11 +255,12 @@ static void nfs_direct_read_result(struct rpc_task *task, void *calldata) if (put_dreq(dreq)) nfs_direct_complete(dreq); + nfs_readdata_release(calldata); } static const struct rpc_call_ops nfs_read_direct_ops = { .rpc_call_done = nfs_direct_read_result, - .rpc_release = nfs_readdata_release, + .rpc_release = nfs_direct_read_release, }; /* @@ -280,6 +287,7 @@ static ssize_t nfs_direct_read_schedule_segment(struct nfs_direct_req *dreq, .rpc_client = NFS_CLIENT(inode), .rpc_message = &msg, .callback_ops = &nfs_read_direct_ops, + .workqueue = nfsiod_workqueue, .flags = RPC_TASK_ASYNC, }; unsigned int pgbase; @@ -323,7 +331,7 @@ static ssize_t nfs_direct_read_schedule_segment(struct nfs_direct_req *dreq, data->inode = inode; data->cred = msg.rpc_cred; data->args.fh = NFS_FH(inode); - data->args.context = ctx; + data->args.context = get_nfs_open_context(ctx); data->args.offset = pos; data->args.pgbase = pgbase; data->args.pages = data->pagevec; @@ -339,8 +347,9 @@ static ssize_t nfs_direct_read_schedule_segment(struct nfs_direct_req *dreq, NFS_PROTO(inode)->read_setup(data, &msg); task = rpc_run_task(&task_setup_data); - if (!IS_ERR(task)) - rpc_put_task(task); + if (IS_ERR(task)) + break; + rpc_put_task(task); dprintk("NFS: %5u initiated direct read call " "(req %s/%Ld, %zu bytes @ offset %Lu)\n", @@ -446,6 +455,7 @@ static void nfs_direct_write_reschedule(struct nfs_direct_req *dreq) struct rpc_task_setup task_setup_data = { .rpc_client = NFS_CLIENT(inode), .callback_ops = &nfs_write_direct_ops, + .workqueue = nfsiod_workqueue, .flags = RPC_TASK_ASYNC, }; @@ -499,27 +509,34 @@ static void nfs_direct_write_reschedule(struct nfs_direct_req *dreq) static void nfs_direct_commit_result(struct rpc_task *task, void *calldata) { struct nfs_write_data *data = calldata; - struct nfs_direct_req *dreq = (struct nfs_direct_req *) data->req; /* Call the NFS version-specific code */ - if (NFS_PROTO(data->inode)->commit_done(task, data) != 0) - return; - if (unlikely(task->tk_status < 0)) { + NFS_PROTO(data->inode)->commit_done(task, data); +} + +static void nfs_direct_commit_release(void *calldata) +{ + struct nfs_write_data *data = calldata; + struct nfs_direct_req *dreq = (struct nfs_direct_req *) data->req; + int status = data->task.tk_status; + + if (status < 0) { dprintk("NFS: %5u commit failed with error %d.\n", - task->tk_pid, task->tk_status); + data->task.tk_pid, status); dreq->flags = NFS_ODIRECT_RESCHED_WRITES; } else if (memcmp(&dreq->verf, &data->verf, sizeof(data->verf))) { - dprintk("NFS: %5u commit verify failed\n", task->tk_pid); + dprintk("NFS: %5u commit verify failed\n", data->task.tk_pid); dreq->flags = NFS_ODIRECT_RESCHED_WRITES; } - dprintk("NFS: %5u commit returned %d\n", task->tk_pid, task->tk_status); + dprintk("NFS: %5u commit returned %d\n", data->task.tk_pid, status); nfs_direct_write_complete(dreq, data->inode); + nfs_commitdata_release(calldata); } static const struct rpc_call_ops nfs_commit_direct_ops = { .rpc_call_done = nfs_direct_commit_result, - .rpc_release = nfs_commit_release, + .rpc_release = nfs_direct_commit_release, }; static void nfs_direct_commit_schedule(struct nfs_direct_req *dreq) @@ -537,6 +554,7 @@ static void nfs_direct_commit_schedule(struct nfs_direct_req *dreq) .rpc_message = &msg, .callback_ops = &nfs_commit_direct_ops, .callback_data = data, + .workqueue = nfsiod_workqueue, .flags = RPC_TASK_ASYNC, }; @@ -546,6 +564,7 @@ static void nfs_direct_commit_schedule(struct nfs_direct_req *dreq) data->args.fh = NFS_FH(data->inode); data->args.offset = 0; data->args.count = 0; + data->args.context = get_nfs_open_context(dreq->ctx); data->res.count = 0; data->res.fattr = &data->fattr; data->res.verf = &data->verf; @@ -585,7 +604,7 @@ static void nfs_direct_write_complete(struct nfs_direct_req *dreq, struct inode static void nfs_alloc_commit_data(struct nfs_direct_req *dreq) { - dreq->commit_data = nfs_commit_alloc(); + dreq->commit_data = nfs_commitdata_alloc(); if (dreq->commit_data != NULL) dreq->commit_data->req = (struct nfs_page *) dreq; } @@ -606,11 +625,20 @@ static void nfs_direct_write_complete(struct nfs_direct_req *dreq, struct inode static void nfs_direct_write_result(struct rpc_task *task, void *calldata) { struct nfs_write_data *data = calldata; - struct nfs_direct_req *dreq = (struct nfs_direct_req *) data->req; - int status = task->tk_status; if (nfs_writeback_done(task, data) != 0) return; +} + +/* + * NB: Return the value of the first error return code. Subsequent + * errors after the first one are ignored. + */ +static void nfs_direct_write_release(void *calldata) +{ + struct nfs_write_data *data = calldata; + struct nfs_direct_req *dreq = (struct nfs_direct_req *) data->req; + int status = data->task.tk_status; spin_lock(&dreq->lock); @@ -632,23 +660,13 @@ static void nfs_direct_write_result(struct rpc_task *task, void *calldata) break; case NFS_ODIRECT_DO_COMMIT: if (memcmp(&dreq->verf, &data->verf, sizeof(dreq->verf))) { - dprintk("NFS: %5u write verify failed\n", task->tk_pid); + dprintk("NFS: %5u write verify failed\n", data->task.tk_pid); dreq->flags = NFS_ODIRECT_RESCHED_WRITES; } } } out_unlock: spin_unlock(&dreq->lock); -} - -/* - * NB: Return the value of the first error return code. Subsequent - * errors after the first one are ignored. - */ -static void nfs_direct_write_release(void *calldata) -{ - struct nfs_write_data *data = calldata; - struct nfs_direct_req *dreq = (struct nfs_direct_req *) data->req; if (put_dreq(dreq)) nfs_direct_write_complete(dreq, data->inode); @@ -682,6 +700,7 @@ static ssize_t nfs_direct_write_schedule_segment(struct nfs_direct_req *dreq, .rpc_client = NFS_CLIENT(inode), .rpc_message = &msg, .callback_ops = &nfs_write_direct_ops, + .workqueue = nfsiod_workqueue, .flags = RPC_TASK_ASYNC, }; size_t wsize = NFS_SERVER(inode)->wsize; @@ -728,7 +747,7 @@ static ssize_t nfs_direct_write_schedule_segment(struct nfs_direct_req *dreq, data->inode = inode; data->cred = msg.rpc_cred; data->args.fh = NFS_FH(inode); - data->args.context = ctx; + data->args.context = get_nfs_open_context(ctx); data->args.offset = pos; data->args.pgbase = pgbase; data->args.pages = data->pagevec; @@ -745,8 +764,9 @@ static ssize_t nfs_direct_write_schedule_segment(struct nfs_direct_req *dreq, NFS_PROTO(inode)->write_setup(data, &msg); task = rpc_run_task(&task_setup_data); - if (!IS_ERR(task)) - rpc_put_task(task); + if (IS_ERR(task)) + break; + rpc_put_task(task); dprintk("NFS: %5u initiated direct write call " "(req %s/%Ld, %zu bytes @ offset %Lu)\n", diff --git a/fs/nfs/file.c b/fs/nfs/file.c index 5d2e9d9..3536b01 100644 --- a/fs/nfs/file.c +++ b/fs/nfs/file.c @@ -238,10 +238,8 @@ nfs_file_read(struct kiocb *iocb, const struct iovec *iov, ssize_t result; size_t count = iov_length(iov, nr_segs); -#ifdef CONFIG_NFS_DIRECTIO if (iocb->ki_filp->f_flags & O_DIRECT) return nfs_file_direct_read(iocb, iov, nr_segs, pos); -#endif dfprintk(VFS, "nfs: read(%s/%s, %lu@%lu)\n", dentry->d_parent->d_name.name, dentry->d_name.name, @@ -387,9 +385,7 @@ const struct address_space_operations nfs_file_aops = { .write_end = nfs_write_end, .invalidatepage = nfs_invalidate_page, .releasepage = nfs_release_page, -#ifdef CONFIG_NFS_DIRECTIO .direct_IO = nfs_direct_IO, -#endif .launder_page = nfs_launder_page, }; @@ -447,10 +443,8 @@ static ssize_t nfs_file_write(struct kiocb *iocb, const struct iovec *iov, ssize_t result; size_t count = iov_length(iov, nr_segs); -#ifdef CONFIG_NFS_DIRECTIO if (iocb->ki_filp->f_flags & O_DIRECT) return nfs_file_direct_write(iocb, iov, nr_segs, pos); -#endif dfprintk(VFS, "nfs: write(%s/%s(%ld), %lu@%Ld)\n", dentry->d_parent->d_name.name, dentry->d_name.name, @@ -576,17 +570,9 @@ static int do_setlk(struct file *filp, int cmd, struct file_lock *fl) lock_kernel(); /* Use local locking if mounted with "-onolock" */ - if (!(NFS_SERVER(inode)->flags & NFS_MOUNT_NONLM)) { + if (!(NFS_SERVER(inode)->flags & NFS_MOUNT_NONLM)) status = NFS_PROTO(inode)->lock(filp, cmd, fl); - /* If we were signalled we still need to ensure that - * we clean up any state on the server. We therefore - * record the lock call as having succeeded in order to - * ensure that locks_remove_posix() cleans it out when - * the process exits. - */ - if (status == -EINTR || status == -ERESTARTSYS) - do_vfs_lock(filp, fl); - } else + else status = do_vfs_lock(filp, fl); unlock_kernel(); if (status < 0) diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index 6f88d7c..5cb3345 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -523,8 +523,12 @@ struct nfs_open_context *get_nfs_open_context(struct nfs_open_context *ctx) static void __put_nfs_open_context(struct nfs_open_context *ctx, int wait) { - struct inode *inode = ctx->path.dentry->d_inode; + struct inode *inode; + if (ctx == NULL) + return; + + inode = ctx->path.dentry->d_inode; if (!atomic_dec_and_lock(&ctx->count, &inode->i_lock)) return; list_del(&ctx->list); @@ -610,7 +614,7 @@ int nfs_open(struct inode *inode, struct file *filp) struct nfs_open_context *ctx; struct rpc_cred *cred; - cred = rpcauth_lookupcred(NFS_CLIENT(inode)->cl_auth, 0); + cred = rpc_lookup_cred(); if (IS_ERR(cred)) return PTR_ERR(cred); ctx = alloc_nfs_open_context(filp->f_path.mnt, filp->f_path.dentry, cred); @@ -1218,6 +1222,36 @@ static void nfs_destroy_inodecache(void) kmem_cache_destroy(nfs_inode_cachep); } +struct workqueue_struct *nfsiod_workqueue; + +/* + * start up the nfsiod workqueue + */ +static int nfsiod_start(void) +{ + struct workqueue_struct *wq; + dprintk("RPC: creating workqueue nfsiod\n"); + wq = create_singlethread_workqueue("nfsiod"); + if (wq == NULL) + return -ENOMEM; + nfsiod_workqueue = wq; + return 0; +} + +/* + * Destroy the nfsiod workqueue + */ +static void nfsiod_stop(void) +{ + struct workqueue_struct *wq; + + wq = nfsiod_workqueue; + if (wq == NULL) + return; + nfsiod_workqueue = NULL; + destroy_workqueue(wq); +} + /* * Initialize NFS */ @@ -1225,6 +1259,10 @@ static int __init init_nfs_fs(void) { int err; + err = nfsiod_start(); + if (err) + goto out6; + err = nfs_fs_proc_init(); if (err) goto out5; @@ -1271,6 +1309,8 @@ out3: out4: nfs_fs_proc_exit(); out5: + nfsiod_stop(); +out6: return err; } @@ -1286,6 +1326,7 @@ static void __exit exit_nfs_fs(void) #endif unregister_nfs_fs(); nfs_fs_proc_exit(); + nfsiod_stop(); } /* Not quite true; I just maintain it */ diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h index 9319927..04ae867 100644 --- a/fs/nfs/internal.h +++ b/fs/nfs/internal.h @@ -46,9 +46,9 @@ struct nfs_parsed_mount_data { struct sockaddr_storage address; size_t addrlen; char *hostname; - unsigned int version; + u32 version; unsigned short port; - int protocol; + unsigned short protocol; } mount_server; struct { @@ -56,7 +56,8 @@ struct nfs_parsed_mount_data { size_t addrlen; char *hostname; char *export_path; - int protocol; + unsigned short port; + unsigned short protocol; } nfs_server; struct security_mnt_opts lsm_opts; @@ -115,13 +116,8 @@ extern void nfs_destroy_readpagecache(void); extern int __init nfs_init_writepagecache(void); extern void nfs_destroy_writepagecache(void); -#ifdef CONFIG_NFS_DIRECTIO extern int __init nfs_init_directcache(void); extern void nfs_destroy_directcache(void); -#else -#define nfs_init_directcache() (0) -#define nfs_destroy_directcache() do {} while(0) -#endif /* nfs2xdr.c */ extern int nfs_stat_to_errno(int); @@ -146,6 +142,7 @@ extern struct rpc_procinfo nfs4_procedures[]; extern int nfs_access_cache_shrinker(int nr_to_scan, gfp_t gfp_mask); /* inode.c */ +extern struct workqueue_struct *nfsiod_workqueue; extern struct inode *nfs_alloc_inode(struct super_block *sb); extern void nfs_destroy_inode(struct inode *); extern int nfs_write_inode(struct inode *,int); diff --git a/fs/nfs/namespace.c b/fs/nfs/namespace.c index 607f6eb..af4d0f1 100644 --- a/fs/nfs/namespace.c +++ b/fs/nfs/namespace.c @@ -20,7 +20,7 @@ static void nfs_expire_automounts(struct work_struct *work); -LIST_HEAD(nfs_automount_list); +static LIST_HEAD(nfs_automount_list); static DECLARE_DELAYED_WORK(nfs_automount_task, nfs_expire_automounts); int nfs_mountpoint_expiry_timeout = 500 * HZ; diff --git a/fs/nfs/nfs2xdr.c b/fs/nfs/nfs2xdr.c index 1f7ea67..28bab67 100644 --- a/fs/nfs/nfs2xdr.c +++ b/fs/nfs/nfs2xdr.c @@ -267,7 +267,7 @@ nfs_xdr_readres(struct rpc_rqst *req, __be32 *p, struct nfs_readres *res) int status; if ((status = ntohl(*p++))) - return -nfs_stat_to_errno(status); + return nfs_stat_to_errno(status); p = xdr_decode_fattr(p, res->fattr); count = ntohl(*p++); @@ -428,11 +428,11 @@ nfs_xdr_readdirres(struct rpc_rqst *req, __be32 *p, void *dummy) size_t hdrlen; unsigned int pglen, recvd; u32 len; - int status, nr; + int status, nr = 0; __be32 *end, *entry, *kaddr; if ((status = ntohl(*p++))) - return -nfs_stat_to_errno(status); + return nfs_stat_to_errno(status); hdrlen = (u8 *) p - (u8 *) iov->iov_base; if (iov->iov_len < hdrlen) { @@ -452,7 +452,12 @@ nfs_xdr_readdirres(struct rpc_rqst *req, __be32 *p, void *dummy) kaddr = p = kmap_atomic(*page, KM_USER0); end = (__be32 *)((char *)p + pglen); entry = p; - for (nr = 0; *p++; nr++) { + + /* Make sure the packet actually has a value_follows and EOF entry */ + if ((entry + 1) > end) + goto short_pkt; + + for (; *p++; nr++) { if (p + 2 > end) goto short_pkt; p++; /* fileid */ @@ -467,18 +472,32 @@ nfs_xdr_readdirres(struct rpc_rqst *req, __be32 *p, void *dummy) goto short_pkt; entry = p; } - if (!nr && (entry[0] != 0 || entry[1] == 0)) - goto short_pkt; + + /* + * Apparently some server sends responses that are a valid size, but + * contain no entries, and have value_follows==0 and EOF==0. For + * those, just set the EOF marker. + */ + if (!nr && entry[1] == 0) { + dprintk("NFS: readdir reply truncated!\n"); + entry[1] = 1; + } out: kunmap_atomic(kaddr, KM_USER0); return nr; short_pkt: + /* + * When we get a short packet there are 2 possibilities. We can + * return an error, or fix up the response to look like a valid + * response and return what we have so far. If there are no + * entries and the packet was short, then return -EIO. If there + * are valid entries in the response, return them and pretend that + * the call was successful, but incomplete. The caller can retry the + * readdir starting at the last cookie. + */ entry[0] = entry[1] = 0; - /* truncate listing ? */ - if (!nr) { - dprintk("NFS: readdir reply truncated!\n"); - entry[1] = 1; - } + if (!nr) + nr = -errno_NFSERR_IO; goto out; err_unmap: nr = -errno_NFSERR_IO; @@ -518,7 +537,7 @@ nfs_xdr_stat(struct rpc_rqst *req, __be32 *p, void *dummy) int status; if ((status = ntohl(*p++)) != 0) - status = -nfs_stat_to_errno(status); + status = nfs_stat_to_errno(status); return status; } @@ -532,7 +551,7 @@ nfs_xdr_attrstat(struct rpc_rqst *req, __be32 *p, struct nfs_fattr *fattr) int status; if ((status = ntohl(*p++))) - return -nfs_stat_to_errno(status); + return nfs_stat_to_errno(status); xdr_decode_fattr(p, fattr); return 0; } @@ -547,7 +566,7 @@ nfs_xdr_diropres(struct rpc_rqst *req, __be32 *p, struct nfs_diropok *res) int status; if ((status = ntohl(*p++))) - return -nfs_stat_to_errno(status); + return nfs_stat_to_errno(status); p = xdr_decode_fhandle(p, res->fh); xdr_decode_fattr(p, res->fattr); return 0; @@ -585,7 +604,7 @@ nfs_xdr_readlinkres(struct rpc_rqst *req, __be32 *p, void *dummy) int status; if ((status = ntohl(*p++))) - return -nfs_stat_to_errno(status); + return nfs_stat_to_errno(status); /* Convert length of symlink */ len = ntohl(*p++); if (len >= rcvbuf->page_len) { @@ -634,7 +653,7 @@ nfs_xdr_statfsres(struct rpc_rqst *req, __be32 *p, struct nfs2_fsstat *res) int status; if ((status = ntohl(*p++))) - return -nfs_stat_to_errno(status); + return nfs_stat_to_errno(status); res->tsize = ntohl(*p++); res->bsize = ntohl(*p++); @@ -653,39 +672,39 @@ static struct { int errno; } nfs_errtbl[] = { { NFS_OK, 0 }, - { NFSERR_PERM, EPERM }, - { NFSERR_NOENT, ENOENT }, - { NFSERR_IO, errno_NFSERR_IO }, - { NFSERR_NXIO, ENXIO }, -/* { NFSERR_EAGAIN, EAGAIN }, */ - { NFSERR_ACCES, EACCES }, - { NFSERR_EXIST, EEXIST }, - { NFSERR_XDEV, EXDEV }, - { NFSERR_NODEV, ENODEV }, - { NFSERR_NOTDIR, ENOTDIR }, - { NFSERR_ISDIR, EISDIR }, - { NFSERR_INVAL, EINVAL }, - { NFSERR_FBIG, EFBIG }, - { NFSERR_NOSPC, ENOSPC }, - { NFSERR_ROFS, EROFS }, - { NFSERR_MLINK, EMLINK }, - { NFSERR_NAMETOOLONG, ENAMETOOLONG }, - { NFSERR_NOTEMPTY, ENOTEMPTY }, - { NFSERR_DQUOT, EDQUOT }, - { NFSERR_STALE, ESTALE }, - { NFSERR_REMOTE, EREMOTE }, + { NFSERR_PERM, -EPERM }, + { NFSERR_NOENT, -ENOENT }, + { NFSERR_IO, -errno_NFSERR_IO}, + { NFSERR_NXIO, -ENXIO }, +/* { NFSERR_EAGAIN, -EAGAIN }, */ + { NFSERR_ACCES, -EACCES }, + { NFSERR_EXIST, -EEXIST }, + { NFSERR_XDEV, -EXDEV }, + { NFSERR_NODEV, -ENODEV }, + { NFSERR_NOTDIR, -ENOTDIR }, + { NFSERR_ISDIR, -EISDIR }, + { NFSERR_INVAL, -EINVAL }, + { NFSERR_FBIG, -EFBIG }, + { NFSERR_NOSPC, -ENOSPC }, + { NFSERR_ROFS, -EROFS }, + { NFSERR_MLINK, -EMLINK }, + { NFSERR_NAMETOOLONG, -ENAMETOOLONG }, + { NFSERR_NOTEMPTY, -ENOTEMPTY }, + { NFSERR_DQUOT, -EDQUOT }, + { NFSERR_STALE, -ESTALE }, + { NFSERR_REMOTE, -EREMOTE }, #ifdef EWFLUSH - { NFSERR_WFLUSH, EWFLUSH }, + { NFSERR_WFLUSH, -EWFLUSH }, #endif - { NFSERR_BADHANDLE, EBADHANDLE }, - { NFSERR_NOT_SYNC, ENOTSYNC }, - { NFSERR_BAD_COOKIE, EBADCOOKIE }, - { NFSERR_NOTSUPP, ENOTSUPP }, - { NFSERR_TOOSMALL, ETOOSMALL }, - { NFSERR_SERVERFAULT, ESERVERFAULT }, - { NFSERR_BADTYPE, EBADTYPE }, - { NFSERR_JUKEBOX, EJUKEBOX }, - { -1, EIO } + { NFSERR_BADHANDLE, -EBADHANDLE }, + { NFSERR_NOT_SYNC, -ENOTSYNC }, + { NFSERR_BAD_COOKIE, -EBADCOOKIE }, + { NFSERR_NOTSUPP, -ENOTSUPP }, + { NFSERR_TOOSMALL, -ETOOSMALL }, + { NFSERR_SERVERFAULT, -ESERVERFAULT }, + { NFSERR_BADTYPE, -EBADTYPE }, + { NFSERR_JUKEBOX, -EJUKEBOX }, + { -1, -EIO } }; /* diff --git a/fs/nfs/nfs3xdr.c b/fs/nfs/nfs3xdr.c index 3917e2f..11cddde 100644 --- a/fs/nfs/nfs3xdr.c +++ b/fs/nfs/nfs3xdr.c @@ -508,14 +508,14 @@ nfs3_xdr_readdirres(struct rpc_rqst *req, __be32 *p, struct nfs3_readdirres *res struct page **page; size_t hdrlen; u32 len, recvd, pglen; - int status, nr; + int status, nr = 0; __be32 *entry, *end, *kaddr; status = ntohl(*p++); /* Decode post_op_attrs */ p = xdr_decode_post_op_attr(p, res->dir_attr); if (status) - return -nfs_stat_to_errno(status); + return nfs_stat_to_errno(status); /* Decode verifier cookie */ if (res->verf) { res->verf[0] = *p++; @@ -542,7 +542,12 @@ nfs3_xdr_readdirres(struct rpc_rqst *req, __be32 *p, struct nfs3_readdirres *res kaddr = p = kmap_atomic(*page, KM_USER0); end = (__be32 *)((char *)p + pglen); entry = p; - for (nr = 0; *p++; nr++) { + + /* Make sure the packet actually has a value_follows and EOF entry */ + if ((entry + 1) > end) + goto short_pkt; + + for (; *p++; nr++) { if (p + 3 > end) goto short_pkt; p += 2; /* inode # */ @@ -581,18 +586,32 @@ nfs3_xdr_readdirres(struct rpc_rqst *req, __be32 *p, struct nfs3_readdirres *res goto short_pkt; entry = p; } - if (!nr && (entry[0] != 0 || entry[1] == 0)) - goto short_pkt; + + /* + * Apparently some server sends responses that are a valid size, but + * contain no entries, and have value_follows==0 and EOF==0. For + * those, just set the EOF marker. + */ + if (!nr && entry[1] == 0) { + dprintk("NFS: readdir reply truncated!\n"); + entry[1] = 1; + } out: kunmap_atomic(kaddr, KM_USER0); return nr; short_pkt: + /* + * When we get a short packet there are 2 possibilities. We can + * return an error, or fix up the response to look like a valid + * response and return what we have so far. If there are no + * entries and the packet was short, then return -EIO. If there + * are valid entries in the response, return them and pretend that + * the call was successful, but incomplete. The caller can retry the + * readdir starting at the last cookie. + */ entry[0] = entry[1] = 0; - /* truncate listing ? */ - if (!nr) { - dprintk("NFS: readdir reply truncated!\n"); - entry[1] = 1; - } + if (!nr) + nr = -errno_NFSERR_IO; goto out; err_unmap: nr = -errno_NFSERR_IO; @@ -732,7 +751,7 @@ nfs3_xdr_attrstat(struct rpc_rqst *req, __be32 *p, struct nfs_fattr *fattr) int status; if ((status = ntohl(*p++))) - return -nfs_stat_to_errno(status); + return nfs_stat_to_errno(status); xdr_decode_fattr(p, fattr); return 0; } @@ -747,7 +766,7 @@ nfs3_xdr_wccstat(struct rpc_rqst *req, __be32 *p, struct nfs_fattr *fattr) int status; if ((status = ntohl(*p++))) - status = -nfs_stat_to_errno(status); + status = nfs_stat_to_errno(status); xdr_decode_wcc_data(p, fattr); return status; } @@ -767,7 +786,7 @@ nfs3_xdr_lookupres(struct rpc_rqst *req, __be32 *p, struct nfs3_diropres *res) int status; if ((status = ntohl(*p++))) { - status = -nfs_stat_to_errno(status); + status = nfs_stat_to_errno(status); } else { if (!(p = xdr_decode_fhandle(p, res->fh))) return -errno_NFSERR_IO; @@ -787,7 +806,7 @@ nfs3_xdr_accessres(struct rpc_rqst *req, __be32 *p, struct nfs3_accessres *res) p = xdr_decode_post_op_attr(p, res->fattr); if (status) - return -nfs_stat_to_errno(status); + return nfs_stat_to_errno(status); res->access = ntohl(*p++); return 0; } @@ -824,7 +843,7 @@ nfs3_xdr_readlinkres(struct rpc_rqst *req, __be32 *p, struct nfs_fattr *fattr) p = xdr_decode_post_op_attr(p, fattr); if (status != 0) - return -nfs_stat_to_errno(status); + return nfs_stat_to_errno(status); /* Convert length of symlink */ len = ntohl(*p++); @@ -872,7 +891,7 @@ nfs3_xdr_readres(struct rpc_rqst *req, __be32 *p, struct nfs_readres *res) p = xdr_decode_post_op_attr(p, res->fattr); if (status != 0) - return -nfs_stat_to_errno(status); + return nfs_stat_to_errno(status); /* Decode reply count and EOF flag. NFSv3 is somewhat redundant * in that it puts the count both in the res struct and in the @@ -922,7 +941,7 @@ nfs3_xdr_writeres(struct rpc_rqst *req, __be32 *p, struct nfs_writeres *res) p = xdr_decode_wcc_data(p, res->fattr); if (status != 0) - return -nfs_stat_to_errno(status); + return nfs_stat_to_errno(status); res->count = ntohl(*p++); res->verf->committed = (enum nfs3_stable_how)ntohl(*p++); @@ -953,7 +972,7 @@ nfs3_xdr_createres(struct rpc_rqst *req, __be32 *p, struct nfs3_diropres *res) res->fattr->valid = 0; } } else { - status = -nfs_stat_to_errno(status); + status = nfs_stat_to_errno(status); } p = xdr_decode_wcc_data(p, res->dir_attr); return status; @@ -968,7 +987,7 @@ nfs3_xdr_renameres(struct rpc_rqst *req, __be32 *p, struct nfs3_renameres *res) int status; if ((status = ntohl(*p++)) != 0) - status = -nfs_stat_to_errno(status); + status = nfs_stat_to_errno(status); p = xdr_decode_wcc_data(p, res->fromattr); p = xdr_decode_wcc_data(p, res->toattr); return status; @@ -983,7 +1002,7 @@ nfs3_xdr_linkres(struct rpc_rqst *req, __be32 *p, struct nfs3_linkres *res) int status; if ((status = ntohl(*p++)) != 0) - status = -nfs_stat_to_errno(status); + status = nfs_stat_to_errno(status); p = xdr_decode_post_op_attr(p, res->fattr); p = xdr_decode_wcc_data(p, res->dir_attr); return status; @@ -1001,7 +1020,7 @@ nfs3_xdr_fsstatres(struct rpc_rqst *req, __be32 *p, struct nfs_fsstat *res) p = xdr_decode_post_op_attr(p, res->fattr); if (status != 0) - return -nfs_stat_to_errno(status); + return nfs_stat_to_errno(status); p = xdr_decode_hyper(p, &res->tbytes); p = xdr_decode_hyper(p, &res->fbytes); @@ -1026,7 +1045,7 @@ nfs3_xdr_fsinfores(struct rpc_rqst *req, __be32 *p, struct nfs_fsinfo *res) p = xdr_decode_post_op_attr(p, res->fattr); if (status != 0) - return -nfs_stat_to_errno(status); + return nfs_stat_to_errno(status); res->rtmax = ntohl(*p++); res->rtpref = ntohl(*p++); @@ -1054,7 +1073,7 @@ nfs3_xdr_pathconfres(struct rpc_rqst *req, __be32 *p, struct nfs_pathconf *res) p = xdr_decode_post_op_attr(p, res->fattr); if (status != 0) - return -nfs_stat_to_errno(status); + return nfs_stat_to_errno(status); res->max_link = ntohl(*p++); res->max_namelen = ntohl(*p++); @@ -1073,7 +1092,7 @@ nfs3_xdr_commitres(struct rpc_rqst *req, __be32 *p, struct nfs_writeres *res) status = ntohl(*p++); p = xdr_decode_wcc_data(p, res->fattr); if (status != 0) - return -nfs_stat_to_errno(status); + return nfs_stat_to_errno(status); res->verf->verifier[0] = *p++; res->verf->verifier[1] = *p++; @@ -1095,7 +1114,7 @@ nfs3_xdr_getaclres(struct rpc_rqst *req, __be32 *p, int err, base; if (status != 0) - return -nfs_stat_to_errno(status); + return nfs_stat_to_errno(status); p = xdr_decode_post_op_attr(p, res->fattr); res->mask = ntohl(*p++); if (res->mask & ~(NFS_ACL|NFS_ACLCNT|NFS_DFACL|NFS_DFACLCNT)) @@ -1122,7 +1141,7 @@ nfs3_xdr_setaclres(struct rpc_rqst *req, __be32 *p, struct nfs_fattr *fattr) int status = ntohl(*p++); if (status) - return -nfs_stat_to_errno(status); + return nfs_stat_to_errno(status); xdr_decode_post_op_attr(p, fattr); return 0; } diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 7ce0786..dbc0927 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -51,6 +51,7 @@ #include "nfs4_fs.h" #include "delegation.h" +#include "internal.h" #include "iostat.h" #define NFSDBG_FACILITY NFSDBG_PROC @@ -239,6 +240,8 @@ static void nfs4_init_opendata_res(struct nfs4_opendata *p) { p->o_res.f_attr = &p->f_attr; p->o_res.dir_attr = &p->dir_attr; + p->o_res.seqid = p->o_arg.seqid; + p->c_res.seqid = p->c_arg.seqid; p->o_res.server = p->o_arg.server; nfs_fattr_init(&p->f_attr); nfs_fattr_init(&p->dir_attr); @@ -729,7 +732,6 @@ static void nfs4_open_confirm_done(struct rpc_task *task, void *calldata) renew_lease(data->o_res.server, data->timestamp); data->rpc_done = 1; } - nfs_increment_open_seqid(data->rpc_status, data->c_arg.seqid); } static void nfs4_open_confirm_release(void *calldata) @@ -773,6 +775,7 @@ static int _nfs4_proc_open_confirm(struct nfs4_opendata *data) .rpc_message = &msg, .callback_ops = &nfs4_open_confirm_ops, .callback_data = data, + .workqueue = nfsiod_workqueue, .flags = RPC_TASK_ASYNC, }; int status; @@ -858,7 +861,6 @@ static void nfs4_open_done(struct rpc_task *task, void *calldata) if (!(data->o_res.rflags & NFS4_OPEN_RESULT_CONFIRM)) nfs_confirm_seqid(&data->owner->so_seqid, 0); } - nfs_increment_open_seqid(data->rpc_status, data->o_arg.seqid); data->rpc_done = 1; } @@ -910,6 +912,7 @@ static int _nfs4_proc_open(struct nfs4_opendata *data) .rpc_message = &msg, .callback_ops = &nfs4_open_ops, .callback_data = data, + .workqueue = nfsiod_workqueue, .flags = RPC_TASK_ASYNC, }; int status; @@ -979,11 +982,8 @@ static int _nfs4_open_expired(struct nfs_open_context *ctx, struct nfs4_state *s if (IS_ERR(opendata)) return PTR_ERR(opendata); ret = nfs4_open_recover(opendata, state); - if (ret == -ESTALE) { - /* Invalidate the state owner so we don't ever use it again */ - nfs4_drop_state_owner(state->owner); + if (ret == -ESTALE) d_drop(ctx->path.dentry); - } nfs4_opendata_put(opendata); return ret; } @@ -1226,7 +1226,6 @@ static void nfs4_close_done(struct rpc_task *task, void *data) /* hmm. we are done with the inode, and in the process of freeing * the state_owner. we keep this around to process errors */ - nfs_increment_open_seqid(task->tk_status, calldata->arg.seqid); switch (task->tk_status) { case 0: nfs_set_open_stateid(state, &calldata->res.stateid, 0); @@ -1315,6 +1314,7 @@ int nfs4_do_close(struct path *path, struct nfs4_state *state, int wait) .rpc_client = server->client, .rpc_message = &msg, .callback_ops = &nfs4_close_ops, + .workqueue = nfsiod_workqueue, .flags = RPC_TASK_ASYNC, }; int status = -ENOMEM; @@ -1332,6 +1332,7 @@ int nfs4_do_close(struct path *path, struct nfs4_state *state, int wait) goto out_free_calldata; calldata->arg.bitmask = server->attr_bitmask; calldata->res.fattr = &calldata->fattr; + calldata->res.seqid = calldata->arg.seqid; calldata->res.server = server; calldata->path.mnt = mntget(path->mnt); calldata->path.dentry = dget(path->dentry); @@ -1404,7 +1405,7 @@ nfs4_atomic_open(struct inode *dir, struct dentry *dentry, struct nameidata *nd) BUG_ON(nd->intent.open.flags & O_CREAT); } - cred = rpcauth_lookupcred(NFS_CLIENT(dir)->cl_auth, 0); + cred = rpc_lookup_cred(); if (IS_ERR(cred)) return (struct dentry *)cred; parent = dentry->d_parent; @@ -1439,7 +1440,7 @@ nfs4_open_revalidate(struct inode *dir, struct dentry *dentry, int openflags, st struct rpc_cred *cred; struct nfs4_state *state; - cred = rpcauth_lookupcred(NFS_CLIENT(dir)->cl_auth, 0); + cred = rpc_lookup_cred(); if (IS_ERR(cred)) return PTR_ERR(cred); state = nfs4_do_open(dir, &path, openflags, NULL, cred); @@ -1656,7 +1657,7 @@ nfs4_proc_setattr(struct dentry *dentry, struct nfs_fattr *fattr, nfs_fattr_init(fattr); - cred = rpcauth_lookupcred(NFS_CLIENT(inode)->cl_auth, 0); + cred = rpc_lookup_cred(); if (IS_ERR(cred)) return PTR_ERR(cred); @@ -1892,7 +1893,7 @@ nfs4_proc_create(struct inode *dir, struct dentry *dentry, struct iattr *sattr, struct rpc_cred *cred; int status = 0; - cred = rpcauth_lookupcred(NFS_CLIENT(dir)->cl_auth, 0); + cred = rpc_lookup_cred(); if (IS_ERR(cred)) { status = PTR_ERR(cred); goto out; @@ -2761,10 +2762,10 @@ nfs4_async_handle_error(struct rpc_task *task, const struct nfs_server *server) case -NFS4ERR_STALE_CLIENTID: case -NFS4ERR_STALE_STATEID: case -NFS4ERR_EXPIRED: - rpc_sleep_on(&clp->cl_rpcwaitq, task, NULL, NULL); + rpc_sleep_on(&clp->cl_rpcwaitq, task, NULL); nfs4_schedule_state_recovery(clp); if (test_bit(NFS4CLNT_STATE_RECOVER, &clp->cl_state) == 0) - rpc_wake_up_task(task); + rpc_wake_up_queued_task(&clp->cl_rpcwaitq, task); task->tk_status = 0; return -EAGAIN; case -NFS4ERR_DELAY: @@ -2884,7 +2885,7 @@ int nfs4_proc_setclientid(struct nfs_client *clp, u32 program, unsigned short po RPC_DISPLAY_ADDR), rpc_peeraddr2str(clp->cl_rpcclient, RPC_DISPLAY_PROTO), - cred->cr_ops->cr_name, + clp->cl_rpcclient->cl_auth->au_ops->au_name, clp->cl_id_uniquifier); setclientid.sc_netid_len = scnprintf(setclientid.sc_netid, sizeof(setclientid.sc_netid), @@ -3158,6 +3159,7 @@ static struct nfs4_unlockdata *nfs4_alloc_unlockdata(struct file_lock *fl, p->arg.fh = NFS_FH(inode); p->arg.fl = &p->fl; p->arg.seqid = seqid; + p->res.seqid = seqid; p->arg.stateid = &lsp->ls_stateid; p->lsp = lsp; atomic_inc(&lsp->ls_count); @@ -3183,7 +3185,6 @@ static void nfs4_locku_done(struct rpc_task *task, void *data) if (RPC_ASSASSINATED(task)) return; - nfs_increment_lock_seqid(task->tk_status, calldata->arg.seqid); switch (task->tk_status) { case 0: memcpy(calldata->lsp->ls_stateid.data, @@ -3235,6 +3236,7 @@ static struct rpc_task *nfs4_do_unlck(struct file_lock *fl, .rpc_client = NFS_CLIENT(lsp->ls_state->inode), .rpc_message = &msg, .callback_ops = &nfs4_locku_ops, + .workqueue = nfsiod_workqueue, .flags = RPC_TASK_ASYNC, }; @@ -3261,6 +3263,7 @@ static int nfs4_proc_unlck(struct nfs4_state *state, int cmd, struct file_lock * struct nfs4_lock_state *lsp; struct rpc_task *task; int status = 0; + unsigned char fl_flags = request->fl_flags; status = nfs4_set_lock_state(state, request); /* Unlock _before_ we do the RPC call */ @@ -3284,6 +3287,7 @@ static int nfs4_proc_unlck(struct nfs4_state *state, int cmd, struct file_lock * status = nfs4_wait_for_completion_rpc_task(task); rpc_put_task(task); out: + request->fl_flags = fl_flags; return status; } @@ -3320,6 +3324,7 @@ static struct nfs4_lockdata *nfs4_alloc_lockdata(struct file_lock *fl, p->arg.lock_stateid = &lsp->ls_stateid; p->arg.lock_owner.clientid = server->nfs_client->cl_clientid; p->arg.lock_owner.id = lsp->ls_id.id; + p->res.lock_seqid = p->arg.lock_seqid; p->lsp = lsp; atomic_inc(&lsp->ls_count); p->ctx = get_nfs_open_context(ctx); @@ -3346,6 +3351,7 @@ static void nfs4_lock_prepare(struct rpc_task *task, void *calldata) return; data->arg.open_stateid = &state->stateid; data->arg.new_lock_owner = 1; + data->res.open_seqid = data->arg.open_seqid; } else data->arg.new_lock_owner = 0; data->timestamp = jiffies; @@ -3363,7 +3369,6 @@ static void nfs4_lock_done(struct rpc_task *task, void *calldata) if (RPC_ASSASSINATED(task)) goto out; if (data->arg.new_lock_owner != 0) { - nfs_increment_open_seqid(data->rpc_status, data->arg.open_seqid); if (data->rpc_status == 0) nfs_confirm_seqid(&data->lsp->ls_seqid, 0); else @@ -3375,7 +3380,6 @@ static void nfs4_lock_done(struct rpc_task *task, void *calldata) data->lsp->ls_flags |= NFS_LOCK_INITIALIZED; renew_lease(NFS_SERVER(data->ctx->path.dentry->d_inode), data->timestamp); } - nfs_increment_lock_seqid(data->rpc_status, data->arg.lock_seqid); out: dprintk("%s: done, ret = %d!\n", __FUNCTION__, data->rpc_status); } @@ -3419,6 +3423,7 @@ static int _nfs4_do_setlk(struct nfs4_state *state, int cmd, struct file_lock *f .rpc_client = NFS_CLIENT(state->inode), .rpc_message = &msg, .callback_ops = &nfs4_lock_ops, + .workqueue = nfsiod_workqueue, .flags = RPC_TASK_ASYNC, }; int ret; diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c index b962397..46eb624 100644 --- a/fs/nfs/nfs4state.c +++ b/fs/nfs/nfs4state.c @@ -71,6 +71,29 @@ static int nfs4_init_client(struct nfs_client *clp, struct rpc_cred *cred) return status; } +static struct rpc_cred *nfs4_get_machine_cred(struct nfs_client *clp) +{ + struct rpc_cred *cred = NULL; + + spin_lock(&clp->cl_lock); + if (clp->cl_machine_cred != NULL) + cred = get_rpccred(clp->cl_machine_cred); + spin_unlock(&clp->cl_lock); + return cred; +} + +static void nfs4_clear_machine_cred(struct nfs_client *clp) +{ + struct rpc_cred *cred; + + spin_lock(&clp->cl_lock); + cred = clp->cl_machine_cred; + clp->cl_machine_cred = NULL; + spin_unlock(&clp->cl_lock); + if (cred != NULL) + put_rpccred(cred); +} + struct rpc_cred *nfs4_get_renew_cred(struct nfs_client *clp) { struct nfs4_state_owner *sp; @@ -91,13 +114,18 @@ static struct rpc_cred *nfs4_get_setclientid_cred(struct nfs_client *clp) { struct nfs4_state_owner *sp; struct rb_node *pos; + struct rpc_cred *cred; + cred = nfs4_get_machine_cred(clp); + if (cred != NULL) + goto out; pos = rb_first(&clp->cl_state_owners); if (pos != NULL) { sp = rb_entry(pos, struct nfs4_state_owner, so_client_node); - return get_rpccred(sp->so_cred); + cred = get_rpccred(sp->so_cred); } - return NULL; +out: + return cred; } static void nfs_alloc_unique_id(struct rb_root *root, struct nfs_unique_id *new, @@ -292,8 +320,10 @@ struct nfs4_state_owner *nfs4_get_state_owner(struct nfs_server *server, struct spin_unlock(&clp->cl_lock); if (sp == new) get_rpccred(cred); - else + else { + rpc_destroy_wait_queue(&new->so_sequence.wait); kfree(new); + } return sp; } @@ -310,6 +340,7 @@ void nfs4_put_state_owner(struct nfs4_state_owner *sp) return; nfs4_remove_state_owner(clp, sp); spin_unlock(&clp->cl_lock); + rpc_destroy_wait_queue(&sp->so_sequence.wait); put_rpccred(cred); kfree(sp); } @@ -529,6 +560,7 @@ static void nfs4_free_lock_state(struct nfs4_lock_state *lsp) spin_lock(&clp->cl_lock); nfs_free_unique_id(&clp->cl_lockowner_id, &lsp->ls_id); spin_unlock(&clp->cl_lock); + rpc_destroy_wait_queue(&lsp->ls_sequence.wait); kfree(lsp); } @@ -731,7 +763,7 @@ int nfs_wait_on_sequence(struct nfs_seqid *seqid, struct rpc_task *task) list_add_tail(&seqid->list, &sequence->list); if (list_first_entry(&sequence->list, struct nfs_seqid, list) == seqid) goto unlock; - rpc_sleep_on(&sequence->wait, task, NULL, NULL); + rpc_sleep_on(&sequence->wait, task, NULL); status = -EAGAIN; unlock: spin_unlock(&sequence->lock); @@ -920,10 +952,10 @@ restart_loop: if (cred != NULL) { /* Yes there are: try to renew the old lease */ status = nfs4_proc_renew(clp, cred); + put_rpccred(cred); switch (status) { case 0: case -NFS4ERR_CB_PATH_DOWN: - put_rpccred(cred); goto out; case -NFS4ERR_STALE_CLIENTID: case -NFS4ERR_LEASE_MOVED: @@ -932,14 +964,19 @@ restart_loop: } else { /* "reboot" to ensure we clear all state on the server */ clp->cl_boot_time = CURRENT_TIME; - cred = nfs4_get_setclientid_cred(clp); } /* We're going to have to re-establish a clientid */ nfs4_state_mark_reclaim(clp); status = -ENOENT; + cred = nfs4_get_setclientid_cred(clp); if (cred != NULL) { status = nfs4_init_client(clp, cred); put_rpccred(cred); + /* Handle case where the user hasn't set up machine creds */ + if (status == -EACCES && cred == clp->cl_machine_cred) { + nfs4_clear_machine_cred(clp); + goto restart_loop; + } } if (status) goto out_error; diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c index db1ed9c..5a2d649 100644 --- a/fs/nfs/nfs4xdr.c +++ b/fs/nfs/nfs4xdr.c @@ -110,7 +110,7 @@ static int nfs4_stat_to_errno(int); #define decode_savefh_maxsz (op_decode_hdr_maxsz) #define encode_restorefh_maxsz (op_encode_hdr_maxsz) #define decode_restorefh_maxsz (op_decode_hdr_maxsz) -#define encode_fsinfo_maxsz (op_encode_hdr_maxsz + 2) +#define encode_fsinfo_maxsz (encode_getattr_maxsz) #define decode_fsinfo_maxsz (op_decode_hdr_maxsz + 11) #define encode_renew_maxsz (op_encode_hdr_maxsz + 3) #define decode_renew_maxsz (op_decode_hdr_maxsz) @@ -1191,8 +1191,8 @@ static int encode_readdir(struct xdr_stream *xdr, const struct nfs4_readdir_arg attrs[1] &= ~FATTR4_WORD1_MOUNTED_ON_FILEID; WRITE32(attrs[0] & readdir->bitmask[0]); WRITE32(attrs[1] & readdir->bitmask[1]); - dprintk("%s: cookie = %Lu, verifier = 0x%x%x, bitmap = 0x%x%x\n", - __FUNCTION__, + dprintk("%s: cookie = %Lu, verifier = %08x:%08x, bitmap = %08x:%08x\n", + __func__, (unsigned long long)readdir->cookie, ((u32 *)readdir->verifier.data)[0], ((u32 *)readdir->verifier.data)[1], @@ -2241,7 +2241,7 @@ static int decode_op_hdr(struct xdr_stream *xdr, enum nfs_opnum4 expected) } READ32(nfserr); if (nfserr != NFS_OK) - return -nfs4_stat_to_errno(nfserr); + return nfs4_stat_to_errno(nfserr); return 0; } @@ -2291,7 +2291,7 @@ static int decode_attr_supported(struct xdr_stream *xdr, uint32_t *bitmap, uint3 bitmap[0] &= ~FATTR4_WORD0_SUPPORTED_ATTRS; } else bitmask[0] = bitmask[1] = 0; - dprintk("%s: bitmask=0x%x%x\n", __FUNCTION__, bitmask[0], bitmask[1]); + dprintk("%s: bitmask=%08x:%08x\n", __func__, bitmask[0], bitmask[1]); return 0; } @@ -3005,6 +3005,8 @@ static int decode_close(struct xdr_stream *xdr, struct nfs_closeres *res) int status; status = decode_op_hdr(xdr, OP_CLOSE); + if (status != -EIO) + nfs_increment_open_seqid(status, res->seqid); if (status) return status; READ_BUF(NFS4_STATEID_SIZE); @@ -3296,11 +3298,17 @@ static int decode_lock(struct xdr_stream *xdr, struct nfs_lock_res *res) int status; status = decode_op_hdr(xdr, OP_LOCK); + if (status == -EIO) + goto out; if (status == 0) { READ_BUF(NFS4_STATEID_SIZE); COPYMEM(res->stateid.data, NFS4_STATEID_SIZE); } else if (status == -NFS4ERR_DENIED) - return decode_lock_denied(xdr, NULL); + status = decode_lock_denied(xdr, NULL); + if (res->open_seqid != NULL) + nfs_increment_open_seqid(status, res->open_seqid); + nfs_increment_lock_seqid(status, res->lock_seqid); +out: return status; } @@ -3319,6 +3327,8 @@ static int decode_locku(struct xdr_stream *xdr, struct nfs_locku_res *res) int status; status = decode_op_hdr(xdr, OP_LOCKU); + if (status != -EIO) + nfs_increment_lock_seqid(status, res->seqid); if (status == 0) { READ_BUF(NFS4_STATEID_SIZE); COPYMEM(res->stateid.data, NFS4_STATEID_SIZE); @@ -3384,6 +3394,8 @@ static int decode_open(struct xdr_stream *xdr, struct nfs_openres *res) int status; status = decode_op_hdr(xdr, OP_OPEN); + if (status != -EIO) + nfs_increment_open_seqid(status, res->seqid); if (status) return status; READ_BUF(NFS4_STATEID_SIZE); @@ -3416,6 +3428,8 @@ static int decode_open_confirm(struct xdr_stream *xdr, struct nfs_open_confirmre int status; status = decode_op_hdr(xdr, OP_OPEN_CONFIRM); + if (status != -EIO) + nfs_increment_open_seqid(status, res->seqid); if (status) return status; READ_BUF(NFS4_STATEID_SIZE); @@ -3429,6 +3443,8 @@ static int decode_open_downgrade(struct xdr_stream *xdr, struct nfs_closeres *re int status; status = decode_op_hdr(xdr, OP_OPEN_DOWNGRADE); + if (status != -EIO) + nfs_increment_open_seqid(status, res->seqid); if (status) return status; READ_BUF(NFS4_STATEID_SIZE); @@ -3481,7 +3497,7 @@ static int decode_readdir(struct xdr_stream *xdr, struct rpc_rqst *req, struct n size_t hdrlen; u32 recvd, pglen = rcvbuf->page_len; __be32 *end, *entry, *p, *kaddr; - unsigned int nr; + unsigned int nr = 0; int status; status = decode_op_hdr(xdr, OP_READDIR); @@ -3489,8 +3505,8 @@ static int decode_readdir(struct xdr_stream *xdr, struct rpc_rqst *req, struct n return status; READ_BUF(8); COPYMEM(readdir->verifier.data, 8); - dprintk("%s: verifier = 0x%x%x\n", - __FUNCTION__, + dprintk("%s: verifier = %08x:%08x\n", + __func__, ((u32 *)readdir->verifier.data)[0], ((u32 *)readdir->verifier.data)[1]); @@ -3505,7 +3521,12 @@ static int decode_readdir(struct xdr_stream *xdr, struct rpc_rqst *req, struct n kaddr = p = kmap_atomic(page, KM_USER0); end = p + ((pglen + readdir->pgbase) >> 2); entry = p; - for (nr = 0; *p++; nr++) { + + /* Make sure the packet actually has a value_follows and EOF entry */ + if ((entry + 1) > end) + goto short_pkt; + + for (; *p++; nr++) { u32 len, attrlen, xlen; if (end - p < 3) goto short_pkt; @@ -3532,20 +3553,32 @@ static int decode_readdir(struct xdr_stream *xdr, struct rpc_rqst *req, struct n p += attrlen; /* attributes */ entry = p; } - if (!nr && (entry[0] != 0 || entry[1] == 0)) - goto short_pkt; + /* + * Apparently some server sends responses that are a valid size, but + * contain no entries, and have value_follows==0 and EOF==0. For + * those, just set the EOF marker. + */ + if (!nr && entry[1] == 0) { + dprintk("NFS: readdir reply truncated!\n"); + entry[1] = 1; + } out: kunmap_atomic(kaddr, KM_USER0); return 0; short_pkt: + /* + * When we get a short packet there are 2 possibilities. We can + * return an error, or fix up the response to look like a valid + * response and return what we have so far. If there are no + * entries and the packet was short, then return -EIO. If there + * are valid entries in the response, return them and pretend that + * the call was successful, but incomplete. The caller can retry the + * readdir starting at the last cookie. + */ dprintk("%s: short packet at entry %d\n", __FUNCTION__, nr); entry[0] = entry[1] = 0; - /* truncate listing ? */ - if (!nr) { - dprintk("NFS: readdir reply truncated!\n"); - entry[1] = 1; - } - goto out; + if (nr) + goto out; err_unmap: kunmap_atomic(kaddr, KM_USER0); return -errno_NFSERR_IO; @@ -3727,7 +3760,7 @@ static int decode_setclientid(struct xdr_stream *xdr, struct nfs_client *clp) READ_BUF(len); return -NFSERR_CLID_INUSE; } else - return -nfs4_stat_to_errno(nfserr); + return nfs4_stat_to_errno(nfserr); return 0; } @@ -4389,7 +4422,7 @@ static int nfs4_xdr_dec_fsinfo(struct rpc_rqst *req, __be32 *p, struct nfs_fsinf if (!status) status = decode_fsinfo(&xdr, fsinfo); if (!status) - status = -nfs4_stat_to_errno(hdr.status); + status = nfs4_stat_to_errno(hdr.status); return status; } @@ -4479,7 +4512,7 @@ static int nfs4_xdr_dec_setclientid(struct rpc_rqst *req, __be32 *p, if (!status) status = decode_setclientid(&xdr, clp); if (!status) - status = -nfs4_stat_to_errno(hdr.status); + status = nfs4_stat_to_errno(hdr.status); return status; } @@ -4501,7 +4534,7 @@ static int nfs4_xdr_dec_setclientid_confirm(struct rpc_rqst *req, __be32 *p, str if (!status) status = decode_fsinfo(&xdr, fsinfo); if (!status) - status = -nfs4_stat_to_errno(hdr.status); + status = nfs4_stat_to_errno(hdr.status); return status; } @@ -4611,42 +4644,42 @@ static struct { int errno; } nfs_errtbl[] = { { NFS4_OK, 0 }, - { NFS4ERR_PERM, EPERM }, - { NFS4ERR_NOENT, ENOENT }, - { NFS4ERR_IO, errno_NFSERR_IO }, - { NFS4ERR_NXIO, ENXIO }, - { NFS4ERR_ACCESS, EACCES }, - { NFS4ERR_EXIST, EEXIST }, - { NFS4ERR_XDEV, EXDEV }, - { NFS4ERR_NOTDIR, ENOTDIR }, - { NFS4ERR_ISDIR, EISDIR }, - { NFS4ERR_INVAL, EINVAL }, - { NFS4ERR_FBIG, EFBIG }, - { NFS4ERR_NOSPC, ENOSPC }, - { NFS4ERR_ROFS, EROFS }, - { NFS4ERR_MLINK, EMLINK }, - { NFS4ERR_NAMETOOLONG, ENAMETOOLONG }, - { NFS4ERR_NOTEMPTY, ENOTEMPTY }, - { NFS4ERR_DQUOT, EDQUOT }, - { NFS4ERR_STALE, ESTALE }, - { NFS4ERR_BADHANDLE, EBADHANDLE }, - { NFS4ERR_BADOWNER, EINVAL }, - { NFS4ERR_BADNAME, EINVAL }, - { NFS4ERR_BAD_COOKIE, EBADCOOKIE }, - { NFS4ERR_NOTSUPP, ENOTSUPP }, - { NFS4ERR_TOOSMALL, ETOOSMALL }, - { NFS4ERR_SERVERFAULT, ESERVERFAULT }, - { NFS4ERR_BADTYPE, EBADTYPE }, - { NFS4ERR_LOCKED, EAGAIN }, - { NFS4ERR_RESOURCE, EREMOTEIO }, - { NFS4ERR_SYMLINK, ELOOP }, - { NFS4ERR_OP_ILLEGAL, EOPNOTSUPP }, - { NFS4ERR_DEADLOCK, EDEADLK }, - { NFS4ERR_WRONGSEC, EPERM }, /* FIXME: this needs + { NFS4ERR_PERM, -EPERM }, + { NFS4ERR_NOENT, -ENOENT }, + { NFS4ERR_IO, -errno_NFSERR_IO}, + { NFS4ERR_NXIO, -ENXIO }, + { NFS4ERR_ACCESS, -EACCES }, + { NFS4ERR_EXIST, -EEXIST }, + { NFS4ERR_XDEV, -EXDEV }, + { NFS4ERR_NOTDIR, -ENOTDIR }, + { NFS4ERR_ISDIR, -EISDIR }, + { NFS4ERR_INVAL, -EINVAL }, + { NFS4ERR_FBIG, -EFBIG }, + { NFS4ERR_NOSPC, -ENOSPC }, + { NFS4ERR_ROFS, -EROFS }, + { NFS4ERR_MLINK, -EMLINK }, + { NFS4ERR_NAMETOOLONG, -ENAMETOOLONG }, + { NFS4ERR_NOTEMPTY, -ENOTEMPTY }, + { NFS4ERR_DQUOT, -EDQUOT }, + { NFS4ERR_STALE, -ESTALE }, + { NFS4ERR_BADHANDLE, -EBADHANDLE }, + { NFS4ERR_BADOWNER, -EINVAL }, + { NFS4ERR_BADNAME, -EINVAL }, + { NFS4ERR_BAD_COOKIE, -EBADCOOKIE }, + { NFS4ERR_NOTSUPP, -ENOTSUPP }, + { NFS4ERR_TOOSMALL, -ETOOSMALL }, + { NFS4ERR_SERVERFAULT, -ESERVERFAULT }, + { NFS4ERR_BADTYPE, -EBADTYPE }, + { NFS4ERR_LOCKED, -EAGAIN }, + { NFS4ERR_RESOURCE, -EREMOTEIO }, + { NFS4ERR_SYMLINK, -ELOOP }, + { NFS4ERR_OP_ILLEGAL, -EOPNOTSUPP }, + { NFS4ERR_DEADLOCK, -EDEADLK }, + { NFS4ERR_WRONGSEC, -EPERM }, /* FIXME: this needs * to be handled by a * middle-layer. */ - { -1, EIO } + { -1, -EIO } }; /* @@ -4663,14 +4696,14 @@ nfs4_stat_to_errno(int stat) } if (stat <= 10000 || stat > 10100) { /* The server is looney tunes. */ - return ESERVERFAULT; + return -ESERVERFAULT; } /* If we cannot translate the error, the recovery routines should * handle it. * Note: remaining NFSv4 error codes have values > 10000, so should * not conflict with native Linux error codes. */ - return stat; + return -stat; } #define PROC(proc, argtype, restype) \ diff --git a/fs/nfs/read.c b/fs/nfs/read.c index 5a70be5..16f57e0 100644 --- a/fs/nfs/read.c +++ b/fs/nfs/read.c @@ -58,22 +58,19 @@ struct nfs_read_data *nfs_readdata_alloc(unsigned int pagecount) return p; } -static void nfs_readdata_rcu_free(struct rcu_head *head) +static void nfs_readdata_free(struct nfs_read_data *p) { - struct nfs_read_data *p = container_of(head, struct nfs_read_data, task.u.tk_rcu); if (p && (p->pagevec != &p->page_array[0])) kfree(p->pagevec); mempool_free(p, nfs_rdata_mempool); } -static void nfs_readdata_free(struct nfs_read_data *rdata) -{ - call_rcu_bh(&rdata->task.u.tk_rcu, nfs_readdata_rcu_free); -} - void nfs_readdata_release(void *data) { - nfs_readdata_free(data); + struct nfs_read_data *rdata = data; + + put_nfs_open_context(rdata->args.context); + nfs_readdata_free(rdata); } static @@ -156,7 +153,7 @@ static void nfs_readpage_release(struct nfs_page *req) /* * Set up the NFS read request struct */ -static void nfs_read_rpcsetup(struct nfs_page *req, struct nfs_read_data *data, +static int nfs_read_rpcsetup(struct nfs_page *req, struct nfs_read_data *data, const struct rpc_call_ops *call_ops, unsigned int count, unsigned int offset) { @@ -174,6 +171,7 @@ static void nfs_read_rpcsetup(struct nfs_page *req, struct nfs_read_data *data, .rpc_message = &msg, .callback_ops = call_ops, .callback_data = data, + .workqueue = nfsiod_workqueue, .flags = RPC_TASK_ASYNC | swap_flags, }; @@ -186,7 +184,7 @@ static void nfs_read_rpcsetup(struct nfs_page *req, struct nfs_read_data *data, data->args.pgbase = req->wb_pgbase + offset; data->args.pages = data->pagevec; data->args.count = count; - data->args.context = req->wb_context; + data->args.context = get_nfs_open_context(req->wb_context); data->res.fattr = &data->fattr; data->res.count = count; @@ -204,8 +202,10 @@ static void nfs_read_rpcsetup(struct nfs_page *req, struct nfs_read_data *data, (unsigned long long)data->args.offset); task = rpc_run_task(&task_setup_data); - if (!IS_ERR(task)) - rpc_put_task(task); + if (IS_ERR(task)) + return PTR_ERR(task); + rpc_put_task(task); + return 0; } static void @@ -242,6 +242,7 @@ static int nfs_pagein_multi(struct inode *inode, struct list_head *head, unsigne size_t rsize = NFS_SERVER(inode)->rsize, nbytes; unsigned int offset; int requests = 0; + int ret = 0; LIST_HEAD(list); nfs_list_remove_request(req); @@ -253,7 +254,6 @@ static int nfs_pagein_multi(struct inode *inode, struct list_head *head, unsigne data = nfs_readdata_alloc(1); if (!data) goto out_bad; - INIT_LIST_HEAD(&data->pages); list_add(&data->pages, &list); requests++; nbytes -= len; @@ -264,6 +264,8 @@ static int nfs_pagein_multi(struct inode *inode, struct list_head *head, unsigne offset = 0; nbytes = count; do { + int ret2; + data = list_entry(list.next, struct nfs_read_data, pages); list_del_init(&data->pages); @@ -271,13 +273,15 @@ static int nfs_pagein_multi(struct inode *inode, struct list_head *head, unsigne if (nbytes < rsize) rsize = nbytes; - nfs_read_rpcsetup(req, data, &nfs_read_partial_ops, + ret2 = nfs_read_rpcsetup(req, data, &nfs_read_partial_ops, rsize, offset); + if (ret == 0) + ret = ret2; offset += rsize; nbytes -= rsize; } while (nbytes != 0); - return 0; + return ret; out_bad: while (!list_empty(&list)) { @@ -295,12 +299,12 @@ static int nfs_pagein_one(struct inode *inode, struct list_head *head, unsigned struct nfs_page *req; struct page **pages; struct nfs_read_data *data; + int ret = -ENOMEM; data = nfs_readdata_alloc(npages); if (!data) goto out_bad; - INIT_LIST_HEAD(&data->pages); pages = data->pagevec; while (!list_empty(head)) { req = nfs_list_entry(head->next); @@ -311,11 +315,10 @@ static int nfs_pagein_one(struct inode *inode, struct list_head *head, unsigned } req = nfs_list_entry(data->pages.next); - nfs_read_rpcsetup(req, data, &nfs_read_full_ops, count, 0); - return 0; + return nfs_read_rpcsetup(req, data, &nfs_read_full_ops, count, 0); out_bad: nfs_async_read_error(head); - return -ENOMEM; + return ret; } /* @@ -342,26 +345,25 @@ int nfs_readpage_result(struct rpc_task *task, struct nfs_read_data *data) return 0; } -static int nfs_readpage_retry(struct rpc_task *task, struct nfs_read_data *data) +static void nfs_readpage_retry(struct rpc_task *task, struct nfs_read_data *data) { struct nfs_readargs *argp = &data->args; struct nfs_readres *resp = &data->res; if (resp->eof || resp->count == argp->count) - return 0; + return; /* This is a short read! */ nfs_inc_stats(data->inode, NFSIOS_SHORTREAD); /* Has the server at least made some progress? */ if (resp->count == 0) - return 0; + return; /* Yes, so retry the read at the end of the data */ argp->offset += resp->count; argp->pgbase += resp->count; argp->count -= resp->count; rpc_restart_call(task); - return -EAGAIN; } /* @@ -370,29 +372,37 @@ static int nfs_readpage_retry(struct rpc_task *task, struct nfs_read_data *data) static void nfs_readpage_result_partial(struct rpc_task *task, void *calldata) { struct nfs_read_data *data = calldata; - struct nfs_page *req = data->req; - struct page *page = req->wb_page; if (nfs_readpage_result(task, data) != 0) return; + if (task->tk_status < 0) + return; - if (likely(task->tk_status >= 0)) { - nfs_readpage_truncate_uninitialised_page(data); - if (nfs_readpage_retry(task, data) != 0) - return; - } - if (unlikely(task->tk_status < 0)) + nfs_readpage_truncate_uninitialised_page(data); + nfs_readpage_retry(task, data); +} + +static void nfs_readpage_release_partial(void *calldata) +{ + struct nfs_read_data *data = calldata; + struct nfs_page *req = data->req; + struct page *page = req->wb_page; + int status = data->task.tk_status; + + if (status < 0) SetPageError(page); + if (atomic_dec_and_test(&req->wb_complete)) { if (!PageError(page)) SetPageUptodate(page); nfs_readpage_release(req); } + nfs_readdata_release(calldata); } static const struct rpc_call_ops nfs_read_partial_ops = { .rpc_call_done = nfs_readpage_result_partial, - .rpc_release = nfs_readdata_release, + .rpc_release = nfs_readpage_release_partial, }; static void nfs_readpage_set_pages_uptodate(struct nfs_read_data *data) @@ -427,29 +437,35 @@ static void nfs_readpage_result_full(struct rpc_task *task, void *calldata) if (nfs_readpage_result(task, data) != 0) return; + if (task->tk_status < 0) + return; /* * Note: nfs_readpage_retry may change the values of * data->args. In the multi-page case, we therefore need * to ensure that we call nfs_readpage_set_pages_uptodate() * first. */ - if (likely(task->tk_status >= 0)) { - nfs_readpage_truncate_uninitialised_page(data); - nfs_readpage_set_pages_uptodate(data); - if (nfs_readpage_retry(task, data) != 0) - return; - } + nfs_readpage_truncate_uninitialised_page(data); + nfs_readpage_set_pages_uptodate(data); + nfs_readpage_retry(task, data); +} + +static void nfs_readpage_release_full(void *calldata) +{ + struct nfs_read_data *data = calldata; + while (!list_empty(&data->pages)) { struct nfs_page *req = nfs_list_entry(data->pages.next); nfs_list_remove_request(req); nfs_readpage_release(req); } + nfs_readdata_release(calldata); } static const struct rpc_call_ops nfs_read_full_ops = { .rpc_call_done = nfs_readpage_result_full, - .rpc_release = nfs_readdata_release, + .rpc_release = nfs_readpage_release_full, }; /* diff --git a/fs/nfs/super.c b/fs/nfs/super.c index f921902..20a1cb1 100644 --- a/fs/nfs/super.c +++ b/fs/nfs/super.c @@ -441,10 +441,52 @@ static const char *nfs_pseudoflavour_to_name(rpc_authflavor_t flavour) return sec_flavours[i].str; } +static void nfs_show_mountd_options(struct seq_file *m, struct nfs_server *nfss, + int showdefaults) +{ + struct sockaddr *sap = (struct sockaddr *)&nfss->mountd_address; + + switch (sap->sa_family) { + case AF_INET: { + struct sockaddr_in *sin = (struct sockaddr_in *)sap; + seq_printf(m, ",mountaddr=" NIPQUAD_FMT, + NIPQUAD(sin->sin_addr.s_addr)); + break; + } + case AF_INET6: { + struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)sap; + seq_printf(m, ",mountaddr=" NIP6_FMT, + NIP6(sin6->sin6_addr)); + break; + } + default: + if (showdefaults) + seq_printf(m, ",mountaddr=unspecified"); + } + + if (nfss->mountd_version || showdefaults) + seq_printf(m, ",mountvers=%u", nfss->mountd_version); + if (nfss->mountd_port || showdefaults) + seq_printf(m, ",mountport=%u", nfss->mountd_port); + + switch (nfss->mountd_protocol) { + case IPPROTO_UDP: + seq_printf(m, ",mountproto=udp"); + break; + case IPPROTO_TCP: + seq_printf(m, ",mountproto=tcp"); + break; + default: + if (showdefaults) + seq_printf(m, ",mountproto=auto"); + } +} + /* * Describe the mount options in force on this server representation */ -static void nfs_show_mount_options(struct seq_file *m, struct nfs_server *nfss, int showdefaults) +static void nfs_show_mount_options(struct seq_file *m, struct nfs_server *nfss, + int showdefaults) { static const struct proc_nfs_info { int flag; @@ -452,6 +494,8 @@ static void nfs_show_mount_options(struct seq_file *m, struct nfs_server *nfss, const char *nostr; } nfs_info[] = { { NFS_MOUNT_SOFT, ",soft", ",hard" }, + { NFS_MOUNT_INTR, ",intr", ",nointr" }, + { NFS_MOUNT_POSIX, ",posix", "" }, { NFS_MOUNT_NOCTO, ",nocto", "" }, { NFS_MOUNT_NOAC, ",noac", "" }, { NFS_MOUNT_NONLM, ",nolock", "" }, @@ -462,18 +506,22 @@ static void nfs_show_mount_options(struct seq_file *m, struct nfs_server *nfss, }; const struct proc_nfs_info *nfs_infop; struct nfs_client *clp = nfss->nfs_client; - - seq_printf(m, ",vers=%d", clp->rpc_ops->version); - seq_printf(m, ",rsize=%d", nfss->rsize); - seq_printf(m, ",wsize=%d", nfss->wsize); + u32 version = clp->rpc_ops->version; + + seq_printf(m, ",vers=%u", version); + seq_printf(m, ",rsize=%u", nfss->rsize); + seq_printf(m, ",wsize=%u", nfss->wsize); + if (nfss->bsize != 0) + seq_printf(m, ",bsize=%u", nfss->bsize); + seq_printf(m, ",namlen=%u", nfss->namelen); if (nfss->acregmin != 3*HZ || showdefaults) - seq_printf(m, ",acregmin=%d", nfss->acregmin/HZ); + seq_printf(m, ",acregmin=%u", nfss->acregmin/HZ); if (nfss->acregmax != 60*HZ || showdefaults) - seq_printf(m, ",acregmax=%d", nfss->acregmax/HZ); + seq_printf(m, ",acregmax=%u", nfss->acregmax/HZ); if (nfss->acdirmin != 30*HZ || showdefaults) - seq_printf(m, ",acdirmin=%d", nfss->acdirmin/HZ); + seq_printf(m, ",acdirmin=%u", nfss->acdirmin/HZ); if (nfss->acdirmax != 60*HZ || showdefaults) - seq_printf(m, ",acdirmax=%d", nfss->acdirmax/HZ); + seq_printf(m, ",acdirmax=%u", nfss->acdirmax/HZ); for (nfs_infop = nfs_info; nfs_infop->flag; nfs_infop++) { if (nfss->flags & nfs_infop->flag) seq_puts(m, nfs_infop->str); @@ -482,9 +530,24 @@ static void nfs_show_mount_options(struct seq_file *m, struct nfs_server *nfss, } seq_printf(m, ",proto=%s", rpc_peeraddr2str(nfss->client, RPC_DISPLAY_PROTO)); + if (version == 4) { + if (nfss->port != NFS_PORT) + seq_printf(m, ",port=%u", nfss->port); + } else + if (nfss->port) + seq_printf(m, ",port=%u", nfss->port); + seq_printf(m, ",timeo=%lu", 10U * nfss->client->cl_timeout->to_initval / HZ); seq_printf(m, ",retrans=%u", nfss->client->cl_timeout->to_retries); seq_printf(m, ",sec=%s", nfs_pseudoflavour_to_name(nfss->client->cl_auth->au_flavor)); + + if (version != 4) + nfs_show_mountd_options(m, nfss, showdefaults); + +#ifdef CONFIG_NFS_V4 + if (clp->rpc_ops->version == 4) + seq_printf(m, ",clientaddr=%s", clp->cl_ipaddr); +#endif } /* @@ -529,10 +592,10 @@ static int nfs_show_stats(struct seq_file *m, struct vfsmount *mnt) seq_printf(m, "\n\tcaps:\t"); seq_printf(m, "caps=0x%x", nfss->caps); - seq_printf(m, ",wtmult=%d", nfss->wtmult); - seq_printf(m, ",dtsize=%d", nfss->dtsize); - seq_printf(m, ",bsize=%d", nfss->bsize); - seq_printf(m, ",namelen=%d", nfss->namelen); + seq_printf(m, ",wtmult=%u", nfss->wtmult); + seq_printf(m, ",dtsize=%u", nfss->dtsize); + seq_printf(m, ",bsize=%u", nfss->bsize); + seq_printf(m, ",namlen=%u", nfss->namelen); #ifdef CONFIG_NFS_V4 if (nfss->nfs_client->rpc_ops->version == 4) { @@ -546,9 +609,9 @@ static int nfs_show_stats(struct seq_file *m, struct vfsmount *mnt) /* * Display security flavor in effect for this mount */ - seq_printf(m, "\n\tsec:\tflavor=%d", auth->au_ops->au_flavor); + seq_printf(m, "\n\tsec:\tflavor=%u", auth->au_ops->au_flavor); if (auth->au_flavor) - seq_printf(m, ",pseudoflavor=%d", auth->au_flavor); + seq_printf(m, ",pseudoflavor=%u", auth->au_flavor); /* * Display superblock I/O counters @@ -683,7 +746,6 @@ static int nfs_parse_mount_options(char *raw, struct nfs_parsed_mount_data *mnt) { char *p, *string, *secdata; - unsigned short port = 0; int rc; if (!raw) { @@ -798,7 +860,7 @@ static int nfs_parse_mount_options(char *raw, return 0; if (option < 0 || option > 65535) return 0; - port = option; + mnt->nfs_server.port = option; break; case Opt_rsize: if (match_int(args, &mnt->rsize)) @@ -1048,7 +1110,8 @@ static int nfs_parse_mount_options(char *raw, } } - nfs_set_port((struct sockaddr *)&mnt->nfs_server.address, port); + nfs_set_port((struct sockaddr *)&mnt->nfs_server.address, + mnt->nfs_server.port); return 1; @@ -1169,7 +1232,9 @@ static int nfs_validate_mount_data(void *options, args->acregmax = 60; args->acdirmin = 30; args->acdirmax = 60; + args->mount_server.port = 0; /* autobind unless user sets port */ args->mount_server.protocol = XPRT_TRANSPORT_UDP; + args->nfs_server.port = 0; /* autobind unless user sets port */ args->nfs_server.protocol = XPRT_TRANSPORT_TCP; switch (data->version) { @@ -1208,7 +1273,6 @@ static int nfs_validate_mount_data(void *options, args->flags = data->flags; args->rsize = data->rsize; args->wsize = data->wsize; - args->flags = data->flags; args->timeo = data->timeo; args->retrans = data->retrans; args->acregmin = data->acregmin; @@ -1230,6 +1294,8 @@ static int nfs_validate_mount_data(void *options, args->namlen = data->namlen; args->bsize = data->bsize; args->auth_flavors[0] = data->pseudoflavor; + if (!args->nfs_server.hostname) + goto out_nomem; /* * The legacy version 6 binary mount data from userspace has a @@ -1276,6 +1342,8 @@ static int nfs_validate_mount_data(void *options, len = c - dev_name; /* N.B. caller will free nfs_server.hostname in all cases */ args->nfs_server.hostname = kstrndup(dev_name, len, GFP_KERNEL); + if (!args->nfs_server.hostname) + goto out_nomem; c++; if (strlen(c) > NFS_MAXPATHLEN) @@ -1319,6 +1387,10 @@ out_v3_not_compiled: return -EPROTONOSUPPORT; #endif /* !CONFIG_NFS_V3 */ +out_nomem: + dfprintk(MOUNT, "NFS: not enough memory to handle mount options\n"); + return -ENOMEM; + out_no_address: dfprintk(MOUNT, "NFS: mount program didn't pass remote address\n"); return -EINVAL; @@ -1706,28 +1778,6 @@ static void nfs4_fill_super(struct super_block *sb) } /* - * If the user didn't specify a port, set the port number to - * the NFS version 4 default port. - */ -static void nfs4_default_port(struct sockaddr *sap) -{ - switch (sap->sa_family) { - case AF_INET: { - struct sockaddr_in *ap = (struct sockaddr_in *)sap; - if (ap->sin_port == 0) - ap->sin_port = htons(NFS_PORT); - break; - } - case AF_INET6: { - struct sockaddr_in6 *ap = (struct sockaddr_in6 *)sap; - if (ap->sin6_port == 0) - ap->sin6_port = htons(NFS_PORT); - break; - } - } -} - -/* * Validate NFSv4 mount options */ static int nfs4_validate_mount_data(void *options, @@ -1751,6 +1801,7 @@ static int nfs4_validate_mount_data(void *options, args->acregmax = 60; args->acdirmin = 30; args->acdirmax = 60; + args->nfs_server.port = NFS_PORT; /* 2049 unless user set port= */ args->nfs_server.protocol = XPRT_TRANSPORT_TCP; switch (data->version) { @@ -1767,9 +1818,6 @@ static int nfs4_validate_mount_data(void *options, &args->nfs_server.address)) goto out_no_address; - nfs4_default_port((struct sockaddr *) - &args->nfs_server.address); - switch (data->auth_flavourlen) { case 0: args->auth_flavors[0] = RPC_AUTH_UNIX; @@ -1827,9 +1875,6 @@ static int nfs4_validate_mount_data(void *options, &args->nfs_server.address)) return -EINVAL; - nfs4_default_port((struct sockaddr *) - &args->nfs_server.address); - switch (args->auth_flavor_len) { case 0: args->auth_flavors[0] = RPC_AUTH_UNIX; @@ -1852,12 +1897,16 @@ static int nfs4_validate_mount_data(void *options, return -ENAMETOOLONG; /* N.B. caller will free nfs_server.hostname in all cases */ args->nfs_server.hostname = kstrndup(dev_name, len, GFP_KERNEL); + if (!args->nfs_server.hostname) + goto out_nomem; c++; /* step over the ':' */ len = strlen(c); if (len > NFS4_MAXPATHLEN) return -ENAMETOOLONG; args->nfs_server.export_path = kstrndup(c, len, GFP_KERNEL); + if (!args->nfs_server.export_path) + goto out_nomem; dprintk("NFS: MNTPATH: '%s'\n", args->nfs_server.export_path); @@ -1879,6 +1928,10 @@ out_inval_auth: data->auth_flavourlen); return -EINVAL; +out_nomem: + dfprintk(MOUNT, "NFS4: not enough memory to handle mount options\n"); + return -ENOMEM; + out_no_address: dfprintk(MOUNT, "NFS4: mount program didn't pass remote address\n"); return -EINVAL; diff --git a/fs/nfs/unlink.c b/fs/nfs/unlink.c index 7574153..3adf8b2 100644 --- a/fs/nfs/unlink.c +++ b/fs/nfs/unlink.c @@ -234,7 +234,7 @@ nfs_async_unlink(struct inode *dir, struct dentry *dentry) if (data == NULL) goto out; - data->cred = rpcauth_lookupcred(NFS_CLIENT(dir)->cl_auth, 0); + data->cred = rpc_lookup_cred(); if (IS_ERR(data->cred)) { status = PTR_ERR(data->cred); goto out_free; diff --git a/fs/nfs/write.c b/fs/nfs/write.c index bed6341..1ade11d1 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -48,7 +48,7 @@ static struct kmem_cache *nfs_wdata_cachep; static mempool_t *nfs_wdata_mempool; static mempool_t *nfs_commit_mempool; -struct nfs_write_data *nfs_commit_alloc(void) +struct nfs_write_data *nfs_commitdata_alloc(void) { struct nfs_write_data *p = mempool_alloc(nfs_commit_mempool, GFP_NOFS); @@ -59,19 +59,13 @@ struct nfs_write_data *nfs_commit_alloc(void) return p; } -static void nfs_commit_rcu_free(struct rcu_head *head) +void nfs_commit_free(struct nfs_write_data *p) { - struct nfs_write_data *p = container_of(head, struct nfs_write_data, task.u.tk_rcu); if (p && (p->pagevec != &p->page_array[0])) kfree(p->pagevec); mempool_free(p, nfs_commit_mempool); } -void nfs_commit_free(struct nfs_write_data *wdata) -{ - call_rcu_bh(&wdata->task.u.tk_rcu, nfs_commit_rcu_free); -} - struct nfs_write_data *nfs_writedata_alloc(unsigned int pagecount) { struct nfs_write_data *p = mempool_alloc(nfs_wdata_mempool, GFP_NOFS); @@ -93,21 +87,18 @@ struct nfs_write_data *nfs_writedata_alloc(unsigned int pagecount) return p; } -static void nfs_writedata_rcu_free(struct rcu_head *head) +static void nfs_writedata_free(struct nfs_write_data *p) { - struct nfs_write_data *p = container_of(head, struct nfs_write_data, task.u.tk_rcu); if (p && (p->pagevec != &p->page_array[0])) kfree(p->pagevec); mempool_free(p, nfs_wdata_mempool); } -static void nfs_writedata_free(struct nfs_write_data *wdata) +void nfs_writedata_release(void *data) { - call_rcu_bh(&wdata->task.u.tk_rcu, nfs_writedata_rcu_free); -} + struct nfs_write_data *wdata = data; -void nfs_writedata_release(void *wdata) -{ + put_nfs_open_context(wdata->args.context); nfs_writedata_free(wdata); } @@ -291,8 +282,6 @@ static int nfs_page_async_flush(struct nfs_pageio_descriptor *pgio, spin_unlock(&inode->i_lock); if (!nfs_pageio_add_request(pgio, req)) { nfs_redirty_request(req); - nfs_end_page_writeback(page); - nfs_clear_page_tag_locked(req); return pgio->pg_error; } return 0; @@ -366,15 +355,13 @@ int nfs_writepages(struct address_space *mapping, struct writeback_control *wbc) /* * Insert a write request into an inode */ -static int nfs_inode_add_request(struct inode *inode, struct nfs_page *req) +static void nfs_inode_add_request(struct inode *inode, struct nfs_page *req) { struct nfs_inode *nfsi = NFS_I(inode); int error; error = radix_tree_insert(&nfsi->nfs_page_tree, req->wb_index, req); - BUG_ON(error == -EEXIST); - if (error) - return error; + BUG_ON(error); if (!nfsi->npages) { igrab(inode); if (nfs_have_delegation(inode, FMODE_WRITE)) @@ -384,8 +371,8 @@ static int nfs_inode_add_request(struct inode *inode, struct nfs_page *req) set_page_private(req->wb_page, (unsigned long)req); nfsi->npages++; kref_get(&req->wb_kref); - radix_tree_tag_set(&nfsi->nfs_page_tree, req->wb_index, NFS_PAGE_TAG_LOCKED); - return 0; + radix_tree_tag_set(&nfsi->nfs_page_tree, req->wb_index, + NFS_PAGE_TAG_LOCKED); } /* @@ -413,7 +400,7 @@ static void nfs_inode_remove_request(struct nfs_page *req) } static void -nfs_redirty_request(struct nfs_page *req) +nfs_mark_request_dirty(struct nfs_page *req) { __set_page_dirty_nobuffers(req->wb_page); } @@ -467,7 +454,7 @@ int nfs_reschedule_unstable_write(struct nfs_page *req) return 1; } if (test_and_clear_bit(PG_NEED_RESCHED, &req->wb_flags)) { - nfs_redirty_request(req); + nfs_mark_request_dirty(req); return 1; } return 0; @@ -597,6 +584,13 @@ static struct nfs_page * nfs_update_request(struct nfs_open_context* ctx, /* Loop over all inode entries and see if we find * A request for the page we wish to update */ + if (new) { + if (radix_tree_preload(GFP_NOFS)) { + nfs_release_request(new); + return ERR_PTR(-ENOMEM); + } + } + spin_lock(&inode->i_lock); req = nfs_page_find_request_locked(page); if (req) { @@ -607,28 +601,27 @@ static struct nfs_page * nfs_update_request(struct nfs_open_context* ctx, error = nfs_wait_on_request(req); nfs_release_request(req); if (error < 0) { - if (new) + if (new) { + radix_tree_preload_end(); nfs_release_request(new); + } return ERR_PTR(error); } continue; } spin_unlock(&inode->i_lock); - if (new) + if (new) { + radix_tree_preload_end(); nfs_release_request(new); + } break; } if (new) { - int error; nfs_lock_request_dontget(new); - error = nfs_inode_add_request(inode, new); - if (error) { - spin_unlock(&inode->i_lock); - nfs_unlock_request(new); - return ERR_PTR(error); - } + nfs_inode_add_request(inode, new); spin_unlock(&inode->i_lock); + radix_tree_preload_end(); req = new; goto zero_page; } @@ -785,7 +778,7 @@ static int flush_task_priority(int how) /* * Set up the argument/result storage required for the RPC call. */ -static void nfs_write_rpcsetup(struct nfs_page *req, +static int nfs_write_rpcsetup(struct nfs_page *req, struct nfs_write_data *data, const struct rpc_call_ops *call_ops, unsigned int count, unsigned int offset, @@ -806,6 +799,7 @@ static void nfs_write_rpcsetup(struct nfs_page *req, .rpc_message = &msg, .callback_ops = call_ops, .callback_data = data, + .workqueue = nfsiod_workqueue, .flags = flags, .priority = priority, }; @@ -822,7 +816,7 @@ static void nfs_write_rpcsetup(struct nfs_page *req, data->args.pgbase = req->wb_pgbase + offset; data->args.pages = data->pagevec; data->args.count = count; - data->args.context = req->wb_context; + data->args.context = get_nfs_open_context(req->wb_context); data->args.stable = NFS_UNSTABLE; if (how & FLUSH_STABLE) { data->args.stable = NFS_DATA_SYNC; @@ -847,8 +841,21 @@ static void nfs_write_rpcsetup(struct nfs_page *req, (unsigned long long)data->args.offset); task = rpc_run_task(&task_setup_data); - if (!IS_ERR(task)) - rpc_put_task(task); + if (IS_ERR(task)) + return PTR_ERR(task); + rpc_put_task(task); + return 0; +} + +/* If a nfs_flush_* function fails, it should remove reqs from @head and + * call this on each, which will prepare them to be retried on next + * writeback using standard nfs. + */ +static void nfs_redirty_request(struct nfs_page *req) +{ + nfs_mark_request_dirty(req); + nfs_end_page_writeback(req->wb_page); + nfs_clear_page_tag_locked(req); } /* @@ -863,6 +870,7 @@ static int nfs_flush_multi(struct inode *inode, struct list_head *head, unsigned size_t wsize = NFS_SERVER(inode)->wsize, nbytes; unsigned int offset; int requests = 0; + int ret = 0; LIST_HEAD(list); nfs_list_remove_request(req); @@ -884,6 +892,8 @@ static int nfs_flush_multi(struct inode *inode, struct list_head *head, unsigned offset = 0; nbytes = count; do { + int ret2; + data = list_entry(list.next, struct nfs_write_data, pages); list_del_init(&data->pages); @@ -891,13 +901,15 @@ static int nfs_flush_multi(struct inode *inode, struct list_head *head, unsigned if (nbytes < wsize) wsize = nbytes; - nfs_write_rpcsetup(req, data, &nfs_write_partial_ops, + ret2 = nfs_write_rpcsetup(req, data, &nfs_write_partial_ops, wsize, offset, how); + if (ret == 0) + ret = ret2; offset += wsize; nbytes -= wsize; } while (nbytes != 0); - return 0; + return ret; out_bad: while (!list_empty(&list)) { @@ -906,8 +918,6 @@ out_bad: nfs_writedata_release(data); } nfs_redirty_request(req); - nfs_end_page_writeback(req->wb_page); - nfs_clear_page_tag_locked(req); return -ENOMEM; } @@ -940,16 +950,12 @@ static int nfs_flush_one(struct inode *inode, struct list_head *head, unsigned i req = nfs_list_entry(data->pages.next); /* Set up the argument struct */ - nfs_write_rpcsetup(req, data, &nfs_write_full_ops, count, 0, how); - - return 0; + return nfs_write_rpcsetup(req, data, &nfs_write_full_ops, count, 0, how); out_bad: while (!list_empty(head)) { req = nfs_list_entry(head->next); nfs_list_remove_request(req); nfs_redirty_request(req); - nfs_end_page_writeback(req->wb_page); - nfs_clear_page_tag_locked(req); } return -ENOMEM; } @@ -972,7 +978,6 @@ static void nfs_writeback_done_partial(struct rpc_task *task, void *calldata) { struct nfs_write_data *data = calldata; struct nfs_page *req = data->req; - struct page *page = req->wb_page; dprintk("NFS: write (%s/%Ld %d@%Ld)", req->wb_context->path.dentry->d_inode->i_sb->s_id, @@ -980,13 +985,20 @@ static void nfs_writeback_done_partial(struct rpc_task *task, void *calldata) req->wb_bytes, (long long)req_offset(req)); - if (nfs_writeback_done(task, data) != 0) - return; + nfs_writeback_done(task, data); +} - if (task->tk_status < 0) { +static void nfs_writeback_release_partial(void *calldata) +{ + struct nfs_write_data *data = calldata; + struct nfs_page *req = data->req; + struct page *page = req->wb_page; + int status = data->task.tk_status; + + if (status < 0) { nfs_set_pageerror(page); - nfs_context_set_write_error(req->wb_context, task->tk_status); - dprintk(", error = %d\n", task->tk_status); + nfs_context_set_write_error(req->wb_context, status); + dprintk(", error = %d\n", status); goto out; } @@ -1011,11 +1023,12 @@ static void nfs_writeback_done_partial(struct rpc_task *task, void *calldata) out: if (atomic_dec_and_test(&req->wb_complete)) nfs_writepage_release(req); + nfs_writedata_release(calldata); } static const struct rpc_call_ops nfs_write_partial_ops = { .rpc_call_done = nfs_writeback_done_partial, - .rpc_release = nfs_writedata_release, + .rpc_release = nfs_writeback_release_partial, }; /* @@ -1028,17 +1041,21 @@ static const struct rpc_call_ops nfs_write_partial_ops = { static void nfs_writeback_done_full(struct rpc_task *task, void *calldata) { struct nfs_write_data *data = calldata; - struct nfs_page *req; - struct page *page; - if (nfs_writeback_done(task, data) != 0) - return; + nfs_writeback_done(task, data); +} + +static void nfs_writeback_release_full(void *calldata) +{ + struct nfs_write_data *data = calldata; + int status = data->task.tk_status; /* Update attributes as result of writeback. */ while (!list_empty(&data->pages)) { - req = nfs_list_entry(data->pages.next); + struct nfs_page *req = nfs_list_entry(data->pages.next); + struct page *page = req->wb_page; + nfs_list_remove_request(req); - page = req->wb_page; dprintk("NFS: write (%s/%Ld %d@%Ld)", req->wb_context->path.dentry->d_inode->i_sb->s_id, @@ -1046,10 +1063,10 @@ static void nfs_writeback_done_full(struct rpc_task *task, void *calldata) req->wb_bytes, (long long)req_offset(req)); - if (task->tk_status < 0) { + if (status < 0) { nfs_set_pageerror(page); - nfs_context_set_write_error(req->wb_context, task->tk_status); - dprintk(", error = %d\n", task->tk_status); + nfs_context_set_write_error(req->wb_context, status); + dprintk(", error = %d\n", status); goto remove_request; } @@ -1069,11 +1086,12 @@ remove_request: next: nfs_clear_page_tag_locked(req); } + nfs_writedata_release(calldata); } static const struct rpc_call_ops nfs_write_full_ops = { .rpc_call_done = nfs_writeback_done_full, - .rpc_release = nfs_writedata_release, + .rpc_release = nfs_writeback_release_full, }; @@ -1159,15 +1177,18 @@ int nfs_writeback_done(struct rpc_task *task, struct nfs_write_data *data) #if defined(CONFIG_NFS_V3) || defined(CONFIG_NFS_V4) -void nfs_commit_release(void *wdata) +void nfs_commitdata_release(void *data) { + struct nfs_write_data *wdata = data; + + put_nfs_open_context(wdata->args.context); nfs_commit_free(wdata); } /* * Set up the argument/result storage required for the RPC call. */ -static void nfs_commit_rpcsetup(struct list_head *head, +static int nfs_commit_rpcsetup(struct list_head *head, struct nfs_write_data *data, int how) { @@ -1187,6 +1208,7 @@ static void nfs_commit_rpcsetup(struct list_head *head, .rpc_message = &msg, .callback_ops = &nfs_commit_ops, .callback_data = data, + .workqueue = nfsiod_workqueue, .flags = flags, .priority = priority, }; @@ -1203,6 +1225,7 @@ static void nfs_commit_rpcsetup(struct list_head *head, /* Note: we always request a commit of the entire inode */ data->args.offset = 0; data->args.count = 0; + data->args.context = get_nfs_open_context(first->wb_context); data->res.count = 0; data->res.fattr = &data->fattr; data->res.verf = &data->verf; @@ -1214,8 +1237,10 @@ static void nfs_commit_rpcsetup(struct list_head *head, dprintk("NFS: %5u initiated commit call\n", data->task.tk_pid); task = rpc_run_task(&task_setup_data); - if (!IS_ERR(task)) - rpc_put_task(task); + if (IS_ERR(task)) + return PTR_ERR(task); + rpc_put_task(task); + return 0; } /* @@ -1227,15 +1252,13 @@ nfs_commit_list(struct inode *inode, struct list_head *head, int how) struct nfs_write_data *data; struct nfs_page *req; - data = nfs_commit_alloc(); + data = nfs_commitdata_alloc(); if (!data) goto out_bad; /* Set up the argument struct */ - nfs_commit_rpcsetup(head, data, how); - - return 0; + return nfs_commit_rpcsetup(head, data, how); out_bad: while (!list_empty(head)) { req = nfs_list_entry(head->next); @@ -1255,7 +1278,6 @@ nfs_commit_list(struct inode *inode, struct list_head *head, int how) static void nfs_commit_done(struct rpc_task *task, void *calldata) { struct nfs_write_data *data = calldata; - struct nfs_page *req; dprintk("NFS: %5u nfs_commit_done (status %d)\n", task->tk_pid, task->tk_status); @@ -1263,6 +1285,13 @@ static void nfs_commit_done(struct rpc_task *task, void *calldata) /* Call the NFS version-specific code */ if (NFS_PROTO(data->inode)->commit_done(task, data) != 0) return; +} + +static void nfs_commit_release(void *calldata) +{ + struct nfs_write_data *data = calldata; + struct nfs_page *req; + int status = data->task.tk_status; while (!list_empty(&data->pages)) { req = nfs_list_entry(data->pages.next); @@ -1277,10 +1306,10 @@ static void nfs_commit_done(struct rpc_task *task, void *calldata) (long long)NFS_FILEID(req->wb_context->path.dentry->d_inode), req->wb_bytes, (long long)req_offset(req)); - if (task->tk_status < 0) { - nfs_context_set_write_error(req->wb_context, task->tk_status); + if (status < 0) { + nfs_context_set_write_error(req->wb_context, status); nfs_inode_remove_request(req); - dprintk(", error = %d\n", task->tk_status); + dprintk(", error = %d\n", status); goto next; } @@ -1297,10 +1326,11 @@ static void nfs_commit_done(struct rpc_task *task, void *calldata) } /* We have a mismatch. Write the page again */ dprintk(" mismatch\n"); - nfs_redirty_request(req); + nfs_mark_request_dirty(req); next: nfs_clear_page_tag_locked(req); } + nfs_commitdata_release(calldata); } static const struct rpc_call_ops nfs_commit_ops = { @@ -1487,18 +1517,19 @@ static int nfs_wb_page_priority(struct inode *inode, struct page *page, }; int ret; - BUG_ON(!PageLocked(page)); - if (clear_page_dirty_for_io(page)) { - ret = nfs_writepage_locked(page, &wbc); + do { + if (clear_page_dirty_for_io(page)) { + ret = nfs_writepage_locked(page, &wbc); + if (ret < 0) + goto out_error; + } else if (!PagePrivate(page)) + break; + ret = nfs_sync_mapping_wait(page->mapping, &wbc, how); if (ret < 0) - goto out; - } - if (!PagePrivate(page)) - return 0; - ret = nfs_sync_mapping_wait(page->mapping, &wbc, how); - if (ret >= 0) - return 0; -out: + goto out_error; + } while (PagePrivate(page)); + return 0; +out_error: __mark_inode_dirty(inode, I_DIRTY_PAGES); return ret; } diff --git a/include/linux/lockd/lockd.h b/include/linux/lockd/lockd.h index 4babb2a..94649a8 100644 --- a/include/linux/lockd/lockd.h +++ b/include/linux/lockd/lockd.h @@ -91,6 +91,7 @@ struct nlm_wait; */ #define NLMCLNT_OHSIZE ((__NEW_UTS_LEN) + 10u) struct nlm_rqst { + atomic_t a_count; unsigned int a_flags; /* initial RPC task flags */ struct nlm_host * a_host; /* host handle */ struct nlm_args a_args; /* arguments */ @@ -173,8 +174,10 @@ void nlmclnt_next_cookie(struct nlm_cookie *); /* * Host cache */ -struct nlm_host *nlmclnt_lookup_host(const struct sockaddr_in *, int, int, - const char *, unsigned int); +struct nlm_host *nlmclnt_lookup_host(const struct sockaddr_in *sin, + int proto, u32 version, + const char *hostname, + unsigned int hostname_len); struct nlm_host *nlmsvc_lookup_host(struct svc_rqst *, const char *, unsigned int); struct rpc_clnt * nlm_bind_host(struct nlm_host *); @@ -217,8 +220,7 @@ void nlmsvc_mark_resources(void); void nlmsvc_free_host_resources(struct nlm_host *); void nlmsvc_invalidate_all(void); -static __inline__ struct inode * -nlmsvc_file_inode(struct nlm_file *file) +static inline struct inode *nlmsvc_file_inode(struct nlm_file *file) { return file->f_file->f_path.dentry->d_inode; } @@ -226,8 +228,8 @@ nlmsvc_file_inode(struct nlm_file *file) /* * Compare two host addresses (needs modifying for ipv6) */ -static __inline__ int -nlm_cmp_addr(const struct sockaddr_in *sin1, const struct sockaddr_in *sin2) +static inline int nlm_cmp_addr(const struct sockaddr_in *sin1, + const struct sockaddr_in *sin2) { return sin1->sin_addr.s_addr == sin2->sin_addr.s_addr; } @@ -236,8 +238,8 @@ nlm_cmp_addr(const struct sockaddr_in *sin1, const struct sockaddr_in *sin2) * Compare two NLM locks. * When the second lock is of type F_UNLCK, this acts like a wildcard. */ -static __inline__ int -nlm_compare_locks(const struct file_lock *fl1, const struct file_lock *fl2) +static inline int nlm_compare_locks(const struct file_lock *fl1, + const struct file_lock *fl2) { return fl1->fl_pid == fl2->fl_pid && fl1->fl_owner == fl2->fl_owner diff --git a/include/linux/lockd/sm_inter.h b/include/linux/lockd/sm_inter.h index 22a6458..5a5448b 100644 --- a/include/linux/lockd/sm_inter.h +++ b/include/linux/lockd/sm_inter.h @@ -19,6 +19,7 @@ #define SM_NOTIFY 6 #define SM_MAXSTRLEN 1024 +#define SM_PRIV_SIZE 16 /* * Arguments for all calls to statd diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h index f4a0e4c..27d6a8d 100644 --- a/include/linux/nfs_fs.h +++ b/include/linux/nfs_fs.h @@ -430,7 +430,6 @@ extern void nfs_unregister_sysctl(void); /* * linux/fs/nfs/namespace.c */ -extern struct list_head nfs_automount_list; extern const struct inode_operations nfs_mountpoint_inode_operations; extern const struct inode_operations nfs_referral_inode_operations; extern int nfs_mountpoint_expiry_timeout; @@ -466,9 +465,9 @@ extern int nfs_wb_page(struct inode *inode, struct page* page); extern int nfs_wb_page_cancel(struct inode *inode, struct page* page); #if defined(CONFIG_NFS_V3) || defined(CONFIG_NFS_V4) extern int nfs_commit_inode(struct inode *, int); -extern struct nfs_write_data *nfs_commit_alloc(void); +extern struct nfs_write_data *nfs_commitdata_alloc(void); extern void nfs_commit_free(struct nfs_write_data *wdata); -extern void nfs_commit_release(void *wdata); +extern void nfs_commitdata_release(void *wdata); #else static inline int nfs_commit_inode(struct inode *inode, int how) diff --git a/include/linux/nfs_fs_sb.h b/include/linux/nfs_fs_sb.h index 3423c67..c9beacd 100644 --- a/include/linux/nfs_fs_sb.h +++ b/include/linux/nfs_fs_sb.h @@ -32,6 +32,8 @@ struct nfs_client { const struct nfs_rpc_ops *rpc_ops; /* NFS protocol vector */ int cl_proto; /* Network transport protocol */ + struct rpc_cred *cl_machine_cred; + #ifdef CONFIG_NFS_V4 u64 cl_clientid; /* constant */ nfs4_verifier cl_confirm; @@ -93,6 +95,7 @@ struct nfs_server { unsigned int wpages; /* write size (in pages) */ unsigned int wtmult; /* server disk block size */ unsigned int dtsize; /* readdir size */ + unsigned short port; /* "port=" setting */ unsigned int bsize; /* server block size */ unsigned int acregmin; /* attr cache timeouts */ unsigned int acregmax; @@ -117,6 +120,13 @@ struct nfs_server { atomic_t active; /* Keep trace of any activity to this server */ wait_queue_head_t active_wq; /* Wait for any activity to stop */ + + /* mountd-related mount options */ + struct sockaddr_storage mountd_address; + size_t mountd_addrlen; + u32 mountd_version; + unsigned short mountd_port; + unsigned short mountd_protocol; }; /* Server capabilities */ diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index f301d0b..24263bb 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h @@ -140,6 +140,7 @@ struct nfs_openres { __u32 rflags; struct nfs_fattr * f_attr; struct nfs_fattr * dir_attr; + struct nfs_seqid * seqid; const struct nfs_server *server; int delegation_type; nfs4_stateid delegation; @@ -159,6 +160,7 @@ struct nfs_open_confirmargs { struct nfs_open_confirmres { nfs4_stateid stateid; + struct nfs_seqid * seqid; }; /* @@ -175,6 +177,7 @@ struct nfs_closeargs { struct nfs_closeres { nfs4_stateid stateid; struct nfs_fattr * fattr; + struct nfs_seqid * seqid; const struct nfs_server *server; }; /* @@ -199,7 +202,9 @@ struct nfs_lock_args { }; struct nfs_lock_res { - nfs4_stateid stateid; + nfs4_stateid stateid; + struct nfs_seqid * lock_seqid; + struct nfs_seqid * open_seqid; }; struct nfs_locku_args { @@ -210,7 +215,8 @@ struct nfs_locku_args { }; struct nfs_locku_res { - nfs4_stateid stateid; + nfs4_stateid stateid; + struct nfs_seqid * seqid; }; struct nfs_lockt_args { diff --git a/include/linux/sunrpc/auth.h b/include/linux/sunrpc/auth.h index 7a69ca3..3f63218 100644 --- a/include/linux/sunrpc/auth.h +++ b/include/linux/sunrpc/auth.h @@ -26,6 +26,7 @@ struct auth_cred { uid_t uid; gid_t gid; struct group_info *group_info; + unsigned char machine_cred : 1; }; /* @@ -59,8 +60,8 @@ struct rpc_cred { /* * Client authentication handle */ -#define RPC_CREDCACHE_NR 8 -#define RPC_CREDCACHE_MASK (RPC_CREDCACHE_NR - 1) +#define RPC_CREDCACHE_HASHBITS 4 +#define RPC_CREDCACHE_NR (1 << RPC_CREDCACHE_HASHBITS) struct rpc_cred_cache { struct hlist_head hashtable[RPC_CREDCACHE_NR]; spinlock_t lock; @@ -89,7 +90,6 @@ struct rpc_auth { /* Flags for rpcauth_lookupcred() */ #define RPCAUTH_LOOKUP_NEW 0x01 /* Accept an uninitialised cred */ -#define RPCAUTH_LOOKUP_ROOTCREDS 0x02 /* This really ought to go! */ /* * Client authentication ops @@ -97,9 +97,7 @@ struct rpc_auth { struct rpc_authops { struct module *owner; rpc_authflavor_t au_flavor; /* flavor (RPC_AUTH_*) */ -#ifdef RPC_DEBUG char * au_name; -#endif struct rpc_auth * (*create)(struct rpc_clnt *, rpc_authflavor_t); void (*destroy)(struct rpc_auth *); @@ -113,6 +111,7 @@ struct rpc_credops { void (*crdestroy)(struct rpc_cred *); int (*crmatch)(struct auth_cred *, struct rpc_cred *, int); + void (*crbind)(struct rpc_task *, struct rpc_cred *); __be32 * (*crmarshal)(struct rpc_task *, __be32 *); int (*crrefresh)(struct rpc_task *); __be32 * (*crvalidate)(struct rpc_task *, __be32 *); @@ -126,9 +125,13 @@ extern const struct rpc_authops authunix_ops; extern const struct rpc_authops authnull_ops; void __init rpc_init_authunix(void); +void __init rpc_init_generic_auth(void); void __init rpcauth_init_module(void); void __exit rpcauth_remove_module(void); +void __exit rpc_destroy_generic_auth(void); +struct rpc_cred * rpc_lookup_cred(void); +struct rpc_cred * rpc_lookup_machine_cred(void); int rpcauth_register(const struct rpc_authops *); int rpcauth_unregister(const struct rpc_authops *); struct rpc_auth * rpcauth_create(rpc_authflavor_t, struct rpc_clnt *); @@ -136,8 +139,8 @@ void rpcauth_release(struct rpc_auth *); struct rpc_cred * rpcauth_lookup_credcache(struct rpc_auth *, struct auth_cred *, int); void rpcauth_init_cred(struct rpc_cred *, const struct auth_cred *, struct rpc_auth *, const struct rpc_credops *); struct rpc_cred * rpcauth_lookupcred(struct rpc_auth *, int); -struct rpc_cred * rpcauth_bindcred(struct rpc_task *); -void rpcauth_holdcred(struct rpc_task *); +void rpcauth_bindcred(struct rpc_task *, struct rpc_cred *, int); +void rpcauth_generic_bind_cred(struct rpc_task *, struct rpc_cred *); void put_rpccred(struct rpc_cred *); void rpcauth_unbindcred(struct rpc_task *); __be32 * rpcauth_marshcred(struct rpc_task *, __be32 *); diff --git a/include/linux/sunrpc/auth_gss.h b/include/linux/sunrpc/auth_gss.h index 67658e1..fec6899 100644 --- a/include/linux/sunrpc/auth_gss.h +++ b/include/linux/sunrpc/auth_gss.h @@ -84,6 +84,7 @@ struct gss_cred { enum rpc_gss_svc gc_service; struct gss_cl_ctx *gc_ctx; struct gss_upcall_msg *gc_upcall; + unsigned char gc_machine_cred : 1; }; #endif /* __KERNEL__ */ diff --git a/include/linux/sunrpc/clnt.h b/include/linux/sunrpc/clnt.h index 129a86e..6fff7f8 100644 --- a/include/linux/sunrpc/clnt.h +++ b/include/linux/sunrpc/clnt.h @@ -127,11 +127,12 @@ int rpcb_getport_sync(struct sockaddr_in *, u32, u32, int); void rpcb_getport_async(struct rpc_task *); void rpc_call_start(struct rpc_task *); -int rpc_call_async(struct rpc_clnt *clnt, struct rpc_message *msg, - int flags, const struct rpc_call_ops *tk_ops, +int rpc_call_async(struct rpc_clnt *clnt, + const struct rpc_message *msg, int flags, + const struct rpc_call_ops *tk_ops, void *calldata); -int rpc_call_sync(struct rpc_clnt *clnt, struct rpc_message *msg, - int flags); +int rpc_call_sync(struct rpc_clnt *clnt, + const struct rpc_message *msg, int flags); struct rpc_task *rpc_call_null(struct rpc_clnt *clnt, struct rpc_cred *cred, int flags); void rpc_restart_call(struct rpc_task *); diff --git a/include/linux/sunrpc/sched.h b/include/linux/sunrpc/sched.h index f689f02..d1a5c8c 100644 --- a/include/linux/sunrpc/sched.h +++ b/include/linux/sunrpc/sched.h @@ -11,7 +11,6 @@ #include <linux/timer.h> #include <linux/sunrpc/types.h> -#include <linux/rcupdate.h> #include <linux/spinlock.h> #include <linux/wait.h> #include <linux/workqueue.h> @@ -33,7 +32,8 @@ struct rpc_wait_queue; struct rpc_wait { struct list_head list; /* wait queue links */ struct list_head links; /* Links to related tasks */ - struct rpc_wait_queue * rpc_waitq; /* RPC wait queue we're on */ + struct list_head timer_list; /* Timer list */ + unsigned long expires; }; /* @@ -57,33 +57,25 @@ struct rpc_task { __u8 tk_cred_retry; /* - * timeout_fn to be executed by timer bottom half * callback to be executed after waking up * action next procedure for async tasks * tk_ops caller callbacks */ - void (*tk_timeout_fn)(struct rpc_task *); void (*tk_callback)(struct rpc_task *); void (*tk_action)(struct rpc_task *); const struct rpc_call_ops *tk_ops; void * tk_calldata; - /* - * tk_timer is used for async processing by the RPC scheduling - * primitives. You should not access this directly unless - * you have a pathological interest in kernel oopses. - */ - struct timer_list tk_timer; /* kernel timer */ unsigned long tk_timeout; /* timeout for rpc_sleep() */ unsigned short tk_flags; /* misc flags */ unsigned long tk_runstate; /* Task run status */ struct workqueue_struct *tk_workqueue; /* Normally rpciod, but could * be any workqueue */ + struct rpc_wait_queue *tk_waitqueue; /* RPC wait queue we're on */ union { struct work_struct tk_work; /* Async task work queue */ struct rpc_wait tk_wait; /* RPC wait */ - struct rcu_head tk_rcu; /* for task deletion */ } u; unsigned short tk_timeouts; /* maj timeouts */ @@ -123,6 +115,7 @@ struct rpc_task_setup { const struct rpc_message *rpc_message; const struct rpc_call_ops *callback_ops; void *callback_data; + struct workqueue_struct *workqueue; unsigned short flags; signed char priority; }; @@ -147,9 +140,7 @@ struct rpc_task_setup { #define RPC_TASK_RUNNING 0 #define RPC_TASK_QUEUED 1 -#define RPC_TASK_WAKEUP 2 -#define RPC_TASK_HAS_TIMER 3 -#define RPC_TASK_ACTIVE 4 +#define RPC_TASK_ACTIVE 2 #define RPC_IS_RUNNING(t) test_bit(RPC_TASK_RUNNING, &(t)->tk_runstate) #define rpc_set_running(t) set_bit(RPC_TASK_RUNNING, &(t)->tk_runstate) @@ -171,15 +162,6 @@ struct rpc_task_setup { smp_mb__after_clear_bit(); \ } while (0) -#define rpc_start_wakeup(t) \ - (test_and_set_bit(RPC_TASK_WAKEUP, &(t)->tk_runstate) == 0) -#define rpc_finish_wakeup(t) \ - do { \ - smp_mb__before_clear_bit(); \ - clear_bit(RPC_TASK_WAKEUP, &(t)->tk_runstate); \ - smp_mb__after_clear_bit(); \ - } while (0) - #define RPC_IS_ACTIVATED(t) test_bit(RPC_TASK_ACTIVE, &(t)->tk_runstate) /* @@ -192,6 +174,12 @@ struct rpc_task_setup { #define RPC_PRIORITY_HIGH (1) #define RPC_NR_PRIORITY (1 + RPC_PRIORITY_HIGH - RPC_PRIORITY_LOW) +struct rpc_timer { + struct timer_list timer; + struct list_head list; + unsigned long expires; +}; + /* * RPC synchronization objects */ @@ -204,6 +192,7 @@ struct rpc_wait_queue { unsigned char count; /* # task groups remaining serviced so far */ unsigned char nr; /* # tasks remaining for cookie */ unsigned short qlen; /* total # tasks waiting in queue */ + struct rpc_timer timer_list; #ifdef RPC_DEBUG const char * name; #endif @@ -229,9 +218,11 @@ void rpc_killall_tasks(struct rpc_clnt *); void rpc_execute(struct rpc_task *); void rpc_init_priority_wait_queue(struct rpc_wait_queue *, const char *); void rpc_init_wait_queue(struct rpc_wait_queue *, const char *); +void rpc_destroy_wait_queue(struct rpc_wait_queue *); void rpc_sleep_on(struct rpc_wait_queue *, struct rpc_task *, - rpc_action action, rpc_action timer); -void rpc_wake_up_task(struct rpc_task *); + rpc_action action); +void rpc_wake_up_queued_task(struct rpc_wait_queue *, + struct rpc_task *); void rpc_wake_up(struct rpc_wait_queue *); struct rpc_task *rpc_wake_up_next(struct rpc_wait_queue *); void rpc_wake_up_status(struct rpc_wait_queue *, int); diff --git a/include/linux/sunrpc/xprt.h b/include/linux/sunrpc/xprt.h index b3ff9a8..4d80a11 100644 --- a/include/linux/sunrpc/xprt.h +++ b/include/linux/sunrpc/xprt.h @@ -86,6 +86,10 @@ struct rpc_rqst { unsigned long rq_majortimeo; /* major timeout alarm */ unsigned long rq_timeout; /* Current timeout value */ unsigned int rq_retries; /* # of retries */ + unsigned int rq_connect_cookie; + /* A cookie used to track the + state of the transport + connection */ /* * Partial send handling @@ -152,6 +156,9 @@ struct rpc_xprt { unsigned long connect_timeout, bind_timeout, reestablish_timeout; + unsigned int connect_cookie; /* A cookie that gets bumped + every time the transport + is reconnected */ /* * Disconnection of idle transports @@ -232,7 +239,7 @@ int xprt_unregister_transport(struct xprt_class *type); void xprt_set_retrans_timeout_def(struct rpc_task *task); void xprt_set_retrans_timeout_rtt(struct rpc_task *task); void xprt_wake_pending_tasks(struct rpc_xprt *xprt, int status); -void xprt_wait_for_buffer_space(struct rpc_task *task); +void xprt_wait_for_buffer_space(struct rpc_task *task, rpc_action action); void xprt_write_space(struct rpc_xprt *xprt); void xprt_update_rtt(struct rpc_task *task); void xprt_adjust_cwnd(struct rpc_task *task, int result); @@ -241,6 +248,7 @@ void xprt_complete_rqst(struct rpc_task *task, int copied); void xprt_release_rqst_cong(struct rpc_task *task); void xprt_disconnect_done(struct rpc_xprt *xprt); void xprt_force_disconnect(struct rpc_xprt *xprt); +void xprt_conditional_disconnect(struct rpc_xprt *xprt, unsigned int cookie); /* * Reserved bit positions in xprt->state diff --git a/net/sunrpc/Makefile b/net/sunrpc/Makefile index 92e1dbe..5369aa3 100644 --- a/net/sunrpc/Makefile +++ b/net/sunrpc/Makefile @@ -8,7 +8,7 @@ obj-$(CONFIG_SUNRPC_GSS) += auth_gss/ obj-$(CONFIG_SUNRPC_XPRT_RDMA) += xprtrdma/ sunrpc-y := clnt.o xprt.o socklib.o xprtsock.o sched.o \ - auth.o auth_null.o auth_unix.o \ + auth.o auth_null.o auth_unix.o auth_generic.o \ svc.o svcsock.o svcauth.o svcauth_unix.o \ rpcb_clnt.o timer.o xdr.o \ sunrpc_syms.o cache.o rpc_pipe.o \ diff --git a/net/sunrpc/auth.c b/net/sunrpc/auth.c index eca941c..6bfea9e 100644 --- a/net/sunrpc/auth.c +++ b/net/sunrpc/auth.c @@ -11,6 +11,7 @@ #include <linux/module.h> #include <linux/slab.h> #include <linux/errno.h> +#include <linux/hash.h> #include <linux/sunrpc/clnt.h> #include <linux/spinlock.h> @@ -219,6 +220,9 @@ rpcauth_destroy_credcache(struct rpc_auth *auth) } EXPORT_SYMBOL_GPL(rpcauth_destroy_credcache); + +#define RPC_AUTH_EXPIRY_MORATORIUM (60 * HZ) + /* * Remove stale credentials. Avoid sleeping inside the loop. */ @@ -227,6 +231,7 @@ rpcauth_prune_expired(struct list_head *free, int nr_to_scan) { spinlock_t *cache_lock; struct rpc_cred *cred; + unsigned long expired = jiffies - RPC_AUTH_EXPIRY_MORATORIUM; while (!list_empty(&cred_unused)) { cred = list_entry(cred_unused.next, struct rpc_cred, cr_lru); @@ -234,6 +239,10 @@ rpcauth_prune_expired(struct list_head *free, int nr_to_scan) number_cred_unused--; if (atomic_read(&cred->cr_count) != 0) continue; + /* Enforce a 5 second garbage collection moratorium */ + if (time_in_range(cred->cr_expire, expired, jiffies) && + test_bit(RPCAUTH_CRED_UPTODATE, &cred->cr_flags) != 0) + continue; cache_lock = &cred->cr_auth->au_credcache->lock; spin_lock(cache_lock); if (atomic_read(&cred->cr_count) == 0) { @@ -280,10 +289,9 @@ rpcauth_lookup_credcache(struct rpc_auth *auth, struct auth_cred * acred, struct hlist_node *pos; struct rpc_cred *cred = NULL, *entry, *new; - int nr = 0; + unsigned int nr; - if (!(flags & RPCAUTH_LOOKUP_ROOTCREDS)) - nr = acred->uid & RPC_CREDCACHE_MASK; + nr = hash_long(acred->uid, RPC_CREDCACHE_HASHBITS); rcu_read_lock(); hlist_for_each_entry_rcu(entry, pos, &cache->hashtable[nr], cr_hash) { @@ -356,7 +364,6 @@ rpcauth_lookupcred(struct rpc_auth *auth, int flags) put_group_info(acred.group_info); return ret; } -EXPORT_SYMBOL_GPL(rpcauth_lookupcred); void rpcauth_init_cred(struct rpc_cred *cred, const struct auth_cred *acred, @@ -375,41 +382,58 @@ rpcauth_init_cred(struct rpc_cred *cred, const struct auth_cred *acred, } EXPORT_SYMBOL_GPL(rpcauth_init_cred); -struct rpc_cred * -rpcauth_bindcred(struct rpc_task *task) +void +rpcauth_generic_bind_cred(struct rpc_task *task, struct rpc_cred *cred) +{ + task->tk_msg.rpc_cred = get_rpccred(cred); + dprintk("RPC: %5u holding %s cred %p\n", task->tk_pid, + cred->cr_auth->au_ops->au_name, cred); +} +EXPORT_SYMBOL_GPL(rpcauth_generic_bind_cred); + +static void +rpcauth_bind_root_cred(struct rpc_task *task) { struct rpc_auth *auth = task->tk_client->cl_auth; struct auth_cred acred = { - .uid = current->fsuid, - .gid = current->fsgid, - .group_info = current->group_info, + .uid = 0, + .gid = 0, }; struct rpc_cred *ret; - int flags = 0; dprintk("RPC: %5u looking up %s cred\n", task->tk_pid, task->tk_client->cl_auth->au_ops->au_name); - get_group_info(acred.group_info); - if (task->tk_flags & RPC_TASK_ROOTCREDS) - flags |= RPCAUTH_LOOKUP_ROOTCREDS; - ret = auth->au_ops->lookup_cred(auth, &acred, flags); + ret = auth->au_ops->lookup_cred(auth, &acred, 0); + if (!IS_ERR(ret)) + task->tk_msg.rpc_cred = ret; + else + task->tk_status = PTR_ERR(ret); +} + +static void +rpcauth_bind_new_cred(struct rpc_task *task) +{ + struct rpc_auth *auth = task->tk_client->cl_auth; + struct rpc_cred *ret; + + dprintk("RPC: %5u looking up %s cred\n", + task->tk_pid, auth->au_ops->au_name); + ret = rpcauth_lookupcred(auth, 0); if (!IS_ERR(ret)) task->tk_msg.rpc_cred = ret; else task->tk_status = PTR_ERR(ret); - put_group_info(acred.group_info); - return ret; } void -rpcauth_holdcred(struct rpc_task *task) +rpcauth_bindcred(struct rpc_task *task, struct rpc_cred *cred, int flags) { - struct rpc_cred *cred = task->tk_msg.rpc_cred; - if (cred != NULL) { - get_rpccred(cred); - dprintk("RPC: %5u holding %s cred %p\n", task->tk_pid, - cred->cr_auth->au_ops->au_name, cred); - } + if (cred != NULL) + cred->cr_ops->crbind(task, cred); + else if (flags & RPC_TASK_ROOTCREDS) + rpcauth_bind_root_cred(task); + else + rpcauth_bind_new_cred(task); } void @@ -550,6 +574,7 @@ static struct shrinker rpc_cred_shrinker = { void __init rpcauth_init_module(void) { rpc_init_authunix(); + rpc_init_generic_auth(); register_shrinker(&rpc_cred_shrinker); } diff --git a/net/sunrpc/auth_generic.c b/net/sunrpc/auth_generic.c new file mode 100644 index 0000000..d927d9f --- /dev/null +++ b/net/sunrpc/auth_generic.c @@ -0,0 +1,177 @@ +/* + * Generic RPC credential + * + * Copyright (C) 2008, Trond Myklebust <Trond.Myklebust@netapp.com> + */ + +#include <linux/err.h> +#include <linux/types.h> +#include <linux/module.h> +#include <linux/sched.h> +#include <linux/sunrpc/auth.h> +#include <linux/sunrpc/clnt.h> +#include <linux/sunrpc/debug.h> +#include <linux/sunrpc/sched.h> + +#ifdef RPC_DEBUG +# define RPCDBG_FACILITY RPCDBG_AUTH +#endif + +#define RPC_ANONYMOUS_USERID ((uid_t)-2) +#define RPC_ANONYMOUS_GROUPID ((gid_t)-2) + +struct generic_cred { + struct rpc_cred gc_base; + struct auth_cred acred; +}; + +static struct rpc_auth generic_auth; +static struct rpc_cred_cache generic_cred_cache; +static const struct rpc_credops generic_credops; + +/* + * Public call interface + */ +struct rpc_cred *rpc_lookup_cred(void) +{ + return rpcauth_lookupcred(&generic_auth, 0); +} +EXPORT_SYMBOL_GPL(rpc_lookup_cred); + +/* + * Public call interface for looking up machine creds. + */ +struct rpc_cred *rpc_lookup_machine_cred(void) +{ + struct auth_cred acred = { + .uid = RPC_ANONYMOUS_USERID, + .gid = RPC_ANONYMOUS_GROUPID, + .machine_cred = 1, + }; + + dprintk("RPC: looking up machine cred\n"); + return generic_auth.au_ops->lookup_cred(&generic_auth, &acred, 0); +} +EXPORT_SYMBOL_GPL(rpc_lookup_machine_cred); + +static void +generic_bind_cred(struct rpc_task *task, struct rpc_cred *cred) +{ + struct rpc_auth *auth = task->tk_client->cl_auth; + struct auth_cred *acred = &container_of(cred, struct generic_cred, gc_base)->acred; + struct rpc_cred *ret; + + ret = auth->au_ops->lookup_cred(auth, acred, 0); + if (!IS_ERR(ret)) + task->tk_msg.rpc_cred = ret; + else + task->tk_status = PTR_ERR(ret); +} + +/* + * Lookup generic creds for current process + */ +static struct rpc_cred * +generic_lookup_cred(struct rpc_auth *auth, struct auth_cred *acred, int flags) +{ + return rpcauth_lookup_credcache(&generic_auth, acred, flags); +} + +static struct rpc_cred * +generic_create_cred(struct rpc_auth *auth, struct auth_cred *acred, int flags) +{ + struct generic_cred *gcred; + + gcred = kmalloc(sizeof(*gcred), GFP_KERNEL); + if (gcred == NULL) + return ERR_PTR(-ENOMEM); + + rpcauth_init_cred(&gcred->gc_base, acred, &generic_auth, &generic_credops); + gcred->gc_base.cr_flags = 1UL << RPCAUTH_CRED_UPTODATE; + + gcred->acred.uid = acred->uid; + gcred->acred.gid = acred->gid; + gcred->acred.group_info = acred->group_info; + if (gcred->acred.group_info != NULL) + get_group_info(gcred->acred.group_info); + gcred->acred.machine_cred = acred->machine_cred; + + dprintk("RPC: allocated %s cred %p for uid %d gid %d\n", + gcred->acred.machine_cred ? "machine" : "generic", + gcred, acred->uid, acred->gid); + return &gcred->gc_base; +} + +static void +generic_free_cred(struct rpc_cred *cred) +{ + struct generic_cred *gcred = container_of(cred, struct generic_cred, gc_base); + + dprintk("RPC: generic_free_cred %p\n", gcred); + if (gcred->acred.group_info != NULL) + put_group_info(gcred->acred.group_info); + kfree(gcred); +} + +static void +generic_free_cred_callback(struct rcu_head *head) +{ + struct rpc_cred *cred = container_of(head, struct rpc_cred, cr_rcu); + generic_free_cred(cred); +} + +static void +generic_destroy_cred(struct rpc_cred *cred) +{ + call_rcu(&cred->cr_rcu, generic_free_cred_callback); +} + +/* + * Match credentials against current process creds. + */ +static int +generic_match(struct auth_cred *acred, struct rpc_cred *cred, int flags) +{ + struct generic_cred *gcred = container_of(cred, struct generic_cred, gc_base); + + if (gcred->acred.uid != acred->uid || + gcred->acred.gid != acred->gid || + gcred->acred.group_info != acred->group_info || + gcred->acred.machine_cred != acred->machine_cred) + return 0; + return 1; +} + +void __init rpc_init_generic_auth(void) +{ + spin_lock_init(&generic_cred_cache.lock); +} + +void __exit rpc_destroy_generic_auth(void) +{ + rpcauth_clear_credcache(&generic_cred_cache); +} + +static struct rpc_cred_cache generic_cred_cache = { + {{ NULL, },}, +}; + +static const struct rpc_authops generic_auth_ops = { + .owner = THIS_MODULE, + .au_name = "Generic", + .lookup_cred = generic_lookup_cred, + .crcreate = generic_create_cred, +}; + +static struct rpc_auth generic_auth = { + .au_ops = &generic_auth_ops, + .au_count = ATOMIC_INIT(0), + .au_credcache = &generic_cred_cache, +}; + +static const struct rpc_credops generic_credops = { + .cr_name = "Generic cred", + .crdestroy = generic_destroy_cred, + .crbind = generic_bind_cred, + .crmatch = generic_match, +}; diff --git a/net/sunrpc/auth_gss/auth_gss.c b/net/sunrpc/auth_gss/auth_gss.c index 5828e5c..cc12d5f 100644 --- a/net/sunrpc/auth_gss/auth_gss.c +++ b/net/sunrpc/auth_gss/auth_gss.c @@ -114,27 +114,14 @@ static void gss_cred_set_ctx(struct rpc_cred *cred, struct gss_cl_ctx *ctx) { struct gss_cred *gss_cred = container_of(cred, struct gss_cred, gc_base); - struct gss_cl_ctx *old; - old = gss_cred->gc_ctx; + if (!test_bit(RPCAUTH_CRED_NEW, &cred->cr_flags)) + return; + gss_get_ctx(ctx); rcu_assign_pointer(gss_cred->gc_ctx, ctx); set_bit(RPCAUTH_CRED_UPTODATE, &cred->cr_flags); + smp_mb__before_clear_bit(); clear_bit(RPCAUTH_CRED_NEW, &cred->cr_flags); - if (old) - gss_put_ctx(old); -} - -static int -gss_cred_is_uptodate_ctx(struct rpc_cred *cred) -{ - struct gss_cred *gss_cred = container_of(cred, struct gss_cred, gc_base); - int res = 0; - - rcu_read_lock(); - if (test_bit(RPCAUTH_CRED_UPTODATE, &cred->cr_flags) && gss_cred->gc_ctx) - res = 1; - rcu_read_unlock(); - return res; } static const void * @@ -266,6 +253,7 @@ gss_release_msg(struct gss_upcall_msg *gss_msg) BUG_ON(!list_empty(&gss_msg->list)); if (gss_msg->ctx != NULL) gss_put_ctx(gss_msg->ctx); + rpc_destroy_wait_queue(&gss_msg->rpc_waitqueue); kfree(gss_msg); } @@ -339,7 +327,7 @@ gss_upcall_callback(struct rpc_task *task) spin_lock(&inode->i_lock); if (gss_msg->ctx) - gss_cred_set_ctx(task->tk_msg.rpc_cred, gss_get_ctx(gss_msg->ctx)); + gss_cred_set_ctx(task->tk_msg.rpc_cred, gss_msg->ctx); else task->tk_status = gss_msg->msg.errno; gss_cred->gc_upcall = NULL; @@ -370,9 +358,16 @@ gss_alloc_msg(struct gss_auth *gss_auth, uid_t uid) static struct gss_upcall_msg * gss_setup_upcall(struct rpc_clnt *clnt, struct gss_auth *gss_auth, struct rpc_cred *cred) { + struct gss_cred *gss_cred = container_of(cred, + struct gss_cred, gc_base); struct gss_upcall_msg *gss_new, *gss_msg; + uid_t uid = cred->cr_uid; - gss_new = gss_alloc_msg(gss_auth, cred->cr_uid); + /* Special case: rpc.gssd assumes that uid == 0 implies machine creds */ + if (gss_cred->gc_machine_cred != 0) + uid = 0; + + gss_new = gss_alloc_msg(gss_auth, uid); if (gss_new == NULL) return ERR_PTR(-ENOMEM); gss_msg = gss_add_msg(gss_auth, gss_new); @@ -408,13 +403,17 @@ gss_refresh_upcall(struct rpc_task *task) } spin_lock(&inode->i_lock); if (gss_cred->gc_upcall != NULL) - rpc_sleep_on(&gss_cred->gc_upcall->rpc_waitqueue, task, NULL, NULL); - else if (gss_msg->ctx == NULL && gss_msg->msg.errno >= 0) { + rpc_sleep_on(&gss_cred->gc_upcall->rpc_waitqueue, task, NULL); + else if (gss_msg->ctx != NULL) { + gss_cred_set_ctx(task->tk_msg.rpc_cred, gss_msg->ctx); + gss_cred->gc_upcall = NULL; + rpc_wake_up_status(&gss_msg->rpc_waitqueue, gss_msg->msg.errno); + } else if (gss_msg->msg.errno >= 0) { task->tk_timeout = 0; gss_cred->gc_upcall = gss_msg; /* gss_upcall_callback will release the reference to gss_upcall_msg */ atomic_inc(&gss_msg->count); - rpc_sleep_on(&gss_msg->rpc_waitqueue, task, gss_upcall_callback, NULL); + rpc_sleep_on(&gss_msg->rpc_waitqueue, task, gss_upcall_callback); } else err = gss_msg->msg.errno; spin_unlock(&inode->i_lock); @@ -454,7 +453,7 @@ gss_create_upcall(struct gss_auth *gss_auth, struct gss_cred *gss_cred) schedule(); } if (gss_msg->ctx) - gss_cred_set_ctx(cred, gss_get_ctx(gss_msg->ctx)); + gss_cred_set_ctx(cred, gss_msg->ctx); else err = gss_msg->msg.errno; spin_unlock(&inode->i_lock); @@ -709,7 +708,7 @@ gss_destroying_context(struct rpc_cred *cred) struct rpc_task *task; if (gss_cred->gc_ctx == NULL || - gss_cred->gc_ctx->gc_proc == RPC_GSS_PROC_DESTROY) + test_and_clear_bit(RPCAUTH_CRED_UPTODATE, &cred->cr_flags) == 0) return 0; gss_cred->gc_ctx->gc_proc = RPC_GSS_PROC_DESTROY; @@ -719,7 +718,7 @@ gss_destroying_context(struct rpc_cred *cred) * by the RPC call or by the put_rpccred() below */ get_rpccred(cred); - task = rpc_call_null(gss_auth->client, cred, RPC_TASK_ASYNC); + task = rpc_call_null(gss_auth->client, cred, RPC_TASK_ASYNC|RPC_TASK_SOFT); if (!IS_ERR(task)) rpc_put_task(task); @@ -817,6 +816,7 @@ gss_create_cred(struct rpc_auth *auth, struct auth_cred *acred, int flags) */ cred->gc_base.cr_flags = 1UL << RPCAUTH_CRED_NEW; cred->gc_service = gss_auth->service; + cred->gc_machine_cred = acred->machine_cred; kref_get(&gss_auth->kref); return &cred->gc_base; @@ -843,17 +843,16 @@ gss_match(struct auth_cred *acred, struct rpc_cred *rc, int flags) { struct gss_cred *gss_cred = container_of(rc, struct gss_cred, gc_base); - /* - * If the searchflags have set RPCAUTH_LOOKUP_NEW, then - * we don't really care if the credential has expired or not, - * since the caller should be prepared to reinitialise it. - */ - if ((flags & RPCAUTH_LOOKUP_NEW) && test_bit(RPCAUTH_CRED_NEW, &rc->cr_flags)) + if (test_bit(RPCAUTH_CRED_NEW, &rc->cr_flags)) goto out; /* Don't match with creds that have expired. */ - if (gss_cred->gc_ctx && time_after(jiffies, gss_cred->gc_ctx->gc_expiry)) + if (time_after(jiffies, gss_cred->gc_ctx->gc_expiry)) + return 0; + if (!test_bit(RPCAUTH_CRED_UPTODATE, &rc->cr_flags)) return 0; out: + if (acred->machine_cred != gss_cred->gc_machine_cred) + return 0; return (rc->cr_uid == acred->uid); } @@ -917,16 +916,48 @@ out_put_ctx: return NULL; } +static int gss_renew_cred(struct rpc_task *task) +{ + struct rpc_cred *oldcred = task->tk_msg.rpc_cred; + struct gss_cred *gss_cred = container_of(oldcred, + struct gss_cred, + gc_base); + struct rpc_auth *auth = oldcred->cr_auth; + struct auth_cred acred = { + .uid = oldcred->cr_uid, + .machine_cred = gss_cred->gc_machine_cred, + }; + struct rpc_cred *new; + + new = gss_lookup_cred(auth, &acred, RPCAUTH_LOOKUP_NEW); + if (IS_ERR(new)) + return PTR_ERR(new); + task->tk_msg.rpc_cred = new; + put_rpccred(oldcred); + return 0; +} + /* * Refresh credentials. XXX - finish */ static int gss_refresh(struct rpc_task *task) { + struct rpc_cred *cred = task->tk_msg.rpc_cred; + int ret = 0; + + if (!test_bit(RPCAUTH_CRED_NEW, &cred->cr_flags) && + !test_bit(RPCAUTH_CRED_UPTODATE, &cred->cr_flags)) { + ret = gss_renew_cred(task); + if (ret < 0) + goto out; + cred = task->tk_msg.rpc_cred; + } - if (!gss_cred_is_uptodate_ctx(task->tk_msg.rpc_cred)) - return gss_refresh_upcall(task); - return 0; + if (test_bit(RPCAUTH_CRED_NEW, &cred->cr_flags)) + ret = gss_refresh_upcall(task); +out: + return ret; } /* Dummy refresh routine: used only when destroying the context */ @@ -1286,9 +1317,7 @@ out: static const struct rpc_authops authgss_ops = { .owner = THIS_MODULE, .au_flavor = RPC_AUTH_GSS, -#ifdef RPC_DEBUG .au_name = "RPCSEC_GSS", -#endif .create = gss_create, .destroy = gss_destroy, .lookup_cred = gss_lookup_cred, @@ -1299,6 +1328,7 @@ static const struct rpc_credops gss_credops = { .cr_name = "AUTH_GSS", .crdestroy = gss_destroy_cred, .cr_init = gss_cred_init, + .crbind = rpcauth_generic_bind_cred, .crmatch = gss_match, .crmarshal = gss_marshal, .crrefresh = gss_refresh, @@ -1310,6 +1340,7 @@ static const struct rpc_credops gss_credops = { static const struct rpc_credops gss_nullops = { .cr_name = "AUTH_GSS", .crdestroy = gss_destroy_cred, + .crbind = rpcauth_generic_bind_cred, .crmatch = gss_match, .crmarshal = gss_marshal, .crrefresh = gss_refresh_null, diff --git a/net/sunrpc/auth_null.c b/net/sunrpc/auth_null.c index 537d0e8..c70dd7f 100644 --- a/net/sunrpc/auth_null.c +++ b/net/sunrpc/auth_null.c @@ -104,9 +104,7 @@ nul_validate(struct rpc_task *task, __be32 *p) const struct rpc_authops authnull_ops = { .owner = THIS_MODULE, .au_flavor = RPC_AUTH_NULL, -#ifdef RPC_DEBUG .au_name = "NULL", -#endif .create = nul_create, .destroy = nul_destroy, .lookup_cred = nul_lookup_cred, @@ -125,6 +123,7 @@ static const struct rpc_credops null_credops = { .cr_name = "AUTH_NULL", .crdestroy = nul_destroy_cred, + .crbind = rpcauth_generic_bind_cred, .crmatch = nul_match, .crmarshal = nul_marshal, .crrefresh = nul_refresh, diff --git a/net/sunrpc/auth_unix.c b/net/sunrpc/auth_unix.c index 5ed91e5..44920b9 100644 --- a/net/sunrpc/auth_unix.c +++ b/net/sunrpc/auth_unix.c @@ -60,7 +60,8 @@ static struct rpc_cred * unx_create_cred(struct rpc_auth *auth, struct auth_cred *acred, int flags) { struct unx_cred *cred; - int i; + unsigned int groups = 0; + unsigned int i; dprintk("RPC: allocating UNIX cred for uid %d gid %d\n", acred->uid, acred->gid); @@ -70,21 +71,17 @@ unx_create_cred(struct rpc_auth *auth, struct auth_cred *acred, int flags) rpcauth_init_cred(&cred->uc_base, acred, auth, &unix_credops); cred->uc_base.cr_flags = 1UL << RPCAUTH_CRED_UPTODATE; - if (flags & RPCAUTH_LOOKUP_ROOTCREDS) { - cred->uc_uid = 0; - cred->uc_gid = 0; - cred->uc_gids[0] = NOGROUP; - } else { - int groups = acred->group_info->ngroups; - if (groups > NFS_NGROUPS) - groups = NFS_NGROUPS; - - cred->uc_gid = acred->gid; - for (i = 0; i < groups; i++) - cred->uc_gids[i] = GROUP_AT(acred->group_info, i); - if (i < NFS_NGROUPS) - cred->uc_gids[i] = NOGROUP; - } + + if (acred->group_info != NULL) + groups = acred->group_info->ngroups; + if (groups > NFS_NGROUPS) + groups = NFS_NGROUPS; + + cred->uc_gid = acred->gid; + for (i = 0; i < groups; i++) + cred->uc_gids[i] = GROUP_AT(acred->group_info, i); + if (i < NFS_NGROUPS) + cred->uc_gids[i] = NOGROUP; return &cred->uc_base; } @@ -118,26 +115,21 @@ static int unx_match(struct auth_cred *acred, struct rpc_cred *rcred, int flags) { struct unx_cred *cred = container_of(rcred, struct unx_cred, uc_base); - int i; + unsigned int groups = 0; + unsigned int i; - if (!(flags & RPCAUTH_LOOKUP_ROOTCREDS)) { - int groups; - if (cred->uc_uid != acred->uid - || cred->uc_gid != acred->gid) - return 0; + if (cred->uc_uid != acred->uid || cred->uc_gid != acred->gid) + return 0; + if (acred->group_info != NULL) groups = acred->group_info->ngroups; - if (groups > NFS_NGROUPS) - groups = NFS_NGROUPS; - for (i = 0; i < groups ; i++) - if (cred->uc_gids[i] != GROUP_AT(acred->group_info, i)) - return 0; - return 1; - } - return (cred->uc_uid == 0 - && cred->uc_gid == 0 - && cred->uc_gids[0] == (gid_t) NOGROUP); + if (groups > NFS_NGROUPS) + groups = NFS_NGROUPS; + for (i = 0; i < groups ; i++) + if (cred->uc_gids[i] != GROUP_AT(acred->group_info, i)) + return 0; + return 1; } /* @@ -218,9 +210,7 @@ void __init rpc_init_authunix(void) const struct rpc_authops authunix_ops = { .owner = THIS_MODULE, .au_flavor = RPC_AUTH_UNIX, -#ifdef RPC_DEBUG .au_name = "UNIX", -#endif .create = unx_create, .destroy = unx_destroy, .lookup_cred = unx_lookup_cred, @@ -245,6 +235,7 @@ static const struct rpc_credops unix_credops = { .cr_name = "AUTH_UNIX", .crdestroy = unx_destroy_cred, + .crbind = rpcauth_generic_bind_cred, .crmatch = unx_match, .crmarshal = unx_marshal, .crrefresh = unx_refresh, diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index 7b96ff3..8945307 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c @@ -544,7 +544,7 @@ EXPORT_SYMBOL_GPL(rpc_run_task); * @msg: RPC call parameters * @flags: RPC call flags */ -int rpc_call_sync(struct rpc_clnt *clnt, struct rpc_message *msg, int flags) +int rpc_call_sync(struct rpc_clnt *clnt, const struct rpc_message *msg, int flags) { struct rpc_task *task; struct rpc_task_setup task_setup_data = { @@ -575,7 +575,7 @@ EXPORT_SYMBOL_GPL(rpc_call_sync); * @data: user call data */ int -rpc_call_async(struct rpc_clnt *clnt, struct rpc_message *msg, int flags, +rpc_call_async(struct rpc_clnt *clnt, const struct rpc_message *msg, int flags, const struct rpc_call_ops *tk_ops, void *data) { struct rpc_task *task; @@ -1062,7 +1062,7 @@ call_transmit(struct rpc_task *task) if (task->tk_msg.rpc_proc->p_decode != NULL) return; task->tk_action = rpc_exit_task; - rpc_wake_up_task(task); + rpc_wake_up_queued_task(&task->tk_xprt->pending, task); } /* @@ -1116,7 +1116,8 @@ call_status(struct rpc_task *task) case -ETIMEDOUT: task->tk_action = call_timeout; if (task->tk_client->cl_discrtry) - xprt_force_disconnect(task->tk_xprt); + xprt_conditional_disconnect(task->tk_xprt, + req->rq_connect_cookie); break; case -ECONNREFUSED: case -ENOTCONN: @@ -1168,6 +1169,11 @@ call_timeout(struct rpc_task *task) clnt->cl_protname, clnt->cl_server); } rpc_force_rebind(clnt); + /* + * Did our request time out due to an RPCSEC_GSS out-of-sequence + * event? RFC2203 requires the server to drop all such requests. + */ + rpcauth_invalcred(task); retry: clnt->cl_stats->rpcretrans++; @@ -1195,18 +1201,6 @@ call_decode(struct rpc_task *task) task->tk_flags &= ~RPC_CALL_MAJORSEEN; } - if (task->tk_status < 12) { - if (!RPC_IS_SOFT(task)) { - task->tk_action = call_bind; - clnt->cl_stats->rpcretrans++; - goto out_retry; - } - dprintk("RPC: %s: too small RPC reply size (%d bytes)\n", - clnt->cl_protname, task->tk_status); - task->tk_action = call_timeout; - goto out_retry; - } - /* * Ensure that we see all writes made by xprt_complete_rqst() * before it changed req->rq_received. @@ -1218,6 +1212,18 @@ call_decode(struct rpc_task *task) WARN_ON(memcmp(&req->rq_rcv_buf, &req->rq_private_buf, sizeof(req->rq_rcv_buf)) != 0); + if (req->rq_rcv_buf.len < 12) { + if (!RPC_IS_SOFT(task)) { + task->tk_action = call_bind; + clnt->cl_stats->rpcretrans++; + goto out_retry; + } + dprintk("RPC: %s: too small RPC reply size (%d bytes)\n", + clnt->cl_protname, task->tk_status); + task->tk_action = call_timeout; + goto out_retry; + } + /* Verify the RPC header */ p = call_verify(task); if (IS_ERR(p)) { @@ -1236,10 +1242,14 @@ call_decode(struct rpc_task *task) task->tk_status); return; out_retry: - req->rq_received = req->rq_private_buf.len = 0; task->tk_status = 0; - if (task->tk_client->cl_discrtry) - xprt_force_disconnect(task->tk_xprt); + /* Note: call_verify() may have freed the RPC slot */ + if (task->tk_rqstp == req) { + req->rq_received = req->rq_rcv_buf.len = 0; + if (task->tk_client->cl_discrtry) + xprt_conditional_disconnect(task->tk_xprt, + req->rq_connect_cookie); + } } /* @@ -1531,7 +1541,7 @@ void rpc_show_tasks(void) proc = -1; if (RPC_IS_QUEUED(t)) - rpc_waitq = rpc_qname(t->u.tk_wait.rpc_waitq); + rpc_waitq = rpc_qname(t->tk_waitqueue); printk("%5u %04d %04x %6d %8p %6d %8p %8ld %8s %8p %8p\n", t->tk_pid, proc, diff --git a/net/sunrpc/rpcb_clnt.c b/net/sunrpc/rpcb_clnt.c index 56aa018..0517967 100644 --- a/net/sunrpc/rpcb_clnt.c +++ b/net/sunrpc/rpcb_clnt.c @@ -298,7 +298,7 @@ void rpcb_getport_async(struct rpc_task *task) /* Put self on queue before sending rpcbind request, in case * rpcb_getport_done completes before we return from rpc_run_task */ - rpc_sleep_on(&xprt->binding, task, NULL, NULL); + rpc_sleep_on(&xprt->binding, task, NULL); /* Someone else may have bound if we slept */ if (xprt_bound(xprt)) { diff --git a/net/sunrpc/sched.c b/net/sunrpc/sched.c index 4c66912..6eab9bf 100644 --- a/net/sunrpc/sched.c +++ b/net/sunrpc/sched.c @@ -38,9 +38,9 @@ static struct kmem_cache *rpc_buffer_slabp __read_mostly; static mempool_t *rpc_task_mempool __read_mostly; static mempool_t *rpc_buffer_mempool __read_mostly; -static void __rpc_default_timer(struct rpc_task *task); static void rpc_async_schedule(struct work_struct *); static void rpc_release_task(struct rpc_task *task); +static void __rpc_queue_timer_fn(unsigned long ptr); /* * RPC tasks sit here while waiting for conditions to improve. @@ -57,41 +57,30 @@ struct workqueue_struct *rpciod_workqueue; * queue->lock and bh_disabled in order to avoid races within * rpc_run_timer(). */ -static inline void -__rpc_disable_timer(struct rpc_task *task) +static void +__rpc_disable_timer(struct rpc_wait_queue *queue, struct rpc_task *task) { + if (task->tk_timeout == 0) + return; dprintk("RPC: %5u disabling timer\n", task->tk_pid); - task->tk_timeout_fn = NULL; task->tk_timeout = 0; + list_del(&task->u.tk_wait.timer_list); + if (list_empty(&queue->timer_list.list)) + del_timer(&queue->timer_list.timer); } -/* - * Run a timeout function. - * We use the callback in order to allow __rpc_wake_up_task() - * and friends to disable the timer synchronously on SMP systems - * without calling del_timer_sync(). The latter could cause a - * deadlock if called while we're holding spinlocks... - */ -static void rpc_run_timer(struct rpc_task *task) +static void +rpc_set_queue_timer(struct rpc_wait_queue *queue, unsigned long expires) { - void (*callback)(struct rpc_task *); - - callback = task->tk_timeout_fn; - task->tk_timeout_fn = NULL; - if (callback && RPC_IS_QUEUED(task)) { - dprintk("RPC: %5u running timer\n", task->tk_pid); - callback(task); - } - smp_mb__before_clear_bit(); - clear_bit(RPC_TASK_HAS_TIMER, &task->tk_runstate); - smp_mb__after_clear_bit(); + queue->timer_list.expires = expires; + mod_timer(&queue->timer_list.timer, expires); } /* * Set up a timer for the current task. */ -static inline void -__rpc_add_timer(struct rpc_task *task, rpc_action timer) +static void +__rpc_add_timer(struct rpc_wait_queue *queue, struct rpc_task *task) { if (!task->tk_timeout) return; @@ -99,27 +88,10 @@ __rpc_add_timer(struct rpc_task *task, rpc_action timer) dprintk("RPC: %5u setting alarm for %lu ms\n", task->tk_pid, task->tk_timeout * 1000 / HZ); - if (timer) - task->tk_timeout_fn = timer; - else - task->tk_timeout_fn = __rpc_default_timer; - set_bit(RPC_TASK_HAS_TIMER, &task->tk_runstate); - mod_timer(&task->tk_timer, jiffies + task->tk_timeout); -} - -/* - * Delete any timer for the current task. Because we use del_timer_sync(), - * this function should never be called while holding queue->lock. - */ -static void -rpc_delete_timer(struct rpc_task *task) -{ - if (RPC_IS_QUEUED(task)) - return; - if (test_and_clear_bit(RPC_TASK_HAS_TIMER, &task->tk_runstate)) { - del_singleshot_timer_sync(&task->tk_timer); - dprintk("RPC: %5u deleting timer\n", task->tk_pid); - } + task->u.tk_wait.expires = jiffies + task->tk_timeout; + if (list_empty(&queue->timer_list.list) || time_before(task->u.tk_wait.expires, queue->timer_list.expires)) + rpc_set_queue_timer(queue, task->u.tk_wait.expires); + list_add(&task->u.tk_wait.timer_list, &queue->timer_list.list); } /* @@ -161,7 +133,7 @@ static void __rpc_add_wait_queue(struct rpc_wait_queue *queue, struct rpc_task * list_add(&task->u.tk_wait.list, &queue->tasks[0]); else list_add_tail(&task->u.tk_wait.list, &queue->tasks[0]); - task->u.tk_wait.rpc_waitq = queue; + task->tk_waitqueue = queue; queue->qlen++; rpc_set_queued(task); @@ -181,22 +153,18 @@ static void __rpc_remove_wait_queue_priority(struct rpc_task *task) list_move(&t->u.tk_wait.list, &task->u.tk_wait.list); list_splice_init(&task->u.tk_wait.links, &t->u.tk_wait.links); } - list_del(&task->u.tk_wait.list); } /* * Remove request from queue. * Note: must be called with spin lock held. */ -static void __rpc_remove_wait_queue(struct rpc_task *task) +static void __rpc_remove_wait_queue(struct rpc_wait_queue *queue, struct rpc_task *task) { - struct rpc_wait_queue *queue; - queue = task->u.tk_wait.rpc_waitq; - + __rpc_disable_timer(queue, task); if (RPC_IS_PRIORITY(queue)) __rpc_remove_wait_queue_priority(task); - else - list_del(&task->u.tk_wait.list); + list_del(&task->u.tk_wait.list); queue->qlen--; dprintk("RPC: %5u removed from queue %p \"%s\"\n", task->tk_pid, queue, rpc_qname(queue)); @@ -229,6 +197,9 @@ static void __rpc_init_priority_wait_queue(struct rpc_wait_queue *queue, const c INIT_LIST_HEAD(&queue->tasks[i]); queue->maxpriority = nr_queues - 1; rpc_reset_waitqueue_priority(queue); + queue->qlen = 0; + setup_timer(&queue->timer_list.timer, __rpc_queue_timer_fn, (unsigned long)queue); + INIT_LIST_HEAD(&queue->timer_list.list); #ifdef RPC_DEBUG queue->name = qname; #endif @@ -245,6 +216,12 @@ void rpc_init_wait_queue(struct rpc_wait_queue *queue, const char *qname) } EXPORT_SYMBOL_GPL(rpc_init_wait_queue); +void rpc_destroy_wait_queue(struct rpc_wait_queue *queue) +{ + del_timer_sync(&queue->timer_list.timer); +} +EXPORT_SYMBOL_GPL(rpc_destroy_wait_queue); + static int rpc_wait_bit_killable(void *word) { if (fatal_signal_pending(current)) @@ -313,7 +290,6 @@ EXPORT_SYMBOL_GPL(__rpc_wait_for_completion_task); */ static void rpc_make_runnable(struct rpc_task *task) { - BUG_ON(task->tk_timeout_fn); rpc_clear_queued(task); if (rpc_test_and_set_running(task)) return; @@ -326,7 +302,7 @@ static void rpc_make_runnable(struct rpc_task *task) int status; INIT_WORK(&task->u.tk_work, rpc_async_schedule); - status = queue_work(task->tk_workqueue, &task->u.tk_work); + status = queue_work(rpciod_workqueue, &task->u.tk_work); if (status < 0) { printk(KERN_WARNING "RPC: failed to add task to queue: error: %d!\n", status); task->tk_status = status; @@ -343,7 +319,7 @@ static void rpc_make_runnable(struct rpc_task *task) * as it's on a wait queue. */ static void __rpc_sleep_on(struct rpc_wait_queue *q, struct rpc_task *task, - rpc_action action, rpc_action timer) + rpc_action action) { dprintk("RPC: %5u sleep_on(queue \"%s\" time %lu)\n", task->tk_pid, rpc_qname(q), jiffies); @@ -357,11 +333,11 @@ static void __rpc_sleep_on(struct rpc_wait_queue *q, struct rpc_task *task, BUG_ON(task->tk_callback != NULL); task->tk_callback = action; - __rpc_add_timer(task, timer); + __rpc_add_timer(q, task); } void rpc_sleep_on(struct rpc_wait_queue *q, struct rpc_task *task, - rpc_action action, rpc_action timer) + rpc_action action) { /* Mark the task as being activated if so needed */ rpc_set_active(task); @@ -370,18 +346,19 @@ void rpc_sleep_on(struct rpc_wait_queue *q, struct rpc_task *task, * Protect the queue operations. */ spin_lock_bh(&q->lock); - __rpc_sleep_on(q, task, action, timer); + __rpc_sleep_on(q, task, action); spin_unlock_bh(&q->lock); } EXPORT_SYMBOL_GPL(rpc_sleep_on); /** * __rpc_do_wake_up_task - wake up a single rpc_task + * @queue: wait queue * @task: task to be woken up * * Caller must hold queue->lock, and have cleared the task queued flag. */ -static void __rpc_do_wake_up_task(struct rpc_task *task) +static void __rpc_do_wake_up_task(struct rpc_wait_queue *queue, struct rpc_task *task) { dprintk("RPC: %5u __rpc_wake_up_task (now %lu)\n", task->tk_pid, jiffies); @@ -395,8 +372,7 @@ static void __rpc_do_wake_up_task(struct rpc_task *task) return; } - __rpc_disable_timer(task); - __rpc_remove_wait_queue(task); + __rpc_remove_wait_queue(queue, task); rpc_make_runnable(task); @@ -404,48 +380,32 @@ static void __rpc_do_wake_up_task(struct rpc_task *task) } /* - * Wake up the specified task + * Wake up a queued task while the queue lock is being held */ -static void __rpc_wake_up_task(struct rpc_task *task) +static void rpc_wake_up_task_queue_locked(struct rpc_wait_queue *queue, struct rpc_task *task) { - if (rpc_start_wakeup(task)) { - if (RPC_IS_QUEUED(task)) - __rpc_do_wake_up_task(task); - rpc_finish_wakeup(task); - } + if (RPC_IS_QUEUED(task) && task->tk_waitqueue == queue) + __rpc_do_wake_up_task(queue, task); } /* - * Default timeout handler if none specified by user + * Wake up a task on a specific queue */ -static void -__rpc_default_timer(struct rpc_task *task) +void rpc_wake_up_queued_task(struct rpc_wait_queue *queue, struct rpc_task *task) { - dprintk("RPC: %5u timeout (default timer)\n", task->tk_pid); - task->tk_status = -ETIMEDOUT; - rpc_wake_up_task(task); + spin_lock_bh(&queue->lock); + rpc_wake_up_task_queue_locked(queue, task); + spin_unlock_bh(&queue->lock); } +EXPORT_SYMBOL_GPL(rpc_wake_up_queued_task); /* * Wake up the specified task */ -void rpc_wake_up_task(struct rpc_task *task) +static void rpc_wake_up_task(struct rpc_task *task) { - rcu_read_lock_bh(); - if (rpc_start_wakeup(task)) { - if (RPC_IS_QUEUED(task)) { - struct rpc_wait_queue *queue = task->u.tk_wait.rpc_waitq; - - /* Note: we're already in a bh-safe context */ - spin_lock(&queue->lock); - __rpc_do_wake_up_task(task); - spin_unlock(&queue->lock); - } - rpc_finish_wakeup(task); - } - rcu_read_unlock_bh(); + rpc_wake_up_queued_task(task->tk_waitqueue, task); } -EXPORT_SYMBOL_GPL(rpc_wake_up_task); /* * Wake up the next task on a priority queue. @@ -495,7 +455,7 @@ new_queue: new_owner: rpc_set_waitqueue_owner(queue, task->tk_owner); out: - __rpc_wake_up_task(task); + rpc_wake_up_task_queue_locked(queue, task); return task; } @@ -508,16 +468,14 @@ struct rpc_task * rpc_wake_up_next(struct rpc_wait_queue *queue) dprintk("RPC: wake_up_next(%p \"%s\")\n", queue, rpc_qname(queue)); - rcu_read_lock_bh(); - spin_lock(&queue->lock); + spin_lock_bh(&queue->lock); if (RPC_IS_PRIORITY(queue)) task = __rpc_wake_up_next_priority(queue); else { task_for_first(task, &queue->tasks[0]) - __rpc_wake_up_task(task); + rpc_wake_up_task_queue_locked(queue, task); } - spin_unlock(&queue->lock); - rcu_read_unlock_bh(); + spin_unlock_bh(&queue->lock); return task; } @@ -534,18 +492,16 @@ void rpc_wake_up(struct rpc_wait_queue *queue) struct rpc_task *task, *next; struct list_head *head; - rcu_read_lock_bh(); - spin_lock(&queue->lock); + spin_lock_bh(&queue->lock); head = &queue->tasks[queue->maxpriority]; for (;;) { list_for_each_entry_safe(task, next, head, u.tk_wait.list) - __rpc_wake_up_task(task); + rpc_wake_up_task_queue_locked(queue, task); if (head == &queue->tasks[0]) break; head--; } - spin_unlock(&queue->lock); - rcu_read_unlock_bh(); + spin_unlock_bh(&queue->lock); } EXPORT_SYMBOL_GPL(rpc_wake_up); @@ -561,26 +517,48 @@ void rpc_wake_up_status(struct rpc_wait_queue *queue, int status) struct rpc_task *task, *next; struct list_head *head; - rcu_read_lock_bh(); - spin_lock(&queue->lock); + spin_lock_bh(&queue->lock); head = &queue->tasks[queue->maxpriority]; for (;;) { list_for_each_entry_safe(task, next, head, u.tk_wait.list) { task->tk_status = status; - __rpc_wake_up_task(task); + rpc_wake_up_task_queue_locked(queue, task); } if (head == &queue->tasks[0]) break; head--; } - spin_unlock(&queue->lock); - rcu_read_unlock_bh(); + spin_unlock_bh(&queue->lock); } EXPORT_SYMBOL_GPL(rpc_wake_up_status); +static void __rpc_queue_timer_fn(unsigned long ptr) +{ + struct rpc_wait_queue *queue = (struct rpc_wait_queue *)ptr; + struct rpc_task *task, *n; + unsigned long expires, now, timeo; + + spin_lock(&queue->lock); + expires = now = jiffies; + list_for_each_entry_safe(task, n, &queue->timer_list.list, u.tk_wait.timer_list) { + timeo = task->u.tk_wait.expires; + if (time_after_eq(now, timeo)) { + dprintk("RPC: %5u timeout\n", task->tk_pid); + task->tk_status = -ETIMEDOUT; + rpc_wake_up_task_queue_locked(queue, task); + continue; + } + if (expires == now || time_after(expires, timeo)) + expires = timeo; + } + if (!list_empty(&queue->timer_list.list)) + rpc_set_queue_timer(queue, expires); + spin_unlock(&queue->lock); +} + static void __rpc_atrun(struct rpc_task *task) { - rpc_wake_up_task(task); + task->tk_status = 0; } /* @@ -589,7 +567,7 @@ static void __rpc_atrun(struct rpc_task *task) void rpc_delay(struct rpc_task *task, unsigned long delay) { task->tk_timeout = delay; - rpc_sleep_on(&delay_queue, task, NULL, __rpc_atrun); + rpc_sleep_on(&delay_queue, task, __rpc_atrun); } EXPORT_SYMBOL_GPL(rpc_delay); @@ -644,10 +622,6 @@ static void __rpc_execute(struct rpc_task *task) BUG_ON(RPC_IS_QUEUED(task)); for (;;) { - /* - * Garbage collection of pending timers... - */ - rpc_delete_timer(task); /* * Execute any pending callback. @@ -816,8 +790,6 @@ EXPORT_SYMBOL_GPL(rpc_free); static void rpc_init_task(struct rpc_task *task, const struct rpc_task_setup *task_setup_data) { memset(task, 0, sizeof(*task)); - setup_timer(&task->tk_timer, (void (*)(unsigned long))rpc_run_timer, - (unsigned long)task); atomic_set(&task->tk_count, 1); task->tk_flags = task_setup_data->flags; task->tk_ops = task_setup_data->callback_ops; @@ -832,7 +804,7 @@ static void rpc_init_task(struct rpc_task *task, const struct rpc_task_setup *ta task->tk_owner = current->tgid; /* Initialize workqueue for async tasks */ - task->tk_workqueue = rpciod_workqueue; + task->tk_workqueue = task_setup_data->workqueue; task->tk_client = task_setup_data->rpc_client; if (task->tk_client != NULL) { @@ -845,12 +817,11 @@ static void rpc_init_task(struct rpc_task *task, const struct rpc_task_setup *ta task->tk_action = rpc_prepare_task; if (task_setup_data->rpc_message != NULL) { - memcpy(&task->tk_msg, task_setup_data->rpc_message, sizeof(task->tk_msg)); + task->tk_msg.rpc_proc = task_setup_data->rpc_message->rpc_proc; + task->tk_msg.rpc_argp = task_setup_data->rpc_message->rpc_argp; + task->tk_msg.rpc_resp = task_setup_data->rpc_message->rpc_resp; /* Bind the user cred */ - if (task->tk_msg.rpc_cred != NULL) - rpcauth_holdcred(task); - else - rpcauth_bindcred(task); + rpcauth_bindcred(task, task_setup_data->rpc_message->rpc_cred, task_setup_data->flags); if (task->tk_action == NULL) rpc_call_start(task); } @@ -868,13 +839,6 @@ rpc_alloc_task(void) return (struct rpc_task *)mempool_alloc(rpc_task_mempool, GFP_NOFS); } -static void rpc_free_task(struct rcu_head *rcu) -{ - struct rpc_task *task = container_of(rcu, struct rpc_task, u.tk_rcu); - dprintk("RPC: %5u freeing task\n", task->tk_pid); - mempool_free(task, rpc_task_mempool); -} - /* * Create a new task for the specified client. */ @@ -898,12 +862,25 @@ out: return task; } - -void rpc_put_task(struct rpc_task *task) +static void rpc_free_task(struct rpc_task *task) { const struct rpc_call_ops *tk_ops = task->tk_ops; void *calldata = task->tk_calldata; + if (task->tk_flags & RPC_TASK_DYNAMIC) { + dprintk("RPC: %5u freeing task\n", task->tk_pid); + mempool_free(task, rpc_task_mempool); + } + rpc_release_calldata(tk_ops, calldata); +} + +static void rpc_async_release(struct work_struct *work) +{ + rpc_free_task(container_of(work, struct rpc_task, u.tk_work)); +} + +void rpc_put_task(struct rpc_task *task) +{ if (!atomic_dec_and_test(&task->tk_count)) return; /* Release resources */ @@ -915,9 +892,11 @@ void rpc_put_task(struct rpc_task *task) rpc_release_client(task->tk_client); task->tk_client = NULL; } - if (task->tk_flags & RPC_TASK_DYNAMIC) - call_rcu_bh(&task->u.tk_rcu, rpc_free_task); - rpc_release_calldata(tk_ops, calldata); + if (task->tk_workqueue != NULL) { + INIT_WORK(&task->u.tk_work, rpc_async_release); + queue_work(task->tk_workqueue, &task->u.tk_work); + } else + rpc_free_task(task); } EXPORT_SYMBOL_GPL(rpc_put_task); @@ -937,9 +916,6 @@ static void rpc_release_task(struct rpc_task *task) } BUG_ON (RPC_IS_QUEUED(task)); - /* Synchronously delete any running timer */ - rpc_delete_timer(task); - #ifdef RPC_DEBUG task->tk_magic = 0; #endif @@ -1029,11 +1005,20 @@ rpc_destroy_mempool(void) kmem_cache_destroy(rpc_task_slabp); if (rpc_buffer_slabp) kmem_cache_destroy(rpc_buffer_slabp); + rpc_destroy_wait_queue(&delay_queue); } int rpc_init_mempool(void) { + /* + * The following is not strictly a mempool initialisation, + * but there is no harm in doing it here + */ + rpc_init_wait_queue(&delay_queue, "delayq"); + if (!rpciod_start()) + goto err_nomem; + rpc_task_slabp = kmem_cache_create("rpc_tasks", sizeof(struct rpc_task), 0, SLAB_HWCACHE_ALIGN, @@ -1054,13 +1039,6 @@ rpc_init_mempool(void) rpc_buffer_slabp); if (!rpc_buffer_mempool) goto err_nomem; - if (!rpciod_start()) - goto err_nomem; - /* - * The following is not strictly a mempool initialisation, - * but there is no harm in doing it here - */ - rpc_init_wait_queue(&delay_queue, "delayq"); return 0; err_nomem: rpc_destroy_mempool(); diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c index d5553b8..75d748e 100644 --- a/net/sunrpc/xprt.c +++ b/net/sunrpc/xprt.c @@ -188,9 +188,9 @@ out_sleep: task->tk_timeout = 0; task->tk_status = -EAGAIN; if (req && req->rq_ntrans) - rpc_sleep_on(&xprt->resend, task, NULL, NULL); + rpc_sleep_on(&xprt->resend, task, NULL); else - rpc_sleep_on(&xprt->sending, task, NULL, NULL); + rpc_sleep_on(&xprt->sending, task, NULL); return 0; } EXPORT_SYMBOL_GPL(xprt_reserve_xprt); @@ -238,9 +238,9 @@ out_sleep: task->tk_timeout = 0; task->tk_status = -EAGAIN; if (req && req->rq_ntrans) - rpc_sleep_on(&xprt->resend, task, NULL, NULL); + rpc_sleep_on(&xprt->resend, task, NULL); else - rpc_sleep_on(&xprt->sending, task, NULL, NULL); + rpc_sleep_on(&xprt->sending, task, NULL); return 0; } EXPORT_SYMBOL_GPL(xprt_reserve_xprt_cong); @@ -447,13 +447,13 @@ EXPORT_SYMBOL_GPL(xprt_wake_pending_tasks); * @task: task to be put to sleep * */ -void xprt_wait_for_buffer_space(struct rpc_task *task) +void xprt_wait_for_buffer_space(struct rpc_task *task, rpc_action action) { struct rpc_rqst *req = task->tk_rqstp; struct rpc_xprt *xprt = req->rq_xprt; task->tk_timeout = req->rq_timeout; - rpc_sleep_on(&xprt->pending, task, NULL, NULL); + rpc_sleep_on(&xprt->pending, task, action); } EXPORT_SYMBOL_GPL(xprt_wait_for_buffer_space); @@ -472,7 +472,7 @@ void xprt_write_space(struct rpc_xprt *xprt) if (xprt->snd_task) { dprintk("RPC: write space: waking waiting task on " "xprt %p\n", xprt); - rpc_wake_up_task(xprt->snd_task); + rpc_wake_up_queued_task(&xprt->pending, xprt->snd_task); } spin_unlock_bh(&xprt->transport_lock); } @@ -602,11 +602,37 @@ void xprt_force_disconnect(struct rpc_xprt *xprt) /* Try to schedule an autoclose RPC call */ if (test_and_set_bit(XPRT_LOCKED, &xprt->state) == 0) queue_work(rpciod_workqueue, &xprt->task_cleanup); - else if (xprt->snd_task != NULL) - rpc_wake_up_task(xprt->snd_task); + xprt_wake_pending_tasks(xprt, -ENOTCONN); + spin_unlock_bh(&xprt->transport_lock); +} + +/** + * xprt_conditional_disconnect - force a transport to disconnect + * @xprt: transport to disconnect + * @cookie: 'connection cookie' + * + * This attempts to break the connection if and only if 'cookie' matches + * the current transport 'connection cookie'. It ensures that we don't + * try to break the connection more than once when we need to retransmit + * a batch of RPC requests. + * + */ +void xprt_conditional_disconnect(struct rpc_xprt *xprt, unsigned int cookie) +{ + /* Don't race with the test_bit() in xprt_clear_locked() */ + spin_lock_bh(&xprt->transport_lock); + if (cookie != xprt->connect_cookie) + goto out; + if (test_bit(XPRT_CLOSING, &xprt->state) || !xprt_connected(xprt)) + goto out; + set_bit(XPRT_CLOSE_WAIT, &xprt->state); + /* Try to schedule an autoclose RPC call */ + if (test_and_set_bit(XPRT_LOCKED, &xprt->state) == 0) + queue_work(rpciod_workqueue, &xprt->task_cleanup); + xprt_wake_pending_tasks(xprt, -ENOTCONN); +out: spin_unlock_bh(&xprt->transport_lock); } -EXPORT_SYMBOL_GPL(xprt_force_disconnect); static void xprt_init_autodisconnect(unsigned long data) @@ -653,7 +679,7 @@ void xprt_connect(struct rpc_task *task) task->tk_rqstp->rq_bytes_sent = 0; task->tk_timeout = xprt->connect_timeout; - rpc_sleep_on(&xprt->pending, task, xprt_connect_status, NULL); + rpc_sleep_on(&xprt->pending, task, xprt_connect_status); xprt->stat.connect_start = jiffies; xprt->ops->connect(task); } @@ -749,18 +775,20 @@ EXPORT_SYMBOL_GPL(xprt_update_rtt); void xprt_complete_rqst(struct rpc_task *task, int copied) { struct rpc_rqst *req = task->tk_rqstp; + struct rpc_xprt *xprt = req->rq_xprt; dprintk("RPC: %5u xid %08x complete (%d bytes received)\n", task->tk_pid, ntohl(req->rq_xid), copied); - task->tk_xprt->stat.recvs++; + xprt->stat.recvs++; task->tk_rtt = (long)jiffies - req->rq_xtime; list_del_init(&req->rq_list); + req->rq_private_buf.len = copied; /* Ensure all writes are done before we update req->rq_received */ smp_wmb(); - req->rq_received = req->rq_private_buf.len = copied; - rpc_wake_up_task(task); + req->rq_received = copied; + rpc_wake_up_queued_task(&xprt->pending, task); } EXPORT_SYMBOL_GPL(xprt_complete_rqst); @@ -769,17 +797,17 @@ static void xprt_timer(struct rpc_task *task) struct rpc_rqst *req = task->tk_rqstp; struct rpc_xprt *xprt = req->rq_xprt; + if (task->tk_status != -ETIMEDOUT) + return; dprintk("RPC: %5u xprt_timer\n", task->tk_pid); - spin_lock(&xprt->transport_lock); + spin_lock_bh(&xprt->transport_lock); if (!req->rq_received) { if (xprt->ops->timer) xprt->ops->timer(task); - task->tk_status = -ETIMEDOUT; - } - task->tk_timeout = 0; - rpc_wake_up_task(task); - spin_unlock(&xprt->transport_lock); + } else + task->tk_status = 0; + spin_unlock_bh(&xprt->transport_lock); } /** @@ -849,6 +877,7 @@ void xprt_transmit(struct rpc_task *task) } else if (!req->rq_bytes_sent) return; + req->rq_connect_cookie = xprt->connect_cookie; status = xprt->ops->send_request(task); if (status == 0) { dprintk("RPC: %5u xmit complete\n", task->tk_pid); @@ -864,7 +893,7 @@ void xprt_transmit(struct rpc_task *task) if (!xprt_connected(xprt)) task->tk_status = -ENOTCONN; else if (!req->rq_received) - rpc_sleep_on(&xprt->pending, task, NULL, xprt_timer); + rpc_sleep_on(&xprt->pending, task, xprt_timer); spin_unlock_bh(&xprt->transport_lock); return; } @@ -875,7 +904,7 @@ void xprt_transmit(struct rpc_task *task) */ task->tk_status = status; if (status == -ECONNREFUSED) - rpc_sleep_on(&xprt->sending, task, NULL, NULL); + rpc_sleep_on(&xprt->sending, task, NULL); } static inline void do_xprt_reserve(struct rpc_task *task) @@ -895,7 +924,7 @@ static inline void do_xprt_reserve(struct rpc_task *task) dprintk("RPC: waiting for request slot\n"); task->tk_status = -EAGAIN; task->tk_timeout = 0; - rpc_sleep_on(&xprt->backlog, task, NULL, NULL); + rpc_sleep_on(&xprt->backlog, task, NULL); } /** @@ -1052,6 +1081,11 @@ static void xprt_destroy(struct kref *kref) xprt->shutdown = 1; del_timer_sync(&xprt->timer); + rpc_destroy_wait_queue(&xprt->binding); + rpc_destroy_wait_queue(&xprt->pending); + rpc_destroy_wait_queue(&xprt->sending); + rpc_destroy_wait_queue(&xprt->resend); + rpc_destroy_wait_queue(&xprt->backlog); /* * Tear down transport state and free the rpc_xprt */ diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c index 613daf8..ddbe981 100644 --- a/net/sunrpc/xprtsock.c +++ b/net/sunrpc/xprtsock.c @@ -136,12 +136,6 @@ static ctl_table sunrpc_table[] = { #endif /* - * How many times to try sending a request on a socket before waiting - * for the socket buffer to clear. - */ -#define XS_SENDMSG_RETRY (10U) - -/* * Time out for an RPC UDP socket connect. UDP socket connects are * synchronous, but we set a timeout anyway in case of resource * exhaustion on the local host. @@ -516,6 +510,14 @@ out: return sent; } +static void xs_nospace_callback(struct rpc_task *task) +{ + struct sock_xprt *transport = container_of(task->tk_rqstp->rq_xprt, struct sock_xprt, xprt); + + transport->inet->sk_write_pending--; + clear_bit(SOCK_ASYNC_NOSPACE, &transport->sock->flags); +} + /** * xs_nospace - place task on wait queue if transmit was incomplete * @task: task to put to sleep @@ -531,20 +533,27 @@ static void xs_nospace(struct rpc_task *task) task->tk_pid, req->rq_slen - req->rq_bytes_sent, req->rq_slen); - if (test_bit(SOCK_ASYNC_NOSPACE, &transport->sock->flags)) { - /* Protect against races with write_space */ - spin_lock_bh(&xprt->transport_lock); - - /* Don't race with disconnect */ - if (!xprt_connected(xprt)) - task->tk_status = -ENOTCONN; - else if (test_bit(SOCK_NOSPACE, &transport->sock->flags)) - xprt_wait_for_buffer_space(task); + /* Protect against races with write_space */ + spin_lock_bh(&xprt->transport_lock); + + /* Don't race with disconnect */ + if (xprt_connected(xprt)) { + if (test_bit(SOCK_ASYNC_NOSPACE, &transport->sock->flags)) { + /* + * Notify TCP that we're limited by the application + * window size + */ + set_bit(SOCK_NOSPACE, &transport->sock->flags); + transport->inet->sk_write_pending++; + /* ...and wait for more buffer space */ + xprt_wait_for_buffer_space(task, xs_nospace_callback); + } + } else { + clear_bit(SOCK_ASYNC_NOSPACE, &transport->sock->flags); + task->tk_status = -ENOTCONN; + } - spin_unlock_bh(&xprt->transport_lock); - } else - /* Keep holding the socket if it is blocked */ - rpc_delay(task, HZ>>4); + spin_unlock_bh(&xprt->transport_lock); } /** @@ -588,19 +597,20 @@ static int xs_udp_send_request(struct rpc_task *task) } switch (status) { + case -EAGAIN: + xs_nospace(task); + break; case -ENETUNREACH: case -EPIPE: case -ECONNREFUSED: /* When the server has died, an ICMP port unreachable message * prompts ECONNREFUSED. */ - break; - case -EAGAIN: - xs_nospace(task); + clear_bit(SOCK_ASYNC_NOSPACE, &transport->sock->flags); break; default: + clear_bit(SOCK_ASYNC_NOSPACE, &transport->sock->flags); dprintk("RPC: sendmsg returned unrecognized error %d\n", -status); - break; } return status; @@ -650,7 +660,6 @@ static int xs_tcp_send_request(struct rpc_task *task) struct sock_xprt *transport = container_of(xprt, struct sock_xprt, xprt); struct xdr_buf *xdr = &req->rq_snd_buf; int status; - unsigned int retry = 0; xs_encode_tcp_record_marker(&req->rq_snd_buf); @@ -681,9 +690,10 @@ static int xs_tcp_send_request(struct rpc_task *task) return 0; } + if (status != 0) + continue; status = -EAGAIN; - if (retry++ > XS_SENDMSG_RETRY) - break; + break; } switch (status) { @@ -695,12 +705,13 @@ static int xs_tcp_send_request(struct rpc_task *task) case -ENOTCONN: case -EPIPE: status = -ENOTCONN; + clear_bit(SOCK_ASYNC_NOSPACE, &transport->sock->flags); break; default: dprintk("RPC: sendmsg returned unrecognized error %d\n", -status); + clear_bit(SOCK_ASYNC_NOSPACE, &transport->sock->flags); xs_tcp_shutdown(xprt); - break; } return status; @@ -1073,6 +1084,7 @@ static void xs_tcp_data_ready(struct sock *sk, int bytes) { struct rpc_xprt *xprt; read_descriptor_t rd_desc; + int read; dprintk("RPC: xs_tcp_data_ready...\n"); @@ -1084,8 +1096,10 @@ static void xs_tcp_data_ready(struct sock *sk, int bytes) /* We use rd_desc to pass struct xprt to xs_tcp_data_recv */ rd_desc.arg.data = xprt; - rd_desc.count = 65536; - tcp_read_sock(sk, &rd_desc, xs_tcp_data_recv); + do { + rd_desc.count = 65536; + read = tcp_read_sock(sk, &rd_desc, xs_tcp_data_recv); + } while (read > 0); out: read_unlock(&sk->sk_callback_lock); } @@ -1128,6 +1142,7 @@ static void xs_tcp_state_change(struct sock *sk) break; case TCP_FIN_WAIT1: /* The client initiated a shutdown of the socket */ + xprt->connect_cookie++; xprt->reestablish_timeout = 0; set_bit(XPRT_CLOSING, &xprt->state); smp_mb__before_clear_bit(); @@ -1140,6 +1155,7 @@ static void xs_tcp_state_change(struct sock *sk) set_bit(XPRT_CLOSING, &xprt->state); xprt_force_disconnect(xprt); case TCP_SYN_SENT: + xprt->connect_cookie++; case TCP_CLOSING: /* * If the server closed down the connection, make sure that @@ -1186,9 +1202,11 @@ static void xs_udp_write_space(struct sock *sk) if (unlikely(!(sock = sk->sk_socket))) goto out; + clear_bit(SOCK_NOSPACE, &sock->flags); + if (unlikely(!(xprt = xprt_from_sock(sk)))) goto out; - if (unlikely(!test_and_clear_bit(SOCK_NOSPACE, &sock->flags))) + if (test_and_clear_bit(SOCK_ASYNC_NOSPACE, &sock->flags) == 0) goto out; xprt_write_space(xprt); @@ -1219,9 +1237,11 @@ static void xs_tcp_write_space(struct sock *sk) if (unlikely(!(sock = sk->sk_socket))) goto out; + clear_bit(SOCK_NOSPACE, &sock->flags); + if (unlikely(!(xprt = xprt_from_sock(sk)))) goto out; - if (unlikely(!test_and_clear_bit(SOCK_NOSPACE, &sock->flags))) + if (test_and_clear_bit(SOCK_ASYNC_NOSPACE, &sock->flags) == 0) goto out; xprt_write_space(xprt); |