From 3c1818275cc65c6d53e0adfde0c989bfe89ab8d7 Mon Sep 17 00:00:00 2001 From: Anna Schumaker Date: Wed, 26 Jul 2017 10:14:55 -0400 Subject: NFS: Create NFS_ACCESS_* flags Passing the NFS v4 flags into the v3 code seems weird to me, even if they are defined to the same values. This patch adds in generic flags to help me feel better Signed-off-by: Anna Schumaker --- fs/nfs/dir.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) (limited to 'fs') diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index 5ceaeb1..80f397d 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -2369,15 +2369,15 @@ void nfs_access_add_cache(struct inode *inode, struct nfs_access_entry *set) } EXPORT_SYMBOL_GPL(nfs_access_add_cache); -#define NFS_MAY_READ (NFS4_ACCESS_READ) -#define NFS_MAY_WRITE (NFS4_ACCESS_MODIFY | \ - NFS4_ACCESS_EXTEND | \ - NFS4_ACCESS_DELETE) -#define NFS_FILE_MAY_WRITE (NFS4_ACCESS_MODIFY | \ - NFS4_ACCESS_EXTEND) +#define NFS_MAY_READ (NFS_ACCESS_READ) +#define NFS_MAY_WRITE (NFS_ACCESS_MODIFY | \ + NFS_ACCESS_EXTEND | \ + NFS_ACCESS_DELETE) +#define NFS_FILE_MAY_WRITE (NFS_ACCESS_MODIFY | \ + NFS_ACCESS_EXTEND) #define NFS_DIR_MAY_WRITE NFS_MAY_WRITE -#define NFS_MAY_LOOKUP (NFS4_ACCESS_LOOKUP) -#define NFS_MAY_EXECUTE (NFS4_ACCESS_EXECUTE) +#define NFS_MAY_LOOKUP (NFS_ACCESS_LOOKUP) +#define NFS_MAY_EXECUTE (NFS_ACCESS_EXECUTE) static int nfs_access_calc_mask(u32 access_result, umode_t umode) { -- cgit v1.1 From 1750d929b08764dd293d5bdddaa9bc4d3f94d228 Mon Sep 17 00:00:00 2001 From: Anna Schumaker Date: Wed, 26 Jul 2017 12:00:21 -0400 Subject: NFS: Don't compare apples to elephants to determine access bits The NFS_ACCESS_* flags aren't a 1:1 mapping to the MAY_* flags, so checking for MAY_WHATEVER might have surprising results in nfs*_proc_access(). Let's simplify this check when determining which bits to ask for, and do it in a generic place instead of copying code for each NFS version. Signed-off-by: Anna Schumaker --- fs/nfs/dir.c | 11 ++++++++--- fs/nfs/nfs3proc.c | 17 +---------------- fs/nfs/nfs4proc.c | 19 +------------------ 3 files changed, 10 insertions(+), 37 deletions(-) (limited to 'fs') diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index 80f397d..db482d4 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -2425,9 +2425,14 @@ static int nfs_do_access(struct inode *inode, struct rpc_cred *cred, int mask) if (!may_block) goto out; - /* Be clever: ask server to check for all possible rights */ - cache.mask = NFS_MAY_LOOKUP | NFS_MAY_EXECUTE - | NFS_MAY_WRITE | NFS_MAY_READ; + /* + * Determine which access bits we want to ask for... + */ + cache.mask = NFS_ACCESS_READ | NFS_ACCESS_MODIFY | NFS_ACCESS_EXTEND; + if (S_ISDIR(inode->i_mode)) + cache.mask |= NFS_ACCESS_DELETE | NFS_ACCESS_LOOKUP; + else + cache.mask |= NFS_ACCESS_EXECUTE; cache.cred = cred; status = NFS_PROTO(inode)->access(inode, &cache); if (status != 0) { diff --git a/fs/nfs/nfs3proc.c b/fs/nfs/nfs3proc.c index d1e87ec..44bf961 100644 --- a/fs/nfs/nfs3proc.c +++ b/fs/nfs/nfs3proc.c @@ -187,6 +187,7 @@ static int nfs3_proc_access(struct inode *inode, struct nfs_access_entry *entry) { struct nfs3_accessargs arg = { .fh = NFS_FH(inode), + .access = entry->mask, }; struct nfs3_accessres res; struct rpc_message msg = { @@ -195,25 +196,9 @@ static int nfs3_proc_access(struct inode *inode, struct nfs_access_entry *entry) .rpc_resp = &res, .rpc_cred = entry->cred, }; - int mode = entry->mask; int status = -ENOMEM; dprintk("NFS call access\n"); - - if (mode & MAY_READ) - arg.access |= NFS3_ACCESS_READ; - if (S_ISDIR(inode->i_mode)) { - if (mode & MAY_WRITE) - arg.access |= NFS3_ACCESS_MODIFY | NFS3_ACCESS_EXTEND | NFS3_ACCESS_DELETE; - if (mode & MAY_EXEC) - arg.access |= NFS3_ACCESS_LOOKUP; - } else { - if (mode & MAY_WRITE) - arg.access |= NFS3_ACCESS_MODIFY | NFS3_ACCESS_EXTEND; - if (mode & MAY_EXEC) - arg.access |= NFS3_ACCESS_EXECUTE; - } - res.fattr = nfs_alloc_fattr(); if (res.fattr == NULL) goto out; diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index f90090e..7fd6423 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -3889,6 +3889,7 @@ static int _nfs4_proc_access(struct inode *inode, struct nfs_access_entry *entry struct nfs4_accessargs args = { .fh = NFS_FH(inode), .bitmask = server->cache_consistency_bitmask, + .access = entry->mask, }; struct nfs4_accessres res = { .server = server, @@ -3899,26 +3900,8 @@ static int _nfs4_proc_access(struct inode *inode, struct nfs_access_entry *entry .rpc_resp = &res, .rpc_cred = entry->cred, }; - int mode = entry->mask; int status = 0; - /* - * Determine which access bits we want to ask for... - */ - if (mode & MAY_READ) - args.access |= NFS4_ACCESS_READ; - if (S_ISDIR(inode->i_mode)) { - if (mode & MAY_WRITE) - args.access |= NFS4_ACCESS_MODIFY | NFS4_ACCESS_EXTEND | NFS4_ACCESS_DELETE; - if (mode & MAY_EXEC) - args.access |= NFS4_ACCESS_LOOKUP; - } else { - if (mode & MAY_WRITE) - args.access |= NFS4_ACCESS_MODIFY | NFS4_ACCESS_EXTEND; - if (mode & MAY_EXEC) - args.access |= NFS4_ACCESS_EXECUTE; - } - res.fattr = nfs_alloc_fattr(); if (res.fattr == NULL) return -ENOMEM; -- cgit v1.1 From b688741cb06695312f18b730653d6611e1bad28d Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Fri, 25 Aug 2017 17:34:41 +1000 Subject: NFS: revalidate "." etc correctly on "open". For correct close-to-open semantics, NFS must validate the change attribute of a directory (or file) on open. Since commit ecf3d1f1aa74 ("vfs: kill FS_REVAL_DOT by adding a d_weak_revalidate dentry op"), open() of "." or a path ending ".." is not revalidated reliably (except when that direct is a mount point). Prior to that commit, "." was revalidated using nfs_lookup_revalidate() which checks the LOOKUP_OPEN flag and forces revalidation if the flag is set. Since that commit, nfs_weak_revalidate() is used for NFSv3 (which ignores the flags) and nothing is used for NFSv4. This is fixed by using nfs_lookup_verify_inode() in nfs_weak_revalidate(). This does the revalidation exactly when needed. Also, add a definition of .d_weak_revalidate for NFSv4. The incorrect behavior is easily demonstrated by running "echo *" in some non-mountpoint NFS directory while watching network traffic. Without this patch, "echo *" sometimes doesn't produce any traffic. With the patch it always does. Fixes: ecf3d1f1aa74 ("vfs: kill FS_REVAL_DOT by adding a d_weak_revalidate dentry op") cc: stable@vger.kernel.org (3.9+) Signed-off-by: NeilBrown Signed-off-by: Anna Schumaker --- fs/nfs/dir.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'fs') diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index db482d4..c583093 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -1241,8 +1241,7 @@ static int nfs_weak_revalidate(struct dentry *dentry, unsigned int flags) return 0; } - if (nfs_mapping_need_revalidate_inode(inode)) - error = __nfs_revalidate_inode(NFS_SERVER(inode), inode); + error = nfs_lookup_verify_inode(inode, flags); dfprintk(LOOKUPCACHE, "NFS: %s: inode %lu is %s\n", __func__, inode->i_ino, error ? "invalid" : "valid"); return !error; @@ -1393,6 +1392,7 @@ static int nfs4_lookup_revalidate(struct dentry *, unsigned int); const struct dentry_operations nfs4_dentry_operations = { .d_revalidate = nfs4_lookup_revalidate, + .d_weak_revalidate = nfs_weak_revalidate, .d_delete = nfs_dentry_delete, .d_iput = nfs_dentry_iput, .d_automount = nfs_d_automount, -- cgit v1.1 From 1fea73ac92f628538032ce746f7dd5d6f8dfcc32 Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Fri, 25 Aug 2017 17:34:41 +1000 Subject: NFS: remove special-case revalidate in nfs_opendir() Commit f5a73672d181 ("NFS: allow close-to-open cache semantics to apply to root of NFS filesystem") added a call to __nfs_revalidate_inode() to nfs_opendir to as the lookup process wouldn't reliable do this. Subsequent commit a3fbbde70a0c ("VFS: we need to set LOOKUP_JUMPED on mountpoint crossing") make this unnecessary. So remove the unnecessary code. Signed-off-by: NeilBrown Signed-off-by: Anna Schumaker --- fs/nfs/dir.c | 7 ------- 1 file changed, 7 deletions(-) (limited to 'fs') diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index c583093..15104f7 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -118,13 +118,6 @@ nfs_opendir(struct inode *inode, struct file *filp) goto out; } filp->private_data = ctx; - if (filp->f_path.dentry == filp->f_path.mnt->mnt_root) { - /* This is a mountpoint, so d_revalidate will never - * have been called, so we need to refresh the - * inode (for close-open consistency) ourselves. - */ - __nfs_revalidate_inode(NFS_SERVER(inode), inode); - } out: put_rpccred(cred); return res; -- cgit v1.1 From 3be0f80b5fe9c16eca2d538f799b94ca8aa59433 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Thu, 19 Oct 2017 15:46:45 -0400 Subject: NFSv4.1: Fix up replays of interrupted requests If the previous request on a slot was interrupted before it was processed by the server, then our slot sequence number may be out of whack, and so we try the next operation using the old sequence number. The problem with this, is that not all servers check to see that the client is replaying the same operations as previously when they decide to go to the replay cache, and so instead of the expected error of NFS4ERR_SEQ_FALSE_RETRY, we get a replay of the old reply, which could (if the operations match up) be mistaken by the client for a new reply. To fix this, we attempt to send a COMPOUND containing only the SEQUENCE op in order to resync our slot sequence number. Cc: Olga Kornievskaia [olga.kornievskaia@gmail.com: fix an Oops] Signed-off-by: Trond Myklebust Signed-off-by: Anna Schumaker --- fs/nfs/nfs4_fs.h | 2 +- fs/nfs/nfs4proc.c | 148 +++++++++++++++++++++++++++++++++++++----------------- 2 files changed, 103 insertions(+), 47 deletions(-) (limited to 'fs') diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h index ac4f10b..b547d93 100644 --- a/fs/nfs/nfs4_fs.h +++ b/fs/nfs/nfs4_fs.h @@ -464,7 +464,7 @@ extern void nfs_increment_open_seqid(int status, struct nfs_seqid *seqid); extern void nfs_increment_lock_seqid(int status, struct nfs_seqid *seqid); extern void nfs_release_seqid(struct nfs_seqid *seqid); extern void nfs_free_seqid(struct nfs_seqid *seqid); -extern int nfs4_setup_sequence(const struct nfs_client *client, +extern int nfs4_setup_sequence(struct nfs_client *client, struct nfs4_sequence_args *args, struct nfs4_sequence_res *res, struct rpc_task *task); diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 7fd6423..d0fd96b 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -96,6 +96,10 @@ static int nfs4_do_setattr(struct inode *inode, struct rpc_cred *cred, struct nfs_open_context *ctx, struct nfs4_label *ilabel, struct nfs4_label *olabel); #ifdef CONFIG_NFS_V4_1 +static struct rpc_task *_nfs41_proc_sequence(struct nfs_client *clp, + struct rpc_cred *cred, + struct nfs4_slot *slot, + bool is_privileged); static int nfs41_test_stateid(struct nfs_server *, nfs4_stateid *, struct rpc_cred *); static int nfs41_free_stateid(struct nfs_server *, const nfs4_stateid *, @@ -644,13 +648,14 @@ static int nfs40_sequence_done(struct rpc_task *task, #if defined(CONFIG_NFS_V4_1) -static void nfs41_sequence_free_slot(struct nfs4_sequence_res *res) +static void nfs41_release_slot(struct nfs4_slot *slot) { struct nfs4_session *session; struct nfs4_slot_table *tbl; - struct nfs4_slot *slot = res->sr_slot; bool send_new_highest_used_slotid = false; + if (!slot) + return; tbl = slot->table; session = tbl->session; @@ -676,13 +681,18 @@ static void nfs41_sequence_free_slot(struct nfs4_sequence_res *res) send_new_highest_used_slotid = false; out_unlock: spin_unlock(&tbl->slot_tbl_lock); - res->sr_slot = NULL; if (send_new_highest_used_slotid) nfs41_notify_server(session->clp); if (waitqueue_active(&tbl->slot_waitq)) wake_up_all(&tbl->slot_waitq); } +static void nfs41_sequence_free_slot(struct nfs4_sequence_res *res) +{ + nfs41_release_slot(res->sr_slot); + res->sr_slot = NULL; +} + static int nfs41_sequence_process(struct rpc_task *task, struct nfs4_sequence_res *res) { @@ -710,13 +720,6 @@ static int nfs41_sequence_process(struct rpc_task *task, /* Check the SEQUENCE operation status */ switch (res->sr_status) { case 0: - /* If previous op on slot was interrupted and we reused - * the seq# and got a reply from the cache, then retry - */ - if (task->tk_status == -EREMOTEIO && interrupted) { - ++slot->seq_nr; - goto retry_nowait; - } /* Update the slot's sequence and clientid lease timer */ slot->seq_done = 1; clp = session->clp; @@ -750,16 +753,16 @@ static int nfs41_sequence_process(struct rpc_task *task, * The slot id we used was probably retired. Try again * using a different slot id. */ + if (slot->seq_nr < slot->table->target_highest_slotid) + goto session_recover; goto retry_nowait; case -NFS4ERR_SEQ_MISORDERED: /* * Was the last operation on this sequence interrupted? * If so, retry after bumping the sequence number. */ - if (interrupted) { - ++slot->seq_nr; - goto retry_nowait; - } + if (interrupted) + goto retry_new_seq; /* * Could this slot have been previously retired? * If so, then the server may be expecting seq_nr = 1! @@ -768,10 +771,11 @@ static int nfs41_sequence_process(struct rpc_task *task, slot->seq_nr = 1; goto retry_nowait; } - break; + goto session_recover; case -NFS4ERR_SEQ_FALSE_RETRY: - ++slot->seq_nr; - goto retry_nowait; + if (interrupted) + goto retry_new_seq; + goto session_recover; default: /* Just update the slot sequence no. */ slot->seq_done = 1; @@ -781,6 +785,11 @@ out: dprintk("%s: Error %d free the slot \n", __func__, res->sr_status); out_noaction: return ret; +session_recover: + nfs4_schedule_session_recovery(session, res->sr_status); + goto retry_nowait; +retry_new_seq: + ++slot->seq_nr; retry_nowait: if (rpc_restart_call_prepare(task)) { nfs41_sequence_free_slot(res); @@ -857,6 +866,17 @@ static const struct rpc_call_ops nfs41_call_sync_ops = { .rpc_call_done = nfs41_call_sync_done, }; +static void +nfs4_sequence_process_interrupted(struct nfs_client *client, + struct nfs4_slot *slot, struct rpc_cred *cred) +{ + struct rpc_task *task; + + task = _nfs41_proc_sequence(client, cred, slot, true); + if (!IS_ERR(task)) + rpc_put_task_async(task); +} + #else /* !CONFIG_NFS_V4_1 */ static int nfs4_sequence_process(struct rpc_task *task, struct nfs4_sequence_res *res) @@ -877,9 +897,34 @@ int nfs4_sequence_done(struct rpc_task *task, } EXPORT_SYMBOL_GPL(nfs4_sequence_done); +static void +nfs4_sequence_process_interrupted(struct nfs_client *client, + struct nfs4_slot *slot, struct rpc_cred *cred) +{ + WARN_ON_ONCE(1); + slot->interrupted = 0; +} + #endif /* !CONFIG_NFS_V4_1 */ -int nfs4_setup_sequence(const struct nfs_client *client, +static +void nfs4_sequence_attach_slot(struct nfs4_sequence_args *args, + struct nfs4_sequence_res *res, + struct nfs4_slot *slot) +{ + if (!slot) + return; + slot->privileged = args->sa_privileged ? 1 : 0; + args->sa_slot = slot; + + res->sr_slot = slot; + res->sr_timestamp = jiffies; + res->sr_status_flags = 0; + res->sr_status = 1; + +} + +int nfs4_setup_sequence(struct nfs_client *client, struct nfs4_sequence_args *args, struct nfs4_sequence_res *res, struct rpc_task *task) @@ -897,29 +942,28 @@ int nfs4_setup_sequence(const struct nfs_client *client, task->tk_timeout = 0; } - spin_lock(&tbl->slot_tbl_lock); - /* The state manager will wait until the slot table is empty */ - if (nfs4_slot_tbl_draining(tbl) && !args->sa_privileged) - goto out_sleep; + for (;;) { + spin_lock(&tbl->slot_tbl_lock); + /* The state manager will wait until the slot table is empty */ + if (nfs4_slot_tbl_draining(tbl) && !args->sa_privileged) + goto out_sleep; + + slot = nfs4_alloc_slot(tbl); + if (IS_ERR(slot)) { + /* Try again in 1/4 second */ + if (slot == ERR_PTR(-ENOMEM)) + task->tk_timeout = HZ >> 2; + goto out_sleep; + } + spin_unlock(&tbl->slot_tbl_lock); - slot = nfs4_alloc_slot(tbl); - if (IS_ERR(slot)) { - /* Try again in 1/4 second */ - if (slot == ERR_PTR(-ENOMEM)) - task->tk_timeout = HZ >> 2; - goto out_sleep; + if (likely(!slot->interrupted)) + break; + nfs4_sequence_process_interrupted(client, + slot, task->tk_msg.rpc_cred); } - spin_unlock(&tbl->slot_tbl_lock); - - slot->privileged = args->sa_privileged ? 1 : 0; - args->sa_slot = slot; - res->sr_slot = slot; - if (session) { - res->sr_timestamp = jiffies; - res->sr_status_flags = 0; - res->sr_status = 1; - } + nfs4_sequence_attach_slot(args, res, slot); trace_nfs4_setup_sequence(session, args); out_start: @@ -8118,6 +8162,7 @@ static const struct rpc_call_ops nfs41_sequence_ops = { static struct rpc_task *_nfs41_proc_sequence(struct nfs_client *clp, struct rpc_cred *cred, + struct nfs4_slot *slot, bool is_privileged) { struct nfs4_sequence_data *calldata; @@ -8131,15 +8176,18 @@ static struct rpc_task *_nfs41_proc_sequence(struct nfs_client *clp, .callback_ops = &nfs41_sequence_ops, .flags = RPC_TASK_ASYNC | RPC_TASK_TIMEOUT, }; + struct rpc_task *ret; + ret = ERR_PTR(-EIO); if (!atomic_inc_not_zero(&clp->cl_count)) - return ERR_PTR(-EIO); + goto out_err; + + ret = ERR_PTR(-ENOMEM); calldata = kzalloc(sizeof(*calldata), GFP_NOFS); - if (calldata == NULL) { - nfs_put_client(clp); - return ERR_PTR(-ENOMEM); - } + if (calldata == NULL) + goto out_put_clp; nfs4_init_sequence(&calldata->args, &calldata->res, 0); + nfs4_sequence_attach_slot(&calldata->args, &calldata->res, slot); if (is_privileged) nfs4_set_sequence_privileged(&calldata->args); msg.rpc_argp = &calldata->args; @@ -8147,7 +8195,15 @@ static struct rpc_task *_nfs41_proc_sequence(struct nfs_client *clp, calldata->clp = clp; task_setup_data.callback_data = calldata; - return rpc_run_task(&task_setup_data); + ret = rpc_run_task(&task_setup_data); + if (IS_ERR(ret)) + goto out_err; + return ret; +out_put_clp: + nfs_put_client(clp); +out_err: + nfs41_release_slot(slot); + return ret; } static int nfs41_proc_async_sequence(struct nfs_client *clp, struct rpc_cred *cred, unsigned renew_flags) @@ -8157,7 +8213,7 @@ static int nfs41_proc_async_sequence(struct nfs_client *clp, struct rpc_cred *cr if ((renew_flags & NFS4_RENEW_TIMEOUT) == 0) return -EAGAIN; - task = _nfs41_proc_sequence(clp, cred, false); + task = _nfs41_proc_sequence(clp, cred, NULL, false); if (IS_ERR(task)) ret = PTR_ERR(task); else @@ -8171,7 +8227,7 @@ static int nfs4_proc_sequence(struct nfs_client *clp, struct rpc_cred *cred) struct rpc_task *task; int ret; - task = _nfs41_proc_sequence(clp, cred, true); + task = _nfs41_proc_sequence(clp, cred, NULL, true); if (IS_ERR(task)) { ret = PTR_ERR(task); goto out; -- cgit v1.1 From a2a5dea7b6cb77365ed9c987f54d160668c8a95f Mon Sep 17 00:00:00 2001 From: Elena Reshetova Date: Fri, 20 Oct 2017 12:53:31 +0300 Subject: fs, nfs: convert nfs4_pnfs_ds.ds_count from atomic_t to refcount_t atomic_t variables are currently used to implement reference counters with the following properties: - counter is initialized to 1 using atomic_set() - a resource is freed upon counter reaching zero - once counter reaches zero, its further increments aren't allowed - counter schema uses basic atomic operations (set, inc, inc_not_zero, dec_and_test, etc.) Such atomic variables should be converted to a newly provided refcount_t type and API that prevents accidental counter overflows and underflows. This is important since overflows and underflows can lead to use-after-free situation and be exploitable. The variable nfs4_pnfs_ds.ds_count is used as pure reference counter. Convert it to refcount_t and fix up the operations. Suggested-by: Kees Cook Reviewed-by: David Windsor Reviewed-by: Hans Liljestrand Signed-off-by: Elena Reshetova Signed-off-by: Anna Schumaker --- fs/nfs/pnfs.h | 3 ++- fs/nfs/pnfs_nfs.c | 10 +++++----- 2 files changed, 7 insertions(+), 6 deletions(-) (limited to 'fs') diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h index 87f144f..cefa7da 100644 --- a/fs/nfs/pnfs.h +++ b/fs/nfs/pnfs.h @@ -30,6 +30,7 @@ #ifndef FS_NFS_PNFS_H #define FS_NFS_PNFS_H +#include #include #include #include @@ -54,7 +55,7 @@ struct nfs4_pnfs_ds { char *ds_remotestr; /* comma sep list of addrs */ struct list_head ds_addrs; struct nfs_client *ds_clp; - atomic_t ds_count; + refcount_t ds_count; unsigned long ds_state; #define NFS4DS_CONNECTING 0 /* ds is establishing connection */ }; diff --git a/fs/nfs/pnfs_nfs.c b/fs/nfs/pnfs_nfs.c index 60da59b..03aaa60 100644 --- a/fs/nfs/pnfs_nfs.c +++ b/fs/nfs/pnfs_nfs.c @@ -338,7 +338,7 @@ print_ds(struct nfs4_pnfs_ds *ds) " client %p\n" " cl_exchange_flags %x\n", ds->ds_remotestr, - atomic_read(&ds->ds_count), ds->ds_clp, + refcount_read(&ds->ds_count), ds->ds_clp, ds->ds_clp ? ds->ds_clp->cl_exchange_flags : 0); } @@ -451,7 +451,7 @@ static void destroy_ds(struct nfs4_pnfs_ds *ds) void nfs4_pnfs_ds_put(struct nfs4_pnfs_ds *ds) { - if (atomic_dec_and_lock(&ds->ds_count, + if (refcount_dec_and_lock(&ds->ds_count, &nfs4_ds_cache_lock)) { list_del_init(&ds->ds_node); spin_unlock(&nfs4_ds_cache_lock); @@ -537,7 +537,7 @@ nfs4_pnfs_ds_add(struct list_head *dsaddrs, gfp_t gfp_flags) INIT_LIST_HEAD(&ds->ds_addrs); list_splice_init(dsaddrs, &ds->ds_addrs); ds->ds_remotestr = remotestr; - atomic_set(&ds->ds_count, 1); + refcount_set(&ds->ds_count, 1); INIT_LIST_HEAD(&ds->ds_node); ds->ds_clp = NULL; list_add(&ds->ds_node, &nfs4_data_server_cache); @@ -546,10 +546,10 @@ nfs4_pnfs_ds_add(struct list_head *dsaddrs, gfp_t gfp_flags) } else { kfree(remotestr); kfree(ds); - atomic_inc(&tmp_ds->ds_count); + refcount_inc(&tmp_ds->ds_count); dprintk("%s data server %s found, inc'ed ds_count to %d\n", __func__, tmp_ds->ds_remotestr, - atomic_read(&tmp_ds->ds_count)); + refcount_read(&tmp_ds->ds_count)); ds = tmp_ds; } spin_unlock(&nfs4_ds_cache_lock); -- cgit v1.1 From eba6dd691743a9d7a57735f36bf6946fc58878ec Mon Sep 17 00:00:00 2001 From: Elena Reshetova Date: Fri, 20 Oct 2017 12:53:32 +0300 Subject: fs, nfs: convert pnfs_layout_segment.pls_refcount from atomic_t to refcount_t refcount_t type and corresponding API should be used instead of atomic_t when the variable is used as a reference counter. This allows to avoid accidental refcounter overflows that might lead to use-after-free situations. Signed-off-by: Elena Reshetova Signed-off-by: Hans Liljestrand Signed-off-by: Kees Cook Signed-off-by: David Windsor Signed-off-by: Anna Schumaker --- fs/nfs/pnfs.c | 12 ++++++------ fs/nfs/pnfs.h | 4 ++-- 2 files changed, 8 insertions(+), 8 deletions(-) (limited to 'fs') diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index 3bcd669..499bb71 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c @@ -450,7 +450,7 @@ pnfs_init_lseg(struct pnfs_layout_hdr *lo, struct pnfs_layout_segment *lseg, { INIT_LIST_HEAD(&lseg->pls_list); INIT_LIST_HEAD(&lseg->pls_lc_list); - atomic_set(&lseg->pls_refcount, 1); + refcount_set(&lseg->pls_refcount, 1); set_bit(NFS_LSEG_VALID, &lseg->pls_flags); lseg->pls_layout = lo; lseg->pls_range = *range; @@ -507,13 +507,13 @@ pnfs_put_lseg(struct pnfs_layout_segment *lseg) return; dprintk("%s: lseg %p ref %d valid %d\n", __func__, lseg, - atomic_read(&lseg->pls_refcount), + refcount_read(&lseg->pls_refcount), test_bit(NFS_LSEG_VALID, &lseg->pls_flags)); lo = lseg->pls_layout; inode = lo->plh_inode; - if (atomic_dec_and_lock(&lseg->pls_refcount, &inode->i_lock)) { + if (refcount_dec_and_lock(&lseg->pls_refcount, &inode->i_lock)) { if (test_bit(NFS_LSEG_VALID, &lseg->pls_flags)) { spin_unlock(&inode->i_lock); return; @@ -551,7 +551,7 @@ pnfs_lseg_range_contained(const struct pnfs_layout_range *l1, static bool pnfs_lseg_dec_and_remove_zero(struct pnfs_layout_segment *lseg, struct list_head *tmp_list) { - if (!atomic_dec_and_test(&lseg->pls_refcount)) + if (!refcount_dec_and_test(&lseg->pls_refcount)) return false; pnfs_layout_remove_lseg(lseg->pls_layout, lseg); list_add(&lseg->pls_list, tmp_list); @@ -570,7 +570,7 @@ static int mark_lseg_invalid(struct pnfs_layout_segment *lseg, * outstanding io is finished. */ dprintk("%s: lseg %p ref %d\n", __func__, lseg, - atomic_read(&lseg->pls_refcount)); + refcount_read(&lseg->pls_refcount)); if (pnfs_lseg_dec_and_remove_zero(lseg, tmp_list)) rv = 1; } @@ -1546,7 +1546,7 @@ pnfs_find_lseg(struct pnfs_layout_hdr *lo, } dprintk("%s:Return lseg %p ref %d\n", - __func__, ret, ret ? atomic_read(&ret->pls_refcount) : 0); + __func__, ret, ret ? refcount_read(&ret->pls_refcount) : 0); return ret; } diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h index cefa7da..f0e98e1 100644 --- a/fs/nfs/pnfs.h +++ b/fs/nfs/pnfs.h @@ -64,7 +64,7 @@ struct pnfs_layout_segment { struct list_head pls_list; struct list_head pls_lc_list; struct pnfs_layout_range pls_range; - atomic_t pls_refcount; + refcount_t pls_refcount; u32 pls_seq; unsigned long pls_flags; struct pnfs_layout_hdr *pls_layout; @@ -394,7 +394,7 @@ static inline struct pnfs_layout_segment * pnfs_get_lseg(struct pnfs_layout_segment *lseg) { if (lseg) { - atomic_inc(&lseg->pls_refcount); + refcount_inc(&lseg->pls_refcount); smp_mb__after_atomic(); } return lseg; -- cgit v1.1 From 2b28a7bee4539d27f4ec3b0298f03bfd83d4f794 Mon Sep 17 00:00:00 2001 From: Elena Reshetova Date: Fri, 20 Oct 2017 12:53:33 +0300 Subject: fs, nfs: convert pnfs_layout_hdr.plh_refcount from atomic_t to refcount_t atomic_t variables are currently used to implement reference counters with the following properties: - counter is initialized to 1 using atomic_set() - a resource is freed upon counter reaching zero - once counter reaches zero, its further increments aren't allowed - counter schema uses basic atomic operations (set, inc, inc_not_zero, dec_and_test, etc.) Such atomic variables should be converted to a newly provided refcount_t type and API that prevents accidental counter overflows and underflows. This is important since overflows and underflows can lead to use-after-free situation and be exploitable. The variable pnfs_layout_hdr.plh_refcount is used as pure reference counter. Convert it to refcount_t and fix up the operations. Suggested-by: Kees Cook Reviewed-by: David Windsor Reviewed-by: Hans Liljestrand Signed-off-by: Elena Reshetova Signed-off-by: Anna Schumaker --- fs/nfs/pnfs.c | 12 ++++++------ fs/nfs/pnfs.h | 2 +- 2 files changed, 7 insertions(+), 7 deletions(-) (limited to 'fs') diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index 499bb71..4aab53b 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c @@ -251,7 +251,7 @@ EXPORT_SYMBOL_GPL(pnfs_unregister_layoutdriver); void pnfs_get_layout_hdr(struct pnfs_layout_hdr *lo) { - atomic_inc(&lo->plh_refcount); + refcount_inc(&lo->plh_refcount); } static struct pnfs_layout_hdr * @@ -296,7 +296,7 @@ pnfs_put_layout_hdr(struct pnfs_layout_hdr *lo) pnfs_layoutreturn_before_put_layout_hdr(lo); - if (atomic_dec_and_lock(&lo->plh_refcount, &inode->i_lock)) { + if (refcount_dec_and_lock(&lo->plh_refcount, &inode->i_lock)) { if (!list_empty(&lo->plh_segs)) WARN_ONCE(1, "NFS: BUG unfreed layout segments.\n"); pnfs_detach_layout_hdr(lo); @@ -395,14 +395,14 @@ pnfs_layout_set_fail_bit(struct pnfs_layout_hdr *lo, int fail_bit) { lo->plh_retry_timestamp = jiffies; if (!test_and_set_bit(fail_bit, &lo->plh_flags)) - atomic_inc(&lo->plh_refcount); + refcount_inc(&lo->plh_refcount); } static void pnfs_layout_clear_fail_bit(struct pnfs_layout_hdr *lo, int fail_bit) { if (test_and_clear_bit(fail_bit, &lo->plh_flags)) - atomic_dec(&lo->plh_refcount); + refcount_dec(&lo->plh_refcount); } static void @@ -472,7 +472,7 @@ pnfs_layout_remove_lseg(struct pnfs_layout_hdr *lo, WARN_ON(test_bit(NFS_LSEG_VALID, &lseg->pls_flags)); list_del_init(&lseg->pls_list); /* Matched by pnfs_get_layout_hdr in pnfs_layout_insert_lseg */ - atomic_dec(&lo->plh_refcount); + refcount_dec(&lo->plh_refcount); if (test_bit(NFS_LSEG_LAYOUTRETURN, &lseg->pls_flags)) return; if (list_empty(&lo->plh_segs) && @@ -1451,7 +1451,7 @@ alloc_init_layout_hdr(struct inode *ino, lo = pnfs_alloc_layout_hdr(ino, gfp_flags); if (!lo) return NULL; - atomic_set(&lo->plh_refcount, 1); + refcount_set(&lo->plh_refcount, 1); INIT_LIST_HEAD(&lo->plh_layouts); INIT_LIST_HEAD(&lo->plh_segs); INIT_LIST_HEAD(&lo->plh_return_segs); diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h index f0e98e1..78de7a2 100644 --- a/fs/nfs/pnfs.h +++ b/fs/nfs/pnfs.h @@ -180,7 +180,7 @@ struct pnfs_layoutdriver_type { }; struct pnfs_layout_hdr { - atomic_t plh_refcount; + refcount_t plh_refcount; atomic_t plh_outstanding; /* number of RPCs out */ struct list_head plh_layouts; /* other client layouts */ struct list_head plh_bulk_destroy; -- cgit v1.1 From 81a090b9975b89ca15e71d0bca945358f6fe7696 Mon Sep 17 00:00:00 2001 From: Elena Reshetova Date: Fri, 20 Oct 2017 12:53:34 +0300 Subject: fs, nfs: convert nfs4_ff_layout_mirror.ref from atomic_t to refcount_t atomic_t variables are currently used to implement reference counters with the following properties: - counter is initialized to 1 using atomic_set() - a resource is freed upon counter reaching zero - once counter reaches zero, its further increments aren't allowed - counter schema uses basic atomic operations (set, inc, inc_not_zero, dec_and_test, etc.) Such atomic variables should be converted to a newly provided refcount_t type and API that prevents accidental counter overflows and underflows. This is important since overflows and underflows can lead to use-after-free situation and be exploitable. The variable nfs4_ff_layout_mirror.ref is used as pure reference counter. Convert it to refcount_t and fix up the operations. Suggested-by: Kees Cook Reviewed-by: David Windsor Reviewed-by: Hans Liljestrand Signed-off-by: Elena Reshetova Signed-off-by: Anna Schumaker --- fs/nfs/flexfilelayout/flexfilelayout.c | 8 ++++---- fs/nfs/flexfilelayout/flexfilelayout.h | 3 ++- 2 files changed, 6 insertions(+), 5 deletions(-) (limited to 'fs') diff --git a/fs/nfs/flexfilelayout/flexfilelayout.c b/fs/nfs/flexfilelayout/flexfilelayout.c index b0fa83a..ff55a0a 100644 --- a/fs/nfs/flexfilelayout/flexfilelayout.c +++ b/fs/nfs/flexfilelayout/flexfilelayout.c @@ -187,7 +187,7 @@ ff_layout_add_mirror(struct pnfs_layout_hdr *lo, continue; if (!ff_mirror_match_fh(mirror, pos)) continue; - if (atomic_inc_not_zero(&pos->ref)) { + if (refcount_inc_not_zero(&pos->ref)) { spin_unlock(&inode->i_lock); return pos; } @@ -218,7 +218,7 @@ static struct nfs4_ff_layout_mirror *ff_layout_alloc_mirror(gfp_t gfp_flags) mirror = kzalloc(sizeof(*mirror), gfp_flags); if (mirror != NULL) { spin_lock_init(&mirror->lock); - atomic_set(&mirror->ref, 1); + refcount_set(&mirror->ref, 1); INIT_LIST_HEAD(&mirror->mirrors); } return mirror; @@ -242,7 +242,7 @@ static void ff_layout_free_mirror(struct nfs4_ff_layout_mirror *mirror) static void ff_layout_put_mirror(struct nfs4_ff_layout_mirror *mirror) { - if (mirror != NULL && atomic_dec_and_test(&mirror->ref)) + if (mirror != NULL && refcount_dec_and_test(&mirror->ref)) ff_layout_free_mirror(mirror); } @@ -2286,7 +2286,7 @@ ff_layout_mirror_prepare_stats(struct pnfs_layout_hdr *lo, if (!test_and_clear_bit(NFS4_FF_MIRROR_STAT_AVAIL, &mirror->flags)) continue; /* mirror refcount put in cleanup_layoutstats */ - if (!atomic_inc_not_zero(&mirror->ref)) + if (!refcount_inc_not_zero(&mirror->ref)) continue; dev = &mirror->mirror_ds->id_node; memcpy(&devinfo->dev_id, &dev->deviceid, NFS4_DEVICEID4_SIZE); diff --git a/fs/nfs/flexfilelayout/flexfilelayout.h b/fs/nfs/flexfilelayout/flexfilelayout.h index 98b34c9..ca35426 100644 --- a/fs/nfs/flexfilelayout/flexfilelayout.h +++ b/fs/nfs/flexfilelayout/flexfilelayout.h @@ -13,6 +13,7 @@ #define FF_FLAGS_NO_IO_THRU_MDS 2 #define FF_FLAGS_NO_READ_IO 4 +#include #include "../pnfs.h" /* XXX: Let's filter out insanely large mirror count for now to avoid oom @@ -81,7 +82,7 @@ struct nfs4_ff_layout_mirror { nfs4_stateid stateid; struct rpc_cred __rcu *ro_cred; struct rpc_cred __rcu *rw_cred; - atomic_t ref; + refcount_t ref; spinlock_t lock; unsigned long flags; struct nfs4_ff_layoutstat read_stat; -- cgit v1.1 From 0896cade1224f167296c786043e9bdc0ca63d392 Mon Sep 17 00:00:00 2001 From: Elena Reshetova Date: Fri, 20 Oct 2017 12:53:35 +0300 Subject: fs, nfs: convert nfs_cache_defer_req.count from atomic_t to refcount_t atomic_t variables are currently used to implement reference counters with the following properties: - counter is initialized to 1 using atomic_set() - a resource is freed upon counter reaching zero - once counter reaches zero, its further increments aren't allowed - counter schema uses basic atomic operations (set, inc, inc_not_zero, dec_and_test, etc.) Such atomic variables should be converted to a newly provided refcount_t type and API that prevents accidental counter overflows and underflows. This is important since overflows and underflows can lead to use-after-free situation and be exploitable. The variable nfs_cache_defer_req.count is used as pure reference counter. Convert it to refcount_t and fix up the operations. Suggested-by: Kees Cook Reviewed-by: David Windsor Reviewed-by: Hans Liljestrand Signed-off-by: Elena Reshetova Signed-off-by: Anna Schumaker --- fs/nfs/cache_lib.c | 6 +++--- fs/nfs/cache_lib.h | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) (limited to 'fs') diff --git a/fs/nfs/cache_lib.c b/fs/nfs/cache_lib.c index 2ae676f..6167f13 100644 --- a/fs/nfs/cache_lib.c +++ b/fs/nfs/cache_lib.c @@ -66,7 +66,7 @@ out: */ void nfs_cache_defer_req_put(struct nfs_cache_defer_req *dreq) { - if (atomic_dec_and_test(&dreq->count)) + if (refcount_dec_and_test(&dreq->count)) kfree(dreq); } @@ -86,7 +86,7 @@ static struct cache_deferred_req *nfs_dns_cache_defer(struct cache_req *req) dreq = container_of(req, struct nfs_cache_defer_req, req); dreq->deferred_req.revisit = nfs_dns_cache_revisit; - atomic_inc(&dreq->count); + refcount_inc(&dreq->count); return &dreq->deferred_req; } @@ -98,7 +98,7 @@ struct nfs_cache_defer_req *nfs_cache_defer_req_alloc(void) dreq = kzalloc(sizeof(*dreq), GFP_KERNEL); if (dreq) { init_completion(&dreq->completion); - atomic_set(&dreq->count, 1); + refcount_set(&dreq->count, 1); dreq->req.defer = nfs_dns_cache_defer; } return dreq; diff --git a/fs/nfs/cache_lib.h b/fs/nfs/cache_lib.h index 4116d2c..02b378c 100644 --- a/fs/nfs/cache_lib.h +++ b/fs/nfs/cache_lib.h @@ -15,7 +15,7 @@ struct nfs_cache_defer_req { struct cache_req req; struct cache_deferred_req deferred_req; struct completion completion; - atomic_t count; + refcount_t count; }; extern int nfs_cache_upcall(struct cache_detail *cd, char *entry_name); -- cgit v1.1 From 194bc1f48100a3b6fbd50d7a2218b594f0c564b6 Mon Sep 17 00:00:00 2001 From: Elena Reshetova Date: Fri, 20 Oct 2017 12:53:36 +0300 Subject: fs, nfs: convert nfs4_lock_state.ls_count from atomic_t to refcount_t atomic_t variables are currently used to implement reference counters with the following properties: - counter is initialized to 1 using atomic_set() - a resource is freed upon counter reaching zero - once counter reaches zero, its further increments aren't allowed - counter schema uses basic atomic operations (set, inc, inc_not_zero, dec_and_test, etc.) Such atomic variables should be converted to a newly provided refcount_t type and API that prevents accidental counter overflows and underflows. This is important since overflows and underflows can lead to use-after-free situation and be exploitable. The variable nfs4_lock_state.ls_count is used as pure reference counter. Convert it to refcount_t and fix up the operations. Suggested-by: Kees Cook Reviewed-by: David Windsor Reviewed-by: Hans Liljestrand Signed-off-by: Elena Reshetova Signed-off-by: Anna Schumaker --- fs/nfs/nfs4_fs.h | 2 +- fs/nfs/nfs4proc.c | 6 +++--- fs/nfs/nfs4state.c | 8 ++++---- 3 files changed, 8 insertions(+), 8 deletions(-) (limited to 'fs') diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h index b547d93..78946c6 100644 --- a/fs/nfs/nfs4_fs.h +++ b/fs/nfs/nfs4_fs.h @@ -144,7 +144,7 @@ struct nfs4_lock_state { unsigned long ls_flags; struct nfs_seqid_counter ls_seqid; nfs4_stateid ls_stateid; - atomic_t ls_count; + refcount_t ls_count; fl_owner_t ls_owner; }; diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index d0fd96b..be8c75a 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -2562,7 +2562,7 @@ static int nfs41_check_expired_locks(struct nfs4_state *state) if (test_bit(NFS_LOCK_INITIALIZED, &lsp->ls_flags)) { struct rpc_cred *cred = lsp->ls_state->owner->so_cred; - atomic_inc(&lsp->ls_count); + refcount_inc(&lsp->ls_count); spin_unlock(&state->state_lock); nfs4_put_lock_state(prev); @@ -5923,7 +5923,7 @@ static struct nfs4_unlockdata *nfs4_alloc_unlockdata(struct file_lock *fl, p->arg.seqid = seqid; p->res.seqid = seqid; p->lsp = lsp; - atomic_inc(&lsp->ls_count); + refcount_inc(&lsp->ls_count); /* Ensure we don't close file until we're done freeing locks! */ p->ctx = get_nfs_open_context(ctx); p->l_ctx = nfs_get_lock_context(ctx); @@ -6139,7 +6139,7 @@ static struct nfs4_lockdata *nfs4_alloc_lockdata(struct file_lock *fl, p->res.lock_seqid = p->arg.lock_seqid; p->lsp = lsp; p->server = server; - atomic_inc(&lsp->ls_count); + refcount_inc(&lsp->ls_count); p->ctx = get_nfs_open_context(ctx); memcpy(&p->fl, fl, sizeof(p->fl)); return p; diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c index 0378e225..1887134 100644 --- a/fs/nfs/nfs4state.c +++ b/fs/nfs/nfs4state.c @@ -825,7 +825,7 @@ __nfs4_find_lock_state(struct nfs4_state *state, ret = pos; } if (ret) - atomic_inc(&ret->ls_count); + refcount_inc(&ret->ls_count); return ret; } @@ -843,7 +843,7 @@ static struct nfs4_lock_state *nfs4_alloc_lock_state(struct nfs4_state *state, f if (lsp == NULL) return NULL; nfs4_init_seqid_counter(&lsp->ls_seqid); - atomic_set(&lsp->ls_count, 1); + refcount_set(&lsp->ls_count, 1); lsp->ls_state = state; lsp->ls_owner = fl_owner; lsp->ls_seqid.owner_id = ida_simple_get(&server->lockowner_id, 0, 0, GFP_NOFS); @@ -907,7 +907,7 @@ void nfs4_put_lock_state(struct nfs4_lock_state *lsp) if (lsp == NULL) return; state = lsp->ls_state; - if (!atomic_dec_and_lock(&lsp->ls_count, &state->state_lock)) + if (!refcount_dec_and_lock(&lsp->ls_count, &state->state_lock)) return; list_del(&lsp->ls_locks); if (list_empty(&state->lock_states)) @@ -927,7 +927,7 @@ static void nfs4_fl_copy_lock(struct file_lock *dst, struct file_lock *src) struct nfs4_lock_state *lsp = src->fl_u.nfs4_fl.owner; dst->fl_u.nfs4_fl.owner = lsp; - atomic_inc(&lsp->ls_count); + refcount_inc(&lsp->ls_count); } static void nfs4_fl_release_lock(struct file_lock *fl) -- cgit v1.1 From 2f62b5aa4814be2c511553fd6afb4d35b6c2503b Mon Sep 17 00:00:00 2001 From: Elena Reshetova Date: Fri, 20 Oct 2017 12:53:37 +0300 Subject: fs, nfs: convert nfs_lock_context.count from atomic_t to refcount_t atomic_t variables are currently used to implement reference counters with the following properties: - counter is initialized to 1 using atomic_set() - a resource is freed upon counter reaching zero - once counter reaches zero, its further increments aren't allowed - counter schema uses basic atomic operations (set, inc, inc_not_zero, dec_and_test, etc.) Such atomic variables should be converted to a newly provided refcount_t type and API that prevents accidental counter overflows and underflows. This is important since overflows and underflows can lead to use-after-free situation and be exploitable. The variable nfs_lock_context.count is used as pure reference counter. Convert it to refcount_t and fix up the operations. Suggested-by: Kees Cook Reviewed-by: David Windsor Reviewed-by: Hans Liljestrand Signed-off-by: Elena Reshetova Signed-off-by: Anna Schumaker --- fs/nfs/inode.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) (limited to 'fs') diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index 134d9f5..52a60e3 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -783,7 +783,7 @@ EXPORT_SYMBOL_GPL(nfs_getattr); static void nfs_init_lock_context(struct nfs_lock_context *l_ctx) { - atomic_set(&l_ctx->count, 1); + refcount_set(&l_ctx->count, 1); l_ctx->lockowner = current->files; INIT_LIST_HEAD(&l_ctx->list); atomic_set(&l_ctx->io_count, 0); @@ -797,7 +797,7 @@ static struct nfs_lock_context *__nfs_find_lock_context(struct nfs_open_context do { if (pos->lockowner != current->files) continue; - atomic_inc(&pos->count); + refcount_inc(&pos->count); return pos; } while ((pos = list_entry(pos->list.next, typeof(*pos), list)) != head); return NULL; @@ -836,7 +836,7 @@ void nfs_put_lock_context(struct nfs_lock_context *l_ctx) struct nfs_open_context *ctx = l_ctx->open_context; struct inode *inode = d_inode(ctx->dentry); - if (!atomic_dec_and_lock(&l_ctx->count, &inode->i_lock)) + if (!refcount_dec_and_lock(&l_ctx->count, &inode->i_lock)) return; list_del(&l_ctx->list); spin_unlock(&inode->i_lock); @@ -913,7 +913,7 @@ EXPORT_SYMBOL_GPL(alloc_nfs_open_context); struct nfs_open_context *get_nfs_open_context(struct nfs_open_context *ctx) { if (ctx != NULL) - atomic_inc(&ctx->lock_context.count); + refcount_inc(&ctx->lock_context.count); return ctx; } EXPORT_SYMBOL_GPL(get_nfs_open_context); @@ -924,11 +924,11 @@ static void __put_nfs_open_context(struct nfs_open_context *ctx, int is_sync) struct super_block *sb = ctx->dentry->d_sb; if (!list_empty(&ctx->list)) { - if (!atomic_dec_and_lock(&ctx->lock_context.count, &inode->i_lock)) + if (!refcount_dec_and_lock(&ctx->lock_context.count, &inode->i_lock)) return; list_del(&ctx->list); spin_unlock(&inode->i_lock); - } else if (!atomic_dec_and_test(&ctx->lock_context.count)) + } else if (!refcount_dec_and_test(&ctx->lock_context.count)) return; if (inode != NULL) NFS_PROTO(inode)->close_context(ctx, is_sync); -- cgit v1.1 From 212bf41d88c06afc23e03f9b274eebf1e8dba197 Mon Sep 17 00:00:00 2001 From: Elena Reshetova Date: Fri, 20 Oct 2017 12:53:38 +0300 Subject: fs, nfs: convert nfs_client.cl_count from atomic_t to refcount_t atomic_t variables are currently used to implement reference counters with the following properties: - counter is initialized to 1 using atomic_set() - a resource is freed upon counter reaching zero - once counter reaches zero, its further increments aren't allowed - counter schema uses basic atomic operations (set, inc, inc_not_zero, dec_and_test, etc.) Such atomic variables should be converted to a newly provided refcount_t type and API that prevents accidental counter overflows and underflows. This is important since overflows and underflows can lead to use-after-free situation and be exploitable. The variable nfs_client.cl_count is used as pure reference counter. Convert it to refcount_t and fix up the operations. Suggested-by: Kees Cook Reviewed-by: David Windsor Reviewed-by: Hans Liljestrand Signed-off-by: Elena Reshetova Signed-off-by: Anna Schumaker --- fs/nfs/client.c | 10 +++++----- fs/nfs/filelayout/filelayout.c | 12 ++++++------ fs/nfs/flexfilelayout/flexfilelayout.c | 12 ++++++------ fs/nfs/nfs4client.c | 10 +++++----- fs/nfs/nfs4proc.c | 12 ++++++------ fs/nfs/nfs4state.c | 6 +++--- 6 files changed, 31 insertions(+), 31 deletions(-) (limited to 'fs') diff --git a/fs/nfs/client.c b/fs/nfs/client.c index 22880ef..0ac2fb1 100644 --- a/fs/nfs/client.c +++ b/fs/nfs/client.c @@ -163,7 +163,7 @@ struct nfs_client *nfs_alloc_client(const struct nfs_client_initdata *cl_init) clp->rpc_ops = clp->cl_nfs_mod->rpc_ops; - atomic_set(&clp->cl_count, 1); + refcount_set(&clp->cl_count, 1); clp->cl_cons_state = NFS_CS_INITING; memcpy(&clp->cl_addr, cl_init->addr, cl_init->addrlen); @@ -269,7 +269,7 @@ void nfs_put_client(struct nfs_client *clp) nn = net_generic(clp->cl_net, nfs_net_id); - if (atomic_dec_and_lock(&clp->cl_count, &nn->nfs_client_lock)) { + if (refcount_dec_and_lock(&clp->cl_count, &nn->nfs_client_lock)) { list_del(&clp->cl_share_link); nfs_cb_idr_remove_locked(clp); spin_unlock(&nn->nfs_client_lock); @@ -314,7 +314,7 @@ static struct nfs_client *nfs_match_client(const struct nfs_client_initdata *dat sap)) continue; - atomic_inc(&clp->cl_count); + refcount_inc(&clp->cl_count); return clp; } return NULL; @@ -1006,7 +1006,7 @@ struct nfs_server *nfs_clone_server(struct nfs_server *source, /* Copy data from the source */ server->nfs_client = source->nfs_client; server->destroy = source->destroy; - atomic_inc(&server->nfs_client->cl_count); + refcount_inc(&server->nfs_client->cl_count); nfs_server_copy_userdata(server, source); server->fsid = fattr->fsid; @@ -1166,7 +1166,7 @@ static int nfs_server_list_show(struct seq_file *m, void *v) clp->rpc_ops->version, rpc_peeraddr2str(clp->cl_rpcclient, RPC_DISPLAY_HEX_ADDR), rpc_peeraddr2str(clp->cl_rpcclient, RPC_DISPLAY_HEX_PORT), - atomic_read(&clp->cl_count), + refcount_read(&clp->cl_count), clp->cl_hostname); rcu_read_unlock(); diff --git a/fs/nfs/filelayout/filelayout.c b/fs/nfs/filelayout/filelayout.c index 508126e..4e54d8b 100644 --- a/fs/nfs/filelayout/filelayout.c +++ b/fs/nfs/filelayout/filelayout.c @@ -471,10 +471,10 @@ filelayout_read_pagelist(struct nfs_pgio_header *hdr) return PNFS_NOT_ATTEMPTED; dprintk("%s USE DS: %s cl_count %d\n", __func__, - ds->ds_remotestr, atomic_read(&ds->ds_clp->cl_count)); + ds->ds_remotestr, refcount_read(&ds->ds_clp->cl_count)); /* No multipath support. Use first DS */ - atomic_inc(&ds->ds_clp->cl_count); + refcount_inc(&ds->ds_clp->cl_count); hdr->ds_clp = ds->ds_clp; hdr->ds_commit_idx = idx; fh = nfs4_fl_select_ds_fh(lseg, j); @@ -515,10 +515,10 @@ filelayout_write_pagelist(struct nfs_pgio_header *hdr, int sync) dprintk("%s ino %lu sync %d req %zu@%llu DS: %s cl_count %d\n", __func__, hdr->inode->i_ino, sync, (size_t) hdr->args.count, - offset, ds->ds_remotestr, atomic_read(&ds->ds_clp->cl_count)); + offset, ds->ds_remotestr, refcount_read(&ds->ds_clp->cl_count)); hdr->pgio_done_cb = filelayout_write_done_cb; - atomic_inc(&ds->ds_clp->cl_count); + refcount_inc(&ds->ds_clp->cl_count); hdr->ds_clp = ds->ds_clp; hdr->ds_commit_idx = idx; fh = nfs4_fl_select_ds_fh(lseg, j); @@ -1064,9 +1064,9 @@ static int filelayout_initiate_commit(struct nfs_commit_data *data, int how) goto out_err; dprintk("%s ino %lu, how %d cl_count %d\n", __func__, - data->inode->i_ino, how, atomic_read(&ds->ds_clp->cl_count)); + data->inode->i_ino, how, refcount_read(&ds->ds_clp->cl_count)); data->commit_done_cb = filelayout_commit_done_cb; - atomic_inc(&ds->ds_clp->cl_count); + refcount_inc(&ds->ds_clp->cl_count); data->ds_clp = ds->ds_clp; fh = select_ds_fh_from_commit(lseg, data->ds_commit_index); if (fh) diff --git a/fs/nfs/flexfilelayout/flexfilelayout.c b/fs/nfs/flexfilelayout/flexfilelayout.c index ff55a0a..c75ad98 100644 --- a/fs/nfs/flexfilelayout/flexfilelayout.c +++ b/fs/nfs/flexfilelayout/flexfilelayout.c @@ -1726,10 +1726,10 @@ ff_layout_read_pagelist(struct nfs_pgio_header *hdr) vers = nfs4_ff_layout_ds_version(lseg, idx); dprintk("%s USE DS: %s cl_count %d vers %d\n", __func__, - ds->ds_remotestr, atomic_read(&ds->ds_clp->cl_count), vers); + ds->ds_remotestr, refcount_read(&ds->ds_clp->cl_count), vers); hdr->pgio_done_cb = ff_layout_read_done_cb; - atomic_inc(&ds->ds_clp->cl_count); + refcount_inc(&ds->ds_clp->cl_count); hdr->ds_clp = ds->ds_clp; fh = nfs4_ff_layout_select_ds_fh(lseg, idx); if (fh) @@ -1785,11 +1785,11 @@ ff_layout_write_pagelist(struct nfs_pgio_header *hdr, int sync) dprintk("%s ino %lu sync %d req %zu@%llu DS: %s cl_count %d vers %d\n", __func__, hdr->inode->i_ino, sync, (size_t) hdr->args.count, - offset, ds->ds_remotestr, atomic_read(&ds->ds_clp->cl_count), + offset, ds->ds_remotestr, refcount_read(&ds->ds_clp->cl_count), vers); hdr->pgio_done_cb = ff_layout_write_done_cb; - atomic_inc(&ds->ds_clp->cl_count); + refcount_inc(&ds->ds_clp->cl_count); hdr->ds_clp = ds->ds_clp; hdr->ds_commit_idx = idx; fh = nfs4_ff_layout_select_ds_fh(lseg, idx); @@ -1863,11 +1863,11 @@ static int ff_layout_initiate_commit(struct nfs_commit_data *data, int how) vers = nfs4_ff_layout_ds_version(lseg, idx); dprintk("%s ino %lu, how %d cl_count %d vers %d\n", __func__, - data->inode->i_ino, how, atomic_read(&ds->ds_clp->cl_count), + data->inode->i_ino, how, refcount_read(&ds->ds_clp->cl_count), vers); data->commit_done_cb = ff_layout_commit_done_cb; data->cred = ds_cred; - atomic_inc(&ds->ds_clp->cl_count); + refcount_inc(&ds->ds_clp->cl_count); data->ds_clp = ds->ds_clp; fh = select_ds_fh_from_commit(lseg, data->ds_commit_index); if (fh) diff --git a/fs/nfs/nfs4client.c b/fs/nfs/nfs4client.c index e9bea90..31b5bc0 100644 --- a/fs/nfs/nfs4client.c +++ b/fs/nfs/nfs4client.c @@ -483,7 +483,7 @@ static int nfs4_match_client(struct nfs_client *pos, struct nfs_client *new, * ID and serverowner fields. Wait for CREATE_SESSION * to finish. */ if (pos->cl_cons_state > NFS_CS_READY) { - atomic_inc(&pos->cl_count); + refcount_inc(&pos->cl_count); spin_unlock(&nn->nfs_client_lock); nfs_put_client(*prev); @@ -559,7 +559,7 @@ int nfs40_walk_client_list(struct nfs_client *new, * way that a SETCLIENTID_CONFIRM to pos can succeed is * if new and pos point to the same server: */ - atomic_inc(&pos->cl_count); + refcount_inc(&pos->cl_count); spin_unlock(&nn->nfs_client_lock); nfs_put_client(prev); @@ -715,7 +715,7 @@ int nfs41_walk_client_list(struct nfs_client *new, continue; found: - atomic_inc(&pos->cl_count); + refcount_inc(&pos->cl_count); *result = pos; status = 0; break; @@ -749,7 +749,7 @@ nfs4_find_client_ident(struct net *net, int cb_ident) spin_lock(&nn->nfs_client_lock); clp = idr_find(&nn->cb_ident_idr, cb_ident); if (clp) - atomic_inc(&clp->cl_count); + refcount_inc(&clp->cl_count); spin_unlock(&nn->nfs_client_lock); return clp; } @@ -804,7 +804,7 @@ nfs4_find_client_sessionid(struct net *net, const struct sockaddr *addr, sid->data, NFS4_MAX_SESSIONID_LEN) != 0) continue; - atomic_inc(&clp->cl_count); + refcount_inc(&clp->cl_count); spin_unlock(&nn->nfs_client_lock); return clp; } diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index be8c75a..82e5ed2 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -4870,7 +4870,7 @@ static void nfs4_renew_release(void *calldata) struct nfs4_renewdata *data = calldata; struct nfs_client *clp = data->client; - if (atomic_read(&clp->cl_count) > 1) + if (refcount_read(&clp->cl_count) > 1) nfs4_schedule_state_renewal(clp); nfs_put_client(clp); kfree(data); @@ -4918,7 +4918,7 @@ static int nfs4_proc_async_renew(struct nfs_client *clp, struct rpc_cred *cred, if (renew_flags == 0) return 0; - if (!atomic_inc_not_zero(&clp->cl_count)) + if (!refcount_inc_not_zero(&clp->cl_count)) return -EIO; data = kmalloc(sizeof(*data), GFP_NOFS); if (data == NULL) { @@ -7499,7 +7499,7 @@ nfs4_run_exchange_id(struct nfs_client *clp, struct rpc_cred *cred, struct nfs41_exchange_id_data *calldata; int status; - if (!atomic_inc_not_zero(&clp->cl_count)) + if (!refcount_inc_not_zero(&clp->cl_count)) return ERR_PTR(-EIO); status = -ENOMEM; @@ -8099,7 +8099,7 @@ static void nfs41_sequence_release(void *data) struct nfs4_sequence_data *calldata = data; struct nfs_client *clp = calldata->clp; - if (atomic_read(&clp->cl_count) > 1) + if (refcount_read(&clp->cl_count) > 1) nfs4_schedule_state_renewal(clp); nfs_put_client(clp); kfree(calldata); @@ -8128,7 +8128,7 @@ static void nfs41_sequence_call_done(struct rpc_task *task, void *data) trace_nfs4_sequence(clp, task->tk_status); if (task->tk_status < 0) { dprintk("%s ERROR %d\n", __func__, task->tk_status); - if (atomic_read(&clp->cl_count) == 1) + if (refcount_read(&clp->cl_count) == 1) goto out; if (nfs41_sequence_handle_errors(task, clp) == -EAGAIN) { @@ -8179,7 +8179,7 @@ static struct rpc_task *_nfs41_proc_sequence(struct nfs_client *clp, struct rpc_task *ret; ret = ERR_PTR(-EIO); - if (!atomic_inc_not_zero(&clp->cl_count)) + if (!refcount_inc_not_zero(&clp->cl_count)) goto out_err; ret = ERR_PTR(-ENOMEM); diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c index 1887134..3bd79b8 100644 --- a/fs/nfs/nfs4state.c +++ b/fs/nfs/nfs4state.c @@ -1177,7 +1177,7 @@ void nfs4_schedule_state_manager(struct nfs_client *clp) if (test_and_set_bit(NFS4CLNT_MANAGER_RUNNING, &clp->cl_state) != 0) return; __module_get(THIS_MODULE); - atomic_inc(&clp->cl_count); + refcount_inc(&clp->cl_count); /* The rcu_read_lock() is not strictly necessary, as the state * manager is the only thread that ever changes the rpc_xprt @@ -1269,7 +1269,7 @@ int nfs4_wait_clnt_recover(struct nfs_client *clp) might_sleep(); - atomic_inc(&clp->cl_count); + refcount_inc(&clp->cl_count); res = wait_on_bit_action(&clp->cl_state, NFS4CLNT_MANAGER_RUNNING, nfs_wait_bit_killable, TASK_KILLABLE); if (res) @@ -2510,7 +2510,7 @@ static void nfs4_state_manager(struct nfs_client *clp) break; if (test_and_set_bit(NFS4CLNT_MANAGER_RUNNING, &clp->cl_state) != 0) break; - } while (atomic_read(&clp->cl_count) > 1); + } while (refcount_read(&clp->cl_count) > 1); return; out_error: if (strlen(section)) -- cgit v1.1 From 3944369db701f075092357b511fd9f5755771585 Mon Sep 17 00:00:00 2001 From: Anna Schumaker Date: Wed, 1 Nov 2017 15:48:43 -0400 Subject: NFS: Avoid RCU usage in tracepoints There isn't an obvious way to acquire and release the RCU lock during a tracepoint, so we can't use the rpc_peeraddr2str() function here. Instead, rely on the client's cl_hostname, which should have similar enough information without needing an rcu_dereference(). Reported-by: Dave Jones Cc: stable@vger.kernel.org # v3.12 Signed-off-by: Anna Schumaker --- fs/nfs/nfs4trace.h | 24 ++++++------------------ 1 file changed, 6 insertions(+), 18 deletions(-) (limited to 'fs') diff --git a/fs/nfs/nfs4trace.h b/fs/nfs/nfs4trace.h index be1da19..7a5588c 100644 --- a/fs/nfs/nfs4trace.h +++ b/fs/nfs/nfs4trace.h @@ -201,17 +201,13 @@ DECLARE_EVENT_CLASS(nfs4_clientid_event, TP_ARGS(clp, error), TP_STRUCT__entry( - __string(dstaddr, - rpc_peeraddr2str(clp->cl_rpcclient, - RPC_DISPLAY_ADDR)) + __string(dstaddr, clp->cl_hostname) __field(int, error) ), TP_fast_assign( __entry->error = error; - __assign_str(dstaddr, - rpc_peeraddr2str(clp->cl_rpcclient, - RPC_DISPLAY_ADDR)); + __assign_str(dstaddr, clp->cl_hostname); ), TP_printk( @@ -1132,9 +1128,7 @@ DECLARE_EVENT_CLASS(nfs4_inode_callback_event, __field(dev_t, dev) __field(u32, fhandle) __field(u64, fileid) - __string(dstaddr, clp ? - rpc_peeraddr2str(clp->cl_rpcclient, - RPC_DISPLAY_ADDR) : "unknown") + __string(dstaddr, clp ? clp->cl_hostname : "unknown") ), TP_fast_assign( @@ -1147,9 +1141,7 @@ DECLARE_EVENT_CLASS(nfs4_inode_callback_event, __entry->fileid = 0; __entry->dev = 0; } - __assign_str(dstaddr, clp ? - rpc_peeraddr2str(clp->cl_rpcclient, - RPC_DISPLAY_ADDR) : "unknown") + __assign_str(dstaddr, clp ? clp->cl_hostname : "unknown") ), TP_printk( @@ -1191,9 +1183,7 @@ DECLARE_EVENT_CLASS(nfs4_inode_stateid_callback_event, __field(dev_t, dev) __field(u32, fhandle) __field(u64, fileid) - __string(dstaddr, clp ? - rpc_peeraddr2str(clp->cl_rpcclient, - RPC_DISPLAY_ADDR) : "unknown") + __string(dstaddr, clp ? clp->cl_hostname : "unknown") __field(int, stateid_seq) __field(u32, stateid_hash) ), @@ -1208,9 +1198,7 @@ DECLARE_EVENT_CLASS(nfs4_inode_stateid_callback_event, __entry->fileid = 0; __entry->dev = 0; } - __assign_str(dstaddr, clp ? - rpc_peeraddr2str(clp->cl_rpcclient, - RPC_DISPLAY_ADDR) : "unknown") + __assign_str(dstaddr, clp ? clp->cl_hostname : "unknown") __entry->stateid_seq = be32_to_cpu(stateid->seqid); __entry->stateid_hash = -- cgit v1.1 From 6089dd0d731028531fb1148be9fd33274ff90da4 Mon Sep 17 00:00:00 2001 From: Thomas Meyer Date: Sat, 7 Oct 2017 16:02:21 +0200 Subject: NFS: Fix bool initialization/comparison Bool initializations should use true and false. Bool tests don't need comparisons. Signed-off-by: Thomas Meyer Signed-off-by: Anna Schumaker --- fs/nfs/callback_proc.c | 2 +- fs/nfs/dir.c | 10 +++++----- fs/nfs/nfs4client.c | 2 +- fs/nfs/pnfs.c | 2 +- 4 files changed, 8 insertions(+), 8 deletions(-) (limited to 'fs') diff --git a/fs/nfs/callback_proc.c b/fs/nfs/callback_proc.c index 14358de..a82abff 100644 --- a/fs/nfs/callback_proc.c +++ b/fs/nfs/callback_proc.c @@ -439,7 +439,7 @@ static bool referring_call_exists(struct nfs_client *clp, uint32_t nrclists, struct referring_call_list *rclists) { - bool status = 0; + bool status = false; int i, j; struct nfs4_session *session; struct nfs4_slot_table *tbl; diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index 15104f7..a642ed3 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -246,7 +246,7 @@ int nfs_readdir_search_for_pos(struct nfs_cache_array *array, nfs_readdir_descri desc->cache_entry_index = index; return 0; out_eof: - desc->eof = 1; + desc->eof = true; return -EBADCOOKIE; } @@ -300,7 +300,7 @@ int nfs_readdir_search_for_cookie(struct nfs_cache_array *array, nfs_readdir_des if (array->eof_index >= 0) { status = -EBADCOOKIE; if (*desc->dir_cookie == array->last_cookie) - desc->eof = 1; + desc->eof = true; } out: return status; @@ -754,7 +754,7 @@ int nfs_do_filldir(nfs_readdir_descriptor_t *desc) ent = &array->array[i]; if (!dir_emit(desc->ctx, ent->string.name, ent->string.len, nfs_compat_user_ino64(ent->ino), ent->d_type)) { - desc->eof = 1; + desc->eof = true; break; } desc->ctx->pos++; @@ -766,7 +766,7 @@ int nfs_do_filldir(nfs_readdir_descriptor_t *desc) ctx->duped = 1; } if (array->eof_index >= 0) - desc->eof = 1; + desc->eof = true; kunmap(desc->page); cache_page_release(desc); @@ -866,7 +866,7 @@ static int nfs_readdir(struct file *file, struct dir_context *ctx) if (res == -EBADCOOKIE) { res = 0; /* This means either end of directory */ - if (*desc->dir_cookie && desc->eof == 0) { + if (*desc->dir_cookie && !desc->eof) { /* Or that the server has 'lost' a cookie */ res = uncached_readdir(desc); if (res == 0) diff --git a/fs/nfs/nfs4client.c b/fs/nfs/nfs4client.c index 31b5bc0..12bbab0 100644 --- a/fs/nfs/nfs4client.c +++ b/fs/nfs/nfs4client.c @@ -793,7 +793,7 @@ nfs4_find_client_sessionid(struct net *net, const struct sockaddr *addr, spin_lock(&nn->nfs_client_lock); list_for_each_entry(clp, &nn->nfs_client_list, cl_share_link) { - if (nfs4_cb_match_client(addr, clp, minorversion) == false) + if (!nfs4_cb_match_client(addr, clp, minorversion)) continue; if (!nfs4_has_session(clp)) diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index 4aab53b..ec30dac 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c @@ -1513,7 +1513,7 @@ pnfs_lseg_range_match(const struct pnfs_layout_range *ls_range, if ((range->iomode == IOMODE_RW && ls_range->iomode != IOMODE_RW) || (range->iomode != ls_range->iomode && - strict_iomode == true) || + strict_iomode) || !pnfs_lseg_range_intersecting(ls_range, range)) return 0; -- cgit v1.1 From c9399f21c215453b414702758b8c4b7d66605eac Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 6 Nov 2017 15:28:01 -0500 Subject: NFSv4: Fix OPEN / CLOSE race Ben Coddington has noted the following race between OPEN and CLOSE on a single client. Process 1 Process 2 Server ========= ========= ====== 1) OPEN file 2) OPEN file 3) Process OPEN (1) seqid=1 4) Process OPEN (2) seqid=2 5) Reply OPEN (2) 6) Receive reply (2) 7) new stateid, seqid=2 8) CLOSE file, using stateid w/ seqid=2 9) Reply OPEN (1) 10( Process CLOSE (8) 11) Reply CLOSE (8) 12) Forget stateid file closed 13) Receive reply (7) 14) Forget stateid file closed. 15) Receive reply (1). 16) New stateid seqid=1 is really the same stateid that was closed. IOW: the reply to the first OPEN is delayed. Since "Process 2" does not wait before closing the file, and it does not cache the closed stateid, then when the delayed reply is finally received, it is treated as setting up a new stateid by the client. The fix is to ensure that the client processes the OPEN and CLOSE calls in the same order in which the server processed them. This commit ensures that we examine the seqid of the stateid returned by OPEN. If it is a new stateid, we assume the seqid must be equal to the value 1, and that each state transition increments the seqid value by 1 (See RFC7530, Section 9.1.4.2, and RFC5661, Section 8.2.2). If the tracker sees that an OPEN returns with a seqid that is greater than the cached seqid + 1, then it bumps a flag to ensure that the caller waits for the RPCs carrying the missing seqids to complete. Note that there can still be pathologies where the server crashes before it can even send us the missing seqids. Since the OPEN call is still holding a slot when it waits here, that could cause the recovery to stall forever. To avoid that, we time out after a 5 second wait. Reported-by: Benjamin Coddington Signed-off-by: Trond Myklebust Signed-off-by: Anna Schumaker --- fs/nfs/nfs4_fs.h | 3 ++ fs/nfs/nfs4proc.c | 154 +++++++++++++++++++++++++++++++++++++++++------------ fs/nfs/nfs4state.c | 1 + 3 files changed, 123 insertions(+), 35 deletions(-) (limited to 'fs') diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h index 78946c6..fdcfbab 100644 --- a/fs/nfs/nfs4_fs.h +++ b/fs/nfs/nfs4_fs.h @@ -161,6 +161,7 @@ enum { NFS_STATE_POSIX_LOCKS, /* Posix locks are supported */ NFS_STATE_RECOVERY_FAILED, /* OPEN stateid state recovery failed */ NFS_STATE_MAY_NOTIFY_LOCK, /* server may CB_NOTIFY_LOCK */ + NFS_STATE_CHANGE_WAIT, /* A state changing operation is outstanding */ }; struct nfs4_state { @@ -184,6 +185,8 @@ struct nfs4_state { unsigned int n_rdwr; /* Number of read/write references */ fmode_t state; /* State on the server (R,W, or RW) */ atomic_t count; + + wait_queue_head_t waitq; }; diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 82e5ed2..cec4bcb 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -1378,6 +1378,25 @@ static bool nfs_open_stateid_recover_openmode(struct nfs4_state *state) } #endif /* CONFIG_NFS_V4_1 */ +static void nfs_state_log_update_open_stateid(struct nfs4_state *state) +{ + if (test_and_clear_bit(NFS_STATE_CHANGE_WAIT, &state->flags)) + wake_up_all(&state->waitq); +} + +static void nfs_state_log_out_of_order_open_stateid(struct nfs4_state *state, + const nfs4_stateid *stateid) +{ + u32 state_seqid = be32_to_cpu(state->open_stateid.seqid); + u32 stateid_seqid = be32_to_cpu(stateid->seqid); + + if (stateid_seqid == state_seqid + 1U || + (stateid_seqid == 1U && state_seqid == 0xffffffffU)) + nfs_state_log_update_open_stateid(state); + else + set_bit(NFS_STATE_CHANGE_WAIT, &state->flags); +} + static void nfs_test_and_clear_all_open_stateid(struct nfs4_state *state) { struct nfs_client *clp = state->owner->so_server->nfs_client; @@ -1393,18 +1412,32 @@ static void nfs_test_and_clear_all_open_stateid(struct nfs4_state *state) nfs4_state_mark_reclaim_nograce(clp, state); } +/* + * Check for whether or not the caller may update the open stateid + * to the value passed in by stateid. + * + * Note: This function relies heavily on the server implementing + * RFC7530 Section 9.1.4.2, and RFC5661 Section 8.2.2 + * correctly. + * i.e. The stateid seqids have to be initialised to 1, and + * are then incremented on every state transition. + */ static bool nfs_need_update_open_stateid(struct nfs4_state *state, - const nfs4_stateid *stateid, nfs4_stateid *freeme) + const nfs4_stateid *stateid) { - if (test_and_set_bit(NFS_OPEN_STATE, &state->flags) == 0) - return true; - if (!nfs4_stateid_match_other(stateid, &state->open_stateid)) { - nfs4_stateid_copy(freeme, &state->open_stateid); - nfs_test_and_clear_all_open_stateid(state); + if (test_bit(NFS_OPEN_STATE, &state->flags) == 0 || + !nfs4_stateid_match_other(stateid, &state->open_stateid)) { + if (stateid->seqid == cpu_to_be32(1)) + nfs_state_log_update_open_stateid(state); + else + set_bit(NFS_STATE_CHANGE_WAIT, &state->flags); return true; } - if (nfs4_stateid_is_newer(stateid, &state->open_stateid)) + + if (nfs4_stateid_is_newer(stateid, &state->open_stateid)) { + nfs_state_log_out_of_order_open_stateid(state, stateid); return true; + } return false; } @@ -1443,11 +1476,13 @@ static void nfs_clear_open_stateid_locked(struct nfs4_state *state, if (nfs4_stateid_match_other(stateid, &state->open_stateid) && !nfs4_stateid_is_newer(stateid, &state->open_stateid)) { nfs_resync_open_stateid_locked(state); - return; + goto out; } if (test_bit(NFS_DELEGATED_STATE, &state->flags) == 0) nfs4_stateid_copy(&state->stateid, stateid); nfs4_stateid_copy(&state->open_stateid, stateid); +out: + nfs_state_log_update_open_stateid(state); } static void nfs_clear_open_stateid(struct nfs4_state *state, @@ -1464,29 +1499,57 @@ static void nfs_clear_open_stateid(struct nfs4_state *state, } static void nfs_set_open_stateid_locked(struct nfs4_state *state, - const nfs4_stateid *stateid, fmode_t fmode, - nfs4_stateid *freeme) + const nfs4_stateid *stateid, nfs4_stateid *freeme) { - switch (fmode) { - case FMODE_READ: - set_bit(NFS_O_RDONLY_STATE, &state->flags); + DEFINE_WAIT(wait); + int status = 0; + for (;;) { + + if (!nfs_need_update_open_stateid(state, stateid)) + return; + if (!test_bit(NFS_STATE_CHANGE_WAIT, &state->flags)) break; - case FMODE_WRITE: - set_bit(NFS_O_WRONLY_STATE, &state->flags); + if (status) break; - case FMODE_READ|FMODE_WRITE: - set_bit(NFS_O_RDWR_STATE, &state->flags); + /* Rely on seqids for serialisation with NFSv4.0 */ + if (!nfs4_has_session(NFS_SERVER(state->inode)->nfs_client)) + break; + + prepare_to_wait(&state->waitq, &wait, TASK_KILLABLE); + /* + * Ensure we process the state changes in the same order + * in which the server processed them by delaying the + * update of the stateid until we are in sequence. + */ + write_sequnlock(&state->seqlock); + spin_unlock(&state->owner->so_lock); + rcu_read_unlock(); + if (!signal_pending(current)) { + if (schedule_timeout(5*HZ) == 0) + status = -EAGAIN; + else + status = 0; + } else + status = -EINTR; + finish_wait(&state->waitq, &wait); + rcu_read_lock(); + spin_lock(&state->owner->so_lock); + write_seqlock(&state->seqlock); } - if (!nfs_need_update_open_stateid(state, stateid, freeme)) - return; + + if (!nfs4_stateid_match_other(stateid, &state->open_stateid)) { + nfs4_stateid_copy(freeme, &state->open_stateid); + nfs_test_and_clear_all_open_stateid(state); + } + if (test_bit(NFS_DELEGATED_STATE, &state->flags) == 0) nfs4_stateid_copy(&state->stateid, stateid); nfs4_stateid_copy(&state->open_stateid, stateid); + nfs_state_log_update_open_stateid(state); } -static void __update_open_stateid(struct nfs4_state *state, +static void nfs_state_set_open_stateid(struct nfs4_state *state, const nfs4_stateid *open_stateid, - const nfs4_stateid *deleg_stateid, fmode_t fmode, nfs4_stateid *freeme) { @@ -1494,17 +1557,34 @@ static void __update_open_stateid(struct nfs4_state *state, * Protect the call to nfs4_state_set_mode_locked and * serialise the stateid update */ - spin_lock(&state->owner->so_lock); write_seqlock(&state->seqlock); - if (deleg_stateid != NULL) { - nfs4_stateid_copy(&state->stateid, deleg_stateid); - set_bit(NFS_DELEGATED_STATE, &state->flags); + nfs_set_open_stateid_locked(state, open_stateid, freeme); + switch (fmode) { + case FMODE_READ: + set_bit(NFS_O_RDONLY_STATE, &state->flags); + break; + case FMODE_WRITE: + set_bit(NFS_O_WRONLY_STATE, &state->flags); + break; + case FMODE_READ|FMODE_WRITE: + set_bit(NFS_O_RDWR_STATE, &state->flags); } - if (open_stateid != NULL) - nfs_set_open_stateid_locked(state, open_stateid, fmode, freeme); + set_bit(NFS_OPEN_STATE, &state->flags); + write_sequnlock(&state->seqlock); +} + +static void nfs_state_set_delegation(struct nfs4_state *state, + const nfs4_stateid *deleg_stateid, + fmode_t fmode) +{ + /* + * Protect the call to nfs4_state_set_mode_locked and + * serialise the stateid update + */ + write_seqlock(&state->seqlock); + nfs4_stateid_copy(&state->stateid, deleg_stateid); + set_bit(NFS_DELEGATED_STATE, &state->flags); write_sequnlock(&state->seqlock); - update_open_stateflags(state, fmode); - spin_unlock(&state->owner->so_lock); } static int update_open_stateid(struct nfs4_state *state, @@ -1522,6 +1602,12 @@ static int update_open_stateid(struct nfs4_state *state, fmode &= (FMODE_READ|FMODE_WRITE); rcu_read_lock(); + spin_lock(&state->owner->so_lock); + if (open_stateid != NULL) { + nfs_state_set_open_stateid(state, open_stateid, fmode, &freeme); + ret = 1; + } + deleg_cur = rcu_dereference(nfsi->delegation); if (deleg_cur == NULL) goto no_delegation; @@ -1538,18 +1624,16 @@ static int update_open_stateid(struct nfs4_state *state, goto no_delegation_unlock; nfs_mark_delegation_referenced(deleg_cur); - __update_open_stateid(state, open_stateid, &deleg_cur->stateid, - fmode, &freeme); + nfs_state_set_delegation(state, &deleg_cur->stateid, fmode); ret = 1; no_delegation_unlock: spin_unlock(&deleg_cur->lock); no_delegation: + if (ret) + update_open_stateflags(state, fmode); + spin_unlock(&state->owner->so_lock); rcu_read_unlock(); - if (!ret && open_stateid != NULL) { - __update_open_stateid(state, open_stateid, NULL, fmode, &freeme); - ret = 1; - } if (test_bit(NFS_STATE_RECLAIM_NOGRACE, &state->flags)) nfs4_schedule_state_manager(clp); if (freeme.type != 0) diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c index 3bd79b8..6e3f372 100644 --- a/fs/nfs/nfs4state.c +++ b/fs/nfs/nfs4state.c @@ -645,6 +645,7 @@ nfs4_alloc_open_state(void) INIT_LIST_HEAD(&state->lock_states); spin_lock_init(&state->state_lock); seqlock_init(&state->seqlock); + init_waitqueue_head(&state->waitq); return state; } -- cgit v1.1 From ad9e02dc026b75069f6a336e0daf2d54925967b6 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 6 Nov 2017 15:28:02 -0500 Subject: NFSv4: Add a tracepoint to document open stateid updates Signed-off-by: Trond Myklebust Signed-off-by: Anna Schumaker --- fs/nfs/nfs4proc.c | 3 +++ fs/nfs/nfs4trace.h | 2 ++ 2 files changed, 5 insertions(+) (limited to 'fs') diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index cec4bcb..aa8ea51 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -1481,6 +1481,7 @@ static void nfs_clear_open_stateid_locked(struct nfs4_state *state, if (test_bit(NFS_DELEGATED_STATE, &state->flags) == 0) nfs4_stateid_copy(&state->stateid, stateid); nfs4_stateid_copy(&state->open_stateid, stateid); + trace_nfs4_open_stateid_update(state->inode, stateid, 0); out: nfs_state_log_update_open_stateid(state); } @@ -1524,6 +1525,7 @@ static void nfs_set_open_stateid_locked(struct nfs4_state *state, write_sequnlock(&state->seqlock); spin_unlock(&state->owner->so_lock); rcu_read_unlock(); + trace_nfs4_open_stateid_update_wait(state->inode, stateid, 0); if (!signal_pending(current)) { if (schedule_timeout(5*HZ) == 0) status = -EAGAIN; @@ -1545,6 +1547,7 @@ static void nfs_set_open_stateid_locked(struct nfs4_state *state, if (test_bit(NFS_DELEGATED_STATE, &state->flags) == 0) nfs4_stateid_copy(&state->stateid, stateid); nfs4_stateid_copy(&state->open_stateid, stateid); + trace_nfs4_open_stateid_update(state->inode, stateid, status); nfs_state_log_update_open_stateid(state); } diff --git a/fs/nfs/nfs4trace.h b/fs/nfs/nfs4trace.h index 7a5588c..06ac715 100644 --- a/fs/nfs/nfs4trace.h +++ b/fs/nfs/nfs4trace.h @@ -1061,6 +1061,8 @@ DECLARE_EVENT_CLASS(nfs4_inode_stateid_event, DEFINE_NFS4_INODE_STATEID_EVENT(nfs4_setattr); DEFINE_NFS4_INODE_STATEID_EVENT(nfs4_delegreturn); +DEFINE_NFS4_INODE_STATEID_EVENT(nfs4_open_stateid_update); +DEFINE_NFS4_INODE_STATEID_EVENT(nfs4_open_stateid_update_wait); DECLARE_EVENT_CLASS(nfs4_getattr_event, TP_PROTO( -- cgit v1.1 From 8fd1ab747d2b1ec7ec663ad0b41a32eaa35117a8 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 6 Nov 2017 15:28:03 -0500 Subject: NFSv4: Fix open create exclusive when the server reboots If the server that does not implement NFSv4.1 persistent session semantics reboots while we are performing an exclusive create, then the return value of NFS4ERR_DELAY when we replay the open during the grace period causes us to lose the verifier. When the grace period expires, and we present a new verifier, the server will then correctly reply NFS4ERR_EXIST. This commit ensures that we always present the same verifier when replaying the OPEN. Reported-by: Tigran Mkrtchyan Signed-off-by: Trond Myklebust Signed-off-by: Anna Schumaker --- fs/nfs/nfs4proc.c | 41 ++++++++++++++++++++++++++--------------- 1 file changed, 26 insertions(+), 15 deletions(-) (limited to 'fs') diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index aa8ea51..b2d628c 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -1088,6 +1088,12 @@ struct nfs4_opendata { int rpc_status; }; +struct nfs4_open_createattrs { + struct nfs4_label *label; + struct iattr *sattr; + const __u32 verf[2]; +}; + static bool nfs4_clear_cap_atomic_open_v1(struct nfs_server *server, int err, struct nfs4_exception *exception) { @@ -1157,8 +1163,7 @@ static void nfs4_init_opendata_res(struct nfs4_opendata *p) static struct nfs4_opendata *nfs4_opendata_alloc(struct dentry *dentry, struct nfs4_state_owner *sp, fmode_t fmode, int flags, - const struct iattr *attrs, - struct nfs4_label *label, + const struct nfs4_open_createattrs *c, enum open_claim_type4 claim, gfp_t gfp_mask) { @@ -1166,6 +1171,7 @@ static struct nfs4_opendata *nfs4_opendata_alloc(struct dentry *dentry, struct inode *dir = d_inode(parent); struct nfs_server *server = NFS_SERVER(dir); struct nfs_seqid *(*alloc_seqid)(struct nfs_seqid_counter *, gfp_t); + struct nfs4_label *label = (c != NULL) ? c->label : NULL; struct nfs4_opendata *p; p = kzalloc(sizeof(*p), gfp_mask); @@ -1231,15 +1237,11 @@ static struct nfs4_opendata *nfs4_opendata_alloc(struct dentry *dentry, case NFS4_OPEN_CLAIM_DELEG_PREV_FH: p->o_arg.fh = NFS_FH(d_inode(dentry)); } - if (attrs != NULL && attrs->ia_valid != 0) { - __u32 verf[2]; - + if (c != NULL && c->sattr != NULL && c->sattr->ia_valid != 0) { p->o_arg.u.attrs = &p->attrs; - memcpy(&p->attrs, attrs, sizeof(p->attrs)); + memcpy(&p->attrs, c->sattr, sizeof(p->attrs)); - verf[0] = jiffies; - verf[1] = current->pid; - memcpy(p->o_arg.u.verifier.data, verf, + memcpy(p->o_arg.u.verifier.data, c->verf, sizeof(p->o_arg.u.verifier.data)); } p->c_arg.fh = &p->o_res.fh; @@ -1892,7 +1894,7 @@ static struct nfs4_opendata *nfs4_open_recoverdata_alloc(struct nfs_open_context struct nfs4_opendata *opendata; opendata = nfs4_opendata_alloc(ctx->dentry, state->owner, 0, 0, - NULL, NULL, claim, GFP_NOFS); + NULL, claim, GFP_NOFS); if (opendata == NULL) return ERR_PTR(-ENOMEM); opendata->state = state; @@ -2823,8 +2825,7 @@ out: static int _nfs4_do_open(struct inode *dir, struct nfs_open_context *ctx, int flags, - struct iattr *sattr, - struct nfs4_label *label, + const struct nfs4_open_createattrs *c, int *opened) { struct nfs4_state_owner *sp; @@ -2836,6 +2837,8 @@ static int _nfs4_do_open(struct inode *dir, struct nfs4_threshold **ctx_th = &ctx->mdsthreshold; fmode_t fmode = ctx->mode & (FMODE_READ|FMODE_WRITE|FMODE_EXEC); enum open_claim_type4 claim = NFS4_OPEN_CLAIM_NULL; + struct iattr *sattr = c->sattr; + struct nfs4_label *label = c->label; struct nfs4_label *olabel = NULL; int status; @@ -2854,8 +2857,8 @@ static int _nfs4_do_open(struct inode *dir, status = -ENOMEM; if (d_really_is_positive(dentry)) claim = NFS4_OPEN_CLAIM_FH; - opendata = nfs4_opendata_alloc(dentry, sp, fmode, flags, sattr, - label, claim, GFP_KERNEL); + opendata = nfs4_opendata_alloc(dentry, sp, fmode, flags, + c, claim, GFP_KERNEL); if (opendata == NULL) goto err_put_state_owner; @@ -2936,10 +2939,18 @@ static struct nfs4_state *nfs4_do_open(struct inode *dir, struct nfs_server *server = NFS_SERVER(dir); struct nfs4_exception exception = { }; struct nfs4_state *res; + struct nfs4_open_createattrs c = { + .label = label, + .sattr = sattr, + .verf = { + [0] = (__u32)jiffies, + [1] = (__u32)current->pid, + }, + }; int status; do { - status = _nfs4_do_open(dir, ctx, flags, sattr, label, opened); + status = _nfs4_do_open(dir, ctx, flags, &c, opened); res = ctx->state; trace_nfs4_open_file(ctx, flags, status); if (status == 0) -- cgit v1.1 From d803224c84be067754db7fa58a93f36f61566493 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 6 Nov 2017 15:28:04 -0500 Subject: NFS: Fix a typo in nfs_rename() On successful rename, the "old_dentry" is retained and is attached to the "new_dir", so we need to call nfs_set_verifier() accordingly. Signed-off-by: Trond Myklebust Signed-off-by: Anna Schumaker --- fs/nfs/dir.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index a642ed3..927fd27 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -2057,7 +2057,7 @@ out: * should mark the directories for revalidation. */ d_move(old_dentry, new_dentry); - nfs_set_verifier(new_dentry, + nfs_set_verifier(old_dentry, nfs_save_change_attribute(new_dir)); } else if (error == -ENOENT) nfs_dentry_handle_enoent(old_dentry); -- cgit v1.1 From 12f275cdd1638a163b77b3d65625fc14a81dab2b Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 6 Nov 2017 15:28:05 -0500 Subject: NFSv4: Retry CLOSE and DELEGRETURN on NFS4ERR_OLD_STATEID. If we're racing with an OPEN, then retry the operation instead of declaring it a success. Signed-off-by: Trond Myklebust [Andrew W Elble: Fix a typo in nfs4_refresh_open_stateid] Signed-off-by: Anna Schumaker --- fs/nfs/delegation.c | 27 +++++++++++++++++++++++++++ fs/nfs/delegation.h | 1 + fs/nfs/nfs4_fs.h | 2 ++ fs/nfs/nfs4proc.c | 21 +++++++++++++++++++-- fs/nfs/nfs4state.c | 16 ++++++++++++++++ 5 files changed, 65 insertions(+), 2 deletions(-) (limited to 'fs') diff --git a/fs/nfs/delegation.c b/fs/nfs/delegation.c index 606dd38..ade44ca 100644 --- a/fs/nfs/delegation.c +++ b/fs/nfs/delegation.c @@ -1041,6 +1041,33 @@ int nfs_delegations_present(struct nfs_client *clp) } /** + * nfs4_refresh_delegation_stateid - Update delegation stateid seqid + * @dst: stateid to refresh + * @inode: inode to check + * + * Returns "true" and updates "dst->seqid" * if inode had a delegation + * that matches our delegation stateid. Otherwise "false" is returned. + */ +bool nfs4_refresh_delegation_stateid(nfs4_stateid *dst, struct inode *inode) +{ + struct nfs_delegation *delegation; + bool ret = false; + if (!inode) + goto out; + + rcu_read_lock(); + delegation = rcu_dereference(NFS_I(inode)->delegation); + if (delegation != NULL && + nfs4_stateid_match_other(dst, &delegation->stateid)) { + dst->seqid = delegation->stateid.seqid; + return ret; + } + rcu_read_unlock(); +out: + return ret; +} + +/** * nfs4_copy_delegation_stateid - Copy inode's state ID information * @inode: inode to check * @flags: delegation type requirement diff --git a/fs/nfs/delegation.h b/fs/nfs/delegation.h index e9d5557..fe9f388 100644 --- a/fs/nfs/delegation.h +++ b/fs/nfs/delegation.h @@ -61,6 +61,7 @@ int nfs4_proc_delegreturn(struct inode *inode, struct rpc_cred *cred, const nfs4 int nfs4_open_delegation_recall(struct nfs_open_context *ctx, struct nfs4_state *state, const nfs4_stateid *stateid, fmode_t type); int nfs4_lock_delegation_recall(struct file_lock *fl, struct nfs4_state *state, const nfs4_stateid *stateid); bool nfs4_copy_delegation_stateid(struct inode *inode, fmode_t flags, nfs4_stateid *dst, struct rpc_cred **cred); +bool nfs4_refresh_delegation_stateid(nfs4_stateid *dst, struct inode *inode); void nfs_mark_delegation_referenced(struct nfs_delegation *delegation); int nfs4_have_delegation(struct inode *inode, fmode_t flags); diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h index fdcfbab..feb084b 100644 --- a/fs/nfs/nfs4_fs.h +++ b/fs/nfs/nfs4_fs.h @@ -460,6 +460,8 @@ extern int nfs4_set_lock_state(struct nfs4_state *state, struct file_lock *fl); extern int nfs4_select_rw_stateid(struct nfs4_state *, fmode_t, const struct nfs_lock_context *, nfs4_stateid *, struct rpc_cred **); +extern bool nfs4_refresh_open_stateid(nfs4_stateid *dst, + struct nfs4_state *state); extern struct nfs_seqid *nfs_alloc_seqid(struct nfs_seqid_counter *counter, gfp_t gfp_mask); extern int nfs_wait_on_sequence(struct nfs_seqid *seqid, struct rpc_task *task); diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index b2d628c..3e38515 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -3199,13 +3199,21 @@ static void nfs4_close_done(struct rpc_task *task, void *data) } break; + case -NFS4ERR_OLD_STATEID: + /* Did we race with OPEN? */ + if (nfs4_refresh_open_stateid(&calldata->arg.stateid, + state)) { + task->tk_status = 0; + rpc_restart_call_prepare(task); + } + goto out_release; case -NFS4ERR_ADMIN_REVOKED: case -NFS4ERR_STALE_STATEID: case -NFS4ERR_EXPIRED: nfs4_free_revoked_stateid(server, &calldata->arg.stateid, task->tk_msg.rpc_cred); - case -NFS4ERR_OLD_STATEID: + /* Fallthrough */ case -NFS4ERR_BAD_STATEID: if (!nfs4_stateid_match(&calldata->arg.stateid, &state->open_stateid)) { @@ -3214,6 +3222,7 @@ static void nfs4_close_done(struct rpc_task *task, void *data) } if (calldata->arg.fmode == 0) break; + /* Fallthrough */ default: if (nfs4_async_handle_error(task, server, state, NULL) == -EAGAIN) { rpc_restart_call_prepare(task); @@ -5793,11 +5802,19 @@ static void nfs4_delegreturn_done(struct rpc_task *task, void *calldata) nfs4_free_revoked_stateid(data->res.server, data->args.stateid, task->tk_msg.rpc_cred); + /* Fallthrough */ case -NFS4ERR_BAD_STATEID: - case -NFS4ERR_OLD_STATEID: case -NFS4ERR_STALE_STATEID: task->tk_status = 0; break; + case -NFS4ERR_OLD_STATEID: + if (nfs4_refresh_delegation_stateid(&data->stateid, data->inode)) { + task->tk_status = 0; + rpc_restart_call_prepare(task); + return; + } + task->tk_status = 0; + break; case -NFS4ERR_ACCESS: if (data->args.bitmask) { data->args.bitmask = NULL; diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c index 6e3f372..cee1e00 100644 --- a/fs/nfs/nfs4state.c +++ b/fs/nfs/nfs4state.c @@ -986,6 +986,22 @@ out: return ret; } +bool nfs4_refresh_open_stateid(nfs4_stateid *dst, struct nfs4_state *state) +{ + bool ret; + int seq; + + do { + ret = false; + seq = read_seqbegin(&state->seqlock); + if (nfs4_state_match_open_stateid_other(state, dst)) { + dst->seqid = state->open_stateid.seqid; + ret = true; + } + } while (read_seqretry(&state->seqlock, seq)); + return ret; +} + static void nfs4_copy_open_stateid(nfs4_stateid *dst, struct nfs4_state *state) { const nfs4_stateid *src; -- cgit v1.1 From c82bac6f4b2af18dcb392b10e80c809ee17b2b1b Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 6 Nov 2017 15:28:06 -0500 Subject: NFSv4: Don't try to CLOSE if the stateid 'other' field has changed If the stateid is no longer recognised on the server, either due to a restart, or due to a competing CLOSE call, then we do not have to retry. Any open contexts that triggered a reopen of the file, will also act as triggers for any CLOSE for the updated stateids. Signed-off-by: Trond Myklebust Signed-off-by: Anna Schumaker --- fs/nfs/nfs4_fs.h | 2 ++ fs/nfs/nfs4proc.c | 14 ++++---------- fs/nfs/nfs4state.c | 9 +++++++-- 3 files changed, 13 insertions(+), 12 deletions(-) (limited to 'fs') diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h index feb084b..9721b74 100644 --- a/fs/nfs/nfs4_fs.h +++ b/fs/nfs/nfs4_fs.h @@ -462,6 +462,8 @@ extern int nfs4_select_rw_stateid(struct nfs4_state *, fmode_t, struct rpc_cred **); extern bool nfs4_refresh_open_stateid(nfs4_stateid *dst, struct nfs4_state *state); +extern bool nfs4_copy_open_stateid(nfs4_stateid *dst, + struct nfs4_state *state); extern struct nfs_seqid *nfs_alloc_seqid(struct nfs_seqid_counter *counter, gfp_t gfp_mask); extern int nfs_wait_on_sequence(struct nfs_seqid *seqid, struct rpc_task *task); diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 3e38515..ef313d6 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -3215,14 +3215,7 @@ static void nfs4_close_done(struct rpc_task *task, void *data) task->tk_msg.rpc_cred); /* Fallthrough */ case -NFS4ERR_BAD_STATEID: - if (!nfs4_stateid_match(&calldata->arg.stateid, - &state->open_stateid)) { - rpc_restart_call_prepare(task); - goto out_release; - } - if (calldata->arg.fmode == 0) - break; - /* Fallthrough */ + break; default: if (nfs4_async_handle_error(task, server, state, NULL) == -EAGAIN) { rpc_restart_call_prepare(task); @@ -3254,7 +3247,6 @@ static void nfs4_close_prepare(struct rpc_task *task, void *data) is_rdwr = test_bit(NFS_O_RDWR_STATE, &state->flags); is_rdonly = test_bit(NFS_O_RDONLY_STATE, &state->flags); is_wronly = test_bit(NFS_O_WRONLY_STATE, &state->flags); - nfs4_stateid_copy(&calldata->arg.stateid, &state->open_stateid); /* Calculate the change in open mode */ calldata->arg.fmode = 0; if (state->n_rdwr == 0) { @@ -3272,7 +3264,7 @@ static void nfs4_close_prepare(struct rpc_task *task, void *data) calldata->arg.fmode |= FMODE_READ|FMODE_WRITE; if (!nfs4_valid_open_stateid(state) || - test_bit(NFS_OPEN_STATE, &state->flags) == 0) + !nfs4_refresh_open_stateid(&calldata->arg.stateid, state)) call_close = 0; spin_unlock(&state->owner->so_lock); @@ -3366,6 +3358,8 @@ int nfs4_do_close(struct nfs4_state *state, gfp_t gfp_mask, int wait) calldata->inode = state->inode; calldata->state = state; calldata->arg.fh = NFS_FH(state->inode); + if (!nfs4_copy_open_stateid(&calldata->arg.stateid, state)) + goto out_free_calldata; /* Serialization for the sequence id */ alloc_seqid = server->nfs_client->cl_mvops->alloc_seqid; calldata->arg.seqid = alloc_seqid(&state->owner->so_seqid, gfp_mask); diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c index cee1e00..b6a0cf7 100644 --- a/fs/nfs/nfs4state.c +++ b/fs/nfs/nfs4state.c @@ -1002,18 +1002,23 @@ bool nfs4_refresh_open_stateid(nfs4_stateid *dst, struct nfs4_state *state) return ret; } -static void nfs4_copy_open_stateid(nfs4_stateid *dst, struct nfs4_state *state) +bool nfs4_copy_open_stateid(nfs4_stateid *dst, struct nfs4_state *state) { + bool ret; const nfs4_stateid *src; int seq; do { + ret = false; src = &zero_stateid; seq = read_seqbegin(&state->seqlock); - if (test_bit(NFS_OPEN_STATE, &state->flags)) + if (test_bit(NFS_OPEN_STATE, &state->flags)) { src = &state->open_stateid; + ret = true; + } nfs4_stateid_copy(dst, src); } while (read_seqretry(&state->seqlock, seq)); + return ret; } /* -- cgit v1.1 From 7380020e77b61361207420e78a9da925bc79ab4b Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 6 Nov 2017 15:28:07 -0500 Subject: pNFS: Retry NFS4ERR_OLD_STATEID errors in layoutreturn-on-close If our layoutreturn on close operation returns an NFS4ERR_OLD_STATEID, then try to update the stateid and retry. We know that there should be no further LAYOUTGET requests being launched. Signed-off-by: Trond Myklebust Signed-off-by: Anna Schumaker --- fs/nfs/nfs4proc.c | 18 ++++++++++++++++-- fs/nfs/pnfs.c | 18 ++++++++++++++++++ fs/nfs/pnfs.h | 6 ++++++ 3 files changed, 40 insertions(+), 2 deletions(-) (limited to 'fs') diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index ef313d6..7ff9c43 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -3166,11 +3166,18 @@ static void nfs4_close_done(struct rpc_task *task, void *data) calldata->arg.lr_args = NULL; calldata->res.lr_res = NULL; break; + case -NFS4ERR_OLD_STATEID: + if (nfs4_refresh_layout_stateid(&calldata->arg.lr_args->stateid, + calldata->inode)) { + calldata->res.lr_ret = 0; + rpc_restart_call_prepare(task); + return; + } + /* Fallthrough */ case -NFS4ERR_ADMIN_REVOKED: case -NFS4ERR_DELEG_REVOKED: case -NFS4ERR_EXPIRED: case -NFS4ERR_BAD_STATEID: - case -NFS4ERR_OLD_STATEID: case -NFS4ERR_UNKNOWN_LAYOUTTYPE: case -NFS4ERR_WRONG_CRED: calldata->arg.lr_args = NULL; @@ -5771,11 +5778,18 @@ static void nfs4_delegreturn_done(struct rpc_task *task, void *calldata) data->args.lr_args = NULL; data->res.lr_res = NULL; break; + case -NFS4ERR_OLD_STATEID: + if (nfs4_refresh_layout_stateid(&data->args.lr_args->stateid, + data->inode)) { + data->res.lr_ret = 0; + rpc_restart_call_prepare(task); + return; + } + /* Fallthrough */ case -NFS4ERR_ADMIN_REVOKED: case -NFS4ERR_DELEG_REVOKED: case -NFS4ERR_EXPIRED: case -NFS4ERR_BAD_STATEID: - case -NFS4ERR_OLD_STATEID: case -NFS4ERR_UNKNOWN_LAYOUTTYPE: case -NFS4ERR_WRONG_CRED: data->args.lr_args = NULL; diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index ec30dac..d602fe9 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c @@ -355,6 +355,24 @@ pnfs_clear_lseg_state(struct pnfs_layout_segment *lseg, } /* + * Update the seqid of a layout stateid + */ +bool nfs4_refresh_layout_stateid(nfs4_stateid *dst, struct inode *inode) +{ + struct pnfs_layout_hdr *lo; + bool ret = false; + + spin_lock(&inode->i_lock); + lo = NFS_I(inode)->layout; + if (lo && nfs4_stateid_match_other(dst, &lo->plh_stateid)) { + dst->seqid = lo->plh_stateid.seqid; + ret = true; + } + spin_unlock(&inode->i_lock); + return ret; +} + +/* * Mark a pnfs_layout_hdr and all associated layout segments as invalid * * In order to continue using the pnfs_layout_hdr, a full recovery diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h index 78de7a2..8d507c3 100644 --- a/fs/nfs/pnfs.h +++ b/fs/nfs/pnfs.h @@ -252,6 +252,7 @@ int pnfs_destroy_layouts_byfsid(struct nfs_client *clp, bool is_recall); int pnfs_destroy_layouts_byclid(struct nfs_client *clp, bool is_recall); +bool nfs4_refresh_layout_stateid(nfs4_stateid *dst, struct inode *inode); void pnfs_put_layout_hdr(struct pnfs_layout_hdr *lo); void pnfs_set_layout_stateid(struct pnfs_layout_hdr *lo, const nfs4_stateid *new, @@ -765,6 +766,11 @@ static inline void nfs4_pnfs_v3_ds_connect_unload(void) { } +static inline bool nfs4_refresh_layout_stateid(nfs4_stateid *dst, + struct inode *inode) +{ + return false; +} #endif /* CONFIG_NFS_V4_1 */ #if IS_ENABLED(CONFIG_NFS_V4_2) -- cgit v1.1 From ff90514ebf96695b937a116b860ff214a8d4a2ac Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 6 Nov 2017 15:28:08 -0500 Subject: NFSv4: Retry NFS4ERR_OLD_STATEID errors in layoutreturn If our layoutreturn returns an NFS4ERR_OLD_STATEID, then try to update the stateid and retry. Signed-off-by: Trond Myklebust Signed-off-by: Anna Schumaker --- fs/nfs/nfs4proc.c | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) (limited to 'fs') diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 7ff9c43..8e7604a 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -8750,18 +8750,27 @@ static void nfs4_layoutreturn_done(struct rpc_task *task, void *calldata) server = NFS_SERVER(lrp->args.inode); switch (task->tk_status) { + case -NFS4ERR_OLD_STATEID: + if (nfs4_refresh_layout_stateid(&lrp->args.stateid, + lrp->args.inode)) + goto out_restart; + /* Fallthrough */ default: task->tk_status = 0; + /* Fallthrough */ case 0: break; case -NFS4ERR_DELAY: if (nfs4_async_handle_error(task, server, NULL, NULL) != -EAGAIN) break; - nfs4_sequence_free_slot(&lrp->res.seq_res); - rpc_restart_call_prepare(task); - return; + goto out_restart; } dprintk("<-- %s\n", __func__); + return; +out_restart: + task->tk_status = 0; + nfs4_sequence_free_slot(&lrp->res.seq_res); + rpc_restart_call_prepare(task); } static void nfs4_layoutreturn_release(void *calldata) -- cgit v1.1 From 91b30d2e7f303dee9cdd11010e9058bc9124bc57 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 6 Nov 2017 15:28:09 -0500 Subject: NFSv4: cleanup nfs4_close_done Signed-off-by: Trond Myklebust Signed-off-by: Anna Schumaker --- fs/nfs/nfs4proc.c | 35 ++++++++++++++++------------------- 1 file changed, 16 insertions(+), 19 deletions(-) (limited to 'fs') diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 8e7604a..4a5b01c 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -3168,11 +3168,8 @@ static void nfs4_close_done(struct rpc_task *task, void *data) break; case -NFS4ERR_OLD_STATEID: if (nfs4_refresh_layout_stateid(&calldata->arg.lr_args->stateid, - calldata->inode)) { - calldata->res.lr_ret = 0; - rpc_restart_call_prepare(task); - return; - } + calldata->inode)) + goto lr_restart; /* Fallthrough */ case -NFS4ERR_ADMIN_REVOKED: case -NFS4ERR_DELEG_REVOKED: @@ -3182,9 +3179,7 @@ static void nfs4_close_done(struct rpc_task *task, void *data) case -NFS4ERR_WRONG_CRED: calldata->arg.lr_args = NULL; calldata->res.lr_res = NULL; - calldata->res.lr_ret = 0; - rpc_restart_call_prepare(task); - return; + goto lr_restart; } } @@ -3200,19 +3195,15 @@ static void nfs4_close_done(struct rpc_task *task, void *data) if (calldata->arg.bitmask != NULL) { calldata->arg.bitmask = NULL; calldata->res.fattr = NULL; - task->tk_status = 0; - rpc_restart_call_prepare(task); - goto out_release; + goto out_restart; } break; case -NFS4ERR_OLD_STATEID: /* Did we race with OPEN? */ if (nfs4_refresh_open_stateid(&calldata->arg.stateid, - state)) { - task->tk_status = 0; - rpc_restart_call_prepare(task); - } + state)) + goto out_restart; goto out_release; case -NFS4ERR_ADMIN_REVOKED: case -NFS4ERR_STALE_STATEID: @@ -3224,17 +3215,23 @@ static void nfs4_close_done(struct rpc_task *task, void *data) case -NFS4ERR_BAD_STATEID: break; default: - if (nfs4_async_handle_error(task, server, state, NULL) == -EAGAIN) { - rpc_restart_call_prepare(task); - goto out_release; - } + if (nfs4_async_handle_error(task, server, state, NULL) == -EAGAIN) + goto out_restart; } nfs_clear_open_stateid(state, &calldata->arg.stateid, res_stateid, calldata->arg.fmode); out_release: + task->tk_status = 0; nfs_release_seqid(calldata->arg.seqid); nfs_refresh_inode(calldata->inode, &calldata->fattr); dprintk("%s: done, ret = %d!\n", __func__, task->tk_status); + return; +lr_restart: + calldata->res.lr_ret = 0; +out_restart: + task->tk_status = 0; + rpc_restart_call_prepare(task); + goto out_release; } static void nfs4_close_prepare(struct rpc_task *task, void *data) -- cgit v1.1 From 140087fdf65b271055de1e1669c8016a908b6a85 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 6 Nov 2017 15:28:10 -0500 Subject: NFSv4: Clean up nfs4_delegreturn_done Signed-off-by: Trond Myklebust Signed-off-by: Anna Schumaker --- fs/nfs/nfs4proc.c | 32 ++++++++++++++------------------ 1 file changed, 14 insertions(+), 18 deletions(-) (limited to 'fs') diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 4a5b01c..1090016 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -5777,11 +5777,8 @@ static void nfs4_delegreturn_done(struct rpc_task *task, void *calldata) break; case -NFS4ERR_OLD_STATEID: if (nfs4_refresh_layout_stateid(&data->args.lr_args->stateid, - data->inode)) { - data->res.lr_ret = 0; - rpc_restart_call_prepare(task); - return; - } + data->inode)) + goto lr_restart; /* Fallthrough */ case -NFS4ERR_ADMIN_REVOKED: case -NFS4ERR_DELEG_REVOKED: @@ -5791,9 +5788,7 @@ static void nfs4_delegreturn_done(struct rpc_task *task, void *calldata) case -NFS4ERR_WRONG_CRED: data->args.lr_args = NULL; data->res.lr_res = NULL; - data->res.lr_ret = 0; - rpc_restart_call_prepare(task); - return; + goto lr_restart; } } @@ -5813,29 +5808,30 @@ static void nfs4_delegreturn_done(struct rpc_task *task, void *calldata) task->tk_status = 0; break; case -NFS4ERR_OLD_STATEID: - if (nfs4_refresh_delegation_stateid(&data->stateid, data->inode)) { - task->tk_status = 0; - rpc_restart_call_prepare(task); - return; - } + if (nfs4_refresh_delegation_stateid(&data->stateid, data->inode)) + goto out_restart; task->tk_status = 0; break; case -NFS4ERR_ACCESS: if (data->args.bitmask) { data->args.bitmask = NULL; data->res.fattr = NULL; - task->tk_status = 0; - rpc_restart_call_prepare(task); - return; + goto out_restart; } + /* Fallthrough */ default: if (nfs4_async_handle_error(task, data->res.server, NULL, NULL) == -EAGAIN) { - rpc_restart_call_prepare(task); - return; + goto out_restart; } } data->rpc_status = task->tk_status; + return; +lr_restart: + data->res.lr_ret = 0; +out_restart: + task->tk_status = 0; + rpc_restart_call_prepare(task); } static void nfs4_delegreturn_release(void *calldata) -- cgit v1.1 From 46280d9d3de67f64c5bd30a7bbdc372d69f38d98 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 6 Nov 2017 15:28:11 -0500 Subject: NFSv4: Check the open stateid when searching for expired state Signed-off-by: Trond Myklebust Signed-off-by: Anna Schumaker --- fs/nfs/nfs4state.c | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'fs') diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c index b6a0cf7..ff27944 100644 --- a/fs/nfs/nfs4state.c +++ b/fs/nfs/nfs4state.c @@ -1431,6 +1431,11 @@ void nfs_inode_find_state_and_recover(struct inode *inode, found = true; continue; } + if (nfs4_stateid_match_other(&state->open_stateid, stateid) && + nfs4_state_mark_reclaim_nograce(clp, state)) { + found = true; + continue; + } if (nfs_state_lock_state_matches_stateid(state, stateid) && nfs4_state_mark_reclaim_nograce(clp, state)) found = true; -- cgit v1.1 From e1fff5df6e04818c711882d40f06e97891bca36e Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 7 Nov 2017 13:10:46 -0500 Subject: NFSv4: nfs_set_open_stateid must not trigger state recovery for closed state In nfs_set_open_stateid_locked, we must ignore stateids from closed state. Reported-by: Andrew W Elble Signed-off-by: Trond Myklebust Signed-off-by: Anna Schumaker --- fs/nfs/nfs4proc.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 1090016..2c9c225 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -1541,7 +1541,8 @@ static void nfs_set_open_stateid_locked(struct nfs4_state *state, write_seqlock(&state->seqlock); } - if (!nfs4_stateid_match_other(stateid, &state->open_stateid)) { + if (test_bit(NFS_OPEN_STATE, &state->flags) && + !nfs4_stateid_match_other(stateid, &state->open_stateid)) { nfs4_stateid_copy(freeme, &state->open_stateid); nfs_test_and_clear_all_open_stateid(state); } -- cgit v1.1 From fcd8843c406b46433857ae45e5e9d84b01a7d20b Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 7 Nov 2017 12:39:44 -0500 Subject: NFSv4: Replace closed stateids with the "invalid special stateid" When decoding a CLOSE, replace the stateid returned by the server with the "invalid special stateid" described in RFC5661, Section 8.2.3. In nfs_set_open_stateid_locked, ignore stateids from closed state. Signed-off-by: Trond Myklebust Signed-off-by: Anna Schumaker --- fs/nfs/nfs4_fs.h | 1 + fs/nfs/nfs4state.c | 8 ++++++++ fs/nfs/nfs4xdr.c | 12 +++++++++++- 3 files changed, 20 insertions(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h index 9721b74..dcfd1af 100644 --- a/fs/nfs/nfs4_fs.h +++ b/fs/nfs/nfs4_fs.h @@ -481,6 +481,7 @@ extern int nfs4_sequence_done(struct rpc_task *task, extern void nfs4_free_lock_state(struct nfs_server *server, struct nfs4_lock_state *lsp); extern const nfs4_stateid zero_stateid; +extern const nfs4_stateid invalid_stateid; /* nfs4super.c */ struct nfs_mount_info; diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c index ff27944..54fd56d 100644 --- a/fs/nfs/nfs4state.c +++ b/fs/nfs/nfs4state.c @@ -69,6 +69,14 @@ const nfs4_stateid zero_stateid = { { .data = { 0 } }, .type = NFS4_SPECIAL_STATEID_TYPE, }; +const nfs4_stateid invalid_stateid = { + { + .seqid = cpu_to_be32(0xffffffffU), + .other = { 0 }, + }, + .type = NFS4_INVALID_STATEID_TYPE, +}; + static DEFINE_MUTEX(nfs_clid_init_mutex); int nfs4_init_clientid(struct nfs_client *clp, struct rpc_cred *cred) diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c index 14ed979..77c6729 100644 --- a/fs/nfs/nfs4xdr.c +++ b/fs/nfs/nfs4xdr.c @@ -4385,6 +4385,14 @@ static int decode_delegation_stateid(struct xdr_stream *xdr, nfs4_stateid *state return decode_stateid(xdr, stateid); } +static int decode_invalid_stateid(struct xdr_stream *xdr, nfs4_stateid *stateid) +{ + nfs4_stateid dummy; + + nfs4_stateid_copy(stateid, &invalid_stateid); + return decode_stateid(xdr, &dummy); +} + static int decode_close(struct xdr_stream *xdr, struct nfs_closeres *res) { int status; @@ -4393,7 +4401,7 @@ static int decode_close(struct xdr_stream *xdr, struct nfs_closeres *res) if (status != -EIO) nfs_increment_open_seqid(status, res->seqid); if (!status) - status = decode_open_stateid(xdr, &res->stateid); + status = decode_invalid_stateid(xdr, &res->stateid); return status; } @@ -6108,6 +6116,8 @@ static int decode_layoutreturn(struct xdr_stream *xdr, res->lrs_present = be32_to_cpup(p); if (res->lrs_present) status = decode_layout_stateid(xdr, &res->stateid); + else + nfs4_stateid_copy(&res->stateid, &invalid_stateid); return status; out_overflow: print_overflow_msg(__func__, xdr); -- cgit v1.1 From 0671d8f108762efc51ca893dbf8f0ba72f655c3d Mon Sep 17 00:00:00 2001 From: Markus Elfring Date: Tue, 7 Nov 2017 08:51:00 +0100 Subject: nfs/write: Use common error handling code in nfs_lock_and_join_requests() Add a jump target so that a bit of exception handling can be better reused at the end of this function. This issue was detected by using the Coccinelle software. Signed-off-by: Markus Elfring Signed-off-by: Anna Schumaker --- fs/nfs/write.c | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) (limited to 'fs') diff --git a/fs/nfs/write.c b/fs/nfs/write.c index babebbc..5b5f464 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -487,10 +487,8 @@ try_again: } ret = nfs_page_group_lock(head); - if (ret < 0) { - nfs_unlock_and_release_request(head); - return ERR_PTR(ret); - } + if (ret < 0) + goto release_request; /* lock each request in the page group */ total_bytes = head->wb_bytes; @@ -515,8 +513,7 @@ try_again: if (ret < 0) { nfs_unroll_locks(inode, head, subreq); nfs_release_request(subreq); - nfs_unlock_and_release_request(head); - return ERR_PTR(ret); + goto release_request; } } /* @@ -532,8 +529,8 @@ try_again: nfs_page_group_unlock(head); nfs_unroll_locks(inode, head, subreq); nfs_unlock_and_release_request(subreq); - nfs_unlock_and_release_request(head); - return ERR_PTR(-EIO); + ret = -EIO; + goto release_request; } } @@ -576,6 +573,10 @@ try_again: /* still holds ref on head from nfs_page_find_head_request * and still has lock on head from lock loop */ return head; + +release_request: + nfs_unlock_and_release_request(head); + return ERR_PTR(ret); } static void nfs_write_error_remove_page(struct nfs_page *req) -- cgit v1.1 From b0b5352d9a507b344d4a2aec21f0105c585251fe Mon Sep 17 00:00:00 2001 From: Vasily Averin Date: Sun, 12 Nov 2017 11:48:16 +0300 Subject: nfs client: exit_net cleanup check added Be sure that nfs_client_list and nfs_volume_list lists initialized in net_init hook were return to initial state in net_exit hook. Signed-off-by: Vasily Averin Signed-off-by: Anna Schumaker --- fs/nfs/inode.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'fs') diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index 52a60e3..e5fcc1f 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -2084,8 +2084,12 @@ static int nfs_net_init(struct net *net) static void nfs_net_exit(struct net *net) { + struct nfs_net *nn = net_generic(net, nfs_net_id); + nfs_fs_proc_net_exit(net); nfs_cleanup_cb_ident_idr(net); + WARN_ON_ONCE(!list_empty(&nn->nfs_client_list)); + WARN_ON_ONCE(!list_empty(&nn->nfs_volume_list)); } static struct pernet_operations nfs_net_ops = { -- cgit v1.1 From e4949e4b3d5e056bcecebd340d4c8fab7ed1c20d Mon Sep 17 00:00:00 2001 From: Vasily Averin Date: Wed, 8 Nov 2017 08:56:55 +0300 Subject: nfs: remove net pointer from messages Publishing of net pointer is not safe, use net->ns.inum instead Signed-off-by: Vasily Averin Signed-off-by: Anna Schumaker --- fs/nfs/callback.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) (limited to 'fs') diff --git a/fs/nfs/callback.c b/fs/nfs/callback.c index 2cddf7f..387369c 100644 --- a/fs/nfs/callback.c +++ b/fs/nfs/callback.c @@ -48,15 +48,15 @@ static int nfs4_callback_up_net(struct svc_serv *serv, struct net *net) if (ret <= 0) goto out_err; nn->nfs_callback_tcpport = ret; - dprintk("NFS: Callback listener port = %u (af %u, net %p)\n", - nn->nfs_callback_tcpport, PF_INET, net); + dprintk("NFS: Callback listener port = %u (af %u, net %x)\n", + nn->nfs_callback_tcpport, PF_INET, net->ns.inum); ret = svc_create_xprt(serv, "tcp", net, PF_INET6, nfs_callback_set_tcpport, SVC_SOCK_ANONYMOUS); if (ret > 0) { nn->nfs_callback_tcpport6 = ret; - dprintk("NFS: Callback listener port = %u (af %u, net %p)\n", - nn->nfs_callback_tcpport6, PF_INET6, net); + dprintk("NFS: Callback listener port = %u (af %u, net %x\n", + nn->nfs_callback_tcpport6, PF_INET6, net->ns.inum); } else if (ret != -EAFNOSUPPORT) goto out_err; return 0; @@ -184,7 +184,7 @@ static void nfs_callback_down_net(u32 minorversion, struct svc_serv *serv, struc if (--nn->cb_users[minorversion]) return; - dprintk("NFS: destroy per-net callback data; net=%p\n", net); + dprintk("NFS: destroy per-net callback data; net=%x\n", net->ns.inum); svc_shutdown_net(serv, net); } @@ -197,7 +197,7 @@ static int nfs_callback_up_net(int minorversion, struct svc_serv *serv, if (nn->cb_users[minorversion]++) return 0; - dprintk("NFS: create per-net callback data; net=%p\n", net); + dprintk("NFS: create per-net callback data; net=%x\n", net->ns.inum); ret = svc_bind(serv, net); if (ret < 0) { @@ -222,7 +222,7 @@ err_socks: err_bind: nn->cb_users[minorversion]--; dprintk("NFS: Couldn't create callback socket: err = %d; " - "net = %p\n", ret, net); + "net = %x\n", ret, net->ns.inum); return ret; } -- cgit v1.1 From fd53dde83978ba5f7db3183ce56b3a1c39f448b0 Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Wed, 8 Nov 2017 20:49:19 -0600 Subject: NFS: super: mark expected switch fall-throughs In preparation to enabling -Wimplicit-fallthrough, mark switch cases where we are expecting to fall through. Addresses-Coverity-ID: 703509 Addresses-Coverity-ID: 703510 Addresses-Coverity-ID: 703511 Addresses-Coverity-ID: 703512 Addresses-Coverity-ID: 703513 Signed-off-by: Gustavo A. R. Silva Signed-off-by: Anna Schumaker --- fs/nfs/super.c | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) (limited to 'fs') diff --git a/fs/nfs/super.c b/fs/nfs/super.c index c9d24ba..da19c72 100644 --- a/fs/nfs/super.c +++ b/fs/nfs/super.c @@ -1456,18 +1456,21 @@ static int nfs_parse_mount_options(char *raw, switch (token) { case Opt_xprt_udp6: protofamily = AF_INET6; + /* fall through */ case Opt_xprt_udp: mnt->flags &= ~NFS_MOUNT_TCP; mnt->nfs_server.protocol = XPRT_TRANSPORT_UDP; break; case Opt_xprt_tcp6: protofamily = AF_INET6; + /* fall through */ case Opt_xprt_tcp: mnt->flags |= NFS_MOUNT_TCP; mnt->nfs_server.protocol = XPRT_TRANSPORT_TCP; break; case Opt_xprt_rdma6: protofamily = AF_INET6; + /* fall through */ case Opt_xprt_rdma: /* vector side protocols to TCP */ mnt->flags |= NFS_MOUNT_TCP; @@ -1494,11 +1497,13 @@ static int nfs_parse_mount_options(char *raw, switch (token) { case Opt_xprt_udp6: mountfamily = AF_INET6; + /* fall through */ case Opt_xprt_udp: mnt->mount_server.protocol = XPRT_TRANSPORT_UDP; break; case Opt_xprt_tcp6: mountfamily = AF_INET6; + /* fall through */ case Opt_xprt_tcp: mnt->mount_server.protocol = XPRT_TRANSPORT_TCP; break; @@ -1988,9 +1993,9 @@ static int nfs23_validate_mount_data(void *options, args->version = NFS_DEFAULT_VERSION; switch (data->version) { case 1: - data->namlen = 0; + data->namlen = 0; /* fall through */ case 2: - data->bsize = 0; + data->bsize = 0; /* fall through */ case 3: if (data->flags & NFS_MOUNT_VER3) goto out_no_v3; @@ -1998,11 +2003,14 @@ static int nfs23_validate_mount_data(void *options, memcpy(data->root.data, data->old_root.data, NFS2_FHSIZE); /* Turn off security negotiation */ extra_flags |= NFS_MOUNT_SECFLAVOUR; + /* fall through */ case 4: if (data->flags & NFS_MOUNT_SECFLAVOUR) goto out_no_sec; + /* fall through */ case 5: memset(data->context, 0, sizeof(data->context)); + /* fall through */ case 6: if (data->flags & NFS_MOUNT_VER3) { if (data->root.size > NFS3_FHSIZE || data->root.size == 0) -- cgit v1.1 From c05cefcc72416a37eba5a2b35f0704ed758a9145 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Sun, 5 Nov 2017 15:45:22 -0500 Subject: nfs: Fix ugly referral attributes Before traversing a referral and performing a mount, the mounted-on directory looks strange: dr-xr-xr-x. 2 4294967294 4294967294 0 Dec 31 1969 dir.0 nfs4_get_referral is wiping out any cached attributes with what was returned via GETATTR(fs_locations), but the bit mask for that operation does not request any file attributes. Retrieve owner and timestamp information so that the memcpy in nfs4_get_referral fills in more attributes. Changes since v1: - Don't request attributes that the client unconditionally replaces - Request only MOUNTED_ON_FILEID or FILEID attribute, not both - encode_fs_locations() doesn't use the third bitmask word Fixes: 6b97fd3da1ea ("NFSv4: Follow a referral") Suggested-by: Pradeep Thomas Signed-off-by: Chuck Lever Cc: stable@vger.kernel.org Signed-off-by: Anna Schumaker --- fs/nfs/nfs4proc.c | 18 ++++++++---------- 1 file changed, 8 insertions(+), 10 deletions(-) (limited to 'fs') diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 2c9c225..6b23a032 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -258,15 +258,12 @@ const u32 nfs4_fsinfo_bitmap[3] = { FATTR4_WORD0_MAXFILESIZE }; const u32 nfs4_fs_locations_bitmap[3] = { - FATTR4_WORD0_TYPE - | FATTR4_WORD0_CHANGE + FATTR4_WORD0_CHANGE | FATTR4_WORD0_SIZE | FATTR4_WORD0_FSID | FATTR4_WORD0_FILEID | FATTR4_WORD0_FS_LOCATIONS, - FATTR4_WORD1_MODE - | FATTR4_WORD1_NUMLINKS - | FATTR4_WORD1_OWNER + FATTR4_WORD1_OWNER | FATTR4_WORD1_OWNER_GROUP | FATTR4_WORD1_RAWDEV | FATTR4_WORD1_SPACE_USED @@ -6907,9 +6904,7 @@ static int _nfs4_proc_fs_locations(struct rpc_clnt *client, struct inode *dir, struct page *page) { struct nfs_server *server = NFS_SERVER(dir); - u32 bitmask[3] = { - [0] = FATTR4_WORD0_FSID | FATTR4_WORD0_FS_LOCATIONS, - }; + u32 bitmask[3]; struct nfs4_fs_locations_arg args = { .dir_fh = NFS_FH(dir), .name = name, @@ -6928,12 +6923,15 @@ static int _nfs4_proc_fs_locations(struct rpc_clnt *client, struct inode *dir, dprintk("%s: start\n", __func__); + bitmask[0] = nfs4_fattr_bitmap[0] | FATTR4_WORD0_FS_LOCATIONS; + bitmask[1] = nfs4_fattr_bitmap[1]; + /* Ask for the fileid of the absent filesystem if mounted_on_fileid * is not supported */ if (NFS_SERVER(dir)->attr_bitmask[1] & FATTR4_WORD1_MOUNTED_ON_FILEID) - bitmask[1] |= FATTR4_WORD1_MOUNTED_ON_FILEID; + bitmask[0] &= ~FATTR4_WORD0_FILEID; else - bitmask[0] |= FATTR4_WORD0_FILEID; + bitmask[1] &= ~FATTR4_WORD1_MOUNTED_ON_FILEID; nfs_fattr_init(&fs_locations->fattr); fs_locations->server = server; -- cgit v1.1 From f02fee227e5f21981152850744a6084ff3fa94ee Mon Sep 17 00:00:00 2001 From: Joshua Watt Date: Tue, 7 Nov 2017 16:25:47 -0600 Subject: NFS: Fix typo in nomigration mount option The option was incorrectly masking off all other options. Signed-off-by: Joshua Watt Cc: stable@vger.kernel.org #3.7 Signed-off-by: Anna Schumaker --- fs/nfs/super.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/nfs/super.c b/fs/nfs/super.c index da19c72..43cadb2 100644 --- a/fs/nfs/super.c +++ b/fs/nfs/super.c @@ -1332,7 +1332,7 @@ static int nfs_parse_mount_options(char *raw, mnt->options |= NFS_OPTION_MIGRATION; break; case Opt_nomigration: - mnt->options &= NFS_OPTION_MIGRATION; + mnt->options &= ~NFS_OPTION_MIGRATION; break; /* -- cgit v1.1 From fcfa447062b2061e11f68b846d61cbfe60d0d604 Mon Sep 17 00:00:00 2001 From: Benjamin Coddington Date: Fri, 10 Nov 2017 06:27:49 -0500 Subject: NFS: Revert "NFS: Move the flock open mode check into nfs_flock()" Commit e12937279c8b "NFS: Move the flock open mode check into nfs_flock()" changed NFSv3 behavior for flock() such that the open mode must match the lock type, however that requirement shouldn't be enforced for flock(). Signed-off-by: Benjamin Coddington Cc: stable@vger.kernel.org # v4.12 Signed-off-by: Anna Schumaker --- fs/nfs/file.c | 18 ++---------------- fs/nfs/nfs4proc.c | 14 ++++++++++++++ 2 files changed, 16 insertions(+), 16 deletions(-) (limited to 'fs') diff --git a/fs/nfs/file.c b/fs/nfs/file.c index 0214dd1..81cca49 100644 --- a/fs/nfs/file.c +++ b/fs/nfs/file.c @@ -829,23 +829,9 @@ int nfs_flock(struct file *filp, int cmd, struct file_lock *fl) if (NFS_SERVER(inode)->flags & NFS_MOUNT_LOCAL_FLOCK) is_local = 1; - /* - * VFS doesn't require the open mode to match a flock() lock's type. - * NFS, however, may simulate flock() locking with posix locking which - * requires the open mode to match the lock type. - */ - switch (fl->fl_type) { - case F_UNLCK: + /* We're simulating flock() locks using posix locks on the server */ + if (fl->fl_type == F_UNLCK) return do_unlk(filp, cmd, fl, is_local); - case F_RDLCK: - if (!(filp->f_mode & FMODE_READ)) - return -EBADF; - break; - case F_WRLCK: - if (!(filp->f_mode & FMODE_WRITE)) - return -EBADF; - } - return do_setlk(filp, cmd, fl, is_local); } EXPORT_SYMBOL_GPL(nfs_flock); diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 6b23a032..56fa5a1 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -6709,6 +6709,20 @@ nfs4_proc_lock(struct file *filp, int cmd, struct file_lock *request) !test_bit(NFS_STATE_POSIX_LOCKS, &state->flags)) return -ENOLCK; + /* + * Don't rely on the VFS having checked the file open mode, + * since it won't do this for flock() locks. + */ + switch (request->fl_type) { + case F_RDLCK: + if (!(filp->f_mode & FMODE_READ)) + return -EBADF; + break; + case F_WRLCK: + if (!(filp->f_mode & FMODE_WRITE)) + return -EBADF; + } + status = nfs4_set_lock_state(state, request); if (status != 0) return status; -- cgit v1.1