From 536585ccf95d34d64e2855f72608095854b8fc0e Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Thu, 10 Nov 2016 15:40:34 -0500 Subject: NFSv4: Don't check file access when reclaiming state If we're reclaiming state after a reboot, or as part of returning a delegation, we don't need to check access modes again. Signed-off-by: Trond Myklebust --- fs/nfs/nfs4proc.c | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 241da19..b582df8 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -1221,6 +1221,7 @@ static struct nfs4_opendata *nfs4_opendata_alloc(struct dentry *dentry, atomic_inc(&sp->so_count); p->o_arg.open_flags = flags; p->o_arg.fmode = fmode & (FMODE_READ|FMODE_WRITE); + p->o_arg.claim = nfs4_map_atomic_open_claim(server, claim); p->o_arg.share_access = nfs4_map_atomic_open_share(server, fmode, flags); /* don't put an ACCESS op in OPEN compound if O_EXCL, because ACCESS @@ -1228,8 +1229,16 @@ static struct nfs4_opendata *nfs4_opendata_alloc(struct dentry *dentry, if (!(flags & O_EXCL)) { /* ask server to check for all possible rights as results * are cached */ - p->o_arg.access = NFS4_ACCESS_READ | NFS4_ACCESS_MODIFY | - NFS4_ACCESS_EXTEND | NFS4_ACCESS_EXECUTE; + switch (p->o_arg.claim) { + default: + break; + case NFS4_OPEN_CLAIM_NULL: + case NFS4_OPEN_CLAIM_FH: + p->o_arg.access = NFS4_ACCESS_READ | + NFS4_ACCESS_MODIFY | + NFS4_ACCESS_EXTEND | + NFS4_ACCESS_EXECUTE; + } } p->o_arg.clientid = server->nfs_client->cl_clientid; p->o_arg.id.create_time = ktime_to_ns(sp->so_seqid.create_time); @@ -1239,7 +1248,6 @@ static struct nfs4_opendata *nfs4_opendata_alloc(struct dentry *dentry, p->o_arg.bitmask = nfs4_bitmask(server, label); p->o_arg.open_bitmap = &nfs4_fattr_bitmap[0]; p->o_arg.label = nfs4_label_copy(p->a_label, label); - p->o_arg.claim = nfs4_map_atomic_open_claim(server, claim); switch (p->o_arg.claim) { case NFS4_OPEN_CLAIM_NULL: case NFS4_OPEN_CLAIM_DELEGATE_CUR: -- cgit v1.1 From 1cc1baf14b595bd44e83c2aca2fe487df7b6a1e3 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Thu, 10 Nov 2016 15:47:26 -0500 Subject: NFSv4: Don't ask for the change attribute when reclaiming state We don't need to ask for the change attribute when returning a delegation or recovering from a server reboot, and it could actually cause us to obtain an incorrect value if we're using a pNFS flavour that requires LAYOUTCOMMIT. Signed-off-by: Trond Myklebust --- fs/nfs/nfs4proc.c | 1 - 1 file changed, 1 deletion(-) (limited to 'fs/nfs') diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index b582df8..c0628f7 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -226,7 +226,6 @@ static const u32 nfs4_pnfs_open_bitmap[3] = { static const u32 nfs4_open_noattr_bitmap[3] = { FATTR4_WORD0_TYPE - | FATTR4_WORD0_CHANGE | FATTR4_WORD0_FILEID, }; -- cgit v1.1 From 3947b74d0f9d0738335d8b2bca2bde057f9141be Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Thu, 27 Oct 2016 18:27:02 -0400 Subject: NFSv4: Don't request a GETATTR on open_downgrade. If we're not closing the file completely, there is no need to request close-to-open attributes. Signed-off-by: Trond Myklebust --- fs/nfs/nfs4xdr.c | 10 ++-------- 1 file changed, 2 insertions(+), 8 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c index fc89e5e..c374737 100644 --- a/fs/nfs/nfs4xdr.c +++ b/fs/nfs/nfs4xdr.c @@ -499,14 +499,12 @@ static int nfs4_stat_to_errno(int); (compound_encode_hdr_maxsz + \ encode_sequence_maxsz + \ encode_putfh_maxsz + \ - encode_open_downgrade_maxsz + \ - encode_getattr_maxsz) + encode_open_downgrade_maxsz) #define NFS4_dec_open_downgrade_sz \ (compound_decode_hdr_maxsz + \ decode_sequence_maxsz + \ decode_putfh_maxsz + \ - decode_open_downgrade_maxsz + \ - decode_getattr_maxsz) + decode_open_downgrade_maxsz) #define NFS4_enc_close_sz (compound_encode_hdr_maxsz + \ encode_sequence_maxsz + \ encode_putfh_maxsz + \ @@ -2328,7 +2326,6 @@ static void nfs4_xdr_enc_open_downgrade(struct rpc_rqst *req, encode_sequence(xdr, &args->seq_args, &hdr); encode_putfh(xdr, args->fh, &hdr); encode_open_downgrade(xdr, args, &hdr); - encode_getfattr(xdr, args->bitmask, &hdr); encode_nops(&hdr); } @@ -6115,9 +6112,6 @@ static int nfs4_xdr_dec_open_downgrade(struct rpc_rqst *rqstp, if (status) goto out; status = decode_open_downgrade(xdr, res); - if (status != 0) - goto out; - decode_getfattr(xdr, res->fattr, res->server); out: return status; } -- cgit v1.1 From 3ecefc9295991eaaad4c67915c6384e5d18cc632 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Thu, 27 Oct 2016 18:25:04 -0400 Subject: NFSv4: Don't request close-to-open attribute when holding a delegation If holding a delegation, we do not need to ask the server to return close-to-open cache consistency attributes as part of the CLOSE compound. Signed-off-by: Trond Myklebust --- fs/nfs/nfs4proc.c | 10 ++++++++-- fs/nfs/nfs4xdr.c | 3 ++- 2 files changed, 10 insertions(+), 3 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index c0628f7..917a6db 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -3156,8 +3156,15 @@ static void nfs4_close_prepare(struct rpc_task *task, void *data) goto out_wait; } - if (calldata->arg.fmode == 0) + if (calldata->arg.fmode == 0) { task->tk_msg.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_CLOSE]; + + /* Close-to-open cache consistency revalidation */ + if (!nfs4_have_delegation(inode, FMODE_READ)) + calldata->arg.bitmask = NFS_SERVER(inode)->cache_consistency_bitmask; + else + calldata->arg.bitmask = NULL; + } if (calldata->roc) pnfs_roc_get_barrier(inode, &calldata->roc_barrier); @@ -3240,7 +3247,6 @@ int nfs4_do_close(struct nfs4_state *state, gfp_t gfp_mask, int wait) if (IS_ERR(calldata->arg.seqid)) goto out_free_calldata; calldata->arg.fmode = 0; - calldata->arg.bitmask = server->cache_consistency_bitmask; calldata->res.fattr = &calldata->fattr; calldata->res.seqid = calldata->arg.seqid; calldata->res.server = server; diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c index c374737..b549315 100644 --- a/fs/nfs/nfs4xdr.c +++ b/fs/nfs/nfs4xdr.c @@ -2248,7 +2248,8 @@ static void nfs4_xdr_enc_close(struct rpc_rqst *req, struct xdr_stream *xdr, encode_sequence(xdr, &args->seq_args, &hdr); encode_putfh(xdr, args->fh, &hdr); encode_close(xdr, args, &hdr); - encode_getfattr(xdr, args->bitmask, &hdr); + if (args->bitmask != NULL) + encode_getfattr(xdr, args->bitmask, &hdr); encode_nops(&hdr); } -- cgit v1.1 From 1ad13dbc85911fcf15232342205c92e250335267 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Thu, 27 Oct 2016 18:42:04 -0400 Subject: NFSv4: Optimise away forced revalidation when we know the attributes are OK The NFS_INO_REVAL_FORCED flag needs to be set if we just got a delegation, and we see that there might still be some ambiguity as to whether or not our attribute or data cache are valid. In practice, this means that a call to nfs_check_inode_attributes() will have noticed a discrepancy between cached attributes and measured ones, so let's move the setting of NFS_INO_REVAL_FORCED to there. Signed-off-by: Trond Myklebust --- fs/nfs/delegation.c | 4 ---- fs/nfs/inode.c | 2 +- 2 files changed, 1 insertion(+), 5 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/delegation.c b/fs/nfs/delegation.c index dff600a..d7df5e6 100644 --- a/fs/nfs/delegation.c +++ b/fs/nfs/delegation.c @@ -391,10 +391,6 @@ int nfs_inode_set_delegation(struct inode *inode, struct rpc_cred *cred, struct rcu_assign_pointer(nfsi->delegation, delegation); delegation = NULL; - /* Ensure we revalidate the attributes and page cache! */ - spin_lock(&inode->i_lock); - nfsi->cache_validity |= NFS_INO_REVAL_FORCED; - spin_unlock(&inode->i_lock); trace_nfs4_set_delegation(inode, res->delegation_type); out: diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index bf4ec5e..3575e34 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -1317,7 +1317,7 @@ static int nfs_check_inode_attributes(struct inode *inode, struct nfs_fattr *fat invalid |= NFS_INO_INVALID_ATIME; if (invalid != 0) - nfs_set_cache_invalid(inode, invalid); + nfs_set_cache_invalid(inode, invalid | NFS_INO_REVAL_FORCED); nfsi->read_cache_jiffies = fattr->time_start; return 0; -- cgit v1.1 From 9a837856cf612d83e9ede7081972a2fa78de6921 Mon Sep 17 00:00:00 2001 From: Fred Isaman Date: Tue, 27 Sep 2016 15:37:11 -0400 Subject: NFSv4.1: Fix regression in callback retry handling When initializing a freshly created slot for the calllback channel, the seq_nr needs to be 0, not 1. Otherwise validate_seqid and nfs4_slot_wait_on_seqid get confused and believe that the mpty slot corresponds to a previously sent reply. Signed-off-by: Fred Isaman Signed-off-by: Trond Myklebust --- fs/nfs/nfs4session.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs/nfs') diff --git a/fs/nfs/nfs4session.c b/fs/nfs/nfs4session.c index a61350f..769b856 100644 --- a/fs/nfs/nfs4session.c +++ b/fs/nfs/nfs4session.c @@ -169,7 +169,7 @@ bool nfs4_try_to_lock_slot(struct nfs4_slot_table *tbl, struct nfs4_slot *slot) struct nfs4_slot *nfs4_lookup_slot(struct nfs4_slot_table *tbl, u32 slotid) { if (slotid <= tbl->max_slotid) - return nfs4_find_or_create_slot(tbl, slotid, 1, GFP_NOWAIT); + return nfs4_find_or_create_slot(tbl, slotid, 0, GFP_NOWAIT); return ERR_PTR(-E2BIG); } -- cgit v1.1 From 54e4a0dfa25d9365c4e80a639e80d9213eb6edbe Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sun, 27 Nov 2016 15:12:39 -0500 Subject: pNFS: Fix a deadlock between read resends and layoutreturn We must not call nfs_pageio_init_read() on a new nfs_pageio_descriptor while holding a reference to a layout segment, as that can deadlock pnfs_update_layout(). Fixes: d67ae825a59d6 ("pnfs/flexfiles: Add the FlexFile Layout Driver") Signed-off-by: Trond Myklebust Cc: stable@vger.kernel.org # v4.0+ --- fs/nfs/flexfilelayout/flexfilelayout.c | 4 ++++ fs/nfs/pnfs.c | 4 ++++ 2 files changed, 8 insertions(+) (limited to 'fs/nfs') diff --git a/fs/nfs/flexfilelayout/flexfilelayout.c b/fs/nfs/flexfilelayout/flexfilelayout.c index 98ace12..a5c3888 100644 --- a/fs/nfs/flexfilelayout/flexfilelayout.c +++ b/fs/nfs/flexfilelayout/flexfilelayout.c @@ -28,6 +28,9 @@ static struct group_info *ff_zero_group; +static void ff_layout_read_record_layoutstats_done(struct rpc_task *task, + struct nfs_pgio_header *hdr); + static struct pnfs_layout_hdr * ff_layout_alloc_layout_hdr(struct inode *inode, gfp_t gfp_flags) { @@ -1293,6 +1296,7 @@ static int ff_layout_read_done_cb(struct rpc_task *task, hdr->pgio_mirror_idx + 1, &hdr->pgio_mirror_idx)) goto out_eagain; + ff_layout_read_record_layoutstats_done(task, hdr); pnfs_read_resend_pnfs(hdr); return task->tk_status; case -NFS4ERR_RESET_TO_MDS: diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index 259ef85..206d560 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c @@ -2286,6 +2286,10 @@ void pnfs_read_resend_pnfs(struct nfs_pgio_header *hdr) struct nfs_pageio_descriptor pgio; if (!test_and_set_bit(NFS_IOHDR_REDO, &hdr->flags)) { + /* Prevent deadlocks with layoutreturn! */ + pnfs_put_lseg(hdr->lseg); + hdr->lseg = NULL; + nfs_pageio_init_read(&pgio, hdr->inode, false, hdr->completion_ops); hdr->task.tk_status = nfs_pageio_resend(&pgio, hdr); -- cgit v1.1 From 7b650994ab07434ae58a247dc9ac87d2488ca75c Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 14 Nov 2016 13:10:48 -0500 Subject: pNFS: Don't clear the layout stateid if a layout return is outstanding If we no longer hold any layout segments, we're normally expected to consider the layout stateid to be invalid. However we cannot assume this if we're about to, or in the process of sending a layoutreturn. Fixes: 334a8f37115b ("pNFS: Don't forget the layout stateid if...") Signed-off-by: Trond Myklebust Cc: stable@vger.kernel.org # v4.8+ --- fs/nfs/pnfs.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'fs/nfs') diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index 206d560..09bbb07 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c @@ -411,7 +411,9 @@ pnfs_layout_remove_lseg(struct pnfs_layout_hdr *lo, list_del_init(&lseg->pls_list); /* Matched by pnfs_get_layout_hdr in pnfs_layout_insert_lseg */ atomic_dec(&lo->plh_refcount); - if (list_empty(&lo->plh_segs)) { + if (list_empty(&lo->plh_segs) && + !test_bit(NFS_LAYOUT_RETURN_REQUESTED, &lo->plh_flags) && + !test_bit(NFS_LAYOUT_RETURN, &lo->plh_flags)) { if (atomic_read(&lo->plh_outstanding) == 0) set_bit(NFS_LAYOUT_INVALID_STID, &lo->plh_flags); clear_bit(NFS_LAYOUT_BULK_RECALL, &lo->plh_flags); -- cgit v1.1 From ae5a459d5f65c3e83f3e14068dde5fb9c9d81807 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 14 Nov 2016 14:34:18 -0500 Subject: pNFS: Clear NFS_LAYOUT_RETURN_REQUESTED when invalidating the layout stateid We must ensure that we don't schedule a layoutreturn if the layout stateid has been marked as invalid. Fixes: 2a59a0411671e ("pNFS: Fix pnfs_set_layout_stateid() to clear...") Signed-off-by: Trond Myklebust Cc: stable@vger.kernel.org # v4.8+ --- fs/nfs/pnfs.c | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index 09bbb07..d4e06b7 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c @@ -299,6 +299,14 @@ pnfs_put_layout_hdr(struct pnfs_layout_hdr *lo) } } +static void +pnfs_clear_layoutreturn_info(struct pnfs_layout_hdr *lo) +{ + lo->plh_return_iomode = 0; + lo->plh_return_seq = 0; + clear_bit(NFS_LAYOUT_RETURN_REQUESTED, &lo->plh_flags); +} + /* * Mark a pnfs_layout_hdr and all associated layout segments as invalid * @@ -317,6 +325,7 @@ pnfs_mark_layout_stateid_invalid(struct pnfs_layout_hdr *lo, }; set_bit(NFS_LAYOUT_INVALID_STID, &lo->plh_flags); + pnfs_clear_layoutreturn_info(lo); return pnfs_mark_matching_lsegs_invalid(lo, lseg_list, &range, 0); } @@ -818,14 +827,6 @@ pnfs_destroy_all_layouts(struct nfs_client *clp) pnfs_destroy_layouts_byclid(clp, false); } -static void -pnfs_clear_layoutreturn_info(struct pnfs_layout_hdr *lo) -{ - lo->plh_return_iomode = 0; - lo->plh_return_seq = 0; - clear_bit(NFS_LAYOUT_RETURN_REQUESTED, &lo->plh_flags); -} - /* update lo->plh_stateid with new if is more recent */ void pnfs_set_layout_stateid(struct pnfs_layout_hdr *lo, const nfs4_stateid *new, -- cgit v1.1 From 9888d837f3cf6b1f38f7717ab58236f94123ca19 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Wed, 23 Nov 2016 12:36:04 -0500 Subject: pNFS: Force a retry of LAYOUTGET if the stateid doesn't match our cache If the server sends us a completely new stateid, and the client thinks it already holds a layout, then force a retry of the LAYOUTGET after invalidating the existing layout in order to avoid corruption due to races. Signed-off-by: Trond Myklebust --- fs/nfs/pnfs.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index d4e06b7..9b7e88b 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c @@ -1844,7 +1844,10 @@ pnfs_layout_process(struct nfs4_layoutget *lgp) goto out_forget; } - if (nfs4_stateid_match_other(&lo->plh_stateid, &res->stateid)) { + if (!pnfs_layout_is_valid(lo)) { + /* We have a completely new layout */ + pnfs_set_layout_stateid(lo, &res->stateid, true); + } else if (nfs4_stateid_match_other(&lo->plh_stateid, &res->stateid)) { /* existing state ID, make sure the sequence number matches. */ if (pnfs_layout_stateid_blocked(lo, &res->stateid)) { dprintk("%s forget reply due to sequence\n", __func__); @@ -1854,12 +1857,10 @@ pnfs_layout_process(struct nfs4_layoutget *lgp) } else { /* * We got an entirely new state ID. Mark all segments for the - * inode invalid, and don't bother validating the stateid - * sequence number. + * inode invalid, and retry the layoutget */ pnfs_mark_layout_stateid_invalid(lo, &free_me); - - pnfs_set_layout_stateid(lo, &res->stateid, true); + goto out_forget; } pnfs_get_lseg(lseg); -- cgit v1.1 From 6604b203fb6394ed1f24c21bfa3c207e5ae8e461 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 17 Oct 2016 17:54:32 -0400 Subject: pNFS: On error, do not send LAYOUTGET until the LAYOUTRETURN has completed If there is an I/O error, we should not call LAYOUTGET until the LAYOUTRETURN that reports the error is complete. Signed-off-by: Trond Myklebust Cc: stable@vger.kernel.org # v4.8+ --- fs/nfs/pnfs.c | 6 +++++- fs/nfs/pnfs.h | 1 + 2 files changed, 6 insertions(+), 1 deletion(-) (limited to 'fs/nfs') diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index 9b7e88b..06bfcd2 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c @@ -947,6 +947,7 @@ static void pnfs_clear_layoutcommit(struct inode *inode, void pnfs_clear_layoutreturn_waitbit(struct pnfs_layout_hdr *lo) { clear_bit_unlock(NFS_LAYOUT_RETURN, &lo->plh_flags); + clear_bit(NFS_LAYOUT_RETURN_LOCK, &lo->plh_flags); smp_mb__after_atomic(); wake_up_bit(&lo->plh_flags, NFS_LAYOUT_RETURN); rpc_wake_up(&NFS_SERVER(lo->plh_inode)->roc_rpcwaitq); @@ -960,8 +961,9 @@ pnfs_prepare_layoutreturn(struct pnfs_layout_hdr *lo, /* Serialise LAYOUTGET/LAYOUTRETURN */ if (atomic_read(&lo->plh_outstanding) != 0) return false; - if (test_and_set_bit(NFS_LAYOUT_RETURN, &lo->plh_flags)) + if (test_and_set_bit(NFS_LAYOUT_RETURN_LOCK, &lo->plh_flags)) return false; + set_bit(NFS_LAYOUT_RETURN, &lo->plh_flags); pnfs_get_layout_hdr(lo); if (test_bit(NFS_LAYOUT_RETURN_REQUESTED, &lo->plh_flags)) { if (stateid != NULL) { @@ -1954,6 +1956,8 @@ void pnfs_error_mark_layout_for_return(struct inode *inode, spin_lock(&inode->i_lock); pnfs_set_plh_return_info(lo, range.iomode, 0); + /* Block LAYOUTGET */ + set_bit(NFS_LAYOUT_RETURN, &lo->plh_flags); /* * mark all matching lsegs so that we are sure to have no live * segments at hand when sending layoutreturn. See pnfs_put_lseg() diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h index 5c29551..44cad8a 100644 --- a/fs/nfs/pnfs.h +++ b/fs/nfs/pnfs.h @@ -96,6 +96,7 @@ enum { NFS_LAYOUT_RW_FAILED, /* get rw layout failed stop trying */ NFS_LAYOUT_BULK_RECALL, /* bulk recall affecting layout */ NFS_LAYOUT_RETURN, /* layoutreturn in progress */ + NFS_LAYOUT_RETURN_LOCK, /* Serialise layoutreturn */ NFS_LAYOUT_RETURN_REQUESTED, /* Return this layout ASAP */ NFS_LAYOUT_INVALID_STID, /* layout stateid id is invalid */ NFS_LAYOUT_FIRST_LAYOUTGET, /* Serialize first layoutget */ -- cgit v1.1 From ee284e35d8c71bf5d4d807eaff6f67a17134b359 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Fri, 18 Nov 2016 15:21:30 -0500 Subject: pNFS: Fix race in pnfs_wait_on_layoutreturn We must put the task to sleep while holding the inode->i_lock in order to ensure atomicity with the test for NFS_LAYOUT_RETURN. Fixes: 500d701f336b ("NFS41: make close wait for layoutreturn") Signed-off-by: Trond Myklebust --- fs/nfs/pnfs.c | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index 06bfcd2..e17d9ec 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c @@ -1257,13 +1257,11 @@ bool pnfs_wait_on_layoutreturn(struct inode *ino, struct rpc_task *task) * i_lock */ spin_lock(&ino->i_lock); lo = nfsi->layout; - if (lo && test_bit(NFS_LAYOUT_RETURN, &lo->plh_flags)) + if (lo && test_bit(NFS_LAYOUT_RETURN, &lo->plh_flags)) { + rpc_sleep_on(&NFS_SERVER(ino)->roc_rpcwaitq, task, NULL); sleep = true; + } spin_unlock(&ino->i_lock); - - if (sleep) - rpc_sleep_on(&NFS_SERVER(ino)->roc_rpcwaitq, task, NULL); - return sleep; } -- cgit v1.1 From 17822b207f3b66c3aa09d749d583ae63b3637f01 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 25 Oct 2016 12:24:25 -0400 Subject: pNFS: consolidate the different range intersection tests Both pnfs.c and the flexfiles code have their own versions of the range intersection testing, and the "end_offset" helper. Signed-off-by: Trond Myklebust --- fs/nfs/flexfilelayout/flexfilelayoutdev.c | 33 +++++++---------------------- fs/nfs/pnfs.c | 35 +++---------------------------- fs/nfs/pnfs.h | 32 ++++++++++++++++++++++++++++ 3 files changed, 43 insertions(+), 57 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/flexfilelayout/flexfilelayoutdev.c b/fs/nfs/flexfilelayout/flexfilelayoutdev.c index f7a3f6b..6cf545c 100644 --- a/fs/nfs/flexfilelayout/flexfilelayoutdev.c +++ b/fs/nfs/flexfilelayout/flexfilelayoutdev.c @@ -198,22 +198,13 @@ static bool ff_layout_mirror_valid(struct pnfs_layout_segment *lseg, return true; } -static u64 -end_offset(u64 start, u64 len) -{ - u64 end; - - end = start + len; - return end >= start ? end : NFS4_MAX_UINT64; -} - static void extend_ds_error(struct nfs4_ff_layout_ds_err *err, u64 offset, u64 length) { u64 end; - end = max_t(u64, end_offset(err->offset, err->length), - end_offset(offset, length)); + end = max_t(u64, pnfs_end_offset(err->offset, err->length), + pnfs_end_offset(offset, length)); err->offset = min_t(u64, err->offset, offset); err->length = end - err->offset; } @@ -235,9 +226,9 @@ ff_ds_error_match(const struct nfs4_ff_layout_ds_err *e1, ret = memcmp(&e1->deviceid, &e2->deviceid, sizeof(e1->deviceid)); if (ret != 0) return ret; - if (end_offset(e1->offset, e1->length) < e2->offset) + if (pnfs_end_offset(e1->offset, e1->length) < e2->offset) return -1; - if (e1->offset > end_offset(e2->offset, e2->length)) + if (e1->offset > pnfs_end_offset(e2->offset, e2->length)) return 1; /* If ranges overlap or are contiguous, they are the same */ return 0; @@ -457,16 +448,6 @@ nfs4_ff_find_or_create_ds_client(struct pnfs_layout_segment *lseg, u32 ds_idx, } } -static bool is_range_intersecting(u64 offset1, u64 length1, - u64 offset2, u64 length2) -{ - u64 end1 = end_offset(offset1, length1); - u64 end2 = end_offset(offset2, length2); - - return (end1 == NFS4_MAX_UINT64 || end1 > offset2) && - (end2 == NFS4_MAX_UINT64 || end2 > offset1); -} - /* called with inode i_lock held */ int ff_layout_encode_ds_ioerr(struct nfs4_flexfile_layout *flo, struct xdr_stream *xdr, int *count, @@ -476,8 +457,10 @@ int ff_layout_encode_ds_ioerr(struct nfs4_flexfile_layout *flo, __be32 *p; list_for_each_entry_safe(err, n, &flo->error_list, list) { - if (!is_range_intersecting(err->offset, err->length, - range->offset, range->length)) + if (!pnfs_is_range_intersecting(err->offset, + pnfs_end_offset(err->offset, err->length), + range->offset, + pnfs_end_offset(range->offset, range->length))) continue; /* offset(8) + length(8) + stateid(NFS4_STATEID_SIZE) * + array length + deviceid(NFS4_DEVICEID4_SIZE) diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index e17d9ec..d70cc46 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c @@ -500,15 +500,6 @@ pnfs_put_lseg_locked(struct pnfs_layout_segment *lseg) } EXPORT_SYMBOL_GPL(pnfs_put_lseg_locked); -static u64 -end_offset(u64 start, u64 len) -{ - u64 end; - - end = start + len; - return end >= start ? end : NFS4_MAX_UINT64; -} - /* * is l2 fully contained in l1? * start1 end1 @@ -521,33 +512,13 @@ pnfs_lseg_range_contained(const struct pnfs_layout_range *l1, const struct pnfs_layout_range *l2) { u64 start1 = l1->offset; - u64 end1 = end_offset(start1, l1->length); + u64 end1 = pnfs_end_offset(start1, l1->length); u64 start2 = l2->offset; - u64 end2 = end_offset(start2, l2->length); + u64 end2 = pnfs_end_offset(start2, l2->length); return (start1 <= start2) && (end1 >= end2); } -/* - * is l1 and l2 intersecting? - * start1 end1 - * [----------------------------------) - * start2 end2 - * [----------------) - */ -static bool -pnfs_lseg_range_intersecting(const struct pnfs_layout_range *l1, - const struct pnfs_layout_range *l2) -{ - u64 start1 = l1->offset; - u64 end1 = end_offset(start1, l1->length); - u64 start2 = l2->offset; - u64 end2 = end_offset(start2, l2->length); - - return (end1 == NFS4_MAX_UINT64 || end1 > start2) && - (end2 == NFS4_MAX_UINT64 || end2 > start1); -} - static bool pnfs_lseg_dec_and_remove_zero(struct pnfs_layout_segment *lseg, struct list_head *tmp_list) { @@ -2069,7 +2040,7 @@ pnfs_generic_pg_test(struct nfs_pageio_descriptor *pgio, * */ if (pgio->pg_lseg) { - seg_end = end_offset(pgio->pg_lseg->pls_range.offset, + seg_end = pnfs_end_offset(pgio->pg_lseg->pls_range.offset, pgio->pg_lseg->pls_range.length); req_start = req_offset(req); WARN_ON_ONCE(req_start >= seg_end); diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h index 44cad8a..337dad3 100644 --- a/fs/nfs/pnfs.h +++ b/fs/nfs/pnfs.h @@ -560,6 +560,38 @@ pnfs_copy_range(struct pnfs_layout_range *dst, memcpy(dst, src, sizeof(*dst)); } +static inline u64 +pnfs_end_offset(u64 start, u64 len) +{ + if (NFS4_MAX_UINT64 - start <= len) + return NFS4_MAX_UINT64; + return start + len; +} + +/* + * Are 2 ranges intersecting? + * start1 end1 + * [----------------------------------) + * start2 end2 + * [----------------) + */ +static inline bool +pnfs_is_range_intersecting(u64 start1, u64 end1, u64 start2, u64 end2) +{ + return (end1 == NFS4_MAX_UINT64 || start2 < end1) && + (end2 == NFS4_MAX_UINT64 || start1 < end2); +} + +static inline bool +pnfs_lseg_range_intersecting(const struct pnfs_layout_range *l1, + const struct pnfs_layout_range *l2) +{ + u64 end1 = pnfs_end_offset(l1->offset, l1->length); + u64 end2 = pnfs_end_offset(l2->offset, l2->length); + + return pnfs_is_range_intersecting(l1->offset, end1, l2->offset, end2); +} + extern unsigned int layoutstats_timer; #ifdef NFS_DEBUG -- cgit v1.1 From 7b410d9ce460f70e346d91d8cfbdd2cb054eb775 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 31 Oct 2016 10:05:21 -0400 Subject: pNFS: Delay getting the layout header in CB_LAYOUTRECALL handlers Instead of grabbing the layout, we want to get the inode so that we can reduce races between layoutget and layoutrecall when the server does not support call referring. Signed-off-by: Trond Myklebust --- fs/nfs/callback_proc.c | 99 ++++++++++++++++++++++++++++++++++---------------- 1 file changed, 67 insertions(+), 32 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/callback_proc.c b/fs/nfs/callback_proc.c index e9aa235e..f073a6d2c 100644 --- a/fs/nfs/callback_proc.c +++ b/fs/nfs/callback_proc.c @@ -110,20 +110,52 @@ out: #if defined(CONFIG_NFS_V4_1) /* - * Lookup a layout by filehandle. + * Lookup a layout inode by stateid * - * Note: gets a refcount on the layout hdr and on its respective inode. - * Caller must put the layout hdr and the inode. + * Note: returns a refcount on the inode and superblock + */ +static struct inode *nfs_layout_find_inode_by_stateid(struct nfs_client *clp, + const nfs4_stateid *stateid) +{ + struct nfs_server *server; + struct inode *inode; + struct pnfs_layout_hdr *lo; + +restart: + list_for_each_entry_rcu(server, &clp->cl_superblocks, client_link) { + list_for_each_entry(lo, &server->layouts, plh_layouts) { + if (stateid != NULL && + !nfs4_stateid_match_other(stateid, &lo->plh_stateid)) + continue; + inode = igrab(lo->plh_inode); + if (!inode) + continue; + if (!nfs_sb_active(inode->i_sb)) { + rcu_read_lock(); + spin_unlock(&clp->cl_lock); + iput(inode); + spin_lock(&clp->cl_lock); + goto restart; + } + return inode; + } + } + + return NULL; +} + +/* + * Lookup a layout inode by filehandle. + * + * Note: returns a refcount on the inode and superblock * - * TODO: keep track of all layouts (and delegations) in a hash table - * hashed by filehandle. */ -static struct pnfs_layout_hdr * get_layout_by_fh_locked(struct nfs_client *clp, - struct nfs_fh *fh) +static struct inode *nfs_layout_find_inode_by_fh(struct nfs_client *clp, + const struct nfs_fh *fh) { struct nfs_server *server; struct nfs_inode *nfsi; - struct inode *ino; + struct inode *inode; struct pnfs_layout_hdr *lo; restart: @@ -134,37 +166,38 @@ restart: continue; if (nfsi->layout != lo) continue; - ino = igrab(lo->plh_inode); - if (!ino) - break; - spin_lock(&ino->i_lock); - /* Is this layout in the process of being freed? */ - if (nfsi->layout != lo) { - spin_unlock(&ino->i_lock); - iput(ino); + inode = igrab(lo->plh_inode); + if (!inode) + continue; + if (!nfs_sb_active(inode->i_sb)) { + rcu_read_lock(); + spin_unlock(&clp->cl_lock); + iput(inode); + spin_lock(&clp->cl_lock); goto restart; } - pnfs_get_layout_hdr(lo); - spin_unlock(&ino->i_lock); - return lo; + return inode; } } return NULL; } -static struct pnfs_layout_hdr * get_layout_by_fh(struct nfs_client *clp, - struct nfs_fh *fh) +static struct inode *nfs_layout_find_inode(struct nfs_client *clp, + const struct nfs_fh *fh, + const nfs4_stateid *stateid) { - struct pnfs_layout_hdr *lo; + struct inode *inode; spin_lock(&clp->cl_lock); rcu_read_lock(); - lo = get_layout_by_fh_locked(clp, fh); + inode = nfs_layout_find_inode_by_stateid(clp, stateid); + if (!inode) + inode = nfs_layout_find_inode_by_fh(clp, fh); rcu_read_unlock(); spin_unlock(&clp->cl_lock); - return lo; + return inode; } /* @@ -213,18 +246,20 @@ static u32 initiate_file_draining(struct nfs_client *clp, u32 rv = NFS4ERR_NOMATCHING_LAYOUT; LIST_HEAD(free_me_list); - lo = get_layout_by_fh(clp, &args->cbl_fh); - if (!lo) { - trace_nfs4_cb_layoutrecall_file(clp, &args->cbl_fh, NULL, - &args->cbl_stateid, -rv); + ino = nfs_layout_find_inode(clp, &args->cbl_fh, &args->cbl_stateid); + if (!ino) goto out; - } - ino = lo->plh_inode; pnfs_layoutcommit_inode(ino, false); spin_lock(&ino->i_lock); + lo = NFS_I(ino)->layout; + if (!lo) { + spin_unlock(&ino->i_lock); + goto out; + } + pnfs_get_layout_hdr(lo); rv = pnfs_check_callback_stateid(lo, &args->cbl_stateid); if (rv != NFS_OK) goto unlock; @@ -258,10 +293,10 @@ unlock: /* Free all lsegs that are attached to commit buckets */ nfs_commit_inode(ino, 0); pnfs_put_layout_hdr(lo); +out: trace_nfs4_cb_layoutrecall_file(clp, &args->cbl_fh, ino, &args->cbl_stateid, -rv); - iput(ino); -out: + nfs_iput_and_deactive(ino); return rv; } -- cgit v1.1 From 68f744797edd27016055c562a605691f5d4ac933 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Wed, 12 Oct 2016 19:50:54 -0400 Subject: pNFS: Do not free layout segments that are marked for return We may want to process and transmit layout stat information for the layout segments that are being returned, so we should defer freeing them until after the layoutreturn has completed. Signed-off-by: Trond Myklebust --- fs/nfs/nfs4proc.c | 15 +++-------- fs/nfs/pnfs.c | 74 ++++++++++++++++++++++++++++++++++++++++++++++++------- fs/nfs/pnfs.h | 6 ++++- 3 files changed, 73 insertions(+), 22 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 917a6db..561b21e 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -8572,21 +8572,12 @@ static void nfs4_layoutreturn_release(void *calldata) { struct nfs4_layoutreturn *lrp = calldata; struct pnfs_layout_hdr *lo = lrp->args.layout; - LIST_HEAD(freeme); dprintk("--> %s\n", __func__); - spin_lock(&lo->plh_inode->i_lock); - if (lrp->res.lrs_present) { - pnfs_mark_matching_lsegs_invalid(lo, &freeme, - &lrp->args.range, - be32_to_cpu(lrp->args.stateid.seqid)); - pnfs_set_layout_stateid(lo, &lrp->res.stateid, true); - } else - pnfs_mark_layout_stateid_invalid(lo, &freeme); - pnfs_clear_layoutreturn_waitbit(lo); - spin_unlock(&lo->plh_inode->i_lock); + pnfs_layoutreturn_free_lsegs(lo, &lrp->args.range, + be32_to_cpu(lrp->args.stateid.seqid), + lrp->res.lrs_present ? &lrp->res.stateid : NULL); nfs4_sequence_free_slot(&lrp->res.seq_res); - pnfs_free_lseg_list(&freeme); pnfs_put_layout_hdr(lrp->args.layout); nfs_iput_and_deactive(lrp->inode); kfree(calldata); diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index d70cc46..555151b 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c @@ -54,6 +54,10 @@ static DEFINE_SPINLOCK(pnfs_spinlock); static LIST_HEAD(pnfs_modules_tbl); static void pnfs_layoutreturn_before_put_layout_hdr(struct pnfs_layout_hdr *lo); +static void pnfs_free_returned_lsegs(struct pnfs_layout_hdr *lo, + struct list_head *free_me, + const struct pnfs_layout_range *range, + u32 seq); /* Return the registered pnfs layout driver module matching given id */ static struct pnfs_layoutdriver_type * @@ -326,6 +330,7 @@ pnfs_mark_layout_stateid_invalid(struct pnfs_layout_hdr *lo, set_bit(NFS_LAYOUT_INVALID_STID, &lo->plh_flags); pnfs_clear_layoutreturn_info(lo); + pnfs_free_returned_lsegs(lo, lseg_list, &range, 0); return pnfs_mark_matching_lsegs_invalid(lo, lseg_list, &range, 0); } @@ -405,9 +410,10 @@ pnfs_init_lseg(struct pnfs_layout_hdr *lo, struct pnfs_layout_segment *lseg, static void pnfs_free_lseg(struct pnfs_layout_segment *lseg) { - struct inode *ino = lseg->pls_layout->plh_inode; - - NFS_SERVER(ino)->pnfs_curr_ld->free_lseg(lseg); + if (lseg != NULL) { + struct inode *inode = lseg->pls_layout->plh_inode; + NFS_SERVER(inode)->pnfs_curr_ld->free_lseg(lseg); + } } static void @@ -430,6 +436,18 @@ pnfs_layout_remove_lseg(struct pnfs_layout_hdr *lo, rpc_wake_up(&NFS_SERVER(inode)->roc_rpcwaitq); } +static bool +pnfs_cache_lseg_for_layoutreturn(struct pnfs_layout_hdr *lo, + struct pnfs_layout_segment *lseg) +{ + if (test_and_clear_bit(NFS_LSEG_LAYOUTRETURN, &lseg->pls_flags) && + pnfs_layout_is_valid(lo)) { + list_move_tail(&lseg->pls_list, &lo->plh_return_segs); + return true; + } + return false; +} + void pnfs_put_lseg(struct pnfs_layout_segment *lseg) { @@ -453,6 +471,8 @@ pnfs_put_lseg(struct pnfs_layout_segment *lseg) } pnfs_get_layout_hdr(lo); pnfs_layout_remove_lseg(lo, lseg); + if (pnfs_cache_lseg_for_layoutreturn(lo, lseg)) + lseg = NULL; spin_unlock(&inode->i_lock); pnfs_free_lseg(lseg); pnfs_put_layout_hdr(lo); @@ -493,9 +513,11 @@ pnfs_put_lseg_locked(struct pnfs_layout_segment *lseg) struct pnfs_layout_hdr *lo = lseg->pls_layout; if (test_bit(NFS_LSEG_VALID, &lseg->pls_flags)) return; - pnfs_get_layout_hdr(lo); pnfs_layout_remove_lseg(lo, lseg); - pnfs_free_lseg_async(lseg); + if (!pnfs_cache_lseg_for_layoutreturn(lo, lseg)) { + pnfs_get_layout_hdr(lo); + pnfs_free_lseg_async(lseg); + } } } EXPORT_SYMBOL_GPL(pnfs_put_lseg_locked); @@ -619,6 +641,20 @@ pnfs_mark_matching_lsegs_invalid(struct pnfs_layout_hdr *lo, return remaining; } +static void +pnfs_free_returned_lsegs(struct pnfs_layout_hdr *lo, + struct list_head *free_me, + const struct pnfs_layout_range *range, + u32 seq) +{ + struct pnfs_layout_segment *lseg, *next; + + list_for_each_entry_safe(lseg, next, &lo->plh_return_segs, pls_list) { + if (pnfs_match_lseg_recall(lseg, range, seq)) + list_move_tail(&lseg->pls_list, free_me); + } +} + /* note free_me must contain lsegs from a single layout_hdr */ void pnfs_free_lseg_list(struct list_head *free_me) @@ -915,7 +951,7 @@ static void pnfs_clear_layoutcommit(struct inode *inode, } } -void pnfs_clear_layoutreturn_waitbit(struct pnfs_layout_hdr *lo) +static void pnfs_clear_layoutreturn_waitbit(struct pnfs_layout_hdr *lo) { clear_bit_unlock(NFS_LAYOUT_RETURN, &lo->plh_flags); clear_bit(NFS_LAYOUT_RETURN_LOCK, &lo->plh_flags); @@ -924,6 +960,27 @@ void pnfs_clear_layoutreturn_waitbit(struct pnfs_layout_hdr *lo) rpc_wake_up(&NFS_SERVER(lo->plh_inode)->roc_rpcwaitq); } +void pnfs_layoutreturn_free_lsegs(struct pnfs_layout_hdr *lo, + const struct pnfs_layout_range *range, + u32 seq, + const nfs4_stateid *stateid) +{ + struct inode *inode = lo->plh_inode; + LIST_HEAD(freeme); + + spin_lock(&inode->i_lock); + if (stateid) { + pnfs_mark_matching_lsegs_invalid(lo, &freeme, range, seq); + pnfs_free_returned_lsegs(lo, &freeme, range, seq); + pnfs_set_layout_stateid(lo, stateid, true); + } else + pnfs_mark_layout_stateid_invalid(lo, &freeme); + pnfs_clear_layoutreturn_waitbit(lo); + spin_unlock(&inode->i_lock); + pnfs_free_lseg_list(&freeme); + +} + static bool pnfs_prepare_layoutreturn(struct pnfs_layout_hdr *lo, nfs4_stateid *stateid, @@ -1349,6 +1406,7 @@ alloc_init_layout_hdr(struct inode *ino, atomic_set(&lo->plh_refcount, 1); INIT_LIST_HEAD(&lo->plh_layouts); INIT_LIST_HEAD(&lo->plh_segs); + INIT_LIST_HEAD(&lo->plh_return_segs); INIT_LIST_HEAD(&lo->plh_bulk_destroy); lo->plh_inode = ino; lo->plh_lc_cred = get_rpccred(ctx->cred); @@ -1920,7 +1978,6 @@ void pnfs_error_mark_layout_for_return(struct inode *inode, .offset = 0, .length = NFS4_MAX_UINT64, }; - LIST_HEAD(free_me); bool return_now = false; spin_lock(&inode->i_lock); @@ -1932,7 +1989,7 @@ void pnfs_error_mark_layout_for_return(struct inode *inode, * segments at hand when sending layoutreturn. See pnfs_put_lseg() * for how it works. */ - if (!pnfs_mark_matching_lsegs_return(lo, &free_me, &range, 0)) { + if (!pnfs_mark_matching_lsegs_return(lo, &lo->plh_return_segs, &range, 0)) { nfs4_stateid stateid; enum pnfs_iomode iomode; @@ -1944,7 +2001,6 @@ void pnfs_error_mark_layout_for_return(struct inode *inode, spin_unlock(&inode->i_lock); nfs_commit_inode(inode, 0); } - pnfs_free_lseg_list(&free_me); } EXPORT_SYMBOL_GPL(pnfs_error_mark_layout_for_return); diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h index 337dad3..a382710 100644 --- a/fs/nfs/pnfs.h +++ b/fs/nfs/pnfs.h @@ -191,6 +191,7 @@ struct pnfs_layout_hdr { struct list_head plh_layouts; /* other client layouts */ struct list_head plh_bulk_destroy; struct list_head plh_segs; /* layout segments list */ + struct list_head plh_return_segs; /* invalid layout segments */ unsigned long plh_block_lgets; /* block LAYOUTGET if >0 */ unsigned long plh_retry_timestamp; unsigned long plh_flags; @@ -293,7 +294,10 @@ struct pnfs_layout_segment *pnfs_update_layout(struct inode *ino, enum pnfs_iomode iomode, bool strict_iomode, gfp_t gfp_flags); -void pnfs_clear_layoutreturn_waitbit(struct pnfs_layout_hdr *lo); +void pnfs_layoutreturn_free_lsegs(struct pnfs_layout_hdr *lo, + const struct pnfs_layout_range *range, + u32 seq, + const nfs4_stateid *stateid); void pnfs_generic_layout_insert_lseg(struct pnfs_layout_hdr *lo, struct pnfs_layout_segment *lseg, -- cgit v1.1 From 2a974425e57fb882c93709c6072bf66d04431635 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sun, 20 Nov 2016 13:13:54 -0500 Subject: NFSv4: Ignore LAYOUTRETURN result if the layout doesn't match or is invalid Fix a potential race with CB_LAYOUTRECALL in which the server recalls the remaining layout segments while our LAYOUTRETURN is still in transit. Signed-off-by: Trond Myklebust --- fs/nfs/nfs4proc.c | 3 +-- fs/nfs/pnfs.c | 8 +++++++- fs/nfs/pnfs.h | 2 +- 3 files changed, 9 insertions(+), 4 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 561b21e..87972cf 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -8574,8 +8574,7 @@ static void nfs4_layoutreturn_release(void *calldata) struct pnfs_layout_hdr *lo = lrp->args.layout; dprintk("--> %s\n", __func__); - pnfs_layoutreturn_free_lsegs(lo, &lrp->args.range, - be32_to_cpu(lrp->args.stateid.seqid), + pnfs_layoutreturn_free_lsegs(lo, &lrp->args.stateid, &lrp->args.range, lrp->res.lrs_present ? &lrp->res.stateid : NULL); nfs4_sequence_free_slot(&lrp->res.seq_res); pnfs_put_layout_hdr(lrp->args.layout); diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index 555151b..471018f 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c @@ -961,20 +961,26 @@ static void pnfs_clear_layoutreturn_waitbit(struct pnfs_layout_hdr *lo) } void pnfs_layoutreturn_free_lsegs(struct pnfs_layout_hdr *lo, + const nfs4_stateid *arg_stateid, const struct pnfs_layout_range *range, - u32 seq, const nfs4_stateid *stateid) { struct inode *inode = lo->plh_inode; LIST_HEAD(freeme); spin_lock(&inode->i_lock); + if (!pnfs_layout_is_valid(lo) || !arg_stateid || + !nfs4_stateid_match_other(&lo->plh_stateid, arg_stateid)) + goto out_unlock; if (stateid) { + u32 seq = be32_to_cpu(arg_stateid->seqid); + pnfs_mark_matching_lsegs_invalid(lo, &freeme, range, seq); pnfs_free_returned_lsegs(lo, &freeme, range, seq); pnfs_set_layout_stateid(lo, stateid, true); } else pnfs_mark_layout_stateid_invalid(lo, &freeme); +out_unlock: pnfs_clear_layoutreturn_waitbit(lo); spin_unlock(&inode->i_lock); pnfs_free_lseg_list(&freeme); diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h index a382710..bc9a3aa 100644 --- a/fs/nfs/pnfs.h +++ b/fs/nfs/pnfs.h @@ -295,8 +295,8 @@ struct pnfs_layout_segment *pnfs_update_layout(struct inode *ino, bool strict_iomode, gfp_t gfp_flags); void pnfs_layoutreturn_free_lsegs(struct pnfs_layout_hdr *lo, + const nfs4_stateid *arg_stateid, const struct pnfs_layout_range *range, - u32 seq, const nfs4_stateid *stateid); void pnfs_generic_layout_insert_lseg(struct pnfs_layout_hdr *lo, -- cgit v1.1 From e685d237e60886afd75c918c80d20a3dc2ad27c9 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Fri, 18 Nov 2016 15:18:52 -0500 Subject: pNFS: Remove spurious wake up in pnfs_layout_remove_lseg() There is no change to the value of NFS_LAYOUT_RETURN, so we should not be waking up the RPC call. Signed-off-by: Trond Myklebust --- fs/nfs/pnfs.c | 3 --- 1 file changed, 3 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index 471018f..a64d1e4 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c @@ -420,8 +420,6 @@ static void pnfs_layout_remove_lseg(struct pnfs_layout_hdr *lo, struct pnfs_layout_segment *lseg) { - struct inode *inode = lo->plh_inode; - WARN_ON(test_bit(NFS_LSEG_VALID, &lseg->pls_flags)); list_del_init(&lseg->pls_list); /* Matched by pnfs_get_layout_hdr in pnfs_layout_insert_lseg */ @@ -433,7 +431,6 @@ pnfs_layout_remove_lseg(struct pnfs_layout_hdr *lo, set_bit(NFS_LAYOUT_INVALID_STID, &lo->plh_flags); clear_bit(NFS_LAYOUT_BULK_RECALL, &lo->plh_flags); } - rpc_wake_up(&NFS_SERVER(inode)->roc_rpcwaitq); } static bool -- cgit v1.1 From 0cdc329ec9b150c165bcb603c4314b4031b24785 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 21 Nov 2016 11:05:33 -0500 Subject: pNFS: Skip checking for return-on-close if the layout is invalid Signed-off-by: Trond Myklebust --- fs/nfs/pnfs.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'fs/nfs') diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index a64d1e4..330f3a0 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c @@ -1190,7 +1190,8 @@ bool pnfs_roc(struct inode *ino) spin_lock(&ino->i_lock); lo = nfsi->layout; - if (!lo || test_bit(NFS_LAYOUT_BULK_RECALL, &lo->plh_flags)) + if (!lo || !pnfs_layout_is_valid(lo) || + test_bit(NFS_LAYOUT_BULK_RECALL, &lo->plh_flags)) goto out_noroc; /* no roc if we hold a delegation */ -- cgit v1.1 From 94e5c571fccb8eb551d3d5f5d163bf0c253a6ed8 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Thu, 15 Sep 2016 18:49:52 -0400 Subject: pNFS: Get rid of unnecessary layout parameter in encode_layoutreturn callback The parameter is already present in the "args" structure. Signed-off-by: Trond Myklebust --- fs/nfs/flexfilelayout/flexfilelayout.c | 4 ++-- fs/nfs/nfs4xdr.c | 8 ++++---- fs/nfs/objlayout/objlayout.c | 4 ++-- fs/nfs/objlayout/objlayout.h | 1 - fs/nfs/pnfs.h | 3 +-- 5 files changed, 9 insertions(+), 11 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/flexfilelayout/flexfilelayout.c b/fs/nfs/flexfilelayout/flexfilelayout.c index a5c3888..90462a2 100644 --- a/fs/nfs/flexfilelayout/flexfilelayout.c +++ b/fs/nfs/flexfilelayout/flexfilelayout.c @@ -2012,10 +2012,10 @@ ff_layout_alloc_deviceid_node(struct nfs_server *server, } static void -ff_layout_encode_layoutreturn(struct pnfs_layout_hdr *lo, - struct xdr_stream *xdr, +ff_layout_encode_layoutreturn(struct xdr_stream *xdr, const struct nfs4_layoutreturn_args *args) { + struct pnfs_layout_hdr *lo = args->layout; struct nfs4_flexfile_layout *flo = FF_LAYOUT_FROM_HDR(lo); __be32 *start; diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c index b549315..86f72ae 100644 --- a/fs/nfs/nfs4xdr.c +++ b/fs/nfs/nfs4xdr.c @@ -2013,6 +2013,7 @@ encode_layoutreturn(struct xdr_stream *xdr, const struct nfs4_layoutreturn_args *args, struct compound_hdr *hdr) { + const struct pnfs_layoutdriver_type *lr_ops = NFS_SERVER(args->inode)->pnfs_curr_ld; __be32 *p; encode_op_hdr(xdr, OP_LAYOUTRETURN, decode_layoutreturn_maxsz, hdr); @@ -2027,10 +2028,9 @@ encode_layoutreturn(struct xdr_stream *xdr, spin_lock(&args->inode->i_lock); encode_nfs4_stateid(xdr, &args->stateid); spin_unlock(&args->inode->i_lock); - if (NFS_SERVER(args->inode)->pnfs_curr_ld->encode_layoutreturn) { - NFS_SERVER(args->inode)->pnfs_curr_ld->encode_layoutreturn( - NFS_I(args->inode)->layout, xdr, args); - } else + if (lr_ops->encode_layoutreturn) + lr_ops->encode_layoutreturn(xdr, args); + else encode_uint32(xdr, 0); } diff --git a/fs/nfs/objlayout/objlayout.c b/fs/nfs/objlayout/objlayout.c index 919efd4..2a4cdce9 100644 --- a/fs/nfs/objlayout/objlayout.c +++ b/fs/nfs/objlayout/objlayout.c @@ -504,10 +504,10 @@ encode_accumulated_error(struct objlayout *objlay, __be32 *p) } void -objlayout_encode_layoutreturn(struct pnfs_layout_hdr *pnfslay, - struct xdr_stream *xdr, +objlayout_encode_layoutreturn(struct xdr_stream *xdr, const struct nfs4_layoutreturn_args *args) { + struct pnfs_layout_hdr *pnfslay = args->layout; struct objlayout *objlay = OBJLAYOUT(pnfslay); struct objlayout_io_res *oir, *tmp; __be32 *start; diff --git a/fs/nfs/objlayout/objlayout.h b/fs/nfs/objlayout/objlayout.h index 2641dba..fc94a58 100644 --- a/fs/nfs/objlayout/objlayout.h +++ b/fs/nfs/objlayout/objlayout.h @@ -175,7 +175,6 @@ extern void objlayout_encode_layoutcommit( const struct nfs4_layoutcommit_args *); extern void objlayout_encode_layoutreturn( - struct pnfs_layout_hdr *, struct xdr_stream *, const struct nfs4_layoutreturn_args *); diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h index bc9a3aa..75ff939 100644 --- a/fs/nfs/pnfs.h +++ b/fs/nfs/pnfs.h @@ -172,8 +172,7 @@ struct pnfs_layoutdriver_type { (struct nfs_server *server, struct pnfs_device *pdev, gfp_t gfp_flags); - void (*encode_layoutreturn) (struct pnfs_layout_hdr *layoutid, - struct xdr_stream *xdr, + void (*encode_layoutreturn) (struct xdr_stream *xdr, const struct nfs4_layoutreturn_args *args); void (*cleanup_layoutcommit) (struct nfs4_layoutcommit_data *data); -- cgit v1.1 From 69820d22c559c46f94e9ae08677581365c3748d5 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 15 Nov 2016 18:29:59 -0500 Subject: pNFS: Don't mark layout segments invalid on layoutreturn in pnfs_roc The layoutreturn call will take care of invalidating the layout segments once the call is successful. Signed-off-by: Trond Myklebust --- fs/nfs/pnfs.c | 20 +++++++++++++------- 1 file changed, 13 insertions(+), 7 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index 330f3a0..d7b5ad4 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c @@ -1205,22 +1205,28 @@ bool pnfs_roc(struct inode *ino) goto out_noroc; } - /* always send layoutreturn if being marked so */ - if (test_bit(NFS_LAYOUT_RETURN_REQUESTED, &lo->plh_flags)) - layoutreturn = pnfs_prepare_layoutreturn(lo, - &stateid, NULL); - list_for_each_entry_safe(lseg, tmp, &lo->plh_segs, pls_list) + list_for_each_entry_safe(lseg, tmp, &lo->plh_segs, pls_list) { /* If we are sending layoutreturn, invalidate all valid lsegs */ - if (layoutreturn || test_bit(NFS_LSEG_ROC, &lseg->pls_flags)) { + if (test_bit(NFS_LSEG_ROC, &lseg->pls_flags)) { mark_lseg_invalid(lseg, &tmp_list); found = true; } + } + + /* always send layoutreturn if being marked so */ + if (test_bit(NFS_LAYOUT_RETURN_REQUESTED, &lo->plh_flags)) { + layoutreturn = pnfs_prepare_layoutreturn(lo, + &stateid, NULL); + if (layoutreturn) + goto out_noroc; + } + /* ROC in two conditions: * 1. there are ROC lsegs * 2. we don't send layoutreturn */ - if (found && !layoutreturn) { + if (found) { /* lo ref dropped in pnfs_roc_release() */ pnfs_get_layout_hdr(lo); roc = true; -- cgit v1.1 From d8434d4c54789bd8ac30b4a69037115b3594d2b3 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Wed, 16 Nov 2016 13:54:00 -0500 Subject: NFSv4: Fix missing operation accounting in NFS4_dec_delegreturn_sz We need to account for the reply to the PUTFH operation in the DELEGRETURN compound. Signed-off-by: Trond Myklebust --- fs/nfs/nfs4xdr.c | 1 + 1 file changed, 1 insertion(+) (limited to 'fs/nfs') diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c index 86f72ae..0a82b3f 100644 --- a/fs/nfs/nfs4xdr.c +++ b/fs/nfs/nfs4xdr.c @@ -710,6 +710,7 @@ static int nfs4_stat_to_errno(int); encode_getattr_maxsz) #define NFS4_dec_delegreturn_sz (compound_decode_hdr_maxsz + \ decode_sequence_maxsz + \ + decode_putfh_maxsz + \ decode_delegreturn_maxsz + \ decode_getattr_maxsz) #define NFS4_enc_getacl_sz (compound_encode_hdr_maxsz + \ -- cgit v1.1 From cf80516579ceb87b91205e68fb31d5affd5aea8d Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 15 Nov 2016 14:56:07 -0500 Subject: NFSv4: Add encode/decode of the layoutreturn op in CLOSE Add XDR encoding for the layoutreturn op, and storage for the layoutreturn arguments to the CLOSE compound. Signed-off-by: Trond Myklebust --- fs/nfs/nfs4proc.c | 49 ++++++++++++++++++++++++++++++++++++++++--------- fs/nfs/nfs4xdr.c | 26 ++++++++++++++++++++++++++ 2 files changed, 66 insertions(+), 9 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 87972cf..9b9b0ea 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -3035,10 +3035,14 @@ struct nfs4_closedata { struct nfs4_state *state; struct nfs_closeargs arg; struct nfs_closeres res; + struct { + struct nfs4_layoutreturn_args arg; + struct nfs4_layoutreturn_res res; + u32 roc_barrier; + bool roc; + } lr; struct nfs_fattr fattr; unsigned long timestamp; - bool roc; - u32 roc_barrier; }; static void nfs4_free_closedata(void *data) @@ -3047,7 +3051,7 @@ static void nfs4_free_closedata(void *data) struct nfs4_state_owner *sp = calldata->state->owner; struct super_block *sb = calldata->state->inode->i_sb; - if (calldata->roc) + if (calldata->lr.roc) pnfs_roc_release(calldata->state->inode); nfs4_put_open_state(calldata->state); nfs_free_seqid(calldata->arg.seqid); @@ -3067,15 +3071,41 @@ static void nfs4_close_done(struct rpc_task *task, void *data) if (!nfs4_sequence_done(task, &calldata->res.seq_res)) return; trace_nfs4_close(state, &calldata->arg, &calldata->res, task->tk_status); + + /* Handle Layoutreturn errors */ + if (calldata->arg.lr_args && task->tk_status != 0) { + switch (calldata->res.lr_ret) { + default: + calldata->res.lr_ret = -NFS4ERR_NOMATCHING_LAYOUT; + break; + case 0: + calldata->arg.lr_args = NULL; + calldata->res.lr_res = NULL; + break; + case -NFS4ERR_ADMIN_REVOKED: + case -NFS4ERR_DELEG_REVOKED: + case -NFS4ERR_EXPIRED: + case -NFS4ERR_BAD_STATEID: + case -NFS4ERR_OLD_STATEID: + case -NFS4ERR_UNKNOWN_LAYOUTTYPE: + case -NFS4ERR_WRONG_CRED: + calldata->arg.lr_args = NULL; + calldata->res.lr_res = NULL; + calldata->res.lr_ret = 0; + rpc_restart_call_prepare(task); + return; + } + } + /* hmm. we are done with the inode, and in the process of freeing * the state_owner. we keep this around to process errors */ switch (task->tk_status) { case 0: res_stateid = &calldata->res.stateid; - if (calldata->roc) + if (calldata->lr.roc) pnfs_roc_set_barrier(state->inode, - calldata->roc_barrier); + calldata->lr.roc_barrier); renew_lease(server, calldata->timestamp); break; case -NFS4ERR_ADMIN_REVOKED: @@ -3151,7 +3181,7 @@ static void nfs4_close_prepare(struct rpc_task *task, void *data) goto out_no_action; } - if (nfs4_wait_on_layoutreturn(inode, task)) { + if (!calldata->arg.lr_args && nfs4_wait_on_layoutreturn(inode, task)) { nfs_release_seqid(calldata->arg.seqid); goto out_wait; } @@ -3165,8 +3195,8 @@ static void nfs4_close_prepare(struct rpc_task *task, void *data) else calldata->arg.bitmask = NULL; } - if (calldata->roc) - pnfs_roc_get_barrier(inode, &calldata->roc_barrier); + if (calldata->lr.roc) + pnfs_roc_get_barrier(inode, &calldata->lr.roc_barrier); calldata->arg.share_access = nfs4_map_atomic_open_share(NFS_SERVER(inode), @@ -3250,7 +3280,8 @@ int nfs4_do_close(struct nfs4_state *state, gfp_t gfp_mask, int wait) calldata->res.fattr = &calldata->fattr; calldata->res.seqid = calldata->arg.seqid; calldata->res.server = server; - calldata->roc = nfs4_roc(state->inode); + calldata->res.lr_ret = -NFS4ERR_NOMATCHING_LAYOUT; + calldata->lr.roc = nfs4_roc(state->inode); nfs_sb_active(calldata->inode->i_sb); msg.rpc_argp = &calldata->arg; diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c index 0a82b3f..73d2a68 100644 --- a/fs/nfs/nfs4xdr.c +++ b/fs/nfs/nfs4xdr.c @@ -415,6 +415,8 @@ static int nfs4_stat_to_errno(int); #else /* CONFIG_NFS_V4_1 */ #define encode_sequence_maxsz 0 #define decode_sequence_maxsz 0 +#define encode_layoutreturn_maxsz 0 +#define decode_layoutreturn_maxsz 0 #endif /* CONFIG_NFS_V4_1 */ #define NFS4_enc_compound_sz (1024) /* XXX: large enough? */ @@ -508,11 +510,13 @@ static int nfs4_stat_to_errno(int); #define NFS4_enc_close_sz (compound_encode_hdr_maxsz + \ encode_sequence_maxsz + \ encode_putfh_maxsz + \ + encode_layoutreturn_maxsz + \ encode_close_maxsz + \ encode_getattr_maxsz) #define NFS4_dec_close_sz (compound_decode_hdr_maxsz + \ decode_sequence_maxsz + \ decode_putfh_maxsz + \ + decode_layoutreturn_maxsz + \ decode_close_maxsz + \ decode_getattr_maxsz) #define NFS4_enc_setattr_sz (compound_encode_hdr_maxsz + \ @@ -2061,6 +2065,13 @@ static void encode_free_stateid(struct xdr_stream *xdr, encode_op_hdr(xdr, OP_FREE_STATEID, decode_free_stateid_maxsz, hdr); encode_nfs4_stateid(xdr, &args->stateid); } +#else +static inline void +encode_layoutreturn(struct xdr_stream *xdr, + const struct nfs4_layoutreturn_args *args, + struct compound_hdr *hdr) +{ +} #endif /* CONFIG_NFS_V4_1 */ /* @@ -2248,6 +2259,8 @@ static void nfs4_xdr_enc_close(struct rpc_rqst *req, struct xdr_stream *xdr, encode_compound_hdr(xdr, req, &hdr); encode_sequence(xdr, &args->seq_args, &hdr); encode_putfh(xdr, args->fh, &hdr); + if (args->lr_args) + encode_layoutreturn(xdr, args->lr_args, &hdr); encode_close(xdr, args, &hdr); if (args->bitmask != NULL) encode_getfattr(xdr, args->bitmask, &hdr); @@ -6088,6 +6101,13 @@ static int decode_free_stateid(struct xdr_stream *xdr, res->status = decode_op_hdr(xdr, OP_FREE_STATEID); return res->status; } +#else +static inline +int decode_layoutreturn(struct xdr_stream *xdr, + struct nfs4_layoutreturn_res *res) +{ + return 0; +} #endif /* CONFIG_NFS_V4_1 */ /* @@ -6440,6 +6460,12 @@ static int nfs4_xdr_dec_close(struct rpc_rqst *rqstp, struct xdr_stream *xdr, status = decode_putfh(xdr); if (status) goto out; + if (res->lr_res) { + status = decode_layoutreturn(xdr, res->lr_res); + res->lr_ret = status; + if (status) + goto out; + } status = decode_close(xdr, res); if (status != 0) goto out; -- cgit v1.1 From 586f1c39daf5c840c742b9be1ec236429f26dc13 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 15 Nov 2016 15:03:33 -0500 Subject: NFSv4: Add encode/decode of the layoutreturn op in DELEGRETURN Add XDR encoding for the layoutreturn op, and storage for the layoutreturn arguments to the DELEGRETURN compound. Signed-off-by: Trond Myklebust --- fs/nfs/nfs4proc.c | 50 +++++++++++++++++++++++++++++++++++++++++--------- fs/nfs/nfs4xdr.c | 10 ++++++++++ 2 files changed, 51 insertions(+), 9 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 9b9b0ea..765af66 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -5608,11 +5608,15 @@ struct nfs4_delegreturndata { struct nfs_fh fh; nfs4_stateid stateid; unsigned long timestamp; + struct { + struct nfs4_layoutreturn_args arg; + struct nfs4_layoutreturn_res res; + u32 roc_barrier; + bool roc; + } lr; struct nfs_fattr fattr; int rpc_status; struct inode *inode; - bool roc; - u32 roc_barrier; }; static void nfs4_delegreturn_done(struct rpc_task *task, void *calldata) @@ -5623,6 +5627,32 @@ static void nfs4_delegreturn_done(struct rpc_task *task, void *calldata) return; trace_nfs4_delegreturn_exit(&data->args, &data->res, task->tk_status); + + /* Handle Layoutreturn errors */ + if (data->args.lr_args && task->tk_status != 0) { + switch(data->res.lr_ret) { + default: + data->res.lr_ret = -NFS4ERR_NOMATCHING_LAYOUT; + break; + case 0: + data->args.lr_args = NULL; + data->res.lr_res = NULL; + break; + case -NFS4ERR_ADMIN_REVOKED: + case -NFS4ERR_DELEG_REVOKED: + case -NFS4ERR_EXPIRED: + case -NFS4ERR_BAD_STATEID: + case -NFS4ERR_OLD_STATEID: + case -NFS4ERR_UNKNOWN_LAYOUTTYPE: + case -NFS4ERR_WRONG_CRED: + data->args.lr_args = NULL; + data->res.lr_res = NULL; + data->res.lr_ret = 0; + rpc_restart_call_prepare(task); + return; + } + } + switch (task->tk_status) { case 0: renew_lease(data->res.server, data->timestamp); @@ -5646,8 +5676,8 @@ static void nfs4_delegreturn_done(struct rpc_task *task, void *calldata) } } data->rpc_status = task->tk_status; - if (data->roc && data->rpc_status == 0) - pnfs_roc_set_barrier(data->inode, data->roc_barrier); + if (data->lr.roc && data->rpc_status == 0) + pnfs_roc_set_barrier(data->inode, data->lr.roc_barrier); } static void nfs4_delegreturn_release(void *calldata) @@ -5656,7 +5686,7 @@ static void nfs4_delegreturn_release(void *calldata) struct inode *inode = data->inode; if (inode) { - if (data->roc) + if (data->lr.roc) pnfs_roc_release(inode); nfs_iput_and_deactive(inode); } @@ -5669,11 +5699,12 @@ static void nfs4_delegreturn_prepare(struct rpc_task *task, void *data) d_data = (struct nfs4_delegreturndata *)data; - if (nfs4_wait_on_layoutreturn(d_data->inode, task)) + if (!d_data->args.lr_args && + nfs4_wait_on_layoutreturn(d_data->inode, task)) return; - if (d_data->roc) - pnfs_roc_get_barrier(d_data->inode, &d_data->roc_barrier); + if (d_data->lr.roc) + pnfs_roc_get_barrier(d_data->inode, &d_data->lr.roc_barrier); nfs4_setup_sequence(d_data->res.server, &d_data->args.seq_args, @@ -5720,12 +5751,13 @@ static int _nfs4_proc_delegreturn(struct inode *inode, struct rpc_cred *cred, co nfs4_stateid_copy(&data->stateid, stateid); data->res.fattr = &data->fattr; data->res.server = server; + data->res.lr_ret = -NFS4ERR_NOMATCHING_LAYOUT; nfs_fattr_init(data->res.fattr); data->timestamp = jiffies; data->rpc_status = 0; data->inode = nfs_igrab_and_active(inode); if (data->inode) - data->roc = nfs4_roc(inode); + data->lr.roc = nfs4_roc(inode); task_setup_data.callback_data = data; msg.rpc_argp = &data->args; diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c index 73d2a68..84fdcf2 100644 --- a/fs/nfs/nfs4xdr.c +++ b/fs/nfs/nfs4xdr.c @@ -710,11 +710,13 @@ static int nfs4_stat_to_errno(int); #define NFS4_enc_delegreturn_sz (compound_encode_hdr_maxsz + \ encode_sequence_maxsz + \ encode_putfh_maxsz + \ + encode_layoutreturn_maxsz + \ encode_delegreturn_maxsz + \ encode_getattr_maxsz) #define NFS4_dec_delegreturn_sz (compound_decode_hdr_maxsz + \ decode_sequence_maxsz + \ decode_putfh_maxsz + \ + decode_layoutreturn_maxsz + \ decode_delegreturn_maxsz + \ decode_getattr_maxsz) #define NFS4_enc_getacl_sz (compound_encode_hdr_maxsz + \ @@ -2683,6 +2685,8 @@ static void nfs4_xdr_enc_delegreturn(struct rpc_rqst *req, encode_compound_hdr(xdr, req, &hdr); encode_sequence(xdr, &args->seq_args, &hdr); encode_putfh(xdr, args->fhandle, &hdr); + if (args->lr_args) + encode_layoutreturn(xdr, args->lr_args, &hdr); encode_getfattr(xdr, args->bitmask, &hdr); encode_delegreturn(xdr, args->stateid, &hdr); encode_nops(&hdr); @@ -6942,6 +6946,12 @@ static int nfs4_xdr_dec_delegreturn(struct rpc_rqst *rqstp, status = decode_putfh(xdr); if (status != 0) goto out; + if (res->lr_res) { + status = decode_layoutreturn(xdr, res->lr_res); + res->lr_ret = status; + if (status) + goto out; + } status = decode_getfattr(xdr, res->fattr, res->server); if (status != 0) goto out; -- cgit v1.1 From 828ed9ec1b565445b8c060c8a97be4f396ef614b Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 15 Nov 2016 21:47:27 -0500 Subject: pNFS: Clean up - add a helper to initialise struct layoutreturn_args Signed-off-by: Trond Myklebust --- fs/nfs/pnfs.c | 25 ++++++++++++++++++------- 1 file changed, 18 insertions(+), 7 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index d7b5ad4..a93afdd 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c @@ -1014,6 +1014,23 @@ pnfs_prepare_layoutreturn(struct pnfs_layout_hdr *lo, return true; } +static void +pnfs_init_layoutreturn_args(struct nfs4_layoutreturn_args *args, + struct pnfs_layout_hdr *lo, + const nfs4_stateid *stateid, + enum pnfs_iomode iomode) +{ + struct inode *inode = lo->plh_inode; + + args->layout_type = NFS_SERVER(inode)->pnfs_curr_ld->id; + args->inode = inode; + args->range.iomode = iomode; + args->range.offset = 0; + args->range.length = NFS4_MAX_UINT64; + args->layout = lo; + nfs4_stateid_copy(&args->stateid, stateid); +} + static int pnfs_send_layoutreturn(struct pnfs_layout_hdr *lo, const nfs4_stateid *stateid, enum pnfs_iomode iomode, bool sync) @@ -1032,13 +1049,7 @@ pnfs_send_layoutreturn(struct pnfs_layout_hdr *lo, const nfs4_stateid *stateid, goto out; } - nfs4_stateid_copy(&lrp->args.stateid, stateid); - lrp->args.layout_type = NFS_SERVER(ino)->pnfs_curr_ld->id; - lrp->args.inode = ino; - lrp->args.range.iomode = iomode; - lrp->args.range.offset = 0; - lrp->args.range.length = NFS4_MAX_UINT64; - lrp->args.layout = lo; + pnfs_init_layoutreturn_args(&lrp->args, lo, stateid, iomode); lrp->clp = NFS_SERVER(ino)->nfs_client; lrp->cred = lo->plh_lc_cred; -- cgit v1.1 From 1c5bd76d17cca6836e9d9913e4a0356cd8a36598 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Wed, 16 Nov 2016 01:11:25 -0500 Subject: pNFS: Enable layoutreturn operation for return-on-close Amend the pnfs return on close helper functions to enable sending the layoutreturn op in CLOSE/DELEGRETURN. This closes a potential race between CLOSE/DELEGRETURN and parallel OPEN calls to the same file, and allows the client and the server to agree on whether or not there is an outstanding layout. Signed-off-by: Trond Myklebust --- fs/nfs/nfs4proc.c | 45 ++++++++---------- fs/nfs/pnfs.c | 139 ++++++++++++++++++++++++------------------------------ fs/nfs/pnfs.h | 30 ++++++------ 3 files changed, 96 insertions(+), 118 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 765af66..221d97d 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -3052,7 +3052,8 @@ static void nfs4_free_closedata(void *data) struct super_block *sb = calldata->state->inode->i_sb; if (calldata->lr.roc) - pnfs_roc_release(calldata->state->inode); + pnfs_roc_release(&calldata->lr.arg, &calldata->lr.res, + calldata->res.lr_ret); nfs4_put_open_state(calldata->state); nfs_free_seqid(calldata->arg.seqid); nfs4_put_state_owner(sp); @@ -3103,9 +3104,6 @@ static void nfs4_close_done(struct rpc_task *task, void *data) switch (task->tk_status) { case 0: res_stateid = &calldata->res.stateid; - if (calldata->lr.roc) - pnfs_roc_set_barrier(state->inode, - calldata->lr.roc_barrier); renew_lease(server, calldata->timestamp); break; case -NFS4ERR_ADMIN_REVOKED: @@ -3181,7 +3179,7 @@ static void nfs4_close_prepare(struct rpc_task *task, void *data) goto out_no_action; } - if (!calldata->arg.lr_args && nfs4_wait_on_layoutreturn(inode, task)) { + if (!calldata->lr.roc && nfs4_wait_on_layoutreturn(inode, task)) { nfs_release_seqid(calldata->arg.seqid); goto out_wait; } @@ -3195,8 +3193,6 @@ static void nfs4_close_prepare(struct rpc_task *task, void *data) else calldata->arg.bitmask = NULL; } - if (calldata->lr.roc) - pnfs_roc_get_barrier(inode, &calldata->lr.roc_barrier); calldata->arg.share_access = nfs4_map_atomic_open_share(NFS_SERVER(inode), @@ -3223,13 +3219,6 @@ static const struct rpc_call_ops nfs4_close_ops = { .rpc_release = nfs4_free_closedata, }; -static bool nfs4_roc(struct inode *inode) -{ - if (!nfs_have_layout(inode)) - return false; - return pnfs_roc(inode); -} - /* * It is possible for data to be read/written from a mem-mapped file * after the sys_close call (which hits the vfs layer as a flush). @@ -3281,7 +3270,12 @@ int nfs4_do_close(struct nfs4_state *state, gfp_t gfp_mask, int wait) calldata->res.seqid = calldata->arg.seqid; calldata->res.server = server; calldata->res.lr_ret = -NFS4ERR_NOMATCHING_LAYOUT; - calldata->lr.roc = nfs4_roc(state->inode); + calldata->lr.roc = pnfs_roc(state->inode, + &calldata->lr.arg, &calldata->lr.res, msg.rpc_cred); + if (calldata->lr.roc) { + calldata->arg.lr_args = &calldata->lr.arg; + calldata->res.lr_res = &calldata->lr.res; + } nfs_sb_active(calldata->inode->i_sb); msg.rpc_argp = &calldata->arg; @@ -5676,8 +5670,6 @@ static void nfs4_delegreturn_done(struct rpc_task *task, void *calldata) } } data->rpc_status = task->tk_status; - if (data->lr.roc && data->rpc_status == 0) - pnfs_roc_set_barrier(data->inode, data->lr.roc_barrier); } static void nfs4_delegreturn_release(void *calldata) @@ -5687,7 +5679,8 @@ static void nfs4_delegreturn_release(void *calldata) if (inode) { if (data->lr.roc) - pnfs_roc_release(inode); + pnfs_roc_release(&data->lr.arg, &data->lr.res, + data->res.lr_ret); nfs_iput_and_deactive(inode); } kfree(calldata); @@ -5699,13 +5692,9 @@ static void nfs4_delegreturn_prepare(struct rpc_task *task, void *data) d_data = (struct nfs4_delegreturndata *)data; - if (!d_data->args.lr_args && - nfs4_wait_on_layoutreturn(d_data->inode, task)) + if (!d_data->lr.roc && nfs4_wait_on_layoutreturn(d_data->inode, task)) return; - if (d_data->lr.roc) - pnfs_roc_get_barrier(d_data->inode, &d_data->lr.roc_barrier); - nfs4_setup_sequence(d_data->res.server, &d_data->args.seq_args, &d_data->res.seq_res, @@ -5756,8 +5745,14 @@ static int _nfs4_proc_delegreturn(struct inode *inode, struct rpc_cred *cred, co data->timestamp = jiffies; data->rpc_status = 0; data->inode = nfs_igrab_and_active(inode); - if (data->inode) - data->lr.roc = nfs4_roc(inode); + if (data->inode) { + data->lr.roc = pnfs_roc(inode, &data->lr.arg, &data->lr.res, + cred); + if (data->lr.roc) { + data->args.lr_args = &data->lr.arg; + data->res.lr_res = &data->lr.res; + } + } task_setup_data.callback_data = data; msg.rpc_argp = &data->args; diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index a93afdd..f61cb81 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c @@ -984,6 +984,20 @@ out_unlock: } +static void +pnfs_set_plh_return_info(struct pnfs_layout_hdr *lo, enum pnfs_iomode iomode, + u32 seq) +{ + if (lo->plh_return_iomode != 0 && lo->plh_return_iomode != iomode) + iomode = IOMODE_ANY; + lo->plh_return_iomode = iomode; + set_bit(NFS_LAYOUT_RETURN_REQUESTED, &lo->plh_flags); + if (seq != 0) { + WARN_ON_ONCE(lo->plh_return_seq != 0 && lo->plh_return_seq != seq); + lo->plh_return_seq = seq; + } +} + static bool pnfs_prepare_layoutreturn(struct pnfs_layout_hdr *lo, nfs4_stateid *stateid, @@ -1188,17 +1202,22 @@ pnfs_commit_and_return_layout(struct inode *inode) return ret; } -bool pnfs_roc(struct inode *ino) +bool pnfs_roc(struct inode *ino, + struct nfs4_layoutreturn_args *args, + struct nfs4_layoutreturn_res *res, + const struct rpc_cred *cred) { struct nfs_inode *nfsi = NFS_I(ino); struct nfs_open_context *ctx; struct nfs4_state *state; struct pnfs_layout_hdr *lo; - struct pnfs_layout_segment *lseg, *tmp; + struct pnfs_layout_segment *lseg, *next; nfs4_stateid stateid; - LIST_HEAD(tmp_list); - bool found = false, layoutreturn = false, roc = false; + enum pnfs_iomode iomode = 0; + bool layoutreturn = false, roc = false; + if (!nfs_have_layout(ino)) + return false; spin_lock(&ino->i_lock); lo = nfsi->layout; if (!lo || !pnfs_layout_is_valid(lo) || @@ -1217,83 +1236,63 @@ bool pnfs_roc(struct inode *ino) } - list_for_each_entry_safe(lseg, tmp, &lo->plh_segs, pls_list) { + list_for_each_entry_safe(lseg, next, &lo->plh_segs, pls_list) { /* If we are sending layoutreturn, invalidate all valid lsegs */ - if (test_bit(NFS_LSEG_ROC, &lseg->pls_flags)) { - mark_lseg_invalid(lseg, &tmp_list); - found = true; - } + if (!test_and_clear_bit(NFS_LSEG_ROC, &lseg->pls_flags)) + continue; + /* + * Note: mark lseg for return so pnfs_layout_remove_lseg + * doesn't invalidate the layout for us. + */ + set_bit(NFS_LSEG_LAYOUTRETURN, &lseg->pls_flags); + if (!mark_lseg_invalid(lseg, &lo->plh_return_segs)) + continue; + pnfs_set_plh_return_info(lo, lseg->pls_range.iomode, 0); } - /* always send layoutreturn if being marked so */ - if (test_bit(NFS_LAYOUT_RETURN_REQUESTED, &lo->plh_flags)) { - layoutreturn = pnfs_prepare_layoutreturn(lo, - &stateid, NULL); - if (layoutreturn) - goto out_noroc; - } + if (!test_bit(NFS_LAYOUT_RETURN_REQUESTED, &lo->plh_flags)) + goto out_noroc; /* ROC in two conditions: * 1. there are ROC lsegs * 2. we don't send layoutreturn */ - if (found) { - /* lo ref dropped in pnfs_roc_release() */ - pnfs_get_layout_hdr(lo); - roc = true; - } + /* lo ref dropped in pnfs_roc_release() */ + layoutreturn = pnfs_prepare_layoutreturn(lo, &stateid, &iomode); + /* If the creds don't match, we can't compound the layoutreturn */ + if (!layoutreturn || cred != lo->plh_lc_cred) + goto out_noroc; + + roc = layoutreturn; + pnfs_init_layoutreturn_args(args, lo, &stateid, iomode); + res->lrs_present = 0; + layoutreturn = false; out_noroc: spin_unlock(&ino->i_lock); - pnfs_free_lseg_list(&tmp_list); pnfs_layoutcommit_inode(ino, true); if (layoutreturn) - pnfs_send_layoutreturn(lo, &stateid, IOMODE_ANY, true); + pnfs_send_layoutreturn(lo, &stateid, iomode, true); return roc; } -void pnfs_roc_release(struct inode *ino) +void pnfs_roc_release(struct nfs4_layoutreturn_args *args, + struct nfs4_layoutreturn_res *res, + int ret) { - struct pnfs_layout_hdr *lo; + struct pnfs_layout_hdr *lo = args->layout; + const nfs4_stateid *arg_stateid = NULL; + const nfs4_stateid *res_stateid = NULL; - spin_lock(&ino->i_lock); - lo = NFS_I(ino)->layout; - pnfs_clear_layoutreturn_waitbit(lo); - if (atomic_dec_and_test(&lo->plh_refcount)) { - pnfs_detach_layout_hdr(lo); - spin_unlock(&ino->i_lock); - pnfs_free_layout_hdr(lo); - } else - spin_unlock(&ino->i_lock); -} - -void pnfs_roc_set_barrier(struct inode *ino, u32 barrier) -{ - struct pnfs_layout_hdr *lo; - - spin_lock(&ino->i_lock); - lo = NFS_I(ino)->layout; - if (pnfs_seqid_is_newer(barrier, lo->plh_barrier)) - lo->plh_barrier = barrier; - spin_unlock(&ino->i_lock); - trace_nfs4_layoutreturn_on_close(ino, 0); -} - -void pnfs_roc_get_barrier(struct inode *ino, u32 *barrier) -{ - struct nfs_inode *nfsi = NFS_I(ino); - struct pnfs_layout_hdr *lo; - u32 current_seqid; - - spin_lock(&ino->i_lock); - lo = nfsi->layout; - current_seqid = be32_to_cpu(lo->plh_stateid.seqid); - - /* Since close does not return a layout stateid for use as - * a barrier, we choose the worst-case barrier. - */ - *barrier = current_seqid + atomic_read(&lo->plh_outstanding); - spin_unlock(&ino->i_lock); + if (ret == 0) { + arg_stateid = &args->stateid; + if (res->lrs_present) + res_stateid = &res->stateid; + } + pnfs_layoutreturn_free_lsegs(lo, arg_stateid, &args->range, + res_stateid); + pnfs_put_layout_hdr(lo); + trace_nfs4_layoutreturn_on_close(args->inode, 0); } bool pnfs_wait_on_layoutreturn(struct inode *ino, struct rpc_task *task) @@ -1931,20 +1930,6 @@ out_forget: return ERR_PTR(-EAGAIN); } -static void -pnfs_set_plh_return_info(struct pnfs_layout_hdr *lo, enum pnfs_iomode iomode, - u32 seq) -{ - if (lo->plh_return_iomode != 0 && lo->plh_return_iomode != iomode) - iomode = IOMODE_ANY; - lo->plh_return_iomode = iomode; - set_bit(NFS_LAYOUT_RETURN_REQUESTED, &lo->plh_flags); - if (seq != 0) { - WARN_ON_ONCE(lo->plh_return_seq != 0 && lo->plh_return_seq != seq); - lo->plh_return_seq = seq; - } -} - /** * pnfs_mark_matching_lsegs_return - Free or return matching layout segments * @lo: pointer to layout header diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h index 75ff939..f55c065 100644 --- a/fs/nfs/pnfs.h +++ b/fs/nfs/pnfs.h @@ -271,10 +271,13 @@ int pnfs_mark_matching_lsegs_return(struct pnfs_layout_hdr *lo, u32 seq); int pnfs_mark_layout_stateid_invalid(struct pnfs_layout_hdr *lo, struct list_head *lseg_list); -bool pnfs_roc(struct inode *ino); -void pnfs_roc_release(struct inode *ino); -void pnfs_roc_set_barrier(struct inode *ino, u32 barrier); -void pnfs_roc_get_barrier(struct inode *ino, u32 *barrier); +bool pnfs_roc(struct inode *ino, + struct nfs4_layoutreturn_args *args, + struct nfs4_layoutreturn_res *res, + const struct rpc_cred *cred); +void pnfs_roc_release(struct nfs4_layoutreturn_args *args, + struct nfs4_layoutreturn_res *res, + int ret); bool pnfs_wait_on_layoutreturn(struct inode *ino, struct rpc_task *task); void pnfs_set_layoutcommit(struct inode *, struct pnfs_layout_segment *, loff_t); void pnfs_cleanup_layoutcommit(struct nfs4_layoutcommit_data *data); @@ -666,23 +669,18 @@ pnfs_layoutcommit_outstanding(struct inode *inode) static inline bool -pnfs_roc(struct inode *ino) +pnfs_roc(struct inode *ino, + struct nfs4_layoutreturn_args *args, + struct nfs4_layoutreturn_res *res, + const struct rpc_cred *cred) { return false; } static inline void -pnfs_roc_release(struct inode *ino) -{ -} - -static inline void -pnfs_roc_set_barrier(struct inode *ino, u32 barrier) -{ -} - -static inline void -pnfs_roc_get_barrier(struct inode *ino, u32 *barrier) +pnfs_roc_release(struct nfs4_layoutreturn_args *args, + struct nfs4_layoutreturn_res *res, + int ret) { } -- cgit v1.1 From 53e6fc86abbbd7338f16267846a58de7ee24e839 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sat, 19 Nov 2016 08:48:47 -0500 Subject: pNFS: Prevent unnecessary layoutreturns after delegreturn If we cannot grab the inode or superblock, then we cannot pin the layout header, and so we cannot send a layoutreturn as part of an async delegreturn call. In this case, we currently end up sending an extra layoutreturn after the delegreturn. Since the layout was implicitly returned by the delegreturn, that just gets a BAD_STATEID. The fix is to simply complete the return-on-close immediately. Signed-off-by: Trond Myklebust --- fs/nfs/nfs4proc.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 221d97d..5593b08 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -5744,14 +5744,16 @@ static int _nfs4_proc_delegreturn(struct inode *inode, struct rpc_cred *cred, co nfs_fattr_init(data->res.fattr); data->timestamp = jiffies; data->rpc_status = 0; + data->lr.roc = pnfs_roc(inode, &data->lr.arg, &data->lr.res, cred); data->inode = nfs_igrab_and_active(inode); if (data->inode) { - data->lr.roc = pnfs_roc(inode, &data->lr.arg, &data->lr.res, - cred); if (data->lr.roc) { data->args.lr_args = &data->lr.arg; data->res.lr_res = &data->lr.res; } + } else if (data->lr.roc) { + pnfs_roc_release(&data->lr.arg, &data->lr.res, 0); + data->lr.roc = false; } task_setup_data.callback_data = data; -- cgit v1.1 From fe1cf9469d7bcb6af27e42eb555a41b0135bce4a Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Wed, 30 Nov 2016 12:32:55 -0500 Subject: pNFS: Clear all layout segment state in pnfs_mark_layout_stateid_invalid When the layout state is invalidated, then so is the layout segment state, and hence we do need to clean up the state bits. Signed-off-by: Trond Myklebust --- fs/nfs/pnfs.c | 19 ++++++++++++++++++- 1 file changed, 18 insertions(+), 1 deletion(-) (limited to 'fs/nfs') diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index f61cb81..76b1851 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c @@ -58,6 +58,8 @@ static void pnfs_free_returned_lsegs(struct pnfs_layout_hdr *lo, struct list_head *free_me, const struct pnfs_layout_range *range, u32 seq); +static bool pnfs_lseg_dec_and_remove_zero(struct pnfs_layout_segment *lseg, + struct list_head *tmp_list); /* Return the registered pnfs layout driver module matching given id */ static struct pnfs_layoutdriver_type * @@ -311,6 +313,18 @@ pnfs_clear_layoutreturn_info(struct pnfs_layout_hdr *lo) clear_bit(NFS_LAYOUT_RETURN_REQUESTED, &lo->plh_flags); } +static void +pnfs_clear_lseg_state(struct pnfs_layout_segment *lseg, + struct list_head *free_me) +{ + clear_bit(NFS_LSEG_ROC, &lseg->pls_flags); + clear_bit(NFS_LSEG_LAYOUTRETURN, &lseg->pls_flags); + if (test_and_clear_bit(NFS_LSEG_VALID, &lseg->pls_flags)) + pnfs_lseg_dec_and_remove_zero(lseg, free_me); + if (test_and_clear_bit(NFS_LSEG_LAYOUTCOMMIT, &lseg->pls_flags)) + pnfs_lseg_dec_and_remove_zero(lseg, free_me); +} + /* * Mark a pnfs_layout_hdr and all associated layout segments as invalid * @@ -327,11 +341,14 @@ pnfs_mark_layout_stateid_invalid(struct pnfs_layout_hdr *lo, .offset = 0, .length = NFS4_MAX_UINT64, }; + struct pnfs_layout_segment *lseg, *next; set_bit(NFS_LAYOUT_INVALID_STID, &lo->plh_flags); pnfs_clear_layoutreturn_info(lo); + list_for_each_entry_safe(lseg, next, &lo->plh_segs, pls_list) + pnfs_clear_lseg_state(lseg, lseg_list); pnfs_free_returned_lsegs(lo, lseg_list, &range, 0); - return pnfs_mark_matching_lsegs_invalid(lo, lseg_list, &range, 0); + return !list_empty(&lo->plh_segs); } static int -- cgit v1.1 From 24408f5282df58210cfecaa6851b2be52f5c661f Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Wed, 30 Nov 2016 10:19:09 -0500 Subject: pNFS: Fix bugs in _pnfs_return_layout We need to honour the NFS_LAYOUT_RETURN_REQUESTED bit regardless of whether or not there are layout segments pending. Furthermore, we should ensure that we leave the plh_return_segs list empty. This patch fixes a memory leak of the layout segments on plh_return_segs. Signed-off-by: Trond Myklebust --- fs/nfs/pnfs.c | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index 76b1851..a3a5186 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c @@ -1145,7 +1145,7 @@ _pnfs_return_layout(struct inode *ino) struct nfs_inode *nfsi = NFS_I(ino); LIST_HEAD(tmp_list); nfs4_stateid stateid; - int status = 0, empty; + int status = 0; bool send; dprintk("NFS: %s for inode %lu\n", __func__, ino->i_ino); @@ -1159,7 +1159,14 @@ _pnfs_return_layout(struct inode *ino) } /* Reference matched in nfs4_layoutreturn_release */ pnfs_get_layout_hdr(lo); - empty = list_empty(&lo->plh_segs); + /* Is there an outstanding layoutreturn ? */ + if (test_bit(NFS_LAYOUT_RETURN_LOCK, &lo->plh_flags)) { + spin_unlock(&ino->i_lock); + if (wait_on_bit(&lo->plh_flags, NFS_LAYOUT_RETURN, + TASK_UNINTERRUPTIBLE)) + goto out_put_layout_hdr; + spin_lock(&ino->i_lock); + } pnfs_clear_layoutcommit(ino, &tmp_list); pnfs_mark_matching_lsegs_invalid(lo, &tmp_list, NULL, 0); @@ -1173,7 +1180,7 @@ _pnfs_return_layout(struct inode *ino) } /* Don't send a LAYOUTRETURN if list was initially empty */ - if (empty) { + if (!test_bit(NFS_LAYOUT_RETURN_REQUESTED, &lo->plh_flags)) { spin_unlock(&ino->i_lock); dprintk("NFS: %s no layout segments to return\n", __func__); goto out_put_layout_hdr; -- cgit v1.1 From 4aab97327f35aac17b7f62976ea44b1bacfaa92b Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Wed, 30 Nov 2016 10:47:48 -0500 Subject: pNFS: Sync the layout state bits in pnfs_cache_lseg_for_layoutreturn Ensure that the layout state bits are synced when we cache a layout segment for layoutreturn using an appropriate call to pnfs_set_plh_return_info. Signed-off-by: Trond Myklebust --- fs/nfs/pnfs.c | 29 +++++++++++++++-------------- 1 file changed, 15 insertions(+), 14 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index a3a5186..08acfa4 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c @@ -306,6 +306,20 @@ pnfs_put_layout_hdr(struct pnfs_layout_hdr *lo) } static void +pnfs_set_plh_return_info(struct pnfs_layout_hdr *lo, enum pnfs_iomode iomode, + u32 seq) +{ + if (lo->plh_return_iomode != 0 && lo->plh_return_iomode != iomode) + iomode = IOMODE_ANY; + lo->plh_return_iomode = iomode; + set_bit(NFS_LAYOUT_RETURN_REQUESTED, &lo->plh_flags); + if (seq != 0) { + WARN_ON_ONCE(lo->plh_return_seq != 0 && lo->plh_return_seq != seq); + lo->plh_return_seq = seq; + } +} + +static void pnfs_clear_layoutreturn_info(struct pnfs_layout_hdr *lo) { lo->plh_return_iomode = 0; @@ -456,6 +470,7 @@ pnfs_cache_lseg_for_layoutreturn(struct pnfs_layout_hdr *lo, { if (test_and_clear_bit(NFS_LSEG_LAYOUTRETURN, &lseg->pls_flags) && pnfs_layout_is_valid(lo)) { + pnfs_set_plh_return_info(lo, lseg->pls_range.iomode, 0); list_move_tail(&lseg->pls_list, &lo->plh_return_segs); return true; } @@ -1001,20 +1016,6 @@ out_unlock: } -static void -pnfs_set_plh_return_info(struct pnfs_layout_hdr *lo, enum pnfs_iomode iomode, - u32 seq) -{ - if (lo->plh_return_iomode != 0 && lo->plh_return_iomode != iomode) - iomode = IOMODE_ANY; - lo->plh_return_iomode = iomode; - set_bit(NFS_LAYOUT_RETURN_REQUESTED, &lo->plh_flags); - if (seq != 0) { - WARN_ON_ONCE(lo->plh_return_seq != 0 && lo->plh_return_seq != seq); - lo->plh_return_seq = seq; - } -} - static bool pnfs_prepare_layoutreturn(struct pnfs_layout_hdr *lo, nfs4_stateid *stateid, -- cgit v1.1 From abb3e1c8777ec2baffa2c736aa06280821018995 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Wed, 30 Nov 2016 11:38:10 -0500 Subject: pNFS: Don't mark the layout as freed if the last lseg is marked for return Address another memory leak. Signed-off-by: Trond Myklebust --- fs/nfs/pnfs.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'fs/nfs') diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index 08acfa4..57ec46b 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c @@ -455,6 +455,8 @@ pnfs_layout_remove_lseg(struct pnfs_layout_hdr *lo, list_del_init(&lseg->pls_list); /* Matched by pnfs_get_layout_hdr in pnfs_layout_insert_lseg */ atomic_dec(&lo->plh_refcount); + if (test_bit(NFS_LSEG_LAYOUTRETURN, &lseg->pls_flags)) + return; if (list_empty(&lo->plh_segs) && !test_bit(NFS_LAYOUT_RETURN_REQUESTED, &lo->plh_flags) && !test_bit(NFS_LAYOUT_RETURN, &lo->plh_flags)) { -- cgit v1.1 From 29ade5db12930ec60133f6a02791f4b1a4af2943 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Wed, 30 Nov 2016 16:23:38 -0500 Subject: pNFS: Wait on outstanding layoutreturns to complete in pnfs_roc() Signed-off-by: Trond Myklebust --- fs/nfs/pnfs.c | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'fs/nfs') diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index 57ec46b..5500108 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c @@ -1245,11 +1245,20 @@ bool pnfs_roc(struct inode *ino, if (!nfs_have_layout(ino)) return false; +retry: spin_lock(&ino->i_lock); lo = nfsi->layout; if (!lo || !pnfs_layout_is_valid(lo) || test_bit(NFS_LAYOUT_BULK_RECALL, &lo->plh_flags)) goto out_noroc; + if (test_bit(NFS_LAYOUT_RETURN_LOCK, &lo->plh_flags)) { + pnfs_get_layout_hdr(lo); + spin_unlock(&ino->i_lock); + wait_on_bit(&lo->plh_flags, NFS_LAYOUT_RETURN, + TASK_UNINTERRUPTIBLE); + pnfs_put_layout_hdr(lo); + goto retry; + } /* no roc if we hold a delegation */ if (nfs4_check_delegation(ino, FMODE_READ)) -- cgit v1.1 From b85f562049cc7dce0d65577427a8321197d20983 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Wed, 30 Nov 2016 18:00:07 -0500 Subject: pNFS: Skip invalid stateids when doing a bulk destroy If the layout stateid is already invalid, we have no work to do. Signed-off-by: Trond Myklebust --- fs/nfs/pnfs.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'fs/nfs') diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index 5500108..0b25a1c 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c @@ -750,6 +750,8 @@ pnfs_layout_bulk_destroy_byserver_locked(struct nfs_client *clp, struct inode *inode; list_for_each_entry_safe(lo, next, &server->layouts, plh_layouts) { + if (test_bit(NFS_LAYOUT_INVALID_STID, &lo->plh_flags)) + continue; inode = igrab(lo->plh_inode); if (inode == NULL) continue; -- cgit v1.1 From ced85a7568b57a671fe186562dfc601a5baab54d Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Mon, 28 Nov 2016 09:02:52 -0500 Subject: nfs: fix false positives in nfs40_walk_client_list() It's possible that two different servers can return the same (clientid, verifier) pair purely by coincidence. Both are 64-bit values, but depending on the server implementation, they can be highly predictable and collisions may be quite likely, especially when there are lots of servers. So, check for this case. If the clientid and verifier both match, then we actually know they *can't* be the same server, since a new SETCLIENTID to an already-known server should have changed the verifier. This helps fix a bug that could cause the client to mount a filesystem from the wrong server. Reviewed-by: Jeff Layton Tested-by: Yongcheng Yang Signed-off-by: J. Bruce Fields Signed-off-by: Trond Myklebust --- fs/nfs/nfs4client.c | 22 +++++++++++++++++++++- 1 file changed, 21 insertions(+), 1 deletion(-) (limited to 'fs/nfs') diff --git a/fs/nfs/nfs4client.c b/fs/nfs/nfs4client.c index 074ac71..9301040 100644 --- a/fs/nfs/nfs4client.c +++ b/fs/nfs/nfs4client.c @@ -464,6 +464,11 @@ static bool nfs4_match_client_owner_id(const struct nfs_client *clp1, return strcmp(clp1->cl_owner_id, clp2->cl_owner_id) == 0; } +static bool nfs4_same_verifier(nfs4_verifier *v1, nfs4_verifier *v2) +{ + return memcmp(v1->data, v2->data, sizeof(v1->data)) == 0; +} + /** * nfs40_walk_client_list - Find server that recognizes a client ID * @@ -521,7 +526,21 @@ int nfs40_walk_client_list(struct nfs_client *new, if (!nfs4_match_client_owner_id(pos, new)) continue; - + /* + * We just sent a new SETCLIENTID, which should have + * caused the server to return a new cl_confirm. So if + * cl_confirm is the same, then this is a different + * server that just returned the same cl_confirm by + * coincidence: + */ + if ((new != pos) && nfs4_same_verifier(&pos->cl_confirm, + &new->cl_confirm)) + continue; + /* + * But if the cl_confirm's are different, then the only + * way that a SETCLIENTID_CONFIRM to pos can succeed is + * if new and pos point to the same server: + */ atomic_inc(&pos->cl_count); spin_unlock(&nn->nfs_client_lock); @@ -534,6 +553,7 @@ int nfs40_walk_client_list(struct nfs_client *new, break; case 0: nfs4_swap_callback_idents(pos, new); + pos->cl_confirm = new->cl_confirm; prev = NULL; *result = pos; -- cgit v1.1 From 7d38de3ffa75f92e7b00301dcdc6a3f9c53509ab Mon Sep 17 00:00:00 2001 From: Anna Schumaker Date: Thu, 17 Nov 2016 15:15:55 -0500 Subject: NFS: Remove unused authflavour parameter from nfs_get_client() This parameter hasn't been used since f8407299 (Linux 3.11-rc2), so let's remove it from this function and callers. Signed-off-by: Anna Schumaker Signed-off-by: Trond Myklebust --- fs/nfs/client.c | 6 ++---- fs/nfs/filelayout/filelayoutdev.c | 3 +-- fs/nfs/flexfilelayout/flexfilelayoutdev.c | 3 +-- fs/nfs/internal.h | 8 +++----- fs/nfs/nfs3client.c | 5 ++--- fs/nfs/nfs4client.c | 10 +++------- fs/nfs/pnfs.h | 3 +-- fs/nfs/pnfs_nfs.c | 28 ++++++++++------------------ 8 files changed, 23 insertions(+), 43 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/client.c b/fs/nfs/client.c index ebecfb8..91a8d61 100644 --- a/fs/nfs/client.c +++ b/fs/nfs/client.c @@ -369,9 +369,7 @@ nfs_found_client(const struct nfs_client_initdata *cl_init, * Look up a client by IP address and protocol version * - creates a new record if one doesn't yet exist */ -struct nfs_client * -nfs_get_client(const struct nfs_client_initdata *cl_init, - rpc_authflavor_t authflavour) +struct nfs_client *nfs_get_client(const struct nfs_client_initdata *cl_init) { struct nfs_client *clp, *new = NULL; struct nfs_net *nn = net_generic(cl_init->net, nfs_net_id); @@ -655,7 +653,7 @@ static int nfs_init_server(struct nfs_server *server, set_bit(NFS_CS_NORESVPORT, &cl_init.init_flags); /* Allocate or find a client reference we can use */ - clp = nfs_get_client(&cl_init, RPC_AUTH_UNIX); + clp = nfs_get_client(&cl_init); if (IS_ERR(clp)) { dprintk("<-- nfs_init_server() = error %ld\n", PTR_ERR(clp)); return PTR_ERR(clp); diff --git a/fs/nfs/filelayout/filelayoutdev.c b/fs/nfs/filelayout/filelayoutdev.c index 4946ef4..a5589b7 100644 --- a/fs/nfs/filelayout/filelayoutdev.c +++ b/fs/nfs/filelayout/filelayoutdev.c @@ -279,8 +279,7 @@ nfs4_fl_prepare_ds(struct pnfs_layout_segment *lseg, u32 ds_idx) nfs4_pnfs_ds_connect(s, ds, devid, dataserver_timeo, dataserver_retrans, 4, - s->nfs_client->cl_minorversion, - s->nfs_client->cl_rpcclient->cl_auth->au_flavor); + s->nfs_client->cl_minorversion); out_test_devid: if (filelayout_test_devid_unavailable(devid)) diff --git a/fs/nfs/flexfilelayout/flexfilelayoutdev.c b/fs/nfs/flexfilelayout/flexfilelayoutdev.c index 6cf545c..75767aa 100644 --- a/fs/nfs/flexfilelayout/flexfilelayoutdev.c +++ b/fs/nfs/flexfilelayout/flexfilelayoutdev.c @@ -384,8 +384,7 @@ nfs4_ff_layout_prepare_ds(struct pnfs_layout_segment *lseg, u32 ds_idx, nfs4_pnfs_ds_connect(s, ds, devid, dataserver_timeo, dataserver_retrans, mirror->mirror_ds->ds_versions[0].version, - mirror->mirror_ds->ds_versions[0].minor_version, - RPC_AUTH_UNIX); + mirror->mirror_ds->ds_versions[0].minor_version); /* connect success, check rsize/wsize limit */ if (ds->ds_clp) { diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h index 80bcc0b..4622d5f 100644 --- a/fs/nfs/internal.h +++ b/fs/nfs/internal.h @@ -154,8 +154,7 @@ extern const struct rpc_program nfs_program; extern void nfs_clients_init(struct net *net); extern struct nfs_client *nfs_alloc_client(const struct nfs_client_initdata *); int nfs_create_rpc_client(struct nfs_client *, const struct nfs_client_initdata *, rpc_authflavor_t); -struct nfs_client *nfs_get_client(const struct nfs_client_initdata *, - rpc_authflavor_t); +struct nfs_client *nfs_get_client(const struct nfs_client_initdata *); int nfs_probe_fsinfo(struct nfs_server *server, struct nfs_fh *, struct nfs_fattr *); void nfs_server_insert_lists(struct nfs_server *); void nfs_server_remove_lists(struct nfs_server *); @@ -194,14 +193,13 @@ extern struct nfs_client *nfs4_set_ds_client(struct nfs_server *mds_srv, int ds_addrlen, int ds_proto, unsigned int ds_timeo, unsigned int ds_retrans, - u32 minor_version, - rpc_authflavor_t au_flavor); + u32 minor_version); extern struct rpc_clnt *nfs4_find_or_create_ds_client(struct nfs_client *, struct inode *); extern struct nfs_client *nfs3_set_ds_client(struct nfs_server *mds_srv, const struct sockaddr *ds_addr, int ds_addrlen, int ds_proto, unsigned int ds_timeo, - unsigned int ds_retrans, rpc_authflavor_t au_flavor); + unsigned int ds_retrans); #ifdef CONFIG_PROC_FS extern int __init nfs_fs_proc_init(void); extern void nfs_fs_proc_exit(void); diff --git a/fs/nfs/nfs3client.c b/fs/nfs/nfs3client.c index ee75354..7879f2a 100644 --- a/fs/nfs/nfs3client.c +++ b/fs/nfs/nfs3client.c @@ -78,8 +78,7 @@ struct nfs_server *nfs3_clone_server(struct nfs_server *source, */ struct nfs_client *nfs3_set_ds_client(struct nfs_server *mds_srv, const struct sockaddr *ds_addr, int ds_addrlen, - int ds_proto, unsigned int ds_timeo, unsigned int ds_retrans, - rpc_authflavor_t au_flavor) + int ds_proto, unsigned int ds_timeo, unsigned int ds_retrans) { struct rpc_timeout ds_timeout; struct nfs_client *mds_clp = mds_srv->nfs_client; @@ -106,7 +105,7 @@ struct nfs_client *nfs3_set_ds_client(struct nfs_server *mds_srv, /* Use the MDS nfs_client cl_ipaddr. */ nfs_init_timeout_values(&ds_timeout, ds_proto, ds_timeo, ds_retrans); - clp = nfs_get_client(&cl_init, au_flavor); + clp = nfs_get_client(&cl_init); return clp; } diff --git a/fs/nfs/nfs4client.c b/fs/nfs/nfs4client.c index 9301040..5ae9d64 100644 --- a/fs/nfs/nfs4client.c +++ b/fs/nfs/nfs4client.c @@ -901,7 +901,6 @@ static int nfs4_set_client(struct nfs_server *server, const struct sockaddr *addr, const size_t addrlen, const char *ip_addr, - rpc_authflavor_t authflavour, int proto, const struct rpc_timeout *timeparms, u32 minorversion, struct net *net) { @@ -927,7 +926,7 @@ static int nfs4_set_client(struct nfs_server *server, set_bit(NFS_CS_MIGRATION, &cl_init.init_flags); /* Allocate or find a client reference we can use */ - clp = nfs_get_client(&cl_init, authflavour); + clp = nfs_get_client(&cl_init); if (IS_ERR(clp)) { error = PTR_ERR(clp); goto error; @@ -968,7 +967,7 @@ error: struct nfs_client *nfs4_set_ds_client(struct nfs_server *mds_srv, const struct sockaddr *ds_addr, int ds_addrlen, int ds_proto, unsigned int ds_timeo, unsigned int ds_retrans, - u32 minor_version, rpc_authflavor_t au_flavor) + u32 minor_version) { struct rpc_timeout ds_timeout; struct nfs_client *mds_clp = mds_srv->nfs_client; @@ -999,7 +998,7 @@ struct nfs_client *nfs4_set_ds_client(struct nfs_server *mds_srv, * (section 13.1 RFC 5661). */ nfs_init_timeout_values(&ds_timeout, ds_proto, ds_timeo, ds_retrans); - clp = nfs_get_client(&cl_init, au_flavor); + clp = nfs_get_client(&cl_init); dprintk("<-- %s %p\n", __func__, clp); return clp; @@ -1123,7 +1122,6 @@ static int nfs4_init_server(struct nfs_server *server, (const struct sockaddr *)&data->nfs_server.address, data->nfs_server.addrlen, data->client_address, - data->selected_flavor, data->nfs_server.protocol, &timeparms, data->minorversion, @@ -1220,7 +1218,6 @@ struct nfs_server *nfs4_create_referral_server(struct nfs_clone_mount *data, data->addr, data->addrlen, parent_client->cl_ipaddr, - data->authflavor, rpc_protocol(parent_server->client), parent_server->client->cl_timeout, parent_client->cl_mvops->minor_version, @@ -1331,7 +1328,6 @@ int nfs4_update_server(struct nfs_server *server, const char *hostname, nfs_server_remove_lists(server); error = nfs4_set_client(server, hostname, sap, salen, buf, - clp->cl_rpcclient->cl_auth->au_flavor, clp->cl_proto, clnt->cl_timeout, clp->cl_minorversion, net); nfs_put_client(clp); diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h index f55c065..459ac2c 100644 --- a/fs/nfs/pnfs.h +++ b/fs/nfs/pnfs.h @@ -369,8 +369,7 @@ struct nfs4_pnfs_ds *nfs4_pnfs_ds_add(struct list_head *dsaddrs, void nfs4_pnfs_v3_ds_connect_unload(void); void nfs4_pnfs_ds_connect(struct nfs_server *mds_srv, struct nfs4_pnfs_ds *ds, struct nfs4_deviceid_node *devid, unsigned int timeo, - unsigned int retrans, u32 version, u32 minor_version, - rpc_authflavor_t au_flavor); + unsigned int retrans, u32 version, u32 minor_version); struct nfs4_pnfs_ds_addr *nfs4_decode_mp_ds_addr(struct net *net, struct xdr_stream *xdr, gfp_t gfp_flags); diff --git a/fs/nfs/pnfs_nfs.c b/fs/nfs/pnfs_nfs.c index 53b4705..9414b49 100644 --- a/fs/nfs/pnfs_nfs.c +++ b/fs/nfs/pnfs_nfs.c @@ -600,8 +600,7 @@ static struct nfs_client *(*get_v3_ds_connect)( int ds_addrlen, int ds_proto, unsigned int ds_timeo, - unsigned int ds_retrans, - rpc_authflavor_t au_flavor); + unsigned int ds_retrans); static bool load_v3_ds_connect(void) { @@ -625,15 +624,13 @@ EXPORT_SYMBOL_GPL(nfs4_pnfs_v3_ds_connect_unload); static int _nfs4_pnfs_v3_ds_connect(struct nfs_server *mds_srv, struct nfs4_pnfs_ds *ds, unsigned int timeo, - unsigned int retrans, - rpc_authflavor_t au_flavor) + unsigned int retrans) { struct nfs_client *clp = ERR_PTR(-EIO); struct nfs4_pnfs_ds_addr *da; int status = 0; - dprintk("--> %s DS %s au_flavor %d\n", __func__, - ds->ds_remotestr, au_flavor); + dprintk("--> %s DS %s\n", __func__, ds->ds_remotestr); if (!load_v3_ds_connect()) goto out; @@ -657,7 +654,7 @@ static int _nfs4_pnfs_v3_ds_connect(struct nfs_server *mds_srv, clp = get_v3_ds_connect(mds_srv, (struct sockaddr *)&da->da_addr, da->da_addrlen, IPPROTO_TCP, - timeo, retrans, au_flavor); + timeo, retrans); } if (IS_ERR(clp)) { @@ -676,15 +673,13 @@ static int _nfs4_pnfs_v4_ds_connect(struct nfs_server *mds_srv, struct nfs4_pnfs_ds *ds, unsigned int timeo, unsigned int retrans, - u32 minor_version, - rpc_authflavor_t au_flavor) + u32 minor_version) { struct nfs_client *clp = ERR_PTR(-EIO); struct nfs4_pnfs_ds_addr *da; int status = 0; - dprintk("--> %s DS %s au_flavor %d\n", __func__, ds->ds_remotestr, - au_flavor); + dprintk("--> %s DS %s\n", __func__, ds->ds_remotestr); list_for_each_entry(da, &ds->ds_addrs, da_node) { dprintk("%s: DS %s: trying address %s\n", @@ -720,8 +715,7 @@ static int _nfs4_pnfs_v4_ds_connect(struct nfs_server *mds_srv, clp = nfs4_set_ds_client(mds_srv, (struct sockaddr *)&da->da_addr, da->da_addrlen, IPPROTO_TCP, - timeo, retrans, minor_version, - au_flavor); + timeo, retrans, minor_version); if (IS_ERR(clp)) continue; @@ -755,19 +749,17 @@ out: */ void nfs4_pnfs_ds_connect(struct nfs_server *mds_srv, struct nfs4_pnfs_ds *ds, struct nfs4_deviceid_node *devid, unsigned int timeo, - unsigned int retrans, u32 version, - u32 minor_version, rpc_authflavor_t au_flavor) + unsigned int retrans, u32 version, u32 minor_version) { if (test_and_set_bit(NFS4DS_CONNECTING, &ds->ds_state) == 0) { int err = 0; if (version == 3) { err = _nfs4_pnfs_v3_ds_connect(mds_srv, ds, timeo, - retrans, au_flavor); + retrans); } else if (version == 4) { err = _nfs4_pnfs_v4_ds_connect(mds_srv, ds, timeo, - retrans, minor_version, - au_flavor); + retrans, minor_version); } else { dprintk("%s: unsupported DS version %d\n", __func__, version); -- cgit v1.1 From 4d3b55d3c7c8c628498fefac4779fc02de5af492 Mon Sep 17 00:00:00 2001 From: Anna Schumaker Date: Wed, 23 Nov 2016 13:49:38 -0500 Subject: NFS: Remove unused argument from nfs_direct_write_complete() This parameter hasn't been used since 2a009ec9 (Linux 3.13-rc3), so let's remove it from this function. Signed-off-by: Anna Schumaker Signed-off-by: Trond Myklebust --- fs/nfs/direct.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c index bd81bcf..be88bcd 100644 --- a/fs/nfs/direct.c +++ b/fs/nfs/direct.c @@ -105,7 +105,7 @@ struct nfs_direct_req { static const struct nfs_pgio_completion_ops nfs_direct_write_completion_ops; static const struct nfs_commit_completion_ops nfs_direct_commit_completion_ops; -static void nfs_direct_write_complete(struct nfs_direct_req *dreq, struct inode *inode); +static void nfs_direct_write_complete(struct nfs_direct_req *dreq); static void nfs_direct_write_schedule_work(struct work_struct *work); static inline void get_dreq(struct nfs_direct_req *dreq) @@ -684,7 +684,7 @@ out_failed: } if (put_dreq(dreq)) - nfs_direct_write_complete(dreq, dreq->inode); + nfs_direct_write_complete(dreq); } static void nfs_direct_commit_complete(struct nfs_commit_data *data) @@ -717,7 +717,7 @@ static void nfs_direct_commit_complete(struct nfs_commit_data *data) } if (atomic_dec_and_test(&cinfo.mds->rpcs_out)) - nfs_direct_write_complete(dreq, data->inode); + nfs_direct_write_complete(dreq); } static void nfs_direct_resched_write(struct nfs_commit_info *cinfo, @@ -768,7 +768,7 @@ static void nfs_direct_write_schedule_work(struct work_struct *work) } } -static void nfs_direct_write_complete(struct nfs_direct_req *dreq, struct inode *inode) +static void nfs_direct_write_complete(struct nfs_direct_req *dreq) { schedule_work(&dreq->work); /* Calls nfs_direct_write_schedule_work */ } @@ -824,7 +824,7 @@ static void nfs_direct_write_completion(struct nfs_pgio_header *hdr) out_put: if (put_dreq(dreq)) - nfs_direct_write_complete(dreq, hdr->inode); + nfs_direct_write_complete(dreq); hdr->release(hdr); } @@ -953,7 +953,7 @@ static ssize_t nfs_direct_write_schedule_iovec(struct nfs_direct_req *dreq, } if (put_dreq(dreq)) - nfs_direct_write_complete(dreq, dreq->inode); + nfs_direct_write_complete(dreq); return 0; } -- cgit v1.1 From b184b5c38e4640585126e44ef84f2dbdd0d23d5a Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Thu, 13 Oct 2016 15:26:47 +1100 Subject: NFS: remove l_pid field from nfs_lockowner this field is not used in any important way and probably should have been removed by Commit: 8003d3c4aaa5 ("nfs4: treat lock owners as opaque values") which removed the pid argument from nfs4_get_lock_state. Except in unusual and uninteresting cases, two threads with the same ->tgid will have the same ->files pointer, so keeping them both for comparison brings no benefit. Acked-by: Jeff Layton Signed-off-by: NeilBrown Reviewed-by: Jeff Layton Signed-off-by: Trond Myklebust --- fs/nfs/inode.c | 3 --- fs/nfs/nfs4proc.c | 1 - fs/nfs/pagelist.c | 3 +-- fs/nfs/write.c | 3 +-- 4 files changed, 2 insertions(+), 8 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index 3575e34..fd99132 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -703,7 +703,6 @@ static void nfs_init_lock_context(struct nfs_lock_context *l_ctx) { atomic_set(&l_ctx->count, 1); l_ctx->lockowner.l_owner = current->files; - l_ctx->lockowner.l_pid = current->tgid; INIT_LIST_HEAD(&l_ctx->list); atomic_set(&l_ctx->io_count, 0); } @@ -716,8 +715,6 @@ static struct nfs_lock_context *__nfs_find_lock_context(struct nfs_open_context do { if (pos->lockowner.l_owner != current->files) continue; - if (pos->lockowner.l_pid != current->tgid) - continue; atomic_inc(&pos->count); return pos; } while ((pos = list_entry(pos->list.next, typeof(*pos), list)) != head); diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 5593b08..0d108f1 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -2947,7 +2947,6 @@ static int _nfs4_do_setattr(struct inode *inode, } else if (truncate && state != NULL) { struct nfs_lockowner lockowner = { .l_owner = current->files, - .l_pid = current->tgid, }; if (!nfs4_valid_open_stateid(state)) return -EBADF; diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c index 965db47..161f8b1 100644 --- a/fs/nfs/pagelist.c +++ b/fs/nfs/pagelist.c @@ -867,8 +867,7 @@ static void nfs_pageio_cleanup_mirroring(struct nfs_pageio_descriptor *pgio) static bool nfs_match_lock_context(const struct nfs_lock_context *l1, const struct nfs_lock_context *l2) { - return l1->lockowner.l_owner == l2->lockowner.l_owner - && l1->lockowner.l_pid == l2->lockowner.l_pid; + return l1->lockowner.l_owner == l2->lockowner.l_owner; } /** diff --git a/fs/nfs/write.c b/fs/nfs/write.c index 5321183..4d5897e 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -1151,8 +1151,7 @@ int nfs_flush_incompatible(struct file *file, struct page *page) if (l_ctx && flctx && !(list_empty_careful(&flctx->flc_posix) && list_empty_careful(&flctx->flc_flock))) { - do_flush |= l_ctx->lockowner.l_owner != current->files - || l_ctx->lockowner.l_pid != current->tgid; + do_flush |= l_ctx->lockowner.l_owner != current->files; } nfs_release_request(req); if (!do_flush) -- cgit v1.1 From 532d4def2f95623a9b8b2cef7723e14521377911 Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Thu, 13 Oct 2016 15:26:47 +1100 Subject: NFSv4: add flock_owner to open context An open file description (struct file) in a given process can be associated with two different lock owners. It can have a Posix lock owner which will be different in each process that has a fd on the file. It can have a Flock owner which will be the same in all processes. When searching for a lock stateid to use, we need to consider both of these owners So add a new "flock_owner" to the "nfs_open_context" (of which there is one for each open file description). This flock_owner does not need to be reference-counted as there is a 1-1 relation between 'struct file' and nfs open contexts, and it will never be part of a list of contexts. So there is no need for a 'flock_context' - just the owner is enough. The io_count included in the (Posix) lock_context provides no guarantee that all read-aheads that could use the state have completed, so not supporting it for flock locks in not a serious problem. Synchronization between flock and read-ahead can be added later if needed. When creating an open_context for a non-openning create call, we don't have a 'struct file' to pass in, so the lock context gets initialized with a NULL owner, but this will never be used. The flock_owner is not used at all in this patch, that will come later. Acked-by: Jeff Layton Signed-off-by: NeilBrown Reviewed-by: Jeff Layton Signed-off-by: Trond Myklebust --- fs/nfs/dir.c | 6 +++--- fs/nfs/inode.c | 7 +++++-- fs/nfs/nfs4file.c | 2 +- fs/nfs/nfs4proc.c | 2 +- 4 files changed, 10 insertions(+), 7 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index 5f1af4c..f1ecfcc 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -1467,9 +1467,9 @@ static fmode_t flags_to_mode(int flags) return res; } -static struct nfs_open_context *create_nfs_open_context(struct dentry *dentry, int open_flags) +static struct nfs_open_context *create_nfs_open_context(struct dentry *dentry, int open_flags, struct file *filp) { - return alloc_nfs_open_context(dentry, flags_to_mode(open_flags)); + return alloc_nfs_open_context(dentry, flags_to_mode(open_flags), filp); } static int do_open(struct inode *inode, struct file *filp) @@ -1554,7 +1554,7 @@ int nfs_atomic_open(struct inode *dir, struct dentry *dentry, return finish_no_open(file, dentry); } - ctx = create_nfs_open_context(dentry, open_flags); + ctx = create_nfs_open_context(dentry, open_flags, file); err = PTR_ERR(ctx); if (IS_ERR(ctx)) goto out; diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index fd99132..3a7601f 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -796,7 +796,9 @@ void nfs_close_context(struct nfs_open_context *ctx, int is_sync) } EXPORT_SYMBOL_GPL(nfs_close_context); -struct nfs_open_context *alloc_nfs_open_context(struct dentry *dentry, fmode_t f_mode) +struct nfs_open_context *alloc_nfs_open_context(struct dentry *dentry, + fmode_t f_mode, + struct file *filp) { struct nfs_open_context *ctx; struct rpc_cred *cred = rpc_lookup_cred(); @@ -815,6 +817,7 @@ struct nfs_open_context *alloc_nfs_open_context(struct dentry *dentry, fmode_t f ctx->mode = f_mode; ctx->flags = 0; ctx->error = 0; + ctx->flock_owner = (fl_owner_t)filp; nfs_init_lock_context(&ctx->lock_context); ctx->lock_context.open_context = ctx; INIT_LIST_HEAD(&ctx->list); @@ -939,7 +942,7 @@ int nfs_open(struct inode *inode, struct file *filp) { struct nfs_open_context *ctx; - ctx = alloc_nfs_open_context(file_dentry(filp), filp->f_mode); + ctx = alloc_nfs_open_context(file_dentry(filp), filp->f_mode, filp); if (IS_ERR(ctx)) return PTR_ERR(ctx); nfs_file_set_open_context(filp, ctx); diff --git a/fs/nfs/nfs4file.c b/fs/nfs/nfs4file.c index 89a7795..0efba77 100644 --- a/fs/nfs/nfs4file.c +++ b/fs/nfs/nfs4file.c @@ -57,7 +57,7 @@ nfs4_file_open(struct inode *inode, struct file *filp) parent = dget_parent(dentry); dir = d_inode(parent); - ctx = alloc_nfs_open_context(file_dentry(filp), filp->f_mode); + ctx = alloc_nfs_open_context(file_dentry(filp), filp->f_mode, filp); err = PTR_ERR(ctx); if (IS_ERR(ctx)) goto out; diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 0d108f1..68a75bf 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -4008,7 +4008,7 @@ nfs4_proc_create(struct inode *dir, struct dentry *dentry, struct iattr *sattr, struct nfs4_state *state; int status = 0; - ctx = alloc_nfs_open_context(dentry, FMODE_READ); + ctx = alloc_nfs_open_context(dentry, FMODE_READ, NULL); if (IS_ERR(ctx)) return PTR_ERR(ctx); -- cgit v1.1 From 29b59f94169374c412cedf60c77073df4e68ce51 Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Thu, 13 Oct 2016 15:26:47 +1100 Subject: NFSv4: change nfs4_do_setattr to take an open_context instead of a nfs4_state. The open_context can always lead directly to the state, and is always easily available, so this is a straightforward change. Doing this makes more information available to _nfs4_do_setattr() for use in the next patch. Signed-off-by: NeilBrown Reviewed-by: Jeff Layton Signed-off-by: Trond Myklebust --- fs/nfs/nfs4proc.c | 28 +++++++++++++--------------- 1 file changed, 13 insertions(+), 15 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 68a75bf..3ab4dd5 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -94,7 +94,7 @@ static int nfs4_proc_getattr(struct nfs_server *, struct nfs_fh *, struct nfs_fa static int _nfs4_proc_getattr(struct nfs_server *server, struct nfs_fh *fhandle, struct nfs_fattr *fattr, struct nfs4_label *label); static int nfs4_do_setattr(struct inode *inode, struct rpc_cred *cred, struct nfs_fattr *fattr, struct iattr *sattr, - struct nfs4_state *state, struct nfs4_label *ilabel, + struct nfs_open_context *ctx, struct nfs4_label *ilabel, struct nfs4_label *olabel); #ifdef CONFIG_NFS_V4_1 static int nfs41_test_stateid(struct nfs_server *, nfs4_stateid *, @@ -2826,7 +2826,7 @@ static int _nfs4_do_open(struct inode *dir, nfs_fattr_init(opendata->o_res.f_attr); status = nfs4_do_setattr(state->inode, cred, opendata->o_res.f_attr, sattr, - state, label, olabel); + ctx, label, olabel); if (status == 0) { nfs_setattr_update_inode(state->inode, sattr, opendata->o_res.f_attr); @@ -2921,7 +2921,7 @@ static int _nfs4_do_setattr(struct inode *inode, struct nfs_setattrargs *arg, struct nfs_setattrres *res, struct rpc_cred *cred, - struct nfs4_state *state) + struct nfs_open_context *ctx) { struct nfs_server *server = NFS_SERVER(inode); struct rpc_message msg = { @@ -2944,13 +2944,13 @@ static int _nfs4_do_setattr(struct inode *inode, if (nfs4_copy_delegation_stateid(inode, fmode, &arg->stateid, &delegation_cred)) { /* Use that stateid */ - } else if (truncate && state != NULL) { + } else if (truncate && ctx != NULL) { struct nfs_lockowner lockowner = { .l_owner = current->files, }; - if (!nfs4_valid_open_stateid(state)) + if (!nfs4_valid_open_stateid(ctx->state)) return -EBADF; - if (nfs4_select_rw_stateid(state, FMODE_WRITE, &lockowner, + if (nfs4_select_rw_stateid(ctx->state, FMODE_WRITE, &lockowner, &arg->stateid, &delegation_cred) == -EIO) return -EBADF; } else @@ -2961,7 +2961,7 @@ static int _nfs4_do_setattr(struct inode *inode, status = nfs4_call_sync(server->client, server, &msg, &arg->seq_args, &res->seq_res, 1); put_rpccred(delegation_cred); - if (status == 0 && state != NULL) + if (status == 0 && ctx != NULL) renew_lease(server, timestamp); trace_nfs4_setattr(inode, &arg->stateid, status); return status; @@ -2969,10 +2969,11 @@ static int _nfs4_do_setattr(struct inode *inode, static int nfs4_do_setattr(struct inode *inode, struct rpc_cred *cred, struct nfs_fattr *fattr, struct iattr *sattr, - struct nfs4_state *state, struct nfs4_label *ilabel, + struct nfs_open_context *ctx, struct nfs4_label *ilabel, struct nfs4_label *olabel) { struct nfs_server *server = NFS_SERVER(inode); + struct nfs4_state *state = ctx ? ctx->state : NULL; struct nfs_setattrargs arg = { .fh = NFS_FH(inode), .iap = sattr, @@ -2997,7 +2998,7 @@ static int nfs4_do_setattr(struct inode *inode, struct rpc_cred *cred, arg.bitmask = nfs4_bitmask(server, olabel); do { - err = _nfs4_do_setattr(inode, &arg, &res, cred, state); + err = _nfs4_do_setattr(inode, &arg, &res, cred, ctx); switch (err) { case -NFS4ERR_OPENMODE: if (!(sattr->ia_valid & ATTR_SIZE)) { @@ -3724,7 +3725,7 @@ nfs4_proc_setattr(struct dentry *dentry, struct nfs_fattr *fattr, { struct inode *inode = d_inode(dentry); struct rpc_cred *cred = NULL; - struct nfs4_state *state = NULL; + struct nfs_open_context *ctx = NULL; struct nfs4_label *label = NULL; int status; @@ -3745,20 +3746,17 @@ nfs4_proc_setattr(struct dentry *dentry, struct nfs_fattr *fattr, /* Search for an existing open(O_WRITE) file */ if (sattr->ia_valid & ATTR_FILE) { - struct nfs_open_context *ctx; ctx = nfs_file_open_context(sattr->ia_file); - if (ctx) { + if (ctx) cred = ctx->cred; - state = ctx->state; - } } label = nfs4_label_alloc(NFS_SERVER(inode), GFP_KERNEL); if (IS_ERR(label)) return PTR_ERR(label); - status = nfs4_do_setattr(inode, cred, fattr, sattr, state, NULL, label); + status = nfs4_do_setattr(inode, cred, fattr, sattr, ctx, NULL, label); if (status == 0) { nfs_setattr_update_inode(inode, sattr, fattr); nfs_setsecurity(inode, fattr, label); -- cgit v1.1 From 1739347549653dc2463d208d7039f5e97b8f1e8b Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Thu, 13 Oct 2016 15:26:47 +1100 Subject: NFSv4: change nfs4_select_rw_stateid to take a lock_context inplace of lock_owner The only time that a lock_context is not immediately available is in setattr, and now that it has an open_context, it can easily find one with nfs_get_lock_context. This removes the need for the on-stack nfs_lockowner. This change is preparation for correctly support flock stateids. Signed-off-by: NeilBrown Reviewed-by: Jeff Layton Signed-off-by: Trond Myklebust --- fs/nfs/nfs4_fs.h | 2 +- fs/nfs/nfs4proc.c | 15 ++++++--------- fs/nfs/nfs4state.c | 11 +++++------ 3 files changed, 12 insertions(+), 16 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h index 1452177..6651658 100644 --- a/fs/nfs/nfs4_fs.h +++ b/fs/nfs/nfs4_fs.h @@ -457,7 +457,7 @@ extern void nfs41_handle_server_scope(struct nfs_client *, extern void nfs4_put_lock_state(struct nfs4_lock_state *lsp); extern int nfs4_set_lock_state(struct nfs4_state *state, struct file_lock *fl); extern int nfs4_select_rw_stateid(struct nfs4_state *, fmode_t, - const struct nfs_lockowner *, nfs4_stateid *, + const struct nfs_lock_context *, nfs4_stateid *, struct rpc_cred **); extern struct nfs_seqid *nfs_alloc_seqid(struct nfs_seqid_counter *counter, gfp_t gfp_mask); diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 3ab4dd5..1c629f5 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -2945,12 +2945,13 @@ static int _nfs4_do_setattr(struct inode *inode, if (nfs4_copy_delegation_stateid(inode, fmode, &arg->stateid, &delegation_cred)) { /* Use that stateid */ } else if (truncate && ctx != NULL) { - struct nfs_lockowner lockowner = { - .l_owner = current->files, - }; + struct nfs_lock_context *l_ctx; if (!nfs4_valid_open_stateid(ctx->state)) return -EBADF; - if (nfs4_select_rw_stateid(ctx->state, FMODE_WRITE, &lockowner, + l_ctx = nfs_get_lock_context(ctx); + if (IS_ERR(l_ctx)) + return PTR_ERR(l_ctx); + if (nfs4_select_rw_stateid(ctx->state, FMODE_WRITE, l_ctx, &arg->stateid, &delegation_cred) == -EIO) return -EBADF; } else @@ -4576,11 +4577,7 @@ int nfs4_set_rw_stateid(nfs4_stateid *stateid, const struct nfs_lock_context *l_ctx, fmode_t fmode) { - const struct nfs_lockowner *lockowner = NULL; - - if (l_ctx != NULL) - lockowner = &l_ctx->lockowner; - return nfs4_select_rw_stateid(ctx->state, fmode, lockowner, stateid, NULL); + return nfs4_select_rw_stateid(ctx->state, fmode, l_ctx, stateid, NULL); } EXPORT_SYMBOL_GPL(nfs4_set_rw_stateid); diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c index 0959c96..04db49d 100644 --- a/fs/nfs/nfs4state.c +++ b/fs/nfs/nfs4state.c @@ -939,20 +939,19 @@ int nfs4_set_lock_state(struct nfs4_state *state, struct file_lock *fl) static int nfs4_copy_lock_stateid(nfs4_stateid *dst, struct nfs4_state *state, - const struct nfs_lockowner *lockowner) + const struct nfs_lock_context *l_ctx) { struct nfs4_lock_state *lsp; fl_owner_t fl_owner; int ret = -ENOENT; - - if (lockowner == NULL) + if (l_ctx == NULL) goto out; if (test_bit(LK_STATE_IN_USE, &state->flags) == 0) goto out; - fl_owner = lockowner->l_owner; + fl_owner = l_ctx->lockowner.l_owner; spin_lock(&state->state_lock); lsp = __nfs4_find_lock_state(state, fl_owner); if (lsp && test_bit(NFS_LOCK_LOST, &lsp->ls_flags)) @@ -986,7 +985,7 @@ static void nfs4_copy_open_stateid(nfs4_stateid *dst, struct nfs4_state *state) * requests. */ int nfs4_select_rw_stateid(struct nfs4_state *state, - fmode_t fmode, const struct nfs_lockowner *lockowner, + fmode_t fmode, const struct nfs_lock_context *l_ctx, nfs4_stateid *dst, struct rpc_cred **cred) { int ret; @@ -995,7 +994,7 @@ int nfs4_select_rw_stateid(struct nfs4_state *state, return -EIO; if (cred != NULL) *cred = NULL; - ret = nfs4_copy_lock_stateid(dst, state, lockowner); + ret = nfs4_copy_lock_stateid(dst, state, l_ctx); if (ret == -EIO) /* A lost lock - don't even consider delegations */ goto out; -- cgit v1.1 From 8d42443166a5d3800756db98498a4961a5ea5de7 Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Thu, 13 Oct 2016 15:26:47 +1100 Subject: NFSv4: enhance nfs4_copy_lock_stateid to use a flock stateid if there is one A process can have two possible lock owner for a given open file: a per-process Posix lock owner and a per-open-file flock owner Use both of these when searching for a suitable stateid to use. With this patch, READ/WRITE requests will use the correct stateid if a flock lock is active. Signed-off-by: NeilBrown Reviewed-by: Jeff Layton Signed-off-by: Trond Myklebust --- fs/nfs/nfs4state.c | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c index 04db49d..4f1de9b 100644 --- a/fs/nfs/nfs4state.c +++ b/fs/nfs/nfs4state.c @@ -800,11 +800,13 @@ void nfs4_close_sync(struct nfs4_state *state, fmode_t fmode) * that is compatible with current->files */ static struct nfs4_lock_state * -__nfs4_find_lock_state(struct nfs4_state *state, fl_owner_t fl_owner) +__nfs4_find_lock_state(struct nfs4_state *state, + fl_owner_t fl_owner, fl_owner_t fl_owner2) { struct nfs4_lock_state *pos; list_for_each_entry(pos, &state->lock_states, ls_locks) { - if (pos->ls_owner != fl_owner) + if (pos->ls_owner != fl_owner && + pos->ls_owner != fl_owner2) continue; atomic_inc(&pos->ls_count); return pos; @@ -857,7 +859,7 @@ static struct nfs4_lock_state *nfs4_get_lock_state(struct nfs4_state *state, fl_ for(;;) { spin_lock(&state->state_lock); - lsp = __nfs4_find_lock_state(state, owner); + lsp = __nfs4_find_lock_state(state, owner, 0); if (lsp != NULL) break; if (new != NULL) { @@ -942,7 +944,7 @@ static int nfs4_copy_lock_stateid(nfs4_stateid *dst, const struct nfs_lock_context *l_ctx) { struct nfs4_lock_state *lsp; - fl_owner_t fl_owner; + fl_owner_t fl_owner, fl_flock_owner; int ret = -ENOENT; if (l_ctx == NULL) @@ -952,8 +954,10 @@ static int nfs4_copy_lock_stateid(nfs4_stateid *dst, goto out; fl_owner = l_ctx->lockowner.l_owner; + fl_flock_owner = l_ctx->open_context->flock_owner; + spin_lock(&state->state_lock); - lsp = __nfs4_find_lock_state(state, fl_owner); + lsp = __nfs4_find_lock_state(state, fl_owner, fl_flock_owner); if (lsp && test_bit(NFS_LOCK_LOST, &lsp->ls_flags)) ret = -EIO; else if (lsp != NULL && test_bit(NFS_LOCK_INITIALIZED, &lsp->ls_flags) != 0) { -- cgit v1.1 From d51fdb87a611f8ef50518df7187173ae10469fd0 Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Thu, 13 Oct 2016 15:26:47 +1100 Subject: NFS: discard nfs_lockowner structure. It now has only one field and is only used in one structure. So replaced it in that structure by the field it contains. Signed-off-by: NeilBrown Reviewed-by: Jeff Layton Signed-off-by: Trond Myklebust --- fs/nfs/inode.c | 4 ++-- fs/nfs/nfs4state.c | 2 +- fs/nfs/pagelist.c | 2 +- fs/nfs/write.c | 2 +- 4 files changed, 5 insertions(+), 5 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index 3a7601f..d79e0bac 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -702,7 +702,7 @@ EXPORT_SYMBOL_GPL(nfs_getattr); static void nfs_init_lock_context(struct nfs_lock_context *l_ctx) { atomic_set(&l_ctx->count, 1); - l_ctx->lockowner.l_owner = current->files; + l_ctx->lockowner = current->files; INIT_LIST_HEAD(&l_ctx->list); atomic_set(&l_ctx->io_count, 0); } @@ -713,7 +713,7 @@ static struct nfs_lock_context *__nfs_find_lock_context(struct nfs_open_context struct nfs_lock_context *pos = head; do { - if (pos->lockowner.l_owner != current->files) + if (pos->lockowner != current->files) continue; atomic_inc(&pos->count); return pos; diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c index 4f1de9b..26b6b8b 100644 --- a/fs/nfs/nfs4state.c +++ b/fs/nfs/nfs4state.c @@ -953,7 +953,7 @@ static int nfs4_copy_lock_stateid(nfs4_stateid *dst, if (test_bit(LK_STATE_IN_USE, &state->flags) == 0) goto out; - fl_owner = l_ctx->lockowner.l_owner; + fl_owner = l_ctx->lockowner; fl_flock_owner = l_ctx->open_context->flock_owner; spin_lock(&state->state_lock); diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c index 161f8b1..6e629b8 100644 --- a/fs/nfs/pagelist.c +++ b/fs/nfs/pagelist.c @@ -867,7 +867,7 @@ static void nfs_pageio_cleanup_mirroring(struct nfs_pageio_descriptor *pgio) static bool nfs_match_lock_context(const struct nfs_lock_context *l1, const struct nfs_lock_context *l2) { - return l1->lockowner.l_owner == l2->lockowner.l_owner; + return l1->lockowner == l2->lockowner; } /** diff --git a/fs/nfs/write.c b/fs/nfs/write.c index 4d5897e..6e761f3 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -1151,7 +1151,7 @@ int nfs_flush_incompatible(struct file *file, struct page *page) if (l_ctx && flctx && !(list_empty_careful(&flctx->flc_posix) && list_empty_careful(&flctx->flc_flock))) { - do_flush |= l_ctx->lockowner.l_owner != current->files; + do_flush |= l_ctx->lockowner != current->files; } nfs_release_request(req); if (!do_flush) -- cgit v1.1 From f36ab161bebe464d33b998294eff29b17a9c8918 Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Fri, 28 Oct 2016 14:37:02 +0000 Subject: NFS: fix typo in parameter description Fix typo in parameter description. Fixes: 5405fc44c337 ("NFSv4.x: Add kernel parameter to control the callback server") Signed-off-by: Wei Yongjun Signed-off-by: Trond Myklebust --- fs/nfs/super.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs/nfs') diff --git a/fs/nfs/super.c b/fs/nfs/super.c index 001796b..ddce94ce 100644 --- a/fs/nfs/super.c +++ b/fs/nfs/super.c @@ -2904,7 +2904,7 @@ module_param(max_session_slots, ushort, 0644); MODULE_PARM_DESC(max_session_slots, "Maximum number of outstanding NFSv4.1 " "requests the client will negotiate"); module_param(max_session_cb_slots, ushort, 0644); -MODULE_PARM_DESC(max_session_slots, "Maximum number of parallel NFSv4.1 " +MODULE_PARM_DESC(max_session_cb_slots, "Maximum number of parallel NFSv4.1 " "callbacks the client will process for a given server"); module_param(send_implementation_id, ushort, 0644); MODULE_PARM_DESC(send_implementation_id, -- cgit v1.1 From 79f687a3de9e3ba2518b4ea33f38ca6cbe9133eb Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sat, 19 Nov 2016 10:54:55 -0500 Subject: NFS: Fix a performance regression in readdir Ben Coddington reports that commit 311324ad1713, by adding the function nfs_dir_mapping_need_revalidate() that checks page cache validity on each call to nfs_readdir() causes a performance regression when the directory is being modified. If the directory is changing while we're iterating through the directory, POSIX does not require us to invalidate the page cache unless the user calls rewinddir(). However, we still do want to ensure that we use readdirplus in order to avoid a load of stat() calls when the user is doing an 'ls -l' workload. The fix should be to invalidate the page cache immediately when we're setting the NFS_INO_ADVISE_RDPLUS bit. Reported-by: Benjamin Coddington Fixes: 311324ad1713 ("NFS: Be more aggressive in using readdirplus...") Reviewed-by: Benjamin Coddington Tested-by: Benjamin Coddington Signed-off-by: Trond Myklebust --- fs/nfs/dir.c | 15 ++------------- 1 file changed, 2 insertions(+), 13 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index f1ecfcc..9220bc7 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -477,7 +477,7 @@ void nfs_force_use_readdirplus(struct inode *dir) { if (!list_empty(&NFS_I(dir)->open_files)) { nfs_advise_use_readdirplus(dir); - nfs_zap_mapping(dir, dir->i_mapping); + invalidate_mapping_pages(dir->i_mapping, 0, -1); } } @@ -886,17 +886,6 @@ int uncached_readdir(nfs_readdir_descriptor_t *desc) goto out; } -static bool nfs_dir_mapping_need_revalidate(struct inode *dir) -{ - struct nfs_inode *nfsi = NFS_I(dir); - - if (nfs_attribute_cache_expired(dir)) - return true; - if (nfsi->cache_validity & NFS_INO_INVALID_DATA) - return true; - return false; -} - /* The file offset position represents the dirent entry number. A last cookie cache takes care of the common case of reading the whole directory. @@ -928,7 +917,7 @@ static int nfs_readdir(struct file *file, struct dir_context *ctx) desc->decode = NFS_PROTO(inode)->decode_dirent; desc->plus = nfs_use_readdirplus(inode, ctx) ? 1 : 0; - if (ctx->pos == 0 || nfs_dir_mapping_need_revalidate(inode)) + if (ctx->pos == 0 || nfs_attribute_cache_expired(inode)) res = nfs_revalidate_mapping(inode, file->f_mapping); if (res < 0) goto out; -- cgit v1.1 From 63519fbc67d0d9912c13185b7c1e8c2fcb218cc0 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sat, 19 Nov 2016 11:21:54 -0500 Subject: NFS: Be more targeted about readdirplus use when doing lookup/revalidation There is little point in setting NFS_INO_ADVISE_RDPLUS in nfs_lookup and nfs_lookup_revalidate() unless a process is actually doing readdir on the parent directory. Furthermore, there is little point in using readdirplus if we're trying to revalidate a negative dentry. Reviewed-by: Benjamin Coddington Tested-by: Benjamin Coddington Signed-off-by: Trond Myklebust --- fs/nfs/dir.c | 30 +++++++++++++++++++----------- 1 file changed, 19 insertions(+), 11 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index 9220bc7..2283515 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -455,14 +455,18 @@ bool nfs_use_readdirplus(struct inode *dir, struct dir_context *ctx) } /* - * This function is called by the lookup code to request the use of - * readdirplus to accelerate any future lookups in the same + * This function is called by the lookup and getattr code to request the + * use of readdirplus to accelerate any future lookups in the same * directory. */ static void nfs_advise_use_readdirplus(struct inode *dir) { - set_bit(NFS_INO_ADVISE_RDPLUS, &NFS_I(dir)->flags); + struct nfs_inode *nfsi = NFS_I(dir); + + if (nfs_server_capable(dir, NFS_CAP_READDIRPLUS) && + !list_empty(&nfsi->open_files)) + set_bit(NFS_INO_ADVISE_RDPLUS, &nfsi->flags); } /* @@ -475,8 +479,11 @@ void nfs_advise_use_readdirplus(struct inode *dir) */ void nfs_force_use_readdirplus(struct inode *dir) { - if (!list_empty(&NFS_I(dir)->open_files)) { - nfs_advise_use_readdirplus(dir); + struct nfs_inode *nfsi = NFS_I(dir); + + if (nfs_server_capable(dir, NFS_CAP_READDIRPLUS) && + !list_empty(&nfsi->open_files)) { + set_bit(NFS_INO_ADVISE_RDPLUS, &nfsi->flags); invalidate_mapping_pages(dir->i_mapping, 0, -1); } } @@ -1150,7 +1157,7 @@ static int nfs_lookup_revalidate(struct dentry *dentry, unsigned int flags) return -ECHILD; goto out_bad; } - goto out_valid_noent; + goto out_valid; } if (is_bad_inode(inode)) { @@ -1173,6 +1180,7 @@ static int nfs_lookup_revalidate(struct dentry *dentry, unsigned int flags) return -ECHILD; goto out_zap_parent; } + nfs_advise_use_readdirplus(dir); goto out_valid; } @@ -1208,12 +1216,12 @@ static int nfs_lookup_revalidate(struct dentry *dentry, unsigned int flags) nfs_free_fhandle(fhandle); nfs4_label_free(label); + /* set a readdirplus hint that we had a cache miss */ + nfs_force_use_readdirplus(dir); + out_set_verifier: nfs_set_verifier(dentry, nfs_save_change_attribute(dir)); out_valid: - /* Success: notify readdir to use READDIRPLUS */ - nfs_advise_use_readdirplus(dir); - out_valid_noent: if (flags & LOOKUP_RCU) { if (parent != ACCESS_ONCE(dentry->d_parent)) return -ECHILD; @@ -1413,8 +1421,8 @@ struct dentry *nfs_lookup(struct inode *dir, struct dentry * dentry, unsigned in if (IS_ERR(res)) goto out_label; - /* Success: notify readdir to use READDIRPLUS */ - nfs_advise_use_readdirplus(dir); + /* Notify readdir to use READDIRPLUS */ + nfs_force_use_readdirplus(dir); no_entry: res = d_splice_alias(inode, dentry); -- cgit v1.1 From 1bcf4c5c597d1b1862cf54e65198f1c9e3cad29c Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Fri, 2 Dec 2016 09:15:37 -0500 Subject: NFS: Allow getattr to also report readdirplus cache hits If the use called stat() on an 'ls -l' workload, and the attribute cache was successfully revalidate by READDIRPLUS, then we want to report that back so that the readdir code continues to use readdirplus. Reviewed-by: Benjamin Coddington Tested-by: Benjamin Coddington Signed-off-by: Trond Myklebust --- fs/nfs/dir.c | 1 - fs/nfs/inode.c | 21 +++++++++++++++++---- fs/nfs/internal.h | 1 + 3 files changed, 18 insertions(+), 5 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index 2283515..f570245 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -459,7 +459,6 @@ bool nfs_use_readdirplus(struct inode *dir, struct dir_context *ctx) * use of readdirplus to accelerate any future lookups in the same * directory. */ -static void nfs_advise_use_readdirplus(struct inode *dir) { struct nfs_inode *nfsi = NFS_I(dir); diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index d79e0bac..75f5a9c 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -634,15 +634,28 @@ void nfs_setattr_update_inode(struct inode *inode, struct iattr *attr, } EXPORT_SYMBOL_GPL(nfs_setattr_update_inode); -static void nfs_request_parent_use_readdirplus(struct dentry *dentry) +static void nfs_readdirplus_parent_cache_miss(struct dentry *dentry) { struct dentry *parent; + if (!nfs_server_capable(d_inode(dentry), NFS_CAP_READDIRPLUS)) + return; parent = dget_parent(dentry); nfs_force_use_readdirplus(d_inode(parent)); dput(parent); } +static void nfs_readdirplus_parent_cache_hit(struct dentry *dentry) +{ + struct dentry *parent; + + if (!nfs_server_capable(d_inode(dentry), NFS_CAP_READDIRPLUS)) + return; + parent = dget_parent(dentry); + nfs_advise_use_readdirplus(d_inode(parent)); + dput(parent); +} + static bool nfs_need_revalidate_inode(struct inode *inode) { if (NFS_I(inode)->cache_validity & @@ -683,10 +696,10 @@ int nfs_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat) if (need_atime || nfs_need_revalidate_inode(inode)) { struct nfs_server *server = NFS_SERVER(inode); - if (server->caps & NFS_CAP_READDIRPLUS) - nfs_request_parent_use_readdirplus(dentry); + nfs_readdirplus_parent_cache_miss(dentry); err = __nfs_revalidate_inode(server, inode); - } + } else + nfs_readdirplus_parent_cache_hit(dentry); if (!err) { generic_fillattr(inode, stat); stat->ino = nfs_compat_user_ino64(NFS_FILEID(inode)); diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h index 4622d5f..6b79c2c 100644 --- a/fs/nfs/internal.h +++ b/fs/nfs/internal.h @@ -344,6 +344,7 @@ extern struct nfs_client *nfs_init_client(struct nfs_client *clp, const struct nfs_client_initdata *); /* dir.c */ +extern void nfs_advise_use_readdirplus(struct inode *dir); extern void nfs_force_use_readdirplus(struct inode *dir); extern unsigned long nfs_access_cache_count(struct shrinker *shrink, struct shrink_control *sc); -- cgit v1.1 From 46c98c6d1bd33f8ae2c3b8f379f1629c472141e3 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Fri, 25 Nov 2016 13:24:09 -0500 Subject: pNFS/flexfiles: Don't attempt to send layoutstats if there are no entries If the list of mirrors is empty, then don't send an RPC call. Signed-off-by: Trond Myklebust --- fs/nfs/flexfilelayout/flexfilelayout.c | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'fs/nfs') diff --git a/fs/nfs/flexfilelayout/flexfilelayout.c b/fs/nfs/flexfilelayout/flexfilelayout.c index 90462a2..a6264d6 100644 --- a/fs/nfs/flexfilelayout/flexfilelayout.c +++ b/fs/nfs/flexfilelayout/flexfilelayout.c @@ -2250,6 +2250,11 @@ ff_layout_prepare_layoutstats(struct nfs42_layoutstat_args *args) args->num_dev = ff_layout_mirror_prepare_stats(args, &ff_layout->generic_hdr, dev_count); spin_unlock(&args->inode->i_lock); + if (!args->num_dev) { + kfree(args->devinfo); + args->devinfo = NULL; + return -ENOENT; + } return 0; } -- cgit v1.1 From 06946c6a3d8b511a65e4f8b1f44dfd01e37f752d Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Fri, 25 Nov 2016 13:17:15 -0500 Subject: pNFS/flexfiles: Only send layoutstats updates for mirrors that were updated If there have been no reads or writes to a given mirror since the last layoutstats update, then don't resend the same data. Signed-off-by: Trond Myklebust --- fs/nfs/flexfilelayout/flexfilelayout.c | 6 ++++++ fs/nfs/flexfilelayout/flexfilelayout.h | 3 +++ 2 files changed, 9 insertions(+) (limited to 'fs/nfs') diff --git a/fs/nfs/flexfilelayout/flexfilelayout.c b/fs/nfs/flexfilelayout/flexfilelayout.c index a6264d6..540813c 100644 --- a/fs/nfs/flexfilelayout/flexfilelayout.c +++ b/fs/nfs/flexfilelayout/flexfilelayout.c @@ -705,6 +705,7 @@ nfs4_ff_layout_stat_io_start_read(struct inode *inode, spin_lock(&mirror->lock); report = nfs4_ff_layoutstat_start_io(mirror, &mirror->read_stat, now); nfs4_ff_layout_stat_io_update_requested(&mirror->read_stat, requested); + set_bit(NFS4_FF_MIRROR_STAT_AVAIL, &mirror->flags); spin_unlock(&mirror->lock); if (report) @@ -721,6 +722,7 @@ nfs4_ff_layout_stat_io_end_read(struct rpc_task *task, nfs4_ff_layout_stat_io_update_completed(&mirror->read_stat, requested, completed, ktime_get(), task->tk_start); + set_bit(NFS4_FF_MIRROR_STAT_AVAIL, &mirror->flags); spin_unlock(&mirror->lock); } @@ -734,6 +736,7 @@ nfs4_ff_layout_stat_io_start_write(struct inode *inode, spin_lock(&mirror->lock); report = nfs4_ff_layoutstat_start_io(mirror , &mirror->write_stat, now); nfs4_ff_layout_stat_io_update_requested(&mirror->write_stat, requested); + set_bit(NFS4_FF_MIRROR_STAT_AVAIL, &mirror->flags); spin_unlock(&mirror->lock); if (report) @@ -753,6 +756,7 @@ nfs4_ff_layout_stat_io_end_write(struct rpc_task *task, spin_lock(&mirror->lock); nfs4_ff_layout_stat_io_update_completed(&mirror->write_stat, requested, completed, ktime_get(), task->tk_start); + set_bit(NFS4_FF_MIRROR_STAT_AVAIL, &mirror->flags); spin_unlock(&mirror->lock); } @@ -2201,6 +2205,8 @@ ff_layout_mirror_prepare_stats(struct nfs42_layoutstat_args *args, break; if (!mirror->mirror_ds) continue; + if (!test_and_clear_bit(NFS4_FF_MIRROR_STAT_AVAIL, &mirror->flags)) + continue; /* mirror refcount put in cleanup_layoutstats */ if (!atomic_inc_not_zero(&mirror->ref)) continue; diff --git a/fs/nfs/flexfilelayout/flexfilelayout.h b/fs/nfs/flexfilelayout/flexfilelayout.h index 3ee0c9f..09f292e 100644 --- a/fs/nfs/flexfilelayout/flexfilelayout.h +++ b/fs/nfs/flexfilelayout/flexfilelayout.h @@ -81,12 +81,15 @@ struct nfs4_ff_layout_mirror { struct rpc_cred __rcu *rw_cred; atomic_t ref; spinlock_t lock; + unsigned long flags; struct nfs4_ff_layoutstat read_stat; struct nfs4_ff_layoutstat write_stat; ktime_t start_time; u32 report_interval; }; +#define NFS4_FF_MIRROR_STAT_AVAIL (0) + struct nfs4_ff_layout_segment { struct pnfs_layout_segment generic_hdr; u64 stripe_unit; -- cgit v1.1 From 4d796d751cefdb942a54c570bd3087d8be3bb893 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Fri, 23 Sep 2016 11:38:08 -0400 Subject: pNFS: Allow layout drivers to manage private data in struct nfs4_layoutreturn Cleanup to allow layout drivers to attach private data to layoutreturn, and manage the data. Signed-off-by: Trond Myklebust --- fs/nfs/nfs4proc.c | 6 ++++++ fs/nfs/nfs4xdr.c | 4 +++- fs/nfs/pnfs.c | 1 + 3 files changed, 10 insertions(+), 1 deletion(-) (limited to 'fs/nfs') diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 1c629f5..f992281 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -3039,6 +3039,7 @@ struct nfs4_closedata { struct { struct nfs4_layoutreturn_args arg; struct nfs4_layoutreturn_res res; + struct nfs4_xdr_opaque_data ld_private; u32 roc_barrier; bool roc; } lr; @@ -3267,6 +3268,7 @@ int nfs4_do_close(struct nfs4_state *state, gfp_t gfp_mask, int wait) if (IS_ERR(calldata->arg.seqid)) goto out_free_calldata; calldata->arg.fmode = 0; + calldata->lr.arg.ld_private = &calldata->lr.ld_private; calldata->res.fattr = &calldata->fattr; calldata->res.seqid = calldata->arg.seqid; calldata->res.server = server; @@ -5599,6 +5601,7 @@ struct nfs4_delegreturndata { struct { struct nfs4_layoutreturn_args arg; struct nfs4_layoutreturn_res res; + struct nfs4_xdr_opaque_data ld_private; u32 roc_barrier; bool roc; } lr; @@ -5735,6 +5738,7 @@ static int _nfs4_proc_delegreturn(struct inode *inode, struct rpc_cred *cred, co data->res.fattr = &data->fattr; data->res.server = server; data->res.lr_ret = -NFS4ERR_NOMATCHING_LAYOUT; + data->lr.arg.ld_private = &data->lr.ld_private; nfs_fattr_init(data->res.fattr); data->timestamp = jiffies; data->rpc_status = 0; @@ -8633,6 +8637,8 @@ static void nfs4_layoutreturn_release(void *calldata) nfs4_sequence_free_slot(&lrp->res.seq_res); pnfs_put_layout_hdr(lrp->args.layout); nfs_iput_and_deactive(lrp->inode); + if (lrp->ld_private.ops && lrp->ld_private.ops->free) + lrp->ld_private.ops->free(&lrp->ld_private); kfree(calldata); dprintk("<-- %s\n", __func__); } diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c index 84fdcf2..1c1768a 100644 --- a/fs/nfs/nfs4xdr.c +++ b/fs/nfs/nfs4xdr.c @@ -2035,7 +2035,9 @@ encode_layoutreturn(struct xdr_stream *xdr, spin_lock(&args->inode->i_lock); encode_nfs4_stateid(xdr, &args->stateid); spin_unlock(&args->inode->i_lock); - if (lr_ops->encode_layoutreturn) + if (args->ld_private->ops && args->ld_private->ops->encode) + args->ld_private->ops->encode(xdr, args, args->ld_private); + else if (lr_ops->encode_layoutreturn) lr_ops->encode_layoutreturn(xdr, args); else encode_uint32(xdr, 0); diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index 0b25a1c..4631d15 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c @@ -1086,6 +1086,7 @@ pnfs_send_layoutreturn(struct pnfs_layout_hdr *lo, const nfs4_stateid *stateid, } pnfs_init_layoutreturn_args(&lrp->args, lo, stateid, iomode); + lrp->args.ld_private = &lrp->ld_private; lrp->clp = NFS_SERVER(ino)->nfs_client; lrp->cred = lo->plh_lc_cred; -- cgit v1.1 From 287bd3e95452e8ad945854bb98a3a7fbdc2a05c9 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Fri, 2 Dec 2016 16:12:12 -0500 Subject: pNFS: Add a layoutreturn callback to performa layout-private setup Add a callback to allow the flexfiles layout driver to initialise the layout private payload. Signed-off-by: Trond Myklebust --- fs/nfs/pnfs.c | 14 +++++++++++++- fs/nfs/pnfs.h | 1 + 2 files changed, 14 insertions(+), 1 deletion(-) (limited to 'fs/nfs') diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index 4631d15..2a34313 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c @@ -1072,6 +1072,7 @@ pnfs_send_layoutreturn(struct pnfs_layout_hdr *lo, const nfs4_stateid *stateid, enum pnfs_iomode iomode, bool sync) { struct inode *ino = lo->plh_inode; + struct pnfs_layoutdriver_type *ld = NFS_SERVER(ino)->pnfs_curr_ld; struct nfs4_layoutreturn *lrp; int status = 0; @@ -1089,6 +1090,8 @@ pnfs_send_layoutreturn(struct pnfs_layout_hdr *lo, const nfs4_stateid *stateid, lrp->args.ld_private = &lrp->ld_private; lrp->clp = NFS_SERVER(ino)->nfs_client; lrp->cred = lo->plh_lc_cred; + if (ld->prepare_layoutreturn) + ld->prepare_layoutreturn(&lrp->args); status = nfs4_proc_layoutreturn(lrp, sync); out: @@ -1310,9 +1313,15 @@ retry: out_noroc: spin_unlock(&ino->i_lock); pnfs_layoutcommit_inode(ino, true); + if (roc) { + struct pnfs_layoutdriver_type *ld = NFS_SERVER(ino)->pnfs_curr_ld; + if (ld->prepare_layoutreturn) + ld->prepare_layoutreturn(args); + return true; + } if (layoutreturn) pnfs_send_layoutreturn(lo, &stateid, iomode, true); - return roc; + return false; } void pnfs_roc_release(struct nfs4_layoutreturn_args *args, @@ -1322,6 +1331,7 @@ void pnfs_roc_release(struct nfs4_layoutreturn_args *args, struct pnfs_layout_hdr *lo = args->layout; const nfs4_stateid *arg_stateid = NULL; const nfs4_stateid *res_stateid = NULL; + struct nfs4_xdr_opaque_data *ld_private = args->ld_private; if (ret == 0) { arg_stateid = &args->stateid; @@ -1330,6 +1340,8 @@ void pnfs_roc_release(struct nfs4_layoutreturn_args *args, } pnfs_layoutreturn_free_lsegs(lo, arg_stateid, &args->range, res_stateid); + if (ld_private && ld_private->ops && ld_private->ops->free) + ld_private->ops->free(ld_private); pnfs_put_layout_hdr(lo); trace_nfs4_layoutreturn_on_close(args->inode, 0); } diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h index 459ac2c..d1e0281 100644 --- a/fs/nfs/pnfs.h +++ b/fs/nfs/pnfs.h @@ -172,6 +172,7 @@ struct pnfs_layoutdriver_type { (struct nfs_server *server, struct pnfs_device *pdev, gfp_t gfp_flags); + int (*prepare_layoutreturn) (struct nfs4_layoutreturn_args *); void (*encode_layoutreturn) (struct xdr_stream *xdr, const struct nfs4_layoutreturn_args *args); -- cgit v1.1 From 5b9b3c855a16d04d65fa7728b57143552d5d06a0 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Fri, 2 Dec 2016 16:15:05 -0500 Subject: pNFS/flexfiles: Refactor encoding of the layoutreturn payload Add the layout error payload to the flexfiles layoutreturn private data, and set up the encoding mechanisms. This is a refactoring in preparation for adding the layout iostats payload. Signed-off-by: Trond Myklebust --- fs/nfs/flexfilelayout/flexfilelayout.c | 63 +++++++++++++++++++------ fs/nfs/flexfilelayout/flexfilelayout.h | 14 ++++-- fs/nfs/flexfilelayout/flexfilelayoutdev.c | 78 +++++++++++++++++++++++++------ 3 files changed, 124 insertions(+), 31 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/flexfilelayout/flexfilelayout.c b/fs/nfs/flexfilelayout/flexfilelayout.c index 540813c..7f31c35 100644 --- a/fs/nfs/flexfilelayout/flexfilelayout.c +++ b/fs/nfs/flexfilelayout/flexfilelayout.c @@ -25,6 +25,8 @@ #define NFSDBG_FACILITY NFSDBG_PNFS_LD #define FF_LAYOUT_POLL_RETRY_MAX (15*HZ) +#define FF_LAYOUTRETURN_MAXERR 20 + static struct group_info *ff_zero_group; @@ -1971,24 +1973,18 @@ ff_layout_free_deviceid_node(struct nfs4_deviceid_node *d) static int ff_layout_encode_ioerr(struct nfs4_flexfile_layout *flo, struct xdr_stream *xdr, - const struct nfs4_layoutreturn_args *args) + const struct nfs4_layoutreturn_args *args, + const struct nfs4_flexfile_layoutreturn_args *ff_args) { - struct pnfs_layout_hdr *hdr = &flo->generic_hdr; __be32 *start; - int count = 0, ret = 0; start = xdr_reserve_space(xdr, 4); if (unlikely(!start)) return -E2BIG; + *start = cpu_to_be32(ff_args->num_errors); /* This assume we always return _ALL_ layouts */ - spin_lock(&hdr->plh_inode->i_lock); - ret = ff_layout_encode_ds_ioerr(flo, xdr, &count, &args->range); - spin_unlock(&hdr->plh_inode->i_lock); - - *start = cpu_to_be32(count); - - return ret; + return ff_layout_encode_ds_ioerr(xdr, &ff_args->errors); } /* report nothing for now */ @@ -2017,8 +2013,10 @@ ff_layout_alloc_deviceid_node(struct nfs_server *server, static void ff_layout_encode_layoutreturn(struct xdr_stream *xdr, - const struct nfs4_layoutreturn_args *args) + const void *voidargs, + const struct nfs4_xdr_opaque_data *ff_opaque) { + const struct nfs4_layoutreturn_args *args = voidargs; struct pnfs_layout_hdr *lo = args->layout; struct nfs4_flexfile_layout *flo = FF_LAYOUT_FROM_HDR(lo); __be32 *start; @@ -2027,13 +2025,52 @@ ff_layout_encode_layoutreturn(struct xdr_stream *xdr, start = xdr_reserve_space(xdr, 4); BUG_ON(!start); - ff_layout_encode_ioerr(flo, xdr, args); + ff_layout_encode_ioerr(flo, xdr, args, ff_opaque->data); ff_layout_encode_iostats(flo, xdr, args); *start = cpu_to_be32((xdr->p - start - 1) * 4); dprintk("%s: Return\n", __func__); } +static void +ff_layout_free_layoutreturn(struct nfs4_xdr_opaque_data *args) +{ + struct nfs4_flexfile_layoutreturn_args *ff_args; + + if (!args->data) + return; + ff_args = args->data; + args->data = NULL; + + ff_layout_free_ds_ioerr(&ff_args->errors); + + kfree(ff_args); +} + +const struct nfs4_xdr_opaque_ops layoutreturn_ops = { + .encode = ff_layout_encode_layoutreturn, + .free = ff_layout_free_layoutreturn, +}; + +static int +ff_layout_prepare_layoutreturn(struct nfs4_layoutreturn_args *args) +{ + struct nfs4_flexfile_layoutreturn_args *ff_args; + + ff_args = kmalloc(sizeof(*ff_args), GFP_KERNEL); + if (!ff_args) + return -ENOMEM; + + INIT_LIST_HEAD(&ff_args->errors); + ff_args->num_errors = ff_layout_fetch_ds_ioerr(args->layout, + &args->range, &ff_args->errors, + FF_LAYOUTRETURN_MAXERR); + + args->ld_private->ops = &layoutreturn_ops; + args->ld_private->data = ff_args; + return 0; +} + static int ff_layout_ntop4(const struct sockaddr *sap, char *buf, const size_t buflen) { @@ -2299,7 +2336,7 @@ static struct pnfs_layoutdriver_type flexfilelayout_type = { .read_pagelist = ff_layout_read_pagelist, .write_pagelist = ff_layout_write_pagelist, .alloc_deviceid_node = ff_layout_alloc_deviceid_node, - .encode_layoutreturn = ff_layout_encode_layoutreturn, + .prepare_layoutreturn = ff_layout_prepare_layoutreturn, .sync = pnfs_nfs_generic_sync, .prepare_layoutstats = ff_layout_prepare_layoutstats, .cleanup_layoutstats = ff_layout_cleanup_layoutstats, diff --git a/fs/nfs/flexfilelayout/flexfilelayout.h b/fs/nfs/flexfilelayout/flexfilelayout.h index 09f292e..560c837 100644 --- a/fs/nfs/flexfilelayout/flexfilelayout.h +++ b/fs/nfs/flexfilelayout/flexfilelayout.h @@ -106,6 +106,11 @@ struct nfs4_flexfile_layout { ktime_t last_report_time; /* Layoutstat report times */ }; +struct nfs4_flexfile_layoutreturn_args { + struct list_head errors; + unsigned int num_errors; +}; + static inline struct nfs4_flexfile_layout * FF_LAYOUT_FROM_HDR(struct pnfs_layout_hdr *lo) { @@ -183,9 +188,12 @@ int ff_layout_track_ds_error(struct nfs4_flexfile_layout *flo, struct nfs4_ff_layout_mirror *mirror, u64 offset, u64 length, int status, enum nfs_opnum4 opnum, gfp_t gfp_flags); -int ff_layout_encode_ds_ioerr(struct nfs4_flexfile_layout *flo, - struct xdr_stream *xdr, int *count, - const struct pnfs_layout_range *range); +int ff_layout_encode_ds_ioerr(struct xdr_stream *xdr, const struct list_head *head); +void ff_layout_free_ds_ioerr(struct list_head *head); +unsigned int ff_layout_fetch_ds_ioerr(struct pnfs_layout_hdr *lo, + const struct pnfs_layout_range *range, + struct list_head *head, + unsigned int maxnum); struct nfs_fh * nfs4_ff_layout_select_ds_fh(struct pnfs_layout_segment *lseg, u32 mirror_idx); diff --git a/fs/nfs/flexfilelayout/flexfilelayoutdev.c b/fs/nfs/flexfilelayout/flexfilelayoutdev.c index 75767aa..eb98395 100644 --- a/fs/nfs/flexfilelayout/flexfilelayoutdev.c +++ b/fs/nfs/flexfilelayout/flexfilelayoutdev.c @@ -447,20 +447,26 @@ nfs4_ff_find_or_create_ds_client(struct pnfs_layout_segment *lseg, u32 ds_idx, } } +void ff_layout_free_ds_ioerr(struct list_head *head) +{ + struct nfs4_ff_layout_ds_err *err; + + while (!list_empty(head)) { + err = list_first_entry(head, + struct nfs4_ff_layout_ds_err, + list); + list_del(&err->list); + kfree(err); + } +} + /* called with inode i_lock held */ -int ff_layout_encode_ds_ioerr(struct nfs4_flexfile_layout *flo, - struct xdr_stream *xdr, int *count, - const struct pnfs_layout_range *range) +int ff_layout_encode_ds_ioerr(struct xdr_stream *xdr, const struct list_head *head) { - struct nfs4_ff_layout_ds_err *err, *n; + struct nfs4_ff_layout_ds_err *err; __be32 *p; - list_for_each_entry_safe(err, n, &flo->error_list, list) { - if (!pnfs_is_range_intersecting(err->offset, - pnfs_end_offset(err->offset, err->length), - range->offset, - pnfs_end_offset(range->offset, range->length))) - continue; + list_for_each_entry(err, head, list) { /* offset(8) + length(8) + stateid(NFS4_STATEID_SIZE) * + array length + deviceid(NFS4_DEVICEID4_SIZE) * + status(4) + opnum(4) @@ -479,17 +485,59 @@ int ff_layout_encode_ds_ioerr(struct nfs4_flexfile_layout *flo, NFS4_DEVICEID4_SIZE); *p++ = cpu_to_be32(err->status); *p++ = cpu_to_be32(err->opnum); - *count += 1; - list_del(&err->list); - dprintk("%s: offset %llu length %llu status %d op %d count %d\n", + dprintk("%s: offset %llu length %llu status %d op %d\n", __func__, err->offset, err->length, err->status, - err->opnum, *count); - kfree(err); + err->opnum); } return 0; } +static +unsigned int do_layout_fetch_ds_ioerr(struct pnfs_layout_hdr *lo, + const struct pnfs_layout_range *range, + struct list_head *head, + unsigned int maxnum) +{ + struct nfs4_flexfile_layout *flo = FF_LAYOUT_FROM_HDR(lo); + struct inode *inode = lo->plh_inode; + struct nfs4_ff_layout_ds_err *err, *n; + unsigned int ret = 0; + + spin_lock(&inode->i_lock); + list_for_each_entry_safe(err, n, &flo->error_list, list) { + if (!pnfs_is_range_intersecting(err->offset, + pnfs_end_offset(err->offset, err->length), + range->offset, + pnfs_end_offset(range->offset, range->length))) + continue; + if (!maxnum) + break; + list_move(&err->list, head); + maxnum--; + ret++; + } + spin_unlock(&inode->i_lock); + return ret; +} + +unsigned int ff_layout_fetch_ds_ioerr(struct pnfs_layout_hdr *lo, + const struct pnfs_layout_range *range, + struct list_head *head, + unsigned int maxnum) +{ + unsigned int ret; + + ret = do_layout_fetch_ds_ioerr(lo, range, head, maxnum); + /* If we're over the max, discard all remaining entries */ + if (ret == maxnum) { + LIST_HEAD(discard); + do_layout_fetch_ds_ioerr(lo, range, &discard, -1); + ff_layout_free_ds_ioerr(&discard); + } + return ret; +} + static bool ff_read_layout_has_available_ds(struct pnfs_layout_segment *lseg) { struct nfs4_ff_layout_mirror *mirror; -- cgit v1.1 From 08e2e5bc6c9a1b0e92da5225080bbb33786abd08 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Thu, 29 Sep 2016 09:15:27 -0700 Subject: pNFS/flexfiles: Clean up layoutstats Signed-off-by: Trond Myklebust --- fs/nfs/flexfilelayout/flexfilelayout.c | 27 +++++++-------------------- 1 file changed, 7 insertions(+), 20 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/flexfilelayout/flexfilelayout.c b/fs/nfs/flexfilelayout/flexfilelayout.c index 7f31c35..f61df16 100644 --- a/fs/nfs/flexfilelayout/flexfilelayout.c +++ b/fs/nfs/flexfilelayout/flexfilelayout.c @@ -2227,14 +2227,13 @@ ff_layout_encode_layoutstats(struct xdr_stream *xdr, } static int -ff_layout_mirror_prepare_stats(struct nfs42_layoutstat_args *args, - struct pnfs_layout_hdr *lo, +ff_layout_mirror_prepare_stats(struct pnfs_layout_hdr *lo, + struct nfs42_layoutstat_devinfo *devinfo, int dev_limit) { struct nfs4_flexfile_layout *ff_layout = FF_LAYOUT_FROM_HDR(lo); struct nfs4_ff_layout_mirror *mirror; struct nfs4_deviceid_node *dev; - struct nfs42_layoutstat_devinfo *devinfo; int i = 0; list_for_each_entry(mirror, &ff_layout->mirrors, mirrors) { @@ -2248,7 +2247,6 @@ ff_layout_mirror_prepare_stats(struct nfs42_layoutstat_args *args, if (!atomic_inc_not_zero(&mirror->ref)) continue; dev = &mirror->mirror_ds->id_node; - devinfo = &args->devinfo[i]; memcpy(&devinfo->dev_id, &dev->deviceid, NFS4_DEVICEID4_SIZE); devinfo->offset = 0; devinfo->length = NFS4_MAX_UINT64; @@ -2260,6 +2258,7 @@ ff_layout_mirror_prepare_stats(struct nfs42_layoutstat_args *args, devinfo->layoutstats_encode = ff_layout_encode_layoutstats; devinfo->layout_private = mirror; + devinfo++; i++; } return i; @@ -2269,29 +2268,17 @@ static int ff_layout_prepare_layoutstats(struct nfs42_layoutstat_args *args) { struct nfs4_flexfile_layout *ff_layout; - struct nfs4_ff_layout_mirror *mirror; - int dev_count = 0; + const int dev_count = PNFS_LAYOUTSTATS_MAXDEV; - spin_lock(&args->inode->i_lock); - ff_layout = FF_LAYOUT_FROM_HDR(NFS_I(args->inode)->layout); - list_for_each_entry(mirror, &ff_layout->mirrors, mirrors) { - if (atomic_read(&mirror->ref) != 0) - dev_count ++; - } - spin_unlock(&args->inode->i_lock); /* For now, send at most PNFS_LAYOUTSTATS_MAXDEV statistics */ - if (dev_count > PNFS_LAYOUTSTATS_MAXDEV) { - dprintk("%s: truncating devinfo to limit (%d:%d)\n", - __func__, dev_count, PNFS_LAYOUTSTATS_MAXDEV); - dev_count = PNFS_LAYOUTSTATS_MAXDEV; - } args->devinfo = kmalloc_array(dev_count, sizeof(*args->devinfo), GFP_NOIO); if (!args->devinfo) return -ENOMEM; spin_lock(&args->inode->i_lock); - args->num_dev = ff_layout_mirror_prepare_stats(args, - &ff_layout->generic_hdr, dev_count); + ff_layout = FF_LAYOUT_FROM_HDR(NFS_I(args->inode)->layout); + args->num_dev = ff_layout_mirror_prepare_stats(&ff_layout->generic_hdr, + &args->devinfo[0], dev_count); spin_unlock(&args->inode->i_lock); if (!args->num_dev) { kfree(args->devinfo); -- cgit v1.1 From 2f8220c16ee0055f7ea541782651b30398752ee4 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 3 Oct 2016 18:13:20 -0400 Subject: NFS: Fix up read of mirror stats Need to lock while reading in order to ensure 64-bit reads are correct. Signed-off-by: Trond Myklebust --- fs/nfs/flexfilelayout/flexfilelayout.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'fs/nfs') diff --git a/fs/nfs/flexfilelayout/flexfilelayout.c b/fs/nfs/flexfilelayout/flexfilelayout.c index f61df16..c18afd5 100644 --- a/fs/nfs/flexfilelayout/flexfilelayout.c +++ b/fs/nfs/flexfilelayout/flexfilelayout.c @@ -2250,10 +2250,12 @@ ff_layout_mirror_prepare_stats(struct pnfs_layout_hdr *lo, memcpy(&devinfo->dev_id, &dev->deviceid, NFS4_DEVICEID4_SIZE); devinfo->offset = 0; devinfo->length = NFS4_MAX_UINT64; + spin_lock(&mirror->lock); devinfo->read_count = mirror->read_stat.io_stat.ops_completed; devinfo->read_bytes = mirror->read_stat.io_stat.bytes_completed; devinfo->write_count = mirror->write_stat.io_stat.ops_completed; devinfo->write_bytes = mirror->write_stat.io_stat.bytes_completed; + spin_unlock(&mirror->lock); devinfo->layout_type = LAYOUT_FLEX_FILES; devinfo->layoutstats_encode = ff_layout_encode_layoutstats; devinfo->layout_private = mirror; -- cgit v1.1 From 422c93c881a1689b5ad99e231a65ee5c51d3b72a Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Thu, 6 Oct 2016 17:53:20 -0400 Subject: pNFS/flexfiles: Minor refactoring before adding iostats to layoutreturn Signed-off-by: Trond Myklebust --- fs/nfs/flexfilelayout/flexfilelayout.c | 59 ++++++++++++++++++++-------------- fs/nfs/nfs42proc.c | 9 ++++-- fs/nfs/nfs42xdr.c | 5 +-- fs/nfs/pnfs.h | 1 - 4 files changed, 43 insertions(+), 31 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/flexfilelayout/flexfilelayout.c b/fs/nfs/flexfilelayout/flexfilelayout.c index c18afd5..e507830 100644 --- a/fs/nfs/flexfilelayout/flexfilelayout.c +++ b/fs/nfs/flexfilelayout/flexfilelayout.c @@ -1988,7 +1988,7 @@ static int ff_layout_encode_ioerr(struct nfs4_flexfile_layout *flo, } /* report nothing for now */ -static void ff_layout_encode_iostats(struct nfs4_flexfile_layout *flo, +static void ff_layout_encode_iostats_array(struct nfs4_flexfile_layout *flo, struct xdr_stream *xdr, const struct nfs4_layoutreturn_args *args) { @@ -2026,7 +2026,7 @@ ff_layout_encode_layoutreturn(struct xdr_stream *xdr, BUG_ON(!start); ff_layout_encode_ioerr(flo, xdr, args, ff_opaque->data); - ff_layout_encode_iostats(flo, xdr, args); + ff_layout_encode_iostats_array(flo, xdr, args); *start = cpu_to_be32((xdr->p - start - 1) * 4); dprintk("%s: Return\n", __func__); @@ -2191,21 +2191,18 @@ ff_layout_encode_io_latency(struct xdr_stream *xdr, } static void -ff_layout_encode_layoutstats(struct xdr_stream *xdr, - struct nfs42_layoutstat_args *args, - struct nfs42_layoutstat_devinfo *devinfo) +ff_layout_encode_ff_layoutupdate(struct xdr_stream *xdr, + const struct nfs42_layoutstat_devinfo *devinfo, + struct nfs4_ff_layout_mirror *mirror) { - struct nfs4_ff_layout_mirror *mirror = devinfo->layout_private; struct nfs4_pnfs_ds_addr *da; struct nfs4_pnfs_ds *ds = mirror->mirror_ds->ds; struct nfs_fh *fh = &mirror->fh_versions[0]; - __be32 *p, *start; + __be32 *p; da = list_first_entry(&ds->ds_addrs, struct nfs4_pnfs_ds_addr, da_node); dprintk("%s: DS %s: encoding address %s\n", __func__, ds->ds_remotestr, da->da_remotestr); - /* layoutupdate length */ - start = xdr_reserve_space(xdr, 4); /* netaddr4 */ ff_layout_encode_netaddr(xdr, da); /* nfs_fh4 */ @@ -2222,10 +2219,36 @@ ff_layout_encode_layoutstats(struct xdr_stream *xdr, /* bool */ p = xdr_reserve_space(xdr, 4); *p = cpu_to_be32(false); +} + +static void +ff_layout_encode_layoutstats(struct xdr_stream *xdr, const void *args, + const struct nfs4_xdr_opaque_data *opaque) +{ + struct nfs42_layoutstat_devinfo *devinfo = container_of(opaque, + struct nfs42_layoutstat_devinfo, ld_private); + __be32 *start; + + /* layoutupdate length */ + start = xdr_reserve_space(xdr, 4); + ff_layout_encode_ff_layoutupdate(xdr, devinfo, opaque->data); *start = cpu_to_be32((xdr->p - start - 1) * 4); } +static void +ff_layout_free_layoutstats(struct nfs4_xdr_opaque_data *opaque) +{ + struct nfs4_ff_layout_mirror *mirror = opaque->data; + + ff_layout_put_mirror(mirror); +} + +static const struct nfs4_xdr_opaque_ops layoutstat_ops = { + .encode = ff_layout_encode_layoutstats, + .free = ff_layout_free_layoutstats, +}; + static int ff_layout_mirror_prepare_stats(struct pnfs_layout_hdr *lo, struct nfs42_layoutstat_devinfo *devinfo, @@ -2257,8 +2280,8 @@ ff_layout_mirror_prepare_stats(struct pnfs_layout_hdr *lo, devinfo->write_bytes = mirror->write_stat.io_stat.bytes_completed; spin_unlock(&mirror->lock); devinfo->layout_type = LAYOUT_FLEX_FILES; - devinfo->layoutstats_encode = ff_layout_encode_layoutstats; - devinfo->layout_private = mirror; + devinfo->ld_private.ops = &layoutstat_ops; + devinfo->ld_private.data = mirror; devinfo++; i++; @@ -2291,19 +2314,6 @@ ff_layout_prepare_layoutstats(struct nfs42_layoutstat_args *args) return 0; } -static void -ff_layout_cleanup_layoutstats(struct nfs42_layoutstat_data *data) -{ - struct nfs4_ff_layout_mirror *mirror; - int i; - - for (i = 0; i < data->args.num_dev; i++) { - mirror = data->args.devinfo[i].layout_private; - data->args.devinfo[i].layout_private = NULL; - ff_layout_put_mirror(mirror); - } -} - static struct pnfs_layoutdriver_type flexfilelayout_type = { .id = LAYOUT_FLEX_FILES, .name = "LAYOUT_FLEX_FILES", @@ -2328,7 +2338,6 @@ static struct pnfs_layoutdriver_type flexfilelayout_type = { .prepare_layoutreturn = ff_layout_prepare_layoutreturn, .sync = pnfs_nfs_generic_sync, .prepare_layoutstats = ff_layout_prepare_layoutstats, - .cleanup_layoutstats = ff_layout_cleanup_layoutstats, }; static int __init nfs4flexfilelayout_init(void) diff --git a/fs/nfs/nfs42proc.c b/fs/nfs/nfs42proc.c index 6085019..d12ff93 100644 --- a/fs/nfs/nfs42proc.c +++ b/fs/nfs/nfs42proc.c @@ -397,10 +397,13 @@ static void nfs42_layoutstat_release(void *calldata) { struct nfs42_layoutstat_data *data = calldata; - struct nfs_server *nfss = NFS_SERVER(data->args.inode); + struct nfs42_layoutstat_devinfo *devinfo = data->args.devinfo; + int i; - if (nfss->pnfs_curr_ld->cleanup_layoutstats) - nfss->pnfs_curr_ld->cleanup_layoutstats(data); + for (i = 0; i < data->args.num_dev; i++) { + if (devinfo[i].ld_private.ops && devinfo[i].ld_private.ops->free) + devinfo[i].ld_private.ops->free(&devinfo[i].ld_private); + } pnfs_put_layout_hdr(NFS_I(data->args.inode)->layout); smp_mb__before_atomic(); diff --git a/fs/nfs/nfs42xdr.c b/fs/nfs/nfs42xdr.c index 8b26058..6c72964 100644 --- a/fs/nfs/nfs42xdr.c +++ b/fs/nfs/nfs42xdr.c @@ -181,8 +181,9 @@ static void encode_layoutstats(struct xdr_stream *xdr, NFS4_DEVICEID4_SIZE); /* Encode layoutupdate4 */ *p++ = cpu_to_be32(devinfo->layout_type); - if (devinfo->layoutstats_encode != NULL) - devinfo->layoutstats_encode(xdr, args, devinfo); + if (devinfo->ld_private.ops) + devinfo->ld_private.ops->encode(xdr, args, + &devinfo->ld_private); else encode_uint32(xdr, 0); } diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h index d1e0281..63f77b4 100644 --- a/fs/nfs/pnfs.h +++ b/fs/nfs/pnfs.h @@ -182,7 +182,6 @@ struct pnfs_layoutdriver_type { struct xdr_stream *xdr, const struct nfs4_layoutcommit_args *args); int (*prepare_layoutstats) (struct nfs42_layoutstat_args *args); - void (*cleanup_layoutstats) (struct nfs42_layoutstat_data *data); }; struct pnfs_layout_hdr { -- cgit v1.1 From 230bc962a6ffef8b15ac1fd2664ae9d4b56a64a6 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Wed, 19 Oct 2016 15:59:28 -0400 Subject: pNFS/flexfiles: Support sending layoutstats in layoutreturn Add the ability to send an array of layoutstats entries as part of layoutreturn. Signed-off-by: Trond Myklebust --- fs/nfs/flexfilelayout/flexfilelayout.c | 82 +++++++++++++++++++++++++++++++--- fs/nfs/flexfilelayout/flexfilelayout.h | 3 ++ 2 files changed, 79 insertions(+), 6 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/flexfilelayout/flexfilelayout.c b/fs/nfs/flexfilelayout/flexfilelayout.c index e507830..ca1012a 100644 --- a/fs/nfs/flexfilelayout/flexfilelayout.c +++ b/fs/nfs/flexfilelayout/flexfilelayout.c @@ -32,6 +32,12 @@ static struct group_info *ff_zero_group; static void ff_layout_read_record_layoutstats_done(struct rpc_task *task, struct nfs_pgio_header *hdr); +static int ff_layout_mirror_prepare_stats(struct pnfs_layout_hdr *lo, + struct nfs42_layoutstat_devinfo *devinfo, + int dev_limit); +static void ff_layout_encode_ff_layoutupdate(struct xdr_stream *xdr, + const struct nfs42_layoutstat_devinfo *devinfo, + struct nfs4_ff_layout_mirror *mirror); static struct pnfs_layout_hdr * ff_layout_alloc_layout_hdr(struct inode *inode, gfp_t gfp_flags) @@ -1987,16 +1993,73 @@ static int ff_layout_encode_ioerr(struct nfs4_flexfile_layout *flo, return ff_layout_encode_ds_ioerr(xdr, &ff_args->errors); } +static void +encode_opaque_fixed(struct xdr_stream *xdr, const void *buf, size_t len) +{ + __be32 *p; + + p = xdr_reserve_space(xdr, len); + xdr_encode_opaque_fixed(p, buf, len); +} + +static void +ff_layout_encode_ff_iostat_head(struct xdr_stream *xdr, + const nfs4_stateid *stateid, + const struct nfs42_layoutstat_devinfo *devinfo) +{ + __be32 *p; + + p = xdr_reserve_space(xdr, 8 + 8); + p = xdr_encode_hyper(p, devinfo->offset); + p = xdr_encode_hyper(p, devinfo->length); + encode_opaque_fixed(xdr, stateid->data, NFS4_STATEID_SIZE); + p = xdr_reserve_space(xdr, 4*8); + p = xdr_encode_hyper(p, devinfo->read_count); + p = xdr_encode_hyper(p, devinfo->read_bytes); + p = xdr_encode_hyper(p, devinfo->write_count); + p = xdr_encode_hyper(p, devinfo->write_bytes); + encode_opaque_fixed(xdr, devinfo->dev_id.data, NFS4_DEVICEID4_SIZE); +} + +static void +ff_layout_encode_ff_iostat(struct xdr_stream *xdr, + const nfs4_stateid *stateid, + const struct nfs42_layoutstat_devinfo *devinfo) +{ + ff_layout_encode_ff_iostat_head(xdr, stateid, devinfo); + ff_layout_encode_ff_layoutupdate(xdr, devinfo, + devinfo->ld_private.data); +} + /* report nothing for now */ -static void ff_layout_encode_iostats_array(struct nfs4_flexfile_layout *flo, - struct xdr_stream *xdr, - const struct nfs4_layoutreturn_args *args) +static void ff_layout_encode_iostats_array(struct xdr_stream *xdr, + const struct nfs4_layoutreturn_args *args, + struct nfs4_flexfile_layoutreturn_args *ff_args) { __be32 *p; + int i; p = xdr_reserve_space(xdr, 4); - if (likely(p)) - *p = cpu_to_be32(0); + *p = cpu_to_be32(ff_args->num_dev); + for (i = 0; i < ff_args->num_dev; i++) + ff_layout_encode_ff_iostat(xdr, + &args->layout->plh_stateid, + &ff_args->devinfo[i]); +} + +static void +ff_layout_free_iostats_array(struct nfs42_layoutstat_devinfo *devinfo, + unsigned int num_entries) +{ + unsigned int i; + + for (i = 0; i < num_entries; i++) { + if (!devinfo[i].ld_private.ops) + continue; + if (!devinfo[i].ld_private.ops->free) + continue; + devinfo[i].ld_private.ops->free(&devinfo[i].ld_private); + } } static struct nfs4_deviceid_node * @@ -2026,7 +2089,7 @@ ff_layout_encode_layoutreturn(struct xdr_stream *xdr, BUG_ON(!start); ff_layout_encode_ioerr(flo, xdr, args, ff_opaque->data); - ff_layout_encode_iostats_array(flo, xdr, args); + ff_layout_encode_iostats_array(xdr, args, ff_opaque->data); *start = cpu_to_be32((xdr->p - start - 1) * 4); dprintk("%s: Return\n", __func__); @@ -2043,6 +2106,7 @@ ff_layout_free_layoutreturn(struct nfs4_xdr_opaque_data *args) args->data = NULL; ff_layout_free_ds_ioerr(&ff_args->errors); + ff_layout_free_iostats_array(ff_args->devinfo, ff_args->num_dev); kfree(ff_args); } @@ -2056,6 +2120,7 @@ static int ff_layout_prepare_layoutreturn(struct nfs4_layoutreturn_args *args) { struct nfs4_flexfile_layoutreturn_args *ff_args; + struct nfs4_flexfile_layout *ff_layout = FF_LAYOUT_FROM_HDR(args->layout); ff_args = kmalloc(sizeof(*ff_args), GFP_KERNEL); if (!ff_args) @@ -2066,6 +2131,11 @@ ff_layout_prepare_layoutreturn(struct nfs4_layoutreturn_args *args) &args->range, &ff_args->errors, FF_LAYOUTRETURN_MAXERR); + spin_lock(&args->inode->i_lock); + ff_args->num_dev = ff_layout_mirror_prepare_stats(&ff_layout->generic_hdr, + &ff_args->devinfo[0], ARRAY_SIZE(ff_args->devinfo)); + spin_unlock(&args->inode->i_lock); + args->ld_private->ops = &layoutreturn_ops; args->ld_private->data = ff_args; return 0; diff --git a/fs/nfs/flexfilelayout/flexfilelayout.h b/fs/nfs/flexfilelayout/flexfilelayout.h index 560c837..35221fe 100644 --- a/fs/nfs/flexfilelayout/flexfilelayout.h +++ b/fs/nfs/flexfilelayout/flexfilelayout.h @@ -21,6 +21,7 @@ /* LAYOUTSTATS report interval in ms */ #define FF_LAYOUTSTATS_REPORT_INTERVAL (60000L) +#define FF_LAYOUTSTATS_MAXDEV 4 struct nfs4_ff_ds_version { u32 version; @@ -108,7 +109,9 @@ struct nfs4_flexfile_layout { struct nfs4_flexfile_layoutreturn_args { struct list_head errors; + struct nfs42_layoutstat_devinfo devinfo[FF_LAYOUTSTATS_MAXDEV]; unsigned int num_errors; + unsigned int num_dev; }; static inline struct nfs4_flexfile_layout * -- cgit v1.1 From 10727772b93cc80c859b2e45bb32976d79b21990 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sun, 4 Dec 2016 16:02:43 -0500 Subject: NFS: Fix incorrect mapping revalidation when holding a delegation We should only care about checking the attributes if the page cache is marked as dubious (using NFS_INO_REVAL_PAGECACHE) and the NFS_INO_REVAL_FORCED flag is set. Signed-off-by: Trond Myklebust --- fs/nfs/inode.c | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index 75f5a9c..df4d7ec 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -1114,9 +1114,15 @@ static int nfs_invalidate_mapping(struct inode *inode, struct address_space *map static bool nfs_mapping_need_revalidate_inode(struct inode *inode) { - if (nfs_have_delegated_attributes(inode)) - return false; - return (NFS_I(inode)->cache_validity & NFS_INO_REVAL_PAGECACHE) + unsigned long cache_validity = NFS_I(inode)->cache_validity; + + if (NFS_PROTO(inode)->have_delegation(inode, FMODE_READ)) { + const unsigned long force_reval = + NFS_INO_REVAL_PAGECACHE|NFS_INO_REVAL_FORCED; + return (cache_validity & force_reval) == force_reval; + } + + return (cache_validity & NFS_INO_REVAL_PAGECACHE) || nfs_attribute_timeout(inode) || NFS_STALE(inode); } -- cgit v1.1 From 9310b224f2ecc8bb075b86d753a2f359e3e1ac85 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sun, 4 Dec 2016 18:08:40 -0500 Subject: NFS: Fix incorrect size revalidation when holding a delegation We should only care about checking the attributes if the page cache is marked as dubious (using NFS_INO_REVAL_PAGECACHE) and the NFS_INO_REVAL_FORCED flag is set. Signed-off-by: Trond Myklebust --- fs/nfs/file.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'fs/nfs') diff --git a/fs/nfs/file.c b/fs/nfs/file.c index 9ea85ae..64c11f39 100644 --- a/fs/nfs/file.c +++ b/fs/nfs/file.c @@ -102,8 +102,11 @@ static int nfs_revalidate_file_size(struct inode *inode, struct file *filp) { struct nfs_server *server = NFS_SERVER(inode); struct nfs_inode *nfsi = NFS_I(inode); + const unsigned long force_reval = NFS_INO_REVAL_PAGECACHE|NFS_INO_REVAL_FORCED; + unsigned long cache_validity = nfsi->cache_validity; - if (nfs_have_delegated_attributes(inode)) + if (NFS_PROTO(inode)->have_delegation(inode, FMODE_READ) && + (cache_validity & force_reval) != force_reval) goto out_noreval; if (filp->f_flags & O_DIRECT) -- cgit v1.1 From 1cd9cb05f96e526f41bb4704caa95dc40ed08c5d Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sun, 4 Dec 2016 18:34:34 -0500 Subject: NFS: Only look at the change attribute cache state in nfs_check_verifier When looking at whether or not our dcache is valid, we really don't care about the general state of the directory attribute cache. Instead, we we only care about the state of the change attribute. This fixes a performance issue when the client is responsible for changing the directory contents; a number of NFSv4 operations will atomically update the directory change attribute, but may not return all the other attributes. Signed-off-by: Trond Myklebust --- fs/nfs/dir.c | 14 ++++++-------- fs/nfs/inode.c | 2 +- 2 files changed, 7 insertions(+), 9 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index f570245..7483722 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -1030,8 +1030,6 @@ EXPORT_SYMBOL_GPL(nfs_force_lookup_revalidate); static int nfs_check_verifier(struct inode *dir, struct dentry *dentry, int rcu_walk) { - int ret; - if (IS_ROOT(dentry)) return 1; if (NFS_SERVER(dir)->flags & NFS_MOUNT_LOOKUP_CACHE_NONE) @@ -1039,12 +1037,12 @@ static int nfs_check_verifier(struct inode *dir, struct dentry *dentry, if (!nfs_verify_change_attribute(dir, dentry->d_time)) return 0; /* Revalidate nfsi->cache_change_attribute before we declare a match */ - if (rcu_walk) - ret = nfs_revalidate_inode_rcu(NFS_SERVER(dir), dir); - else - ret = nfs_revalidate_inode(NFS_SERVER(dir), dir); - if (ret < 0) - return 0; + if (nfs_mapping_need_revalidate_inode(dir)) { + if (rcu_walk) + return 0; + if (__nfs_revalidate_inode(NFS_SERVER(dir), dir) < 0) + return 0; + } if (!nfs_verify_change_attribute(dir, dentry->d_time)) return 0; return 1; diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index df4d7ec..7de345f 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -1112,7 +1112,7 @@ static int nfs_invalidate_mapping(struct inode *inode, struct address_space *map return 0; } -static bool nfs_mapping_need_revalidate_inode(struct inode *inode) +bool nfs_mapping_need_revalidate_inode(struct inode *inode) { unsigned long cache_validity = NFS_I(inode)->cache_validity; -- cgit v1.1 From 2cf10cdd486c362f983abdce00dc1127e8ab8c59 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sun, 4 Dec 2016 19:26:40 -0500 Subject: NFSv4.1: Handle NFS4ERR_BADSESSION/NFS4ERR_DEADSESSION replies to OP_SEQUENCE In the case where SEQUENCE receives a NFS4ERR_BADSESSION or NFS4ERR_DEADSESSION error, we just want to report the session as needing recovery, and then we want to retry the operation. Signed-off-by: Trond Myklebust --- fs/nfs/nfs4proc.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'fs/nfs') diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index f992281..4fd0e2b 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -816,6 +816,10 @@ static int nfs41_sequence_process(struct rpc_task *task, case -NFS4ERR_SEQ_FALSE_RETRY: ++slot->seq_nr; goto retry_nowait; + case -NFS4ERR_DEADSESSION: + case -NFS4ERR_BADSESSION: + nfs4_schedule_session_recovery(session, res->sr_status); + goto retry_nowait; default: /* Just update the slot sequence no. */ slot->seq_done = 1; -- cgit v1.1 From d94cbf6c73324a008dfb8576f15d089d7f707f24 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sun, 4 Dec 2016 19:34:38 -0500 Subject: NFSv4.1: Don't schedule lease recovery in nfs4_schedule_session_recovery() If the session has an error, then we want to start by recovering the session, as any SEQUENCE we send is going to fail with a session error. Signed-off-by: Trond Myklebust --- fs/nfs/nfs4state.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs/nfs') diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c index 26b6b8b..95baf7d 100644 --- a/fs/nfs/nfs4state.c +++ b/fs/nfs/nfs4state.c @@ -2193,7 +2193,7 @@ void nfs4_schedule_session_recovery(struct nfs4_session *session, int err) case -NFS4ERR_CONN_NOT_BOUND_TO_SESSION: set_bit(NFS4CLNT_BIND_CONN_TO_SESSION, &clp->cl_state); } - nfs4_schedule_lease_recovery(clp); + nfs4_schedule_state_manager(clp); } EXPORT_SYMBOL_GPL(nfs4_schedule_session_recovery); -- cgit v1.1 From 362fb578a573adeb67aa4790d901d851b8f8c8f3 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 5 Dec 2016 17:33:07 -0500 Subject: pNFS: Release NFS_LAYOUT_RETURN when invalidating the layout stateid Ensure we release the NFS_LAYOUT_RETURN lock when we invalidate the layout stateid, so that processes and RPC tasks that are waiting on the layout return can continue. Signed-off-by: Trond Myklebust --- fs/nfs/pnfs.c | 21 ++++++++++++--------- 1 file changed, 12 insertions(+), 9 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index 2a34313..896df7b 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c @@ -327,6 +327,15 @@ pnfs_clear_layoutreturn_info(struct pnfs_layout_hdr *lo) clear_bit(NFS_LAYOUT_RETURN_REQUESTED, &lo->plh_flags); } +static void pnfs_clear_layoutreturn_waitbit(struct pnfs_layout_hdr *lo) +{ + clear_bit_unlock(NFS_LAYOUT_RETURN, &lo->plh_flags); + clear_bit(NFS_LAYOUT_RETURN_LOCK, &lo->plh_flags); + smp_mb__after_atomic(); + wake_up_bit(&lo->plh_flags, NFS_LAYOUT_RETURN); + rpc_wake_up(&NFS_SERVER(lo->plh_inode)->roc_rpcwaitq); +} + static void pnfs_clear_lseg_state(struct pnfs_layout_segment *lseg, struct list_head *free_me) @@ -362,6 +371,9 @@ pnfs_mark_layout_stateid_invalid(struct pnfs_layout_hdr *lo, list_for_each_entry_safe(lseg, next, &lo->plh_segs, pls_list) pnfs_clear_lseg_state(lseg, lseg_list); pnfs_free_returned_lsegs(lo, lseg_list, &range, 0); + if (test_bit(NFS_LAYOUT_RETURN, &lo->plh_flags) && + !test_and_set_bit(NFS_LAYOUT_RETURN_LOCK, &lo->plh_flags)) + pnfs_clear_layoutreturn_waitbit(lo); return !list_empty(&lo->plh_segs); } @@ -984,15 +996,6 @@ static void pnfs_clear_layoutcommit(struct inode *inode, } } -static void pnfs_clear_layoutreturn_waitbit(struct pnfs_layout_hdr *lo) -{ - clear_bit_unlock(NFS_LAYOUT_RETURN, &lo->plh_flags); - clear_bit(NFS_LAYOUT_RETURN_LOCK, &lo->plh_flags); - smp_mb__after_atomic(); - wake_up_bit(&lo->plh_flags, NFS_LAYOUT_RETURN); - rpc_wake_up(&NFS_SERVER(lo->plh_inode)->roc_rpcwaitq); -} - void pnfs_layoutreturn_free_lsegs(struct pnfs_layout_hdr *lo, const nfs4_stateid *arg_stateid, const struct pnfs_layout_range *range, -- cgit v1.1 From 7a0566b38c518e98df2359c8c15c2b3f91a4d67e Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Tue, 6 Dec 2016 15:50:06 -0500 Subject: NFSv4: Add missing nfs_put_lock_context() Otherwise the lock context won't be freed when we're done with it. From: NeilBrown Fixes: 5bd3f817 ("NFSv4: change nfs4_select_rw_stateid to take a lock_context inplace of lock_owner") Signed-off-by: Anna Schumaker Signed-off-by: Trond Myklebust --- fs/nfs/nfs4proc.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 4fd0e2b..d3431ff 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -2955,8 +2955,10 @@ static int _nfs4_do_setattr(struct inode *inode, l_ctx = nfs_get_lock_context(ctx); if (IS_ERR(l_ctx)) return PTR_ERR(l_ctx); - if (nfs4_select_rw_stateid(ctx->state, FMODE_WRITE, l_ctx, - &arg->stateid, &delegation_cred) == -EIO) + status = nfs4_select_rw_stateid(ctx->state, FMODE_WRITE, l_ctx, + &arg->stateid, &delegation_cred); + nfs_put_lock_context(l_ctx); + if (status == -EIO) return -EBADF; } else nfs4_stateid_copy(&arg->stateid, &zero_stateid); -- cgit v1.1 From cb067935175ca477380806dc80bf5f0bb51f6f71 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 6 Dec 2016 12:00:51 -0500 Subject: pNFS/flexfiles: Fix ff_layout_add_ds_error_locked() When we're merging an old entry into our new entry, we want to ensure that we add the list entry in the correct place. Signed-off-by: Trond Myklebust --- fs/nfs/flexfilelayout/flexfilelayoutdev.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'fs/nfs') diff --git a/fs/nfs/flexfilelayout/flexfilelayoutdev.c b/fs/nfs/flexfilelayout/flexfilelayoutdev.c index eb98395..142bfd0 100644 --- a/fs/nfs/flexfilelayout/flexfilelayoutdev.c +++ b/fs/nfs/flexfilelayout/flexfilelayoutdev.c @@ -254,8 +254,9 @@ ff_layout_add_ds_error_locked(struct nfs4_flexfile_layout *flo, } /* Entries match, so merge "err" into "dserr" */ extend_ds_error(dserr, err->offset, err->length); - list_del(&err->list); + list_replace(&err->list, &dserr->list); kfree(err); + return; } list_add_tail(&dserr->list, head); -- cgit v1.1 From 2f065ddb64193ebf9cd600395d4782287cd0f58e Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Wed, 7 Dec 2016 12:29:26 -0500 Subject: pNFS: Layoutreturn must free the layout after the layout-private data The layout-private data may depend on the layout and/or the inode still existing when it does post-processing and frees its data, so we need to free them after calling lrp->ld_private.ops->free(). This fixes a mirror list corruption issue in the flexfiles driver. Signed-off-by: Trond Myklebust --- fs/nfs/nfs4proc.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index d3431ff..c5a5086 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -8641,10 +8641,10 @@ static void nfs4_layoutreturn_release(void *calldata) pnfs_layoutreturn_free_lsegs(lo, &lrp->args.stateid, &lrp->args.range, lrp->res.lrs_present ? &lrp->res.stateid : NULL); nfs4_sequence_free_slot(&lrp->res.seq_res); - pnfs_put_layout_hdr(lrp->args.layout); - nfs_iput_and_deactive(lrp->inode); if (lrp->ld_private.ops && lrp->ld_private.ops->free) lrp->ld_private.ops->free(&lrp->ld_private); + pnfs_put_layout_hdr(lrp->args.layout); + nfs_iput_and_deactive(lrp->inode); kfree(calldata); dprintk("<-- %s\n", __func__); } -- cgit v1.1 From 65990d1afbd2d6fc23c6ecbd6f1899aa760a024f Mon Sep 17 00:00:00 2001 From: Fred Isaman Date: Fri, 30 Sep 2016 14:37:41 -0400 Subject: pNFS/flexfiles: Fix a deadlock on LAYOUTGET We encountered a deadlock where the SEQUENCE that accompanied the LAYOUTGET triggered a session drain, while ff_layout_alloc_lseg triggered a GETDEVICEINFO. The GETDEVICEINFO hung waiting for the session drain, while the LAYOUTGET held the slot waiting for alloc_lseg to finish. Avoid this by moving the call to nfs4_find_get_deviceid out of ff_layout_alloc_lseg and into nfs4_ff_layout_prepare_ds. Signed-off-by: Fred Isaman [dros@primarydata.com: pNFS/flexfiles: fix races in ff_layout_mirror_valid] Signed-off-by: Weston Andros Adamson Signed-off-by: Trond Myklebust --- fs/nfs/flexfilelayout/flexfilelayout.c | 37 ++--------------------- fs/nfs/flexfilelayout/flexfilelayout.h | 2 +- fs/nfs/flexfilelayout/flexfilelayoutdev.c | 50 ++++++++++++++++++++++++------- 3 files changed, 43 insertions(+), 46 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/flexfilelayout/flexfilelayout.c b/fs/nfs/flexfilelayout/flexfilelayout.c index ca1012a..ef4c9d1 100644 --- a/fs/nfs/flexfilelayout/flexfilelayout.c +++ b/fs/nfs/flexfilelayout/flexfilelayout.c @@ -183,7 +183,7 @@ ff_layout_add_mirror(struct pnfs_layout_hdr *lo, spin_lock(&inode->i_lock); list_for_each_entry(pos, &ff_layout->mirrors, mirrors) { - if (mirror->mirror_ds != pos->mirror_ds) + if (memcmp(&mirror->devid, &pos->devid, sizeof(pos->devid)) != 0) continue; if (!ff_mirror_match_fh(mirror, pos)) continue; @@ -360,19 +360,6 @@ static void ff_layout_sort_mirrors(struct nfs4_ff_layout_segment *fls) } } -static void ff_layout_mark_devices_valid(struct nfs4_ff_layout_segment *fls) -{ - struct nfs4_deviceid_node *node; - int i; - - if (!(fls->flags & FF_FLAGS_NO_IO_THRU_MDS)) - return; - for (i = 0; i < fls->mirror_array_cnt; i++) { - node = &fls->mirror_array[i]->mirror_ds->id_node; - clear_bit(NFS_DEVICEID_UNAVAILABLE, &node->flags); - } -} - static struct pnfs_layout_segment * ff_layout_alloc_lseg(struct pnfs_layout_hdr *lh, struct nfs4_layoutget_res *lgr, @@ -426,8 +413,6 @@ ff_layout_alloc_lseg(struct pnfs_layout_hdr *lh, for (i = 0; i < fls->mirror_array_cnt; i++) { struct nfs4_ff_layout_mirror *mirror; - struct nfs4_deviceid devid; - struct nfs4_deviceid_node *idnode; struct auth_cred acred = { .group_info = ff_zero_group }; struct rpc_cred __rcu *cred; u32 ds_count, fh_count, id; @@ -452,24 +437,10 @@ ff_layout_alloc_lseg(struct pnfs_layout_hdr *lh, fls->mirror_array[i]->ds_count = ds_count; /* deviceid */ - rc = decode_deviceid(&stream, &devid); + rc = decode_deviceid(&stream, &fls->mirror_array[i]->devid); if (rc) goto out_err_free; - idnode = nfs4_find_get_deviceid(NFS_SERVER(lh->plh_inode), - &devid, lh->plh_lc_cred, - gfp_flags); - /* - * upon success, mirror_ds is allocated by previous - * getdeviceinfo, or newly by .alloc_deviceid_node - * nfs4_find_get_deviceid failure is indeed getdeviceinfo falure - */ - if (idnode) - fls->mirror_array[i]->mirror_ds = - FF_LAYOUT_MIRROR_DS(idnode); - else - goto out_err_free; - /* efficiency */ rc = -EIO; p = xdr_inline_decode(&stream, 4); @@ -567,8 +538,6 @@ out_sort_mirrors: rc = ff_layout_check_layout(lgr); if (rc) goto out_err_free; - ff_layout_mark_devices_valid(fls); - ret = &fls->generic_hdr; dprintk("<-- %s (success)\n", __func__); out_free_page: @@ -2332,7 +2301,7 @@ ff_layout_mirror_prepare_stats(struct pnfs_layout_hdr *lo, list_for_each_entry(mirror, &ff_layout->mirrors, mirrors) { if (i >= dev_limit) break; - if (!mirror->mirror_ds) + if (IS_ERR_OR_NULL(mirror->mirror_ds)) continue; if (!test_and_clear_bit(NFS4_FF_MIRROR_STAT_AVAIL, &mirror->flags)) continue; diff --git a/fs/nfs/flexfilelayout/flexfilelayout.h b/fs/nfs/flexfilelayout/flexfilelayout.h index 35221fe..7223c4e 100644 --- a/fs/nfs/flexfilelayout/flexfilelayout.h +++ b/fs/nfs/flexfilelayout/flexfilelayout.h @@ -74,6 +74,7 @@ struct nfs4_ff_layout_mirror { struct list_head mirrors; u32 ds_count; u32 efficiency; + struct nfs4_deviceid devid; struct nfs4_ff_layout_ds *mirror_ds; u32 fh_versions_cnt; struct nfs_fh *fh_versions; @@ -211,7 +212,6 @@ nfs4_ff_find_or_create_ds_client(struct pnfs_layout_segment *lseg, struct inode *inode); struct rpc_cred *ff_layout_get_ds_cred(struct pnfs_layout_segment *lseg, u32 ds_idx, struct rpc_cred *mdscred); -bool ff_layout_has_available_ds(struct pnfs_layout_segment *lseg); bool ff_layout_avoid_mds_available_ds(struct pnfs_layout_segment *lseg); bool ff_layout_avoid_read_on_rw(struct pnfs_layout_segment *lseg); diff --git a/fs/nfs/flexfilelayout/flexfilelayoutdev.c b/fs/nfs/flexfilelayout/flexfilelayoutdev.c index 142bfd0..3cc39d1 100644 --- a/fs/nfs/flexfilelayout/flexfilelayoutdev.c +++ b/fs/nfs/flexfilelayout/flexfilelayoutdev.c @@ -20,9 +20,11 @@ static unsigned int dataserver_timeo = NFS_DEF_TCP_RETRANS; static unsigned int dataserver_retrans; +static bool ff_layout_has_available_ds(struct pnfs_layout_segment *lseg); + void nfs4_ff_layout_put_deviceid(struct nfs4_ff_layout_ds *mirror_ds) { - if (mirror_ds) + if (!IS_ERR_OR_NULL(mirror_ds)) nfs4_put_deviceid_node(&mirror_ds->id_node); } @@ -182,12 +184,29 @@ static void ff_layout_mark_devid_invalid(struct pnfs_layout_segment *lseg, } static bool ff_layout_mirror_valid(struct pnfs_layout_segment *lseg, - struct nfs4_ff_layout_mirror *mirror) + struct nfs4_ff_layout_mirror *mirror, + bool create) { - if (mirror == NULL || mirror->mirror_ds == NULL) { - pnfs_error_mark_layout_for_return(lseg->pls_layout->plh_inode, - lseg); - return false; + if (mirror == NULL || IS_ERR(mirror->mirror_ds)) + goto outerr; + if (mirror->mirror_ds == NULL) { + if (create) { + struct nfs4_deviceid_node *node; + struct pnfs_layout_hdr *lh = lseg->pls_layout; + struct nfs4_ff_layout_ds *mirror_ds = ERR_PTR(-ENODEV); + + node = nfs4_find_get_deviceid(NFS_SERVER(lh->plh_inode), + &mirror->devid, lh->plh_lc_cred, + GFP_KERNEL); + if (node) + mirror_ds = FF_LAYOUT_MIRROR_DS(node); + + /* check for race with another call to this function */ + if (cmpxchg(&mirror->mirror_ds, NULL, mirror_ds) && + mirror_ds != ERR_PTR(-ENODEV)) + nfs4_put_deviceid_node(node); + } else + goto outerr; } if (mirror->mirror_ds->ds == NULL) { struct nfs4_deviceid_node *devid; @@ -196,6 +215,9 @@ static bool ff_layout_mirror_valid(struct pnfs_layout_segment *lseg, return false; } return true; +outerr: + pnfs_error_mark_layout_for_return(lseg->pls_layout->plh_inode, lseg); + return false; } static void extend_ds_error(struct nfs4_ff_layout_ds_err *err, @@ -323,7 +345,7 @@ nfs4_ff_layout_select_ds_fh(struct pnfs_layout_segment *lseg, u32 mirror_idx) struct nfs4_ff_layout_mirror *mirror = FF_LAYOUT_COMP(lseg, mirror_idx); struct nfs_fh *fh = NULL; - if (!ff_layout_mirror_valid(lseg, mirror)) { + if (!ff_layout_mirror_valid(lseg, mirror, false)) { pr_err_ratelimited("NFS: %s: No data server for mirror offset index %d\n", __func__, mirror_idx); goto out; @@ -363,7 +385,7 @@ nfs4_ff_layout_prepare_ds(struct pnfs_layout_segment *lseg, u32 ds_idx, struct nfs_server *s = NFS_SERVER(ino); unsigned int max_payload; - if (!ff_layout_mirror_valid(lseg, mirror)) { + if (!ff_layout_mirror_valid(lseg, mirror, true)) { pr_err_ratelimited("NFS: %s: No data server for offset index %d\n", __func__, ds_idx); goto out; @@ -547,7 +569,11 @@ static bool ff_read_layout_has_available_ds(struct pnfs_layout_segment *lseg) for (idx = 0; idx < FF_LAYOUT_MIRROR_COUNT(lseg); idx++) { mirror = FF_LAYOUT_COMP(lseg, idx); - if (mirror && mirror->mirror_ds) { + if (mirror) { + if (!mirror->mirror_ds) + return true; + if (IS_ERR(mirror->mirror_ds)) + continue; devid = &mirror->mirror_ds->id_node; if (!ff_layout_test_devid_unavailable(devid)) return true; @@ -565,8 +591,10 @@ static bool ff_rw_layout_has_available_ds(struct pnfs_layout_segment *lseg) for (idx = 0; idx < FF_LAYOUT_MIRROR_COUNT(lseg); idx++) { mirror = FF_LAYOUT_COMP(lseg, idx); - if (!mirror || !mirror->mirror_ds) + if (!mirror || IS_ERR(mirror->mirror_ds)) return false; + if (!mirror->mirror_ds) + continue; devid = &mirror->mirror_ds->id_node; if (ff_layout_test_devid_unavailable(devid)) return false; @@ -575,7 +603,7 @@ static bool ff_rw_layout_has_available_ds(struct pnfs_layout_segment *lseg) return FF_LAYOUT_MIRROR_COUNT(lseg) != 0; } -bool ff_layout_has_available_ds(struct pnfs_layout_segment *lseg) +static bool ff_layout_has_available_ds(struct pnfs_layout_segment *lseg) { if (lseg->pls_range.iomode == IOMODE_READ) return ff_read_layout_has_available_ds(lseg); -- cgit v1.1 From 5ba6a09e92342e40b63af1654da8b8bc8b5a83c6 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Fri, 9 Dec 2016 18:24:18 -0500 Subject: pNFS/flexfiles: Remove a redundant parameter in ff_layout_encode_ioerr() Signed-off-by: Trond Myklebust --- fs/nfs/flexfilelayout/flexfilelayout.c | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/flexfilelayout/flexfilelayout.c b/fs/nfs/flexfilelayout/flexfilelayout.c index ef4c9d1..4995444 100644 --- a/fs/nfs/flexfilelayout/flexfilelayout.c +++ b/fs/nfs/flexfilelayout/flexfilelayout.c @@ -1946,8 +1946,7 @@ ff_layout_free_deviceid_node(struct nfs4_deviceid_node *d) id_node)); } -static int ff_layout_encode_ioerr(struct nfs4_flexfile_layout *flo, - struct xdr_stream *xdr, +static int ff_layout_encode_ioerr(struct xdr_stream *xdr, const struct nfs4_layoutreturn_args *args, const struct nfs4_flexfile_layoutreturn_args *ff_args) { @@ -2049,16 +2048,15 @@ ff_layout_encode_layoutreturn(struct xdr_stream *xdr, const struct nfs4_xdr_opaque_data *ff_opaque) { const struct nfs4_layoutreturn_args *args = voidargs; - struct pnfs_layout_hdr *lo = args->layout; - struct nfs4_flexfile_layout *flo = FF_LAYOUT_FROM_HDR(lo); + struct nfs4_flexfile_layoutreturn_args *ff_args = ff_opaque->data; __be32 *start; dprintk("%s: Begin\n", __func__); start = xdr_reserve_space(xdr, 4); BUG_ON(!start); - ff_layout_encode_ioerr(flo, xdr, args, ff_opaque->data); - ff_layout_encode_iostats_array(xdr, args, ff_opaque->data); + ff_layout_encode_ioerr(xdr, args, ff_args); + ff_layout_encode_iostats_array(xdr, args, ff_args); *start = cpu_to_be32((xdr->p - start - 1) * 4); dprintk("%s: Return\n", __func__); -- cgit v1.1 From d9152114f7c9abb096275b72db8527c004d57bf9 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Fri, 9 Dec 2016 18:07:51 -0500 Subject: pNFS/flexfiles: Ensure we have enough buffer for layoutreturn The flexfiles client can piggyback both layout errors and layoutstats as part of the layoutreturn. Both these payloads can get large, with 20 layout error entries taking up about 1.2K, and 4 layoutstats entries taking up another 1K. This patch allows a maximum payload of 4k by allocating a full page. Signed-off-by: Trond Myklebust --- fs/nfs/flexfilelayout/flexfilelayout.c | 32 ++++++++++++++++++++++++++------ fs/nfs/flexfilelayout/flexfilelayout.h | 1 + 2 files changed, 27 insertions(+), 6 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/flexfilelayout/flexfilelayout.c b/fs/nfs/flexfilelayout/flexfilelayout.c index 4995444..9e111d0 100644 --- a/fs/nfs/flexfilelayout/flexfilelayout.c +++ b/fs/nfs/flexfilelayout/flexfilelayout.c @@ -2049,16 +2049,28 @@ ff_layout_encode_layoutreturn(struct xdr_stream *xdr, { const struct nfs4_layoutreturn_args *args = voidargs; struct nfs4_flexfile_layoutreturn_args *ff_args = ff_opaque->data; + struct xdr_buf tmp_buf = { + .head = { + [0] = { + .iov_base = page_address(ff_args->pages[0]), + }, + }, + .buflen = PAGE_SIZE, + }; + struct xdr_stream tmp_xdr; __be32 *start; dprintk("%s: Begin\n", __func__); - start = xdr_reserve_space(xdr, 4); - BUG_ON(!start); - ff_layout_encode_ioerr(xdr, args, ff_args); - ff_layout_encode_iostats_array(xdr, args, ff_args); + xdr_init_encode(&tmp_xdr, &tmp_buf, NULL); + + ff_layout_encode_ioerr(&tmp_xdr, args, ff_args); + ff_layout_encode_iostats_array(&tmp_xdr, args, ff_args); + + start = xdr_reserve_space(xdr, 4); + *start = cpu_to_be32(tmp_buf.len); + xdr_write_pages(xdr, ff_args->pages, 0, tmp_buf.len); - *start = cpu_to_be32((xdr->p - start - 1) * 4); dprintk("%s: Return\n", __func__); } @@ -2075,6 +2087,7 @@ ff_layout_free_layoutreturn(struct nfs4_xdr_opaque_data *args) ff_layout_free_ds_ioerr(&ff_args->errors); ff_layout_free_iostats_array(ff_args->devinfo, ff_args->num_dev); + put_page(ff_args->pages[0]); kfree(ff_args); } @@ -2091,7 +2104,10 @@ ff_layout_prepare_layoutreturn(struct nfs4_layoutreturn_args *args) ff_args = kmalloc(sizeof(*ff_args), GFP_KERNEL); if (!ff_args) - return -ENOMEM; + goto out_nomem; + ff_args->pages[0] = alloc_page(GFP_KERNEL); + if (!ff_args->pages[0]) + goto out_nomem_free; INIT_LIST_HEAD(&ff_args->errors); ff_args->num_errors = ff_layout_fetch_ds_ioerr(args->layout, @@ -2106,6 +2122,10 @@ ff_layout_prepare_layoutreturn(struct nfs4_layoutreturn_args *args) args->ld_private->ops = &layoutreturn_ops; args->ld_private->data = ff_args; return 0; +out_nomem_free: + kfree(ff_args); +out_nomem: + return -ENOMEM; } static int diff --git a/fs/nfs/flexfilelayout/flexfilelayout.h b/fs/nfs/flexfilelayout/flexfilelayout.h index 7223c4e..f4f39b0 100644 --- a/fs/nfs/flexfilelayout/flexfilelayout.h +++ b/fs/nfs/flexfilelayout/flexfilelayout.h @@ -113,6 +113,7 @@ struct nfs4_flexfile_layoutreturn_args { struct nfs42_layoutstat_devinfo devinfo[FF_LAYOUTSTATS_MAXDEV]; unsigned int num_errors; unsigned int num_dev; + struct page *pages[1]; }; static inline struct nfs4_flexfile_layout * -- cgit v1.1 From dff25ddb48086afcb434770caa3d6849a4489b85 Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Fri, 2 Dec 2016 22:53:30 -0500 Subject: nfs: add support for the umask attribute Clients can set the umask attribute when creating files to cause the server to apply it always except when inheriting permissions from the parent directory. That way, the new files will end up with the same permissions as files created locally. See https://tools.ietf.org/html/draft-ietf-nfsv4-umask-02 for more details. Signed-off-by: Andreas Gruenbacher Signed-off-by: J. Bruce Fields Signed-off-by: Trond Myklebust --- fs/nfs/dir.c | 7 ++++++- fs/nfs/nfs4proc.c | 16 ++++++++++++---- fs/nfs/nfs4xdr.c | 36 ++++++++++++++++++++++++------------ 3 files changed, 42 insertions(+), 17 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index 7483722..cb22a9f 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -1529,8 +1529,13 @@ int nfs_atomic_open(struct inode *dir, struct dentry *dentry, return -ENAMETOOLONG; if (open_flags & O_CREAT) { + struct nfs_server *server = NFS_SERVER(dir); + + if (!(server->attr_bitmask[2] & FATTR4_WORD2_MODE_UMASK)) + mode &= ~current_umask(); + attr.ia_valid |= ATTR_MODE; - attr.ia_mode = mode & ~current_umask(); + attr.ia_mode = mode; } if (open_flags & O_TRUNC) { attr.ia_valid |= ATTR_SIZE; diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index c5a5086..d33242c 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -1224,6 +1224,7 @@ static struct nfs4_opendata *nfs4_opendata_alloc(struct dentry *dentry, atomic_inc(&sp->so_count); p->o_arg.open_flags = flags; p->o_arg.fmode = fmode & (FMODE_READ|FMODE_WRITE); + p->o_arg.umask = current_umask(); p->o_arg.claim = nfs4_map_atomic_open_claim(server, claim); p->o_arg.share_access = nfs4_map_atomic_open_share(server, fmode, flags); @@ -3337,7 +3338,7 @@ static void nfs4_close_context(struct nfs_open_context *ctx, int is_sync) #define FATTR4_WORD1_NFS40_MASK (2*FATTR4_WORD1_MOUNTED_ON_FILEID - 1UL) #define FATTR4_WORD2_NFS41_MASK (2*FATTR4_WORD2_SUPPATTR_EXCLCREAT - 1UL) -#define FATTR4_WORD2_NFS42_MASK (2*FATTR4_WORD2_SECURITY_LABEL - 1UL) +#define FATTR4_WORD2_NFS42_MASK (2*FATTR4_WORD2_MODE_UMASK - 1UL) static int _nfs4_server_capabilities(struct nfs_server *server, struct nfs_fh *fhandle) { @@ -4010,6 +4011,7 @@ static int nfs4_proc_create(struct inode *dir, struct dentry *dentry, struct iattr *sattr, int flags) { + struct nfs_server *server = NFS_SERVER(dir); struct nfs4_label l, *ilabel = NULL; struct nfs_open_context *ctx; struct nfs4_state *state; @@ -4021,7 +4023,8 @@ nfs4_proc_create(struct inode *dir, struct dentry *dentry, struct iattr *sattr, ilabel = nfs4_label_init_security(dir, dentry, sattr, &l); - sattr->ia_mode &= ~current_umask(); + if (!(server->attr_bitmask[2] & FATTR4_WORD2_MODE_UMASK)) + sattr->ia_mode &= ~current_umask(); state = nfs4_do_open(dir, ctx, flags, sattr, ilabel, NULL); if (IS_ERR(state)) { status = PTR_ERR(state); @@ -4229,6 +4232,7 @@ static struct nfs4_createdata *nfs4_alloc_createdata(struct inode *dir, data->arg.attrs = sattr; data->arg.ftype = ftype; data->arg.bitmask = nfs4_bitmask(server, data->label); + data->arg.umask = current_umask(); data->res.server = server; data->res.fh = &data->fh; data->res.fattr = &data->fattr; @@ -4326,13 +4330,15 @@ out: static int nfs4_proc_mkdir(struct inode *dir, struct dentry *dentry, struct iattr *sattr) { + struct nfs_server *server = NFS_SERVER(dir); struct nfs4_exception exception = { }; struct nfs4_label l, *label = NULL; int err; label = nfs4_label_init_security(dir, dentry, sattr, &l); - sattr->ia_mode &= ~current_umask(); + if (!(server->attr_bitmask[2] & FATTR4_WORD2_MODE_UMASK)) + sattr->ia_mode &= ~current_umask(); do { err = _nfs4_proc_mkdir(dir, dentry, sattr, label); trace_nfs4_mkdir(dir, &dentry->d_name, err); @@ -4435,13 +4441,15 @@ out: static int nfs4_proc_mknod(struct inode *dir, struct dentry *dentry, struct iattr *sattr, dev_t rdev) { + struct nfs_server *server = NFS_SERVER(dir); struct nfs4_exception exception = { }; struct nfs4_label l, *label = NULL; int err; label = nfs4_label_init_security(dir, dentry, sattr, &l); - sattr->ia_mode &= ~current_umask(); + if (!(server->attr_bitmask[2] & FATTR4_WORD2_MODE_UMASK)) + sattr->ia_mode &= ~current_umask(); do { err = _nfs4_proc_mknod(dir, dentry, sattr, label, rdev); trace_nfs4_mknod(dir, &dentry->d_name, err); diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c index 1c1768a..1af6268 100644 --- a/fs/nfs/nfs4xdr.c +++ b/fs/nfs/nfs4xdr.c @@ -52,6 +52,7 @@ #include #include #include +#include #include "nfs4_fs.h" #include "internal.h" @@ -1008,7 +1009,7 @@ static void encode_nfs4_verifier(struct xdr_stream *xdr, const nfs4_verifier *ve static void encode_attrs(struct xdr_stream *xdr, const struct iattr *iap, const struct nfs4_label *label, const struct nfs_server *server, - bool excl_check) + bool excl_check, const umode_t *umask) { char owner_name[IDMAP_NAMESZ]; char owner_group[IDMAP_NAMESZ]; @@ -1022,18 +1023,21 @@ static void encode_attrs(struct xdr_stream *xdr, const struct iattr *iap, /* * We reserve enough space to write the entire attribute buffer at once. - * In the worst-case, this would be - * 16(bitmap) + 4(attrlen) + 8(size) + 4(mode) + 4(atime) + 4(mtime) - * = 40 bytes, plus any contribution from variable-length fields - * such as owner/group. */ if (iap->ia_valid & ATTR_SIZE) { bmval[0] |= FATTR4_WORD0_SIZE; len += 8; } + if (!(server->attr_bitmask[2] & FATTR4_WORD2_MODE_UMASK)) + umask = NULL; if (iap->ia_valid & ATTR_MODE) { - bmval[1] |= FATTR4_WORD1_MODE; - len += 4; + if (umask) { + bmval[2] |= FATTR4_WORD2_MODE_UMASK; + len += 8; + } else { + bmval[1] |= FATTR4_WORD1_MODE; + len += 4; + } } if (iap->ia_valid & ATTR_UID) { owner_namelen = nfs_map_uid_to_name(server, iap->ia_uid, owner_name, IDMAP_NAMESZ); @@ -1134,6 +1138,10 @@ static void encode_attrs(struct xdr_stream *xdr, const struct iattr *iap, *p++ = cpu_to_be32(label->len); p = xdr_encode_opaque_fixed(p, label->label, label->len); } + if (bmval[2] & FATTR4_WORD2_MODE_UMASK) { + *p++ = cpu_to_be32(iap->ia_mode & S_IALLUGO); + *p++ = cpu_to_be32(*umask); + } /* out: */ } @@ -1188,7 +1196,8 @@ static void encode_create(struct xdr_stream *xdr, const struct nfs4_create_arg * } encode_string(xdr, create->name->len, create->name->name); - encode_attrs(xdr, create->attrs, create->label, create->server, false); + encode_attrs(xdr, create->attrs, create->label, create->server, false, + &create->umask); } static void encode_getattr_one(struct xdr_stream *xdr, uint32_t bitmap, struct compound_hdr *hdr) @@ -1408,11 +1417,13 @@ static inline void encode_createmode(struct xdr_stream *xdr, const struct nfs_op switch(arg->createmode) { case NFS4_CREATE_UNCHECKED: *p = cpu_to_be32(NFS4_CREATE_UNCHECKED); - encode_attrs(xdr, arg->u.attrs, arg->label, arg->server, false); + encode_attrs(xdr, arg->u.attrs, arg->label, arg->server, false, + &arg->umask); break; case NFS4_CREATE_GUARDED: *p = cpu_to_be32(NFS4_CREATE_GUARDED); - encode_attrs(xdr, arg->u.attrs, arg->label, arg->server, false); + encode_attrs(xdr, arg->u.attrs, arg->label, arg->server, false, + &arg->umask); break; case NFS4_CREATE_EXCLUSIVE: *p = cpu_to_be32(NFS4_CREATE_EXCLUSIVE); @@ -1421,7 +1432,8 @@ static inline void encode_createmode(struct xdr_stream *xdr, const struct nfs_op case NFS4_CREATE_EXCLUSIVE4_1: *p = cpu_to_be32(NFS4_CREATE_EXCLUSIVE4_1); encode_nfs4_verifier(xdr, &arg->u.verifier); - encode_attrs(xdr, arg->u.attrs, arg->label, arg->server, true); + encode_attrs(xdr, arg->u.attrs, arg->label, arg->server, true, + &arg->umask); } } @@ -1677,7 +1689,7 @@ static void encode_setattr(struct xdr_stream *xdr, const struct nfs_setattrargs { encode_op_hdr(xdr, OP_SETATTR, decode_setattr_maxsz, hdr); encode_nfs4_stateid(xdr, &arg->stateid); - encode_attrs(xdr, arg->iap, arg->label, server, false); + encode_attrs(xdr, arg->iap, arg->label, server, false, NULL); } static void encode_setclientid(struct xdr_stream *xdr, const struct nfs4_setclientid *setclientid, struct compound_hdr *hdr) -- cgit v1.1