diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2016-12-15 18:47:31 -0800 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2016-12-15 18:47:31 -0800 |
commit | 73e2e0c9b13c97df1c8565f6e158caac3c481b44 (patch) | |
tree | f3c561172579175a37a3b7908e8af05f4d6a70be /fs/nfs/pnfs.c | |
parent | ed3c5a0be38c180ab0899a0f52719e81f36b87a1 (diff) | |
parent | 2549f307b5997bf5dd91071428e8090d9faa8b1b (diff) | |
download | op-kernel-dev-73e2e0c9b13c97df1c8565f6e158caac3c481b44.zip op-kernel-dev-73e2e0c9b13c97df1c8565f6e158caac3c481b44.tar.gz |
Merge tag 'nfs-for-4.10-1' of git://git.linux-nfs.org/projects/trondmy/linux-nfs
Pull NFS client updates from Trond Myklebust:
"Highlights include:
Stable bugfixes:
- Fix a pnfs deadlock between read resends and layoutreturn
- Don't invalidate the layout stateid while a layout return is
outstanding
- Don't schedule a layoutreturn if the layout stateid is marked as
invalid
- On a pNFS error, do not send LAYOUTGET until the LAYOUTRETURN is
complete
- SUNRPC: fix refcounting problems with auth_gss messages.
Features:
- Add client support for the NFSv4 umask attribute.
- NFSv4: Correct support for flock() stateids.
- Add a LAYOUTRETURN operation to CLOSE and DELEGRETURN when
return-on-close is specified
- Allow the pNFS/flexfiles layoutstat information to piggyback on
LAYOUTRETURN
- Optimise away redundant GETATTR calls when doing state recovery
and/or when not required by cache revalidation rules or
close-to-open cache consistency.
- Attribute cache improvements
- RPC/RDMA support for SG_GAP devices
Bugfixes:
- NFS: Fix performance regressions in readdir
- pNFS/flexfiles: Fix a deadlock on LAYOUTGET
- NFSv4: Add missing nfs_put_lock_context()
- NFSv4.1: Fix regression in callback retry handling
- Fix false positive NFSv4.0 trunking detection.
- pNFS/flexfiles: Only send layoutstats updates for mirrors that were
updated
- Various layout stateid related bugfixes
- RPC/RDMA bugfixes"
* tag 'nfs-for-4.10-1' of git://git.linux-nfs.org/projects/trondmy/linux-nfs: (82 commits)
SUNRPC: fix refcounting problems with auth_gss messages.
nfs: add support for the umask attribute
pNFS/flexfiles: Ensure we have enough buffer for layoutreturn
pNFS/flexfiles: Remove a redundant parameter in ff_layout_encode_ioerr()
pNFS/flexfiles: Fix a deadlock on LAYOUTGET
pNFS: Layoutreturn must free the layout after the layout-private data
pNFS/flexfiles: Fix ff_layout_add_ds_error_locked()
NFSv4: Add missing nfs_put_lock_context()
pNFS: Release NFS_LAYOUT_RETURN when invalidating the layout stateid
NFSv4.1: Don't schedule lease recovery in nfs4_schedule_session_recovery()
NFSv4.1: Handle NFS4ERR_BADSESSION/NFS4ERR_DEADSESSION replies to OP_SEQUENCE
NFS: Only look at the change attribute cache state in nfs_check_verifier
NFS: Fix incorrect size revalidation when holding a delegation
NFS: Fix incorrect mapping revalidation when holding a delegation
pNFS/flexfiles: Support sending layoutstats in layoutreturn
pNFS/flexfiles: Minor refactoring before adding iostats to layoutreturn
NFS: Fix up read of mirror stats
pNFS/flexfiles: Clean up layoutstats
pNFS/flexfiles: Refactor encoding of the layoutreturn payload
pNFS: Add a layoutreturn callback to performa layout-private setup
...
Diffstat (limited to 'fs/nfs/pnfs.c')
-rw-r--r-- | fs/nfs/pnfs.c | 407 |
1 files changed, 252 insertions, 155 deletions
diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index 259ef85..896df7b 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c @@ -54,6 +54,12 @@ static DEFINE_SPINLOCK(pnfs_spinlock); static LIST_HEAD(pnfs_modules_tbl); static void pnfs_layoutreturn_before_put_layout_hdr(struct pnfs_layout_hdr *lo); +static void pnfs_free_returned_lsegs(struct pnfs_layout_hdr *lo, + struct list_head *free_me, + const struct pnfs_layout_range *range, + u32 seq); +static bool pnfs_lseg_dec_and_remove_zero(struct pnfs_layout_segment *lseg, + struct list_head *tmp_list); /* Return the registered pnfs layout driver module matching given id */ static struct pnfs_layoutdriver_type * @@ -299,6 +305,49 @@ pnfs_put_layout_hdr(struct pnfs_layout_hdr *lo) } } +static void +pnfs_set_plh_return_info(struct pnfs_layout_hdr *lo, enum pnfs_iomode iomode, + u32 seq) +{ + if (lo->plh_return_iomode != 0 && lo->plh_return_iomode != iomode) + iomode = IOMODE_ANY; + lo->plh_return_iomode = iomode; + set_bit(NFS_LAYOUT_RETURN_REQUESTED, &lo->plh_flags); + if (seq != 0) { + WARN_ON_ONCE(lo->plh_return_seq != 0 && lo->plh_return_seq != seq); + lo->plh_return_seq = seq; + } +} + +static void +pnfs_clear_layoutreturn_info(struct pnfs_layout_hdr *lo) +{ + lo->plh_return_iomode = 0; + lo->plh_return_seq = 0; + clear_bit(NFS_LAYOUT_RETURN_REQUESTED, &lo->plh_flags); +} + +static void pnfs_clear_layoutreturn_waitbit(struct pnfs_layout_hdr *lo) +{ + clear_bit_unlock(NFS_LAYOUT_RETURN, &lo->plh_flags); + clear_bit(NFS_LAYOUT_RETURN_LOCK, &lo->plh_flags); + smp_mb__after_atomic(); + wake_up_bit(&lo->plh_flags, NFS_LAYOUT_RETURN); + rpc_wake_up(&NFS_SERVER(lo->plh_inode)->roc_rpcwaitq); +} + +static void +pnfs_clear_lseg_state(struct pnfs_layout_segment *lseg, + struct list_head *free_me) +{ + clear_bit(NFS_LSEG_ROC, &lseg->pls_flags); + clear_bit(NFS_LSEG_LAYOUTRETURN, &lseg->pls_flags); + if (test_and_clear_bit(NFS_LSEG_VALID, &lseg->pls_flags)) + pnfs_lseg_dec_and_remove_zero(lseg, free_me); + if (test_and_clear_bit(NFS_LSEG_LAYOUTCOMMIT, &lseg->pls_flags)) + pnfs_lseg_dec_and_remove_zero(lseg, free_me); +} + /* * Mark a pnfs_layout_hdr and all associated layout segments as invalid * @@ -315,9 +364,17 @@ pnfs_mark_layout_stateid_invalid(struct pnfs_layout_hdr *lo, .offset = 0, .length = NFS4_MAX_UINT64, }; + struct pnfs_layout_segment *lseg, *next; set_bit(NFS_LAYOUT_INVALID_STID, &lo->plh_flags); - return pnfs_mark_matching_lsegs_invalid(lo, lseg_list, &range, 0); + pnfs_clear_layoutreturn_info(lo); + list_for_each_entry_safe(lseg, next, &lo->plh_segs, pls_list) + pnfs_clear_lseg_state(lseg, lseg_list); + pnfs_free_returned_lsegs(lo, lseg_list, &range, 0); + if (test_bit(NFS_LAYOUT_RETURN, &lo->plh_flags) && + !test_and_set_bit(NFS_LAYOUT_RETURN_LOCK, &lo->plh_flags)) + pnfs_clear_layoutreturn_waitbit(lo); + return !list_empty(&lo->plh_segs); } static int @@ -396,27 +453,42 @@ pnfs_init_lseg(struct pnfs_layout_hdr *lo, struct pnfs_layout_segment *lseg, static void pnfs_free_lseg(struct pnfs_layout_segment *lseg) { - struct inode *ino = lseg->pls_layout->plh_inode; - - NFS_SERVER(ino)->pnfs_curr_ld->free_lseg(lseg); + if (lseg != NULL) { + struct inode *inode = lseg->pls_layout->plh_inode; + NFS_SERVER(inode)->pnfs_curr_ld->free_lseg(lseg); + } } static void pnfs_layout_remove_lseg(struct pnfs_layout_hdr *lo, struct pnfs_layout_segment *lseg) { - struct inode *inode = lo->plh_inode; - WARN_ON(test_bit(NFS_LSEG_VALID, &lseg->pls_flags)); list_del_init(&lseg->pls_list); /* Matched by pnfs_get_layout_hdr in pnfs_layout_insert_lseg */ atomic_dec(&lo->plh_refcount); - if (list_empty(&lo->plh_segs)) { + if (test_bit(NFS_LSEG_LAYOUTRETURN, &lseg->pls_flags)) + return; + if (list_empty(&lo->plh_segs) && + !test_bit(NFS_LAYOUT_RETURN_REQUESTED, &lo->plh_flags) && + !test_bit(NFS_LAYOUT_RETURN, &lo->plh_flags)) { if (atomic_read(&lo->plh_outstanding) == 0) set_bit(NFS_LAYOUT_INVALID_STID, &lo->plh_flags); clear_bit(NFS_LAYOUT_BULK_RECALL, &lo->plh_flags); } - rpc_wake_up(&NFS_SERVER(inode)->roc_rpcwaitq); +} + +static bool +pnfs_cache_lseg_for_layoutreturn(struct pnfs_layout_hdr *lo, + struct pnfs_layout_segment *lseg) +{ + if (test_and_clear_bit(NFS_LSEG_LAYOUTRETURN, &lseg->pls_flags) && + pnfs_layout_is_valid(lo)) { + pnfs_set_plh_return_info(lo, lseg->pls_range.iomode, 0); + list_move_tail(&lseg->pls_list, &lo->plh_return_segs); + return true; + } + return false; } void @@ -442,6 +514,8 @@ pnfs_put_lseg(struct pnfs_layout_segment *lseg) } pnfs_get_layout_hdr(lo); pnfs_layout_remove_lseg(lo, lseg); + if (pnfs_cache_lseg_for_layoutreturn(lo, lseg)) + lseg = NULL; spin_unlock(&inode->i_lock); pnfs_free_lseg(lseg); pnfs_put_layout_hdr(lo); @@ -482,22 +556,15 @@ pnfs_put_lseg_locked(struct pnfs_layout_segment *lseg) struct pnfs_layout_hdr *lo = lseg->pls_layout; if (test_bit(NFS_LSEG_VALID, &lseg->pls_flags)) return; - pnfs_get_layout_hdr(lo); pnfs_layout_remove_lseg(lo, lseg); - pnfs_free_lseg_async(lseg); + if (!pnfs_cache_lseg_for_layoutreturn(lo, lseg)) { + pnfs_get_layout_hdr(lo); + pnfs_free_lseg_async(lseg); + } } } EXPORT_SYMBOL_GPL(pnfs_put_lseg_locked); -static u64 -end_offset(u64 start, u64 len) -{ - u64 end; - - end = start + len; - return end >= start ? end : NFS4_MAX_UINT64; -} - /* * is l2 fully contained in l1? * start1 end1 @@ -510,33 +577,13 @@ pnfs_lseg_range_contained(const struct pnfs_layout_range *l1, const struct pnfs_layout_range *l2) { u64 start1 = l1->offset; - u64 end1 = end_offset(start1, l1->length); + u64 end1 = pnfs_end_offset(start1, l1->length); u64 start2 = l2->offset; - u64 end2 = end_offset(start2, l2->length); + u64 end2 = pnfs_end_offset(start2, l2->length); return (start1 <= start2) && (end1 >= end2); } -/* - * is l1 and l2 intersecting? - * start1 end1 - * [----------------------------------) - * start2 end2 - * [----------------) - */ -static bool -pnfs_lseg_range_intersecting(const struct pnfs_layout_range *l1, - const struct pnfs_layout_range *l2) -{ - u64 start1 = l1->offset; - u64 end1 = end_offset(start1, l1->length); - u64 start2 = l2->offset; - u64 end2 = end_offset(start2, l2->length); - - return (end1 == NFS4_MAX_UINT64 || end1 > start2) && - (end2 == NFS4_MAX_UINT64 || end2 > start1); -} - static bool pnfs_lseg_dec_and_remove_zero(struct pnfs_layout_segment *lseg, struct list_head *tmp_list) { @@ -637,6 +684,20 @@ pnfs_mark_matching_lsegs_invalid(struct pnfs_layout_hdr *lo, return remaining; } +static void +pnfs_free_returned_lsegs(struct pnfs_layout_hdr *lo, + struct list_head *free_me, + const struct pnfs_layout_range *range, + u32 seq) +{ + struct pnfs_layout_segment *lseg, *next; + + list_for_each_entry_safe(lseg, next, &lo->plh_return_segs, pls_list) { + if (pnfs_match_lseg_recall(lseg, range, seq)) + list_move_tail(&lseg->pls_list, free_me); + } +} + /* note free_me must contain lsegs from a single layout_hdr */ void pnfs_free_lseg_list(struct list_head *free_me) @@ -701,6 +762,8 @@ pnfs_layout_bulk_destroy_byserver_locked(struct nfs_client *clp, struct inode *inode; list_for_each_entry_safe(lo, next, &server->layouts, plh_layouts) { + if (test_bit(NFS_LAYOUT_INVALID_STID, &lo->plh_flags)) + continue; inode = igrab(lo->plh_inode); if (inode == NULL) continue; @@ -816,14 +879,6 @@ pnfs_destroy_all_layouts(struct nfs_client *clp) pnfs_destroy_layouts_byclid(clp, false); } -static void -pnfs_clear_layoutreturn_info(struct pnfs_layout_hdr *lo) -{ - lo->plh_return_iomode = 0; - lo->plh_return_seq = 0; - clear_bit(NFS_LAYOUT_RETURN_REQUESTED, &lo->plh_flags); -} - /* update lo->plh_stateid with new if is more recent */ void pnfs_set_layout_stateid(struct pnfs_layout_hdr *lo, const nfs4_stateid *new, @@ -941,12 +996,31 @@ static void pnfs_clear_layoutcommit(struct inode *inode, } } -void pnfs_clear_layoutreturn_waitbit(struct pnfs_layout_hdr *lo) +void pnfs_layoutreturn_free_lsegs(struct pnfs_layout_hdr *lo, + const nfs4_stateid *arg_stateid, + const struct pnfs_layout_range *range, + const nfs4_stateid *stateid) { - clear_bit_unlock(NFS_LAYOUT_RETURN, &lo->plh_flags); - smp_mb__after_atomic(); - wake_up_bit(&lo->plh_flags, NFS_LAYOUT_RETURN); - rpc_wake_up(&NFS_SERVER(lo->plh_inode)->roc_rpcwaitq); + struct inode *inode = lo->plh_inode; + LIST_HEAD(freeme); + + spin_lock(&inode->i_lock); + if (!pnfs_layout_is_valid(lo) || !arg_stateid || + !nfs4_stateid_match_other(&lo->plh_stateid, arg_stateid)) + goto out_unlock; + if (stateid) { + u32 seq = be32_to_cpu(arg_stateid->seqid); + + pnfs_mark_matching_lsegs_invalid(lo, &freeme, range, seq); + pnfs_free_returned_lsegs(lo, &freeme, range, seq); + pnfs_set_layout_stateid(lo, stateid, true); + } else + pnfs_mark_layout_stateid_invalid(lo, &freeme); +out_unlock: + pnfs_clear_layoutreturn_waitbit(lo); + spin_unlock(&inode->i_lock); + pnfs_free_lseg_list(&freeme); + } static bool @@ -957,8 +1031,9 @@ pnfs_prepare_layoutreturn(struct pnfs_layout_hdr *lo, /* Serialise LAYOUTGET/LAYOUTRETURN */ if (atomic_read(&lo->plh_outstanding) != 0) return false; - if (test_and_set_bit(NFS_LAYOUT_RETURN, &lo->plh_flags)) + if (test_and_set_bit(NFS_LAYOUT_RETURN_LOCK, &lo->plh_flags)) return false; + set_bit(NFS_LAYOUT_RETURN, &lo->plh_flags); pnfs_get_layout_hdr(lo); if (test_bit(NFS_LAYOUT_RETURN_REQUESTED, &lo->plh_flags)) { if (stateid != NULL) { @@ -978,11 +1053,29 @@ pnfs_prepare_layoutreturn(struct pnfs_layout_hdr *lo, return true; } +static void +pnfs_init_layoutreturn_args(struct nfs4_layoutreturn_args *args, + struct pnfs_layout_hdr *lo, + const nfs4_stateid *stateid, + enum pnfs_iomode iomode) +{ + struct inode *inode = lo->plh_inode; + + args->layout_type = NFS_SERVER(inode)->pnfs_curr_ld->id; + args->inode = inode; + args->range.iomode = iomode; + args->range.offset = 0; + args->range.length = NFS4_MAX_UINT64; + args->layout = lo; + nfs4_stateid_copy(&args->stateid, stateid); +} + static int pnfs_send_layoutreturn(struct pnfs_layout_hdr *lo, const nfs4_stateid *stateid, enum pnfs_iomode iomode, bool sync) { struct inode *ino = lo->plh_inode; + struct pnfs_layoutdriver_type *ld = NFS_SERVER(ino)->pnfs_curr_ld; struct nfs4_layoutreturn *lrp; int status = 0; @@ -996,15 +1089,12 @@ pnfs_send_layoutreturn(struct pnfs_layout_hdr *lo, const nfs4_stateid *stateid, goto out; } - nfs4_stateid_copy(&lrp->args.stateid, stateid); - lrp->args.layout_type = NFS_SERVER(ino)->pnfs_curr_ld->id; - lrp->args.inode = ino; - lrp->args.range.iomode = iomode; - lrp->args.range.offset = 0; - lrp->args.range.length = NFS4_MAX_UINT64; - lrp->args.layout = lo; + pnfs_init_layoutreturn_args(&lrp->args, lo, stateid, iomode); + lrp->args.ld_private = &lrp->ld_private; lrp->clp = NFS_SERVER(ino)->nfs_client; lrp->cred = lo->plh_lc_cred; + if (ld->prepare_layoutreturn) + ld->prepare_layoutreturn(&lrp->args); status = nfs4_proc_layoutreturn(lrp, sync); out: @@ -1067,7 +1157,7 @@ _pnfs_return_layout(struct inode *ino) struct nfs_inode *nfsi = NFS_I(ino); LIST_HEAD(tmp_list); nfs4_stateid stateid; - int status = 0, empty; + int status = 0; bool send; dprintk("NFS: %s for inode %lu\n", __func__, ino->i_ino); @@ -1081,7 +1171,14 @@ _pnfs_return_layout(struct inode *ino) } /* Reference matched in nfs4_layoutreturn_release */ pnfs_get_layout_hdr(lo); - empty = list_empty(&lo->plh_segs); + /* Is there an outstanding layoutreturn ? */ + if (test_bit(NFS_LAYOUT_RETURN_LOCK, &lo->plh_flags)) { + spin_unlock(&ino->i_lock); + if (wait_on_bit(&lo->plh_flags, NFS_LAYOUT_RETURN, + TASK_UNINTERRUPTIBLE)) + goto out_put_layout_hdr; + spin_lock(&ino->i_lock); + } pnfs_clear_layoutcommit(ino, &tmp_list); pnfs_mark_matching_lsegs_invalid(lo, &tmp_list, NULL, 0); @@ -1095,7 +1192,7 @@ _pnfs_return_layout(struct inode *ino) } /* Don't send a LAYOUTRETURN if list was initially empty */ - if (empty) { + if (!test_bit(NFS_LAYOUT_RETURN_REQUESTED, &lo->plh_flags)) { spin_unlock(&ino->i_lock); dprintk("NFS: %s no layout segments to return\n", __func__); goto out_put_layout_hdr; @@ -1141,21 +1238,36 @@ pnfs_commit_and_return_layout(struct inode *inode) return ret; } -bool pnfs_roc(struct inode *ino) +bool pnfs_roc(struct inode *ino, + struct nfs4_layoutreturn_args *args, + struct nfs4_layoutreturn_res *res, + const struct rpc_cred *cred) { struct nfs_inode *nfsi = NFS_I(ino); struct nfs_open_context *ctx; struct nfs4_state *state; struct pnfs_layout_hdr *lo; - struct pnfs_layout_segment *lseg, *tmp; + struct pnfs_layout_segment *lseg, *next; nfs4_stateid stateid; - LIST_HEAD(tmp_list); - bool found = false, layoutreturn = false, roc = false; + enum pnfs_iomode iomode = 0; + bool layoutreturn = false, roc = false; + if (!nfs_have_layout(ino)) + return false; +retry: spin_lock(&ino->i_lock); lo = nfsi->layout; - if (!lo || test_bit(NFS_LAYOUT_BULK_RECALL, &lo->plh_flags)) + if (!lo || !pnfs_layout_is_valid(lo) || + test_bit(NFS_LAYOUT_BULK_RECALL, &lo->plh_flags)) goto out_noroc; + if (test_bit(NFS_LAYOUT_RETURN_LOCK, &lo->plh_flags)) { + pnfs_get_layout_hdr(lo); + spin_unlock(&ino->i_lock); + wait_on_bit(&lo->plh_flags, NFS_LAYOUT_RETURN, + TASK_UNINTERRUPTIBLE); + pnfs_put_layout_hdr(lo); + goto retry; + } /* no roc if we hold a delegation */ if (nfs4_check_delegation(ino, FMODE_READ)) @@ -1168,78 +1280,73 @@ bool pnfs_roc(struct inode *ino) goto out_noroc; } - /* always send layoutreturn if being marked so */ - if (test_bit(NFS_LAYOUT_RETURN_REQUESTED, &lo->plh_flags)) - layoutreturn = pnfs_prepare_layoutreturn(lo, - &stateid, NULL); - list_for_each_entry_safe(lseg, tmp, &lo->plh_segs, pls_list) + list_for_each_entry_safe(lseg, next, &lo->plh_segs, pls_list) { /* If we are sending layoutreturn, invalidate all valid lsegs */ - if (layoutreturn || test_bit(NFS_LSEG_ROC, &lseg->pls_flags)) { - mark_lseg_invalid(lseg, &tmp_list); - found = true; - } + if (!test_and_clear_bit(NFS_LSEG_ROC, &lseg->pls_flags)) + continue; + /* + * Note: mark lseg for return so pnfs_layout_remove_lseg + * doesn't invalidate the layout for us. + */ + set_bit(NFS_LSEG_LAYOUTRETURN, &lseg->pls_flags); + if (!mark_lseg_invalid(lseg, &lo->plh_return_segs)) + continue; + pnfs_set_plh_return_info(lo, lseg->pls_range.iomode, 0); + } + + if (!test_bit(NFS_LAYOUT_RETURN_REQUESTED, &lo->plh_flags)) + goto out_noroc; + /* ROC in two conditions: * 1. there are ROC lsegs * 2. we don't send layoutreturn */ - if (found && !layoutreturn) { - /* lo ref dropped in pnfs_roc_release() */ - pnfs_get_layout_hdr(lo); - roc = true; - } + /* lo ref dropped in pnfs_roc_release() */ + layoutreturn = pnfs_prepare_layoutreturn(lo, &stateid, &iomode); + /* If the creds don't match, we can't compound the layoutreturn */ + if (!layoutreturn || cred != lo->plh_lc_cred) + goto out_noroc; + + roc = layoutreturn; + pnfs_init_layoutreturn_args(args, lo, &stateid, iomode); + res->lrs_present = 0; + layoutreturn = false; out_noroc: spin_unlock(&ino->i_lock); - pnfs_free_lseg_list(&tmp_list); pnfs_layoutcommit_inode(ino, true); + if (roc) { + struct pnfs_layoutdriver_type *ld = NFS_SERVER(ino)->pnfs_curr_ld; + if (ld->prepare_layoutreturn) + ld->prepare_layoutreturn(args); + return true; + } if (layoutreturn) - pnfs_send_layoutreturn(lo, &stateid, IOMODE_ANY, true); - return roc; -} - -void pnfs_roc_release(struct inode *ino) -{ - struct pnfs_layout_hdr *lo; - - spin_lock(&ino->i_lock); - lo = NFS_I(ino)->layout; - pnfs_clear_layoutreturn_waitbit(lo); - if (atomic_dec_and_test(&lo->plh_refcount)) { - pnfs_detach_layout_hdr(lo); - spin_unlock(&ino->i_lock); - pnfs_free_layout_hdr(lo); - } else - spin_unlock(&ino->i_lock); + pnfs_send_layoutreturn(lo, &stateid, iomode, true); + return false; } -void pnfs_roc_set_barrier(struct inode *ino, u32 barrier) +void pnfs_roc_release(struct nfs4_layoutreturn_args *args, + struct nfs4_layoutreturn_res *res, + int ret) { - struct pnfs_layout_hdr *lo; + struct pnfs_layout_hdr *lo = args->layout; + const nfs4_stateid *arg_stateid = NULL; + const nfs4_stateid *res_stateid = NULL; + struct nfs4_xdr_opaque_data *ld_private = args->ld_private; - spin_lock(&ino->i_lock); - lo = NFS_I(ino)->layout; - if (pnfs_seqid_is_newer(barrier, lo->plh_barrier)) - lo->plh_barrier = barrier; - spin_unlock(&ino->i_lock); - trace_nfs4_layoutreturn_on_close(ino, 0); -} - -void pnfs_roc_get_barrier(struct inode *ino, u32 *barrier) -{ - struct nfs_inode *nfsi = NFS_I(ino); - struct pnfs_layout_hdr *lo; - u32 current_seqid; - - spin_lock(&ino->i_lock); - lo = nfsi->layout; - current_seqid = be32_to_cpu(lo->plh_stateid.seqid); - - /* Since close does not return a layout stateid for use as - * a barrier, we choose the worst-case barrier. - */ - *barrier = current_seqid + atomic_read(&lo->plh_outstanding); - spin_unlock(&ino->i_lock); + if (ret == 0) { + arg_stateid = &args->stateid; + if (res->lrs_present) + res_stateid = &res->stateid; + } + pnfs_layoutreturn_free_lsegs(lo, arg_stateid, &args->range, + res_stateid); + if (ld_private && ld_private->ops && ld_private->ops->free) + ld_private->ops->free(ld_private); + pnfs_put_layout_hdr(lo); + trace_nfs4_layoutreturn_on_close(args->inode, 0); } bool pnfs_wait_on_layoutreturn(struct inode *ino, struct rpc_task *task) @@ -1252,13 +1359,11 @@ bool pnfs_wait_on_layoutreturn(struct inode *ino, struct rpc_task *task) * i_lock */ spin_lock(&ino->i_lock); lo = nfsi->layout; - if (lo && test_bit(NFS_LAYOUT_RETURN, &lo->plh_flags)) + if (lo && test_bit(NFS_LAYOUT_RETURN, &lo->plh_flags)) { + rpc_sleep_on(&NFS_SERVER(ino)->roc_rpcwaitq, task, NULL); sleep = true; + } spin_unlock(&ino->i_lock); - - if (sleep) - rpc_sleep_on(&NFS_SERVER(ino)->roc_rpcwaitq, task, NULL); - return sleep; } @@ -1375,6 +1480,7 @@ alloc_init_layout_hdr(struct inode *ino, atomic_set(&lo->plh_refcount, 1); INIT_LIST_HEAD(&lo->plh_layouts); INIT_LIST_HEAD(&lo->plh_segs); + INIT_LIST_HEAD(&lo->plh_return_segs); INIT_LIST_HEAD(&lo->plh_bulk_destroy); lo->plh_inode = ino; lo->plh_lc_cred = get_rpccred(ctx->cred); @@ -1841,7 +1947,10 @@ pnfs_layout_process(struct nfs4_layoutget *lgp) goto out_forget; } - if (nfs4_stateid_match_other(&lo->plh_stateid, &res->stateid)) { + if (!pnfs_layout_is_valid(lo)) { + /* We have a completely new layout */ + pnfs_set_layout_stateid(lo, &res->stateid, true); + } else if (nfs4_stateid_match_other(&lo->plh_stateid, &res->stateid)) { /* existing state ID, make sure the sequence number matches. */ if (pnfs_layout_stateid_blocked(lo, &res->stateid)) { dprintk("%s forget reply due to sequence\n", __func__); @@ -1851,12 +1960,10 @@ pnfs_layout_process(struct nfs4_layoutget *lgp) } else { /* * We got an entirely new state ID. Mark all segments for the - * inode invalid, and don't bother validating the stateid - * sequence number. + * inode invalid, and retry the layoutget */ pnfs_mark_layout_stateid_invalid(lo, &free_me); - - pnfs_set_layout_stateid(lo, &res->stateid, true); + goto out_forget; } pnfs_get_lseg(lseg); @@ -1877,20 +1984,6 @@ out_forget: return ERR_PTR(-EAGAIN); } -static void -pnfs_set_plh_return_info(struct pnfs_layout_hdr *lo, enum pnfs_iomode iomode, - u32 seq) -{ - if (lo->plh_return_iomode != 0 && lo->plh_return_iomode != iomode) - iomode = IOMODE_ANY; - lo->plh_return_iomode = iomode; - set_bit(NFS_LAYOUT_RETURN_REQUESTED, &lo->plh_flags); - if (seq != 0) { - WARN_ON_ONCE(lo->plh_return_seq != 0 && lo->plh_return_seq != seq); - lo->plh_return_seq = seq; - } -} - /** * pnfs_mark_matching_lsegs_return - Free or return matching layout segments * @lo: pointer to layout header @@ -1945,17 +2038,18 @@ void pnfs_error_mark_layout_for_return(struct inode *inode, .offset = 0, .length = NFS4_MAX_UINT64, }; - LIST_HEAD(free_me); bool return_now = false; spin_lock(&inode->i_lock); pnfs_set_plh_return_info(lo, range.iomode, 0); + /* Block LAYOUTGET */ + set_bit(NFS_LAYOUT_RETURN, &lo->plh_flags); /* * mark all matching lsegs so that we are sure to have no live * segments at hand when sending layoutreturn. See pnfs_put_lseg() * for how it works. */ - if (!pnfs_mark_matching_lsegs_return(lo, &free_me, &range, 0)) { + if (!pnfs_mark_matching_lsegs_return(lo, &lo->plh_return_segs, &range, 0)) { nfs4_stateid stateid; enum pnfs_iomode iomode; @@ -1967,7 +2061,6 @@ void pnfs_error_mark_layout_for_return(struct inode *inode, spin_unlock(&inode->i_lock); nfs_commit_inode(inode, 0); } - pnfs_free_lseg_list(&free_me); } EXPORT_SYMBOL_GPL(pnfs_error_mark_layout_for_return); @@ -2063,7 +2156,7 @@ pnfs_generic_pg_test(struct nfs_pageio_descriptor *pgio, * */ if (pgio->pg_lseg) { - seg_end = end_offset(pgio->pg_lseg->pls_range.offset, + seg_end = pnfs_end_offset(pgio->pg_lseg->pls_range.offset, pgio->pg_lseg->pls_range.length); req_start = req_offset(req); WARN_ON_ONCE(req_start >= seg_end); @@ -2286,6 +2379,10 @@ void pnfs_read_resend_pnfs(struct nfs_pgio_header *hdr) struct nfs_pageio_descriptor pgio; if (!test_and_set_bit(NFS_IOHDR_REDO, &hdr->flags)) { + /* Prevent deadlocks with layoutreturn! */ + pnfs_put_lseg(hdr->lseg); + hdr->lseg = NULL; + nfs_pageio_init_read(&pgio, hdr->inode, false, hdr->completion_ops); hdr->task.tk_status = nfs_pageio_resend(&pgio, hdr); |