diff options
author | Trond Myklebust <trond.myklebust@primarydata.com> | 2016-01-04 13:19:55 -0500 |
---|---|---|
committer | Trond Myklebust <trond.myklebust@primarydata.com> | 2016-01-04 13:19:55 -0500 |
commit | 942e3d72a62dcfe5bf1569b179174718bbbcfbc3 (patch) | |
tree | b13189dfe2f9a86123aee14ef365d9e9729d46cb | |
parent | 58baac0ac7cc13a690f6f7cea23accaf84e843a0 (diff) | |
parent | 506c0d68269e90d354b3cbfc7523611b026c88d0 (diff) | |
download | op-kernel-dev-942e3d72a62dcfe5bf1569b179174718bbbcfbc3.zip op-kernel-dev-942e3d72a62dcfe5bf1569b179174718bbbcfbc3.tar.gz |
Merge branch 'pnfs_generic'
* pnfs_generic:
NFSv4.1/pNFS: Cleanup constify struct pnfs_layout_range arguments
NFSv4.1/pnfs: Cleanup copying of pnfs_layout_range structures
NFSv4.1/pNFS: Cleanup pnfs_mark_matching_lsegs_invalid()
NFSv4.1/pNFS: Fix a race in initiate_file_draining()
NFSv4.1/pNFS: pnfs_error_mark_layout_for_return() must always return layout
NFSv4.1/pNFS: pnfs_mark_matching_lsegs_return() should set the iomode
NFSv4.1/pNFS: Use nfs4_stateid_copy for copying stateids
NFSv4.1/pNFS: Don't pass stateids by value to pnfs_send_layoutreturn()
NFS: Relax requirements in nfs_flush_incompatible
NFSv4.1/pNFS: Don't queue up a new commit if the layout segment is invalid
NFS: Allow multiple commit requests in flight per file
NFS/pNFS: Fix up pNFS write reschedule layering violations and bugs
NFSv4: List stateid information in the callback tracepoints
NFSv4.1/pNFS: Don't return NFS4ERR_DELAY unnecessarily in CB_LAYOUTRECALL
NFSv4.1/pNFS: Ensure we enforce RFC5661 Section 12.5.5.2.1
pNFS: If we have to delay the layout callback, mark the layout for return
NFSv4.1/pNFS: Add a helper to mark the layout as returned
pNFS: Ensure nfs4_layoutget_prepare returns the correct error
-rw-r--r-- | fs/nfs/callback_proc.c | 52 | ||||
-rw-r--r-- | fs/nfs/direct.c | 33 | ||||
-rw-r--r-- | fs/nfs/file.c | 2 | ||||
-rw-r--r-- | fs/nfs/flexfilelayout/flexfilelayout.c | 13 | ||||
-rw-r--r-- | fs/nfs/internal.h | 7 | ||||
-rw-r--r-- | fs/nfs/nfs4proc.c | 12 | ||||
-rw-r--r-- | fs/nfs/nfs4trace.h | 69 | ||||
-rw-r--r-- | fs/nfs/nfstrace.h | 1 | ||||
-rw-r--r-- | fs/nfs/pagelist.c | 6 | ||||
-rw-r--r-- | fs/nfs/pnfs.c | 82 | ||||
-rw-r--r-- | fs/nfs/pnfs.h | 33 | ||||
-rw-r--r-- | fs/nfs/pnfs_nfs.c | 10 | ||||
-rw-r--r-- | fs/nfs/write.c | 88 | ||||
-rw-r--r-- | include/linux/nfs_fs.h | 1 | ||||
-rw-r--r-- | include/linux/nfs_xdr.h | 4 |
15 files changed, 296 insertions, 117 deletions
diff --git a/fs/nfs/callback_proc.c b/fs/nfs/callback_proc.c index 807eb6e..f0939d0 100644 --- a/fs/nfs/callback_proc.c +++ b/fs/nfs/callback_proc.c @@ -83,8 +83,11 @@ __be32 nfs4_callback_recall(struct cb_recallargs *args, void *dummy, res = htonl(NFS4ERR_BADHANDLE); inode = nfs_delegation_find_inode(cps->clp, &args->fh); - if (inode == NULL) + if (inode == NULL) { + trace_nfs4_cb_recall(cps->clp, &args->fh, NULL, + &args->stateid, -ntohl(res)); goto out; + } /* Set up a helper thread to actually return the delegation */ switch (nfs_async_inode_return_delegation(inode, &args->stateid)) { case 0: @@ -96,7 +99,8 @@ __be32 nfs4_callback_recall(struct cb_recallargs *args, void *dummy, default: res = htonl(NFS4ERR_RESOURCE); } - trace_nfs4_recall_delegation(inode, -ntohl(res)); + trace_nfs4_cb_recall(cps->clp, &args->fh, inode, + &args->stateid, -ntohl(res)); iput(inode); out: dprintk("%s: exit with status = %d\n", __func__, ntohl(res)); @@ -160,6 +164,22 @@ static struct pnfs_layout_hdr * get_layout_by_fh(struct nfs_client *clp, return lo; } +/* + * Enforce RFC5661 section 12.5.5.2.1. (Layout Recall and Return Sequencing) + */ +static bool pnfs_check_stateid_sequence(struct pnfs_layout_hdr *lo, + const nfs4_stateid *new) +{ + u32 oldseq, newseq; + + oldseq = be32_to_cpu(lo->plh_stateid.seqid); + newseq = be32_to_cpu(new->seqid); + + if (newseq > oldseq + 1) + return false; + return true; +} + static u32 initiate_file_draining(struct nfs_client *clp, struct cb_layoutrecallargs *args) { @@ -169,34 +189,52 @@ static u32 initiate_file_draining(struct nfs_client *clp, LIST_HEAD(free_me_list); lo = get_layout_by_fh(clp, &args->cbl_fh, &args->cbl_stateid); - if (!lo) + if (!lo) { + trace_nfs4_cb_layoutrecall_file(clp, &args->cbl_fh, NULL, + &args->cbl_stateid, -rv); goto out; + } ino = lo->plh_inode; spin_lock(&ino->i_lock); + if (!pnfs_check_stateid_sequence(lo, &args->cbl_stateid)) { + rv = NFS4ERR_DELAY; + goto unlock; + } pnfs_set_layout_stateid(lo, &args->cbl_stateid, true); spin_unlock(&ino->i_lock); pnfs_layoutcommit_inode(ino, false); spin_lock(&ino->i_lock); - if (test_bit(NFS_LAYOUT_BULK_RECALL, &lo->plh_flags) || - pnfs_mark_matching_lsegs_invalid(lo, &free_me_list, - &args->cbl_range)) { + /* + * Enforce RFC5661 Section 12.5.5.2.1.5 (Bulk Recall and Return) + */ + if (test_bit(NFS_LAYOUT_BULK_RECALL, &lo->plh_flags)) { rv = NFS4ERR_DELAY; goto unlock; } + if (pnfs_mark_matching_lsegs_return(lo, &free_me_list, + &args->cbl_range)) { + rv = NFS4_OK; + goto unlock; + } + if (NFS_SERVER(ino)->pnfs_curr_ld->return_range) { NFS_SERVER(ino)->pnfs_curr_ld->return_range(lo, &args->cbl_range); } + pnfs_mark_layout_returned_if_empty(lo); unlock: spin_unlock(&ino->i_lock); pnfs_free_lseg_list(&free_me_list); + /* Free all lsegs that are attached to commit buckets */ + nfs_commit_inode(ino, 0); pnfs_put_layout_hdr(lo); - trace_nfs4_cb_layoutrecall_inode(clp, &args->cbl_fh, ino, -rv); + trace_nfs4_cb_layoutrecall_file(clp, &args->cbl_fh, ino, + &args->cbl_stateid, -rv); iput(ino); out: return rv; diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c index 2e7142b..7ab7ec9 100644 --- a/fs/nfs/direct.c +++ b/fs/nfs/direct.c @@ -117,12 +117,6 @@ static inline int put_dreq(struct nfs_direct_req *dreq) return atomic_dec_and_test(&dreq->io_count); } -void nfs_direct_set_resched_writes(struct nfs_direct_req *dreq) -{ - dreq->flags = NFS_ODIRECT_RESCHED_WRITES; -} -EXPORT_SYMBOL_GPL(nfs_direct_set_resched_writes); - static void nfs_direct_good_bytes(struct nfs_direct_req *dreq, struct nfs_pgio_header *hdr) { @@ -735,14 +729,20 @@ static void nfs_direct_commit_complete(struct nfs_commit_data *data) nfs_direct_write_complete(dreq, data->inode); } -static void nfs_direct_error_cleanup(struct nfs_inode *nfsi) +static void nfs_direct_resched_write(struct nfs_commit_info *cinfo, + struct nfs_page *req) { - /* There is no lock to clear */ + struct nfs_direct_req *dreq = cinfo->dreq; + + spin_lock(&dreq->lock); + dreq->flags = NFS_ODIRECT_RESCHED_WRITES; + spin_unlock(&dreq->lock); + nfs_mark_request_commit(req, NULL, cinfo, 0); } static const struct nfs_commit_completion_ops nfs_direct_commit_completion_ops = { .completion = nfs_direct_commit_complete, - .error_cleanup = nfs_direct_error_cleanup, + .resched_write = nfs_direct_resched_write, }; static void nfs_direct_commit_schedule(struct nfs_direct_req *dreq) @@ -847,10 +847,25 @@ static void nfs_write_sync_pgio_error(struct list_head *head) } } +static void nfs_direct_write_reschedule_io(struct nfs_pgio_header *hdr) +{ + struct nfs_direct_req *dreq = hdr->dreq; + + spin_lock(&dreq->lock); + if (dreq->error == 0) { + dreq->flags = NFS_ODIRECT_RESCHED_WRITES; + /* fake unstable write to let common nfs resend pages */ + hdr->verf.committed = NFS_UNSTABLE; + hdr->good_bytes = hdr->args.count; + } + spin_unlock(&dreq->lock); +} + static const struct nfs_pgio_completion_ops nfs_direct_write_completion_ops = { .error_cleanup = nfs_write_sync_pgio_error, .init_hdr = nfs_direct_pgio_init, .completion = nfs_direct_write_completion, + .reschedule_io = nfs_direct_write_reschedule_io, }; diff --git a/fs/nfs/file.c b/fs/nfs/file.c index f188dd0..178ec8d 100644 --- a/fs/nfs/file.c +++ b/fs/nfs/file.c @@ -514,7 +514,7 @@ static void nfs_check_dirty_writeback(struct page *page, * so it will not block due to pages that will shortly be freeable. */ nfsi = NFS_I(mapping->host); - if (test_bit(NFS_INO_COMMIT, &nfsi->flags)) { + if (atomic_read(&nfsi->commit_info.rpcs_out)) { *writeback = true; return; } diff --git a/fs/nfs/flexfilelayout/flexfilelayout.c b/fs/nfs/flexfilelayout/flexfilelayout.c index 2981cd1..18c329b 100644 --- a/fs/nfs/flexfilelayout/flexfilelayout.c +++ b/fs/nfs/flexfilelayout/flexfilelayout.c @@ -941,18 +941,7 @@ static void ff_layout_reset_write(struct nfs_pgio_header *hdr, bool retry_pnfs) hdr->args.count, (unsigned long long)hdr->args.offset); - if (!hdr->dreq) { - struct nfs_open_context *ctx; - - ctx = nfs_list_entry(hdr->pages.next)->wb_context; - set_bit(NFS_CONTEXT_RESEND_WRITES, &ctx->flags); - hdr->completion_ops->error_cleanup(&hdr->pages); - } else { - nfs_direct_set_resched_writes(hdr->dreq); - /* fake unstable write to let common nfs resend pages */ - hdr->verf.committed = NFS_UNSTABLE; - hdr->good_bytes = hdr->args.count; - } + hdr->completion_ops->reschedule_io(hdr); return; } diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h index 68f773d..ee81792 100644 --- a/fs/nfs/internal.h +++ b/fs/nfs/internal.h @@ -264,6 +264,12 @@ static inline bool nfs_pgio_has_mirroring(struct nfs_pageio_descriptor *desc) return desc->pg_mirror_count > 1; } +static inline bool nfs_match_open_context(const struct nfs_open_context *ctx1, + const struct nfs_open_context *ctx2) +{ + return ctx1->cred == ctx2->cred && ctx1->state == ctx2->state; +} + /* nfs2xdr.c */ extern struct rpc_procinfo nfs_procedures[]; extern int nfs2_decode_dirent(struct xdr_stream *, @@ -519,7 +525,6 @@ static inline void nfs_inode_dio_wait(struct inode *inode) inode_dio_wait(inode); } extern ssize_t nfs_dreq_bytes_left(struct nfs_direct_req *dreq); -extern void nfs_direct_set_resched_writes(struct nfs_direct_req *dreq); /* nfs4proc.c */ extern void __nfs4_read_done_cb(struct nfs_pgio_header *); diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index adae525..5e5062c 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -7776,6 +7776,7 @@ nfs4_layoutget_prepare(struct rpc_task *task, void *calldata) struct nfs4_layoutget *lgp = calldata; struct nfs_server *server = NFS_SERVER(lgp->args.inode); struct nfs4_session *session = nfs4_get_session(server); + int ret; dprintk("--> %s\n", __func__); /* Note the is a race here, where a CB_LAYOUTRECALL can come in @@ -7786,12 +7787,12 @@ nfs4_layoutget_prepare(struct rpc_task *task, void *calldata) if (nfs41_setup_sequence(session, &lgp->args.seq_args, &lgp->res.seq_res, task)) return; - if (pnfs_choose_layoutget_stateid(&lgp->args.stateid, + ret = pnfs_choose_layoutget_stateid(&lgp->args.stateid, NFS_I(lgp->args.inode)->layout, &lgp->args.range, - lgp->args.ctx->state)) { - rpc_exit(task, NFS4_OK); - } + lgp->args.ctx->state); + if (ret < 0) + rpc_exit(task, ret); } static void nfs4_layoutget_done(struct rpc_task *task, void *calldata) @@ -8073,9 +8074,10 @@ static void nfs4_layoutreturn_release(void *calldata) dprintk("--> %s\n", __func__); spin_lock(&lo->plh_inode->i_lock); + pnfs_mark_matching_lsegs_invalid(lo, &freeme, &lrp->args.range); + pnfs_mark_layout_returned_if_empty(lo); if (lrp->res.lrs_present) pnfs_set_layout_stateid(lo, &lrp->res.stateid, true); - pnfs_mark_matching_lsegs_invalid(lo, &freeme, &lrp->args.range); pnfs_clear_layoutreturn_waitbit(lo); lo->plh_block_lgets--; spin_unlock(&lo->plh_inode->i_lock); diff --git a/fs/nfs/nfs4trace.h b/fs/nfs/nfs4trace.h index d08d0c8..88b6b14 100644 --- a/fs/nfs/nfs4trace.h +++ b/fs/nfs/nfs4trace.h @@ -982,7 +982,6 @@ DEFINE_NFS4_INODE_EVENT(nfs4_set_acl); DEFINE_NFS4_INODE_EVENT(nfs4_get_security_label); DEFINE_NFS4_INODE_EVENT(nfs4_set_security_label); #endif /* CONFIG_NFS_V4_SECURITY_LABEL */ -DEFINE_NFS4_INODE_EVENT(nfs4_recall_delegation); DECLARE_EVENT_CLASS(nfs4_inode_stateid_event, TP_PROTO( @@ -1145,8 +1144,74 @@ DECLARE_EVENT_CLASS(nfs4_inode_callback_event, ), \ TP_ARGS(clp, fhandle, inode, error)) DEFINE_NFS4_INODE_CALLBACK_EVENT(nfs4_cb_getattr); -DEFINE_NFS4_INODE_CALLBACK_EVENT(nfs4_cb_layoutrecall_inode); +DECLARE_EVENT_CLASS(nfs4_inode_stateid_callback_event, + TP_PROTO( + const struct nfs_client *clp, + const struct nfs_fh *fhandle, + const struct inode *inode, + const nfs4_stateid *stateid, + int error + ), + + TP_ARGS(clp, fhandle, inode, stateid, error), + + TP_STRUCT__entry( + __field(int, error) + __field(dev_t, dev) + __field(u32, fhandle) + __field(u64, fileid) + __string(dstaddr, clp ? + rpc_peeraddr2str(clp->cl_rpcclient, + RPC_DISPLAY_ADDR) : "unknown") + __field(int, stateid_seq) + __field(u32, stateid_hash) + ), + + TP_fast_assign( + __entry->error = error; + __entry->fhandle = nfs_fhandle_hash(fhandle); + if (inode != NULL) { + __entry->fileid = NFS_FILEID(inode); + __entry->dev = inode->i_sb->s_dev; + } else { + __entry->fileid = 0; + __entry->dev = 0; + } + __assign_str(dstaddr, clp ? + rpc_peeraddr2str(clp->cl_rpcclient, + RPC_DISPLAY_ADDR) : "unknown") + __entry->stateid_seq = + be32_to_cpu(stateid->seqid); + __entry->stateid_hash = + nfs_stateid_hash(stateid); + ), + + TP_printk( + "error=%d (%s) fileid=%02x:%02x:%llu fhandle=0x%08x " + "stateid=%d:0x%08x dstaddr=%s", + __entry->error, + show_nfsv4_errors(__entry->error), + MAJOR(__entry->dev), MINOR(__entry->dev), + (unsigned long long)__entry->fileid, + __entry->fhandle, + __entry->stateid_seq, __entry->stateid_hash, + __get_str(dstaddr) + ) +); + +#define DEFINE_NFS4_INODE_STATEID_CALLBACK_EVENT(name) \ + DEFINE_EVENT(nfs4_inode_stateid_callback_event, name, \ + TP_PROTO( \ + const struct nfs_client *clp, \ + const struct nfs_fh *fhandle, \ + const struct inode *inode, \ + const nfs4_stateid *stateid, \ + int error \ + ), \ + TP_ARGS(clp, fhandle, inode, stateid, error)) +DEFINE_NFS4_INODE_STATEID_CALLBACK_EVENT(nfs4_cb_recall); +DEFINE_NFS4_INODE_STATEID_CALLBACK_EVENT(nfs4_cb_layoutrecall_file); DECLARE_EVENT_CLASS(nfs4_idmap_event, TP_PROTO( diff --git a/fs/nfs/nfstrace.h b/fs/nfs/nfstrace.h index 59f838c..9f80a08 100644 --- a/fs/nfs/nfstrace.h +++ b/fs/nfs/nfstrace.h @@ -39,7 +39,6 @@ { 1 << NFS_INO_INVALIDATING, "INVALIDATING" }, \ { 1 << NFS_INO_FLUSHING, "FLUSHING" }, \ { 1 << NFS_INO_FSCACHE, "FSCACHE" }, \ - { 1 << NFS_INO_COMMIT, "COMMIT" }, \ { 1 << NFS_INO_LAYOUTCOMMIT, "NEED_LAYOUTCOMMIT" }, \ { 1 << NFS_INO_LAYOUTCOMMITTING, "LAYOUTCOMMIT" }) diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c index 7c71b71..eeddbf0 100644 --- a/fs/nfs/pagelist.c +++ b/fs/nfs/pagelist.c @@ -899,12 +899,6 @@ static void nfs_pageio_cleanup_mirroring(struct nfs_pageio_descriptor *pgio) pgio->pg_mirrors_dynamic = NULL; } -static bool nfs_match_open_context(const struct nfs_open_context *ctx1, - const struct nfs_open_context *ctx2) -{ - return ctx1->cred == ctx2->cred && ctx1->state == ctx2->state; -} - static bool nfs_match_lock_context(const struct nfs_lock_context *l1, const struct nfs_lock_context *l2) { diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index 113c3b3..a3592cc 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c @@ -53,7 +53,7 @@ static DEFINE_SPINLOCK(pnfs_spinlock); static LIST_HEAD(pnfs_modules_tbl); static int -pnfs_send_layoutreturn(struct pnfs_layout_hdr *lo, nfs4_stateid stateid, +pnfs_send_layoutreturn(struct pnfs_layout_hdr *lo, const nfs4_stateid *stateid, enum pnfs_iomode iomode, bool sync); /* Return the registered pnfs layout driver module matching given id */ @@ -385,13 +385,13 @@ static void pnfs_layoutreturn_before_put_lseg(struct pnfs_layout_segment *lseg, enum pnfs_iomode iomode; bool send; - stateid = lo->plh_stateid; + nfs4_stateid_copy(&stateid, &lo->plh_stateid); iomode = lo->plh_return_iomode; send = pnfs_prepare_layoutreturn(lo); spin_unlock(&inode->i_lock); if (send) { /* Send an async layoutreturn so we dont deadlock */ - pnfs_send_layoutreturn(lo, stateid, iomode, false); + pnfs_send_layoutreturn(lo, &stateid, iomode, false); } } else spin_unlock(&inode->i_lock); @@ -566,10 +566,10 @@ static int mark_lseg_invalid(struct pnfs_layout_segment *lseg, int pnfs_mark_matching_lsegs_invalid(struct pnfs_layout_hdr *lo, struct list_head *tmp_list, - struct pnfs_layout_range *recall_range) + const struct pnfs_layout_range *recall_range) { struct pnfs_layout_segment *lseg, *next; - int invalid = 0, removed = 0; + int remaining = 0; dprintk("%s:Begin lo %p\n", __func__, lo); @@ -582,11 +582,11 @@ pnfs_mark_matching_lsegs_invalid(struct pnfs_layout_hdr *lo, "offset %llu length %llu\n", __func__, lseg, lseg->pls_range.iomode, lseg->pls_range.offset, lseg->pls_range.length); - invalid++; - removed += mark_lseg_invalid(lseg, tmp_list); + if (!mark_lseg_invalid(lseg, tmp_list)) + remaining++; } - dprintk("%s:Return %i\n", __func__, invalid - removed); - return invalid - removed; + dprintk("%s:Return %i\n", __func__, remaining); + return remaining; } /* note free_me must contain lsegs from a single layout_hdr */ @@ -702,6 +702,8 @@ pnfs_layout_free_bulk_destroy_list(struct list_head *layout_list, ret = -EAGAIN; spin_unlock(&inode->i_lock); pnfs_free_lseg_list(&lseg_list); + /* Free all lsegs that are attached to commit buckets */ + nfs_commit_inode(inode, 0); pnfs_put_layout_hdr(lo); iput(inode); } @@ -825,7 +827,7 @@ pnfs_layoutgets_blocked(const struct pnfs_layout_hdr *lo) int pnfs_choose_layoutget_stateid(nfs4_stateid *dst, struct pnfs_layout_hdr *lo, - struct pnfs_layout_range *range, + const struct pnfs_layout_range *range, struct nfs4_state *open_state) { int status = 0; @@ -860,7 +862,7 @@ pnfs_choose_layoutget_stateid(nfs4_stateid *dst, struct pnfs_layout_hdr *lo, static struct pnfs_layout_segment * send_layoutget(struct pnfs_layout_hdr *lo, struct nfs_open_context *ctx, - struct pnfs_layout_range *range, + const struct pnfs_layout_range *range, gfp_t gfp_flags) { struct inode *ino = lo->plh_inode; @@ -893,7 +895,7 @@ send_layoutget(struct pnfs_layout_hdr *lo, lgp->args.minlength = i_size - range->offset; } lgp->args.maxcount = PNFS_LAYOUT_MAXSIZE; - lgp->args.range = *range; + pnfs_copy_range(&lgp->args.range, range); lgp->args.type = server->pnfs_curr_ld->id; lgp->args.inode = ino; lgp->args.ctx = get_nfs_open_context(ctx); @@ -936,7 +938,7 @@ void pnfs_clear_layoutreturn_waitbit(struct pnfs_layout_hdr *lo) } static int -pnfs_send_layoutreturn(struct pnfs_layout_hdr *lo, nfs4_stateid stateid, +pnfs_send_layoutreturn(struct pnfs_layout_hdr *lo, const nfs4_stateid *stateid, enum pnfs_iomode iomode, bool sync) { struct inode *ino = lo->plh_inode; @@ -953,7 +955,7 @@ pnfs_send_layoutreturn(struct pnfs_layout_hdr *lo, nfs4_stateid stateid, goto out; } - lrp->args.stateid = stateid; + nfs4_stateid_copy(&lrp->args.stateid, stateid); lrp->args.layout_type = NFS_SERVER(ino)->pnfs_curr_ld->id; lrp->args.inode = ino; lrp->args.range.iomode = iomode; @@ -996,7 +998,7 @@ _pnfs_return_layout(struct inode *ino) dprintk("NFS: %s no layout to return\n", __func__); goto out; } - stateid = nfsi->layout->plh_stateid; + nfs4_stateid_copy(&stateid, &nfsi->layout->plh_stateid); /* Reference matched in nfs4_layoutreturn_release */ pnfs_get_layout_hdr(lo); empty = list_empty(&lo->plh_segs); @@ -1024,7 +1026,7 @@ _pnfs_return_layout(struct inode *ino) spin_unlock(&ino->i_lock); pnfs_free_lseg_list(&tmp_list); if (send) - status = pnfs_send_layoutreturn(lo, stateid, IOMODE_ANY, true); + status = pnfs_send_layoutreturn(lo, &stateid, IOMODE_ANY, true); out_put_layout_hdr: pnfs_put_layout_hdr(lo); out: @@ -1087,7 +1089,7 @@ bool pnfs_roc(struct inode *ino) goto out_noroc; } - stateid = lo->plh_stateid; + nfs4_stateid_copy(&stateid, &lo->plh_stateid); /* always send layoutreturn if being marked so */ if (test_and_clear_bit(NFS_LAYOUT_RETURN_BEFORE_CLOSE, &lo->plh_flags)) @@ -1114,7 +1116,7 @@ out_noroc: pnfs_free_lseg_list(&tmp_list); pnfs_layoutcommit_inode(ino, true); if (layoutreturn) - pnfs_send_layoutreturn(lo, stateid, IOMODE_ANY, true); + pnfs_send_layoutreturn(lo, &stateid, IOMODE_ANY, true); return roc; } @@ -1139,6 +1141,7 @@ void pnfs_roc_set_barrier(struct inode *ino, u32 barrier) spin_lock(&ino->i_lock); lo = NFS_I(ino)->layout; + pnfs_mark_layout_returned_if_empty(lo); if (pnfs_seqid_is_newer(barrier, lo->plh_barrier)) lo->plh_barrier = barrier; spin_unlock(&ino->i_lock); @@ -1734,16 +1737,29 @@ out_forget_reply: } static void +pnfs_set_plh_return_iomode(struct pnfs_layout_hdr *lo, enum pnfs_iomode iomode) +{ + if (lo->plh_return_iomode == iomode) + return; + if (lo->plh_return_iomode != 0) + iomode = IOMODE_ANY; + lo->plh_return_iomode = iomode; +} + +int pnfs_mark_matching_lsegs_return(struct pnfs_layout_hdr *lo, struct list_head *tmp_list, - struct pnfs_layout_range *return_range) + const struct pnfs_layout_range *return_range) { struct pnfs_layout_segment *lseg, *next; + int remaining = 0; dprintk("%s:Begin lo %p\n", __func__, lo); if (list_empty(&lo->plh_segs)) - return; + return 0; + + assert_spin_locked(&lo->plh_inode->i_lock); list_for_each_entry_safe(lseg, next, &lo->plh_segs, pls_list) if (should_free_lseg(&lseg->pls_range, return_range)) { @@ -1753,10 +1769,13 @@ pnfs_mark_matching_lsegs_return(struct pnfs_layout_hdr *lo, lseg->pls_range.offset, lseg->pls_range.length); set_bit(NFS_LSEG_LAYOUTRETURN, &lseg->pls_flags); - mark_lseg_invalid(lseg, tmp_list); + pnfs_set_plh_return_iomode(lo, return_range->iomode); + if (!mark_lseg_invalid(lseg, tmp_list)) + remaining++; set_bit(NFS_LAYOUT_RETURN_BEFORE_CLOSE, &lo->plh_flags); } + return remaining; } void pnfs_error_mark_layout_for_return(struct inode *inode, @@ -1769,19 +1788,28 @@ void pnfs_error_mark_layout_for_return(struct inode *inode, .length = NFS4_MAX_UINT64, }; LIST_HEAD(free_me); + bool return_now = false; spin_lock(&inode->i_lock); - if (lo->plh_return_iomode == 0) - lo->plh_return_iomode = range.iomode; - else if (lo->plh_return_iomode != range.iomode) - lo->plh_return_iomode = IOMODE_ANY; + pnfs_set_plh_return_iomode(lo, range.iomode); /* * mark all matching lsegs so that we are sure to have no live * segments at hand when sending layoutreturn. See pnfs_put_lseg() * for how it works. */ - pnfs_mark_matching_lsegs_return(lo, &free_me, &range); - spin_unlock(&inode->i_lock); + if (!pnfs_mark_matching_lsegs_return(lo, &free_me, &range)) { + nfs4_stateid stateid; + enum pnfs_iomode iomode = lo->plh_return_iomode; + + nfs4_stateid_copy(&stateid, &lo->plh_stateid); + return_now = pnfs_prepare_layoutreturn(lo); + spin_unlock(&inode->i_lock); + if (return_now) + pnfs_send_layoutreturn(lo, &stateid, iomode, false); + } else { + spin_unlock(&inode->i_lock); + nfs_commit_inode(inode, 0); + } pnfs_free_lseg_list(&free_me); } EXPORT_SYMBOL_GPL(pnfs_error_mark_layout_for_return); diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h index 6916ff4..9f4e2a4 100644 --- a/fs/nfs/pnfs.h +++ b/fs/nfs/pnfs.h @@ -260,11 +260,14 @@ void pnfs_set_layout_stateid(struct pnfs_layout_hdr *lo, bool update_barrier); int pnfs_choose_layoutget_stateid(nfs4_stateid *dst, struct pnfs_layout_hdr *lo, - struct pnfs_layout_range *range, + const struct pnfs_layout_range *range, struct nfs4_state *open_state); int pnfs_mark_matching_lsegs_invalid(struct pnfs_layout_hdr *lo, struct list_head *tmp_list, - struct pnfs_layout_range *recall_range); + const struct pnfs_layout_range *recall_range); +int pnfs_mark_matching_lsegs_return(struct pnfs_layout_hdr *lo, + struct list_head *tmp_list, + const struct pnfs_layout_range *recall_range); bool pnfs_roc(struct inode *ino); void pnfs_roc_release(struct inode *ino); void pnfs_roc_set_barrier(struct inode *ino, u32 barrier); @@ -388,6 +391,12 @@ pnfs_get_lseg(struct pnfs_layout_segment *lseg) return lseg; } +static inline bool +pnfs_is_valid_lseg(struct pnfs_layout_segment *lseg) +{ + return test_bit(NFS_LSEG_VALID, &lseg->pls_flags) != 0; +} + /* Return true if a layout driver is being used for this mountpoint */ static inline int pnfs_enabled_sb(struct nfs_server *nfss) { @@ -535,6 +544,26 @@ pnfs_calc_offset_length(u64 offset, u64 end) return 1 + end - offset; } +/** + * pnfs_mark_layout_returned_if_empty - marks the layout as returned + * @lo: layout header + * + * Note: Caller must hold inode->i_lock + */ +static inline void +pnfs_mark_layout_returned_if_empty(struct pnfs_layout_hdr *lo) +{ + if (list_empty(&lo->plh_segs)) + set_bit(NFS_LAYOUT_INVALID_STID, &lo->plh_flags); +} + +static inline void +pnfs_copy_range(struct pnfs_layout_range *dst, + const struct pnfs_layout_range *src) +{ + memcpy(dst, src, sizeof(*dst)); +} + extern unsigned int layoutstats_timer; #ifdef NFS_DEBUG diff --git a/fs/nfs/pnfs_nfs.c b/fs/nfs/pnfs_nfs.c index 24655b8..81ac648 100644 --- a/fs/nfs/pnfs_nfs.c +++ b/fs/nfs/pnfs_nfs.c @@ -266,17 +266,14 @@ pnfs_generic_commit_pagelist(struct inode *inode, struct list_head *mds_pages, } else { nfs_retry_commit(mds_pages, NULL, cinfo, 0); pnfs_generic_retry_commit(cinfo, 0); - cinfo->completion_ops->error_cleanup(NFS_I(inode)); return -ENOMEM; } } nreq += pnfs_generic_alloc_ds_commits(cinfo, &list); - if (nreq == 0) { - cinfo->completion_ops->error_cleanup(NFS_I(inode)); + if (nreq == 0) goto out; - } atomic_add(nreq, &cinfo->mds->rpcs_out); @@ -871,6 +868,11 @@ pnfs_layout_mark_request_commit(struct nfs_page *req, buckets = cinfo->ds->buckets; list = &buckets[ds_commit_idx].written; if (list_empty(list)) { + if (!pnfs_is_valid_lseg(lseg)) { + spin_unlock(cinfo->lock); + cinfo->completion_ops->resched_write(cinfo, req); + return; + } /* Non-empty buckets hold a reference on the lseg. That ref * is normally transferred to the COMMIT call and released * there. It could also be released if the last req is pulled diff --git a/fs/nfs/write.c b/fs/nfs/write.c index 4d25423..94828b3 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -21,6 +21,8 @@ #include <linux/nfs_page.h> #include <linux/backing-dev.h> #include <linux/export.h> +#include <linux/freezer.h> +#include <linux/wait.h> #include <asm/uaccess.h> @@ -1155,7 +1157,8 @@ int nfs_flush_incompatible(struct file *file, struct page *page) if (req == NULL) return 0; l_ctx = req->wb_lock_context; - do_flush = req->wb_page != page || req->wb_context != ctx; + do_flush = req->wb_page != page || + !nfs_match_open_context(req->wb_context, ctx); /* for now, flush if more than 1 request in page_group */ do_flush |= req->wb_this_page != req; if (l_ctx && flctx && @@ -1353,9 +1356,15 @@ static void nfs_async_write_error(struct list_head *head) } } +static void nfs_async_write_reschedule_io(struct nfs_pgio_header *hdr) +{ + nfs_async_write_error(&hdr->pages); +} + static const struct nfs_pgio_completion_ops nfs_async_write_completion_ops = { .error_cleanup = nfs_async_write_error, .completion = nfs_write_completion, + .reschedule_io = nfs_async_write_reschedule_io, }; void nfs_pageio_init_write(struct nfs_pageio_descriptor *pgio, @@ -1556,27 +1565,29 @@ static void nfs_writeback_result(struct rpc_task *task, } } +static int nfs_wait_atomic_killable(atomic_t *key) +{ + if (fatal_signal_pending(current)) + return -ERESTARTSYS; + freezable_schedule_unsafe(); + return 0; +} -static int nfs_commit_set_lock(struct nfs_inode *nfsi, int may_wait) +static int wait_on_commit(struct nfs_mds_commit_info *cinfo) { - int ret; + return wait_on_atomic_t(&cinfo->rpcs_out, + nfs_wait_atomic_killable, TASK_KILLABLE); +} - if (!test_and_set_bit(NFS_INO_COMMIT, &nfsi->flags)) - return 1; - if (!may_wait) - return 0; - ret = out_of_line_wait_on_bit_lock(&nfsi->flags, - NFS_INO_COMMIT, - nfs_wait_bit_killable, - TASK_KILLABLE); - return (ret < 0) ? ret : 1; +static void nfs_commit_begin(struct nfs_mds_commit_info *cinfo) +{ + atomic_inc(&cinfo->rpcs_out); } -static void nfs_commit_clear_lock(struct nfs_inode *nfsi) +static void nfs_commit_end(struct nfs_mds_commit_info *cinfo) { - clear_bit(NFS_INO_COMMIT, &nfsi->flags); - smp_mb__after_atomic(); - wake_up_bit(&nfsi->flags, NFS_INO_COMMIT); + if (atomic_dec_and_test(&cinfo->rpcs_out)) + wake_up_atomic_t(&cinfo->rpcs_out); } void nfs_commitdata_release(struct nfs_commit_data *data) @@ -1693,6 +1704,13 @@ void nfs_retry_commit(struct list_head *page_list, } EXPORT_SYMBOL_GPL(nfs_retry_commit); +static void +nfs_commit_resched_write(struct nfs_commit_info *cinfo, + struct nfs_page *req) +{ + __set_page_dirty_nobuffers(req->wb_page); +} + /* * Commit dirty pages */ @@ -1714,7 +1732,6 @@ nfs_commit_list(struct inode *inode, struct list_head *head, int how, data->mds_ops, how, 0); out_bad: nfs_retry_commit(head, NULL, cinfo, 0); - cinfo->completion_ops->error_cleanup(NFS_I(inode)); return -ENOMEM; } @@ -1776,8 +1793,7 @@ static void nfs_commit_release_pages(struct nfs_commit_data *data) clear_bdi_congested(&nfss->backing_dev_info, BLK_RW_ASYNC); nfs_init_cinfo(&cinfo, data->inode, data->dreq); - if (atomic_dec_and_test(&cinfo.mds->rpcs_out)) - nfs_commit_clear_lock(NFS_I(data->inode)); + nfs_commit_end(cinfo.mds); } static void nfs_commit_release(void *calldata) @@ -1796,7 +1812,7 @@ static const struct rpc_call_ops nfs_commit_ops = { static const struct nfs_commit_completion_ops nfs_commit_completion_ops = { .completion = nfs_commit_release_pages, - .error_cleanup = nfs_commit_clear_lock, + .resched_write = nfs_commit_resched_write, }; int nfs_generic_commit_list(struct inode *inode, struct list_head *head, @@ -1815,30 +1831,25 @@ int nfs_commit_inode(struct inode *inode, int how) LIST_HEAD(head); struct nfs_commit_info cinfo; int may_wait = how & FLUSH_SYNC; + int error = 0; int res; - res = nfs_commit_set_lock(NFS_I(inode), may_wait); - if (res <= 0) - goto out_mark_dirty; nfs_init_cinfo_from_inode(&cinfo, inode); + nfs_commit_begin(cinfo.mds); res = nfs_scan_commit(inode, &head, &cinfo); - if (res) { - int error; - + if (res) error = nfs_generic_commit_list(inode, &head, how, &cinfo); - if (error < 0) - return error; - if (!may_wait) - goto out_mark_dirty; - error = wait_on_bit_action(&NFS_I(inode)->flags, - NFS_INO_COMMIT, - nfs_wait_bit_killable, - TASK_KILLABLE); - if (error < 0) - return error; - } else - nfs_commit_clear_lock(NFS_I(inode)); + nfs_commit_end(cinfo.mds); + if (error < 0) + goto out_error; + if (!may_wait) + goto out_mark_dirty; + error = wait_on_commit(cinfo.mds); + if (error < 0) + return error; return res; +out_error: + res = error; /* Note: If we exit without ensuring that the commit is complete, * we must mark the inode as dirty. Otherwise, future calls to * sync_inode() with the WB_SYNC_ALL flag set will fail to ensure @@ -1848,6 +1859,7 @@ out_mark_dirty: __mark_inode_dirty(inode, I_DIRTY_DATASYNC); return res; } +EXPORT_SYMBOL_GPL(nfs_commit_inode); int nfs_write_inode(struct inode *inode, struct writeback_control *wbc) { diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h index b88fc46..9eee972 100644 --- a/include/linux/nfs_fs.h +++ b/include/linux/nfs_fs.h @@ -216,7 +216,6 @@ struct nfs_inode { #define NFS_INO_FLUSHING (4) /* inode is flushing out data */ #define NFS_INO_FSCACHE (5) /* inode can be cached by FS-Cache */ #define NFS_INO_FSCACHE_LOCK (6) /* FS-Cache cookie management lock */ -#define NFS_INO_COMMIT (7) /* inode is committing unstable writes */ #define NFS_INO_LAYOUTCOMMIT (9) /* layoutcommit required */ #define NFS_INO_LAYOUTCOMMITTING (10) /* layoutcommit inflight */ #define NFS_INO_LAYOUTSTATS (11) /* layoutstats inflight */ diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index 7b30ac0..791098a 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h @@ -1421,11 +1421,12 @@ struct nfs_mds_commit_info { struct list_head list; }; +struct nfs_commit_info; struct nfs_commit_data; struct nfs_inode; struct nfs_commit_completion_ops { - void (*error_cleanup) (struct nfs_inode *nfsi); void (*completion) (struct nfs_commit_data *data); + void (*resched_write) (struct nfs_commit_info *, struct nfs_page *); }; struct nfs_commit_info { @@ -1462,6 +1463,7 @@ struct nfs_pgio_completion_ops { void (*error_cleanup)(struct list_head *head); void (*init_hdr)(struct nfs_pgio_header *hdr); void (*completion)(struct nfs_pgio_header *hdr); + void (*reschedule_io)(struct nfs_pgio_header *hdr); }; struct nfs_unlinkdata { |