From 72cff4494ea981202c8db6fd18940c8506f14db4 Mon Sep 17 00:00:00 2001 From: Peng Tao Date: Thu, 7 Aug 2014 10:12:38 +0800 Subject: nfs/flexclient: export pnfs_layoutcommit_inode flexfiles needs to start layoutcommit when necessary Signed-off-by: Peng Tao --- fs/nfs/pnfs.c | 1 + 1 file changed, 1 insertion(+) (limited to 'fs/nfs/pnfs.c') diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index 0a5dda4..2d25670 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c @@ -1966,6 +1966,7 @@ clear_layoutcommitting: pnfs_clear_layoutcommitting(inode); goto out; } +EXPORT_SYMBOL_GPL(pnfs_layoutcommit_inode); struct nfs4_threshold *pnfs_mdsthreshold_alloc(void) { -- cgit v1.1 From abb9a0079c7f06360b83a5dd27ce74b8dc6d01b6 Mon Sep 17 00:00:00 2001 From: Peng Tao Date: Fri, 22 Aug 2014 17:37:40 +0800 Subject: nfs41: close a small race window when adding new layout to global list Signed-off-by: Peng Tao Signed-off-by: Tom Haynes --- fs/nfs/pnfs.c | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) (limited to 'fs/nfs/pnfs.c') diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index 2d25670..fa00b56 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c @@ -1288,7 +1288,6 @@ pnfs_update_layout(struct inode *ino, struct nfs_client *clp = server->nfs_client; struct pnfs_layout_hdr *lo; struct pnfs_layout_segment *lseg = NULL; - bool first; if (!pnfs_enabled_sb(NFS_SERVER(ino))) goto out; @@ -1321,16 +1320,15 @@ pnfs_update_layout(struct inode *ino, if (pnfs_layoutgets_blocked(lo, 0)) goto out_unlock; atomic_inc(&lo->plh_outstanding); - - first = list_empty(&lo->plh_layouts) ? true : false; spin_unlock(&ino->i_lock); - if (first) { + if (list_empty(&lo->plh_layouts)) { /* The lo must be on the clp list if there is any * chance of a CB_LAYOUTRECALL(FILE) coming in. 
*/ spin_lock(&clp->cl_lock); - list_add_tail(&lo->plh_layouts, &server->layouts); + if (list_empty(&lo->plh_layouts)) + list_add_tail(&lo->plh_layouts, &server->layouts); spin_unlock(&clp->cl_lock); } -- cgit v1.1 From 9bf87482ddc6f8db884177a2a16b1a1dc12f8777 Mon Sep 17 00:00:00 2001 From: Peng Tao Date: Fri, 22 Aug 2014 17:37:41 +0800 Subject: nfs41: serialize first layoutget of a file Per RFC 5661 Errata 3208: | A client MAY always forget its layout state and associated | layout stateid at any time (See also section 12.5.5.1). | In such case, the client MUST use a non-layout stateid for the next | LAYOUTGET operation. This will signal the server that the client has | no more layouts on the file and its respective layout state can be | released before issuing a new layout in response to LAYOUTGET. In order to make such a signal unique to server, client needs to serialize all layoutgets using non-layout stateid. We implement this by serializing layoutgets when client has no layout segments at hand. 
Signed-off-by: Peng Tao Signed-off-by: Tom Haynes --- fs/nfs/pnfs.c | 35 +++++++++++++++++++++++++++++++---- 1 file changed, 31 insertions(+), 4 deletions(-) (limited to 'fs/nfs/pnfs.c') diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index fa00b56..7e1bac1 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c @@ -1288,6 +1288,7 @@ pnfs_update_layout(struct inode *ino, struct nfs_client *clp = server->nfs_client; struct pnfs_layout_hdr *lo; struct pnfs_layout_segment *lseg = NULL; + bool first; if (!pnfs_enabled_sb(NFS_SERVER(ino))) goto out; @@ -1295,6 +1296,8 @@ pnfs_update_layout(struct inode *ino, if (pnfs_within_mdsthreshold(ctx, ino, iomode)) goto out; +lookup_again: + first = false; spin_lock(&ino->i_lock); lo = pnfs_find_alloc_layout(ino, ctx, gfp_flags); if (lo == NULL) { @@ -1312,10 +1315,27 @@ pnfs_update_layout(struct inode *ino, if (pnfs_layout_io_test_failed(lo, iomode)) goto out_unlock; - /* Check to see if the layout for the given range already exists */ - lseg = pnfs_find_lseg(lo, &arg); - if (lseg) - goto out_unlock; + first = list_empty(&lo->plh_segs); + if (first) { + /* The first layoutget for the file. Need to serialize per + * RFC 5661 Errata 3208. 
+ */ + if (test_and_set_bit(NFS_LAYOUT_FIRST_LAYOUTGET, + &lo->plh_flags)) { + spin_unlock(&ino->i_lock); + wait_on_bit(&lo->plh_flags, NFS_LAYOUT_FIRST_LAYOUTGET, + TASK_UNINTERRUPTIBLE); + pnfs_put_layout_hdr(lo); + goto lookup_again; + } + } else { + /* Check to see if the layout for the given range + * already exists + */ + lseg = pnfs_find_lseg(lo, &arg); + if (lseg) + goto out_unlock; + } if (pnfs_layoutgets_blocked(lo, 0)) goto out_unlock; @@ -1343,6 +1363,13 @@ pnfs_update_layout(struct inode *ino, lseg = send_layoutget(lo, ctx, &arg, gfp_flags); atomic_dec(&lo->plh_outstanding); out_put_layout_hdr: + if (first) { + unsigned long *bitlock = &lo->plh_flags; + + clear_bit_unlock(NFS_LAYOUT_FIRST_LAYOUTGET, bitlock); + smp_mb__after_atomic(); + wake_up_bit(bitlock, NFS_LAYOUT_FIRST_LAYOUTGET); + } pnfs_put_layout_hdr(lo); out: dprintk("%s: inode %s/%llu pNFS layout segment %s for " -- cgit v1.1 From 4579d6b897ee1b2557517fd536fb17eeb13481ad Mon Sep 17 00:00:00 2001 From: Peng Tao Date: Sat, 6 Sep 2014 00:53:21 +0800 Subject: nfs41: pass iomode through layoutreturn args So that it is possible to return layouts of a specific iomode. Signed-off-by: Peng Tao Signed-off-by: Tom Haynes --- fs/nfs/pnfs.c | 1 + 1 file changed, 1 insertion(+) (limited to 'fs/nfs/pnfs.c') diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index 7e1bac1..1b544c1 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c @@ -914,6 +914,7 @@ _pnfs_return_layout(struct inode *ino) lrp->args.stateid = stateid; lrp->args.layout_type = NFS_SERVER(ino)->pnfs_curr_ld->id; lrp->args.inode = ino; + lrp->args.iomode = IOMODE_ANY; lrp->args.layout = lo; lrp->clp = NFS_SERVER(ino)->nfs_client; lrp->cred = lo->plh_lc_cred; -- cgit v1.1 From f40eb5d044e2eea3f866eeeeb45ca30753773cda Mon Sep 17 00:00:00 2001 From: Peng Tao Date: Sat, 6 Sep 2014 00:53:22 +0800 Subject: nfs41: make a helper function to send layoutreturn It allows callers to specify a different iomode to return. 
Signed-off-by: Peng Tao Signed-off-by: Tom Haynes --- fs/nfs/pnfs.c | 53 +++++++++++++++++++++++++++++++++-------------------- 1 file changed, 33 insertions(+), 20 deletions(-) (limited to 'fs/nfs/pnfs.c') diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index 1b544c1..1b97209 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c @@ -845,6 +845,38 @@ static void pnfs_clear_layoutcommit(struct inode *inode, } } +static int +pnfs_send_layoutreturn(struct pnfs_layout_hdr *lo, nfs4_stateid stateid, + enum pnfs_iomode iomode) +{ + struct inode *ino = lo->plh_inode; + struct nfs4_layoutreturn *lrp; + int status = 0; + + lrp = kzalloc(sizeof(*lrp), GFP_KERNEL); + if (unlikely(lrp == NULL)) { + status = -ENOMEM; + spin_lock(&ino->i_lock); + lo->plh_block_lgets--; + spin_unlock(&ino->i_lock); + pnfs_put_layout_hdr(lo); + goto out; + } + + lrp->args.stateid = stateid; + lrp->args.layout_type = NFS_SERVER(ino)->pnfs_curr_ld->id; + lrp->args.inode = ino; + lrp->args.iomode = iomode; + lrp->args.layout = lo; + lrp->clp = NFS_SERVER(ino)->nfs_client; + lrp->cred = lo->plh_lc_cred; + + status = nfs4_proc_layoutreturn(lrp); +out: + dprintk("<-- %s status: %d\n", __func__, status); + return status; +} + /* * Initiates a LAYOUTRETURN(FILE), and removes the pnfs_layout_hdr * when the layout segment list is empty. 
@@ -859,7 +891,6 @@ _pnfs_return_layout(struct inode *ino) struct pnfs_layout_hdr *lo = NULL; struct nfs_inode *nfsi = NFS_I(ino); LIST_HEAD(tmp_list); - struct nfs4_layoutreturn *lrp; nfs4_stateid stateid; int status = 0, empty; @@ -901,25 +932,7 @@ _pnfs_return_layout(struct inode *ino) spin_unlock(&ino->i_lock); pnfs_free_lseg_list(&tmp_list); - lrp = kzalloc(sizeof(*lrp), GFP_KERNEL); - if (unlikely(lrp == NULL)) { - status = -ENOMEM; - spin_lock(&ino->i_lock); - lo->plh_block_lgets--; - spin_unlock(&ino->i_lock); - pnfs_put_layout_hdr(lo); - goto out; - } - - lrp->args.stateid = stateid; - lrp->args.layout_type = NFS_SERVER(ino)->pnfs_curr_ld->id; - lrp->args.inode = ino; - lrp->args.iomode = IOMODE_ANY; - lrp->args.layout = lo; - lrp->clp = NFS_SERVER(ino)->nfs_client; - lrp->cred = lo->plh_lc_cred; - - status = nfs4_proc_layoutreturn(lrp); + status = pnfs_send_layoutreturn(lo, stateid, IOMODE_ANY); out: dprintk("<-- %s status: %d\n", __func__, status); return status; -- cgit v1.1 From 016256df3a7e9eeb3f4dea5ccd0e21a0b63841eb Mon Sep 17 00:00:00 2001 From: Peng Tao Date: Sat, 6 Sep 2014 00:53:23 +0800 Subject: nfs41: add a helper to mark layout for return It marks all matching layout segments as NFS_LSEG_LAYOUTRETURN, which is an indicator for pnfs_put_lseg() to send layoutreturn, and also prevents pnfs_update_layout() from using the returning segments. Once it is set, it never gets cleared. It also sets the proper io failure bit so that the pnfs path can be retried after PNFS_LAYOUTGET_RETRY_TIMEOUT seconds. 
Signed-off-by: Peng Tao Signed-off-by: Tom Haynes --- fs/nfs/pnfs.c | 55 +++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 55 insertions(+) (limited to 'fs/nfs/pnfs.c') diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index 1b97209..0bd149b 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c @@ -1479,6 +1479,61 @@ out_forget_reply: goto out; } +static void +pnfs_mark_matching_lsegs_return(struct pnfs_layout_hdr *lo, + struct list_head *tmp_list, + struct pnfs_layout_range *return_range) +{ + struct pnfs_layout_segment *lseg, *next; + + dprintk("%s:Begin lo %p\n", __func__, lo); + + if (list_empty(&lo->plh_segs)) + return; + + list_for_each_entry_safe(lseg, next, &lo->plh_segs, pls_list) + if (should_free_lseg(&lseg->pls_range, return_range)) { + dprintk("%s: marking lseg %p iomode %d " + "offset %llu length %llu\n", __func__, + lseg, lseg->pls_range.iomode, + lseg->pls_range.offset, + lseg->pls_range.length); + set_bit(NFS_LSEG_LAYOUTRETURN, &lseg->pls_flags); + mark_lseg_invalid(lseg, tmp_list); + } +} + +void pnfs_error_mark_layout_for_return(struct inode *inode, + struct pnfs_layout_segment *lseg) +{ + struct pnfs_layout_hdr *lo = NFS_I(inode)->layout; + int iomode = pnfs_iomode_to_fail_bit(lseg->pls_range.iomode); + struct pnfs_layout_range range = { + .iomode = lseg->pls_range.iomode, + .offset = 0, + .length = NFS4_MAX_UINT64, + }; + LIST_HEAD(free_me); + + spin_lock(&inode->i_lock); + /* set failure bit so that pnfs path will be retried later */ + pnfs_layout_set_fail_bit(lo, iomode); + set_bit(NFS_LAYOUT_RETURN, &lo->plh_flags); + if (lo->plh_return_iomode == 0) + lo->plh_return_iomode = range.iomode; + else if (lo->plh_return_iomode != range.iomode) + lo->plh_return_iomode = IOMODE_ANY; + /* + * mark all matching lsegs so that we are sure to have no live + * segments at hand when sending layoutreturn. See pnfs_put_lseg() + * for how it works. 
+ */ + pnfs_mark_matching_lsegs_return(lo, &free_me, &range); + spin_unlock(&inode->i_lock); + pnfs_free_lseg_list(&free_me); +} +EXPORT_SYMBOL_GPL(pnfs_error_mark_layout_for_return); + void pnfs_generic_pg_init_read(struct nfs_pageio_descriptor *pgio, struct nfs_page *req) { -- cgit v1.1 From ce6ab4f238cb76d356229e97e1fefb7192388e13 Mon Sep 17 00:00:00 2001 From: Peng Tao Date: Sat, 6 Sep 2014 00:53:24 +0800 Subject: nfs41: don't use a layout if it is marked for returning And if we are to return the same type of layouts, don't bother sending more layoutgets. Signed-off-by: Peng Tao Signed-off-by: Tom Haynes --- fs/nfs/pnfs.c | 23 ++++++++++++++++++----- 1 file changed, 18 insertions(+), 5 deletions(-) (limited to 'fs/nfs/pnfs.c') diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index 0bd149b..853b544 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c @@ -740,25 +740,37 @@ pnfs_layout_stateid_blocked(const struct pnfs_layout_hdr *lo, return !pnfs_seqid_is_newer(seqid, lo->plh_barrier); } +static bool +pnfs_layout_returning(const struct pnfs_layout_hdr *lo, + struct pnfs_layout_range *range) +{ + return test_bit(NFS_LAYOUT_RETURN, &lo->plh_flags) && + (lo->plh_return_iomode == IOMODE_ANY || + lo->plh_return_iomode == range->iomode); +} + /* lget is set to 1 if called from inside send_layoutget call chain */ static bool -pnfs_layoutgets_blocked(const struct pnfs_layout_hdr *lo, int lget) +pnfs_layoutgets_blocked(const struct pnfs_layout_hdr *lo, + struct pnfs_layout_range *range, int lget) { return lo->plh_block_lgets || test_bit(NFS_LAYOUT_BULK_RECALL, &lo->plh_flags) || (list_empty(&lo->plh_segs) && - (atomic_read(&lo->plh_outstanding) > lget)); + (atomic_read(&lo->plh_outstanding) > lget)) || + pnfs_layout_returning(lo, range); } int pnfs_choose_layoutget_stateid(nfs4_stateid *dst, struct pnfs_layout_hdr *lo, + struct pnfs_layout_range *range, struct nfs4_state *open_state) { int status = 0; dprintk("--> %s\n", __func__); spin_lock(&lo->plh_inode->i_lock); - if 
(pnfs_layoutgets_blocked(lo, 1)) { + if (pnfs_layoutgets_blocked(lo, range, 1)) { status = -EAGAIN; } else if (!nfs4_valid_open_stateid(open_state)) { status = -EBADF; @@ -1192,6 +1204,7 @@ pnfs_find_lseg(struct pnfs_layout_hdr *lo, list_for_each_entry(lseg, &lo->plh_segs, pls_list) { if (test_bit(NFS_LSEG_VALID, &lseg->pls_flags) && + !test_bit(NFS_LSEG_LAYOUTRETURN, &lseg->pls_flags) && pnfs_lseg_range_match(&lseg->pls_range, range)) { ret = pnfs_get_lseg(lseg); break; @@ -1351,7 +1364,7 @@ lookup_again: goto out_unlock; } - if (pnfs_layoutgets_blocked(lo, 0)) + if (pnfs_layoutgets_blocked(lo, &arg, 0)) goto out_unlock; atomic_inc(&lo->plh_outstanding); spin_unlock(&ino->i_lock); @@ -1432,7 +1445,7 @@ pnfs_layout_process(struct nfs4_layoutget *lgp) goto out_forget_reply; } - if (pnfs_layoutgets_blocked(lo, 1)) { + if (pnfs_layoutgets_blocked(lo, &lgp->args.range, 1)) { dprintk("%s forget reply due to state\n", __func__); goto out_forget_reply; } -- cgit v1.1 From aa1e0e3a8e3f16ff50a72a8c623d7e1c467383bc Mon Sep 17 00:00:00 2001 From: Peng Tao Date: Sat, 6 Sep 2014 00:53:25 +0800 Subject: nfs41: send layoutreturn in last put_lseg If current lseg is the last lseg marked with NFS_LSEG_LAYOUTRETURN, send layoutreturn. 
Signed-off-by: Peng Tao Signed-off-by: Tom Haynes --- fs/nfs/pnfs.c | 38 +++++++++++++++++++++++++++++++++++++- 1 file changed, 37 insertions(+), 1 deletion(-) (limited to 'fs/nfs/pnfs.c') diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index 853b544..e9acfcf 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c @@ -50,6 +50,10 @@ static DEFINE_SPINLOCK(pnfs_spinlock); */ static LIST_HEAD(pnfs_modules_tbl); +static int +pnfs_send_layoutreturn(struct pnfs_layout_hdr *lo, nfs4_stateid stateid, + enum pnfs_iomode iomode); + /* Return the registered pnfs layout driver module matching given id */ static struct pnfs_layoutdriver_type * find_pnfs_driver_locked(u32 id) @@ -337,6 +341,29 @@ pnfs_layout_remove_lseg(struct pnfs_layout_hdr *lo, rpc_wake_up(&NFS_SERVER(inode)->roc_rpcwaitq); } +/* Return true if layoutreturn is needed */ +static bool +pnfs_layout_need_return(struct pnfs_layout_hdr *lo, + struct pnfs_layout_segment *lseg, + nfs4_stateid *stateid, enum pnfs_iomode *iomode) +{ + struct pnfs_layout_segment *s; + + if (!test_bit(NFS_LSEG_LAYOUTRETURN, &lseg->pls_flags)) + return false; + + list_for_each_entry(s, &lo->plh_segs, pls_list) + if (test_bit(NFS_LSEG_LAYOUTRETURN, &lseg->pls_flags)) + return false; + + *stateid = lo->plh_stateid; + *iomode = lo->plh_return_iomode; + /* decreased in pnfs_send_layoutreturn() */ + lo->plh_block_lgets++; + lo->plh_return_iomode = 0; + return true; +} + void pnfs_put_lseg(struct pnfs_layout_segment *lseg) { @@ -352,11 +379,20 @@ pnfs_put_lseg(struct pnfs_layout_segment *lseg) lo = lseg->pls_layout; inode = lo->plh_inode; if (atomic_dec_and_lock(&lseg->pls_refcount, &inode->i_lock)) { + bool need_return; + nfs4_stateid stateid; + enum pnfs_iomode iomode; + pnfs_get_layout_hdr(lo); pnfs_layout_remove_lseg(lo, lseg); + need_return = pnfs_layout_need_return(lo, lseg, + &stateid, &iomode); spin_unlock(&inode->i_lock); pnfs_free_lseg(lseg); - pnfs_put_layout_hdr(lo); + if (need_return) + pnfs_send_layoutreturn(lo, stateid, iomode); + else + 
pnfs_put_layout_hdr(lo); } } EXPORT_SYMBOL_GPL(pnfs_put_lseg); -- cgit v1.1 From e736a5b98c7aa98fe572990caf5fed9593c72a67 Mon Sep 17 00:00:00 2001 From: Peng Tao Date: Sat, 6 Sep 2014 00:53:26 +0800 Subject: nfs41: clear NFS_LAYOUT_RETURN if layoutreturn is sent or failed to send So that the pnfs path is not disabled forever. Signed-off-by: Peng Tao Signed-off-by: Tom Haynes --- fs/nfs/pnfs.c | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'fs/nfs/pnfs.c') diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index e9acfcf..63992c8 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c @@ -921,6 +921,11 @@ pnfs_send_layoutreturn(struct pnfs_layout_hdr *lo, nfs4_stateid stateid, status = nfs4_proc_layoutreturn(lrp); out: + if (status) { + spin_lock(&ino->i_lock); + clear_bit(NFS_LAYOUT_RETURN, &lo->plh_flags); + spin_unlock(&ino->i_lock); + } dprintk("<-- %s status: %d\n", __func__, status); return status; } -- cgit v1.1 From 180bb5ec06ce3a95dccc751fbf6bf11d3003da98 Mon Sep 17 00:00:00 2001 From: Weston Andros Adamson Date: Wed, 10 Sep 2014 15:48:01 -0400 Subject: pnfs: release lseg in pnfs_generic_pg_cleanup This is needed to support mirrored writes - the first write can't just trash the lseg, we need to keep it around until all mirrors have written. 
Signed-off-by: Weston Andros Adamson --- fs/nfs/pnfs.c | 32 ++++++++++++++------------------ 1 file changed, 14 insertions(+), 18 deletions(-) (limited to 'fs/nfs/pnfs.c') diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index 63992c8..2da2e77 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c @@ -1631,6 +1631,16 @@ pnfs_generic_pg_init_write(struct nfs_pageio_descriptor *pgio, } EXPORT_SYMBOL_GPL(pnfs_generic_pg_init_write); +void +pnfs_generic_pg_cleanup(struct nfs_pageio_descriptor *desc) +{ + if (desc->pg_lseg) { + pnfs_put_lseg(desc->pg_lseg); + desc->pg_lseg = NULL; + } +} +EXPORT_SYMBOL_GPL(pnfs_generic_pg_cleanup); + /* * Return 0 if @req cannot be coalesced into @pgio, otherwise return the number * of bytes (maximum @req->wb_bytes) that can be coalesced. @@ -1756,11 +1766,9 @@ pnfs_do_write(struct nfs_pageio_descriptor *desc, struct pnfs_layout_segment *lseg = desc->pg_lseg; enum pnfs_try_status trypnfs; - desc->pg_lseg = NULL; trypnfs = pnfs_try_to_write_data(hdr, call_ops, lseg, how); if (trypnfs == PNFS_NOT_ATTEMPTED) pnfs_write_through_mds(desc, hdr); - pnfs_put_lseg(lseg); } static void pnfs_writehdr_free(struct nfs_pgio_header *hdr) @@ -1779,17 +1787,13 @@ pnfs_generic_pg_writepages(struct nfs_pageio_descriptor *desc) hdr = nfs_pgio_header_alloc(desc->pg_rw_ops); if (!hdr) { desc->pg_completion_ops->error_cleanup(&desc->pg_list); - pnfs_put_lseg(desc->pg_lseg); - desc->pg_lseg = NULL; return -ENOMEM; } nfs_pgheader_init(desc, hdr, pnfs_writehdr_free); + hdr->lseg = pnfs_get_lseg(desc->pg_lseg); ret = nfs_generic_pgio(desc, hdr); - if (ret != 0) { - pnfs_put_lseg(desc->pg_lseg); - desc->pg_lseg = NULL; - } else + if (!ret) pnfs_do_write(desc, hdr, desc->pg_ioflags); return ret; } @@ -1874,11 +1878,9 @@ pnfs_do_read(struct nfs_pageio_descriptor *desc, struct nfs_pgio_header *hdr) struct pnfs_layout_segment *lseg = desc->pg_lseg; enum pnfs_try_status trypnfs; - desc->pg_lseg = NULL; trypnfs = pnfs_try_to_read_data(hdr, call_ops, lseg); if (trypnfs == 
PNFS_NOT_ATTEMPTED) pnfs_read_through_mds(desc, hdr); - pnfs_put_lseg(lseg); } static void pnfs_readhdr_free(struct nfs_pgio_header *hdr) @@ -1897,18 +1899,12 @@ pnfs_generic_pg_readpages(struct nfs_pageio_descriptor *desc) hdr = nfs_pgio_header_alloc(desc->pg_rw_ops); if (!hdr) { desc->pg_completion_ops->error_cleanup(&desc->pg_list); - ret = -ENOMEM; - pnfs_put_lseg(desc->pg_lseg); - desc->pg_lseg = NULL; - return ret; + return -ENOMEM; } nfs_pgheader_init(desc, hdr, pnfs_readhdr_free); hdr->lseg = pnfs_get_lseg(desc->pg_lseg); ret = nfs_generic_pgio(desc, hdr); - if (ret != 0) { - pnfs_put_lseg(desc->pg_lseg); - desc->pg_lseg = NULL; - } else + if (!ret) pnfs_do_read(desc, hdr); return ret; } -- cgit v1.1 From a7d42ddb3099727f58366fa006f850a219cce6c8 Mon Sep 17 00:00:00 2001 From: Weston Andros Adamson Date: Fri, 19 Sep 2014 10:55:07 -0400 Subject: nfs: add mirroring support to pgio layer This patch adds mirrored write support to the pgio layer. The default is to use one mirror, but pgio callers may define callbacks to change this to any value up to the (arbitrarily selected) limit of 16. The basic idea is to break out members of nfs_pageio_descriptor that cannot be shared between mirrored DSes and put them in a new structure. Signed-off-by: Weston Andros Adamson --- fs/nfs/pnfs.c | 26 ++++++++++++++++++-------- 1 file changed, 18 insertions(+), 8 deletions(-) (limited to 'fs/nfs/pnfs.c') diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index 2da2e77..5f7c422 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c @@ -1646,8 +1646,8 @@ EXPORT_SYMBOL_GPL(pnfs_generic_pg_cleanup); * of bytes (maximum @req->wb_bytes) that can be coalesced. 
*/ size_t -pnfs_generic_pg_test(struct nfs_pageio_descriptor *pgio, struct nfs_page *prev, - struct nfs_page *req) +pnfs_generic_pg_test(struct nfs_pageio_descriptor *pgio, + struct nfs_page *prev, struct nfs_page *req) { unsigned int size; u64 seg_end, req_start, seg_left; @@ -1729,10 +1729,12 @@ static void pnfs_write_through_mds(struct nfs_pageio_descriptor *desc, struct nfs_pgio_header *hdr) { + struct nfs_pgio_mirror *mirror = &desc->pg_mirrors[desc->pg_mirror_idx]; + if (!test_and_set_bit(NFS_IOHDR_REDO, &hdr->flags)) { - list_splice_tail_init(&hdr->pages, &desc->pg_list); + list_splice_tail_init(&hdr->pages, &mirror->pg_list); nfs_pageio_reset_write_mds(desc); - desc->pg_recoalesce = 1; + mirror->pg_recoalesce = 1; } nfs_pgio_data_destroy(hdr); } @@ -1781,12 +1783,14 @@ EXPORT_SYMBOL_GPL(pnfs_writehdr_free); int pnfs_generic_pg_writepages(struct nfs_pageio_descriptor *desc) { + struct nfs_pgio_mirror *mirror = &desc->pg_mirrors[desc->pg_mirror_idx]; + struct nfs_pgio_header *hdr; int ret; hdr = nfs_pgio_header_alloc(desc->pg_rw_ops); if (!hdr) { - desc->pg_completion_ops->error_cleanup(&desc->pg_list); + desc->pg_completion_ops->error_cleanup(&mirror->pg_list); return -ENOMEM; } nfs_pgheader_init(desc, hdr, pnfs_writehdr_free); @@ -1795,6 +1799,7 @@ pnfs_generic_pg_writepages(struct nfs_pageio_descriptor *desc) ret = nfs_generic_pgio(desc, hdr); if (!ret) pnfs_do_write(desc, hdr, desc->pg_ioflags); + return ret; } EXPORT_SYMBOL_GPL(pnfs_generic_pg_writepages); @@ -1839,10 +1844,13 @@ static void pnfs_read_through_mds(struct nfs_pageio_descriptor *desc, struct nfs_pgio_header *hdr) { + struct nfs_pgio_mirror *mirror = &desc->pg_mirrors[desc->pg_mirror_idx]; + + if (!test_and_set_bit(NFS_IOHDR_REDO, &hdr->flags)) { - list_splice_tail_init(&hdr->pages, &desc->pg_list); + list_splice_tail_init(&hdr->pages, &mirror->pg_list); nfs_pageio_reset_read_mds(desc); - desc->pg_recoalesce = 1; + mirror->pg_recoalesce = 1; } nfs_pgio_data_destroy(hdr); } @@ -1893,12 
+1901,14 @@ EXPORT_SYMBOL_GPL(pnfs_readhdr_free); int pnfs_generic_pg_readpages(struct nfs_pageio_descriptor *desc) { + struct nfs_pgio_mirror *mirror = &desc->pg_mirrors[desc->pg_mirror_idx]; + struct nfs_pgio_header *hdr; int ret; hdr = nfs_pgio_header_alloc(desc->pg_rw_ops); if (!hdr) { - desc->pg_completion_ops->error_cleanup(&desc->pg_list); + desc->pg_completion_ops->error_cleanup(&mirror->pg_list); return -ENOMEM; } nfs_pgheader_init(desc, hdr, pnfs_readhdr_free); -- cgit v1.1 From 566f8737630390b743d79e26e4ac855fe2758129 Mon Sep 17 00:00:00 2001 From: Peng Tao Date: Fri, 10 Oct 2014 23:25:46 +0800 Subject: nfs41: add a debug warning if we destroy an unempty layout So that we can detect the case where some layout segments are still pinned, which is surely a bug that we need to fix. Signed-off-by: Peng Tao --- fs/nfs/pnfs.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'fs/nfs/pnfs.c') diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index 5f7c422..e123cfc 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c @@ -242,6 +242,8 @@ pnfs_put_layout_hdr(struct pnfs_layout_hdr *lo) struct inode *inode = lo->plh_inode; if (atomic_dec_and_lock(&lo->plh_refcount, &inode->i_lock)) { + if (!list_empty(&lo->plh_segs)) + WARN_ONCE(1, "NFS: BUG unfreed layout segments.\n"); pnfs_detach_layout_hdr(lo); spin_unlock(&inode->i_lock); pnfs_free_layout_hdr(lo); -- cgit v1.1 From 48d635f14a544c2b3ca870d2c7349b41160496d2 Mon Sep 17 00:00:00 2001 From: Peng Tao Date: Mon, 10 Nov 2014 08:35:35 +0800 Subject: nfs: add nfs_pgio_current_mirror helper Let it return the current nfs_pgio_mirror in use, depending on pg_mirror_count. For read, we always use pg_mirrors[0], so this effectively gives us freedom to use pg_mirror_idx to track the actual mirror to read from throughout the IO stack. 
Signed-off-by: Peng Tao Signed-off-by: Tom Haynes --- fs/nfs/pnfs.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) (limited to 'fs/nfs/pnfs.c') diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index e123cfc..b822b17 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c @@ -1731,7 +1731,7 @@ static void pnfs_write_through_mds(struct nfs_pageio_descriptor *desc, struct nfs_pgio_header *hdr) { - struct nfs_pgio_mirror *mirror = &desc->pg_mirrors[desc->pg_mirror_idx]; + struct nfs_pgio_mirror *mirror = nfs_pgio_current_mirror(desc); if (!test_and_set_bit(NFS_IOHDR_REDO, &hdr->flags)) { list_splice_tail_init(&hdr->pages, &mirror->pg_list); @@ -1785,7 +1785,7 @@ EXPORT_SYMBOL_GPL(pnfs_writehdr_free); int pnfs_generic_pg_writepages(struct nfs_pageio_descriptor *desc) { - struct nfs_pgio_mirror *mirror = &desc->pg_mirrors[desc->pg_mirror_idx]; + struct nfs_pgio_mirror *mirror = nfs_pgio_current_mirror(desc); struct nfs_pgio_header *hdr; int ret; @@ -1846,8 +1846,7 @@ static void pnfs_read_through_mds(struct nfs_pageio_descriptor *desc, struct nfs_pgio_header *hdr) { - struct nfs_pgio_mirror *mirror = &desc->pg_mirrors[desc->pg_mirror_idx]; - + struct nfs_pgio_mirror *mirror = nfs_pgio_current_mirror(desc); if (!test_and_set_bit(NFS_IOHDR_REDO, &hdr->flags)) { list_splice_tail_init(&hdr->pages, &mirror->pg_list); @@ -1903,7 +1902,7 @@ EXPORT_SYMBOL_GPL(pnfs_readhdr_free); int pnfs_generic_pg_readpages(struct nfs_pageio_descriptor *desc) { - struct nfs_pgio_mirror *mirror = &desc->pg_mirrors[desc->pg_mirror_idx]; + struct nfs_pgio_mirror *mirror = nfs_pgio_current_mirror(desc); struct nfs_pgio_header *hdr; int ret; -- cgit v1.1 From ceb11e13df3e78b450730c615037133c57b90c3b Mon Sep 17 00:00:00 2001 From: Peng Tao Date: Mon, 10 Nov 2014 08:35:38 +0800 Subject: pnfs: allow LD to ask to resend read through pnfs If current IO cannot be completed due to some transient errors, LD may want to ask generic layer to resend the request through pnfs again. 
Signed-off-by: Peng Tao Signed-off-by: Tom Haynes --- fs/nfs/pnfs.c | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) (limited to 'fs/nfs/pnfs.c') diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index b822b17..685af4f 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c @@ -1880,15 +1880,28 @@ pnfs_try_to_read_data(struct nfs_pgio_header *hdr, return trypnfs; } +/* Resend all requests through pnfs. */ +int pnfs_read_resend_pnfs(struct nfs_pgio_header *hdr) +{ + struct nfs_pageio_descriptor pgio; + + nfs_pageio_init_read(&pgio, hdr->inode, false, hdr->completion_ops); + return nfs_pageio_resend(&pgio, hdr); +} +EXPORT_SYMBOL_GPL(pnfs_read_resend_pnfs); + static void pnfs_do_read(struct nfs_pageio_descriptor *desc, struct nfs_pgio_header *hdr) { const struct rpc_call_ops *call_ops = desc->pg_rpc_callops; struct pnfs_layout_segment *lseg = desc->pg_lseg; enum pnfs_try_status trypnfs; + int err = 0; trypnfs = pnfs_try_to_read_data(hdr, call_ops, lseg); - if (trypnfs == PNFS_NOT_ATTEMPTED) + if (trypnfs == PNFS_TRY_AGAIN) + err = pnfs_read_resend_pnfs(hdr); + if (trypnfs == PNFS_NOT_ATTEMPTED || err) pnfs_read_through_mds(desc, hdr); } -- cgit v1.1 From 15eb67c15342d212b0c8a540b6d6bd2dfad52a63 Mon Sep 17 00:00:00 2001 From: Peng Tao Date: Mon, 17 Nov 2014 09:30:36 +0800 Subject: nfs41: add range to layoutreturn args So that callers can specify which range to return. 
Signed-off-by: Peng Tao Signed-off-by: Tom Haynes --- fs/nfs/pnfs.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'fs/nfs/pnfs.c') diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index 685af4f..9549b89 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c @@ -916,7 +916,9 @@ pnfs_send_layoutreturn(struct pnfs_layout_hdr *lo, nfs4_stateid stateid, lrp->args.stateid = stateid; lrp->args.layout_type = NFS_SERVER(ino)->pnfs_curr_ld->id; lrp->args.inode = ino; - lrp->args.iomode = iomode; + lrp->args.range.iomode = iomode; + lrp->args.range.offset = 0; + lrp->args.range.length = NFS4_MAX_UINT64; lrp->args.layout = lo; lrp->clp = NFS_SERVER(ino)->nfs_client; lrp->cred = lo->plh_lc_cred; -- cgit v1.1 From 6c16605d6ef0dfb2e154119700d58b85c6b4dc71 Mon Sep 17 00:00:00 2001 From: Peng Tao Date: Mon, 17 Nov 2014 09:30:40 +0800 Subject: nfs41: allow async version layoutreturn Signed-off-by: Peng Tao Signed-off-by: Tom Haynes --- fs/nfs/pnfs.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) (limited to 'fs/nfs/pnfs.c') diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index 9549b89..0a0e209 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c @@ -52,7 +52,7 @@ static LIST_HEAD(pnfs_modules_tbl); static int pnfs_send_layoutreturn(struct pnfs_layout_hdr *lo, nfs4_stateid stateid, - enum pnfs_iomode iomode); + enum pnfs_iomode iomode, bool sync); /* Return the registered pnfs layout driver module matching given id */ static struct pnfs_layoutdriver_type * @@ -392,7 +392,8 @@ pnfs_put_lseg(struct pnfs_layout_segment *lseg) spin_unlock(&inode->i_lock); pnfs_free_lseg(lseg); if (need_return) - pnfs_send_layoutreturn(lo, stateid, iomode); + pnfs_send_layoutreturn(lo, stateid, iomode, + true); else pnfs_put_layout_hdr(lo); } @@ -897,7 +898,7 @@ static void pnfs_clear_layoutcommit(struct inode *inode, static int pnfs_send_layoutreturn(struct pnfs_layout_hdr *lo, nfs4_stateid stateid, - enum pnfs_iomode iomode) + enum pnfs_iomode iomode, bool sync) { struct inode *ino = 
lo->plh_inode; struct nfs4_layoutreturn *lrp; @@ -923,7 +924,7 @@ pnfs_send_layoutreturn(struct pnfs_layout_hdr *lo, nfs4_stateid stateid, lrp->clp = NFS_SERVER(ino)->nfs_client; lrp->cred = lo->plh_lc_cred; - status = nfs4_proc_layoutreturn(lrp); + status = nfs4_proc_layoutreturn(lrp, sync); out: if (status) { spin_lock(&ino->i_lock); @@ -989,7 +990,7 @@ _pnfs_return_layout(struct inode *ino) spin_unlock(&ino->i_lock); pnfs_free_lseg_list(&tmp_list); - status = pnfs_send_layoutreturn(lo, stateid, IOMODE_ANY); + status = pnfs_send_layoutreturn(lo, stateid, IOMODE_ANY, true); out: dprintk("<-- %s status: %d\n", __func__, status); return status; -- cgit v1.1 From 193e3aa2ccfb5a53acf7a690b80a1e415b74dbd7 Mon Sep 17 00:00:00 2001 From: Peng Tao Date: Mon, 17 Nov 2014 09:30:41 +0800 Subject: nfs41: introduce NFS_LAYOUT_RETURN_BEFORE_CLOSE When it is set, generic pnfs would try to send layoutreturn right before last close/delegation_return regardless of whether NFS_LAYOUT_ROC is set or not. LD can then make sure layoutreturn is always sent rather than being omitted. The difference from NFS_LAYOUT_RETURN is that NFS_LAYOUT_RETURN_BEFORE_CLOSE does not block usage of the layout so LD can set it and expect generic layer to try pnfs path at the same time. 
Signed-off-by: Peng Tao Signed-off-by: Tom Haynes --- fs/nfs/pnfs.c | 40 +++++++++++++++++++++++++++++++++------- 1 file changed, 33 insertions(+), 7 deletions(-) (limited to 'fs/nfs/pnfs.c') diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index 0a0e209..d3c2ca7 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c @@ -909,6 +909,7 @@ pnfs_send_layoutreturn(struct pnfs_layout_hdr *lo, nfs4_stateid stateid, status = -ENOMEM; spin_lock(&ino->i_lock); lo->plh_block_lgets--; + rpc_wake_up(&NFS_SERVER(ino)->roc_rpcwaitq); spin_unlock(&ino->i_lock); pnfs_put_layout_hdr(lo); goto out; @@ -926,11 +927,6 @@ pnfs_send_layoutreturn(struct pnfs_layout_hdr *lo, nfs4_stateid stateid, status = nfs4_proc_layoutreturn(lrp, sync); out: - if (status) { - spin_lock(&ino->i_lock); - clear_bit(NFS_LAYOUT_RETURN, &lo->plh_flags); - spin_unlock(&ino->i_lock); - } dprintk("<-- %s status: %d\n", __func__, status); return status; } @@ -1028,8 +1024,9 @@ bool pnfs_roc(struct inode *ino) { struct pnfs_layout_hdr *lo; struct pnfs_layout_segment *lseg, *tmp; + nfs4_stateid stateid; LIST_HEAD(tmp_list); - bool found = false; + bool found = false, layoutreturn = false; spin_lock(&ino->i_lock); lo = NFS_I(ino)->layout; @@ -1050,7 +1047,20 @@ bool pnfs_roc(struct inode *ino) return true; out_nolayout: + if (lo) { + stateid = lo->plh_stateid; + layoutreturn = + test_and_clear_bit(NFS_LAYOUT_RETURN_BEFORE_CLOSE, + &lo->plh_flags); + if (layoutreturn) { + lo->plh_block_lgets++; + pnfs_get_layout_hdr(lo); + } + } spin_unlock(&ino->i_lock); + if (layoutreturn) + pnfs_send_layoutreturn(lo, stateid, IOMODE_ANY, 0, + NFS4_MAX_UINT64, true); return false; } @@ -1085,8 +1095,9 @@ bool pnfs_roc_drain(struct inode *ino, u32 *barrier, struct rpc_task *task) struct nfs_inode *nfsi = NFS_I(ino); struct pnfs_layout_hdr *lo; struct pnfs_layout_segment *lseg; + nfs4_stateid stateid; u32 current_seqid; - bool found = false; + bool found = false, layoutreturn = false; spin_lock(&ino->i_lock); list_for_each_entry(lseg, 
&nfsi->layout->plh_segs, pls_list) @@ -1103,7 +1114,22 @@ bool pnfs_roc_drain(struct inode *ino, u32 *barrier, struct rpc_task *task) */ *barrier = current_seqid + atomic_read(&lo->plh_outstanding); out: + if (!found) { + stateid = lo->plh_stateid; + layoutreturn = + test_and_clear_bit(NFS_LAYOUT_RETURN_BEFORE_CLOSE, + &lo->plh_flags); + if (layoutreturn) { + lo->plh_block_lgets++; + pnfs_get_layout_hdr(lo); + } + } spin_unlock(&ino->i_lock); + if (layoutreturn) { + rpc_sleep_on(&NFS_SERVER(ino)->roc_rpcwaitq, task, NULL); + pnfs_send_layoutreturn(lo, stateid, IOMODE_ANY, 0, + NFS4_MAX_UINT64, false); + } return found; } -- cgit v1.1 From 27b6f53987d61822a858b4680c3727bfb19e620a Mon Sep 17 00:00:00 2001 From: Peng Tao Date: Mon, 20 Oct 2014 14:44:38 +0800 Subject: nfs/flexfiles: send layoutreturn before freeing lseg Otherwise we'll lose error tracking information when encoding layoutreturn. pnfs_put_lseg may be called from rpc callbacks. So we should not call pnfs_send_layoutreturn directly because it can deadlock in the rpc layer. 
Signed-off-by: Peng Tao Signed-off-by: Tom Haynes --- fs/nfs/pnfs.c | 81 +++++++++++++++++++++++++++++++++++++++++------------------ 1 file changed, 56 insertions(+), 25 deletions(-) (limited to 'fs/nfs/pnfs.c') diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index d3c2ca7..108a619 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c @@ -346,8 +346,7 @@ pnfs_layout_remove_lseg(struct pnfs_layout_hdr *lo, /* Return true if layoutreturn is needed */ static bool pnfs_layout_need_return(struct pnfs_layout_hdr *lo, - struct pnfs_layout_segment *lseg, - nfs4_stateid *stateid, enum pnfs_iomode *iomode) + struct pnfs_layout_segment *lseg) { struct pnfs_layout_segment *s; @@ -355,17 +354,54 @@ pnfs_layout_need_return(struct pnfs_layout_hdr *lo, return false; list_for_each_entry(s, &lo->plh_segs, pls_list) - if (test_bit(NFS_LSEG_LAYOUTRETURN, &lseg->pls_flags)) + if (s != lseg && test_bit(NFS_LSEG_LAYOUTRETURN, &s->pls_flags)) return false; - *stateid = lo->plh_stateid; - *iomode = lo->plh_return_iomode; - /* decreased in pnfs_send_layoutreturn() */ - lo->plh_block_lgets++; - lo->plh_return_iomode = 0; return true; } +static void pnfs_layoutreturn_free_lseg(struct work_struct *work) +{ + struct pnfs_layout_segment *lseg; + struct pnfs_layout_hdr *lo; + struct inode *inode; + + lseg = container_of(work, struct pnfs_layout_segment, pls_work); + WARN_ON(atomic_read(&lseg->pls_refcount)); + lo = lseg->pls_layout; + inode = lo->plh_inode; + + spin_lock(&inode->i_lock); + if (pnfs_layout_need_return(lo, lseg)) { + nfs4_stateid stateid; + enum pnfs_iomode iomode; + + stateid = lo->plh_stateid; + iomode = lo->plh_return_iomode; + /* decreased in pnfs_send_layoutreturn() */ + lo->plh_block_lgets++; + lo->plh_return_iomode = 0; + spin_unlock(&inode->i_lock); + + pnfs_send_layoutreturn(lo, stateid, iomode, true); + spin_lock(&inode->i_lock); + } else + /* match pnfs_get_layout_hdr #2 in pnfs_put_lseg */ + pnfs_put_layout_hdr(lo); + pnfs_layout_remove_lseg(lo, lseg); + 
spin_unlock(&inode->i_lock); + pnfs_free_lseg(lseg); + /* match pnfs_get_layout_hdr #1 in pnfs_put_lseg */ + pnfs_put_layout_hdr(lo); +} + +static void +pnfs_layoutreturn_free_lseg_async(struct pnfs_layout_segment *lseg) +{ + INIT_WORK(&lseg->pls_work, pnfs_layoutreturn_free_lseg); + queue_work(nfsiod_workqueue, &lseg->pls_work); +} + void pnfs_put_lseg(struct pnfs_layout_segment *lseg) { @@ -381,21 +417,18 @@ pnfs_put_lseg(struct pnfs_layout_segment *lseg) lo = lseg->pls_layout; inode = lo->plh_inode; if (atomic_dec_and_lock(&lseg->pls_refcount, &inode->i_lock)) { - bool need_return; - nfs4_stateid stateid; - enum pnfs_iomode iomode; - pnfs_get_layout_hdr(lo); - pnfs_layout_remove_lseg(lo, lseg); - need_return = pnfs_layout_need_return(lo, lseg, - &stateid, &iomode); - spin_unlock(&inode->i_lock); - pnfs_free_lseg(lseg); - if (need_return) - pnfs_send_layoutreturn(lo, stateid, iomode, - true); - else + if (pnfs_layout_need_return(lo, lseg)) { + spin_unlock(&inode->i_lock); + /* hdr reference dropped in nfs4_layoutreturn_release */ + pnfs_get_layout_hdr(lo); + pnfs_layoutreturn_free_lseg_async(lseg); + } else { + pnfs_layout_remove_lseg(lo, lseg); + spin_unlock(&inode->i_lock); + pnfs_free_lseg(lseg); pnfs_put_layout_hdr(lo); + } } } EXPORT_SYMBOL_GPL(pnfs_put_lseg); @@ -1059,8 +1092,7 @@ out_nolayout: } spin_unlock(&ino->i_lock); if (layoutreturn) - pnfs_send_layoutreturn(lo, stateid, IOMODE_ANY, 0, - NFS4_MAX_UINT64, true); + pnfs_send_layoutreturn(lo, stateid, IOMODE_ANY, true); return false; } @@ -1127,8 +1159,7 @@ out: spin_unlock(&ino->i_lock); if (layoutreturn) { rpc_sleep_on(&NFS_SERVER(ino)->roc_rpcwaitq, task, NULL); - pnfs_send_layoutreturn(lo, stateid, IOMODE_ANY, 0, - NFS4_MAX_UINT64, false); + pnfs_send_layoutreturn(lo, stateid, IOMODE_ANY, false); } return found; } -- cgit v1.1 From c829013dca33110d57c7f625443b716bd7a17671 Mon Sep 17 00:00:00 2001 From: Peng Tao Date: Mon, 1 Dec 2014 08:22:18 +0800 Subject: nfs41: add NFS_LAYOUT_RETRY_LAYOUTGET to 
layout header flags Use it to indicate that LD wants to retry layoutget. LD can set it whenever it wants the common pnfs code to return and retry pnfs path through a new layout. The bit gets cleared when client does a new layoutget, when client closes the file (ROC case), or when kernel needs to evict the inode (non-ROC case). Signed-off-by: Peng Tao --- fs/nfs/pnfs.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'fs/nfs/pnfs.c') diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index 108a619..893f6b5 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c @@ -615,6 +615,7 @@ pnfs_destroy_layout(struct nfs_inode *nfsi) pnfs_get_layout_hdr(lo); pnfs_layout_clear_fail_bit(lo, NFS_LAYOUT_RO_FAILED); pnfs_layout_clear_fail_bit(lo, NFS_LAYOUT_RW_FAILED); + pnfs_clear_retry_layoutget(lo); spin_unlock(&nfsi->vfs_inode.i_lock); pnfs_free_lseg_list(&tmp_list); pnfs_put_layout_hdr(lo); @@ -1066,6 +1067,7 @@ bool pnfs_roc(struct inode *ino) if (!lo || !test_and_clear_bit(NFS_LAYOUT_ROC, &lo->plh_flags) || test_bit(NFS_LAYOUT_BULK_RECALL, &lo->plh_flags)) goto out_nolayout; + pnfs_clear_retry_layoutget(lo); list_for_each_entry_safe(lseg, tmp, &lo->plh_segs, pls_list) if (test_bit(NFS_LSEG_ROC, &lseg->pls_flags)) { mark_lseg_invalid(lseg, &tmp_list); @@ -1491,6 +1493,7 @@ lookup_again: arg.length = PAGE_CACHE_ALIGN(arg.length); lseg = send_layoutget(lo, ctx, &arg, gfp_flags); + pnfs_clear_retry_layoutget(lo); atomic_dec(&lo->plh_outstanding); out_put_layout_hdr: if (first) { -- cgit v1.1 From aa8a45ee974dfe3ffe290daaf5db457afae56fde Mon Sep 17 00:00:00 2001 From: Peng Tao Date: Mon, 1 Dec 2014 08:22:23 +0800 Subject: nfs41: wait for LAYOUTRETURN before retrying LAYOUTGET Also take care to stop waiting if someone clears retry bit. 
Signed-off-by: Peng Tao --- fs/nfs/pnfs.c | 39 ++++++++++++++++++++++++++++++++++++++- 1 file changed, 38 insertions(+), 1 deletion(-) (limited to 'fs/nfs/pnfs.c') diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index 893f6b5..c4c9fe6 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c @@ -1398,6 +1398,26 @@ static bool pnfs_within_mdsthreshold(struct nfs_open_context *ctx, return ret; } +/* stop waiting if someone clears NFS_LAYOUT_RETRY_LAYOUTGET bit. */ +static int pnfs_layoutget_retry_bit_wait(struct wait_bit_key *key) +{ + if (!test_bit(NFS_LAYOUT_RETRY_LAYOUTGET, key->flags)) + return 1; + return nfs_wait_bit_killable(key); +} + +static bool pnfs_prepare_to_retry_layoutget(struct pnfs_layout_hdr *lo) +{ + /* + * send layoutcommit as it can hold up layoutreturn due to lseg + * reference + */ + pnfs_layoutcommit_inode(lo->plh_inode, false); + return !wait_on_bit_action(&lo->plh_flags, NFS_LAYOUT_RETURN, + pnfs_layoutget_retry_bit_wait, + TASK_UNINTERRUPTIBLE); +} + /* * Layout segment is retreived from the server if not cached. * The appropriate layout segment is referenced and returned to the caller. @@ -1444,7 +1464,8 @@ lookup_again: } /* if LAYOUTGET already failed once we don't try again */ - if (pnfs_layout_io_test_failed(lo, iomode)) + if (pnfs_layout_io_test_failed(lo, iomode) && + !pnfs_should_retry_layoutget(lo)) goto out_unlock; first = list_empty(&lo->plh_segs); @@ -1469,6 +1490,22 @@ lookup_again: goto out_unlock; } + /* + * Because we free lsegs before sending LAYOUTRETURN, we need to wait + * for LAYOUTRETURN even if first is true. 
+ */ + if (!lseg && pnfs_should_retry_layoutget(lo) && + test_bit(NFS_LAYOUT_RETURN, &lo->plh_flags)) { + spin_unlock(&ino->i_lock); + dprintk("%s wait for layoutreturn\n", __func__); + if (pnfs_prepare_to_retry_layoutget(lo)) { + pnfs_put_layout_hdr(lo); + dprintk("%s retrying\n", __func__); + goto lookup_again; + } + goto out_put_layout_hdr; + } + if (pnfs_layoutgets_blocked(lo, &arg, 0)) goto out_unlock; atomic_inc(&lo->plh_outstanding); -- cgit v1.1 From d67ae825a59d639e4d8b82413af84d854617a87e Mon Sep 17 00:00:00 2001 From: Tom Haynes Date: Thu, 11 Dec 2014 17:02:04 -0500 Subject: pnfs/flexfiles: Add the FlexFile Layout Driver The flexfile layout is a new layout that extends the file layout. It is currently being drafted as a specification at https://datatracker.ietf.org/doc/draft-ietf-nfsv4-layout-types/ Signed-off-by: Weston Andros Adamson Signed-off-by: Tom Haynes Signed-off-by: Tao Peng --- fs/nfs/pnfs.c | 32 ++++++++++++++++++++++++-------- 1 file changed, 24 insertions(+), 8 deletions(-) (limited to 'fs/nfs/pnfs.c') diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index c4c9fe6..0fb0f19 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c @@ -910,7 +910,9 @@ send_layoutget(struct pnfs_layout_hdr *lo, pnfs_layout_io_set_failed(lo, range->iomode); } return NULL; - } + } else + pnfs_layout_clear_fail_bit(lo, + pnfs_iomode_to_fail_bit(range->iomode)); return lseg; } @@ -930,6 +932,13 @@ static void pnfs_clear_layoutcommit(struct inode *inode, } } +void pnfs_clear_layoutreturn_waitbit(struct pnfs_layout_hdr *lo) +{ + clear_bit_unlock(NFS_LAYOUT_RETURN, &lo->plh_flags); + smp_mb__after_atomic(); + wake_up_bit(&lo->plh_flags, NFS_LAYOUT_RETURN); +} + static int pnfs_send_layoutreturn(struct pnfs_layout_hdr *lo, nfs4_stateid stateid, enum pnfs_iomode iomode, bool sync) @@ -943,6 +952,7 @@ pnfs_send_layoutreturn(struct pnfs_layout_hdr *lo, nfs4_stateid stateid, status = -ENOMEM; spin_lock(&ino->i_lock); lo->plh_block_lgets--; + pnfs_clear_layoutreturn_waitbit(lo); 
rpc_wake_up(&NFS_SERVER(ino)->roc_rpcwaitq); spin_unlock(&ino->i_lock); pnfs_put_layout_hdr(lo); @@ -1418,6 +1428,15 @@ static bool pnfs_prepare_to_retry_layoutget(struct pnfs_layout_hdr *lo) TASK_UNINTERRUPTIBLE); } +static void pnfs_clear_first_layoutget(struct pnfs_layout_hdr *lo) +{ + unsigned long *bitlock = &lo->plh_flags; + + clear_bit_unlock(NFS_LAYOUT_FIRST_LAYOUTGET, bitlock); + smp_mb__after_atomic(); + wake_up_bit(bitlock, NFS_LAYOUT_FIRST_LAYOUTGET); +} + /* * Layout segment is retreived from the server if not cached. * The appropriate layout segment is referenced and returned to the caller. @@ -1499,6 +1518,8 @@ lookup_again: spin_unlock(&ino->i_lock); dprintk("%s wait for layoutreturn\n", __func__); if (pnfs_prepare_to_retry_layoutget(lo)) { + if (first) + pnfs_clear_first_layoutget(lo); pnfs_put_layout_hdr(lo); dprintk("%s retrying\n", __func__); goto lookup_again; @@ -1533,13 +1554,8 @@ lookup_again: pnfs_clear_retry_layoutget(lo); atomic_dec(&lo->plh_outstanding); out_put_layout_hdr: - if (first) { - unsigned long *bitlock = &lo->plh_flags; - - clear_bit_unlock(NFS_LAYOUT_FIRST_LAYOUTGET, bitlock); - smp_mb__after_atomic(); - wake_up_bit(bitlock, NFS_LAYOUT_FIRST_LAYOUTGET); - } + if (first) + pnfs_clear_first_layoutget(lo); pnfs_put_layout_hdr(lo); out: dprintk("%s: inode %s/%llu pNFS layout segment %s for " -- cgit v1.1 From cb5d04bc39e914124e811ea55f3034d2379a5f6c Mon Sep 17 00:00:00 2001 From: Peng Tao Date: Sat, 24 Jan 2015 22:14:52 +0800 Subject: nfs41: .init_read and .init_write can be called with valid pg_lseg With pgio refactoring in v3.15, .init_read and .init_write can be called with valid pgio->pg_lseg. file layout was fixed at that time by commit c6194271f (pnfs: filelayout: support non page aligned layouts). But the generic helper still needs to be fixed. 
Cc: stable@vger.kernel.org # 3.15+ Signed-off-by: Peng Tao --- fs/nfs/pnfs.c | 41 ++++++++++++++++++++--------------------- 1 file changed, 20 insertions(+), 21 deletions(-) (limited to 'fs/nfs/pnfs.c') diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index 0fb0f19..c7be9b9 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c @@ -1711,19 +1711,19 @@ pnfs_generic_pg_init_read(struct nfs_pageio_descriptor *pgio, struct nfs_page *r { u64 rd_size = req->wb_bytes; - WARN_ON_ONCE(pgio->pg_lseg != NULL); - - if (pgio->pg_dreq == NULL) - rd_size = i_size_read(pgio->pg_inode) - req_offset(req); - else - rd_size = nfs_dreq_bytes_left(pgio->pg_dreq); - - pgio->pg_lseg = pnfs_update_layout(pgio->pg_inode, - req->wb_context, - req_offset(req), - rd_size, - IOMODE_READ, - GFP_KERNEL); + if (pgio->pg_lseg == NULL) { + if (pgio->pg_dreq == NULL) + rd_size = i_size_read(pgio->pg_inode) - req_offset(req); + else + rd_size = nfs_dreq_bytes_left(pgio->pg_dreq); + + pgio->pg_lseg = pnfs_update_layout(pgio->pg_inode, + req->wb_context, + req_offset(req), + rd_size, + IOMODE_READ, + GFP_KERNEL); + } /* If no lseg, fall back to read through mds */ if (pgio->pg_lseg == NULL) nfs_pageio_reset_read_mds(pgio); @@ -1735,14 +1735,13 @@ void pnfs_generic_pg_init_write(struct nfs_pageio_descriptor *pgio, struct nfs_page *req, u64 wb_size) { - WARN_ON_ONCE(pgio->pg_lseg != NULL); - - pgio->pg_lseg = pnfs_update_layout(pgio->pg_inode, - req->wb_context, - req_offset(req), - wb_size, - IOMODE_RW, - GFP_NOFS); + if (pgio->pg_lseg == NULL) + pgio->pg_lseg = pnfs_update_layout(pgio->pg_inode, + req->wb_context, + req_offset(req), + wb_size, + IOMODE_RW, + GFP_NOFS); /* If no lseg, fall back to write through mds */ if (pgio->pg_lseg == NULL) nfs_pageio_reset_write_mds(pgio); -- cgit v1.1 From 7c13789e3e6c66dbcaade1760087429240eb3d27 Mon Sep 17 00:00:00 2001 From: Weston Andros Adamson Date: Fri, 30 Jan 2015 11:01:02 -0500 Subject: pnfs: lookup new lseg at lseg boundary Before mirroring support was added, the pageio 
descriptor's pg_lseg was set to null when an RPC was sent. Because of this, pg_init was called at lseg boundaries with pg_lseg = NULL, and it could be set to the new lseg. Signed-off-by: Weston Andros Adamson --- fs/nfs/pnfs.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) (limited to 'fs/nfs/pnfs.c') diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index c7be9b9..9304984 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c @@ -1788,10 +1788,16 @@ pnfs_generic_pg_test(struct nfs_pageio_descriptor *pgio, seg_end = end_offset(pgio->pg_lseg->pls_range.offset, pgio->pg_lseg->pls_range.length); req_start = req_offset(req); - WARN_ON_ONCE(req_start > seg_end); + WARN_ON_ONCE(req_start >= seg_end); /* start of request is past the last byte of this segment */ - if (req_start >= seg_end) + if (req_start >= seg_end) { + /* reference the new lseg */ + if (pgio->pg_ops->pg_cleanup) + pgio->pg_ops->pg_cleanup(pgio); + if (pgio->pg_ops->pg_init) + pgio->pg_ops->pg_init(pgio, req); return 0; + } /* adjust 'size' iff there are fewer bytes left in the * segment than what nfs_generic_pg_test returned */ -- cgit v1.1