From 4dfc7fdb9e261d77818616782ae1841a80e0fc83 Mon Sep 17 00:00:00 2001 From: Kinglong Mee Date: Tue, 15 Apr 2014 17:22:59 +0800 Subject: NFS: Fix memroy leak for double mounts When double mounting same nfs filesystem, the devname saved in d_fsdata will be lost.The second mount should not change the devname that be saved in d_fsdata. Signed-off-by: Kinglong Mee Signed-off-by: Trond Myklebust --- fs/nfs/getroot.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'fs/nfs') diff --git a/fs/nfs/getroot.c b/fs/nfs/getroot.c index 66984a9..b94f804 100644 --- a/fs/nfs/getroot.c +++ b/fs/nfs/getroot.c @@ -120,7 +120,8 @@ struct dentry *nfs_get_root(struct super_block *sb, struct nfs_fh *mntfh, security_d_instantiate(ret, inode); spin_lock(&ret->d_lock); - if (IS_ROOT(ret) && !(ret->d_flags & DCACHE_NFSFS_RENAMED)) { + if (IS_ROOT(ret) && !ret->d_fsdata && + !(ret->d_flags & DCACHE_NFSFS_RENAMED)) { ret->d_fsdata = name; name = NULL; } -- cgit v1.1 From 43b6535e717d2f656f71d9bd16022136b781c934 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 15 Apr 2014 10:07:57 -0400 Subject: NFS: Don't declare inode uptodate unless all attributes were checked Fix a bug, whereby nfs_update_inode() was declaring the inode to be up to date despite not having checked all the attributes. The bug occurs because the temporary variable in which we cache the validity information is 'sanitised' before reapplying to nfsi->cache_validity. Reported-by: Kinglong Mee Cc: stable@vger.kernel.org # 3.5+ Signed-off-by: Trond Myklebust --- fs/nfs/inode.c | 26 +++++++++++++++++--------- 1 file changed, 17 insertions(+), 9 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index 0c43897..c79f3e7 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -1575,18 +1575,20 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr) inode->i_version = fattr->change_attr; } } else if (server->caps & NFS_CAP_CHANGE_ATTR) - invalid |= save_cache_validity; + nfsi->cache_validity |= save_cache_validity; if (fattr->valid & NFS_ATTR_FATTR_MTIME) { memcpy(&inode->i_mtime, &fattr->mtime, sizeof(inode->i_mtime)); } else if (server->caps & NFS_CAP_MTIME) - invalid |= save_cache_validity & (NFS_INO_INVALID_ATTR + nfsi->cache_validity |= save_cache_validity & + (NFS_INO_INVALID_ATTR | NFS_INO_REVAL_FORCED); if (fattr->valid & NFS_ATTR_FATTR_CTIME) { memcpy(&inode->i_ctime, &fattr->ctime, sizeof(inode->i_ctime)); } else if (server->caps & NFS_CAP_CTIME) - invalid |= save_cache_validity & (NFS_INO_INVALID_ATTR + nfsi->cache_validity |= save_cache_validity & + (NFS_INO_INVALID_ATTR | NFS_INO_REVAL_FORCED); /* Check if our cached file size is stale */ @@ -1608,7 +1610,8 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr) (long long)new_isize); } } else - invalid |= save_cache_validity & (NFS_INO_INVALID_ATTR + nfsi->cache_validity |= save_cache_validity & + (NFS_INO_INVALID_ATTR | NFS_INO_REVAL_PAGECACHE | NFS_INO_REVAL_FORCED); @@ -1616,7 +1619,8 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr) if (fattr->valid & NFS_ATTR_FATTR_ATIME) memcpy(&inode->i_atime, &fattr->atime, sizeof(inode->i_atime)); else if (server->caps & NFS_CAP_ATIME) - invalid |= save_cache_validity & (NFS_INO_INVALID_ATIME + nfsi->cache_validity |= save_cache_validity & + (NFS_INO_INVALID_ATIME | NFS_INO_REVAL_FORCED); if (fattr->valid & NFS_ATTR_FATTR_MODE) { @@ -1627,7 +1631,8 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr) invalid |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_ACCESS|NFS_INO_INVALID_ACL; } } else if (server->caps & NFS_CAP_MODE) - invalid |= save_cache_validity & (NFS_INO_INVALID_ATTR + nfsi->cache_validity |= save_cache_validity & + (NFS_INO_INVALID_ATTR | NFS_INO_INVALID_ACCESS | NFS_INO_INVALID_ACL | NFS_INO_REVAL_FORCED); @@ -1638,7 +1643,8 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr) inode->i_uid = fattr->uid; } } else if (server->caps & NFS_CAP_OWNER) - invalid |= save_cache_validity & (NFS_INO_INVALID_ATTR + nfsi->cache_validity |= save_cache_validity & + (NFS_INO_INVALID_ATTR | NFS_INO_INVALID_ACCESS | NFS_INO_INVALID_ACL | NFS_INO_REVAL_FORCED); @@ -1649,7 +1655,8 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr) inode->i_gid = fattr->gid; } } else if (server->caps & NFS_CAP_OWNER_GROUP) - invalid |= save_cache_validity & (NFS_INO_INVALID_ATTR + nfsi->cache_validity |= save_cache_validity & + (NFS_INO_INVALID_ATTR | NFS_INO_INVALID_ACCESS | NFS_INO_INVALID_ACL | NFS_INO_REVAL_FORCED); @@ -1662,7 +1669,8 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr) set_nlink(inode, fattr->nlink); } } else if (server->caps & NFS_CAP_NLINK) - invalid |= save_cache_validity & (NFS_INO_INVALID_ATTR + nfsi->cache_validity |= save_cache_validity & + (NFS_INO_INVALID_ATTR | NFS_INO_REVAL_FORCED); if (fattr->valid & NFS_ATTR_FATTR_SPACE_USED) { -- cgit v1.1 From 1f2edbe3fe2111a59fcd1bb3b9725066bc9ed686 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sun, 13 Apr 2014 11:11:31 -0400 Subject: NFS: Don't ignore suid/sgid bit changes after a successful write If we suspect that the server may have cleared the suid/sgid bit, then mark the inode for revalidation. Reported-by: Kinglong Mee Signed-off-by: Trond Myklebust --- fs/nfs/write.c | 35 +++++++++++++++++++++++++++++++++-- 1 file changed, 33 insertions(+), 2 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/write.c b/fs/nfs/write.c index 9a3b6a4..cd7c651 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -1353,6 +1353,30 @@ static const struct rpc_call_ops nfs_write_common_ops = { .rpc_release = nfs_writeback_release_common, }; +/* + * Special version of should_remove_suid() that ignores capabilities. + */ +static int nfs_should_remove_suid(const struct inode *inode) +{ + umode_t mode = inode->i_mode; + int kill = 0; + + /* suid always must be killed */ + if (unlikely(mode & S_ISUID)) + kill = ATTR_KILL_SUID; + + /* + * sgid without any exec bits is just a mandatory locking mark; leave + * it alone. If some exec bits are set, it's a real sgid; kill it. + */ + if (unlikely((mode & S_ISGID) && (mode & S_IXGRP))) + kill |= ATTR_KILL_SGID; + + if (unlikely(kill && S_ISREG(mode))) + return kill; + + return 0; +} /* * This function is called when the WRITE call is complete. @@ -1401,9 +1425,16 @@ void nfs_writeback_done(struct rpc_task *task, struct nfs_write_data *data) } } #endif - if (task->tk_status < 0) + if (task->tk_status < 0) { nfs_set_pgio_error(data->header, task->tk_status, argp->offset); - else if (resp->count < argp->count) { + return; + } + + /* Deal with the suid/sgid bit corner case */ + if (nfs_should_remove_suid(inode)) + nfs_mark_for_revalidate(inode); + + if (resp->count < argp->count) { static unsigned long complain; /* This a short write! */ -- cgit v1.1 From 1b33809ea8c671ccbceeaaa8d842631b441bed54 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 21 Apr 2014 10:29:17 -0700 Subject: nfs: commit layouts in fdatasync "fdatasync() is similar to fsync(), but does not flush modified metadata unless that metadata is needed in order to allow a subsequent data retrieval to be correctly handled." We absolutely need to commit the layouts to be able to retrieve the data in case either the client, the server or the storage subsystem go down. Signed-off-by: Christoph Hellwig Signed-off-by: Trond Myklebust --- fs/nfs/nfs4file.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/nfs4file.c b/fs/nfs/nfs4file.c index 8de3407..464db9d 100644 --- a/fs/nfs/nfs4file.c +++ b/fs/nfs/nfs4file.c @@ -100,8 +100,7 @@ nfs4_file_fsync(struct file *file, loff_t start, loff_t end, int datasync) break; mutex_lock(&inode->i_mutex); ret = nfs_file_fsync_commit(file, start, end, datasync); - if (!ret && !datasync) - /* application has asked for meta-data sync */ + if (!ret) ret = pnfs_layoutcommit_inode(inode, true); mutex_unlock(&inode->i_mutex); /* -- cgit v1.1 From a20c93e3160e37ecccc738d8eef085c8507949ed Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 16 Apr 2014 15:07:21 +0200 Subject: nfs: remove ->write_pageio_init from rpc ops The write_pageio_init method is just a very convoluted way to grab the right nfs_pageio_ops vector. The vector to chose is not a choice of protocol version, but just a pNFS vs MDS I/O choice that can simply be done inside nfs_pageio_init_write based on the presence of a layout driver, and a new force_mds flag to the special case of falling back to MDS I/O on a pNFS-capable volume. Signed-off-by: Christoph Hellwig Tested-by: Anna Schumaker Signed-off-by: Trond Myklebust --- fs/nfs/direct.c | 4 ++-- fs/nfs/internal.h | 2 +- fs/nfs/nfs3proc.c | 1 - fs/nfs/nfs4proc.c | 1 - fs/nfs/pnfs.c | 16 +--------------- fs/nfs/pnfs.h | 8 -------- fs/nfs/proc.c | 1 - fs/nfs/write.c | 21 +++++++++++++-------- 8 files changed, 17 insertions(+), 37 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c index b8797ae..6a31102 100644 --- a/fs/nfs/direct.c +++ b/fs/nfs/direct.c @@ -564,7 +564,7 @@ static void nfs_direct_write_reschedule(struct nfs_direct_req *dreq) dreq->count = 0; get_dreq(dreq); - NFS_PROTO(dreq->inode)->write_pageio_init(&desc, dreq->inode, FLUSH_STABLE, + nfs_pageio_init_write(&desc, dreq->inode, FLUSH_STABLE, false, &nfs_direct_write_completion_ops); desc.pg_dreq = dreq; @@ -874,7 +874,7 @@ static ssize_t nfs_direct_write_schedule_iovec(struct nfs_direct_req *dreq, size_t requested_bytes = 0; unsigned long seg; - NFS_PROTO(inode)->write_pageio_init(&desc, inode, FLUSH_COND_STABLE, + nfs_pageio_init_write(&desc, inode, FLUSH_COND_STABLE, false, &nfs_direct_write_completion_ops); desc.pg_dreq = dreq; get_dreq(dreq); diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h index dd8bfc2..8431083 100644 --- a/fs/nfs/internal.h +++ b/fs/nfs/internal.h @@ -422,7 +422,7 @@ int nfs_remount(struct super_block *sb, int *flags, char *raw_data); /* write.c */ extern void nfs_pageio_init_write(struct nfs_pageio_descriptor *pgio, - struct inode *inode, int ioflags, + struct inode *inode, int ioflags, bool force_mds, const struct nfs_pgio_completion_ops *compl_ops); extern struct nfs_write_header *nfs_writehdr_alloc(void); extern void nfs_writehdr_free(struct nfs_pgio_header *hdr); diff --git a/fs/nfs/nfs3proc.c b/fs/nfs/nfs3proc.c index db60149..e984880 100644 --- a/fs/nfs/nfs3proc.c +++ b/fs/nfs/nfs3proc.c @@ -951,7 +951,6 @@ const struct nfs_rpc_ops nfs_v3_clientops = { .read_rpc_prepare = nfs3_proc_read_rpc_prepare, .read_done = nfs3_read_done, .write_setup = nfs3_proc_write_setup, - .write_pageio_init = nfs_pageio_init_write, .write_rpc_prepare = nfs3_proc_write_rpc_prepare, .write_done = nfs3_write_done, .commit_setup = nfs3_proc_commit_setup, diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 397be39..8da0c62 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -8437,7 +8437,6 @@ const struct nfs_rpc_ops nfs_v4_clientops = { .read_rpc_prepare = nfs4_proc_read_rpc_prepare, .read_done = nfs4_read_done, .write_setup = nfs4_proc_write_setup, - .write_pageio_init = pnfs_pageio_init_write, .write_rpc_prepare = nfs4_proc_write_rpc_prepare, .write_done = nfs4_write_done, .commit_setup = nfs4_proc_commit_setup, diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index cb53d45..9edac9f 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c @@ -1447,20 +1447,6 @@ pnfs_pageio_init_read(struct nfs_pageio_descriptor *pgio, struct inode *inode, nfs_pageio_init(pgio, inode, ld->pg_read_ops, compl_ops, server->rsize, 0); } -void -pnfs_pageio_init_write(struct nfs_pageio_descriptor *pgio, struct inode *inode, - int ioflags, - const struct nfs_pgio_completion_ops *compl_ops) -{ - struct nfs_server *server = NFS_SERVER(inode); - struct pnfs_layoutdriver_type *ld = server->pnfs_curr_ld; - - if (ld == NULL) - nfs_pageio_init_write(pgio, inode, ioflags, compl_ops); - else - nfs_pageio_init(pgio, inode, ld->pg_write_ops, compl_ops, server->wsize, ioflags); -} - bool pnfs_generic_pg_test(struct nfs_pageio_descriptor *pgio, struct nfs_page *prev, struct nfs_page *req) @@ -1496,7 +1482,7 @@ int pnfs_write_done_resend_to_mds(struct inode *inode, LIST_HEAD(failed); /* Resend all requests through the MDS */ - nfs_pageio_init_write(&pgio, inode, FLUSH_STABLE, compl_ops); + nfs_pageio_init_write(&pgio, inode, FLUSH_STABLE, true, compl_ops); pgio.pg_dreq = dreq; while (!list_empty(head)) { struct nfs_page *req = nfs_list_entry(head->next); diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h index 0237939..e9ac8fb 100644 --- a/fs/nfs/pnfs.h +++ b/fs/nfs/pnfs.h @@ -182,8 +182,6 @@ void pnfs_put_lseg(struct pnfs_layout_segment *lseg); void pnfs_pageio_init_read(struct nfs_pageio_descriptor *, struct inode *, const struct nfs_pgio_completion_ops *); -void pnfs_pageio_init_write(struct nfs_pageio_descriptor *, struct inode *, - int, const struct nfs_pgio_completion_ops *); void set_pnfs_layoutdriver(struct nfs_server *, const struct nfs_fh *, u32); void unset_pnfs_layoutdriver(struct nfs_server *); @@ -467,12 +465,6 @@ static inline void pnfs_pageio_init_read(struct nfs_pageio_descriptor *pgio, str nfs_pageio_init_read(pgio, inode, compl_ops); } -static inline void pnfs_pageio_init_write(struct nfs_pageio_descriptor *pgio, struct inode *inode, int ioflags, - const struct nfs_pgio_completion_ops *compl_ops) -{ - nfs_pageio_init_write(pgio, inode, ioflags, compl_ops); -} - static inline int pnfs_commit_list(struct inode *inode, struct list_head *mds_pages, int how, struct nfs_commit_info *cinfo) diff --git a/fs/nfs/proc.c b/fs/nfs/proc.c index e55ce9e..f9cc295 100644 --- a/fs/nfs/proc.c +++ b/fs/nfs/proc.c @@ -739,7 +739,6 @@ const struct nfs_rpc_ops nfs_v2_clientops = { .read_rpc_prepare = nfs_proc_read_rpc_prepare, .read_done = nfs_read_done, .write_setup = nfs_proc_write_setup, - .write_pageio_init = nfs_pageio_init_write, .write_rpc_prepare = nfs_proc_write_rpc_prepare, .write_done = nfs_write_done, .commit_setup = nfs_proc_commit_setup, diff --git a/fs/nfs/write.c b/fs/nfs/write.c index cd7c651..ee6d46f 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -354,10 +354,8 @@ static int nfs_writepage_locked(struct page *page, struct writeback_control *wbc struct nfs_pageio_descriptor pgio; int err; - NFS_PROTO(page_file_mapping(page)->host)->write_pageio_init(&pgio, - page->mapping->host, - wb_priority(wbc), - &nfs_async_write_completion_ops); + nfs_pageio_init_write(&pgio, page->mapping->host, wb_priority(wbc), + false, &nfs_async_write_completion_ops); err = nfs_do_writepage(page, wbc, &pgio); nfs_pageio_complete(&pgio); if (err < 0) @@ -400,7 +398,8 @@ int nfs_writepages(struct address_space *mapping, struct writeback_control *wbc) nfs_inc_stats(inode, NFSIOS_VFSWRITEPAGES); - NFS_PROTO(inode)->write_pageio_init(&pgio, inode, wb_priority(wbc), &nfs_async_write_completion_ops); + nfs_pageio_init_write(&pgio, inode, wb_priority(wbc), false, + &nfs_async_write_completion_ops); err = write_cache_pages(mapping, wbc, nfs_writepages_callback, &pgio); nfs_pageio_complete(&pgio); @@ -1282,11 +1281,17 @@ static const struct nfs_pageio_ops nfs_pageio_write_ops = { }; void nfs_pageio_init_write(struct nfs_pageio_descriptor *pgio, - struct inode *inode, int ioflags, + struct inode *inode, int ioflags, bool force_mds, const struct nfs_pgio_completion_ops *compl_ops) { - nfs_pageio_init(pgio, inode, &nfs_pageio_write_ops, compl_ops, - NFS_SERVER(inode)->wsize, ioflags); + struct nfs_server *server = NFS_SERVER(inode); + const struct nfs_pageio_ops *pg_ops = &nfs_pageio_write_ops; + +#ifdef CONFIG_NFS_V4_1 + if (server->pnfs_curr_ld && !force_mds) + pg_ops = server->pnfs_curr_ld->pg_write_ops; +#endif + nfs_pageio_init(pgio, inode, pg_ops, compl_ops, server->wsize, ioflags); } EXPORT_SYMBOL_GPL(nfs_pageio_init_write); -- cgit v1.1 From fab5fc25d230edcc8ee72367e505955a2fae0cac Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 16 Apr 2014 15:07:22 +0200 Subject: nfs: remove ->read_pageio_init from rpc ops The read_pageio_init method is just a very convoluted way to grab the right nfs_pageio_ops vector. The vector to chose is not a choice of protocol version, but just a pNFS vs MDS I/O choice that can simply be done inside nfs_pageio_init_read based on the presence of a layout driver, and a new force_mds flag to the special case of falling back to MDS I/O on a pNFS-capable volume. Signed-off-by: Christoph Hellwig Tested-by: Anna Schumaker Signed-off-by: Trond Myklebust --- fs/nfs/direct.c | 2 +- fs/nfs/internal.h | 2 +- fs/nfs/nfs3proc.c | 1 - fs/nfs/nfs4proc.c | 1 - fs/nfs/pnfs.c | 15 +-------------- fs/nfs/pnfs.h | 9 --------- fs/nfs/proc.c | 1 - fs/nfs/read.c | 19 ++++++++++++++----- 8 files changed, 17 insertions(+), 33 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c index 6a31102..bbe688e 100644 --- a/fs/nfs/direct.c +++ b/fs/nfs/direct.c @@ -424,7 +424,7 @@ static ssize_t nfs_direct_read_schedule_iovec(struct nfs_direct_req *dreq, size_t requested_bytes = 0; unsigned long seg; - NFS_PROTO(dreq->inode)->read_pageio_init(&desc, dreq->inode, + nfs_pageio_init_read(&desc, dreq->inode, false, &nfs_direct_read_completion_ops); get_dreq(dreq); desc.pg_dreq = dreq; diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h index 8431083..98fe618 100644 --- a/fs/nfs/internal.h +++ b/fs/nfs/internal.h @@ -398,7 +398,7 @@ struct nfs_pgio_completion_ops; extern struct nfs_read_header *nfs_readhdr_alloc(void); extern void nfs_readhdr_free(struct nfs_pgio_header *hdr); extern void nfs_pageio_init_read(struct nfs_pageio_descriptor *pgio, - struct inode *inode, + struct inode *inode, bool force_mds, const struct nfs_pgio_completion_ops *compl_ops); extern int nfs_initiate_read(struct rpc_clnt *clnt, struct nfs_read_data *data, diff --git a/fs/nfs/nfs3proc.c b/fs/nfs/nfs3proc.c index e984880..d873241 100644 --- a/fs/nfs/nfs3proc.c +++ b/fs/nfs/nfs3proc.c @@ -947,7 +947,6 @@ const struct nfs_rpc_ops nfs_v3_clientops = { .pathconf = nfs3_proc_pathconf, .decode_dirent = nfs3_decode_dirent, .read_setup = nfs3_proc_read_setup, - .read_pageio_init = nfs_pageio_init_read, .read_rpc_prepare = nfs3_proc_read_rpc_prepare, .read_done = nfs3_read_done, .write_setup = nfs3_proc_write_setup, diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 8da0c62..21cd1f2 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -8433,7 +8433,6 @@ const struct nfs_rpc_ops nfs_v4_clientops = { .set_capabilities = nfs4_server_capabilities, .decode_dirent = nfs4_decode_dirent, .read_setup = nfs4_proc_read_setup, - .read_pageio_init = pnfs_pageio_init_read, .read_rpc_prepare = nfs4_proc_read_rpc_prepare, .read_done = nfs4_read_done, .write_setup = nfs4_proc_write_setup, diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index 9edac9f..3d5bc2b 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c @@ -1434,19 +1434,6 @@ pnfs_generic_pg_init_write(struct nfs_pageio_descriptor *pgio, } EXPORT_SYMBOL_GPL(pnfs_generic_pg_init_write); -void -pnfs_pageio_init_read(struct nfs_pageio_descriptor *pgio, struct inode *inode, - const struct nfs_pgio_completion_ops *compl_ops) -{ - struct nfs_server *server = NFS_SERVER(inode); - struct pnfs_layoutdriver_type *ld = server->pnfs_curr_ld; - - if (ld == NULL) - nfs_pageio_init_read(pgio, inode, compl_ops); - else - nfs_pageio_init(pgio, inode, ld->pg_read_ops, compl_ops, server->rsize, 0); -} - bool pnfs_generic_pg_test(struct nfs_pageio_descriptor *pgio, struct nfs_page *prev, struct nfs_page *req) @@ -1641,7 +1628,7 @@ int pnfs_read_done_resend_to_mds(struct inode *inode, LIST_HEAD(failed); /* Resend all requests through the MDS */ - nfs_pageio_init_read(&pgio, inode, compl_ops); + nfs_pageio_init_read(&pgio, inode, true, compl_ops); pgio.pg_dreq = dreq; while (!list_empty(head)) { struct nfs_page *req = nfs_list_entry(head->next); diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h index e9ac8fb..94a9a18 100644 --- a/fs/nfs/pnfs.h +++ b/fs/nfs/pnfs.h @@ -180,9 +180,6 @@ extern int nfs4_proc_layoutreturn(struct nfs4_layoutreturn *lrp); void pnfs_get_layout_hdr(struct pnfs_layout_hdr *lo); void pnfs_put_lseg(struct pnfs_layout_segment *lseg); -void pnfs_pageio_init_read(struct nfs_pageio_descriptor *, struct inode *, - const struct nfs_pgio_completion_ops *); - void set_pnfs_layoutdriver(struct nfs_server *, const struct nfs_fh *, u32); void unset_pnfs_layoutdriver(struct nfs_server *); void pnfs_generic_pg_init_read(struct nfs_pageio_descriptor *, struct nfs_page *); @@ -459,12 +456,6 @@ static inline void unset_pnfs_layoutdriver(struct nfs_server *s) { } -static inline void pnfs_pageio_init_read(struct nfs_pageio_descriptor *pgio, struct inode *inode, - const struct nfs_pgio_completion_ops *compl_ops) -{ - nfs_pageio_init_read(pgio, inode, compl_ops); -} - static inline int pnfs_commit_list(struct inode *inode, struct list_head *mds_pages, int how, struct nfs_commit_info *cinfo) diff --git a/fs/nfs/proc.c b/fs/nfs/proc.c index f9cc295..8cc227f 100644 --- a/fs/nfs/proc.c +++ b/fs/nfs/proc.c @@ -735,7 +735,6 @@ const struct nfs_rpc_ops nfs_v2_clientops = { .pathconf = nfs_proc_pathconf, .decode_dirent = nfs2_decode_dirent, .read_setup = nfs_proc_read_setup, - .read_pageio_init = nfs_pageio_init_read, .read_rpc_prepare = nfs_proc_read_rpc_prepare, .read_done = nfs_read_done, .write_setup = nfs_proc_write_setup, diff --git a/fs/nfs/read.c b/fs/nfs/read.c index 411aedd..7f87461 100644 --- a/fs/nfs/read.c +++ b/fs/nfs/read.c @@ -24,6 +24,7 @@ #include "internal.h" #include "iostat.h" #include "fscache.h" +#include "pnfs.h" #define NFSDBG_FACILITY NFSDBG_PAGECACHE @@ -114,11 +115,17 @@ int nfs_return_empty_page(struct page *page) } void nfs_pageio_init_read(struct nfs_pageio_descriptor *pgio, - struct inode *inode, + struct inode *inode, bool force_mds, const struct nfs_pgio_completion_ops *compl_ops) { - nfs_pageio_init(pgio, inode, &nfs_pageio_read_ops, compl_ops, - NFS_SERVER(inode)->rsize, 0); + struct nfs_server *server = NFS_SERVER(inode); + const struct nfs_pageio_ops *pg_ops = &nfs_pageio_read_ops; + +#ifdef CONFIG_NFS_V4_1 + if (server->pnfs_curr_ld && !force_mds) + pg_ops = server->pnfs_curr_ld->pg_read_ops; +#endif + nfs_pageio_init(pgio, inode, pg_ops, compl_ops, server->rsize, 0); } EXPORT_SYMBOL_GPL(nfs_pageio_init_read); @@ -147,7 +154,8 @@ int nfs_readpage_async(struct nfs_open_context *ctx, struct inode *inode, if (len < PAGE_CACHE_SIZE) zero_user_segment(page, len, PAGE_CACHE_SIZE); - NFS_PROTO(inode)->read_pageio_init(&pgio, inode, &nfs_async_read_completion_ops); + nfs_pageio_init_read(&pgio, inode, false, + &nfs_async_read_completion_ops); nfs_pageio_add_request(&pgio, new); nfs_pageio_complete(&pgio); NFS_I(inode)->read_io += pgio.pg_bytes_written; @@ -654,7 +662,8 @@ int nfs_readpages(struct file *filp, struct address_space *mapping, if (ret == 0) goto read_complete; /* all pages were read */ - NFS_PROTO(inode)->read_pageio_init(&pgio, inode, &nfs_async_read_completion_ops); + nfs_pageio_init_read(&pgio, inode, false, + &nfs_async_read_completion_ops); ret = read_cache_pages(mapping, pages, readpage_async_filler, &desc); -- cgit v1.1 From 3c6b899c49e5e9c2803b59ee553eddaf69cea7f6 Mon Sep 17 00:00:00 2001 From: Anna Schumaker Date: Tue, 6 May 2014 09:12:24 -0400 Subject: NFS: Create a common argument structure for reads and writes Reads and writes have very similar arguments. This patch combines them together and documents the few fields used only by write. Signed-off-by: Anna Schumaker Signed-off-by: Trond Myklebust --- fs/nfs/nfs2xdr.c | 8 ++++---- fs/nfs/nfs3xdr.c | 8 ++++---- fs/nfs/nfs4proc.c | 4 ++-- fs/nfs/nfs4xdr.c | 10 ++++++---- fs/nfs/read.c | 2 +- fs/nfs/write.c | 2 +- 6 files changed, 18 insertions(+), 16 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/nfs2xdr.c b/fs/nfs/nfs2xdr.c index 62db136..461cd8b 100644 --- a/fs/nfs/nfs2xdr.c +++ b/fs/nfs/nfs2xdr.c @@ -613,7 +613,7 @@ static void nfs2_xdr_enc_readlinkargs(struct rpc_rqst *req, * }; */ static void encode_readargs(struct xdr_stream *xdr, - const struct nfs_readargs *args) + const struct nfs_pgio_args *args) { u32 offset = args->offset; u32 count = args->count; @@ -629,7 +629,7 @@ static void encode_readargs(struct xdr_stream *xdr, static void nfs2_xdr_enc_readargs(struct rpc_rqst *req, struct xdr_stream *xdr, - const struct nfs_readargs *args) + const struct nfs_pgio_args *args) { encode_readargs(xdr, args); prepare_reply_buffer(req, args->pages, args->pgbase, @@ -649,7 +649,7 @@ static void nfs2_xdr_enc_readargs(struct rpc_rqst *req, * }; */ static void encode_writeargs(struct xdr_stream *xdr, - const struct nfs_writeargs *args) + const struct nfs_pgio_args *args) { u32 offset = args->offset; u32 count = args->count; @@ -669,7 +669,7 @@ static void encode_writeargs(struct xdr_stream *xdr, static void nfs2_xdr_enc_writeargs(struct rpc_rqst *req, struct xdr_stream *xdr, - const struct nfs_writeargs *args) + const struct nfs_pgio_args *args) { encode_writeargs(xdr, args); xdr->buf->flags |= XDRBUF_WRITE; diff --git a/fs/nfs/nfs3xdr.c b/fs/nfs/nfs3xdr.c index fa6d721..02f16c2 100644 --- a/fs/nfs/nfs3xdr.c +++ b/fs/nfs/nfs3xdr.c @@ -953,7 +953,7 @@ static void nfs3_xdr_enc_readlink3args(struct rpc_rqst *req, * }; */ static void encode_read3args(struct xdr_stream *xdr, - const struct nfs_readargs *args) + const struct nfs_pgio_args *args) { __be32 *p; @@ -966,7 +966,7 @@ static void encode_read3args(struct xdr_stream *xdr, static void nfs3_xdr_enc_read3args(struct rpc_rqst *req, struct xdr_stream *xdr, - const struct nfs_readargs *args) + const struct nfs_pgio_args *args) { encode_read3args(xdr, args); prepare_reply_buffer(req, args->pages, args->pgbase, @@ -992,7 +992,7 @@ static void nfs3_xdr_enc_read3args(struct rpc_rqst *req, * }; */ static void encode_write3args(struct xdr_stream *xdr, - const struct nfs_writeargs *args) + const struct nfs_pgio_args *args) { __be32 *p; @@ -1008,7 +1008,7 @@ static void encode_write3args(struct xdr_stream *xdr, static void nfs3_xdr_enc_write3args(struct rpc_rqst *req, struct xdr_stream *xdr, - const struct nfs_writeargs *args) + const struct nfs_pgio_args *args) { encode_write3args(xdr, args); xdr->buf->flags |= XDRBUF_WRITE; diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 21cd1f2..4794ca6 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -4055,7 +4055,7 @@ static int nfs4_read_done_cb(struct rpc_task *task, struct nfs_read_data *data) } static bool nfs4_read_stateid_changed(struct rpc_task *task, - struct nfs_readargs *args) + struct nfs_pgio_args *args) { if (!nfs4_error_stateid_expired(task->tk_status) || @@ -4121,7 +4121,7 @@ static int nfs4_write_done_cb(struct rpc_task *task, struct nfs_write_data *data } static bool nfs4_write_stateid_changed(struct rpc_task *task, - struct nfs_writeargs *args) + struct nfs_pgio_args *args) { if (!nfs4_error_stateid_expired(task->tk_status) || diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c index 73ce8d4..032159c 100644 --- a/fs/nfs/nfs4xdr.c +++ b/fs/nfs/nfs4xdr.c @@ -1556,7 +1556,8 @@ static void encode_putrootfh(struct xdr_stream *xdr, struct compound_hdr *hdr) encode_op_hdr(xdr, OP_PUTROOTFH, decode_putrootfh_maxsz, hdr); } -static void encode_read(struct xdr_stream *xdr, const struct nfs_readargs *args, struct compound_hdr *hdr) +static void encode_read(struct xdr_stream *xdr, const struct nfs_pgio_args *args, + struct compound_hdr *hdr) { __be32 *p; @@ -1701,7 +1702,8 @@ static void encode_setclientid_confirm(struct xdr_stream *xdr, const struct nfs4 encode_nfs4_verifier(xdr, &arg->confirm); } -static void encode_write(struct xdr_stream *xdr, const struct nfs_writeargs *args, struct compound_hdr *hdr) +static void encode_write(struct xdr_stream *xdr, const struct nfs_pgio_args *args, + struct compound_hdr *hdr) { __be32 *p; @@ -2451,7 +2453,7 @@ static void nfs4_xdr_enc_readdir(struct rpc_rqst *req, struct xdr_stream *xdr, * Encode a READ request */ static void nfs4_xdr_enc_read(struct rpc_rqst *req, struct xdr_stream *xdr, - struct nfs_readargs *args) + struct nfs_pgio_args *args) { struct compound_hdr hdr = { .minorversion = nfs4_xdr_minorversion(&args->seq_args), @@ -2513,7 +2515,7 @@ static void nfs4_xdr_enc_getacl(struct rpc_rqst *req, struct xdr_stream *xdr, * Encode a WRITE request */ static void nfs4_xdr_enc_write(struct rpc_rqst *req, struct xdr_stream *xdr, - struct nfs_writeargs *args) + struct nfs_pgio_args *args) { struct compound_hdr hdr = { .minorversion = nfs4_xdr_minorversion(&args->seq_args), diff --git a/fs/nfs/read.c b/fs/nfs/read.c index 7f87461..46d5552 100644 --- a/fs/nfs/read.c +++ b/fs/nfs/read.c @@ -470,7 +470,7 @@ int nfs_readpage_result(struct rpc_task *task, struct nfs_read_data *data) static void nfs_readpage_retry(struct rpc_task *task, struct nfs_read_data *data) { - struct nfs_readargs *argp = &data->args; + struct nfs_pgio_args *argp = &data->args; struct nfs_readres *resp = &data->res; /* This is a short read! */ diff --git a/fs/nfs/write.c b/fs/nfs/write.c index ee6d46f..25ba383 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -1388,7 +1388,7 @@ static int nfs_should_remove_suid(const struct inode *inode) */ void nfs_writeback_done(struct rpc_task *task, struct nfs_write_data *data) { - struct nfs_writeargs *argp = &data->args; + struct nfs_pgio_args *argp = &data->args; struct nfs_writeres *resp = &data->res; struct inode *inode = data->header->inode; int status; -- cgit v1.1 From 9137bdf3d241fc2cbeb2a8ced51d1546150aa6a1 Mon Sep 17 00:00:00 2001 From: Anna Schumaker Date: Tue, 6 May 2014 09:12:25 -0400 Subject: NFS: Create a common results structure for reads and writes Reads and writes have very similar results. This patch combines the two structs together with comments to show where the differing fields are used. Signed-off-by: Anna Schumaker Signed-off-by: Trond Myklebust --- fs/nfs/nfs2xdr.c | 6 +++--- fs/nfs/nfs3xdr.c | 8 ++++---- fs/nfs/nfs4xdr.c | 9 +++++---- fs/nfs/read.c | 2 +- fs/nfs/write.c | 2 +- 5 files changed, 14 insertions(+), 13 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/nfs2xdr.c b/fs/nfs/nfs2xdr.c index 461cd8b..5f61b83 100644 --- a/fs/nfs/nfs2xdr.c +++ b/fs/nfs/nfs2xdr.c @@ -103,7 +103,7 @@ static void print_overflow_msg(const char *func, const struct xdr_stream *xdr) /* * typedef opaque nfsdata<>; */ -static int decode_nfsdata(struct xdr_stream *xdr, struct nfs_readres *result) +static int decode_nfsdata(struct xdr_stream *xdr, struct nfs_pgio_res *result) { u32 recvd, count; __be32 *p; @@ -857,7 +857,7 @@ out_default: * }; */ static int nfs2_xdr_dec_readres(struct rpc_rqst *req, struct xdr_stream *xdr, - struct nfs_readres *result) + struct nfs_pgio_res *result) { enum nfs_stat status; int error; @@ -878,7 +878,7 @@ out_default: } static int nfs2_xdr_dec_writeres(struct rpc_rqst *req, struct xdr_stream *xdr, - struct nfs_writeres *result) + struct nfs_pgio_res *result) { /* All NFSv2 writes are "file sync" writes */ result->verf->committed = NFS_FILE_SYNC; diff --git a/fs/nfs/nfs3xdr.c b/fs/nfs/nfs3xdr.c index 02f16c2..8f4cbe7 100644 --- a/fs/nfs/nfs3xdr.c +++ b/fs/nfs/nfs3xdr.c @@ -1589,7 +1589,7 @@ out_default: * }; */ static int decode_read3resok(struct xdr_stream *xdr, - struct nfs_readres *result) + struct nfs_pgio_res *result) { u32 eof, count, ocount, recvd; __be32 *p; @@ -1625,7 +1625,7 @@ out_overflow: } static int nfs3_xdr_dec_read3res(struct rpc_rqst *req, struct xdr_stream *xdr, - struct nfs_readres *result) + struct nfs_pgio_res *result) { enum nfs_stat status; int error; @@ -1673,7 +1673,7 @@ out_status: * }; */ static int decode_write3resok(struct xdr_stream *xdr, - struct nfs_writeres *result) + struct nfs_pgio_res *result) { __be32 *p; @@ -1697,7 +1697,7 @@ out_eio: } static int nfs3_xdr_dec_write3res(struct rpc_rqst *req, struct xdr_stream *xdr, - struct nfs_writeres *result) + struct nfs_pgio_res *result) { enum nfs_stat status; int error; diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c index 032159c..939ae60 100644 --- a/fs/nfs/nfs4xdr.c +++ b/fs/nfs/nfs4xdr.c @@ -5087,7 +5087,8 @@ static int decode_putrootfh(struct xdr_stream *xdr) return decode_op_hdr(xdr, OP_PUTROOTFH); } -static int decode_read(struct xdr_stream *xdr, struct rpc_rqst *req, struct nfs_readres *res) +static int decode_read(struct xdr_stream *xdr, struct rpc_rqst *req, + struct nfs_pgio_res *res) { __be32 *p; uint32_t count, eof, recvd; @@ -5341,7 +5342,7 @@ static int decode_setclientid_confirm(struct xdr_stream *xdr) return decode_op_hdr(xdr, OP_SETCLIENTID_CONFIRM); } -static int decode_write(struct xdr_stream *xdr, struct nfs_writeres *res) +static int decode_write(struct xdr_stream *xdr, struct nfs_pgio_res *res) { __be32 *p; int status; @@ -6638,7 +6639,7 @@ out: * Decode Read response */ static int nfs4_xdr_dec_read(struct rpc_rqst *rqstp, struct xdr_stream *xdr, - struct nfs_readres *res) + struct nfs_pgio_res *res) { struct compound_hdr hdr; int status; @@ -6663,7 +6664,7 @@ out: * Decode WRITE response */ static int nfs4_xdr_dec_write(struct rpc_rqst *rqstp, struct xdr_stream *xdr, - struct nfs_writeres *res) + struct nfs_pgio_res *res) { struct compound_hdr hdr; int status; diff --git a/fs/nfs/read.c b/fs/nfs/read.c index 46d5552..473bba3 100644 --- a/fs/nfs/read.c +++ b/fs/nfs/read.c @@ -471,7 +471,7 @@ int nfs_readpage_result(struct rpc_task *task, struct nfs_read_data *data) static void nfs_readpage_retry(struct rpc_task *task, struct nfs_read_data *data) { struct nfs_pgio_args *argp = &data->args; - struct nfs_readres *resp = &data->res; + struct nfs_pgio_res *resp = &data->res; /* This is a short read! */ nfs_inc_stats(data->header->inode, NFSIOS_SHORTREAD); diff --git a/fs/nfs/write.c b/fs/nfs/write.c index 25ba383..d392a70 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -1389,7 +1389,7 @@ static int nfs_should_remove_suid(const struct inode *inode) void nfs_writeback_done(struct rpc_task *task, struct nfs_write_data *data) { struct nfs_pgio_args *argp = &data->args; - struct nfs_writeres *resp = &data->res; + struct nfs_pgio_res *resp = &data->res; struct inode *inode = data->header->inode; int status; -- cgit v1.1 From 9c7e1b3d50b56b8d8f6237ed232350b7c6476cd5 Mon Sep 17 00:00:00 2001 From: Anna Schumaker Date: Tue, 6 May 2014 09:12:26 -0400 Subject: NFS: Create a common read and write data struct At this point, the only difference between nfs_read_data and nfs_write_data is the write verifier. Signed-off-by: Anna Schumaker Signed-off-by: Trond Myklebust --- fs/nfs/blocklayout/blocklayout.c | 22 +++++++++++----------- fs/nfs/internal.h | 10 +++++----- fs/nfs/nfs3proc.c | 12 ++++++------ fs/nfs/nfs4_fs.h | 4 ++-- fs/nfs/nfs4filelayout.c | 34 +++++++++++++++++----------------- fs/nfs/nfs4proc.c | 30 +++++++++++++++--------------- fs/nfs/nfs4trace.h | 8 ++++---- fs/nfs/objlayout/objio_osd.c | 6 +++--- fs/nfs/objlayout/objlayout.c | 16 ++++++++-------- fs/nfs/objlayout/objlayout.h | 8 ++++---- fs/nfs/pnfs.c | 26 +++++++++++++------------- fs/nfs/pnfs.h | 10 +++++----- fs/nfs/proc.c | 12 ++++++------ fs/nfs/read.c | 32 ++++++++++++++++---------------- fs/nfs/write.c | 36 ++++++++++++++++++------------------ 15 files changed, 133 insertions(+), 133 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/blocklayout/blocklayout.c b/fs/nfs/blocklayout/blocklayout.c index 65d849b..206cc68 100644 --- a/fs/nfs/blocklayout/blocklayout.c +++ b/fs/nfs/blocklayout/blocklayout.c @@ -210,7 +210,7 @@ static void bl_end_io_read(struct bio *bio, int err) SetPageUptodate(bvec->bv_page); if (err) { - struct nfs_read_data *rdata = par->data; + struct nfs_pgio_data *rdata = par->data; struct nfs_pgio_header *header = rdata->header; if (!header->pnfs_error) @@ -224,17 +224,17 @@ static void bl_end_io_read(struct bio *bio, int err) static void bl_read_cleanup(struct work_struct *work) { struct rpc_task *task; - struct nfs_read_data *rdata; + struct nfs_pgio_data *rdata; dprintk("%s enter\n", __func__); task = container_of(work, struct rpc_task, u.tk_work); - rdata = container_of(task, struct nfs_read_data, task); + rdata = container_of(task, struct nfs_pgio_data, task); pnfs_ld_read_done(rdata); } static void bl_end_par_io_read(void *data, int unused) { - struct nfs_read_data *rdata = data; + struct nfs_pgio_data *rdata = data; rdata->task.tk_status = rdata->header->pnfs_error; INIT_WORK(&rdata->task.u.tk_work, bl_read_cleanup); @@ -242,7 +242,7 @@ bl_end_par_io_read(void *data, int unused) } static enum pnfs_try_status -bl_read_pagelist(struct nfs_read_data *rdata) +bl_read_pagelist(struct nfs_pgio_data *rdata) { struct nfs_pgio_header *header = rdata->header; int i, hole; @@ -390,7 +390,7 @@ static void bl_end_io_write_zero(struct bio *bio, int err) } if (unlikely(err)) { - struct nfs_write_data *data = par->data; + struct nfs_pgio_data *data = par->data; struct nfs_pgio_header *header = data->header; if (!header->pnfs_error) @@ -405,7 +405,7 @@ static void bl_end_io_write(struct bio *bio, int err) { struct parallel_io *par = bio->bi_private; const int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags); - struct nfs_write_data *data = par->data; + struct nfs_pgio_data *data = par->data; struct nfs_pgio_header *header = data->header; if (!uptodate) { @@ -423,10 +423,10 @@ static void bl_end_io_write(struct bio *bio, int err) static void bl_write_cleanup(struct work_struct *work) { struct rpc_task *task; - struct nfs_write_data *wdata; + struct nfs_pgio_data *wdata; dprintk("%s enter\n", __func__); task = container_of(work, struct rpc_task, u.tk_work); - wdata = container_of(task, struct nfs_write_data, task); + wdata = container_of(task, struct nfs_pgio_data, task); if (likely(!wdata->header->pnfs_error)) { /* Marks for LAYOUTCOMMIT */ mark_extents_written(BLK_LSEG2EXT(wdata->header->lseg), @@ -438,7 +438,7 @@ static void bl_write_cleanup(struct work_struct *work) /* Called when last of bios associated with a bl_write_pagelist call finishes */ static void bl_end_par_io_write(void *data, int num_se) { - struct nfs_write_data *wdata = data; + struct nfs_pgio_data *wdata = data; if (unlikely(wdata->header->pnfs_error)) { bl_free_short_extents(&BLK_LSEG2EXT(wdata->header->lseg)->bl_inval, @@ -673,7 +673,7 @@ check_page: } static enum pnfs_try_status -bl_write_pagelist(struct nfs_write_data *wdata, int sync) +bl_write_pagelist(struct nfs_pgio_data *wdata, int sync) { struct nfs_pgio_header *header = wdata->header; int i, ret, npg_zero, pg_index, last = 0; diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h index 98fe618..af01b80 100644 --- a/fs/nfs/internal.h +++ b/fs/nfs/internal.h @@ -401,13 +401,13 @@ extern void nfs_pageio_init_read(struct nfs_pageio_descriptor *pgio, struct inode *inode, bool force_mds, const struct nfs_pgio_completion_ops *compl_ops); extern int nfs_initiate_read(struct rpc_clnt *clnt, - struct nfs_read_data *data, + struct nfs_pgio_data *data, const struct rpc_call_ops *call_ops, int flags); extern void nfs_read_prepare(struct rpc_task *task, void *calldata); extern int nfs_generic_pagein(struct nfs_pageio_descriptor *desc, struct nfs_pgio_header *hdr); extern void nfs_pageio_reset_read_mds(struct nfs_pageio_descriptor *pgio); -extern void nfs_readdata_release(struct nfs_read_data *rdata); +extern void nfs_readdata_release(struct nfs_pgio_data *rdata); /* super.c */ void nfs_clone_super(struct super_block *, struct nfs_mount_info *); @@ -429,10 +429,10 @@ extern void nfs_writehdr_free(struct nfs_pgio_header *hdr); extern int nfs_generic_flush(struct nfs_pageio_descriptor *desc, struct nfs_pgio_header *hdr); extern void nfs_pageio_reset_write_mds(struct nfs_pageio_descriptor *pgio); -extern void nfs_writedata_release(struct nfs_write_data *wdata); +extern void nfs_writedata_release(struct nfs_pgio_data *wdata); extern void nfs_commit_free(struct nfs_commit_data *p); extern int nfs_initiate_write(struct rpc_clnt *clnt, - struct nfs_write_data *data, + struct nfs_pgio_data *data, const struct rpc_call_ops *call_ops, int how, int flags); extern void nfs_write_prepare(struct rpc_task *task, void *calldata); @@ -492,7 +492,7 @@ static inline void nfs_inode_dio_wait(struct inode *inode) extern ssize_t nfs_dreq_bytes_left(struct nfs_direct_req *dreq); /* nfs4proc.c */ -extern void __nfs4_read_done_cb(struct nfs_read_data *); +extern void __nfs4_read_done_cb(struct nfs_pgio_data *); extern struct nfs_client *nfs4_init_client(struct nfs_client *clp, const struct rpc_timeout *timeparms, const char *ip_addr); diff --git a/fs/nfs/nfs3proc.c b/fs/nfs/nfs3proc.c index d873241..d235369 100644 --- a/fs/nfs/nfs3proc.c +++ b/fs/nfs/nfs3proc.c @@ -795,7 +795,7 @@ nfs3_proc_pathconf(struct nfs_server *server, struct nfs_fh *fhandle, return status; } -static int nfs3_read_done(struct rpc_task *task, struct nfs_read_data *data) +static int nfs3_read_done(struct rpc_task *task, struct nfs_pgio_data *data) { struct inode *inode = data->header->inode; @@ -807,18 +807,18 @@ static int nfs3_read_done(struct rpc_task *task, struct nfs_read_data *data) return 0; } -static void nfs3_proc_read_setup(struct nfs_read_data *data, struct rpc_message *msg) +static void nfs3_proc_read_setup(struct nfs_pgio_data *data, struct rpc_message *msg) { msg->rpc_proc = &nfs3_procedures[NFS3PROC_READ]; } -static int nfs3_proc_read_rpc_prepare(struct rpc_task *task, struct nfs_read_data *data) +static int nfs3_proc_read_rpc_prepare(struct rpc_task *task, struct nfs_pgio_data *data) { rpc_call_start(task); return 0; } -static int nfs3_write_done(struct rpc_task *task, struct nfs_write_data *data) +static int nfs3_write_done(struct rpc_task *task, struct nfs_pgio_data *data) { struct inode *inode = data->header->inode; @@ -829,12 +829,12 @@ static int nfs3_write_done(struct rpc_task *task, struct nfs_write_data *data) return 0; } -static void nfs3_proc_write_setup(struct nfs_write_data *data, struct rpc_message *msg) +static void nfs3_proc_write_setup(struct nfs_pgio_data *data, struct rpc_message *msg) { msg->rpc_proc = &nfs3_procedures[NFS3PROC_WRITE]; } -static int nfs3_proc_write_rpc_prepare(struct rpc_task *task, struct nfs_write_data *data) +static int nfs3_proc_write_rpc_prepare(struct rpc_task *task, struct nfs_pgio_data *data) { rpc_call_start(task); return 0; diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h index e1d1bad..f63cb87 100644 --- a/fs/nfs/nfs4_fs.h +++ b/fs/nfs/nfs4_fs.h @@ -337,7 +337,7 @@ nfs4_state_protect(struct nfs_client *clp, unsigned long sp4_mode, */ static inline void nfs4_state_protect_write(struct nfs_client *clp, struct rpc_clnt **clntp, - struct rpc_message *msg, struct nfs_write_data *wdata) + struct rpc_message *msg, struct nfs_pgio_data *wdata) { if (_nfs4_state_protect(clp, NFS_SP4_MACH_CRED_WRITE, clntp, msg) && !test_bit(NFS_SP4_MACH_CRED_COMMIT, &clp->cl_sp4_flags)) @@ -369,7 +369,7 @@ nfs4_state_protect(struct nfs_client *clp, unsigned long sp4_flags, static inline void nfs4_state_protect_write(struct nfs_client *clp, struct rpc_clnt **clntp, - struct rpc_message *msg, struct nfs_write_data *wdata) + struct rpc_message *msg, struct nfs_pgio_data *wdata) { } #endif /* CONFIG_NFS_V4_1 */ diff --git a/fs/nfs/nfs4filelayout.c b/fs/nfs/nfs4filelayout.c index b9a35c0..e693614 100644 --- a/fs/nfs/nfs4filelayout.c +++ b/fs/nfs/nfs4filelayout.c @@ -84,7 +84,7 @@ filelayout_get_dserver_offset(struct pnfs_layout_segment *lseg, loff_t offset) BUG(); } -static void filelayout_reset_write(struct nfs_write_data *data) +static void filelayout_reset_write(struct nfs_pgio_data *data) { struct nfs_pgio_header *hdr = data->header; struct rpc_task *task = &data->task; @@ -105,7 +105,7 @@ static void filelayout_reset_write(struct nfs_write_data *data) } } -static void filelayout_reset_read(struct nfs_read_data *data) +static void filelayout_reset_read(struct nfs_pgio_data *data) { struct nfs_pgio_header *hdr = data->header; struct rpc_task *task = &data->task; @@ -243,7 +243,7 @@ wait_on_recovery: /* NFS_PROTO call done callback routines */ static int filelayout_read_done_cb(struct rpc_task *task, - struct nfs_read_data *data) + struct nfs_pgio_data *data) { struct nfs_pgio_header *hdr = data->header; int err; @@ -270,7 +270,7 @@ static int filelayout_read_done_cb(struct rpc_task *task, * rfc5661 is not clear about which credential should be used. */ static void -filelayout_set_layoutcommit(struct nfs_write_data *wdata) +filelayout_set_layoutcommit(struct nfs_pgio_data *wdata) { struct nfs_pgio_header *hdr = wdata->header; @@ -305,7 +305,7 @@ filelayout_reset_to_mds(struct pnfs_layout_segment *lseg) */ static void filelayout_read_prepare(struct rpc_task *task, void *data) { - struct nfs_read_data *rdata = data; + struct nfs_pgio_data *rdata = data; if (unlikely(test_bit(NFS_CONTEXT_BAD, &rdata->args.context->flags))) { rpc_exit(task, -EIO); @@ -317,7 +317,7 @@ static void filelayout_read_prepare(struct rpc_task *task, void *data) rpc_exit(task, 0); return; } - rdata->read_done_cb = filelayout_read_done_cb; + rdata->pgio_done_cb = filelayout_read_done_cb; if (nfs41_setup_sequence(rdata->ds_clp->cl_session, &rdata->args.seq_args, @@ -331,7 +331,7 @@ static void filelayout_read_prepare(struct rpc_task *task, void *data) static void filelayout_read_call_done(struct rpc_task *task, void *data) { - struct nfs_read_data *rdata = data; + struct nfs_pgio_data *rdata = data; dprintk("--> %s task->tk_status %d\n", __func__, task->tk_status); @@ -347,14 +347,14 @@ static void filelayout_read_call_done(struct rpc_task *task, void *data) static void filelayout_read_count_stats(struct rpc_task *task, void *data) { - struct nfs_read_data *rdata = data; + struct nfs_pgio_data *rdata = data; rpc_count_iostats(task, NFS_SERVER(rdata->header->inode)->client->cl_metrics); } static void filelayout_read_release(void *data) { - struct nfs_read_data *rdata = data; + struct nfs_pgio_data *rdata = data; struct pnfs_layout_hdr *lo = rdata->header->lseg->pls_layout; filelayout_fenceme(lo->plh_inode, lo); @@ -363,7 +363,7 @@ static void filelayout_read_release(void *data) } static int filelayout_write_done_cb(struct rpc_task *task, - struct nfs_write_data *data) + struct nfs_pgio_data *data) { struct nfs_pgio_header *hdr = data->header; int err; @@ -419,7 +419,7 @@ static int filelayout_commit_done_cb(struct rpc_task *task, static void filelayout_write_prepare(struct rpc_task *task, void *data) { - struct nfs_write_data *wdata = data; + struct nfs_pgio_data *wdata = data; if (unlikely(test_bit(NFS_CONTEXT_BAD, &wdata->args.context->flags))) { rpc_exit(task, -EIO); @@ -443,7 +443,7 @@ static void filelayout_write_prepare(struct rpc_task *task, void *data) static void filelayout_write_call_done(struct rpc_task *task, void *data) { - struct nfs_write_data *wdata = data; + struct nfs_pgio_data *wdata = data; if (test_bit(NFS_IOHDR_REDO, &wdata->header->flags) && task->tk_status == 0) { @@ -457,14 +457,14 @@ static void filelayout_write_call_done(struct rpc_task *task, void *data) static void filelayout_write_count_stats(struct rpc_task *task, void *data) { - struct nfs_write_data *wdata = data; + struct nfs_pgio_data *wdata = data; rpc_count_iostats(task, NFS_SERVER(wdata->header->inode)->client->cl_metrics); } static void filelayout_write_release(void *data) { - struct nfs_write_data *wdata = data; + struct nfs_pgio_data *wdata = data; struct pnfs_layout_hdr *lo = wdata->header->lseg->pls_layout; filelayout_fenceme(lo->plh_inode, lo); @@ -529,7 +529,7 @@ static const struct rpc_call_ops filelayout_commit_call_ops = { }; static enum pnfs_try_status -filelayout_read_pagelist(struct nfs_read_data *data) +filelayout_read_pagelist(struct nfs_pgio_data *data) { struct nfs_pgio_header *hdr = data->header; struct pnfs_layout_segment *lseg = hdr->lseg; @@ -575,7 +575,7 @@ filelayout_read_pagelist(struct nfs_read_data *data) /* Perform async writes. */ static enum pnfs_try_status -filelayout_write_pagelist(struct nfs_write_data *data, int sync) +filelayout_write_pagelist(struct nfs_pgio_data *data, int sync) { struct nfs_pgio_header *hdr = data->header; struct pnfs_layout_segment *lseg = hdr->lseg; @@ -600,7 +600,7 @@ filelayout_write_pagelist(struct nfs_write_data *data, int sync) __func__, hdr->inode->i_ino, sync, (size_t) data->args.count, offset, ds->ds_remotestr, atomic_read(&ds->ds_clp->cl_count)); - data->write_done_cb = filelayout_write_done_cb; + data->pgio_done_cb = filelayout_write_done_cb; atomic_inc(&ds->ds_clp->cl_count); data->ds_clp = ds->ds_clp; fh = nfs4_fl_select_ds_fh(lseg, j); diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 4794ca6..e793aa9 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -4033,12 +4033,12 @@ static bool nfs4_error_stateid_expired(int err) return false; } -void __nfs4_read_done_cb(struct nfs_read_data *data) +void __nfs4_read_done_cb(struct nfs_pgio_data *data) { nfs_invalidate_atime(data->header->inode); } -static int nfs4_read_done_cb(struct rpc_task *task, struct nfs_read_data *data) +static int nfs4_read_done_cb(struct rpc_task *task, struct nfs_pgio_data *data) { struct nfs_server *server = NFS_SERVER(data->header->inode); @@ -4068,7 +4068,7 @@ static bool nfs4_read_stateid_changed(struct rpc_task *task, return true; } -static int nfs4_read_done(struct rpc_task *task, struct nfs_read_data *data) +static int nfs4_read_done(struct rpc_task *task, struct nfs_pgio_data *data) { dprintk("--> %s\n", __func__); @@ -4077,19 +4077,19 @@ static int nfs4_read_done(struct rpc_task *task, struct nfs_read_data *data) return -EAGAIN; if (nfs4_read_stateid_changed(task, &data->args)) return -EAGAIN; - return data->read_done_cb ? data->read_done_cb(task, data) : + return data->pgio_done_cb ? data->pgio_done_cb(task, data) : nfs4_read_done_cb(task, data); } -static void nfs4_proc_read_setup(struct nfs_read_data *data, struct rpc_message *msg) +static void nfs4_proc_read_setup(struct nfs_pgio_data *data, struct rpc_message *msg) { data->timestamp = jiffies; - data->read_done_cb = nfs4_read_done_cb; + data->pgio_done_cb = nfs4_read_done_cb; msg->rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_READ]; nfs4_init_sequence(&data->args.seq_args, &data->res.seq_res, 0); } -static int nfs4_proc_read_rpc_prepare(struct rpc_task *task, struct nfs_read_data *data) +static int nfs4_proc_read_rpc_prepare(struct rpc_task *task, struct nfs_pgio_data *data) { if (nfs4_setup_sequence(NFS_SERVER(data->header->inode), &data->args.seq_args, @@ -4104,7 +4104,7 @@ static int nfs4_proc_read_rpc_prepare(struct rpc_task *task, struct nfs_read_dat return 0; } -static int nfs4_write_done_cb(struct rpc_task *task, struct nfs_write_data *data) +static int nfs4_write_done_cb(struct rpc_task *task, struct nfs_pgio_data *data) { struct inode *inode = data->header->inode; @@ -4134,18 +4134,18 @@ static bool nfs4_write_stateid_changed(struct rpc_task *task, return true; } -static int nfs4_write_done(struct rpc_task *task, struct nfs_write_data *data) +static int nfs4_write_done(struct rpc_task *task, struct nfs_pgio_data *data) { if (!nfs4_sequence_done(task, &data->res.seq_res)) return -EAGAIN; if (nfs4_write_stateid_changed(task, &data->args)) return -EAGAIN; - return data->write_done_cb ? data->write_done_cb(task, data) : + return data->pgio_done_cb ? data->pgio_done_cb(task, data) : nfs4_write_done_cb(task, data); } static -bool nfs4_write_need_cache_consistency_data(const struct nfs_write_data *data) +bool nfs4_write_need_cache_consistency_data(const struct nfs_pgio_data *data) { const struct nfs_pgio_header *hdr = data->header; @@ -4158,7 +4158,7 @@ bool nfs4_write_need_cache_consistency_data(const struct nfs_write_data *data) return nfs4_have_delegation(hdr->inode, FMODE_READ) == 0; } -static void nfs4_proc_write_setup(struct nfs_write_data *data, struct rpc_message *msg) +static void nfs4_proc_write_setup(struct nfs_pgio_data *data, struct rpc_message *msg) { struct nfs_server *server = NFS_SERVER(data->header->inode); @@ -4168,8 +4168,8 @@ static void nfs4_proc_write_setup(struct nfs_write_data *data, struct rpc_messag } else data->args.bitmask = server->cache_consistency_bitmask; - if (!data->write_done_cb) - data->write_done_cb = nfs4_write_done_cb; + if (!data->pgio_done_cb) + data->pgio_done_cb = nfs4_write_done_cb; data->res.server = server; data->timestamp = jiffies; @@ -4177,7 +4177,7 @@ static void nfs4_proc_write_setup(struct nfs_write_data *data, struct rpc_messag nfs4_init_sequence(&data->args.seq_args, &data->res.seq_res, 1); } -static int nfs4_proc_write_rpc_prepare(struct rpc_task *task, struct nfs_write_data *data) +static int nfs4_proc_write_rpc_prepare(struct rpc_task *task, struct nfs_pgio_data *data) { if (nfs4_setup_sequence(NFS_SERVER(data->header->inode), &data->args.seq_args, diff --git a/fs/nfs/nfs4trace.h b/fs/nfs/nfs4trace.h index 849cf14..0a744f3 100644 --- a/fs/nfs/nfs4trace.h +++ b/fs/nfs/nfs4trace.h @@ -932,7 +932,7 @@ DEFINE_NFS4_IDMAP_EVENT(nfs4_map_gid_to_group); DECLARE_EVENT_CLASS(nfs4_read_event, TP_PROTO( - const struct nfs_read_data *data, + const struct nfs_pgio_data *data, int error ), @@ -972,7 +972,7 @@ DECLARE_EVENT_CLASS(nfs4_read_event, #define DEFINE_NFS4_READ_EVENT(name) \ DEFINE_EVENT(nfs4_read_event, name, \ TP_PROTO( \ - const struct nfs_read_data *data, \ + const struct nfs_pgio_data *data, \ int error \ ), \ TP_ARGS(data, error)) @@ -983,7 +983,7 @@ DEFINE_NFS4_READ_EVENT(nfs4_pnfs_read); DECLARE_EVENT_CLASS(nfs4_write_event, TP_PROTO( - const struct nfs_write_data *data, + const struct nfs_pgio_data *data, int error ), @@ -1024,7 +1024,7 @@ DECLARE_EVENT_CLASS(nfs4_write_event, #define DEFINE_NFS4_WRITE_EVENT(name) \ DEFINE_EVENT(nfs4_write_event, name, \ TP_PROTO( \ - const struct nfs_write_data *data, \ + const struct nfs_pgio_data *data, \ int error \ ), \ TP_ARGS(data, error)) diff --git a/fs/nfs/objlayout/objio_osd.c b/fs/nfs/objlayout/objio_osd.c index 5457745..426b366 100644 --- a/fs/nfs/objlayout/objio_osd.c +++ b/fs/nfs/objlayout/objio_osd.c @@ -439,7 +439,7 @@ static void _read_done(struct ore_io_state *ios, void *private) objlayout_read_done(&objios->oir, status, objios->sync); } -int objio_read_pagelist(struct nfs_read_data *rdata) +int objio_read_pagelist(struct nfs_pgio_data *rdata) { struct nfs_pgio_header *hdr = rdata->header; struct objio_state *objios; @@ -487,7 +487,7 @@ static void _write_done(struct ore_io_state *ios, void *private) static struct page *__r4w_get_page(void *priv, u64 offset, bool *uptodate) { struct objio_state *objios = priv; - struct nfs_write_data *wdata = objios->oir.rpcdata; + struct nfs_pgio_data *wdata = objios->oir.rpcdata; struct address_space *mapping = wdata->header->inode->i_mapping; pgoff_t index = offset / PAGE_SIZE; struct page *page; @@ -531,7 +531,7 @@ static const struct _ore_r4w_op _r4w_op = { .put_page = &__r4w_put_page, }; -int objio_write_pagelist(struct nfs_write_data *wdata, int how) +int objio_write_pagelist(struct nfs_pgio_data *wdata, int how) { struct nfs_pgio_header *hdr = wdata->header; struct objio_state *objios; diff --git a/fs/nfs/objlayout/objlayout.c b/fs/nfs/objlayout/objlayout.c index e4f9cbf..2f955f6 100644 --- a/fs/nfs/objlayout/objlayout.c +++ b/fs/nfs/objlayout/objlayout.c @@ -229,11 +229,11 @@ objlayout_io_set_result(struct objlayout_io_res *oir, unsigned index, static void _rpc_read_complete(struct work_struct *work) { struct rpc_task *task; - struct nfs_read_data *rdata; + struct nfs_pgio_data *rdata; dprintk("%s enter\n", __func__); task = container_of(work, struct rpc_task, u.tk_work); - rdata = container_of(task, struct nfs_read_data, task); + rdata = container_of(task, struct nfs_pgio_data, task); pnfs_ld_read_done(rdata); } @@ -241,7 +241,7 @@ static void _rpc_read_complete(struct work_struct *work) void objlayout_read_done(struct objlayout_io_res *oir, ssize_t status, bool sync) { - struct nfs_read_data *rdata = oir->rpcdata; + struct nfs_pgio_data *rdata = oir->rpcdata; oir->status = rdata->task.tk_status = status; if (status >= 0) @@ -266,7 +266,7 @@ objlayout_read_done(struct objlayout_io_res *oir, ssize_t status, bool sync) * Perform sync or async reads. */ enum pnfs_try_status -objlayout_read_pagelist(struct nfs_read_data *rdata) +objlayout_read_pagelist(struct nfs_pgio_data *rdata) { struct nfs_pgio_header *hdr = rdata->header; struct inode *inode = hdr->inode; @@ -312,11 +312,11 @@ objlayout_read_pagelist(struct nfs_read_data *rdata) static void _rpc_write_complete(struct work_struct *work) { struct rpc_task *task; - struct nfs_write_data *wdata; + struct nfs_pgio_data *wdata; dprintk("%s enter\n", __func__); task = container_of(work, struct rpc_task, u.tk_work); - wdata = container_of(task, struct nfs_write_data, task); + wdata = container_of(task, struct nfs_pgio_data, task); pnfs_ld_write_done(wdata); } @@ -324,7 +324,7 @@ static void _rpc_write_complete(struct work_struct *work) void objlayout_write_done(struct objlayout_io_res *oir, ssize_t status, bool sync) { - struct nfs_write_data *wdata = oir->rpcdata; + struct nfs_pgio_data *wdata = oir->rpcdata; oir->status = wdata->task.tk_status = status; if (status >= 0) { @@ -351,7 +351,7 @@ objlayout_write_done(struct objlayout_io_res *oir, ssize_t status, bool sync) * Perform sync or async writes. */ enum pnfs_try_status -objlayout_write_pagelist(struct nfs_write_data *wdata, +objlayout_write_pagelist(struct nfs_pgio_data *wdata, int how) { struct nfs_pgio_header *hdr = wdata->header; diff --git a/fs/nfs/objlayout/objlayout.h b/fs/nfs/objlayout/objlayout.h index 87aa1de..01e0410 100644 --- a/fs/nfs/objlayout/objlayout.h +++ b/fs/nfs/objlayout/objlayout.h @@ -119,8 +119,8 @@ extern void objio_free_lseg(struct pnfs_layout_segment *lseg); */ extern void objio_free_result(struct objlayout_io_res *oir); -extern int objio_read_pagelist(struct nfs_read_data *rdata); -extern int objio_write_pagelist(struct nfs_write_data *wdata, int how); +extern int objio_read_pagelist(struct nfs_pgio_data *rdata); +extern int objio_write_pagelist(struct nfs_pgio_data *wdata, int how); /* * callback API @@ -168,10 +168,10 @@ extern struct pnfs_layout_segment *objlayout_alloc_lseg( extern void objlayout_free_lseg(struct pnfs_layout_segment *); extern enum pnfs_try_status objlayout_read_pagelist( - struct nfs_read_data *); + struct nfs_pgio_data *); extern enum pnfs_try_status objlayout_write_pagelist( - struct nfs_write_data *, + struct nfs_pgio_data *, int how); extern void objlayout_encode_layoutcommit( diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index 3d5bc2b..e9cea3a 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c @@ -1492,7 +1492,7 @@ int pnfs_write_done_resend_to_mds(struct inode *inode, } EXPORT_SYMBOL_GPL(pnfs_write_done_resend_to_mds); -static void pnfs_ld_handle_write_error(struct nfs_write_data *data) +static void pnfs_ld_handle_write_error(struct nfs_pgio_data *data) { struct nfs_pgio_header *hdr = data->header; @@ -1511,7 +1511,7 @@ static void pnfs_ld_handle_write_error(struct nfs_write_data *data) /* * Called by non rpc-based layout drivers */ -void pnfs_ld_write_done(struct nfs_write_data *data) +void pnfs_ld_write_done(struct nfs_pgio_data *data) { struct nfs_pgio_header *hdr = data->header; @@ -1527,7 +1527,7 @@ EXPORT_SYMBOL_GPL(pnfs_ld_write_done); static void pnfs_write_through_mds(struct nfs_pageio_descriptor *desc, - struct nfs_write_data *data) + struct nfs_pgio_data *data) { struct nfs_pgio_header *hdr = data->header; @@ -1540,7 +1540,7 @@ pnfs_write_through_mds(struct nfs_pageio_descriptor *desc, } static enum pnfs_try_status -pnfs_try_to_write_data(struct nfs_write_data *wdata, +pnfs_try_to_write_data(struct nfs_pgio_data *wdata, const struct rpc_call_ops *call_ops, struct pnfs_layout_segment *lseg, int how) @@ -1564,7 +1564,7 @@ pnfs_try_to_write_data(struct nfs_write_data *wdata, static void pnfs_do_multiple_writes(struct nfs_pageio_descriptor *desc, struct list_head *head, int how) { - struct nfs_write_data *data; + struct nfs_pgio_data *data; const struct rpc_call_ops *call_ops = desc->pg_rpc_callops; struct pnfs_layout_segment *lseg = desc->pg_lseg; @@ -1572,7 +1572,7 @@ pnfs_do_multiple_writes(struct nfs_pageio_descriptor *desc, struct list_head *he while (!list_empty(head)) { enum pnfs_try_status trypnfs; - data = list_first_entry(head, struct nfs_write_data, list); + data = list_first_entry(head, struct nfs_pgio_data, list); list_del_init(&data->list); trypnfs = pnfs_try_to_write_data(data, call_ops, lseg, how); @@ -1647,7 +1647,7 @@ int pnfs_read_done_resend_to_mds(struct inode *inode, } EXPORT_SYMBOL_GPL(pnfs_read_done_resend_to_mds); -static void pnfs_ld_handle_read_error(struct nfs_read_data *data) +static void pnfs_ld_handle_read_error(struct nfs_pgio_data *data) { struct nfs_pgio_header *hdr = data->header; @@ -1666,7 +1666,7 @@ static void pnfs_ld_handle_read_error(struct nfs_read_data *data) /* * Called by non rpc-based layout drivers */ -void pnfs_ld_read_done(struct nfs_read_data *data) +void pnfs_ld_read_done(struct nfs_pgio_data *data) { struct nfs_pgio_header *hdr = data->header; @@ -1682,7 +1682,7 @@ EXPORT_SYMBOL_GPL(pnfs_ld_read_done); static void pnfs_read_through_mds(struct nfs_pageio_descriptor *desc, - struct nfs_read_data *data) + struct nfs_pgio_data *data) { struct nfs_pgio_header *hdr = data->header; @@ -1698,7 +1698,7 @@ pnfs_read_through_mds(struct nfs_pageio_descriptor *desc, * Call the appropriate parallel I/O subsystem read function. */ static enum pnfs_try_status -pnfs_try_to_read_data(struct nfs_read_data *rdata, +pnfs_try_to_read_data(struct nfs_pgio_data *rdata, const struct rpc_call_ops *call_ops, struct pnfs_layout_segment *lseg) { @@ -1722,7 +1722,7 @@ pnfs_try_to_read_data(struct nfs_read_data *rdata, static void pnfs_do_multiple_reads(struct nfs_pageio_descriptor *desc, struct list_head *head) { - struct nfs_read_data *data; + struct nfs_pgio_data *data; const struct rpc_call_ops *call_ops = desc->pg_rpc_callops; struct pnfs_layout_segment *lseg = desc->pg_lseg; @@ -1730,7 +1730,7 @@ pnfs_do_multiple_reads(struct nfs_pageio_descriptor *desc, struct list_head *hea while (!list_empty(head)) { enum pnfs_try_status trypnfs; - data = list_first_entry(head, struct nfs_read_data, list); + data = list_first_entry(head, struct nfs_pgio_data, list); list_del_init(&data->list); trypnfs = pnfs_try_to_read_data(data, call_ops, lseg); @@ -1821,7 +1821,7 @@ void pnfs_set_lo_fail(struct pnfs_layout_segment *lseg) EXPORT_SYMBOL_GPL(pnfs_set_lo_fail); void -pnfs_set_layoutcommit(struct nfs_write_data *wdata) +pnfs_set_layoutcommit(struct nfs_pgio_data *wdata) { struct nfs_pgio_header *hdr = wdata->header; struct inode *inode = hdr->inode; diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h index 94a9a18..0031267 100644 --- a/fs/nfs/pnfs.h +++ b/fs/nfs/pnfs.h @@ -113,8 +113,8 @@ struct pnfs_layoutdriver_type { * Return PNFS_ATTEMPTED to indicate the layout code has attempted * I/O, else return PNFS_NOT_ATTEMPTED to fall back to normal NFS */ - enum pnfs_try_status (*read_pagelist) (struct nfs_read_data *nfs_data); - enum pnfs_try_status (*write_pagelist) (struct nfs_write_data *nfs_data, int how); + enum pnfs_try_status (*read_pagelist) (struct nfs_pgio_data *nfs_data); + enum pnfs_try_status (*write_pagelist) (struct nfs_pgio_data *nfs_data, int how); void (*free_deviceid_node) (struct nfs4_deviceid_node *); @@ -212,13 +212,13 @@ bool pnfs_roc(struct inode *ino); void pnfs_roc_release(struct inode *ino); void pnfs_roc_set_barrier(struct inode *ino, u32 barrier); bool pnfs_roc_drain(struct inode *ino, u32 *barrier, struct rpc_task *task); -void pnfs_set_layoutcommit(struct nfs_write_data *wdata); +void pnfs_set_layoutcommit(struct nfs_pgio_data *wdata); void pnfs_cleanup_layoutcommit(struct nfs4_layoutcommit_data *data); int pnfs_layoutcommit_inode(struct inode *inode, bool sync); int _pnfs_return_layout(struct inode *); int pnfs_commit_and_return_layout(struct inode *); -void pnfs_ld_write_done(struct nfs_write_data *); -void pnfs_ld_read_done(struct nfs_read_data *); +void pnfs_ld_write_done(struct nfs_pgio_data *); +void pnfs_ld_read_done(struct nfs_pgio_data *); struct pnfs_layout_segment *pnfs_update_layout(struct inode *ino, struct nfs_open_context *ctx, loff_t pos, diff --git a/fs/nfs/proc.c b/fs/nfs/proc.c index 8cc227f..c54829e 100644 --- a/fs/nfs/proc.c +++ b/fs/nfs/proc.c @@ -578,7 +578,7 @@ nfs_proc_pathconf(struct nfs_server *server, struct nfs_fh *fhandle, return 0; } -static int nfs_read_done(struct rpc_task *task, struct nfs_read_data *data) +static int nfs_read_done(struct rpc_task *task, struct nfs_pgio_data *data) { struct inode *inode = data->header->inode; @@ -594,18 +594,18 @@ static int nfs_read_done(struct rpc_task *task, struct nfs_read_data *data) return 0; } -static void nfs_proc_read_setup(struct nfs_read_data *data, struct rpc_message *msg) +static void nfs_proc_read_setup(struct nfs_pgio_data *data, struct rpc_message *msg) { msg->rpc_proc = &nfs_procedures[NFSPROC_READ]; } -static int nfs_proc_read_rpc_prepare(struct rpc_task *task, struct nfs_read_data *data) +static int nfs_proc_read_rpc_prepare(struct rpc_task *task, struct nfs_pgio_data *data) { rpc_call_start(task); return 0; } -static int nfs_write_done(struct rpc_task *task, struct nfs_write_data *data) +static int nfs_write_done(struct rpc_task *task, struct nfs_pgio_data *data) { struct inode *inode = data->header->inode; @@ -614,14 +614,14 @@ static int nfs_write_done(struct rpc_task *task, struct nfs_write_data *data) return 0; } -static void nfs_proc_write_setup(struct nfs_write_data *data, struct rpc_message *msg) +static void nfs_proc_write_setup(struct nfs_pgio_data *data, struct rpc_message *msg) { /* Note: NFSv2 ignores @stable and always uses NFS_FILE_SYNC */ data->args.stable = NFS_FILE_SYNC; msg->rpc_proc = &nfs_procedures[NFSPROC_WRITE]; } -static int nfs_proc_write_rpc_prepare(struct rpc_task *task, struct nfs_write_data *data) +static int nfs_proc_write_rpc_prepare(struct rpc_task *task, struct nfs_pgio_data *data) { rpc_call_start(task); return 0; diff --git a/fs/nfs/read.c b/fs/nfs/read.c index 473bba3..9e426cc 100644 --- a/fs/nfs/read.c +++ b/fs/nfs/read.c @@ -51,10 +51,10 @@ struct nfs_read_header *nfs_readhdr_alloc(void) } EXPORT_SYMBOL_GPL(nfs_readhdr_alloc); -static struct nfs_read_data *nfs_readdata_alloc(struct nfs_pgio_header *hdr, +static struct nfs_pgio_data *nfs_readdata_alloc(struct nfs_pgio_header *hdr, unsigned int pagecount) { - struct nfs_read_data *data, *prealloc; + struct nfs_pgio_data *data, *prealloc; prealloc = &container_of(hdr, struct nfs_read_header, header)->rpc_data; if (prealloc->header == NULL) @@ -84,7 +84,7 @@ void nfs_readhdr_free(struct nfs_pgio_header *hdr) } EXPORT_SYMBOL_GPL(nfs_readhdr_free); -void nfs_readdata_release(struct nfs_read_data *rdata) +void nfs_readdata_release(struct nfs_pgio_data *rdata) { struct nfs_pgio_header *hdr = rdata->header; struct nfs_read_header *read_header = container_of(hdr, struct nfs_read_header, header); @@ -212,7 +212,7 @@ out: } int nfs_initiate_read(struct rpc_clnt *clnt, - struct nfs_read_data *data, + struct nfs_pgio_data *data, const struct rpc_call_ops *call_ops, int flags) { struct inode *inode = data->header->inode; @@ -255,7 +255,7 @@ EXPORT_SYMBOL_GPL(nfs_initiate_read); /* * Set up the NFS read request struct */ -static void nfs_read_rpcsetup(struct nfs_read_data *data, +static void nfs_read_rpcsetup(struct nfs_pgio_data *data, unsigned int count, unsigned int offset) { struct nfs_page *req = data->header->req; @@ -274,7 +274,7 @@ static void nfs_read_rpcsetup(struct nfs_read_data *data, nfs_fattr_init(&data->fattr); } -static int nfs_do_read(struct nfs_read_data *data, +static int nfs_do_read(struct nfs_pgio_data *data, const struct rpc_call_ops *call_ops) { struct inode *inode = data->header->inode; @@ -286,13 +286,13 @@ static int nfs_do_multiple_reads(struct list_head *head, const struct rpc_call_ops *call_ops) { - struct nfs_read_data *data; + struct nfs_pgio_data *data; int ret = 0; while (!list_empty(head)) { int ret2; - data = list_first_entry(head, struct nfs_read_data, list); + data = list_first_entry(head, struct nfs_pgio_data, list); list_del_init(&data->list); ret2 = nfs_do_read(data, call_ops); @@ -324,8 +324,8 @@ static void nfs_pagein_error(struct nfs_pageio_descriptor *desc, { set_bit(NFS_IOHDR_REDO, &hdr->flags); while (!list_empty(&hdr->rpc_list)) { - struct nfs_read_data *data = list_first_entry(&hdr->rpc_list, - struct nfs_read_data, list); + struct nfs_pgio_data *data = list_first_entry(&hdr->rpc_list, + struct nfs_pgio_data, list); list_del(&data->list); nfs_readdata_release(data); } @@ -350,7 +350,7 @@ static int nfs_pagein_multi(struct nfs_pageio_descriptor *desc, { struct nfs_page *req = hdr->req; struct page *page = req->wb_page; - struct nfs_read_data *data; + struct nfs_pgio_data *data; size_t rsize = desc->pg_bsize, nbytes; unsigned int offset; @@ -382,7 +382,7 @@ static int nfs_pagein_one(struct nfs_pageio_descriptor *desc, { struct nfs_page *req; struct page **pages; - struct nfs_read_data *data; + struct nfs_pgio_data *data; struct list_head *head = &desc->pg_list; data = nfs_readdata_alloc(hdr, nfs_page_array_len(desc->pg_base, @@ -447,7 +447,7 @@ static const struct nfs_pageio_ops nfs_pageio_read_ops = { * This is the callback from RPC telling us whether a reply was * received or some error occurred (timeout or socket shutdown). */ -int nfs_readpage_result(struct rpc_task *task, struct nfs_read_data *data) +int nfs_readpage_result(struct rpc_task *task, struct nfs_pgio_data *data) { struct inode *inode = data->header->inode; int status; @@ -468,7 +468,7 @@ int nfs_readpage_result(struct rpc_task *task, struct nfs_read_data *data) return 0; } -static void nfs_readpage_retry(struct rpc_task *task, struct nfs_read_data *data) +static void nfs_readpage_retry(struct rpc_task *task, struct nfs_pgio_data *data) { struct nfs_pgio_args *argp = &data->args; struct nfs_pgio_res *resp = &data->res; @@ -490,7 +490,7 @@ static void nfs_readpage_retry(struct rpc_task *task, struct nfs_read_data *data static void nfs_readpage_result_common(struct rpc_task *task, void *calldata) { - struct nfs_read_data *data = calldata; + struct nfs_pgio_data *data = calldata; struct nfs_pgio_header *hdr = data->header; /* Note the only returns of nfs_readpage_result are 0 and -EAGAIN */ @@ -520,7 +520,7 @@ static void nfs_readpage_release_common(void *calldata) void nfs_read_prepare(struct rpc_task *task, void *calldata) { - struct nfs_read_data *data = calldata; + struct nfs_pgio_data *data = calldata; int err; err = NFS_PROTO(data->header->inode)->read_rpc_prepare(task, data); if (err) diff --git a/fs/nfs/write.c b/fs/nfs/write.c index d392a70..3a2fc5c 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -88,10 +88,10 @@ struct nfs_write_header *nfs_writehdr_alloc(void) } EXPORT_SYMBOL_GPL(nfs_writehdr_alloc); -static struct nfs_write_data *nfs_writedata_alloc(struct nfs_pgio_header *hdr, +static struct nfs_pgio_data *nfs_writedata_alloc(struct nfs_pgio_header *hdr, unsigned int pagecount) { - struct nfs_write_data *data, *prealloc; + struct nfs_pgio_data *data, *prealloc; prealloc = &container_of(hdr, struct nfs_write_header, header)->rpc_data; if (prealloc->header == NULL) @@ -120,7 +120,7 @@ void nfs_writehdr_free(struct nfs_pgio_header *hdr) } EXPORT_SYMBOL_GPL(nfs_writehdr_free); -void nfs_writedata_release(struct nfs_write_data *wdata) +void nfs_writedata_release(struct nfs_pgio_data *wdata) { struct nfs_pgio_header *hdr = wdata->header; struct nfs_write_header *write_header = container_of(hdr, struct nfs_write_header, header); @@ -582,7 +582,7 @@ nfs_clear_request_commit(struct nfs_page *req) } static inline -int nfs_write_need_commit(struct nfs_write_data *data) +int nfs_write_need_commit(struct nfs_pgio_data *data) { if (data->verf.committed == NFS_DATA_SYNC) return data->header->lseg == NULL; @@ -613,7 +613,7 @@ nfs_clear_request_commit(struct nfs_page *req) } static inline -int nfs_write_need_commit(struct nfs_write_data *data) +int nfs_write_need_commit(struct nfs_pgio_data *data) { return 0; } @@ -990,7 +990,7 @@ static int flush_task_priority(int how) } int nfs_initiate_write(struct rpc_clnt *clnt, - struct nfs_write_data *data, + struct nfs_pgio_data *data, const struct rpc_call_ops *call_ops, int how, int flags) { @@ -1047,7 +1047,7 @@ EXPORT_SYMBOL_GPL(nfs_initiate_write); /* * Set up the argument/result storage required for the RPC call. */ -static void nfs_write_rpcsetup(struct nfs_write_data *data, +static void nfs_write_rpcsetup(struct nfs_pgio_data *data, unsigned int count, unsigned int offset, int how, struct nfs_commit_info *cinfo) { @@ -1082,7 +1082,7 @@ static void nfs_write_rpcsetup(struct nfs_write_data *data, nfs_fattr_init(&data->fattr); } -static int nfs_do_write(struct nfs_write_data *data, +static int nfs_do_write(struct nfs_pgio_data *data, const struct rpc_call_ops *call_ops, int how) { @@ -1095,13 +1095,13 @@ static int nfs_do_multiple_writes(struct list_head *head, const struct rpc_call_ops *call_ops, int how) { - struct nfs_write_data *data; + struct nfs_pgio_data *data; int ret = 0; while (!list_empty(head)) { int ret2; - data = list_first_entry(head, struct nfs_write_data, list); + data = list_first_entry(head, struct nfs_pgio_data, list); list_del_init(&data->list); ret2 = nfs_do_write(data, call_ops, how); @@ -1144,8 +1144,8 @@ static void nfs_flush_error(struct nfs_pageio_descriptor *desc, { set_bit(NFS_IOHDR_REDO, &hdr->flags); while (!list_empty(&hdr->rpc_list)) { - struct nfs_write_data *data = list_first_entry(&hdr->rpc_list, - struct nfs_write_data, list); + struct nfs_pgio_data *data = list_first_entry(&hdr->rpc_list, + struct nfs_pgio_data, list); list_del(&data->list); nfs_writedata_release(data); } @@ -1161,7 +1161,7 @@ static int nfs_flush_multi(struct nfs_pageio_descriptor *desc, { struct nfs_page *req = hdr->req; struct page *page = req->wb_page; - struct nfs_write_data *data; + struct nfs_pgio_data *data; size_t wsize = desc->pg_bsize, nbytes; unsigned int offset; int requests = 0; @@ -1211,7 +1211,7 @@ static int nfs_flush_one(struct nfs_pageio_descriptor *desc, { struct nfs_page *req; struct page **pages; - struct nfs_write_data *data; + struct nfs_pgio_data *data; struct list_head *head = &desc->pg_list; struct nfs_commit_info cinfo; @@ -1305,7 +1305,7 @@ EXPORT_SYMBOL_GPL(nfs_pageio_reset_write_mds); void nfs_write_prepare(struct rpc_task *task, void *calldata) { - struct nfs_write_data *data = calldata; + struct nfs_pgio_data *data = calldata; int err; err = NFS_PROTO(data->header->inode)->write_rpc_prepare(task, data); if (err) @@ -1328,14 +1328,14 @@ void nfs_commit_prepare(struct rpc_task *task, void *calldata) */ static void nfs_writeback_done_common(struct rpc_task *task, void *calldata) { - struct nfs_write_data *data = calldata; + struct nfs_pgio_data *data = calldata; nfs_writeback_done(task, data); } static void nfs_writeback_release_common(void *calldata) { - struct nfs_write_data *data = calldata; + struct nfs_pgio_data *data = calldata; struct nfs_pgio_header *hdr = data->header; int status = data->task.tk_status; @@ -1386,7 +1386,7 @@ static int nfs_should_remove_suid(const struct inode *inode) /* * This function is called when the WRITE call is complete. */ -void nfs_writeback_done(struct rpc_task *task, struct nfs_write_data *data) +void nfs_writeback_done(struct rpc_task *task, struct nfs_pgio_data *data) { struct nfs_pgio_args *argp = &data->args; struct nfs_pgio_res *resp = &data->res; -- cgit v1.1 From c0752cdfbbb691cfe98812f7aed8ce1e766823c4 Mon Sep 17 00:00:00 2001 From: Anna Schumaker Date: Tue, 6 May 2014 09:12:27 -0400 Subject: NFS: Create a common read and write header struct The only difference is the write verifier field, but we can keep that for a little bit longer. Signed-off-by: Anna Schumaker Signed-off-by: Trond Myklebust --- fs/nfs/internal.h | 4 ++-- fs/nfs/pnfs.c | 4 ++-- fs/nfs/read.c | 14 +++++++------- fs/nfs/write.c | 14 +++++++------- 4 files changed, 18 insertions(+), 18 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h index af01b80..b0e7a41 100644 --- a/fs/nfs/internal.h +++ b/fs/nfs/internal.h @@ -395,7 +395,7 @@ extern int nfs4_get_rootfh(struct nfs_server *server, struct nfs_fh *mntfh, bool struct nfs_pgio_completion_ops; /* read.c */ -extern struct nfs_read_header *nfs_readhdr_alloc(void); +extern struct nfs_rw_header *nfs_readhdr_alloc(void); extern void nfs_readhdr_free(struct nfs_pgio_header *hdr); extern void nfs_pageio_init_read(struct nfs_pageio_descriptor *pgio, struct inode *inode, bool force_mds, @@ -424,7 +424,7 @@ int nfs_remount(struct super_block *sb, int *flags, char *raw_data); extern void nfs_pageio_init_write(struct nfs_pageio_descriptor *pgio, struct inode *inode, int ioflags, bool force_mds, const struct nfs_pgio_completion_ops *compl_ops); -extern struct nfs_write_header *nfs_writehdr_alloc(void); +extern struct nfs_rw_header *nfs_writehdr_alloc(void); extern void nfs_writehdr_free(struct nfs_pgio_header *hdr); extern int nfs_generic_flush(struct nfs_pageio_descriptor *desc, struct nfs_pgio_header *hdr); diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index e9cea3a..43cfe11 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c @@ -1592,7 +1592,7 @@ EXPORT_SYMBOL_GPL(pnfs_writehdr_free); int pnfs_generic_pg_writepages(struct nfs_pageio_descriptor *desc) { - struct nfs_write_header *whdr; + struct nfs_rw_header *whdr; struct nfs_pgio_header *hdr; int ret; @@ -1750,7 +1750,7 @@ EXPORT_SYMBOL_GPL(pnfs_readhdr_free); int pnfs_generic_pg_readpages(struct nfs_pageio_descriptor *desc) { - struct nfs_read_header *rhdr; + struct nfs_rw_header *rhdr; struct nfs_pgio_header *hdr; int ret; diff --git a/fs/nfs/read.c b/fs/nfs/read.c index 9e426cc..d29ca36 100644 --- a/fs/nfs/read.c +++ b/fs/nfs/read.c @@ -34,9 +34,9 @@ static const struct nfs_pgio_completion_ops nfs_async_read_completion_ops; static struct kmem_cache *nfs_rdata_cachep; -struct nfs_read_header *nfs_readhdr_alloc(void) +struct nfs_rw_header *nfs_readhdr_alloc(void) { - struct nfs_read_header *rhdr; + struct nfs_rw_header *rhdr; rhdr = kmem_cache_zalloc(nfs_rdata_cachep, GFP_KERNEL); if (rhdr) { @@ -56,7 +56,7 @@ static struct nfs_pgio_data *nfs_readdata_alloc(struct nfs_pgio_header *hdr, { struct nfs_pgio_data *data, *prealloc; - prealloc = &container_of(hdr, struct nfs_read_header, header)->rpc_data; + prealloc = &container_of(hdr, struct nfs_rw_header, header)->rpc_data; if (prealloc->header == NULL) data = prealloc; else @@ -78,7 +78,7 @@ out: void nfs_readhdr_free(struct nfs_pgio_header *hdr) { - struct nfs_read_header *rhdr = container_of(hdr, struct nfs_read_header, header); + struct nfs_rw_header *rhdr = container_of(hdr, struct nfs_rw_header, header); kmem_cache_free(nfs_rdata_cachep, rhdr); } @@ -87,7 +87,7 @@ EXPORT_SYMBOL_GPL(nfs_readhdr_free); void nfs_readdata_release(struct nfs_pgio_data *rdata) { struct nfs_pgio_header *hdr = rdata->header; - struct nfs_read_header *read_header = container_of(hdr, struct nfs_read_header, header); + struct nfs_rw_header *read_header = container_of(hdr, struct nfs_rw_header, header); put_nfs_open_context(rdata->args.context); if (rdata->pages.pagevec != rdata->pages.page_array) @@ -417,7 +417,7 @@ EXPORT_SYMBOL_GPL(nfs_generic_pagein); static int nfs_generic_pg_readpages(struct nfs_pageio_descriptor *desc) { - struct nfs_read_header *rhdr; + struct nfs_rw_header *rhdr; struct nfs_pgio_header *hdr; int ret; @@ -680,7 +680,7 @@ out: int __init nfs_init_readpagecache(void) { nfs_rdata_cachep = kmem_cache_create("nfs_read_data", - sizeof(struct nfs_read_header), + sizeof(struct nfs_rw_header), 0, SLAB_HWCACHE_ALIGN, NULL); if (nfs_rdata_cachep == NULL) diff --git a/fs/nfs/write.c b/fs/nfs/write.c index 3a2fc5c..37c4c98 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -70,9 +70,9 @@ void nfs_commit_free(struct nfs_commit_data *p) } EXPORT_SYMBOL_GPL(nfs_commit_free); -struct nfs_write_header *nfs_writehdr_alloc(void) +struct nfs_rw_header *nfs_writehdr_alloc(void) { - struct nfs_write_header *p = mempool_alloc(nfs_wdata_mempool, GFP_NOIO); + struct nfs_rw_header *p = mempool_alloc(nfs_wdata_mempool, GFP_NOIO); if (p) { struct nfs_pgio_header *hdr = &p->header; @@ -93,7 +93,7 @@ static struct nfs_pgio_data *nfs_writedata_alloc(struct nfs_pgio_header *hdr, { struct nfs_pgio_data *data, *prealloc; - prealloc = &container_of(hdr, struct nfs_write_header, header)->rpc_data; + prealloc = &container_of(hdr, struct nfs_rw_header, header)->rpc_data; if (prealloc->header == NULL) data = prealloc; else @@ -115,7 +115,7 @@ out: void nfs_writehdr_free(struct nfs_pgio_header *hdr) { - struct nfs_write_header *whdr = container_of(hdr, struct nfs_write_header, header); + struct nfs_rw_header *whdr = container_of(hdr, struct nfs_rw_header, header); mempool_free(whdr, nfs_wdata_mempool); } EXPORT_SYMBOL_GPL(nfs_writehdr_free); @@ -123,7 +123,7 @@ EXPORT_SYMBOL_GPL(nfs_writehdr_free); void nfs_writedata_release(struct nfs_pgio_data *wdata) { struct nfs_pgio_header *hdr = wdata->header; - struct nfs_write_header *write_header = container_of(hdr, struct nfs_write_header, header); + struct nfs_rw_header *write_header = container_of(hdr, struct nfs_rw_header, header); put_nfs_open_context(wdata->args.context); if (wdata->pages.pagevec != wdata->pages.page_array) @@ -1253,7 +1253,7 @@ EXPORT_SYMBOL_GPL(nfs_generic_flush); static int nfs_generic_pg_writepages(struct nfs_pageio_descriptor *desc) { - struct nfs_write_header *whdr; + struct nfs_rw_header *whdr; struct nfs_pgio_header *hdr; int ret; @@ -1910,7 +1910,7 @@ int nfs_migrate_page(struct address_space *mapping, struct page *newpage, int __init nfs_init_writepagecache(void) { nfs_wdata_cachep = kmem_cache_create("nfs_write_data", - sizeof(struct nfs_write_header), + sizeof(struct nfs_rw_header), 0, SLAB_HWCACHE_ALIGN, NULL); if (nfs_wdata_cachep == NULL) -- cgit v1.1 From f79d06f544a797d75cbf5256a5d06c4b3d2759cc Mon Sep 17 00:00:00 2001 From: Anna Schumaker Date: Tue, 6 May 2014 09:12:28 -0400 Subject: NFS: Move the write verifier into the nfs_pgio_header The header had a pointer to the verifier that was set from the old write data struct. We don't need to keep the pointer around now that we have shared structures. Signed-off-by: Anna Schumaker Signed-off-by: Trond Myklebust --- fs/nfs/direct.c | 4 ++-- fs/nfs/write.c | 7 +++---- 2 files changed, 5 insertions(+), 6 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c index bbe688e..164b016 100644 --- a/fs/nfs/direct.c +++ b/fs/nfs/direct.c @@ -813,12 +813,12 @@ static void nfs_direct_write_completion(struct nfs_pgio_header *hdr) if (dreq->flags == NFS_ODIRECT_RESCHED_WRITES) bit = NFS_IOHDR_NEED_RESCHED; else if (dreq->flags == 0) { - memcpy(&dreq->verf, hdr->verf, + memcpy(&dreq->verf, &hdr->verf, sizeof(dreq->verf)); bit = NFS_IOHDR_NEED_COMMIT; dreq->flags = NFS_ODIRECT_DO_COMMIT; } else if (dreq->flags == NFS_ODIRECT_DO_COMMIT) { - if (memcmp(&dreq->verf, hdr->verf, sizeof(dreq->verf))) { + if (memcmp(&dreq->verf, &hdr->verf, sizeof(dreq->verf))) { dreq->flags = NFS_ODIRECT_RESCHED_WRITES; bit = NFS_IOHDR_NEED_RESCHED; } else diff --git a/fs/nfs/write.c b/fs/nfs/write.c index 37c4c98..321a791 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -82,7 +82,6 @@ struct nfs_rw_header *nfs_writehdr_alloc(void) INIT_LIST_HEAD(&hdr->rpc_list); spin_lock_init(&hdr->lock); atomic_set(&hdr->refcnt, 0); - hdr->verf = &p->verf; } return p; } @@ -644,7 +643,7 @@ static void nfs_write_completion(struct nfs_pgio_header *hdr) goto next; } if (test_bit(NFS_IOHDR_NEED_COMMIT, &hdr->flags)) { - memcpy(&req->wb_verf, &hdr->verf->verifier, sizeof(req->wb_verf)); + memcpy(&req->wb_verf, &hdr->verf.verifier, sizeof(req->wb_verf)); nfs_mark_request_commit(req, hdr->lseg, &cinfo); goto next; } @@ -1344,8 +1343,8 @@ static void nfs_writeback_release_common(void *calldata) if (test_bit(NFS_IOHDR_NEED_RESCHED, &hdr->flags)) ; /* Do nothing */ else if (!test_and_set_bit(NFS_IOHDR_NEED_COMMIT, &hdr->flags)) - memcpy(hdr->verf, &data->verf, sizeof(*hdr->verf)); - else if (memcmp(hdr->verf, &data->verf, sizeof(*hdr->verf))) + memcpy(&hdr->verf, &data->verf, sizeof(hdr->verf)); + else if (memcmp(&hdr->verf, &data->verf, sizeof(hdr->verf))) set_bit(NFS_IOHDR_NEED_RESCHED, &hdr->flags); spin_unlock(&hdr->lock); } -- cgit v1.1 From 00bfa30abe86982ce1929e9cabd703e5546106bd Mon Sep 17 00:00:00 2001 From: Anna Schumaker Date: Tue, 6 May 2014 09:12:29 -0400 Subject: NFS: Create a common pgio_alloc and pgio_release function These functions are identical for the read and write paths so they can be combined. Signed-off-by: Anna Schumaker Signed-off-by: Trond Myklebust --- fs/nfs/internal.h | 6 +++--- fs/nfs/pagelist.c | 62 ++++++++++++++++++++++++++++++++++++++++++++++++++++++- fs/nfs/pnfs.c | 4 ++-- fs/nfs/read.c | 54 ++++-------------------------------------------- fs/nfs/write.c | 54 ++++-------------------------------------------- 5 files changed, 74 insertions(+), 106 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h index b0e7a41..5ddc142 100644 --- a/fs/nfs/internal.h +++ b/fs/nfs/internal.h @@ -231,13 +231,15 @@ extern void nfs_destroy_writepagecache(void); extern int __init nfs_init_directcache(void); extern void nfs_destroy_directcache(void); -extern bool nfs_pgarray_set(struct nfs_page_array *p, unsigned int pagecount); extern void nfs_pgheader_init(struct nfs_pageio_descriptor *desc, struct nfs_pgio_header *hdr, void (*release)(struct nfs_pgio_header *hdr)); void nfs_set_pgio_error(struct nfs_pgio_header *hdr, int error, loff_t pos); int nfs_iocounter_wait(struct nfs_io_counter *c); +struct nfs_pgio_data *nfs_pgio_data_alloc(struct nfs_pgio_header *, unsigned int); +void nfs_pgio_data_release(struct nfs_pgio_data *); + static inline void nfs_iocounter_init(struct nfs_io_counter *c) { c->flags = 0; @@ -407,7 +409,6 @@ extern void nfs_read_prepare(struct rpc_task *task, void *calldata); extern int nfs_generic_pagein(struct nfs_pageio_descriptor *desc, struct nfs_pgio_header *hdr); extern void nfs_pageio_reset_read_mds(struct nfs_pageio_descriptor *pgio); -extern void nfs_readdata_release(struct nfs_pgio_data *rdata); /* super.c */ void nfs_clone_super(struct super_block *, struct nfs_mount_info *); @@ -429,7 +430,6 @@ extern void nfs_writehdr_free(struct nfs_pgio_header *hdr); extern int nfs_generic_flush(struct nfs_pageio_descriptor *desc, struct nfs_pgio_header *hdr); extern void nfs_pageio_reset_write_mds(struct nfs_pageio_descriptor *pgio); -extern void nfs_writedata_release(struct nfs_pgio_data *wdata); extern void nfs_commit_free(struct nfs_commit_data *p); extern int nfs_initiate_write(struct rpc_clnt *clnt, struct nfs_pgio_data *data, diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c index 2ffebf2..a98ccf7 100644 --- a/fs/nfs/pagelist.c +++ b/fs/nfs/pagelist.c @@ -26,7 +26,7 @@ static struct kmem_cache *nfs_page_cachep; -bool nfs_pgarray_set(struct nfs_page_array *p, unsigned int pagecount) +static bool nfs_pgarray_set(struct nfs_page_array *p, unsigned int pagecount) { p->npages = pagecount; if (pagecount <= ARRAY_SIZE(p->page_array)) @@ -295,6 +295,66 @@ bool nfs_generic_pg_test(struct nfs_pageio_descriptor *desc, struct nfs_page *pr } EXPORT_SYMBOL_GPL(nfs_generic_pg_test); +static inline struct nfs_rw_header *NFS_RW_HEADER(struct nfs_pgio_header *hdr) +{ + return container_of(hdr, struct nfs_rw_header, header); +} + +/** + * nfs_pgio_data_alloc - Allocate pageio data + * @hdr: The header making a request + * @pagecount: Number of pages to create + */ +struct nfs_pgio_data *nfs_pgio_data_alloc(struct nfs_pgio_header *hdr, + unsigned int pagecount) +{ + struct nfs_pgio_data *data, *prealloc; + + prealloc = &NFS_RW_HEADER(hdr)->rpc_data; + if (prealloc->header == NULL) + data = prealloc; + else + data = kzalloc(sizeof(*data), GFP_KERNEL); + if (!data) + goto out; + + if (nfs_pgarray_set(&data->pages, pagecount)) { + data->header = hdr; + atomic_inc(&hdr->refcnt); + } else { + if (data != prealloc) + kfree(data); + data = NULL; + } +out: + return data; +} + +/** + * nfs_pgio_data_release - Properly free pageio data + * @data: The data to release + */ +void nfs_pgio_data_release(struct nfs_pgio_data *data) +{ + struct nfs_pgio_header *hdr = data->header; + struct nfs_rw_header *pageio_header = NFS_RW_HEADER(hdr); + + put_nfs_open_context(data->args.context); + if (data->pages.pagevec != data->pages.page_array) + kfree(data->pages.pagevec); + if (data == &pageio_header->rpc_data) { + data->header = NULL; + data = NULL; + } + if (atomic_dec_and_test(&hdr->refcnt)) + hdr->completion_ops->completion(hdr); + /* Note: we only free the rpc_task after callbacks are done. + * See the comment in rpc_free_task() for why + */ + kfree(data); +} +EXPORT_SYMBOL_GPL(nfs_pgio_data_release); + /** * nfs_pageio_init - initialise a page io descriptor * @desc: pointer to descriptor diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index 43cfe11..e192ba6 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c @@ -1536,7 +1536,7 @@ pnfs_write_through_mds(struct nfs_pageio_descriptor *desc, nfs_pageio_reset_write_mds(desc); desc->pg_recoalesce = 1; } - nfs_writedata_release(data); + nfs_pgio_data_release(data); } static enum pnfs_try_status @@ -1691,7 +1691,7 @@ pnfs_read_through_mds(struct nfs_pageio_descriptor *desc, nfs_pageio_reset_read_mds(desc); desc->pg_recoalesce = 1; } - nfs_readdata_release(data); + nfs_pgio_data_release(data); } /* diff --git a/fs/nfs/read.c b/fs/nfs/read.c index d29ca36..ab4c1a5 100644 --- a/fs/nfs/read.c +++ b/fs/nfs/read.c @@ -51,31 +51,6 @@ struct nfs_rw_header *nfs_readhdr_alloc(void) } EXPORT_SYMBOL_GPL(nfs_readhdr_alloc); -static struct nfs_pgio_data *nfs_readdata_alloc(struct nfs_pgio_header *hdr, - unsigned int pagecount) -{ - struct nfs_pgio_data *data, *prealloc; - - prealloc = &container_of(hdr, struct nfs_rw_header, header)->rpc_data; - if (prealloc->header == NULL) - data = prealloc; - else - data = kzalloc(sizeof(*data), GFP_KERNEL); - if (!data) - goto out; - - if (nfs_pgarray_set(&data->pages, pagecount)) { - data->header = hdr; - atomic_inc(&hdr->refcnt); - } else { - if (data != prealloc) - kfree(data); - data = NULL; - } -out: - return data; -} - void nfs_readhdr_free(struct nfs_pgio_header *hdr) { struct nfs_rw_header *rhdr = container_of(hdr, struct nfs_rw_header, header); @@ -84,27 +59,6 @@ void nfs_readhdr_free(struct nfs_pgio_header *hdr) } EXPORT_SYMBOL_GPL(nfs_readhdr_free); -void nfs_readdata_release(struct nfs_pgio_data *rdata) -{ - struct nfs_pgio_header *hdr = rdata->header; - struct nfs_rw_header *read_header = container_of(hdr, struct nfs_rw_header, header); - - put_nfs_open_context(rdata->args.context); - if (rdata->pages.pagevec != rdata->pages.page_array) - kfree(rdata->pages.pagevec); - if (rdata == &read_header->rpc_data) { - rdata->header = NULL; - rdata = NULL; - } - if (atomic_dec_and_test(&hdr->refcnt)) - hdr->completion_ops->completion(hdr); - /* Note: we only free the rpc_task after callbacks are done. - * See the comment in rpc_free_task() for why - */ - kfree(rdata); -} -EXPORT_SYMBOL_GPL(nfs_readdata_release); - static int nfs_return_empty_page(struct page *page) { @@ -327,7 +281,7 @@ static void nfs_pagein_error(struct nfs_pageio_descriptor *desc, struct nfs_pgio_data *data = list_first_entry(&hdr->rpc_list, struct nfs_pgio_data, list); list_del(&data->list); - nfs_readdata_release(data); + nfs_pgio_data_release(data); } desc->pg_completion_ops->error_cleanup(&desc->pg_list); } @@ -359,7 +313,7 @@ static int nfs_pagein_multi(struct nfs_pageio_descriptor *desc, do { size_t len = min(nbytes,rsize); - data = nfs_readdata_alloc(hdr, 1); + data = nfs_pgio_data_alloc(hdr, 1); if (!data) { nfs_pagein_error(desc, hdr); return -ENOMEM; @@ -385,7 +339,7 @@ static int nfs_pagein_one(struct nfs_pageio_descriptor *desc, struct nfs_pgio_data *data; struct list_head *head = &desc->pg_list; - data = nfs_readdata_alloc(hdr, nfs_page_array_len(desc->pg_base, + data = nfs_pgio_data_alloc(hdr, nfs_page_array_len(desc->pg_base, desc->pg_count)); if (!data) { nfs_pagein_error(desc, hdr); @@ -515,7 +469,7 @@ static void nfs_readpage_result_common(struct rpc_task *task, void *calldata) static void nfs_readpage_release_common(void *calldata) { - nfs_readdata_release(calldata); + nfs_pgio_data_release(calldata); } void nfs_read_prepare(struct rpc_task *task, void *calldata) diff --git a/fs/nfs/write.c b/fs/nfs/write.c index 321a791..0dc4d6a 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -87,31 +87,6 @@ struct nfs_rw_header *nfs_writehdr_alloc(void) } EXPORT_SYMBOL_GPL(nfs_writehdr_alloc); -static struct nfs_pgio_data *nfs_writedata_alloc(struct nfs_pgio_header *hdr, - unsigned int pagecount) -{ - struct nfs_pgio_data *data, *prealloc; - - prealloc = &container_of(hdr, struct nfs_rw_header, header)->rpc_data; - if (prealloc->header == NULL) - data = prealloc; - else - data = kzalloc(sizeof(*data), GFP_KERNEL); - if (!data) - goto out; - - if (nfs_pgarray_set(&data->pages, pagecount)) { - data->header = hdr; - atomic_inc(&hdr->refcnt); - } else { - if (data != prealloc) - kfree(data); - data = NULL; - } -out: - return data; -} - void nfs_writehdr_free(struct nfs_pgio_header *hdr) { struct nfs_rw_header *whdr = container_of(hdr, struct nfs_rw_header, header); @@ -119,27 +94,6 @@ void nfs_writehdr_free(struct nfs_pgio_header *hdr) } EXPORT_SYMBOL_GPL(nfs_writehdr_free); -void nfs_writedata_release(struct nfs_pgio_data *wdata) -{ - struct nfs_pgio_header *hdr = wdata->header; - struct nfs_rw_header *write_header = container_of(hdr, struct nfs_rw_header, header); - - put_nfs_open_context(wdata->args.context); - if (wdata->pages.pagevec != wdata->pages.page_array) - kfree(wdata->pages.pagevec); - if (wdata == &write_header->rpc_data) { - wdata->header = NULL; - wdata = NULL; - } - if (atomic_dec_and_test(&hdr->refcnt)) - hdr->completion_ops->completion(hdr); - /* Note: we only free the rpc_task after callbacks are done. - * See the comment in rpc_free_task() for why - */ - kfree(wdata); -} -EXPORT_SYMBOL_GPL(nfs_writedata_release); - static void nfs_context_set_write_error(struct nfs_open_context *ctx, int error) { ctx->error = error; @@ -1146,7 +1100,7 @@ static void nfs_flush_error(struct nfs_pageio_descriptor *desc, struct nfs_pgio_data *data = list_first_entry(&hdr->rpc_list, struct nfs_pgio_data, list); list_del(&data->list); - nfs_writedata_release(data); + nfs_pgio_data_release(data); } desc->pg_completion_ops->error_cleanup(&desc->pg_list); } @@ -1179,7 +1133,7 @@ static int nfs_flush_multi(struct nfs_pageio_descriptor *desc, do { size_t len = min(nbytes, wsize); - data = nfs_writedata_alloc(hdr, 1); + data = nfs_pgio_data_alloc(hdr, 1); if (!data) { nfs_flush_error(desc, hdr); return -ENOMEM; @@ -1214,7 +1168,7 @@ static int nfs_flush_one(struct nfs_pageio_descriptor *desc, struct list_head *head = &desc->pg_list; struct nfs_commit_info cinfo; - data = nfs_writedata_alloc(hdr, nfs_page_array_len(desc->pg_base, + data = nfs_pgio_data_alloc(hdr, nfs_page_array_len(desc->pg_base, desc->pg_count)); if (!data) { nfs_flush_error(desc, hdr); @@ -1348,7 +1302,7 @@ static void nfs_writeback_release_common(void *calldata) set_bit(NFS_IOHDR_NEED_RESCHED, &hdr->flags); spin_unlock(&hdr->lock); } - nfs_writedata_release(data); + nfs_pgio_data_release(data); } static const struct rpc_call_ops nfs_write_common_ops = { -- cgit v1.1 From 4a0de55c565a36cac8422b76a948c4634a90781e Mon Sep 17 00:00:00 2001 From: Anna Schumaker Date: Tue, 6 May 2014 09:12:30 -0400 Subject: NFS: Create a common rw_header_alloc and rw_header_free function I create a new struct nfs_rw_ops to decide the differences between reads and writes. This struct will be set when initializing a new nfs_pgio_descriptor, and then passed on to the nfs_rw_header when a new header is allocated. Signed-off-by: Anna Schumaker Signed-off-by: Trond Myklebust --- fs/nfs/internal.h | 6 ++---- fs/nfs/pagelist.c | 33 +++++++++++++++++++++++++++++++++ fs/nfs/pnfs.c | 8 ++++---- fs/nfs/read.c | 34 +++++++++++++--------------------- fs/nfs/write.c | 28 ++++++++++++---------------- 5 files changed, 64 insertions(+), 45 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h index 5ddc142..9d6a40e 100644 --- a/fs/nfs/internal.h +++ b/fs/nfs/internal.h @@ -237,6 +237,8 @@ extern void nfs_pgheader_init(struct nfs_pageio_descriptor *desc, void nfs_set_pgio_error(struct nfs_pgio_header *hdr, int error, loff_t pos); int nfs_iocounter_wait(struct nfs_io_counter *c); +struct nfs_rw_header *nfs_rw_header_alloc(const struct nfs_rw_ops *); +void nfs_rw_header_free(struct nfs_pgio_header *); struct nfs_pgio_data *nfs_pgio_data_alloc(struct nfs_pgio_header *, unsigned int); void nfs_pgio_data_release(struct nfs_pgio_data *); @@ -397,8 +399,6 @@ extern int nfs4_get_rootfh(struct nfs_server *server, struct nfs_fh *mntfh, bool struct nfs_pgio_completion_ops; /* read.c */ -extern struct nfs_rw_header *nfs_readhdr_alloc(void); -extern void nfs_readhdr_free(struct nfs_pgio_header *hdr); extern void nfs_pageio_init_read(struct nfs_pageio_descriptor *pgio, struct inode *inode, bool force_mds, const struct nfs_pgio_completion_ops *compl_ops); @@ -425,8 +425,6 @@ int nfs_remount(struct super_block *sb, int *flags, char *raw_data); extern void nfs_pageio_init_write(struct nfs_pageio_descriptor *pgio, struct inode *inode, int ioflags, bool force_mds, const struct nfs_pgio_completion_ops *compl_ops); -extern struct nfs_rw_header *nfs_writehdr_alloc(void); -extern void nfs_writehdr_free(struct nfs_pgio_header *hdr); extern int nfs_generic_flush(struct nfs_pageio_descriptor *desc, struct nfs_pgio_header *hdr); extern void nfs_pageio_reset_write_mds(struct nfs_pageio_descriptor *pgio); diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c index a98ccf7..ca356fe 100644 --- a/fs/nfs/pagelist.c +++ b/fs/nfs/pagelist.c @@ -301,6 +301,37 @@ static inline struct nfs_rw_header *NFS_RW_HEADER(struct nfs_pgio_header *hdr) } /** + * nfs_rw_header_alloc - Allocate a header for a read or write + * @ops: Read or write function vector + */ +struct nfs_rw_header *nfs_rw_header_alloc(const struct nfs_rw_ops *ops) +{ + struct nfs_rw_header *header = ops->rw_alloc_header(); + + if (header) { + struct nfs_pgio_header *hdr = &header->header; + + INIT_LIST_HEAD(&hdr->pages); + INIT_LIST_HEAD(&hdr->rpc_list); + spin_lock_init(&hdr->lock); + atomic_set(&hdr->refcnt, 0); + hdr->rw_ops = ops; + } + return header; +} +EXPORT_SYMBOL_GPL(nfs_rw_header_alloc); + +/* + * nfs_rw_header_free - Free a read or write header + * @hdr: The header to free + */ +void nfs_rw_header_free(struct nfs_pgio_header *hdr) +{ + hdr->rw_ops->rw_free_header(NFS_RW_HEADER(hdr)); +} +EXPORT_SYMBOL_GPL(nfs_rw_header_free); + +/** * nfs_pgio_data_alloc - Allocate pageio data * @hdr: The header making a request * @pagecount: Number of pages to create @@ -367,6 +398,7 @@ void nfs_pageio_init(struct nfs_pageio_descriptor *desc, struct inode *inode, const struct nfs_pageio_ops *pg_ops, const struct nfs_pgio_completion_ops *compl_ops, + const struct nfs_rw_ops *rw_ops, size_t bsize, int io_flags) { @@ -380,6 +412,7 @@ void nfs_pageio_init(struct nfs_pageio_descriptor *desc, desc->pg_inode = inode; desc->pg_ops = pg_ops; desc->pg_completion_ops = compl_ops; + desc->pg_rw_ops = rw_ops; desc->pg_ioflags = io_flags; desc->pg_error = 0; desc->pg_lseg = NULL; diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index e192ba6..54c84c1 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c @@ -1585,7 +1585,7 @@ pnfs_do_multiple_writes(struct nfs_pageio_descriptor *desc, struct list_head *he static void pnfs_writehdr_free(struct nfs_pgio_header *hdr) { pnfs_put_lseg(hdr->lseg); - nfs_writehdr_free(hdr); + nfs_rw_header_free(hdr); } EXPORT_SYMBOL_GPL(pnfs_writehdr_free); @@ -1596,7 +1596,7 @@ pnfs_generic_pg_writepages(struct nfs_pageio_descriptor *desc) struct nfs_pgio_header *hdr; int ret; - whdr = nfs_writehdr_alloc(); + whdr = nfs_rw_header_alloc(desc->pg_rw_ops); if (!whdr) { desc->pg_completion_ops->error_cleanup(&desc->pg_list); pnfs_put_lseg(desc->pg_lseg); @@ -1743,7 +1743,7 @@ pnfs_do_multiple_reads(struct nfs_pageio_descriptor *desc, struct list_head *hea static void pnfs_readhdr_free(struct nfs_pgio_header *hdr) { pnfs_put_lseg(hdr->lseg); - nfs_readhdr_free(hdr); + nfs_rw_header_free(hdr); } EXPORT_SYMBOL_GPL(pnfs_readhdr_free); @@ -1754,7 +1754,7 @@ pnfs_generic_pg_readpages(struct nfs_pageio_descriptor *desc) struct nfs_pgio_header *hdr; int ret; - rhdr = nfs_readhdr_alloc(); + rhdr = nfs_rw_header_alloc(desc->pg_rw_ops); if (!rhdr) { desc->pg_completion_ops->error_cleanup(&desc->pg_list); ret = -ENOMEM; diff --git a/fs/nfs/read.c b/fs/nfs/read.c index ab4c1a5..4cf3577 100644 --- a/fs/nfs/read.c +++ b/fs/nfs/read.c @@ -31,33 +31,19 @@ static const struct nfs_pageio_ops nfs_pageio_read_ops; static const struct rpc_call_ops nfs_read_common_ops; static const struct nfs_pgio_completion_ops nfs_async_read_completion_ops; +static const struct nfs_rw_ops nfs_rw_read_ops; static struct kmem_cache *nfs_rdata_cachep; -struct nfs_rw_header *nfs_readhdr_alloc(void) +static struct nfs_rw_header *nfs_readhdr_alloc(void) { - struct nfs_rw_header *rhdr; - - rhdr = kmem_cache_zalloc(nfs_rdata_cachep, GFP_KERNEL); - if (rhdr) { - struct nfs_pgio_header *hdr = &rhdr->header; - - INIT_LIST_HEAD(&hdr->pages); - INIT_LIST_HEAD(&hdr->rpc_list); - spin_lock_init(&hdr->lock); - atomic_set(&hdr->refcnt, 0); - } - return rhdr; + return kmem_cache_zalloc(nfs_rdata_cachep, GFP_KERNEL); } -EXPORT_SYMBOL_GPL(nfs_readhdr_alloc); -void nfs_readhdr_free(struct nfs_pgio_header *hdr) +static void nfs_readhdr_free(struct nfs_rw_header *rhdr) { - struct nfs_rw_header *rhdr = container_of(hdr, struct nfs_rw_header, header); - kmem_cache_free(nfs_rdata_cachep, rhdr); } -EXPORT_SYMBOL_GPL(nfs_readhdr_free); static int nfs_return_empty_page(struct page *page) @@ -79,7 +65,8 @@ void nfs_pageio_init_read(struct nfs_pageio_descriptor *pgio, if (server->pnfs_curr_ld && !force_mds) pg_ops = server->pnfs_curr_ld->pg_read_ops; #endif - nfs_pageio_init(pgio, inode, pg_ops, compl_ops, server->rsize, 0); + nfs_pageio_init(pgio, inode, pg_ops, compl_ops, &nfs_rw_read_ops, + server->rsize, 0); } EXPORT_SYMBOL_GPL(nfs_pageio_init_read); @@ -375,13 +362,13 @@ static int nfs_generic_pg_readpages(struct nfs_pageio_descriptor *desc) struct nfs_pgio_header *hdr; int ret; - rhdr = nfs_readhdr_alloc(); + rhdr = nfs_rw_header_alloc(desc->pg_rw_ops); if (!rhdr) { desc->pg_completion_ops->error_cleanup(&desc->pg_list); return -ENOMEM; } hdr = &rhdr->header; - nfs_pgheader_init(desc, hdr, nfs_readhdr_free); + nfs_pgheader_init(desc, hdr, nfs_rw_header_free); atomic_inc(&hdr->refcnt); ret = nfs_generic_pagein(desc, hdr); if (ret == 0) @@ -647,3 +634,8 @@ void nfs_destroy_readpagecache(void) { kmem_cache_destroy(nfs_rdata_cachep); } + +static const struct nfs_rw_ops nfs_rw_read_ops = { + .rw_alloc_header = nfs_readhdr_alloc, + .rw_free_header = nfs_readhdr_free, +}; diff --git a/fs/nfs/write.c b/fs/nfs/write.c index 0dc4d6a..9c5cde3 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -46,6 +46,7 @@ static const struct rpc_call_ops nfs_write_common_ops; static const struct rpc_call_ops nfs_commit_ops; static const struct nfs_pgio_completion_ops nfs_async_write_completion_ops; static const struct nfs_commit_completion_ops nfs_commit_completion_ops; +static const struct nfs_rw_ops nfs_rw_write_ops; static struct kmem_cache *nfs_wdata_cachep; static mempool_t *nfs_wdata_mempool; @@ -70,29 +71,19 @@ void nfs_commit_free(struct nfs_commit_data *p) } EXPORT_SYMBOL_GPL(nfs_commit_free); -struct nfs_rw_header *nfs_writehdr_alloc(void) +static struct nfs_rw_header *nfs_writehdr_alloc(void) { struct nfs_rw_header *p = mempool_alloc(nfs_wdata_mempool, GFP_NOIO); - if (p) { - struct nfs_pgio_header *hdr = &p->header; - + if (p) memset(p, 0, sizeof(*p)); - INIT_LIST_HEAD(&hdr->pages); - INIT_LIST_HEAD(&hdr->rpc_list); - spin_lock_init(&hdr->lock); - atomic_set(&hdr->refcnt, 0); - } return p; } -EXPORT_SYMBOL_GPL(nfs_writehdr_alloc); -void nfs_writehdr_free(struct nfs_pgio_header *hdr) +static void nfs_writehdr_free(struct nfs_rw_header *whdr) { - struct nfs_rw_header *whdr = container_of(hdr, struct nfs_rw_header, header); mempool_free(whdr, nfs_wdata_mempool); } -EXPORT_SYMBOL_GPL(nfs_writehdr_free); static void nfs_context_set_write_error(struct nfs_open_context *ctx, int error) { @@ -1210,13 +1201,13 @@ static int nfs_generic_pg_writepages(struct nfs_pageio_descriptor *desc) struct nfs_pgio_header *hdr; int ret; - whdr = nfs_writehdr_alloc(); + whdr = nfs_rw_header_alloc(desc->pg_rw_ops); if (!whdr) { desc->pg_completion_ops->error_cleanup(&desc->pg_list); return -ENOMEM; } hdr = &whdr->header; - nfs_pgheader_init(desc, hdr, nfs_writehdr_free); + nfs_pgheader_init(desc, hdr, nfs_rw_header_free); atomic_inc(&hdr->refcnt); ret = nfs_generic_flush(desc, hdr); if (ret == 0) @@ -1244,7 +1235,8 @@ void nfs_pageio_init_write(struct nfs_pageio_descriptor *pgio, if (server->pnfs_curr_ld && !force_mds) pg_ops = server->pnfs_curr_ld->pg_write_ops; #endif - nfs_pageio_init(pgio, inode, pg_ops, compl_ops, server->wsize, ioflags); + nfs_pageio_init(pgio, inode, pg_ops, compl_ops, &nfs_rw_write_ops, + server->wsize, ioflags); } EXPORT_SYMBOL_GPL(nfs_pageio_init_write); @@ -1925,3 +1917,7 @@ void nfs_destroy_writepagecache(void) kmem_cache_destroy(nfs_wdata_cachep); } +static const struct nfs_rw_ops nfs_rw_write_ops = { + .rw_alloc_header = nfs_writehdr_alloc, + .rw_free_header = nfs_writehdr_free, +}; -- cgit v1.1 From a4cdda59111f92000297e0d3edb1e0e08ba3549b Mon Sep 17 00:00:00 2001 From: Anna Schumaker Date: Tue, 6 May 2014 09:12:31 -0400 Subject: NFS: Create a common pgio_rpc_prepare function The read and write paths do exactly the same thing for the rpc_prepare rpc_op. This patch combines them together into a single function. Signed-off-by: Anna Schumaker Signed-off-by: Trond Myklebust --- fs/nfs/internal.h | 2 ++ fs/nfs/nfs3proc.c | 11 ++--------- fs/nfs/nfs4proc.c | 22 +++------------------- fs/nfs/pagelist.c | 26 ++++++++++++++++++++++++++ fs/nfs/proc.c | 11 ++--------- fs/nfs/read.c | 19 +++---------------- fs/nfs/write.c | 19 +++++-------------- 7 files changed, 43 insertions(+), 67 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h index 9d6a40e..1959260 100644 --- a/fs/nfs/internal.h +++ b/fs/nfs/internal.h @@ -241,6 +241,8 @@ struct nfs_rw_header *nfs_rw_header_alloc(const struct nfs_rw_ops *); void nfs_rw_header_free(struct nfs_pgio_header *); struct nfs_pgio_data *nfs_pgio_data_alloc(struct nfs_pgio_header *, unsigned int); void nfs_pgio_data_release(struct nfs_pgio_data *); +void nfs_pgio_prepare(struct rpc_task *, void *); +void nfs_pgio_release(void *); static inline void nfs_iocounter_init(struct nfs_io_counter *c) { diff --git a/fs/nfs/nfs3proc.c b/fs/nfs/nfs3proc.c index d235369..e7daa42 100644 --- a/fs/nfs/nfs3proc.c +++ b/fs/nfs/nfs3proc.c @@ -812,7 +812,7 @@ static void nfs3_proc_read_setup(struct nfs_pgio_data *data, struct rpc_message msg->rpc_proc = &nfs3_procedures[NFS3PROC_READ]; } -static int nfs3_proc_read_rpc_prepare(struct rpc_task *task, struct nfs_pgio_data *data) +static int nfs3_proc_pgio_rpc_prepare(struct rpc_task *task, struct nfs_pgio_data *data) { rpc_call_start(task); return 0; @@ -834,12 +834,6 @@ static void nfs3_proc_write_setup(struct nfs_pgio_data *data, struct rpc_message msg->rpc_proc = &nfs3_procedures[NFS3PROC_WRITE]; } -static int nfs3_proc_write_rpc_prepare(struct rpc_task *task, struct nfs_pgio_data *data) -{ - rpc_call_start(task); - return 0; -} - static void nfs3_proc_commit_rpc_prepare(struct rpc_task *task, struct nfs_commit_data *data) { rpc_call_start(task); @@ -946,11 +940,10 @@ const struct nfs_rpc_ops nfs_v3_clientops = { .fsinfo = nfs3_proc_fsinfo, .pathconf = nfs3_proc_pathconf, .decode_dirent = nfs3_decode_dirent, + .pgio_rpc_prepare = nfs3_proc_pgio_rpc_prepare, .read_setup = nfs3_proc_read_setup, - .read_rpc_prepare = nfs3_proc_read_rpc_prepare, .read_done = nfs3_read_done, .write_setup = nfs3_proc_write_setup, - .write_rpc_prepare = nfs3_proc_write_rpc_prepare, .write_done = nfs3_write_done, .commit_setup = nfs3_proc_commit_setup, .commit_rpc_prepare = nfs3_proc_commit_rpc_prepare, diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index e793aa9..44fb93a 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -4089,7 +4089,7 @@ static void nfs4_proc_read_setup(struct nfs_pgio_data *data, struct rpc_message nfs4_init_sequence(&data->args.seq_args, &data->res.seq_res, 0); } -static int nfs4_proc_read_rpc_prepare(struct rpc_task *task, struct nfs_pgio_data *data) +static int nfs4_proc_pgio_rpc_prepare(struct rpc_task *task, struct nfs_pgio_data *data) { if (nfs4_setup_sequence(NFS_SERVER(data->header->inode), &data->args.seq_args, @@ -4097,7 +4097,7 @@ static int nfs4_proc_read_rpc_prepare(struct rpc_task *task, struct nfs_pgio_dat task)) return 0; if (nfs4_set_rw_stateid(&data->args.stateid, data->args.context, - data->args.lock_context, FMODE_READ) == -EIO) + data->args.lock_context, data->header->rw_ops->rw_mode) == -EIO) return -EIO; if (unlikely(test_bit(NFS_CONTEXT_BAD, &data->args.context->flags))) return -EIO; @@ -4177,21 +4177,6 @@ static void nfs4_proc_write_setup(struct nfs_pgio_data *data, struct rpc_message nfs4_init_sequence(&data->args.seq_args, &data->res.seq_res, 1); } -static int nfs4_proc_write_rpc_prepare(struct rpc_task *task, struct nfs_pgio_data *data) -{ - if (nfs4_setup_sequence(NFS_SERVER(data->header->inode), - &data->args.seq_args, - &data->res.seq_res, - task)) - return 0; - if (nfs4_set_rw_stateid(&data->args.stateid, data->args.context, - data->args.lock_context, FMODE_WRITE) == -EIO) - return -EIO; - if (unlikely(test_bit(NFS_CONTEXT_BAD, &data->args.context->flags))) - return -EIO; - return 0; -} - static void nfs4_proc_commit_rpc_prepare(struct rpc_task *task, struct nfs_commit_data *data) { nfs4_setup_sequence(NFS_SERVER(data->inode), @@ -8432,11 +8417,10 @@ const struct nfs_rpc_ops nfs_v4_clientops = { .pathconf = nfs4_proc_pathconf, .set_capabilities = nfs4_server_capabilities, .decode_dirent = nfs4_decode_dirent, + .pgio_rpc_prepare = nfs4_proc_pgio_rpc_prepare, .read_setup = nfs4_proc_read_setup, - .read_rpc_prepare = nfs4_proc_read_rpc_prepare, .read_done = nfs4_read_done, .write_setup = nfs4_proc_write_setup, - .write_rpc_prepare = nfs4_proc_write_rpc_prepare, .write_done = nfs4_write_done, .commit_setup = nfs4_proc_commit_setup, .commit_rpc_prepare = nfs4_proc_commit_rpc_prepare, diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c index ca356fe..0fa211d 100644 --- a/fs/nfs/pagelist.c +++ b/fs/nfs/pagelist.c @@ -387,6 +387,32 @@ void nfs_pgio_data_release(struct nfs_pgio_data *data) EXPORT_SYMBOL_GPL(nfs_pgio_data_release); /** + * nfs_pgio_prepare - Prepare pageio data to go over the wire + * @task: The current task + * @calldata: pageio data to prepare + */ +void nfs_pgio_prepare(struct rpc_task *task, void *calldata) +{ + struct nfs_pgio_data *data = calldata; + int err; + err = NFS_PROTO(data->header->inode)->pgio_rpc_prepare(task, data); + if (err) + rpc_exit(task, err); +} + +/** + * nfs_pgio_release - Release pageio data + * @calldata: The pageio data to release + */ +void nfs_pgio_release(void *calldata) +{ + struct nfs_pgio_data *data = calldata; + if (data->header->rw_ops->rw_release) + data->header->rw_ops->rw_release(data); + nfs_pgio_data_release(data); +} + +/** * nfs_pageio_init - initialise a page io descriptor * @desc: pointer to descriptor * @inode: pointer to inode diff --git a/fs/nfs/proc.c b/fs/nfs/proc.c index c54829e..c171ce1 100644 --- a/fs/nfs/proc.c +++ b/fs/nfs/proc.c @@ -599,7 +599,7 @@ static void nfs_proc_read_setup(struct nfs_pgio_data *data, struct rpc_message * msg->rpc_proc = &nfs_procedures[NFSPROC_READ]; } -static int nfs_proc_read_rpc_prepare(struct rpc_task *task, struct nfs_pgio_data *data) +static int nfs_proc_pgio_rpc_prepare(struct rpc_task *task, struct nfs_pgio_data *data) { rpc_call_start(task); return 0; @@ -621,12 +621,6 @@ static void nfs_proc_write_setup(struct nfs_pgio_data *data, struct rpc_message msg->rpc_proc = &nfs_procedures[NFSPROC_WRITE]; } -static int nfs_proc_write_rpc_prepare(struct rpc_task *task, struct nfs_pgio_data *data) -{ - rpc_call_start(task); - return 0; -} - static void nfs_proc_commit_rpc_prepare(struct rpc_task *task, struct nfs_commit_data *data) { BUG(); @@ -734,11 +728,10 @@ const struct nfs_rpc_ops nfs_v2_clientops = { .fsinfo = nfs_proc_fsinfo, .pathconf = nfs_proc_pathconf, .decode_dirent = nfs2_decode_dirent, + .pgio_rpc_prepare = nfs_proc_pgio_rpc_prepare, .read_setup = nfs_proc_read_setup, - .read_rpc_prepare = nfs_proc_read_rpc_prepare, .read_done = nfs_read_done, .write_setup = nfs_proc_write_setup, - .write_rpc_prepare = nfs_proc_write_rpc_prepare, .write_done = nfs_write_done, .commit_setup = nfs_proc_commit_setup, .commit_rpc_prepare = nfs_proc_commit_rpc_prepare, diff --git a/fs/nfs/read.c b/fs/nfs/read.c index 4cf3577..cfa15e8 100644 --- a/fs/nfs/read.c +++ b/fs/nfs/read.c @@ -454,24 +454,10 @@ static void nfs_readpage_result_common(struct rpc_task *task, void *calldata) nfs_readpage_retry(task, data); } -static void nfs_readpage_release_common(void *calldata) -{ - nfs_pgio_data_release(calldata); -} - -void nfs_read_prepare(struct rpc_task *task, void *calldata) -{ - struct nfs_pgio_data *data = calldata; - int err; - err = NFS_PROTO(data->header->inode)->read_rpc_prepare(task, data); - if (err) - rpc_exit(task, err); -} - static const struct rpc_call_ops nfs_read_common_ops = { - .rpc_call_prepare = nfs_read_prepare, + .rpc_call_prepare = nfs_pgio_prepare, .rpc_call_done = nfs_readpage_result_common, - .rpc_release = nfs_readpage_release_common, + .rpc_release = nfs_pgio_release, }; /* @@ -636,6 +622,7 @@ void nfs_destroy_readpagecache(void) } static const struct nfs_rw_ops nfs_rw_read_ops = { + .rw_mode = FMODE_READ, .rw_alloc_header = nfs_readhdr_alloc, .rw_free_header = nfs_readhdr_free, }; diff --git a/fs/nfs/write.c b/fs/nfs/write.c index 9c5cde3..ae799c9 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -1248,15 +1248,6 @@ void nfs_pageio_reset_write_mds(struct nfs_pageio_descriptor *pgio) EXPORT_SYMBOL_GPL(nfs_pageio_reset_write_mds); -void nfs_write_prepare(struct rpc_task *task, void *calldata) -{ - struct nfs_pgio_data *data = calldata; - int err; - err = NFS_PROTO(data->header->inode)->write_rpc_prepare(task, data); - if (err) - rpc_exit(task, err); -} - void nfs_commit_prepare(struct rpc_task *task, void *calldata) { struct nfs_commit_data *data = calldata; @@ -1278,9 +1269,8 @@ static void nfs_writeback_done_common(struct rpc_task *task, void *calldata) nfs_writeback_done(task, data); } -static void nfs_writeback_release_common(void *calldata) +static void nfs_writeback_release_common(struct nfs_pgio_data *data) { - struct nfs_pgio_data *data = calldata; struct nfs_pgio_header *hdr = data->header; int status = data->task.tk_status; @@ -1294,13 +1284,12 @@ static void nfs_writeback_release_common(void *calldata) set_bit(NFS_IOHDR_NEED_RESCHED, &hdr->flags); spin_unlock(&hdr->lock); } - nfs_pgio_data_release(data); } static const struct rpc_call_ops nfs_write_common_ops = { - .rpc_call_prepare = nfs_write_prepare, + .rpc_call_prepare = nfs_pgio_prepare, .rpc_call_done = nfs_writeback_done_common, - .rpc_release = nfs_writeback_release_common, + .rpc_release = nfs_pgio_release, }; /* @@ -1918,6 +1907,8 @@ void nfs_destroy_writepagecache(void) } static const struct nfs_rw_ops nfs_rw_write_ops = { + .rw_mode = FMODE_WRITE, .rw_alloc_header = nfs_writehdr_alloc, .rw_free_header = nfs_writehdr_free, + .rw_release = nfs_writeback_release_common, }; -- cgit v1.1 From 0eecb2145c1ce18e36617008424a93836ad0a3bd Mon Sep 17 00:00:00 2001 From: Anna Schumaker Date: Tue, 6 May 2014 09:12:32 -0400 Subject: NFS: Create a common nfs_pgio_result_common function Combining these functions will let me make a single nfs_rw_common_ops struct (see the next patch). Signed-off-by: Anna Schumaker Signed-off-by: Trond Myklebust --- fs/nfs/internal.h | 1 + fs/nfs/pagelist.c | 23 +++++++++++++++++++++++ fs/nfs/read.c | 25 ++++++++----------------- fs/nfs/write.c | 51 ++++++++++++++++++++------------------------------- 4 files changed, 52 insertions(+), 48 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h index 1959260..7c0ae36 100644 --- a/fs/nfs/internal.h +++ b/fs/nfs/internal.h @@ -243,6 +243,7 @@ struct nfs_pgio_data *nfs_pgio_data_alloc(struct nfs_pgio_header *, unsigned int void nfs_pgio_data_release(struct nfs_pgio_data *); void nfs_pgio_prepare(struct rpc_task *, void *); void nfs_pgio_release(void *); +void nfs_pgio_result(struct rpc_task *, void *); static inline void nfs_iocounter_init(struct nfs_io_counter *c) { diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c index 0fa211d..f74df87 100644 --- a/fs/nfs/pagelist.c +++ b/fs/nfs/pagelist.c @@ -24,6 +24,8 @@ #include "internal.h" #include "pnfs.h" +#define NFSDBG_FACILITY NFSDBG_PAGECACHE + static struct kmem_cache *nfs_page_cachep; static bool nfs_pgarray_set(struct nfs_page_array *p, unsigned int pagecount) @@ -447,6 +449,27 @@ void nfs_pageio_init(struct nfs_pageio_descriptor *desc, } EXPORT_SYMBOL_GPL(nfs_pageio_init); +/** + * nfs_pgio_result - Basic pageio error handling + * @task: The task that ran + * @calldata: Pageio data to check + */ +void nfs_pgio_result(struct rpc_task *task, void *calldata) +{ + struct nfs_pgio_data *data = calldata; + struct inode *inode = data->header->inode; + + dprintk("NFS: %s: %5u, (status %d)\n", __func__, + task->tk_pid, task->tk_status); + + if (data->header->rw_ops->rw_done(task, data, inode) != 0) + return; + if (task->tk_status < 0) + nfs_set_pgio_error(data->header, task->tk_status, data->args.offset); + else + data->header->rw_ops->rw_result(task, data); +} + static bool nfs_match_open_context(const struct nfs_open_context *ctx1, const struct nfs_open_context *ctx2) { diff --git a/fs/nfs/read.c b/fs/nfs/read.c index cfa15e8..bc78bd2 100644 --- a/fs/nfs/read.c +++ b/fs/nfs/read.c @@ -388,15 +388,10 @@ static const struct nfs_pageio_ops nfs_pageio_read_ops = { * This is the callback from RPC telling us whether a reply was * received or some error occurred (timeout or socket shutdown). */ -int nfs_readpage_result(struct rpc_task *task, struct nfs_pgio_data *data) +static int nfs_readpage_done(struct rpc_task *task, struct nfs_pgio_data *data, + struct inode *inode) { - struct inode *inode = data->header->inode; - int status; - - dprintk("NFS: %s: %5u, (status %d)\n", __func__, task->tk_pid, - task->tk_status); - - status = NFS_PROTO(inode)->read_done(task, data); + int status = NFS_PROTO(inode)->read_done(task, data); if (status != 0) return status; @@ -429,17 +424,11 @@ static void nfs_readpage_retry(struct rpc_task *task, struct nfs_pgio_data *data rpc_restart_call_prepare(task); } -static void nfs_readpage_result_common(struct rpc_task *task, void *calldata) +static void nfs_readpage_result(struct rpc_task *task, struct nfs_pgio_data *data) { - struct nfs_pgio_data *data = calldata; struct nfs_pgio_header *hdr = data->header; - /* Note the only returns of nfs_readpage_result are 0 and -EAGAIN */ - if (nfs_readpage_result(task, data) != 0) - return; - if (task->tk_status < 0) - nfs_set_pgio_error(hdr, task->tk_status, data->args.offset); - else if (data->res.eof) { + if (data->res.eof) { loff_t bound; bound = data->args.offset + data->res.count; @@ -456,7 +445,7 @@ static void nfs_readpage_result_common(struct rpc_task *task, void *calldata) static const struct rpc_call_ops nfs_read_common_ops = { .rpc_call_prepare = nfs_pgio_prepare, - .rpc_call_done = nfs_readpage_result_common, + .rpc_call_done = nfs_pgio_result, .rpc_release = nfs_pgio_release, }; @@ -625,4 +614,6 @@ static const struct nfs_rw_ops nfs_rw_read_ops = { .rw_mode = FMODE_READ, .rw_alloc_header = nfs_readhdr_alloc, .rw_free_header = nfs_readhdr_free, + .rw_done = nfs_readpage_done, + .rw_result = nfs_readpage_result, }; diff --git a/fs/nfs/write.c b/fs/nfs/write.c index ae799c9..1d3e1d7 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -1255,20 +1255,6 @@ void nfs_commit_prepare(struct rpc_task *task, void *calldata) NFS_PROTO(data->inode)->commit_rpc_prepare(task, data); } -/* - * Handle a write reply that flushes a whole page. - * - * FIXME: There is an inherent race with invalidate_inode_pages and - * writebacks since the page->count is kept > 1 for as long - * as the page has a write request pending. - */ -static void nfs_writeback_done_common(struct rpc_task *task, void *calldata) -{ - struct nfs_pgio_data *data = calldata; - - nfs_writeback_done(task, data); -} - static void nfs_writeback_release_common(struct nfs_pgio_data *data) { struct nfs_pgio_header *hdr = data->header; @@ -1288,7 +1274,7 @@ static void nfs_writeback_release_common(struct nfs_pgio_data *data) static const struct rpc_call_ops nfs_write_common_ops = { .rpc_call_prepare = nfs_pgio_prepare, - .rpc_call_done = nfs_writeback_done_common, + .rpc_call_done = nfs_pgio_result, .rpc_release = nfs_pgio_release, }; @@ -1320,16 +1306,11 @@ static int nfs_should_remove_suid(const struct inode *inode) /* * This function is called when the WRITE call is complete. */ -void nfs_writeback_done(struct rpc_task *task, struct nfs_pgio_data *data) +static int nfs_writeback_done(struct rpc_task *task, struct nfs_pgio_data *data, + struct inode *inode) { - struct nfs_pgio_args *argp = &data->args; - struct nfs_pgio_res *resp = &data->res; - struct inode *inode = data->header->inode; int status; - dprintk("NFS: %5u nfs_writeback_done (status %d)\n", - task->tk_pid, task->tk_status); - /* * ->write_done will attempt to use post-op attributes to detect * conflicting writes by other clients. A strict interpretation @@ -1339,11 +1320,11 @@ void nfs_writeback_done(struct rpc_task *task, struct nfs_pgio_data *data) */ status = NFS_PROTO(inode)->write_done(task, data); if (status != 0) - return; - nfs_add_stats(inode, NFSIOS_SERVERWRITTENBYTES, resp->count); + return status; + nfs_add_stats(inode, NFSIOS_SERVERWRITTENBYTES, data->res.count); #if IS_ENABLED(CONFIG_NFS_V3) || IS_ENABLED(CONFIG_NFS_V4) - if (resp->verf->committed < argp->stable && task->tk_status >= 0) { + if (data->res.verf->committed < data->args.stable && task->tk_status >= 0) { /* We tried a write call, but the server did not * commit data to stable storage even though we * requested it. @@ -1359,25 +1340,31 @@ void nfs_writeback_done(struct rpc_task *task, struct nfs_pgio_data *data) dprintk("NFS: faulty NFS server %s:" " (committed = %d) != (stable = %d)\n", NFS_SERVER(inode)->nfs_client->cl_hostname, - resp->verf->committed, argp->stable); + data->res.verf->committed, data->args.stable); complain = jiffies + 300 * HZ; } } #endif - if (task->tk_status < 0) { - nfs_set_pgio_error(data->header, task->tk_status, argp->offset); - return; - } /* Deal with the suid/sgid bit corner case */ if (nfs_should_remove_suid(inode)) nfs_mark_for_revalidate(inode); + return 0; +} + +/* + * This function is called when the WRITE call is complete. + */ +static void nfs_writeback_result(struct rpc_task *task, struct nfs_pgio_data *data) +{ + struct nfs_pgio_args *argp = &data->args; + struct nfs_pgio_res *resp = &data->res; if (resp->count < argp->count) { static unsigned long complain; /* This a short write! */ - nfs_inc_stats(inode, NFSIOS_SHORTWRITE); + nfs_inc_stats(data->header->inode, NFSIOS_SHORTWRITE); /* Has the server at least made some progress? */ if (resp->count == 0) { @@ -1911,4 +1898,6 @@ static const struct nfs_rw_ops nfs_rw_write_ops = { .rw_alloc_header = nfs_writehdr_alloc, .rw_free_header = nfs_writehdr_free, .rw_release = nfs_writeback_release_common, + .rw_done = nfs_writeback_done, + .rw_result = nfs_writeback_result, }; -- cgit v1.1 From 6f92fa4581f1c26562f80dc686b3c9ea76556911 Mon Sep 17 00:00:00 2001 From: Anna Schumaker Date: Tue, 6 May 2014 09:12:33 -0400 Subject: NFS: Create a common rpc_call_ops struct The read and write paths set up this struct in exactly the same way, so create a single shared struct. Signed-off-by: Anna Schumaker Signed-off-by: Trond Myklebust --- fs/nfs/internal.h | 4 +--- fs/nfs/pagelist.c | 11 ++++++++--- fs/nfs/read.c | 11 ++--------- fs/nfs/write.c | 11 ++--------- 4 files changed, 13 insertions(+), 24 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h index 7c0ae36..e34220f 100644 --- a/fs/nfs/internal.h +++ b/fs/nfs/internal.h @@ -237,13 +237,11 @@ extern void nfs_pgheader_init(struct nfs_pageio_descriptor *desc, void nfs_set_pgio_error(struct nfs_pgio_header *hdr, int error, loff_t pos); int nfs_iocounter_wait(struct nfs_io_counter *c); +extern const struct rpc_call_ops nfs_pgio_common_ops; struct nfs_rw_header *nfs_rw_header_alloc(const struct nfs_rw_ops *); void nfs_rw_header_free(struct nfs_pgio_header *); struct nfs_pgio_data *nfs_pgio_data_alloc(struct nfs_pgio_header *, unsigned int); void nfs_pgio_data_release(struct nfs_pgio_data *); -void nfs_pgio_prepare(struct rpc_task *, void *); -void nfs_pgio_release(void *); -void nfs_pgio_result(struct rpc_task *, void *); static inline void nfs_iocounter_init(struct nfs_io_counter *c) { diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c index f74df87..aabff78 100644 --- a/fs/nfs/pagelist.c +++ b/fs/nfs/pagelist.c @@ -393,7 +393,7 @@ EXPORT_SYMBOL_GPL(nfs_pgio_data_release); * @task: The current task * @calldata: pageio data to prepare */ -void nfs_pgio_prepare(struct rpc_task *task, void *calldata) +static void nfs_pgio_prepare(struct rpc_task *task, void *calldata) { struct nfs_pgio_data *data = calldata; int err; @@ -406,7 +406,7 @@ void nfs_pgio_prepare(struct rpc_task *task, void *calldata) * nfs_pgio_release - Release pageio data * @calldata: The pageio data to release */ -void nfs_pgio_release(void *calldata) +static void nfs_pgio_release(void *calldata) { struct nfs_pgio_data *data = calldata; if (data->header->rw_ops->rw_release) @@ -454,7 +454,7 @@ EXPORT_SYMBOL_GPL(nfs_pageio_init); * @task: The task that ran * @calldata: Pageio data to check */ -void nfs_pgio_result(struct rpc_task *task, void *calldata) +static void nfs_pgio_result(struct rpc_task *task, void *calldata) { struct nfs_pgio_data *data = calldata; struct inode *inode = data->header->inode; @@ -677,3 +677,8 @@ void nfs_destroy_nfspagecache(void) kmem_cache_destroy(nfs_page_cachep); } +const struct rpc_call_ops nfs_pgio_common_ops = { + .rpc_call_prepare = nfs_pgio_prepare, + .rpc_call_done = nfs_pgio_result, + .rpc_release = nfs_pgio_release, +}; diff --git a/fs/nfs/read.c b/fs/nfs/read.c index bc78bd2..a33490c 100644 --- a/fs/nfs/read.c +++ b/fs/nfs/read.c @@ -29,7 +29,6 @@ #define NFSDBG_FACILITY NFSDBG_PAGECACHE static const struct nfs_pageio_ops nfs_pageio_read_ops; -static const struct rpc_call_ops nfs_read_common_ops; static const struct nfs_pgio_completion_ops nfs_async_read_completion_ops; static const struct nfs_rw_ops nfs_rw_read_ops; @@ -314,7 +313,7 @@ static int nfs_pagein_multi(struct nfs_pageio_descriptor *desc, nfs_list_remove_request(req); nfs_list_add_request(req, &hdr->pages); - desc->pg_rpc_callops = &nfs_read_common_ops; + desc->pg_rpc_callops = &nfs_pgio_common_ops; return 0; } @@ -343,7 +342,7 @@ static int nfs_pagein_one(struct nfs_pageio_descriptor *desc, nfs_read_rpcsetup(data, desc->pg_count, 0); list_add(&data->list, &hdr->rpc_list); - desc->pg_rpc_callops = &nfs_read_common_ops; + desc->pg_rpc_callops = &nfs_pgio_common_ops; return 0; } @@ -443,12 +442,6 @@ static void nfs_readpage_result(struct rpc_task *task, struct nfs_pgio_data *dat nfs_readpage_retry(task, data); } -static const struct rpc_call_ops nfs_read_common_ops = { - .rpc_call_prepare = nfs_pgio_prepare, - .rpc_call_done = nfs_pgio_result, - .rpc_release = nfs_pgio_release, -}; - /* * Read a page over NFS. * We read the page synchronously in the following case: diff --git a/fs/nfs/write.c b/fs/nfs/write.c index 1d3e1d7..d877f15 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -42,7 +42,6 @@ * Local function declarations */ static void nfs_redirty_request(struct nfs_page *req); -static const struct rpc_call_ops nfs_write_common_ops; static const struct rpc_call_ops nfs_commit_ops; static const struct nfs_pgio_completion_ops nfs_async_write_completion_ops; static const struct nfs_commit_completion_ops nfs_commit_completion_ops; @@ -1138,7 +1137,7 @@ static int nfs_flush_multi(struct nfs_pageio_descriptor *desc, } while (nbytes != 0); nfs_list_remove_request(req); nfs_list_add_request(req, &hdr->pages); - desc->pg_rpc_callops = &nfs_write_common_ops; + desc->pg_rpc_callops = &nfs_pgio_common_ops; return 0; } @@ -1182,7 +1181,7 @@ static int nfs_flush_one(struct nfs_pageio_descriptor *desc, /* Set up the argument struct */ nfs_write_rpcsetup(data, desc->pg_count, 0, desc->pg_ioflags, &cinfo); list_add(&data->list, &hdr->rpc_list); - desc->pg_rpc_callops = &nfs_write_common_ops; + desc->pg_rpc_callops = &nfs_pgio_common_ops; return 0; } @@ -1272,12 +1271,6 @@ static void nfs_writeback_release_common(struct nfs_pgio_data *data) } } -static const struct rpc_call_ops nfs_write_common_ops = { - .rpc_call_prepare = nfs_pgio_prepare, - .rpc_call_done = nfs_pgio_result, - .rpc_release = nfs_pgio_release, -}; - /* * Special version of should_remove_suid() that ignores capabilities. */ -- cgit v1.1 From ce59515c1484d3a01bc2f3e7043dc488d25efe34 Mon Sep 17 00:00:00 2001 From: Anna Schumaker Date: Tue, 6 May 2014 09:12:34 -0400 Subject: NFS: Create a common rpcsetup function for reads and writes Write adds a little bit of code dealing with flush flags, but since "how" will always be 0 when reading we can share the code. Signed-off-by: Anna Schumaker Signed-off-by: Trond Myklebust --- fs/nfs/internal.h | 3 +++ fs/nfs/pagelist.c | 44 ++++++++++++++++++++++++++++++++++++++++++++ fs/nfs/read.c | 26 ++------------------------ fs/nfs/write.c | 46 ++++------------------------------------------ 4 files changed, 53 insertions(+), 66 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h index e34220f..519864b 100644 --- a/fs/nfs/internal.h +++ b/fs/nfs/internal.h @@ -242,6 +242,8 @@ struct nfs_rw_header *nfs_rw_header_alloc(const struct nfs_rw_ops *); void nfs_rw_header_free(struct nfs_pgio_header *); struct nfs_pgio_data *nfs_pgio_data_alloc(struct nfs_pgio_header *, unsigned int); void nfs_pgio_data_release(struct nfs_pgio_data *); +void nfs_pgio_rpcsetup(struct nfs_pgio_data *, unsigned int, unsigned int, int, + struct nfs_commit_info *); static inline void nfs_iocounter_init(struct nfs_io_counter *c) { @@ -446,6 +448,7 @@ extern void nfs_init_commit(struct nfs_commit_data *data, struct nfs_commit_info *cinfo); int nfs_scan_commit_list(struct list_head *src, struct list_head *dst, struct nfs_commit_info *cinfo, int max); +unsigned long nfs_reqs_to_commit(struct nfs_commit_info *); int nfs_scan_commit(struct inode *inode, struct list_head *dst, struct nfs_commit_info *cinfo); void nfs_mark_request_commit(struct nfs_page *req, diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c index aabff78..0ccd951 100644 --- a/fs/nfs/pagelist.c +++ b/fs/nfs/pagelist.c @@ -389,6 +389,50 @@ void nfs_pgio_data_release(struct nfs_pgio_data *data) EXPORT_SYMBOL_GPL(nfs_pgio_data_release); /** + * nfs_pgio_rpcsetup - Set up arguments for a pageio call + * @data: The pageio data + * @count: Number of bytes to read + * @offset: Initial offset + * @how: How to commit data (writes only) + * @cinfo: Commit information for the call (writes only) + */ +void nfs_pgio_rpcsetup(struct nfs_pgio_data *data, + unsigned int count, unsigned int offset, + int how, struct nfs_commit_info *cinfo) +{ + struct nfs_page *req = data->header->req; + + /* Set up the RPC argument and reply structs + * NB: take care not to mess about with data->commit et al. */ + + data->args.fh = NFS_FH(data->header->inode); + data->args.offset = req_offset(req) + offset; + /* pnfs_set_layoutcommit needs this */ + data->mds_offset = data->args.offset; + data->args.pgbase = req->wb_pgbase + offset; + data->args.pages = data->pages.pagevec; + data->args.count = count; + data->args.context = get_nfs_open_context(req->wb_context); + data->args.lock_context = req->wb_lock_context; + data->args.stable = NFS_UNSTABLE; + switch (how & (FLUSH_STABLE | FLUSH_COND_STABLE)) { + case 0: + break; + case FLUSH_COND_STABLE: + if (nfs_reqs_to_commit(cinfo)) + break; + default: + data->args.stable = NFS_FILE_SYNC; + } + + data->res.fattr = &data->fattr; + data->res.count = count; + data->res.eof = 0; + data->res.verf = &data->verf; + nfs_fattr_init(&data->fattr); +} + +/** * nfs_pgio_prepare - Prepare pageio data to go over the wire * @task: The current task * @calldata: pageio data to prepare diff --git a/fs/nfs/read.c b/fs/nfs/read.c index a33490c..0c88c60 100644 --- a/fs/nfs/read.c +++ b/fs/nfs/read.c @@ -192,28 +192,6 @@ int nfs_initiate_read(struct rpc_clnt *clnt, } EXPORT_SYMBOL_GPL(nfs_initiate_read); -/* - * Set up the NFS read request struct - */ -static void nfs_read_rpcsetup(struct nfs_pgio_data *data, - unsigned int count, unsigned int offset) -{ - struct nfs_page *req = data->header->req; - - data->args.fh = NFS_FH(data->header->inode); - data->args.offset = req_offset(req) + offset; - data->args.pgbase = req->wb_pgbase + offset; - data->args.pages = data->pages.pagevec; - data->args.count = count; - data->args.context = get_nfs_open_context(req->wb_context); - data->args.lock_context = req->wb_lock_context; - - data->res.fattr = &data->fattr; - data->res.count = count; - data->res.eof = 0; - nfs_fattr_init(&data->fattr); -} - static int nfs_do_read(struct nfs_pgio_data *data, const struct rpc_call_ops *call_ops) { @@ -305,7 +283,7 @@ static int nfs_pagein_multi(struct nfs_pageio_descriptor *desc, return -ENOMEM; } data->pages.pagevec[0] = page; - nfs_read_rpcsetup(data, len, offset); + nfs_pgio_rpcsetup(data, len, offset, 0, NULL); list_add(&data->list, &hdr->rpc_list); nbytes -= len; offset += len; @@ -340,7 +318,7 @@ static int nfs_pagein_one(struct nfs_pageio_descriptor *desc, *pages++ = req->wb_page; } - nfs_read_rpcsetup(data, desc->pg_count, 0); + nfs_pgio_rpcsetup(data, desc->pg_count, 0, 0, NULL); list_add(&data->list, &hdr->rpc_list); desc->pg_rpc_callops = &nfs_pgio_common_ops; return 0; diff --git a/fs/nfs/write.c b/fs/nfs/write.c index d877f15..0d367aa 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -603,7 +603,7 @@ out: } #if IS_ENABLED(CONFIG_NFS_V3) || IS_ENABLED(CONFIG_NFS_V4) -static unsigned long +unsigned long nfs_reqs_to_commit(struct nfs_commit_info *cinfo) { return cinfo->mds->ncommit; @@ -660,7 +660,7 @@ nfs_scan_commit(struct inode *inode, struct list_head *dst, } #else -static unsigned long nfs_reqs_to_commit(struct nfs_commit_info *cinfo) +unsigned long nfs_reqs_to_commit(struct nfs_commit_info *cinfo) { return 0; } @@ -987,44 +987,6 @@ out: } EXPORT_SYMBOL_GPL(nfs_initiate_write); -/* - * Set up the argument/result storage required for the RPC call. - */ -static void nfs_write_rpcsetup(struct nfs_pgio_data *data, - unsigned int count, unsigned int offset, - int how, struct nfs_commit_info *cinfo) -{ - struct nfs_page *req = data->header->req; - - /* Set up the RPC argument and reply structs - * NB: take care not to mess about with data->commit et al. */ - - data->args.fh = NFS_FH(data->header->inode); - data->args.offset = req_offset(req) + offset; - /* pnfs_set_layoutcommit needs this */ - data->mds_offset = data->args.offset; - data->args.pgbase = req->wb_pgbase + offset; - data->args.pages = data->pages.pagevec; - data->args.count = count; - data->args.context = get_nfs_open_context(req->wb_context); - data->args.lock_context = req->wb_lock_context; - data->args.stable = NFS_UNSTABLE; - switch (how & (FLUSH_STABLE | FLUSH_COND_STABLE)) { - case 0: - break; - case FLUSH_COND_STABLE: - if (nfs_reqs_to_commit(cinfo)) - break; - default: - data->args.stable = NFS_FILE_SYNC; - } - - data->res.fattr = &data->fattr; - data->res.count = count; - data->res.verf = &data->verf; - nfs_fattr_init(&data->fattr); -} - static int nfs_do_write(struct nfs_pgio_data *data, const struct rpc_call_ops *call_ops, int how) @@ -1129,7 +1091,7 @@ static int nfs_flush_multi(struct nfs_pageio_descriptor *desc, return -ENOMEM; } data->pages.pagevec[0] = page; - nfs_write_rpcsetup(data, len, offset, desc->pg_ioflags, &cinfo); + nfs_pgio_rpcsetup(data, len, offset, desc->pg_ioflags, &cinfo); list_add(&data->list, &hdr->rpc_list); requests++; nbytes -= len; @@ -1179,7 +1141,7 @@ static int nfs_flush_one(struct nfs_pageio_descriptor *desc, desc->pg_ioflags &= ~FLUSH_COND_STABLE; /* Set up the argument struct */ - nfs_write_rpcsetup(data, desc->pg_count, 0, desc->pg_ioflags, &cinfo); + nfs_pgio_rpcsetup(data, desc->pg_count, 0, desc->pg_ioflags, &cinfo); list_add(&data->list, &hdr->rpc_list); desc->pg_rpc_callops = &nfs_pgio_common_ops; return 0; -- cgit v1.1 From 844c9e691d8723853ca8f2de0207683538645824 Mon Sep 17 00:00:00 2001 From: Anna Schumaker Date: Tue, 6 May 2014 09:12:35 -0400 Subject: NFS: Create a common pgio_error function At this point, the read and write versions of this function look identical so both should use the same function. Signed-off-by: Anna Schumaker Signed-off-by: Trond Myklebust --- fs/nfs/internal.h | 1 + fs/nfs/pagelist.c | 20 ++++++++++++++++++++ fs/nfs/read.c | 25 ++++--------------------- fs/nfs/write.c | 25 ++++--------------------- 4 files changed, 29 insertions(+), 42 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h index 519864b..a4b9e75 100644 --- a/fs/nfs/internal.h +++ b/fs/nfs/internal.h @@ -242,6 +242,7 @@ struct nfs_rw_header *nfs_rw_header_alloc(const struct nfs_rw_ops *); void nfs_rw_header_free(struct nfs_pgio_header *); struct nfs_pgio_data *nfs_pgio_data_alloc(struct nfs_pgio_header *, unsigned int); void nfs_pgio_data_release(struct nfs_pgio_data *); +int nfs_pgio_error(struct nfs_pageio_descriptor *, struct nfs_pgio_header *); void nfs_pgio_rpcsetup(struct nfs_pgio_data *, unsigned int, unsigned int, int, struct nfs_commit_info *); diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c index 0ccd951..b0a98da 100644 --- a/fs/nfs/pagelist.c +++ b/fs/nfs/pagelist.c @@ -447,6 +447,26 @@ static void nfs_pgio_prepare(struct rpc_task *task, void *calldata) } /** + * nfs_pgio_error - Clean up from a pageio error + * @desc: IO descriptor + * @hdr: pageio header + */ +int nfs_pgio_error(struct nfs_pageio_descriptor *desc, + struct nfs_pgio_header *hdr) +{ + struct nfs_pgio_data *data; + + set_bit(NFS_IOHDR_REDO, &hdr->flags); + while (!list_empty(&hdr->rpc_list)) { + data = list_first_entry(&hdr->rpc_list, struct nfs_pgio_data, list); + list_del(&data->list); + nfs_pgio_data_release(data); + } + desc->pg_completion_ops->error_cleanup(&desc->pg_list); + return -ENOMEM; +} + +/** * nfs_pgio_release - Release pageio data * @calldata: The pageio data to release */ diff --git a/fs/nfs/read.c b/fs/nfs/read.c index 0c88c60..64f8eef 100644 --- a/fs/nfs/read.c +++ b/fs/nfs/read.c @@ -237,19 +237,6 @@ static const struct nfs_pgio_completion_ops nfs_async_read_completion_ops = { .completion = nfs_read_completion, }; -static void nfs_pagein_error(struct nfs_pageio_descriptor *desc, - struct nfs_pgio_header *hdr) -{ - set_bit(NFS_IOHDR_REDO, &hdr->flags); - while (!list_empty(&hdr->rpc_list)) { - struct nfs_pgio_data *data = list_first_entry(&hdr->rpc_list, - struct nfs_pgio_data, list); - list_del(&data->list); - nfs_pgio_data_release(data); - } - desc->pg_completion_ops->error_cleanup(&desc->pg_list); -} - /* * Generate multiple requests to fill a single page. * @@ -278,10 +265,8 @@ static int nfs_pagein_multi(struct nfs_pageio_descriptor *desc, size_t len = min(nbytes,rsize); data = nfs_pgio_data_alloc(hdr, 1); - if (!data) { - nfs_pagein_error(desc, hdr); - return -ENOMEM; - } + if (!data) + return nfs_pgio_error(desc, hdr); data->pages.pagevec[0] = page; nfs_pgio_rpcsetup(data, len, offset, 0, NULL); list_add(&data->list, &hdr->rpc_list); @@ -305,10 +290,8 @@ static int nfs_pagein_one(struct nfs_pageio_descriptor *desc, data = nfs_pgio_data_alloc(hdr, nfs_page_array_len(desc->pg_base, desc->pg_count)); - if (!data) { - nfs_pagein_error(desc, hdr); - return -ENOMEM; - } + if (!data) + return nfs_pgio_error(desc, hdr); pages = data->pages.pagevec; while (!list_empty(head)) { diff --git a/fs/nfs/write.c b/fs/nfs/write.c index 0d367aa..02d088b 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -1044,19 +1044,6 @@ static const struct nfs_pgio_completion_ops nfs_async_write_completion_ops = { .completion = nfs_write_completion, }; -static void nfs_flush_error(struct nfs_pageio_descriptor *desc, - struct nfs_pgio_header *hdr) -{ - set_bit(NFS_IOHDR_REDO, &hdr->flags); - while (!list_empty(&hdr->rpc_list)) { - struct nfs_pgio_data *data = list_first_entry(&hdr->rpc_list, - struct nfs_pgio_data, list); - list_del(&data->list); - nfs_pgio_data_release(data); - } - desc->pg_completion_ops->error_cleanup(&desc->pg_list); -} - /* * Generate multiple small requests to write out a single * contiguous dirty area on one page. @@ -1086,10 +1073,8 @@ static int nfs_flush_multi(struct nfs_pageio_descriptor *desc, size_t len = min(nbytes, wsize); data = nfs_pgio_data_alloc(hdr, 1); - if (!data) { - nfs_flush_error(desc, hdr); - return -ENOMEM; - } + if (!data) + return nfs_pgio_error(desc, hdr); data->pages.pagevec[0] = page; nfs_pgio_rpcsetup(data, len, offset, desc->pg_ioflags, &cinfo); list_add(&data->list, &hdr->rpc_list); @@ -1122,10 +1107,8 @@ static int nfs_flush_one(struct nfs_pageio_descriptor *desc, data = nfs_pgio_data_alloc(hdr, nfs_page_array_len(desc->pg_base, desc->pg_count)); - if (!data) { - nfs_flush_error(desc, hdr); - return -ENOMEM; - } + if (!data) + return nfs_pgio_error(desc, hdr); nfs_init_cinfo(&cinfo, desc->pg_inode, desc->pg_dreq); pages = data->pages.pagevec; -- cgit v1.1 From ef2c488c073f4f0b3a200745dd8d608c01d69c39 Mon Sep 17 00:00:00 2001 From: Anna Schumaker Date: Tue, 6 May 2014 09:12:36 -0400 Subject: NFS: Create a generic_pgio function These functions are almost identical on both the read and write side. FLUSH_COND_STABLE will never be set for the read path, so leaving it in the generic code won't hurt anything. Signed-off-by: Anna Schumaker Signed-off-by: Trond Myklebust --- fs/nfs/internal.h | 10 +----- fs/nfs/pagelist.c | 106 +++++++++++++++++++++++++++++++++++++++++++++++++++--- fs/nfs/pnfs.c | 4 +-- fs/nfs/read.c | 81 +---------------------------------------- fs/nfs/write.c | 97 +------------------------------------------------ 5 files changed, 106 insertions(+), 192 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h index a4b9e75..365cdb1 100644 --- a/fs/nfs/internal.h +++ b/fs/nfs/internal.h @@ -237,14 +237,10 @@ extern void nfs_pgheader_init(struct nfs_pageio_descriptor *desc, void nfs_set_pgio_error(struct nfs_pgio_header *hdr, int error, loff_t pos); int nfs_iocounter_wait(struct nfs_io_counter *c); -extern const struct rpc_call_ops nfs_pgio_common_ops; struct nfs_rw_header *nfs_rw_header_alloc(const struct nfs_rw_ops *); void nfs_rw_header_free(struct nfs_pgio_header *); -struct nfs_pgio_data *nfs_pgio_data_alloc(struct nfs_pgio_header *, unsigned int); void nfs_pgio_data_release(struct nfs_pgio_data *); -int nfs_pgio_error(struct nfs_pageio_descriptor *, struct nfs_pgio_header *); -void nfs_pgio_rpcsetup(struct nfs_pgio_data *, unsigned int, unsigned int, int, - struct nfs_commit_info *); +int nfs_generic_pgio(struct nfs_pageio_descriptor *, struct nfs_pgio_header *); static inline void nfs_iocounter_init(struct nfs_io_counter *c) { @@ -410,8 +406,6 @@ extern int nfs_initiate_read(struct rpc_clnt *clnt, struct nfs_pgio_data *data, const struct rpc_call_ops *call_ops, int flags); extern void nfs_read_prepare(struct rpc_task *task, void *calldata); -extern int nfs_generic_pagein(struct nfs_pageio_descriptor *desc, - struct nfs_pgio_header *hdr); extern void nfs_pageio_reset_read_mds(struct nfs_pageio_descriptor *pgio); /* super.c */ @@ -429,8 +423,6 @@ int nfs_remount(struct super_block *sb, int *flags, char *raw_data); extern void nfs_pageio_init_write(struct nfs_pageio_descriptor *pgio, struct inode *inode, int ioflags, bool force_mds, const struct nfs_pgio_completion_ops *compl_ops); -extern int nfs_generic_flush(struct nfs_pageio_descriptor *desc, - struct nfs_pgio_header *hdr); extern void nfs_pageio_reset_write_mds(struct nfs_pageio_descriptor *pgio); extern void nfs_commit_free(struct nfs_commit_data *p); extern int nfs_initiate_write(struct rpc_clnt *clnt, diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c index b0a98da..d8d25a4 100644 --- a/fs/nfs/pagelist.c +++ b/fs/nfs/pagelist.c @@ -27,6 +27,7 @@ #define NFSDBG_FACILITY NFSDBG_PAGECACHE static struct kmem_cache *nfs_page_cachep; +static const struct rpc_call_ops nfs_pgio_common_ops; static bool nfs_pgarray_set(struct nfs_page_array *p, unsigned int pagecount) { @@ -338,8 +339,8 @@ EXPORT_SYMBOL_GPL(nfs_rw_header_free); * @hdr: The header making a request * @pagecount: Number of pages to create */ -struct nfs_pgio_data *nfs_pgio_data_alloc(struct nfs_pgio_header *hdr, - unsigned int pagecount) +static struct nfs_pgio_data *nfs_pgio_data_alloc(struct nfs_pgio_header *hdr, + unsigned int pagecount) { struct nfs_pgio_data *data, *prealloc; @@ -396,7 +397,7 @@ EXPORT_SYMBOL_GPL(nfs_pgio_data_release); * @how: How to commit data (writes only) * @cinfo: Commit information for the call (writes only) */ -void nfs_pgio_rpcsetup(struct nfs_pgio_data *data, +static void nfs_pgio_rpcsetup(struct nfs_pgio_data *data, unsigned int count, unsigned int offset, int how, struct nfs_commit_info *cinfo) { @@ -451,7 +452,7 @@ static void nfs_pgio_prepare(struct rpc_task *task, void *calldata) * @desc: IO descriptor * @hdr: pageio header */ -int nfs_pgio_error(struct nfs_pageio_descriptor *desc, +static int nfs_pgio_error(struct nfs_pageio_descriptor *desc, struct nfs_pgio_header *hdr) { struct nfs_pgio_data *data; @@ -534,6 +535,101 @@ static void nfs_pgio_result(struct rpc_task *task, void *calldata) data->header->rw_ops->rw_result(task, data); } +/* + * Generate multiple small requests to read or write a single + * contiguous dirty on one page. + */ +static int nfs_pgio_multi(struct nfs_pageio_descriptor *desc, + struct nfs_pgio_header *hdr) +{ + struct nfs_page *req = hdr->req; + struct page *page = req->wb_page; + struct nfs_pgio_data *data; + size_t wsize = desc->pg_bsize, nbytes; + unsigned int offset; + int requests = 0; + struct nfs_commit_info cinfo; + + nfs_init_cinfo(&cinfo, desc->pg_inode, desc->pg_dreq); + + if ((desc->pg_ioflags & FLUSH_COND_STABLE) && + (desc->pg_moreio || nfs_reqs_to_commit(&cinfo) || + desc->pg_count > wsize)) + desc->pg_ioflags &= ~FLUSH_COND_STABLE; + + offset = 0; + nbytes = desc->pg_count; + do { + size_t len = min(nbytes, wsize); + + data = nfs_pgio_data_alloc(hdr, 1); + if (!data) + return nfs_pgio_error(desc, hdr); + data->pages.pagevec[0] = page; + nfs_pgio_rpcsetup(data, len, offset, desc->pg_ioflags, &cinfo); + list_add(&data->list, &hdr->rpc_list); + requests++; + nbytes -= len; + offset += len; + } while (nbytes != 0); + + nfs_list_remove_request(req); + nfs_list_add_request(req, &hdr->pages); + desc->pg_rpc_callops = &nfs_pgio_common_ops; + return 0; +} + +/* + * Create an RPC task for the given read or write request and kick it. + * The page must have been locked by the caller. + * + * It may happen that the page we're passed is not marked dirty. + * This is the case if nfs_updatepage detects a conflicting request + * that has been written but not committed. + */ +static int nfs_pgio_one(struct nfs_pageio_descriptor *desc, + struct nfs_pgio_header *hdr) +{ + struct nfs_page *req; + struct page **pages; + struct nfs_pgio_data *data; + struct list_head *head = &desc->pg_list; + struct nfs_commit_info cinfo; + + data = nfs_pgio_data_alloc(hdr, nfs_page_array_len(desc->pg_base, + desc->pg_count)); + if (!data) + return nfs_pgio_error(desc, hdr); + + nfs_init_cinfo(&cinfo, desc->pg_inode, desc->pg_dreq); + pages = data->pages.pagevec; + while (!list_empty(head)) { + req = nfs_list_entry(head->next); + nfs_list_remove_request(req); + nfs_list_add_request(req, &hdr->pages); + *pages++ = req->wb_page; + } + + if ((desc->pg_ioflags & FLUSH_COND_STABLE) && + (desc->pg_moreio || nfs_reqs_to_commit(&cinfo))) + desc->pg_ioflags &= ~FLUSH_COND_STABLE; + + /* Set up the argument struct */ + nfs_pgio_rpcsetup(data, desc->pg_count, 0, desc->pg_ioflags, &cinfo); + list_add(&data->list, &hdr->rpc_list); + desc->pg_rpc_callops = &nfs_pgio_common_ops; + return 0; +} + +int nfs_generic_pgio(struct nfs_pageio_descriptor *desc, + struct nfs_pgio_header *hdr) +{ + if (desc->pg_bsize < PAGE_CACHE_SIZE) + return nfs_pgio_multi(desc, hdr); + return nfs_pgio_one(desc, hdr); +} +EXPORT_SYMBOL_GPL(nfs_generic_pgio); + static bool nfs_match_open_context(const struct nfs_open_context *ctx1, const struct nfs_open_context *ctx2) { @@ -741,7 +837,7 @@ void nfs_destroy_nfspagecache(void) kmem_cache_destroy(nfs_page_cachep); } -const struct rpc_call_ops nfs_pgio_common_ops = { +static const struct rpc_call_ops nfs_pgio_common_ops = { .rpc_call_prepare = nfs_pgio_prepare, .rpc_call_done = nfs_pgio_result, .rpc_release = nfs_pgio_release, diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index 54c84c1..0fe6701 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c @@ -1607,7 +1607,7 @@ pnfs_generic_pg_writepages(struct nfs_pageio_descriptor *desc) nfs_pgheader_init(desc, hdr, pnfs_writehdr_free); hdr->lseg = pnfs_get_lseg(desc->pg_lseg); atomic_inc(&hdr->refcnt); - ret = nfs_generic_flush(desc, hdr); + ret = nfs_generic_pgio(desc, hdr); if (ret != 0) { pnfs_put_lseg(desc->pg_lseg); desc->pg_lseg = NULL; @@ -1766,7 +1766,7 @@ pnfs_generic_pg_readpages(struct nfs_pageio_descriptor *desc) nfs_pgheader_init(desc, hdr, pnfs_readhdr_free); hdr->lseg = pnfs_get_lseg(desc->pg_lseg); atomic_inc(&hdr->refcnt); - ret = nfs_generic_pagein(desc, hdr); + ret = nfs_generic_pgio(desc, hdr); if (ret != 0) { pnfs_put_lseg(desc->pg_lseg); desc->pg_lseg = NULL; diff --git a/fs/nfs/read.c b/fs/nfs/read.c index 64f8eef..4fcef82 100644 --- a/fs/nfs/read.c +++ b/fs/nfs/read.c @@ -237,85 +237,6 @@ static const struct nfs_pgio_completion_ops nfs_async_read_completion_ops = { .completion = nfs_read_completion, }; -/* - * Generate multiple requests to fill a single page. - * - * We optimize to reduce the number of read operations on the wire. If we - * detect that we're reading a page, or an area of a page, that is past the - * end of file, we do not generate NFS read operations but just clear the - * parts of the page that would have come back zero from the server anyway. - * - * We rely on the cached value of i_size to make this determination; another - * client can fill pages on the server past our cached end-of-file, but we - * won't see the new data until our attribute cache is updated. This is more - * or less conventional NFS client behavior. - */ -static int nfs_pagein_multi(struct nfs_pageio_descriptor *desc, - struct nfs_pgio_header *hdr) -{ - struct nfs_page *req = hdr->req; - struct page *page = req->wb_page; - struct nfs_pgio_data *data; - size_t rsize = desc->pg_bsize, nbytes; - unsigned int offset; - - offset = 0; - nbytes = desc->pg_count; - do { - size_t len = min(nbytes,rsize); - - data = nfs_pgio_data_alloc(hdr, 1); - if (!data) - return nfs_pgio_error(desc, hdr); - data->pages.pagevec[0] = page; - nfs_pgio_rpcsetup(data, len, offset, 0, NULL); - list_add(&data->list, &hdr->rpc_list); - nbytes -= len; - offset += len; - } while (nbytes != 0); - - nfs_list_remove_request(req); - nfs_list_add_request(req, &hdr->pages); - desc->pg_rpc_callops = &nfs_pgio_common_ops; - return 0; -} - -static int nfs_pagein_one(struct nfs_pageio_descriptor *desc, - struct nfs_pgio_header *hdr) -{ - struct nfs_page *req; - struct page **pages; - struct nfs_pgio_data *data; - struct list_head *head = &desc->pg_list; - - data = nfs_pgio_data_alloc(hdr, nfs_page_array_len(desc->pg_base, - desc->pg_count)); - if (!data) - return nfs_pgio_error(desc, hdr); - - pages = data->pages.pagevec; - while (!list_empty(head)) { - req = nfs_list_entry(head->next); - nfs_list_remove_request(req); - nfs_list_add_request(req, &hdr->pages); - *pages++ = req->wb_page; - } - - nfs_pgio_rpcsetup(data, desc->pg_count, 0, 0, NULL); - list_add(&data->list, &hdr->rpc_list); - desc->pg_rpc_callops = &nfs_pgio_common_ops; - return 0; -} - -int nfs_generic_pagein(struct nfs_pageio_descriptor *desc, - struct nfs_pgio_header *hdr) -{ - if (desc->pg_bsize < PAGE_CACHE_SIZE) - return nfs_pagein_multi(desc, hdr); - return nfs_pagein_one(desc, hdr); -} -EXPORT_SYMBOL_GPL(nfs_generic_pagein); - static int nfs_generic_pg_readpages(struct nfs_pageio_descriptor *desc) { struct nfs_rw_header *rhdr; @@ -330,7 +251,7 @@ static int nfs_generic_pg_readpages(struct nfs_pageio_descriptor *desc) hdr = &rhdr->header; nfs_pgheader_init(desc, hdr, nfs_rw_header_free); atomic_inc(&hdr->refcnt); - ret = nfs_generic_pagein(desc, hdr); + ret = nfs_generic_pgio(desc, hdr); if (ret == 0) ret = nfs_do_multiple_reads(&hdr->rpc_list, desc->pg_rpc_callops); diff --git a/fs/nfs/write.c b/fs/nfs/write.c index 02d088b..0e34c70 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -1044,101 +1044,6 @@ static const struct nfs_pgio_completion_ops nfs_async_write_completion_ops = { .completion = nfs_write_completion, }; -/* - * Generate multiple small requests to write out a single - * contiguous dirty area on one page. - */ -static int nfs_flush_multi(struct nfs_pageio_descriptor *desc, - struct nfs_pgio_header *hdr) -{ - struct nfs_page *req = hdr->req; - struct page *page = req->wb_page; - struct nfs_pgio_data *data; - size_t wsize = desc->pg_bsize, nbytes; - unsigned int offset; - int requests = 0; - struct nfs_commit_info cinfo; - - nfs_init_cinfo(&cinfo, desc->pg_inode, desc->pg_dreq); - - if ((desc->pg_ioflags & FLUSH_COND_STABLE) && - (desc->pg_moreio || nfs_reqs_to_commit(&cinfo) || - desc->pg_count > wsize)) - desc->pg_ioflags &= ~FLUSH_COND_STABLE; - - - offset = 0; - nbytes = desc->pg_count; - do { - size_t len = min(nbytes, wsize); - - data = nfs_pgio_data_alloc(hdr, 1); - if (!data) - return nfs_pgio_error(desc, hdr); - data->pages.pagevec[0] = page; - nfs_pgio_rpcsetup(data, len, offset, desc->pg_ioflags, &cinfo); - list_add(&data->list, &hdr->rpc_list); - requests++; - nbytes -= len; - offset += len; - } while (nbytes != 0); - nfs_list_remove_request(req); - nfs_list_add_request(req, &hdr->pages); - desc->pg_rpc_callops = &nfs_pgio_common_ops; - return 0; -} - -/* - * Create an RPC task for the given write request and kick it. - * The page must have been locked by the caller. - * - * It may happen that the page we're passed is not marked dirty. - * This is the case if nfs_updatepage detects a conflicting request - * that has been written but not committed. - */ -static int nfs_flush_one(struct nfs_pageio_descriptor *desc, - struct nfs_pgio_header *hdr) -{ - struct nfs_page *req; - struct page **pages; - struct nfs_pgio_data *data; - struct list_head *head = &desc->pg_list; - struct nfs_commit_info cinfo; - - data = nfs_pgio_data_alloc(hdr, nfs_page_array_len(desc->pg_base, - desc->pg_count)); - if (!data) - return nfs_pgio_error(desc, hdr); - - nfs_init_cinfo(&cinfo, desc->pg_inode, desc->pg_dreq); - pages = data->pages.pagevec; - while (!list_empty(head)) { - req = nfs_list_entry(head->next); - nfs_list_remove_request(req); - nfs_list_add_request(req, &hdr->pages); - *pages++ = req->wb_page; - } - - if ((desc->pg_ioflags & FLUSH_COND_STABLE) && - (desc->pg_moreio || nfs_reqs_to_commit(&cinfo))) - desc->pg_ioflags &= ~FLUSH_COND_STABLE; - - /* Set up the argument struct */ - nfs_pgio_rpcsetup(data, desc->pg_count, 0, desc->pg_ioflags, &cinfo); - list_add(&data->list, &hdr->rpc_list); - desc->pg_rpc_callops = &nfs_pgio_common_ops; - return 0; -} - -int nfs_generic_flush(struct nfs_pageio_descriptor *desc, - struct nfs_pgio_header *hdr) -{ - if (desc->pg_bsize < PAGE_CACHE_SIZE) - return nfs_flush_multi(desc, hdr); - return nfs_flush_one(desc, hdr); -} -EXPORT_SYMBOL_GPL(nfs_generic_flush); - static int nfs_generic_pg_writepages(struct nfs_pageio_descriptor *desc) { struct nfs_rw_header *whdr; @@ -1153,7 +1058,7 @@ static int nfs_generic_pg_writepages(struct nfs_pageio_descriptor *desc) hdr = &whdr->header; nfs_pgheader_init(desc, hdr, nfs_rw_header_free); atomic_inc(&hdr->refcnt); - ret = nfs_generic_flush(desc, hdr); + ret = nfs_generic_pgio(desc, hdr); if (ret == 0) ret = nfs_do_multiple_writes(&hdr->rpc_list, desc->pg_rpc_callops, -- cgit v1.1 From 1ed26f33008e954a8e91d26f97d4380dea8145db Mon Sep 17 00:00:00 2001 From: Anna Schumaker Date: Tue, 6 May 2014 09:12:37 -0400 Subject: NFS: Create a common initiate_pgio() function Most of this code is the same for both the read and write paths, so combine everything and use the rw_ops when necessary. Signed-off-by: Anna Schumaker Signed-off-by: Trond Myklebust --- fs/nfs/internal.h | 9 ++------ fs/nfs/nfs4filelayout.c | 6 +++--- fs/nfs/pagelist.c | 46 +++++++++++++++++++++++++++++++++++++++++ fs/nfs/read.c | 42 ++++++------------------------------- fs/nfs/write.c | 55 +++++++------------------------------------------ 5 files changed, 64 insertions(+), 94 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h index 365cdb1..be4f2a7 100644 --- a/fs/nfs/internal.h +++ b/fs/nfs/internal.h @@ -241,6 +241,8 @@ struct nfs_rw_header *nfs_rw_header_alloc(const struct nfs_rw_ops *); void nfs_rw_header_free(struct nfs_pgio_header *); void nfs_pgio_data_release(struct nfs_pgio_data *); int nfs_generic_pgio(struct nfs_pageio_descriptor *, struct nfs_pgio_header *); +int nfs_initiate_pgio(struct rpc_clnt *, struct nfs_pgio_data *, + const struct rpc_call_ops *, int, int); static inline void nfs_iocounter_init(struct nfs_io_counter *c) { @@ -402,9 +404,6 @@ struct nfs_pgio_completion_ops; extern void nfs_pageio_init_read(struct nfs_pageio_descriptor *pgio, struct inode *inode, bool force_mds, const struct nfs_pgio_completion_ops *compl_ops); -extern int nfs_initiate_read(struct rpc_clnt *clnt, - struct nfs_pgio_data *data, - const struct rpc_call_ops *call_ops, int flags); extern void nfs_read_prepare(struct rpc_task *task, void *calldata); extern void nfs_pageio_reset_read_mds(struct nfs_pageio_descriptor *pgio); @@ -425,10 +424,6 @@ extern void nfs_pageio_init_write(struct nfs_pageio_descriptor *pgio, const struct nfs_pgio_completion_ops *compl_ops); extern void nfs_pageio_reset_write_mds(struct nfs_pageio_descriptor *pgio); extern void nfs_commit_free(struct nfs_commit_data *p); -extern int nfs_initiate_write(struct rpc_clnt *clnt, - struct nfs_pgio_data *data, - const struct rpc_call_ops *call_ops, - int how, int flags); extern void nfs_write_prepare(struct rpc_task *task, void *calldata); extern void nfs_commit_prepare(struct rpc_task *task, void *calldata); extern int nfs_initiate_commit(struct rpc_clnt *clnt, diff --git a/fs/nfs/nfs4filelayout.c b/fs/nfs/nfs4filelayout.c index e693614..7954e16 100644 --- a/fs/nfs/nfs4filelayout.c +++ b/fs/nfs/nfs4filelayout.c @@ -568,8 +568,8 @@ filelayout_read_pagelist(struct nfs_pgio_data *data) data->mds_offset = offset; /* Perform an asynchronous read to ds */ - nfs_initiate_read(ds_clnt, data, - &filelayout_read_call_ops, RPC_TASK_SOFTCONN); + nfs_initiate_pgio(ds_clnt, data, + &filelayout_read_call_ops, 0, RPC_TASK_SOFTCONN); return PNFS_ATTEMPTED; } @@ -613,7 +613,7 @@ filelayout_write_pagelist(struct nfs_pgio_data *data, int sync) data->args.offset = filelayout_get_dserver_offset(lseg, offset); /* Perform an asynchronous write */ - nfs_initiate_write(ds_clnt, data, + nfs_initiate_pgio(ds_clnt, data, &filelayout_write_call_ops, sync, RPC_TASK_SOFTCONN); return PNFS_ATTEMPTED; diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c index d8d25a4..ab5b185 100644 --- a/fs/nfs/pagelist.c +++ b/fs/nfs/pagelist.c @@ -447,6 +447,52 @@ static void nfs_pgio_prepare(struct rpc_task *task, void *calldata) rpc_exit(task, err); } +int nfs_initiate_pgio(struct rpc_clnt *clnt, struct nfs_pgio_data *data, + const struct rpc_call_ops *call_ops, int how, int flags) +{ + struct rpc_task *task; + struct rpc_message msg = { + .rpc_argp = &data->args, + .rpc_resp = &data->res, + .rpc_cred = data->header->cred, + }; + struct rpc_task_setup task_setup_data = { + .rpc_client = clnt, + .task = &data->task, + .rpc_message = &msg, + .callback_ops = call_ops, + .callback_data = data, + .workqueue = nfsiod_workqueue, + .flags = RPC_TASK_ASYNC | flags, + }; + int ret = 0; + + data->header->rw_ops->rw_initiate(data, &msg, &task_setup_data, how); + + dprintk("NFS: %5u initiated pgio call " + "(req %s/%llu, %u bytes @ offset %llu)\n", + data->task.tk_pid, + data->header->inode->i_sb->s_id, + (unsigned long long)NFS_FILEID(data->header->inode), + data->args.count, + (unsigned long long)data->args.offset); + + task = rpc_run_task(&task_setup_data); + if (IS_ERR(task)) { + ret = PTR_ERR(task); + goto out; + } + if (how & FLUSH_SYNC) { + ret = rpc_wait_for_completion_task(task); + if (ret == 0) + ret = task->tk_status; + } + rpc_put_task(task); +out: + return ret; +} +EXPORT_SYMBOL_GPL(nfs_initiate_pgio); + /** * nfs_pgio_error - Clean up from a pageio error * @desc: IO descriptor diff --git a/fs/nfs/read.c b/fs/nfs/read.c index 4fcef82..0359b0d 100644 --- a/fs/nfs/read.c +++ b/fs/nfs/read.c @@ -151,53 +151,22 @@ out: hdr->release(hdr); } -int nfs_initiate_read(struct rpc_clnt *clnt, - struct nfs_pgio_data *data, - const struct rpc_call_ops *call_ops, int flags) +static void nfs_initiate_read(struct nfs_pgio_data *data, struct rpc_message *msg, + struct rpc_task_setup *task_setup_data, int how) { struct inode *inode = data->header->inode; int swap_flags = IS_SWAPFILE(inode) ? NFS_RPC_SWAPFLAGS : 0; - struct rpc_task *task; - struct rpc_message msg = { - .rpc_argp = &data->args, - .rpc_resp = &data->res, - .rpc_cred = data->header->cred, - }; - struct rpc_task_setup task_setup_data = { - .task = &data->task, - .rpc_client = clnt, - .rpc_message = &msg, - .callback_ops = call_ops, - .callback_data = data, - .workqueue = nfsiod_workqueue, - .flags = RPC_TASK_ASYNC | swap_flags | flags, - }; - /* Set up the initial task struct. */ - NFS_PROTO(inode)->read_setup(data, &msg); - - dprintk("NFS: %5u initiated read call (req %s/%llu, %u bytes @ " - "offset %llu)\n", - data->task.tk_pid, - inode->i_sb->s_id, - (unsigned long long)NFS_FILEID(inode), - data->args.count, - (unsigned long long)data->args.offset); - - task = rpc_run_task(&task_setup_data); - if (IS_ERR(task)) - return PTR_ERR(task); - rpc_put_task(task); - return 0; + task_setup_data->flags |= swap_flags; + NFS_PROTO(inode)->read_setup(data, msg); } -EXPORT_SYMBOL_GPL(nfs_initiate_read); static int nfs_do_read(struct nfs_pgio_data *data, const struct rpc_call_ops *call_ops) { struct inode *inode = data->header->inode; - return nfs_initiate_read(NFS_CLIENT(inode), data, call_ops, 0); + return nfs_initiate_pgio(NFS_CLIENT(inode), data, call_ops, 0, 0); } static int @@ -491,4 +460,5 @@ static const struct nfs_rw_ops nfs_rw_read_ops = { .rw_free_header = nfs_readhdr_free, .rw_done = nfs_readpage_done, .rw_result = nfs_readpage_result, + .rw_initiate = nfs_initiate_read, }; diff --git a/fs/nfs/write.c b/fs/nfs/write.c index 0e34c70..e46a1fc 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -932,60 +932,18 @@ static int flush_task_priority(int how) return RPC_PRIORITY_NORMAL; } -int nfs_initiate_write(struct rpc_clnt *clnt, - struct nfs_pgio_data *data, - const struct rpc_call_ops *call_ops, - int how, int flags) +static void nfs_initiate_write(struct nfs_pgio_data *data, struct rpc_message *msg, + struct rpc_task_setup *task_setup_data, int how) { struct inode *inode = data->header->inode; int priority = flush_task_priority(how); - struct rpc_task *task; - struct rpc_message msg = { - .rpc_argp = &data->args, - .rpc_resp = &data->res, - .rpc_cred = data->header->cred, - }; - struct rpc_task_setup task_setup_data = { - .rpc_client = clnt, - .task = &data->task, - .rpc_message = &msg, - .callback_ops = call_ops, - .callback_data = data, - .workqueue = nfsiod_workqueue, - .flags = RPC_TASK_ASYNC | flags, - .priority = priority, - }; - int ret = 0; - /* Set up the initial task struct. */ - NFS_PROTO(inode)->write_setup(data, &msg); - - dprintk("NFS: %5u initiated write call " - "(req %s/%llu, %u bytes @ offset %llu)\n", - data->task.tk_pid, - inode->i_sb->s_id, - (unsigned long long)NFS_FILEID(inode), - data->args.count, - (unsigned long long)data->args.offset); + task_setup_data->priority = priority; + NFS_PROTO(inode)->write_setup(data, msg); nfs4_state_protect_write(NFS_SERVER(inode)->nfs_client, - &task_setup_data.rpc_client, &msg, data); - - task = rpc_run_task(&task_setup_data); - if (IS_ERR(task)) { - ret = PTR_ERR(task); - goto out; - } - if (how & FLUSH_SYNC) { - ret = rpc_wait_for_completion_task(task); - if (ret == 0) - ret = task->tk_status; - } - rpc_put_task(task); -out: - return ret; + &task_setup_data->rpc_client, msg, data); } -EXPORT_SYMBOL_GPL(nfs_initiate_write); static int nfs_do_write(struct nfs_pgio_data *data, const struct rpc_call_ops *call_ops, @@ -993,7 +951,7 @@ static int nfs_do_write(struct nfs_pgio_data *data, { struct inode *inode = data->header->inode; - return nfs_initiate_write(NFS_CLIENT(inode), data, call_ops, how, 0); + return nfs_initiate_pgio(NFS_CLIENT(inode), data, call_ops, how, 0); } static int nfs_do_multiple_writes(struct list_head *head, @@ -1743,4 +1701,5 @@ static const struct nfs_rw_ops nfs_rw_write_ops = { .rw_release = nfs_writeback_release_common, .rw_done = nfs_writeback_done, .rw_result = nfs_writeback_result, + .rw_initiate = nfs_initiate_write, }; -- cgit v1.1 From c3766276f26090f4459329839cdcc8506dfbced5 Mon Sep 17 00:00:00 2001 From: Anna Schumaker Date: Tue, 6 May 2014 09:12:38 -0400 Subject: NFS: Create a common multiple_pgios() function Once again, these two functions look identical in the read and write case. Time to combine them together! Signed-off-by: Anna Schumaker Signed-off-by: Trond Myklebust --- fs/nfs/internal.h | 1 + fs/nfs/pagelist.c | 21 +++++++++++++++++++++ fs/nfs/read.c | 32 ++------------------------------ fs/nfs/write.c | 31 +------------------------------ 4 files changed, 25 insertions(+), 60 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h index be4f2a7..886273a 100644 --- a/fs/nfs/internal.h +++ b/fs/nfs/internal.h @@ -243,6 +243,7 @@ void nfs_pgio_data_release(struct nfs_pgio_data *); int nfs_generic_pgio(struct nfs_pageio_descriptor *, struct nfs_pgio_header *); int nfs_initiate_pgio(struct rpc_clnt *, struct nfs_pgio_data *, const struct rpc_call_ops *, int, int); +int nfs_do_multiple_pgios(struct list_head *, const struct rpc_call_ops *, int); static inline void nfs_iocounter_init(struct nfs_io_counter *c) { diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c index ab5b185..2471e02 100644 --- a/fs/nfs/pagelist.c +++ b/fs/nfs/pagelist.c @@ -493,6 +493,27 @@ out: } EXPORT_SYMBOL_GPL(nfs_initiate_pgio); +int nfs_do_multiple_pgios(struct list_head *head, + const struct rpc_call_ops *call_ops, + int how) +{ + struct nfs_pgio_data *data; + int ret = 0; + + while (!list_empty(head)) { + int ret2; + + data = list_first_entry(head, struct nfs_pgio_data, list); + list_del_init(&data->list); + + ret2 = nfs_initiate_pgio(NFS_CLIENT(data->header->inode), + data, call_ops, how, 0); + if (ret == 0) + ret = ret2; + } + return ret; +} + /** * nfs_pgio_error - Clean up from a pageio error * @desc: IO descriptor diff --git a/fs/nfs/read.c b/fs/nfs/read.c index 0359b0d..b834d45 100644 --- a/fs/nfs/read.c +++ b/fs/nfs/read.c @@ -161,34 +161,6 @@ static void nfs_initiate_read(struct nfs_pgio_data *data, struct rpc_message *ms NFS_PROTO(inode)->read_setup(data, msg); } -static int nfs_do_read(struct nfs_pgio_data *data, - const struct rpc_call_ops *call_ops) -{ - struct inode *inode = data->header->inode; - - return nfs_initiate_pgio(NFS_CLIENT(inode), data, call_ops, 0, 0); -} - -static int -nfs_do_multiple_reads(struct list_head *head, - const struct rpc_call_ops *call_ops) -{ - struct nfs_pgio_data *data; - int ret = 0; - - while (!list_empty(head)) { - int ret2; - - data = list_first_entry(head, struct nfs_pgio_data, list); - list_del_init(&data->list); - - ret2 = nfs_do_read(data, call_ops); - if (ret == 0) - ret = ret2; - } - return ret; -} - static void nfs_async_read_error(struct list_head *head) { @@ -222,8 +194,8 @@ static int nfs_generic_pg_readpages(struct nfs_pageio_descriptor *desc) atomic_inc(&hdr->refcnt); ret = nfs_generic_pgio(desc, hdr); if (ret == 0) - ret = nfs_do_multiple_reads(&hdr->rpc_list, - desc->pg_rpc_callops); + ret = nfs_do_multiple_pgios(&hdr->rpc_list, + desc->pg_rpc_callops, 0); if (atomic_dec_and_test(&hdr->refcnt)) hdr->completion_ops->completion(hdr); return ret; diff --git a/fs/nfs/write.c b/fs/nfs/write.c index e46a1fc..d3fa181 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -945,35 +945,6 @@ static void nfs_initiate_write(struct nfs_pgio_data *data, struct rpc_message *m &task_setup_data->rpc_client, msg, data); } -static int nfs_do_write(struct nfs_pgio_data *data, - const struct rpc_call_ops *call_ops, - int how) -{ - struct inode *inode = data->header->inode; - - return nfs_initiate_pgio(NFS_CLIENT(inode), data, call_ops, how, 0); -} - -static int nfs_do_multiple_writes(struct list_head *head, - const struct rpc_call_ops *call_ops, - int how) -{ - struct nfs_pgio_data *data; - int ret = 0; - - while (!list_empty(head)) { - int ret2; - - data = list_first_entry(head, struct nfs_pgio_data, list); - list_del_init(&data->list); - - ret2 = nfs_do_write(data, call_ops, how); - if (ret == 0) - ret = ret2; - } - return ret; -} - /* If a nfs_flush_* function fails, it should remove reqs from @head and * call this on each, which will prepare them to be retried on next * writeback using standard nfs. @@ -1018,7 +989,7 @@ static int nfs_generic_pg_writepages(struct nfs_pageio_descriptor *desc) atomic_inc(&hdr->refcnt); ret = nfs_generic_pgio(desc, hdr); if (ret == 0) - ret = nfs_do_multiple_writes(&hdr->rpc_list, + ret = nfs_do_multiple_pgios(&hdr->rpc_list, desc->pg_rpc_callops, desc->pg_ioflags); if (atomic_dec_and_test(&hdr->refcnt)) -- cgit v1.1 From cf485fcd68bc2dd91258e844ba4649404fff3235 Mon Sep 17 00:00:00 2001 From: Anna Schumaker Date: Tue, 6 May 2014 09:12:39 -0400 Subject: NFS: Create a common generic_pg_pgios() What we have here is two functions that look identical. Let's share some more code! Signed-off-by: Anna Schumaker Signed-off-by: Trond Myklebust --- fs/nfs/internal.h | 2 +- fs/nfs/pagelist.c | 26 +++++++++++++++++++++++++- fs/nfs/read.c | 25 +------------------------ fs/nfs/write.c | 26 +------------------------- 4 files changed, 28 insertions(+), 51 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h index 886273a..29e99aa 100644 --- a/fs/nfs/internal.h +++ b/fs/nfs/internal.h @@ -243,7 +243,7 @@ void nfs_pgio_data_release(struct nfs_pgio_data *); int nfs_generic_pgio(struct nfs_pageio_descriptor *, struct nfs_pgio_header *); int nfs_initiate_pgio(struct rpc_clnt *, struct nfs_pgio_data *, const struct rpc_call_ops *, int, int); -int nfs_do_multiple_pgios(struct list_head *, const struct rpc_call_ops *, int); +int nfs_generic_pg_pgios(struct nfs_pageio_descriptor *); static inline void nfs_iocounter_init(struct nfs_io_counter *c) { diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c index 2471e02..83d4ab4 100644 --- a/fs/nfs/pagelist.c +++ b/fs/nfs/pagelist.c @@ -493,7 +493,7 @@ out: } EXPORT_SYMBOL_GPL(nfs_initiate_pgio); -int nfs_do_multiple_pgios(struct list_head *head, +static int nfs_do_multiple_pgios(struct list_head *head, const struct rpc_call_ops *call_ops, int how) { @@ -688,6 +688,30 @@ static int nfs_pgio_one(struct nfs_pageio_descriptor *desc, return 0; } +int nfs_generic_pg_pgios(struct nfs_pageio_descriptor *desc) +{ + struct nfs_rw_header *rw_hdr; + struct nfs_pgio_header *hdr; + int ret; + + rw_hdr = nfs_rw_header_alloc(desc->pg_rw_ops); + if (!rw_hdr) { + desc->pg_completion_ops->error_cleanup(&desc->pg_list); + return -ENOMEM; + } + hdr = &rw_hdr->header; + nfs_pgheader_init(desc, hdr, nfs_rw_header_free); + atomic_inc(&hdr->refcnt); + ret = nfs_generic_pgio(desc, hdr); + if (ret == 0) + ret = nfs_do_multiple_pgios(&hdr->rpc_list, + desc->pg_rpc_callops, + desc->pg_ioflags); + if (atomic_dec_and_test(&hdr->refcnt)) + hdr->completion_ops->completion(hdr); + return ret; +} + int nfs_generic_pgio(struct nfs_pageio_descriptor *desc, struct nfs_pgio_header *hdr) { diff --git a/fs/nfs/read.c b/fs/nfs/read.c index b834d45..ebd1666 100644 --- a/fs/nfs/read.c +++ b/fs/nfs/read.c @@ -178,32 +178,9 @@ static const struct nfs_pgio_completion_ops nfs_async_read_completion_ops = { .completion = nfs_read_completion, }; -static int nfs_generic_pg_readpages(struct nfs_pageio_descriptor *desc) -{ - struct nfs_rw_header *rhdr; - struct nfs_pgio_header *hdr; - int ret; - - rhdr = nfs_rw_header_alloc(desc->pg_rw_ops); - if (!rhdr) { - desc->pg_completion_ops->error_cleanup(&desc->pg_list); - return -ENOMEM; - } - hdr = &rhdr->header; - nfs_pgheader_init(desc, hdr, nfs_rw_header_free); - atomic_inc(&hdr->refcnt); - ret = nfs_generic_pgio(desc, hdr); - if (ret == 0) - ret = nfs_do_multiple_pgios(&hdr->rpc_list, - desc->pg_rpc_callops, 0); - if (atomic_dec_and_test(&hdr->refcnt)) - hdr->completion_ops->completion(hdr); - return ret; -} - static const struct nfs_pageio_ops nfs_pageio_read_ops = { .pg_test = nfs_generic_pg_test, - .pg_doio = nfs_generic_pg_readpages, + .pg_doio = nfs_generic_pg_pgios, }; /* diff --git a/fs/nfs/write.c b/fs/nfs/write.c index d3fa181..31a8b29 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -973,33 +973,9 @@ static const struct nfs_pgio_completion_ops nfs_async_write_completion_ops = { .completion = nfs_write_completion, }; -static int nfs_generic_pg_writepages(struct nfs_pageio_descriptor *desc) -{ - struct nfs_rw_header *whdr; - struct nfs_pgio_header *hdr; - int ret; - - whdr = nfs_rw_header_alloc(desc->pg_rw_ops); - if (!whdr) { - desc->pg_completion_ops->error_cleanup(&desc->pg_list); - return -ENOMEM; - } - hdr = &whdr->header; - nfs_pgheader_init(desc, hdr, nfs_rw_header_free); - atomic_inc(&hdr->refcnt); - ret = nfs_generic_pgio(desc, hdr); - if (ret == 0) - ret = nfs_do_multiple_pgios(&hdr->rpc_list, - desc->pg_rpc_callops, - desc->pg_ioflags); - if (atomic_dec_and_test(&hdr->refcnt)) - hdr->completion_ops->completion(hdr); - return ret; -} - static const struct nfs_pageio_ops nfs_pageio_write_ops = { .pg_test = nfs_generic_pg_test, - .pg_doio = nfs_generic_pg_writepages, + .pg_doio = nfs_generic_pg_pgios, }; void nfs_pageio_init_write(struct nfs_pageio_descriptor *pgio, -- cgit v1.1 From 41d8d5b7a559a9bfbf9680d1e4777e1a7b0149d5 Mon Sep 17 00:00:00 2001 From: Anna Schumaker Date: Tue, 6 May 2014 09:12:40 -0400 Subject: NFS: Create a common nfs_pageio_ops struct At this point the read and write structures look identical, so combine them into something shared by both. Signed-off-by: Anna Schumaker Signed-off-by: Trond Myklebust --- fs/nfs/internal.h | 2 +- fs/nfs/pagelist.c | 7 ++++++- fs/nfs/read.c | 10 ++-------- fs/nfs/write.c | 9 ++------- 4 files changed, 11 insertions(+), 17 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h index 29e99aa..8b69cba 100644 --- a/fs/nfs/internal.h +++ b/fs/nfs/internal.h @@ -237,13 +237,13 @@ extern void nfs_pgheader_init(struct nfs_pageio_descriptor *desc, void nfs_set_pgio_error(struct nfs_pgio_header *hdr, int error, loff_t pos); int nfs_iocounter_wait(struct nfs_io_counter *c); +extern const struct nfs_pageio_ops nfs_pgio_rw_ops; struct nfs_rw_header *nfs_rw_header_alloc(const struct nfs_rw_ops *); void nfs_rw_header_free(struct nfs_pgio_header *); void nfs_pgio_data_release(struct nfs_pgio_data *); int nfs_generic_pgio(struct nfs_pageio_descriptor *, struct nfs_pgio_header *); int nfs_initiate_pgio(struct rpc_clnt *, struct nfs_pgio_data *, const struct rpc_call_ops *, int, int); -int nfs_generic_pg_pgios(struct nfs_pageio_descriptor *); static inline void nfs_iocounter_init(struct nfs_io_counter *c) { diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c index 83d4ab4..2959109 100644 --- a/fs/nfs/pagelist.c +++ b/fs/nfs/pagelist.c @@ -688,7 +688,7 @@ static int nfs_pgio_one(struct nfs_pageio_descriptor *desc, return 0; } -int nfs_generic_pg_pgios(struct nfs_pageio_descriptor *desc) +static int nfs_generic_pg_pgios(struct nfs_pageio_descriptor *desc) { struct nfs_rw_header *rw_hdr; struct nfs_pgio_header *hdr; @@ -933,3 +933,8 @@ static const struct rpc_call_ops nfs_pgio_common_ops = { .rpc_call_done = nfs_pgio_result, .rpc_release = nfs_pgio_release, }; + +const struct nfs_pageio_ops nfs_pgio_rw_ops = { + .pg_test = nfs_generic_pg_test, + .pg_doio = nfs_generic_pg_pgios, +}; diff --git a/fs/nfs/read.c b/fs/nfs/read.c index ebd1666..3986668 100644 --- a/fs/nfs/read.c +++ b/fs/nfs/read.c @@ -28,7 +28,6 @@ #define NFSDBG_FACILITY NFSDBG_PAGECACHE -static const struct nfs_pageio_ops nfs_pageio_read_ops; static const struct nfs_pgio_completion_ops nfs_async_read_completion_ops; static const struct nfs_rw_ops nfs_rw_read_ops; @@ -58,7 +57,7 @@ void nfs_pageio_init_read(struct nfs_pageio_descriptor *pgio, const struct nfs_pgio_completion_ops *compl_ops) { struct nfs_server *server = NFS_SERVER(inode); - const struct nfs_pageio_ops *pg_ops = &nfs_pageio_read_ops; + const struct nfs_pageio_ops *pg_ops = &nfs_pgio_rw_ops; #ifdef CONFIG_NFS_V4_1 if (server->pnfs_curr_ld && !force_mds) @@ -71,7 +70,7 @@ EXPORT_SYMBOL_GPL(nfs_pageio_init_read); void nfs_pageio_reset_read_mds(struct nfs_pageio_descriptor *pgio) { - pgio->pg_ops = &nfs_pageio_read_ops; + pgio->pg_ops = &nfs_pgio_rw_ops; pgio->pg_bsize = NFS_SERVER(pgio->pg_inode)->rsize; } EXPORT_SYMBOL_GPL(nfs_pageio_reset_read_mds); @@ -178,11 +177,6 @@ static const struct nfs_pgio_completion_ops nfs_async_read_completion_ops = { .completion = nfs_read_completion, }; -static const struct nfs_pageio_ops nfs_pageio_read_ops = { - .pg_test = nfs_generic_pg_test, - .pg_doio = nfs_generic_pg_pgios, -}; - /* * This is the callback from RPC telling us whether a reply was * received or some error occurred (timeout or socket shutdown). diff --git a/fs/nfs/write.c b/fs/nfs/write.c index 31a8b29..2680f29 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -973,17 +973,12 @@ static const struct nfs_pgio_completion_ops nfs_async_write_completion_ops = { .completion = nfs_write_completion, }; -static const struct nfs_pageio_ops nfs_pageio_write_ops = { - .pg_test = nfs_generic_pg_test, - .pg_doio = nfs_generic_pg_pgios, -}; - void nfs_pageio_init_write(struct nfs_pageio_descriptor *pgio, struct inode *inode, int ioflags, bool force_mds, const struct nfs_pgio_completion_ops *compl_ops) { struct nfs_server *server = NFS_SERVER(inode); - const struct nfs_pageio_ops *pg_ops = &nfs_pageio_write_ops; + const struct nfs_pageio_ops *pg_ops = &nfs_pgio_rw_ops; #ifdef CONFIG_NFS_V4_1 if (server->pnfs_curr_ld && !force_mds) @@ -996,7 +991,7 @@ EXPORT_SYMBOL_GPL(nfs_pageio_init_write); void nfs_pageio_reset_write_mds(struct nfs_pageio_descriptor *pgio) { - pgio->pg_ops = &nfs_pageio_write_ops; + pgio->pg_ops = &nfs_pgio_rw_ops; pgio->pg_bsize = NFS_SERVER(pgio->pg_inode)->wsize; } EXPORT_SYMBOL_GPL(nfs_pageio_reset_write_mds); -- cgit v1.1 From d201c4de518c1d617aa216664869fa329d562d7d Mon Sep 17 00:00:00 2001 From: Weston Andros Adamson Date: Thu, 15 May 2014 11:56:40 -0400 Subject: pnfs: fix race in filelayout commit path Hold the lock while modifying commit info dataserver buckets. The following oops can be reproduced by running iozone for a while against a 2 DS pynfs filelayout server. general protection fault: 0000 [#1] SMP DEBUG_PAGEALLOC Modules linked in: nfs_layout_nfsv41_files rpcsec_gss_krb5 nfsv4 nfs fscache CPU: 0 PID: 903 Comm: iozone Not tainted 3.15.0-rc1-branch-dros_testing+ #44 Hardware name: VMware, Inc. VMware Virtual Platform/440BX Desktop Reference task: ffff880078164480 ti: ffff88006e972000 task.ti: ffff88006e972000 RIP: 0010:[] [] nfs_init_commit+0x22/0x RSP: 0018:ffff88006e973d30 EFLAGS: 00010246 RAX: ffff88006e973e00 RBX: ffff88006e828800 RCX: ffff88006e973e10 RDX: 0000000000000000 RSI: ffff88006e973e00 RDI: dead4ead00000000 RBP: ffff88006e973d38 R08: ffff88006e8289d8 R09: 0000000000000000 R10: ffff88006e8289d8 R11: 0000000000016988 R12: ffff88006e973b98 R13: ffff88007a0a6648 R14: ffff88006e973e10 R15: ffff88006e828800 FS: 00007f2ce396b740(0000) GS:ffff88007f200000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 00007f03278a1000 CR3: 0000000079043000 CR4: 00000000001407f0 Stack: ffff88006e8289d8 ffff88006e973da8 ffffffffa00f144f ffff88006e9478c0 ffff88006e973e00 ffff88006de21080 0000000100000002 ffff880079be6c48 ffff88006e973d70 ffff88006e973d70 ffff88006e973e10 ffff88006de21080 Call Trace: [] filelayout_commit_pagelist+0x1ae/0x34a [nfs_layout_nfsv [] nfs_generic_commit_list+0x92/0xc4 [nfs] [] nfs_commit_inode+0xaf/0x114 [nfs] [] nfs_file_fsync_commit+0x82/0xbe [nfs] [] nfs4_file_fsync+0x59/0x9b [nfsv4] [] vfs_fsync_range+0x18/0x20 [] vfs_fsync+0x1c/0x1e [] nfs_file_flush+0x7f/0x84 [nfs] [] filp_close+0x3c/0x72 [] __close_fd+0x82/0x9a [] SyS_close+0x23/0x4c [] system_call_fastpath+0x16/0x1b Code: 5b 41 5c 41 5d 41 5e 5d c3 0f 1f 44 00 00 55 48 89 e5 53 48 89 fb 48 8 RIP [] nfs_init_commit+0x22/0xe1 [nfs] RSP ---[ end trace 732fe6419b235e2f ]--- Suggested-by: Trond Myklebust Signed-off-by: Weston Andros Adamson Signed-off-by: Trond Myklebust --- fs/nfs/nfs4filelayout.c | 20 ++++++++++++++++---- 1 file changed, 16 insertions(+), 4 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/nfs4filelayout.c b/fs/nfs/nfs4filelayout.c index 7954e16..9fd7ceb 100644 --- a/fs/nfs/nfs4filelayout.c +++ b/fs/nfs/nfs4filelayout.c @@ -1067,6 +1067,7 @@ filelayout_choose_commit_list(struct nfs_page *req, */ j = nfs4_fl_calc_j_index(lseg, req_offset(req)); i = select_bucket_index(fl, j); + spin_lock(cinfo->lock); buckets = cinfo->ds->buckets; list = &buckets[i].written; if (list_empty(list)) { @@ -1080,6 +1081,7 @@ filelayout_choose_commit_list(struct nfs_page *req, } set_bit(PG_COMMIT_TO_DS, &req->wb_flags); cinfo->ds->nwritten++; + spin_unlock(cinfo->lock); return list; } @@ -1176,6 +1178,7 @@ transfer_commit_list(struct list_head *src, struct list_head *dst, return ret; } +/* Note called with cinfo->lock held. */ static int filelayout_scan_ds_commit_list(struct pnfs_commit_bucket *bucket, struct nfs_commit_info *cinfo, @@ -1220,15 +1223,18 @@ static void filelayout_recover_commit_reqs(struct list_head *dst, struct nfs_commit_info *cinfo) { struct pnfs_commit_bucket *b; + struct pnfs_layout_segment *freeme; int i; +restart: spin_lock(cinfo->lock); for (i = 0, b = cinfo->ds->buckets; i < cinfo->ds->nbuckets; i++, b++) { if (transfer_commit_list(&b->written, dst, cinfo, 0)) { - spin_unlock(cinfo->lock); - pnfs_put_lseg(b->wlseg); + freeme = b->wlseg; b->wlseg = NULL; - spin_lock(cinfo->lock); + spin_unlock(cinfo->lock); + pnfs_put_lseg(freeme); + goto restart; } } cinfo->ds->nwritten = 0; @@ -1243,6 +1249,7 @@ alloc_ds_commits(struct nfs_commit_info *cinfo, struct list_head *list) struct nfs_commit_data *data; int i, j; unsigned int nreq = 0; + struct pnfs_layout_segment *freeme; fl_cinfo = cinfo->ds; bucket = fl_cinfo->buckets; @@ -1253,8 +1260,10 @@ alloc_ds_commits(struct nfs_commit_info *cinfo, struct list_head *list) if (!data) break; data->ds_commit_index = i; + spin_lock(cinfo->lock); data->lseg = bucket->clseg; bucket->clseg = NULL; + spin_unlock(cinfo->lock); list_add(&data->pages, list); nreq++; } @@ -1264,8 +1273,11 @@ alloc_ds_commits(struct nfs_commit_info *cinfo, struct list_head *list) if (list_empty(&bucket->committing)) continue; nfs_retry_commit(&bucket->committing, bucket->clseg, cinfo); - pnfs_put_lseg(bucket->clseg); + spin_lock(cinfo->lock); + freeme = bucket->clseg; bucket->clseg = NULL; + spin_unlock(cinfo->lock); + pnfs_put_lseg(freeme); } /* Caller will clean up entries put on list */ return nreq; -- cgit v1.1 From 8c8f1ac109726e4ed44a920f5c962c84610d4a17 Mon Sep 17 00:00:00 2001 From: Weston Andros Adamson Date: Thu, 15 May 2014 11:56:42 -0400 Subject: nfs: remove unused arg from nfs_create_request @inode is passed but not used. Signed-off-by: Weston Andros Adamson Signed-off-by: Trond Myklebust --- fs/nfs/direct.c | 6 ++---- fs/nfs/pagelist.c | 4 +--- fs/nfs/read.c | 5 ++--- fs/nfs/write.c | 2 +- 4 files changed, 6 insertions(+), 11 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c index 164b016..1dd8c62 100644 --- a/fs/nfs/direct.c +++ b/fs/nfs/direct.c @@ -380,8 +380,7 @@ static ssize_t nfs_direct_read_schedule_segment(struct nfs_pageio_descriptor *de struct nfs_page *req; unsigned int req_len = min_t(size_t, bytes, PAGE_SIZE - pgbase); /* XXX do we need to do the eof zeroing found in async_filler? */ - req = nfs_create_request(dreq->ctx, dreq->inode, - pagevec[i], + req = nfs_create_request(dreq->ctx, pagevec[i], pgbase, req_len); if (IS_ERR(req)) { result = PTR_ERR(req); @@ -750,8 +749,7 @@ static ssize_t nfs_direct_write_schedule_segment(struct nfs_pageio_descriptor *d struct nfs_page *req; unsigned int req_len = min_t(size_t, bytes, PAGE_SIZE - pgbase); - req = nfs_create_request(dreq->ctx, dreq->inode, - pagevec[i], + req = nfs_create_request(dreq->ctx, pagevec[i], pgbase, req_len); if (IS_ERR(req)) { result = PTR_ERR(req); diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c index 2959109..4b4b212 100644 --- a/fs/nfs/pagelist.c +++ b/fs/nfs/pagelist.c @@ -139,7 +139,6 @@ nfs_iocounter_wait(struct nfs_io_counter *c) /** * nfs_create_request - Create an NFS read/write request. * @ctx: open context to use - * @inode: inode to which the request is attached * @page: page to write * @offset: starting offset within the page for the write * @count: number of bytes to read/write @@ -149,8 +148,7 @@ nfs_iocounter_wait(struct nfs_io_counter *c) * User should ensure it is safe to sleep in this function. */ struct nfs_page * -nfs_create_request(struct nfs_open_context *ctx, struct inode *inode, - struct page *page, +nfs_create_request(struct nfs_open_context *ctx, struct page *page, unsigned int offset, unsigned int count) { struct nfs_page *req; diff --git a/fs/nfs/read.c b/fs/nfs/read.c index 3986668..46d9044 100644 --- a/fs/nfs/read.c +++ b/fs/nfs/read.c @@ -85,7 +85,7 @@ int nfs_readpage_async(struct nfs_open_context *ctx, struct inode *inode, len = nfs_page_length(page); if (len == 0) return nfs_return_empty_page(page); - new = nfs_create_request(ctx, inode, page, 0, len); + new = nfs_create_request(ctx, page, 0, len); if (IS_ERR(new)) { unlock_page(page); return PTR_ERR(new); @@ -303,7 +303,6 @@ static int readpage_async_filler(void *data, struct page *page) { struct nfs_readdesc *desc = (struct nfs_readdesc *)data; - struct inode *inode = page_file_mapping(page)->host; struct nfs_page *new; unsigned int len; int error; @@ -312,7 +311,7 @@ readpage_async_filler(void *data, struct page *page) if (len == 0) return nfs_return_empty_page(page); - new = nfs_create_request(desc->ctx, inode, page, 0, len); + new = nfs_create_request(desc->ctx, page, 0, len); if (IS_ERR(new)) goto out_error; diff --git a/fs/nfs/write.c b/fs/nfs/write.c index 2680f29..e773df2 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -761,7 +761,7 @@ static struct nfs_page * nfs_setup_write_request(struct nfs_open_context* ctx, req = nfs_try_to_update_request(inode, page, offset, bytes); if (req != NULL) goto out; - req = nfs_create_request(ctx, inode, page, offset, bytes); + req = nfs_create_request(ctx, page, offset, bytes); if (IS_ERR(req)) goto out; nfs_inode_add_request(inode, req); -- cgit v1.1 From b4fdac1a5150174df0847a45dc6612ce5ce3daeb Mon Sep 17 00:00:00 2001 From: Weston Andros Adamson Date: Thu, 15 May 2014 11:56:43 -0400 Subject: nfs: modify pg_test interface to return size_t This is a step toward allowing pg_test to inform the the coalescing code to reduce the size of requests so they may fit in whatever scheme the pg_test callback wants to define. For now, just return the size of the request if there is space, or 0 if there is not. This shouldn't change any behavior as it acts the same as when the pg_test functions returned bool. Signed-off-by: Weston Andros Adamson Signed-off-by: Trond Myklebust --- fs/nfs/blocklayout/blocklayout.c | 16 ++++++++++++---- fs/nfs/nfs4filelayout.c | 12 +++++++----- fs/nfs/objlayout/objio_osd.c | 15 ++++++++++----- fs/nfs/pagelist.c | 22 +++++++++++++++++++--- fs/nfs/pnfs.c | 12 +++++++++--- fs/nfs/pnfs.h | 3 ++- 6 files changed, 59 insertions(+), 21 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/blocklayout/blocklayout.c b/fs/nfs/blocklayout/blocklayout.c index 206cc68..9b431f4 100644 --- a/fs/nfs/blocklayout/blocklayout.c +++ b/fs/nfs/blocklayout/blocklayout.c @@ -1189,13 +1189,17 @@ bl_pg_init_read(struct nfs_pageio_descriptor *pgio, struct nfs_page *req) pnfs_generic_pg_init_read(pgio, req); } -static bool +/* + * Return 0 if @req cannot be coalesced into @pgio, otherwise return the number + * of bytes (maximum @req->wb_bytes) that can be coalesced. + */ +static size_t bl_pg_test_read(struct nfs_pageio_descriptor *pgio, struct nfs_page *prev, struct nfs_page *req) { if (pgio->pg_dreq != NULL && !is_aligned_req(req, SECTOR_SIZE)) - return false; + return 0; return pnfs_generic_pg_test(pgio, prev, req); } @@ -1241,13 +1245,17 @@ bl_pg_init_write(struct nfs_pageio_descriptor *pgio, struct nfs_page *req) } } -static bool +/* + * Return 0 if @req cannot be coalesced into @pgio, otherwise return the number + * of bytes (maximum @req->wb_bytes) that can be coalesced. + */ +static size_t bl_pg_test_write(struct nfs_pageio_descriptor *pgio, struct nfs_page *prev, struct nfs_page *req) { if (pgio->pg_dreq != NULL && !is_aligned_req(req, PAGE_CACHE_SIZE)) - return false; + return 0; return pnfs_generic_pg_test(pgio, prev, req); } diff --git a/fs/nfs/nfs4filelayout.c b/fs/nfs/nfs4filelayout.c index 9fd7ceb..ba9a9aa 100644 --- a/fs/nfs/nfs4filelayout.c +++ b/fs/nfs/nfs4filelayout.c @@ -915,10 +915,10 @@ filelayout_alloc_lseg(struct pnfs_layout_hdr *layoutid, /* * filelayout_pg_test(). Called by nfs_can_coalesce_requests() * - * return true : coalesce page - * return false : don't coalesce page + * Return 0 if @req cannot be coalesced into @pgio, otherwise return the number + * of bytes (maximum @req->wb_bytes) that can be coalesced. */ -static bool +static size_t filelayout_pg_test(struct nfs_pageio_descriptor *pgio, struct nfs_page *prev, struct nfs_page *req) { @@ -927,7 +927,7 @@ filelayout_pg_test(struct nfs_pageio_descriptor *pgio, struct nfs_page *prev, if (!pnfs_generic_pg_test(pgio, prev, req) || !nfs_generic_pg_test(pgio, prev, req)) - return false; + return 0; p_stripe = (u64)req_offset(prev); r_stripe = (u64)req_offset(req); @@ -936,7 +936,9 @@ filelayout_pg_test(struct nfs_pageio_descriptor *pgio, struct nfs_page *prev, do_div(p_stripe, stripe_unit); do_div(r_stripe, stripe_unit); - return (p_stripe == r_stripe); + if (p_stripe == r_stripe) + return req->wb_bytes; + return 0; } static void diff --git a/fs/nfs/objlayout/objio_osd.c b/fs/nfs/objlayout/objio_osd.c index 426b366..71b9c69 100644 --- a/fs/nfs/objlayout/objio_osd.c +++ b/fs/nfs/objlayout/objio_osd.c @@ -564,14 +564,19 @@ int objio_write_pagelist(struct nfs_pgio_data *wdata, int how) return 0; } -static bool objio_pg_test(struct nfs_pageio_descriptor *pgio, +/* + * Return 0 if @req cannot be coalesced into @pgio, otherwise return the number + * of bytes (maximum @req->wb_bytes) that can be coalesced. + */ +static size_t objio_pg_test(struct nfs_pageio_descriptor *pgio, struct nfs_page *prev, struct nfs_page *req) { - if (!pnfs_generic_pg_test(pgio, prev, req)) - return false; + if (!pnfs_generic_pg_test(pgio, prev, req) || + pgio->pg_count + req->wb_bytes > + (unsigned long)pgio->pg_layout_private) + return 0; - return pgio->pg_count + req->wb_bytes <= - (unsigned long)pgio->pg_layout_private; + return req->wb_bytes; } static void objio_init_read(struct nfs_pageio_descriptor *pgio, struct nfs_page *req) diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c index 4b4b212..8223343 100644 --- a/fs/nfs/pagelist.c +++ b/fs/nfs/pagelist.c @@ -280,7 +280,17 @@ nfs_wait_on_request(struct nfs_page *req) TASK_UNINTERRUPTIBLE); } -bool nfs_generic_pg_test(struct nfs_pageio_descriptor *desc, struct nfs_page *prev, struct nfs_page *req) +/* + * nfs_generic_pg_test - determine if requests can be coalesced + * @desc: pointer to descriptor + * @prev: previous request in desc, or NULL + * @req: this request + * + * Returns zero if @req can be coalesced into @desc, otherwise it returns + * the size of the request. + */ +size_t nfs_generic_pg_test(struct nfs_pageio_descriptor *desc, + struct nfs_page *prev, struct nfs_page *req) { /* * FIXME: ideally we should be able to coalesce all requests @@ -292,7 +302,9 @@ bool nfs_generic_pg_test(struct nfs_pageio_descriptor *desc, struct nfs_page *pr if (desc->pg_bsize < PAGE_SIZE) return 0; - return desc->pg_count + req->wb_bytes <= desc->pg_bsize; + if (desc->pg_count + req->wb_bytes <= desc->pg_bsize) + return req->wb_bytes; + return 0; } EXPORT_SYMBOL_GPL(nfs_generic_pg_test); @@ -747,6 +759,8 @@ static bool nfs_can_coalesce_requests(struct nfs_page *prev, struct nfs_page *req, struct nfs_pageio_descriptor *pgio) { + size_t size; + if (!nfs_match_open_context(req->wb_context, prev->wb_context)) return false; if (req->wb_context->dentry->d_inode->i_flock != NULL && @@ -758,7 +772,9 @@ static bool nfs_can_coalesce_requests(struct nfs_page *prev, return false; if (req_offset(req) != req_offset(prev) + prev->wb_bytes) return false; - return pgio->pg_ops->pg_test(pgio, prev, req); + size = pgio->pg_ops->pg_test(pgio, prev, req); + WARN_ON_ONCE(size && size != req->wb_bytes); + return size > 0; } /** diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index 0fe6701..de6eb16 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c @@ -1434,7 +1434,11 @@ pnfs_generic_pg_init_write(struct nfs_pageio_descriptor *pgio, } EXPORT_SYMBOL_GPL(pnfs_generic_pg_init_write); -bool +/* + * Return 0 if @req cannot be coalesced into @pgio, otherwise return the number + * of bytes (maximum @req->wb_bytes) that can be coalesced. + */ +size_t pnfs_generic_pg_test(struct nfs_pageio_descriptor *pgio, struct nfs_page *prev, struct nfs_page *req) { @@ -1455,8 +1459,10 @@ pnfs_generic_pg_test(struct nfs_pageio_descriptor *pgio, struct nfs_page *prev, * first byte that lies outside the pnfs_layout_range. FIXME? * */ - return req_offset(req) < end_offset(pgio->pg_lseg->pls_range.offset, - pgio->pg_lseg->pls_range.length); + if (req_offset(req) < end_offset(pgio->pg_lseg->pls_range.offset, + pgio->pg_lseg->pls_range.length)) + return req->wb_bytes; + return 0; } EXPORT_SYMBOL_GPL(pnfs_generic_pg_test); diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h index 0031267..dccf182 100644 --- a/fs/nfs/pnfs.h +++ b/fs/nfs/pnfs.h @@ -187,7 +187,8 @@ int pnfs_generic_pg_readpages(struct nfs_pageio_descriptor *desc); void pnfs_generic_pg_init_write(struct nfs_pageio_descriptor *pgio, struct nfs_page *req, u64 wb_size); int pnfs_generic_pg_writepages(struct nfs_pageio_descriptor *desc); -bool pnfs_generic_pg_test(struct nfs_pageio_descriptor *pgio, struct nfs_page *prev, struct nfs_page *req); +size_t pnfs_generic_pg_test(struct nfs_pageio_descriptor *pgio, + struct nfs_page *prev, struct nfs_page *req); void pnfs_set_lo_fail(struct pnfs_layout_segment *lseg); struct pnfs_layout_segment *pnfs_layout_process(struct nfs4_layoutget *lgp); void pnfs_free_lseg_list(struct list_head *tmp_list); -- cgit v1.1 From ab75e417192a486ffe63a314b6d2e7361f0e157f Mon Sep 17 00:00:00 2001 From: Weston Andros Adamson Date: Thu, 15 May 2014 11:56:44 -0400 Subject: nfs: call nfs_can_coalesce_requests for every req Call nfs_can_coalesce_requests for every request, even the first one. This is needed for future patches to give pg_test a way to inform add_request to reduce the size of the request. Now @prev can be null in nfs_can_coalesce_requests and pg_test functions. Signed-off-by: Weston Andros Adamson Signed-off-by: Trond Myklebust --- fs/nfs/nfs4filelayout.c | 3 +++ fs/nfs/pagelist.c | 34 +++++++++++++++++++--------------- 2 files changed, 22 insertions(+), 15 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/nfs4filelayout.c b/fs/nfs/nfs4filelayout.c index ba9a9aa..9319427 100644 --- a/fs/nfs/nfs4filelayout.c +++ b/fs/nfs/nfs4filelayout.c @@ -929,6 +929,9 @@ filelayout_pg_test(struct nfs_pageio_descriptor *pgio, struct nfs_page *prev, !nfs_generic_pg_test(pgio, prev, req)) return 0; + if (!prev) + return req->wb_bytes; + p_stripe = (u64)req_offset(prev); r_stripe = (u64)req_offset(req); stripe_unit = FILELAYOUT_LSEG(pgio->pg_lseg)->stripe_unit; diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c index 8223343..f343f49 100644 --- a/fs/nfs/pagelist.c +++ b/fs/nfs/pagelist.c @@ -292,6 +292,8 @@ nfs_wait_on_request(struct nfs_page *req) size_t nfs_generic_pg_test(struct nfs_pageio_descriptor *desc, struct nfs_page *prev, struct nfs_page *req) { + if (!prev) + return req->wb_bytes; /* * FIXME: ideally we should be able to coalesce all requests * that are not block boundary aligned, but currently this @@ -761,17 +763,20 @@ static bool nfs_can_coalesce_requests(struct nfs_page *prev, { size_t size; - if (!nfs_match_open_context(req->wb_context, prev->wb_context)) - return false; - if (req->wb_context->dentry->d_inode->i_flock != NULL && - !nfs_match_lock_context(req->wb_lock_context, prev->wb_lock_context)) - return false; - if (req->wb_pgbase != 0) - return false; - if (prev->wb_pgbase + prev->wb_bytes != PAGE_CACHE_SIZE) - return false; - if (req_offset(req) != req_offset(prev) + prev->wb_bytes) - return false; + if (prev) { + if (!nfs_match_open_context(req->wb_context, prev->wb_context)) + return false; + if (req->wb_context->dentry->d_inode->i_flock != NULL && + !nfs_match_lock_context(req->wb_lock_context, + prev->wb_lock_context)) + return false; + if (req->wb_pgbase != 0) + return false; + if (prev->wb_pgbase + prev->wb_bytes != PAGE_CACHE_SIZE) + return false; + if (req_offset(req) != req_offset(prev) + prev->wb_bytes) + return false; + } size = pgio->pg_ops->pg_test(pgio, prev, req); WARN_ON_ONCE(size && size != req->wb_bytes); return size > 0; @@ -788,17 +793,16 @@ static bool nfs_can_coalesce_requests(struct nfs_page *prev, static int nfs_pageio_do_add_request(struct nfs_pageio_descriptor *desc, struct nfs_page *req) { + struct nfs_page *prev = NULL; if (desc->pg_count != 0) { - struct nfs_page *prev; - prev = nfs_list_entry(desc->pg_list.prev); - if (!nfs_can_coalesce_requests(prev, req, desc)) - return 0; } else { if (desc->pg_ops->pg_init) desc->pg_ops->pg_init(desc, req); desc->pg_base = req->wb_pgbase; } + if (!nfs_can_coalesce_requests(prev, req, desc)) + return 0; nfs_list_remove_request(req); nfs_list_add_request(req, &desc->pg_list); desc->pg_count += req->wb_bytes; -- cgit v1.1 From 2bfc6e566daa8386c9cffef2f7de17fc330d3835 Mon Sep 17 00:00:00 2001 From: Weston Andros Adamson Date: Thu, 15 May 2014 11:56:45 -0400 Subject: nfs: add support for multiple nfs reqs per page Add "page groups" - a circular list of nfs requests (struct nfs_page) that all reference the same page. This gives nfs read and write paths the ability to account for sub-page regions independently. This somewhat follows the design of struct buffer_head's sub-page accounting. Only "head" requests are ever added/removed from the inode list in the buffered write path. "head" and "sub" requests are treated the same through the read path and the rest of the write/commit path. Requests are given an extra reference across the life of the list. Page groups are never rejoined after being split. If the read/write request fails and the client falls back to another path (ie revert to MDS in PNFS case), the already split requests are pushed through the recoalescing code again, which may split them further and then coalesce them into properly sized requests on the wire. Fragmentation shouldn't be a problem with the current design, because we flush all requests in page group when a non-contiguous request is added, so the only time resplitting should occur is on a resend of a read or write. This patch lays the groundwork for sub-page splitting, but does not actually do any splitting. For now all page groups have one request as pg_test functions don't yet split pages. There are several related patches that are needed support multiple requests per page group. Signed-off-by: Weston Andros Adamson Signed-off-by: Trond Myklebust --- fs/nfs/direct.c | 7 +- fs/nfs/pagelist.c | 220 ++++++++++++++++++++++++++++++++++++++++++++++++++---- fs/nfs/read.c | 4 +- fs/nfs/write.c | 13 +++- 4 files changed, 225 insertions(+), 19 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c index 1dd8c62..2c0e08f 100644 --- a/fs/nfs/direct.c +++ b/fs/nfs/direct.c @@ -380,7 +380,7 @@ static ssize_t nfs_direct_read_schedule_segment(struct nfs_pageio_descriptor *de struct nfs_page *req; unsigned int req_len = min_t(size_t, bytes, PAGE_SIZE - pgbase); /* XXX do we need to do the eof zeroing found in async_filler? */ - req = nfs_create_request(dreq->ctx, pagevec[i], + req = nfs_create_request(dreq->ctx, pagevec[i], NULL, pgbase, req_len); if (IS_ERR(req)) { result = PTR_ERR(req); @@ -749,7 +749,7 @@ static ssize_t nfs_direct_write_schedule_segment(struct nfs_pageio_descriptor *d struct nfs_page *req; unsigned int req_len = min_t(size_t, bytes, PAGE_SIZE - pgbase); - req = nfs_create_request(dreq->ctx, pagevec[i], + req = nfs_create_request(dreq->ctx, pagevec[i], NULL, pgbase, req_len); if (IS_ERR(req)) { result = PTR_ERR(req); @@ -827,6 +827,8 @@ static void nfs_direct_write_completion(struct nfs_pgio_header *hdr) spin_unlock(&dreq->lock); while (!list_empty(&hdr->pages)) { + bool do_destroy = true; + req = nfs_list_entry(hdr->pages.next); nfs_list_remove_request(req); switch (bit) { @@ -834,6 +836,7 @@ static void nfs_direct_write_completion(struct nfs_pgio_header *hdr) case NFS_IOHDR_NEED_COMMIT: kref_get(&req->wb_kref); nfs_mark_request_commit(req, hdr->lseg, &cinfo); + do_destroy = false; } nfs_unlock_and_release_request(req); } diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c index f343f49..015fb7b 100644 --- a/fs/nfs/pagelist.c +++ b/fs/nfs/pagelist.c @@ -29,6 +29,8 @@ static struct kmem_cache *nfs_page_cachep; static const struct rpc_call_ops nfs_pgio_common_ops; +static void nfs_free_request(struct nfs_page *); + static bool nfs_pgarray_set(struct nfs_page_array *p, unsigned int pagecount) { p->npages = pagecount; @@ -136,10 +138,151 @@ nfs_iocounter_wait(struct nfs_io_counter *c) return __nfs_iocounter_wait(c); } +/* + * nfs_page_group_lock - lock the head of the page group + * @req - request in group that is to be locked + * + * this lock must be held if modifying the page group list + */ +void +nfs_page_group_lock(struct nfs_page *req) +{ + struct nfs_page *head = req->wb_head; + int err = -EAGAIN; + + WARN_ON_ONCE(head != head->wb_head); + + while (err) + err = wait_on_bit_lock(&head->wb_flags, PG_HEADLOCK, + nfs_wait_bit_killable, TASK_KILLABLE); +} + +/* + * nfs_page_group_unlock - unlock the head of the page group + * @req - request in group that is to be unlocked + */ +void +nfs_page_group_unlock(struct nfs_page *req) +{ + struct nfs_page *head = req->wb_head; + + WARN_ON_ONCE(head != head->wb_head); + + smp_mb__before_clear_bit(); + clear_bit(PG_HEADLOCK, &head->wb_flags); + smp_mb__after_clear_bit(); + wake_up_bit(&head->wb_flags, PG_HEADLOCK); +} + +/* + * nfs_page_group_sync_on_bit_locked + * + * must be called with page group lock held + */ +static bool +nfs_page_group_sync_on_bit_locked(struct nfs_page *req, unsigned int bit) +{ + struct nfs_page *head = req->wb_head; + struct nfs_page *tmp; + + WARN_ON_ONCE(!test_bit(PG_HEADLOCK, &head->wb_flags)); + WARN_ON_ONCE(test_and_set_bit(bit, &req->wb_flags)); + + tmp = req->wb_this_page; + while (tmp != req) { + if (!test_bit(bit, &tmp->wb_flags)) + return false; + tmp = tmp->wb_this_page; + } + + /* true! reset all bits */ + tmp = req; + do { + clear_bit(bit, &tmp->wb_flags); + tmp = tmp->wb_this_page; + } while (tmp != req); + + return true; +} + +/* + * nfs_page_group_sync_on_bit - set bit on current request, but only + * return true if the bit is set for all requests in page group + * @req - request in page group + * @bit - PG_* bit that is used to sync page group + */ +bool nfs_page_group_sync_on_bit(struct nfs_page *req, unsigned int bit) +{ + bool ret; + + nfs_page_group_lock(req); + ret = nfs_page_group_sync_on_bit_locked(req, bit); + nfs_page_group_unlock(req); + + return ret; +} + +/* + * nfs_page_group_init - Initialize the page group linkage for @req + * @req - a new nfs request + * @prev - the previous request in page group, or NULL if @req is the first + * or only request in the group (the head). + */ +static inline void +nfs_page_group_init(struct nfs_page *req, struct nfs_page *prev) +{ + WARN_ON_ONCE(prev == req); + + if (!prev) { + req->wb_head = req; + req->wb_this_page = req; + } else { + WARN_ON_ONCE(prev->wb_this_page != prev->wb_head); + WARN_ON_ONCE(!test_bit(PG_HEADLOCK, &prev->wb_head->wb_flags)); + req->wb_head = prev->wb_head; + req->wb_this_page = prev->wb_this_page; + prev->wb_this_page = req; + + /* grab extra ref if head request has extra ref from + * the write/commit path to handle handoff between write + * and commit lists */ + if (test_bit(PG_INODE_REF, &prev->wb_head->wb_flags)) + kref_get(&req->wb_kref); + } +} + +/* + * nfs_page_group_destroy - sync the destruction of page groups + * @req - request that no longer needs the page group + * + * releases the page group reference from each member once all + * members have called this function. + */ +static void +nfs_page_group_destroy(struct kref *kref) +{ + struct nfs_page *req = container_of(kref, struct nfs_page, wb_kref); + struct nfs_page *tmp, *next; + + if (!nfs_page_group_sync_on_bit(req, PG_TEARDOWN)) + return; + + tmp = req; + do { + next = tmp->wb_this_page; + /* unlink and free */ + tmp->wb_this_page = tmp; + tmp->wb_head = tmp; + nfs_free_request(tmp); + tmp = next; + } while (tmp != req); +} + /** * nfs_create_request - Create an NFS read/write request. * @ctx: open context to use * @page: page to write + * @last: last nfs request created for this page group or NULL if head * @offset: starting offset within the page for the write * @count: number of bytes to read/write * @@ -149,7 +292,8 @@ nfs_iocounter_wait(struct nfs_io_counter *c) */ struct nfs_page * nfs_create_request(struct nfs_open_context *ctx, struct page *page, - unsigned int offset, unsigned int count) + struct nfs_page *last, unsigned int offset, + unsigned int count) { struct nfs_page *req; struct nfs_lock_context *l_ctx; @@ -181,6 +325,7 @@ nfs_create_request(struct nfs_open_context *ctx, struct page *page, req->wb_bytes = count; req->wb_context = get_nfs_open_context(ctx); kref_init(&req->wb_kref); + nfs_page_group_init(req, last); return req; } @@ -238,16 +383,18 @@ static void nfs_clear_request(struct nfs_page *req) } } - /** * nfs_release_request - Release the count on an NFS read/write request * @req: request to release * * Note: Should never be called with the spinlock held! */ -static void nfs_free_request(struct kref *kref) +static void nfs_free_request(struct nfs_page *req) { - struct nfs_page *req = container_of(kref, struct nfs_page, wb_kref); + WARN_ON_ONCE(req->wb_this_page != req); + + /* extra debug: make sure no sync bits are still set */ + WARN_ON_ONCE(test_bit(PG_TEARDOWN, &req->wb_flags)); /* Release struct file and open context */ nfs_clear_request(req); @@ -256,7 +403,7 @@ static void nfs_free_request(struct kref *kref) void nfs_release_request(struct nfs_page *req) { - kref_put(&req->wb_kref, nfs_free_request); + kref_put(&req->wb_kref, nfs_page_group_destroy); } static int nfs_wait_bit_uninterruptible(void *word) @@ -832,21 +979,66 @@ static void nfs_pageio_doio(struct nfs_pageio_descriptor *desc) * @desc: destination io descriptor * @req: request * + * This may split a request into subrequests which are all part of the + * same page group. + * * Returns true if the request 'req' was successfully coalesced into the * existing list of pages 'desc'. */ static int __nfs_pageio_add_request(struct nfs_pageio_descriptor *desc, struct nfs_page *req) { - while (!nfs_pageio_do_add_request(desc, req)) { - desc->pg_moreio = 1; - nfs_pageio_doio(desc); - if (desc->pg_error < 0) - return 0; - desc->pg_moreio = 0; - if (desc->pg_recoalesce) - return 0; - } + struct nfs_page *subreq; + unsigned int bytes_left = 0; + unsigned int offset, pgbase; + + nfs_page_group_lock(req); + + subreq = req; + bytes_left = subreq->wb_bytes; + offset = subreq->wb_offset; + pgbase = subreq->wb_pgbase; + + do { + if (!nfs_pageio_do_add_request(desc, subreq)) { + /* make sure pg_test call(s) did nothing */ + WARN_ON_ONCE(subreq->wb_bytes != bytes_left); + WARN_ON_ONCE(subreq->wb_offset != offset); + WARN_ON_ONCE(subreq->wb_pgbase != pgbase); + + nfs_page_group_unlock(req); + desc->pg_moreio = 1; + nfs_pageio_doio(desc); + if (desc->pg_error < 0) + return 0; + desc->pg_moreio = 0; + if (desc->pg_recoalesce) + return 0; + /* retry add_request for this subreq */ + nfs_page_group_lock(req); + continue; + } + + /* check for buggy pg_test call(s) */ + WARN_ON_ONCE(subreq->wb_bytes + subreq->wb_pgbase > PAGE_SIZE); + WARN_ON_ONCE(subreq->wb_bytes > bytes_left); + WARN_ON_ONCE(subreq->wb_bytes == 0); + + bytes_left -= subreq->wb_bytes; + offset += subreq->wb_bytes; + pgbase += subreq->wb_bytes; + + if (bytes_left) { + subreq = nfs_create_request(req->wb_context, + req->wb_page, + subreq, pgbase, bytes_left); + nfs_lock_request(subreq); + subreq->wb_offset = offset; + subreq->wb_index = req->wb_index; + } + } while (bytes_left > 0); + + nfs_page_group_unlock(req); return 1; } diff --git a/fs/nfs/read.c b/fs/nfs/read.c index 46d9044..902ba2c 100644 --- a/fs/nfs/read.c +++ b/fs/nfs/read.c @@ -85,7 +85,7 @@ int nfs_readpage_async(struct nfs_open_context *ctx, struct inode *inode, len = nfs_page_length(page); if (len == 0) return nfs_return_empty_page(page); - new = nfs_create_request(ctx, page, 0, len); + new = nfs_create_request(ctx, page, NULL, 0, len); if (IS_ERR(new)) { unlock_page(page); return PTR_ERR(new); @@ -311,7 +311,7 @@ readpage_async_filler(void *data, struct page *page) if (len == 0) return nfs_return_empty_page(page); - new = nfs_create_request(desc->ctx, page, 0, len); + new = nfs_create_request(desc->ctx, page, NULL, 0, len); if (IS_ERR(new)) goto out_error; diff --git a/fs/nfs/write.c b/fs/nfs/write.c index e773df2..d0f30f1 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -367,6 +367,8 @@ static void nfs_inode_add_request(struct inode *inode, struct nfs_page *req) { struct nfs_inode *nfsi = NFS_I(inode); + WARN_ON_ONCE(req->wb_this_page != req); + /* Lock the request! */ nfs_lock_request(req); @@ -383,6 +385,7 @@ static void nfs_inode_add_request(struct inode *inode, struct nfs_page *req) set_page_private(req->wb_page, (unsigned long)req); } nfsi->npages++; + set_bit(PG_INODE_REF, &req->wb_flags); kref_get(&req->wb_kref); spin_unlock(&inode->i_lock); } @@ -567,6 +570,7 @@ static void nfs_write_completion(struct nfs_pgio_header *hdr) { struct nfs_commit_info cinfo; unsigned long bytes = 0; + bool do_destroy; if (test_bit(NFS_IOHDR_REDO, &hdr->flags)) goto out; @@ -596,6 +600,7 @@ remove_req: next: nfs_unlock_request(req); nfs_end_page_writeback(req->wb_page); + do_destroy = !test_bit(NFS_IOHDR_NEED_COMMIT, &hdr->flags); nfs_release_request(req); } out: @@ -700,6 +705,10 @@ static struct nfs_page *nfs_try_to_update_request(struct inode *inode, if (req == NULL) goto out_unlock; + /* should be handled by nfs_flush_incompatible */ + WARN_ON_ONCE(req->wb_head != req); + WARN_ON_ONCE(req->wb_this_page != req); + rqend = req->wb_offset + req->wb_bytes; /* * Tell the caller to flush out the request if @@ -761,7 +770,7 @@ static struct nfs_page * nfs_setup_write_request(struct nfs_open_context* ctx, req = nfs_try_to_update_request(inode, page, offset, bytes); if (req != NULL) goto out; - req = nfs_create_request(ctx, page, offset, bytes); + req = nfs_create_request(ctx, page, NULL, offset, bytes); if (IS_ERR(req)) goto out; nfs_inode_add_request(inode, req); @@ -805,6 +814,8 @@ int nfs_flush_incompatible(struct file *file, struct page *page) return 0; l_ctx = req->wb_lock_context; do_flush = req->wb_page != page || req->wb_context != ctx; + /* for now, flush if more than 1 request in page_group */ + do_flush |= req->wb_this_page != req; if (l_ctx && ctx->dentry->d_inode->i_flock != NULL) { do_flush |= l_ctx->lockowner.l_owner != current->files || l_ctx->lockowner.l_pid != current->tgid; -- cgit v1.1 From 67d0338edd71db9a4f406d8778f7c525d31e9f7f Mon Sep 17 00:00:00 2001 From: Weston Andros Adamson Date: Thu, 15 May 2014 11:56:46 -0400 Subject: nfs: page group syncing in read path Operations that modify state for a whole page must be syncronized across all requests within a page group. In the read path, this is calling unlock_page and SetPageUptodate. Both of these functions should not be called until all requests in a page group have reached the point where they would call them. This patch should have no effect yet since all page groups currently have one request, but will come into play when pg_test functions are modified to split pages into sub-page regions. Signed-off-by: Weston Andros Adamson Signed-off-by: Trond Myklebust --- fs/nfs/pagelist.c | 2 ++ fs/nfs/read.c | 22 +++++++++++++++++----- 2 files changed, 19 insertions(+), 5 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c index 015fb7b..18ee4e9 100644 --- a/fs/nfs/pagelist.c +++ b/fs/nfs/pagelist.c @@ -395,6 +395,8 @@ static void nfs_free_request(struct nfs_page *req) /* extra debug: make sure no sync bits are still set */ WARN_ON_ONCE(test_bit(PG_TEARDOWN, &req->wb_flags)); + WARN_ON_ONCE(test_bit(PG_UNLOCKPAGE, &req->wb_flags)); + WARN_ON_ONCE(test_bit(PG_UPTODATE, &req->wb_flags)); /* Release struct file and open context */ nfs_clear_request(req); diff --git a/fs/nfs/read.c b/fs/nfs/read.c index 902ba2c..53d5b83 100644 --- a/fs/nfs/read.c +++ b/fs/nfs/read.c @@ -105,10 +105,16 @@ static void nfs_readpage_release(struct nfs_page *req) { struct inode *d_inode = req->wb_context->dentry->d_inode; - if (PageUptodate(req->wb_page)) - nfs_readpage_to_fscache(d_inode, req->wb_page, 0); + dprintk("NFS: read done (%s/%llu %d@%lld)\n", d_inode->i_sb->s_id, + (unsigned long long)NFS_FILEID(d_inode), req->wb_bytes, + (long long)req_offset(req)); - unlock_page(req->wb_page); + if (nfs_page_group_sync_on_bit(req, PG_UNLOCKPAGE)) { + if (PageUptodate(req->wb_page)) + nfs_readpage_to_fscache(d_inode, req->wb_page, 0); + + unlock_page(req->wb_page); + } dprintk("NFS: read done (%s/%Lu %d@%Ld)\n", req->wb_context->dentry->d_inode->i_sb->s_id, @@ -118,6 +124,12 @@ static void nfs_readpage_release(struct nfs_page *req) nfs_release_request(req); } +static void nfs_page_group_set_uptodate(struct nfs_page *req) +{ + if (nfs_page_group_sync_on_bit(req, PG_UPTODATE)) + SetPageUptodate(req->wb_page); +} + /* Note io was page aligned */ static void nfs_read_completion(struct nfs_pgio_header *hdr) { @@ -140,9 +152,9 @@ static void nfs_read_completion(struct nfs_pgio_header *hdr) bytes += req->wb_bytes; if (test_bit(NFS_IOHDR_ERROR, &hdr->flags)) { if (bytes <= hdr->good_bytes) - SetPageUptodate(page); + nfs_page_group_set_uptodate(req); } else - SetPageUptodate(page); + nfs_page_group_set_uptodate(req); nfs_list_remove_request(req); nfs_readpage_release(req); } -- cgit v1.1 From 20633f042fd0907300069714b98aaf607a8b5bf8 Mon Sep 17 00:00:00 2001 From: Weston Andros Adamson Date: Thu, 15 May 2014 11:56:47 -0400 Subject: nfs: page group syncing in write path Operations that modify state for a whole page must be syncronized across all requests within a page group. In the write path, this is calling end_page_writeback and removing the head request from an inode. Both of these operations should not be called until all requests in a page group have reached the point where they would call them. This patch should have no effect yet since all page groups currently have one request, but will come into play when pg_test functions are modified to split pages into sub-page regions. Signed-off-by: Weston Andros Adamson Signed-off-by: Trond Myklebust --- fs/nfs/pagelist.c | 2 ++ fs/nfs/write.c | 32 ++++++++++++++++++++------------ 2 files changed, 22 insertions(+), 12 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c index 18ee4e9..ceb4424 100644 --- a/fs/nfs/pagelist.c +++ b/fs/nfs/pagelist.c @@ -397,6 +397,8 @@ static void nfs_free_request(struct nfs_page *req) WARN_ON_ONCE(test_bit(PG_TEARDOWN, &req->wb_flags)); WARN_ON_ONCE(test_bit(PG_UNLOCKPAGE, &req->wb_flags)); WARN_ON_ONCE(test_bit(PG_UPTODATE, &req->wb_flags)); + WARN_ON_ONCE(test_bit(PG_WB_END, &req->wb_flags)); + WARN_ON_ONCE(test_bit(PG_REMOVE, &req->wb_flags)); /* Release struct file and open context */ nfs_clear_request(req); diff --git a/fs/nfs/write.c b/fs/nfs/write.c index d0f30f1..5d75276 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -201,12 +201,15 @@ static void nfs_set_page_writeback(struct page *page) } } -static void nfs_end_page_writeback(struct page *page) +static void nfs_end_page_writeback(struct nfs_page *req) { - struct inode *inode = page_file_mapping(page)->host; + struct inode *inode = page_file_mapping(req->wb_page)->host; struct nfs_server *nfss = NFS_SERVER(inode); - end_page_writeback(page); + if (!nfs_page_group_sync_on_bit(req, PG_WB_END)) + return; + + end_page_writeback(req->wb_page); if (atomic_long_dec_return(&nfss->writeback) < NFS_CONGESTION_OFF_THRESH) clear_bdi_congested(&nfss->backing_dev_info, BLK_RW_ASYNC); } @@ -397,15 +400,20 @@ static void nfs_inode_remove_request(struct nfs_page *req) { struct inode *inode = req->wb_context->dentry->d_inode; struct nfs_inode *nfsi = NFS_I(inode); + struct nfs_page *head; - spin_lock(&inode->i_lock); - if (likely(!PageSwapCache(req->wb_page))) { - set_page_private(req->wb_page, 0); - ClearPagePrivate(req->wb_page); - clear_bit(PG_MAPPED, &req->wb_flags); + if (nfs_page_group_sync_on_bit(req, PG_REMOVE)) { + head = req->wb_head; + + spin_lock(&inode->i_lock); + if (likely(!PageSwapCache(head->wb_page))) { + set_page_private(head->wb_page, 0); + ClearPagePrivate(head->wb_page); + clear_bit(PG_MAPPED, &head->wb_flags); + } + nfsi->npages--; + spin_unlock(&inode->i_lock); } - nfsi->npages--; - spin_unlock(&inode->i_lock); nfs_release_request(req); } @@ -599,7 +607,7 @@ remove_req: nfs_inode_remove_request(req); next: nfs_unlock_request(req); - nfs_end_page_writeback(req->wb_page); + nfs_end_page_writeback(req); do_destroy = !test_bit(NFS_IOHDR_NEED_COMMIT, &hdr->flags); nfs_release_request(req); } @@ -964,7 +972,7 @@ static void nfs_redirty_request(struct nfs_page *req) { nfs_mark_request_dirty(req); nfs_unlock_request(req); - nfs_end_page_writeback(req->wb_page); + nfs_end_page_writeback(req); nfs_release_request(req); } -- cgit v1.1 From d72ddcbab60a70258d0cd5752db3f53824df78d6 Mon Sep 17 00:00:00 2001 From: Weston Andros Adamson Date: Thu, 15 May 2014 11:56:48 -0400 Subject: nfs: page group support in nfs_mark_uptodate Change how nfs_mark_uptodate checks to see if writes cover a whole page. This patch should have no effect yet since all page groups currently have one request, but will come into play when pg_test functions are modified to split pages into sub-page regions. Signed-off-by: Weston Andros Adamson Signed-off-by: Trond Myklebust --- fs/nfs/write.c | 74 ++++++++++++++++++++++++++++++++++++++++++++++++++++------ 1 file changed, 67 insertions(+), 7 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/write.c b/fs/nfs/write.c index 5d75276..17b9895 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -154,18 +154,78 @@ static void nfs_set_pageerror(struct page *page) nfs_zap_mapping(page_file_mapping(page)->host, page_file_mapping(page)); } +/* + * nfs_page_group_search_locked + * @head - head request of page group + * @page_offset - offset into page + * + * Search page group with head @head to find a request that contains the + * page offset @page_offset. + * + * Returns a pointer to the first matching nfs request, or NULL if no + * match is found. + * + * Must be called with the page group lock held + */ +static struct nfs_page * +nfs_page_group_search_locked(struct nfs_page *head, unsigned int page_offset) +{ + struct nfs_page *req; + + WARN_ON_ONCE(head != head->wb_head); + WARN_ON_ONCE(!test_bit(PG_HEADLOCK, &head->wb_head->wb_flags)); + + req = head; + do { + if (page_offset >= req->wb_pgbase && + page_offset < (req->wb_pgbase + req->wb_bytes)) + return req; + + req = req->wb_this_page; + } while (req != head); + + return NULL; +} + +/* + * nfs_page_group_covers_page + * @head - head request of page group + * + * Return true if the page group with head @head covers the whole page, + * returns false otherwise + */ +static bool nfs_page_group_covers_page(struct nfs_page *req) +{ + struct nfs_page *tmp; + unsigned int pos = 0; + unsigned int len = nfs_page_length(req->wb_page); + + nfs_page_group_lock(req); + + do { + tmp = nfs_page_group_search_locked(req->wb_head, pos); + if (tmp) { + /* no way this should happen */ + WARN_ON_ONCE(tmp->wb_pgbase != pos); + pos += tmp->wb_bytes - (pos - tmp->wb_pgbase); + } + } while (tmp && pos < len); + + nfs_page_group_unlock(req); + WARN_ON_ONCE(pos > len); + return pos == len; +} + /* We can set the PG_uptodate flag if we see that a write request * covers the full page. */ -static void nfs_mark_uptodate(struct page *page, unsigned int base, unsigned int count) +static void nfs_mark_uptodate(struct nfs_page *req) { - if (PageUptodate(page)) - return; - if (base != 0) + if (PageUptodate(req->wb_page)) return; - if (count != nfs_page_length(page)) + if (!nfs_page_group_covers_page(req)) return; - SetPageUptodate(page); + SetPageUptodate(req->wb_page); } static int wb_priority(struct writeback_control *wbc) @@ -796,7 +856,7 @@ static int nfs_writepage_setup(struct nfs_open_context *ctx, struct page *page, return PTR_ERR(req); /* Update file length */ nfs_grow_file(page, offset, count); - nfs_mark_uptodate(page, req->wb_pgbase, req->wb_bytes); + nfs_mark_uptodate(req); nfs_mark_request_dirty(req); nfs_unlock_and_release_request(req); return 0; -- cgit v1.1 From dd7663e7000d3408b5a5037ae8f774feaa9c0628 Mon Sep 17 00:00:00 2001 From: Weston Andros Adamson Date: Thu, 15 May 2014 11:56:49 -0400 Subject: pnfs: clean up filelayout_alloc_commit_info Remove unneeded else statement and clean up how commit info dataserver buckets are replaced. Suggested-by: Trond Myklebust Signed-off-by: Weston Andros Adamson Signed-off-by: Trond Myklebust --- fs/nfs/nfs4filelayout.c | 46 ++++++++++++++++++++++++++-------------------- 1 file changed, 26 insertions(+), 20 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/nfs4filelayout.c b/fs/nfs/nfs4filelayout.c index 9319427..9cea935 100644 --- a/fs/nfs/nfs4filelayout.c +++ b/fs/nfs/nfs4filelayout.c @@ -850,11 +850,15 @@ filelayout_alloc_commit_info(struct pnfs_layout_segment *lseg, { struct nfs4_filelayout_segment *fl = FILELAYOUT_LSEG(lseg); struct pnfs_commit_bucket *buckets; - int size; + int size, i; if (fl->commit_through_mds) return 0; - if (cinfo->ds->nbuckets != 0) { + + size = (fl->stripe_type == STRIPE_SPARSE) ? + fl->dsaddr->ds_num : fl->dsaddr->stripe_count; + + if (cinfo->ds->nbuckets >= size) { /* This assumes there is only one IOMODE_RW lseg. What * we really want to do is have a layout_hdr level * dictionary of keys, each @@ -864,30 +868,32 @@ filelayout_alloc_commit_info(struct pnfs_layout_segment *lseg, return 0; } - size = (fl->stripe_type == STRIPE_SPARSE) ? - fl->dsaddr->ds_num : fl->dsaddr->stripe_count; - buckets = kcalloc(size, sizeof(struct pnfs_commit_bucket), gfp_flags); if (!buckets) return -ENOMEM; - else { - int i; + for (i = 0; i < size; i++) { + INIT_LIST_HEAD(&buckets[i].written); + INIT_LIST_HEAD(&buckets[i].committing); + } - spin_lock(cinfo->lock); - if (cinfo->ds->nbuckets != 0) - kfree(buckets); - else { - cinfo->ds->buckets = buckets; - cinfo->ds->nbuckets = size; - for (i = 0; i < size; i++) { - INIT_LIST_HEAD(&buckets[i].written); - INIT_LIST_HEAD(&buckets[i].committing); - } - } - spin_unlock(cinfo->lock); - return 0; + spin_lock(cinfo->lock); + if (cinfo->ds->nbuckets >= size) + goto out; + for (i = 0; i < cinfo->ds->nbuckets; i++) { + list_splice(&cinfo->ds->buckets[i].written, + &buckets[i].written); + list_splice(&cinfo->ds->buckets[i].committing, + &buckets[i].committing); + buckets[i].wlseg = cinfo->ds->buckets[i].wlseg; + buckets[i].clseg = cinfo->ds->buckets[i].clseg; } + swap(cinfo->ds->buckets, buckets); + cinfo->ds->nbuckets = size; +out: + spin_unlock(cinfo->lock); + kfree(buckets); + return 0; } static struct pnfs_layout_segment * -- cgit v1.1 From 6094f83864c1d1296566a282cba05ba613f151ee Mon Sep 17 00:00:00 2001 From: Weston Andros Adamson Date: Thu, 15 May 2014 11:56:50 -0400 Subject: nfs: allow coalescing of subpage requests Remove check that the request covers a whole page. Signed-off-by: Weston Andros Adamson Signed-off-by: Trond Myklebust --- fs/nfs/pagelist.c | 4 ---- 1 file changed, 4 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c index ceb4424..838f7c9 100644 --- a/fs/nfs/pagelist.c +++ b/fs/nfs/pagelist.c @@ -921,10 +921,6 @@ static bool nfs_can_coalesce_requests(struct nfs_page *prev, !nfs_match_lock_context(req->wb_lock_context, prev->wb_lock_context)) return false; - if (req->wb_pgbase != 0) - return false; - if (prev->wb_pgbase + prev->wb_bytes != PAGE_CACHE_SIZE) - return false; if (req_offset(req) != req_offset(prev) + prev->wb_bytes) return false; } -- cgit v1.1 From 0f9c429eca07aca2764ccd751e2b48ba5397b936 Mon Sep 17 00:00:00 2001 From: Weston Andros Adamson Date: Thu, 15 May 2014 11:56:51 -0400 Subject: nfs: chain calls to pg_test Now that pg_test can change the size of the request (by returning a non-zero size smaller than the request), pg_test functions that call other pg_test functions must return the minimum of the result - or 0 if any fail. Also clean up the logic of some pg_test functions so that all checks are for contitions where coalescing is not possible. Signed-off-by: Weston Andros Adamson Signed-off-by: Trond Myklebust --- fs/nfs/nfs4filelayout.c | 27 ++++++++++++++------------- fs/nfs/objlayout/objio_osd.c | 9 ++++++--- fs/nfs/pnfs.c | 15 ++++++++++----- 3 files changed, 30 insertions(+), 21 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/nfs4filelayout.c b/fs/nfs/nfs4filelayout.c index 9cea935..7a665e0 100644 --- a/fs/nfs/nfs4filelayout.c +++ b/fs/nfs/nfs4filelayout.c @@ -928,26 +928,27 @@ static size_t filelayout_pg_test(struct nfs_pageio_descriptor *pgio, struct nfs_page *prev, struct nfs_page *req) { + unsigned int size; u64 p_stripe, r_stripe; u32 stripe_unit; - if (!pnfs_generic_pg_test(pgio, prev, req) || - !nfs_generic_pg_test(pgio, prev, req)) + /* calls nfs_generic_pg_test */ + size = pnfs_generic_pg_test(pgio, prev, req); + if (!size) return 0; - if (!prev) - return req->wb_bytes; + if (prev) { + p_stripe = (u64)req_offset(prev); + r_stripe = (u64)req_offset(req); + stripe_unit = FILELAYOUT_LSEG(pgio->pg_lseg)->stripe_unit; - p_stripe = (u64)req_offset(prev); - r_stripe = (u64)req_offset(req); - stripe_unit = FILELAYOUT_LSEG(pgio->pg_lseg)->stripe_unit; + do_div(p_stripe, stripe_unit); + do_div(r_stripe, stripe_unit); - do_div(p_stripe, stripe_unit); - do_div(r_stripe, stripe_unit); - - if (p_stripe == r_stripe) - return req->wb_bytes; - return 0; + if (p_stripe != r_stripe) + return 0; + } + return min(size, req->wb_bytes); } static void diff --git a/fs/nfs/objlayout/objio_osd.c b/fs/nfs/objlayout/objio_osd.c index 71b9c69..6113207 100644 --- a/fs/nfs/objlayout/objio_osd.c +++ b/fs/nfs/objlayout/objio_osd.c @@ -571,12 +571,15 @@ int objio_write_pagelist(struct nfs_pgio_data *wdata, int how) static size_t objio_pg_test(struct nfs_pageio_descriptor *pgio, struct nfs_page *prev, struct nfs_page *req) { - if (!pnfs_generic_pg_test(pgio, prev, req) || - pgio->pg_count + req->wb_bytes > + unsigned int size; + + size = pnfs_generic_pg_test(pgio, prev, req); + + if (!size || pgio->pg_count + req->wb_bytes > (unsigned long)pgio->pg_layout_private) return 0; - return req->wb_bytes; + return min(size, req->wb_bytes); } static void objio_init_read(struct nfs_pageio_descriptor *pgio, struct nfs_page *req) diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index de6eb16..354c53c 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c @@ -1442,8 +1442,12 @@ size_t pnfs_generic_pg_test(struct nfs_pageio_descriptor *pgio, struct nfs_page *prev, struct nfs_page *req) { - if (pgio->pg_lseg == NULL) - return nfs_generic_pg_test(pgio, prev, req); + unsigned int size; + + size = nfs_generic_pg_test(pgio, prev, req); + + if (!size) + return 0; /* * Test if a nfs_page is fully contained in the pnfs_layout_range. @@ -1459,10 +1463,11 @@ pnfs_generic_pg_test(struct nfs_pageio_descriptor *pgio, struct nfs_page *prev, * first byte that lies outside the pnfs_layout_range. FIXME? * */ - if (req_offset(req) < end_offset(pgio->pg_lseg->pls_range.offset, + if (req_offset(req) >= end_offset(pgio->pg_lseg->pls_range.offset, pgio->pg_lseg->pls_range.length)) - return req->wb_bytes; - return 0; + return 0; + + return min(size, req->wb_bytes); } EXPORT_SYMBOL_GPL(pnfs_generic_pg_test); -- cgit v1.1 From f0cb9ab8d5589fc553761068200e5a8342f61de0 Mon Sep 17 00:00:00 2001 From: Weston Andros Adamson Date: Thu, 15 May 2014 11:56:52 -0400 Subject: nfs: use > 1 request to handle bsize < PAGE_SIZE Use the newly added support for multiple requests per page for rsize/wsize < PAGE_SIZE, instead of having multiple read / write data structures per pageio header. This allows us to get rid of nfs_pgio_multi. Signed-off-by: Weston Andros Adamson Signed-off-by: Trond Myklebust --- fs/nfs/pagelist.c | 80 ++++++++----------------------------------------------- 1 file changed, 11 insertions(+), 69 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c index 838f7c9..ec4311d 100644 --- a/fs/nfs/pagelist.c +++ b/fs/nfs/pagelist.c @@ -443,21 +443,13 @@ nfs_wait_on_request(struct nfs_page *req) size_t nfs_generic_pg_test(struct nfs_pageio_descriptor *desc, struct nfs_page *prev, struct nfs_page *req) { - if (!prev) - return req->wb_bytes; - /* - * FIXME: ideally we should be able to coalesce all requests - * that are not block boundary aligned, but currently this - * is problematic for the case of bsize < PAGE_CACHE_SIZE, - * since nfs_flush_multi and nfs_pagein_multi assume you - * can have only one struct nfs_page. - */ - if (desc->pg_bsize < PAGE_SIZE) + if (desc->pg_count > desc->pg_bsize) { + /* should never happen */ + WARN_ON_ONCE(1); return 0; + } - if (desc->pg_count + req->wb_bytes <= desc->pg_bsize) - return req->wb_bytes; - return 0; + return min(desc->pg_bsize - desc->pg_count, (size_t)req->wb_bytes); } EXPORT_SYMBOL_GPL(nfs_generic_pg_test); @@ -766,50 +758,6 @@ static void nfs_pgio_result(struct rpc_task *task, void *calldata) } /* - * Generate multiple small requests to read or write a single - * contiguous dirty on one page. - */ -static int nfs_pgio_multi(struct nfs_pageio_descriptor *desc, - struct nfs_pgio_header *hdr) -{ - struct nfs_page *req = hdr->req; - struct page *page = req->wb_page; - struct nfs_pgio_data *data; - size_t wsize = desc->pg_bsize, nbytes; - unsigned int offset; - int requests = 0; - struct nfs_commit_info cinfo; - - nfs_init_cinfo(&cinfo, desc->pg_inode, desc->pg_dreq); - - if ((desc->pg_ioflags & FLUSH_COND_STABLE) && - (desc->pg_moreio || nfs_reqs_to_commit(&cinfo) || - desc->pg_count > wsize)) - desc->pg_ioflags &= ~FLUSH_COND_STABLE; - - offset = 0; - nbytes = desc->pg_count; - do { - size_t len = min(nbytes, wsize); - - data = nfs_pgio_data_alloc(hdr, 1); - if (!data) - return nfs_pgio_error(desc, hdr); - data->pages.pagevec[0] = page; - nfs_pgio_rpcsetup(data, len, offset, desc->pg_ioflags, &cinfo); - list_add(&data->list, &hdr->rpc_list); - requests++; - nbytes -= len; - offset += len; - } while (nbytes != 0); - - nfs_list_remove_request(req); - nfs_list_add_request(req, &hdr->pages); - desc->pg_rpc_callops = &nfs_pgio_common_ops; - return 0; -} - -/* * Create an RPC task for the given read or write request and kick it. * The page must have been locked by the caller. * @@ -817,8 +765,8 @@ static int nfs_pgio_multi(struct nfs_pageio_descriptor *desc, * This is the case if nfs_updatepage detects a conflicting request * that has been written but not committed. */ -static int nfs_pgio_one(struct nfs_pageio_descriptor *desc, - struct nfs_pgio_header *hdr) +int nfs_generic_pgio(struct nfs_pageio_descriptor *desc, + struct nfs_pgio_header *hdr) { struct nfs_page *req; struct page **pages; @@ -850,6 +798,7 @@ static int nfs_pgio_one(struct nfs_pageio_descriptor *desc, desc->pg_rpc_callops = &nfs_pgio_common_ops; return 0; } +EXPORT_SYMBOL_GPL(nfs_generic_pgio); static int nfs_generic_pg_pgios(struct nfs_pageio_descriptor *desc) { @@ -875,15 +824,6 @@ static int nfs_generic_pg_pgios(struct nfs_pageio_descriptor *desc) return ret; } -int nfs_generic_pgio(struct nfs_pageio_descriptor *desc, - struct nfs_pgio_header *hdr) -{ - if (desc->pg_bsize < PAGE_CACHE_SIZE) - return nfs_pgio_multi(desc, hdr); - return nfs_pgio_one(desc, hdr); -} -EXPORT_SYMBOL_GPL(nfs_generic_pgio); - static bool nfs_match_open_context(const struct nfs_open_context *ctx1, const struct nfs_open_context *ctx2) { @@ -925,7 +865,9 @@ static bool nfs_can_coalesce_requests(struct nfs_page *prev, return false; } size = pgio->pg_ops->pg_test(pgio, prev, req); - WARN_ON_ONCE(size && size != req->wb_bytes); + WARN_ON_ONCE(size > req->wb_bytes); + if (size && size < req->wb_bytes) + req->wb_bytes = size; return size > 0; } -- cgit v1.1 From 7f714720fac03383d687dbe39494cc96b845bd46 Mon Sep 17 00:00:00 2001 From: Weston Andros Adamson Date: Thu, 15 May 2014 11:56:53 -0400 Subject: nfs: remove data list from pgio header Since the ability to split pages into subpage requests has been added, nfs_pgio_header->rpc_list only ever has one pgio data. Signed-off-by: Weston Andros Adamson Signed-off-by: Trond Myklebust --- fs/nfs/pagelist.c | 39 ++++++--------------------------------- fs/nfs/pnfs.c | 41 +++++++++++++++-------------------------- 2 files changed, 21 insertions(+), 59 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c index ec4311d..fab78d1 100644 --- a/fs/nfs/pagelist.c +++ b/fs/nfs/pagelist.c @@ -470,7 +470,6 @@ struct nfs_rw_header *nfs_rw_header_alloc(const struct nfs_rw_ops *ops) struct nfs_pgio_header *hdr = &header->header; INIT_LIST_HEAD(&hdr->pages); - INIT_LIST_HEAD(&hdr->rpc_list); spin_lock_init(&hdr->lock); atomic_set(&hdr->refcnt, 0); hdr->rw_ops = ops; @@ -648,27 +647,6 @@ out: } EXPORT_SYMBOL_GPL(nfs_initiate_pgio); -static int nfs_do_multiple_pgios(struct list_head *head, - const struct rpc_call_ops *call_ops, - int how) -{ - struct nfs_pgio_data *data; - int ret = 0; - - while (!list_empty(head)) { - int ret2; - - data = list_first_entry(head, struct nfs_pgio_data, list); - list_del_init(&data->list); - - ret2 = nfs_initiate_pgio(NFS_CLIENT(data->header->inode), - data, call_ops, how, 0); - if (ret == 0) - ret = ret2; - } - return ret; -} - /** * nfs_pgio_error - Clean up from a pageio error * @desc: IO descriptor @@ -677,14 +655,9 @@ static int nfs_do_multiple_pgios(struct list_head *head, static int nfs_pgio_error(struct nfs_pageio_descriptor *desc, struct nfs_pgio_header *hdr) { - struct nfs_pgio_data *data; - set_bit(NFS_IOHDR_REDO, &hdr->flags); - while (!list_empty(&hdr->rpc_list)) { - data = list_first_entry(&hdr->rpc_list, struct nfs_pgio_data, list); - list_del(&data->list); - nfs_pgio_data_release(data); - } + nfs_pgio_data_release(hdr->data); + hdr->data = NULL; desc->pg_completion_ops->error_cleanup(&desc->pg_list); return -ENOMEM; } @@ -794,7 +767,7 @@ int nfs_generic_pgio(struct nfs_pageio_descriptor *desc, /* Set up the argument struct */ nfs_pgio_rpcsetup(data, desc->pg_count, 0, desc->pg_ioflags, &cinfo); - list_add(&data->list, &hdr->rpc_list); + hdr->data = data; desc->pg_rpc_callops = &nfs_pgio_common_ops; return 0; } @@ -816,9 +789,9 @@ static int nfs_generic_pg_pgios(struct nfs_pageio_descriptor *desc) atomic_inc(&hdr->refcnt); ret = nfs_generic_pgio(desc, hdr); if (ret == 0) - ret = nfs_do_multiple_pgios(&hdr->rpc_list, - desc->pg_rpc_callops, - desc->pg_ioflags); + ret = nfs_initiate_pgio(NFS_CLIENT(hdr->inode), + hdr->data, desc->pg_rpc_callops, + desc->pg_ioflags, 0); if (atomic_dec_and_test(&hdr->refcnt)) hdr->completion_ops->completion(hdr); return ret; diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index 354c53c..6ef108b 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c @@ -1573,23 +1573,18 @@ pnfs_try_to_write_data(struct nfs_pgio_data *wdata, } static void -pnfs_do_multiple_writes(struct nfs_pageio_descriptor *desc, struct list_head *head, int how) +pnfs_do_write(struct nfs_pageio_descriptor *desc, + struct nfs_pgio_header *hdr, int how) { - struct nfs_pgio_data *data; + struct nfs_pgio_data *data = hdr->data; const struct rpc_call_ops *call_ops = desc->pg_rpc_callops; struct pnfs_layout_segment *lseg = desc->pg_lseg; + enum pnfs_try_status trypnfs; desc->pg_lseg = NULL; - while (!list_empty(head)) { - enum pnfs_try_status trypnfs; - - data = list_first_entry(head, struct nfs_pgio_data, list); - list_del_init(&data->list); - - trypnfs = pnfs_try_to_write_data(data, call_ops, lseg, how); - if (trypnfs == PNFS_NOT_ATTEMPTED) - pnfs_write_through_mds(desc, data); - } + trypnfs = pnfs_try_to_write_data(data, call_ops, lseg, how); + if (trypnfs == PNFS_NOT_ATTEMPTED) + pnfs_write_through_mds(desc, data); pnfs_put_lseg(lseg); } @@ -1623,7 +1618,7 @@ pnfs_generic_pg_writepages(struct nfs_pageio_descriptor *desc) pnfs_put_lseg(desc->pg_lseg); desc->pg_lseg = NULL; } else - pnfs_do_multiple_writes(desc, &hdr->rpc_list, desc->pg_ioflags); + pnfs_do_write(desc, hdr, desc->pg_ioflags); if (atomic_dec_and_test(&hdr->refcnt)) hdr->completion_ops->completion(hdr); return ret; @@ -1731,23 +1726,17 @@ pnfs_try_to_read_data(struct nfs_pgio_data *rdata, } static void -pnfs_do_multiple_reads(struct nfs_pageio_descriptor *desc, struct list_head *head) +pnfs_do_read(struct nfs_pageio_descriptor *desc, struct nfs_pgio_header *hdr) { - struct nfs_pgio_data *data; + struct nfs_pgio_data *data = hdr->data; const struct rpc_call_ops *call_ops = desc->pg_rpc_callops; struct pnfs_layout_segment *lseg = desc->pg_lseg; + enum pnfs_try_status trypnfs; desc->pg_lseg = NULL; - while (!list_empty(head)) { - enum pnfs_try_status trypnfs; - - data = list_first_entry(head, struct nfs_pgio_data, list); - list_del_init(&data->list); - - trypnfs = pnfs_try_to_read_data(data, call_ops, lseg); - if (trypnfs == PNFS_NOT_ATTEMPTED) - pnfs_read_through_mds(desc, data); - } + trypnfs = pnfs_try_to_read_data(data, call_ops, lseg); + if (trypnfs == PNFS_NOT_ATTEMPTED) + pnfs_read_through_mds(desc, data); pnfs_put_lseg(lseg); } @@ -1782,7 +1771,7 @@ pnfs_generic_pg_readpages(struct nfs_pageio_descriptor *desc) pnfs_put_lseg(desc->pg_lseg); desc->pg_lseg = NULL; } else - pnfs_do_multiple_reads(desc, &hdr->rpc_list); + pnfs_do_read(desc, hdr); if (atomic_dec_and_test(&hdr->refcnt)) hdr->completion_ops->completion(hdr); return ret; -- cgit v1.1 From 5002c58639d41b93e800c8a4b7eca49c40d57822 Mon Sep 17 00:00:00 2001 From: Weston Andros Adamson Date: Thu, 15 May 2014 11:56:54 -0400 Subject: pnfs: support multiple verfs per direct req Support direct requests that span multiple pnfs data servers by comparing nfs_pgio_header->verf to a cached verf in pnfs_commit_bucket. Continue to use dreq->verf if the MDS is used / non-pNFS. Signed-off-by: Weston Andros Adamson Signed-off-by: Trond Myklebust --- fs/nfs/direct.c | 102 +++++++++++++++++++++++++++++++++++++++++++++--- fs/nfs/nfs4filelayout.c | 6 +++ 2 files changed, 103 insertions(+), 5 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c index 2c0e08f..4ad7bc3 100644 --- a/fs/nfs/direct.c +++ b/fs/nfs/direct.c @@ -108,6 +108,97 @@ static inline int put_dreq(struct nfs_direct_req *dreq) return atomic_dec_and_test(&dreq->io_count); } +/* + * nfs_direct_select_verf - select the right verifier + * @dreq - direct request possibly spanning multiple servers + * @ds_clp - nfs_client of data server or NULL if MDS / non-pnfs + * @ds_idx - index of data server in data server list, only valid if ds_clp set + * + * returns the correct verifier to use given the role of the server + */ +static struct nfs_writeverf * +nfs_direct_select_verf(struct nfs_direct_req *dreq, + struct nfs_client *ds_clp, + int ds_idx) +{ + struct nfs_writeverf *verfp = &dreq->verf; + +#ifdef CONFIG_NFS_V4_1 + if (ds_clp) { + /* pNFS is in use, use the DS verf */ + if (ds_idx >= 0 && ds_idx < dreq->ds_cinfo.nbuckets) + verfp = &dreq->ds_cinfo.buckets[ds_idx].direct_verf; + else + WARN_ON_ONCE(1); + } +#endif + return verfp; +} + + +/* + * nfs_direct_set_hdr_verf - set the write/commit verifier + * @dreq - direct request possibly spanning multiple servers + * @hdr - pageio header to validate against previously seen verfs + * + * Set the server's (MDS or DS) "seen" verifier + */ +static void nfs_direct_set_hdr_verf(struct nfs_direct_req *dreq, + struct nfs_pgio_header *hdr) +{ + struct nfs_writeverf *verfp; + + verfp = nfs_direct_select_verf(dreq, hdr->data->ds_clp, + hdr->data->ds_idx); + WARN_ON_ONCE(verfp->committed >= 0); + memcpy(verfp, &hdr->verf, sizeof(struct nfs_writeverf)); + WARN_ON_ONCE(verfp->committed < 0); +} + +/* + * nfs_direct_cmp_hdr_verf - compare verifier for pgio header + * @dreq - direct request possibly spanning multiple servers + * @hdr - pageio header to validate against previously seen verf + * + * set the server's "seen" verf if not initialized. + * returns result of comparison between @hdr->verf and the "seen" + * verf of the server used by @hdr (DS or MDS) + */ +static int nfs_direct_set_or_cmp_hdr_verf(struct nfs_direct_req *dreq, + struct nfs_pgio_header *hdr) +{ + struct nfs_writeverf *verfp; + + verfp = nfs_direct_select_verf(dreq, hdr->data->ds_clp, + hdr->data->ds_idx); + if (verfp->committed < 0) { + nfs_direct_set_hdr_verf(dreq, hdr); + return 0; + } + return memcmp(verfp, &hdr->verf, sizeof(struct nfs_writeverf)); +} + +#if IS_ENABLED(CONFIG_NFS_V3) || IS_ENABLED(CONFIG_NFS_V4) +/* + * nfs_direct_cmp_commit_data_verf - compare verifier for commit data + * @dreq - direct request possibly spanning multiple servers + * @data - commit data to validate against previously seen verf + * + * returns result of comparison between @data->verf and the verf of + * the server used by @data (DS or MDS) + */ +static int nfs_direct_cmp_commit_data_verf(struct nfs_direct_req *dreq, + struct nfs_commit_data *data) +{ + struct nfs_writeverf *verfp; + + verfp = nfs_direct_select_verf(dreq, data->ds_clp, + data->ds_commit_index); + WARN_ON_ONCE(verfp->committed < 0); + return memcmp(verfp, &data->verf, sizeof(struct nfs_writeverf)); +} +#endif + /** * nfs_direct_IO - NFS address space operation for direct I/O * @rw: direction (read or write) @@ -168,6 +259,7 @@ static inline struct nfs_direct_req *nfs_direct_req_alloc(void) kref_get(&dreq->kref); init_completion(&dreq->completion); INIT_LIST_HEAD(&dreq->mds_cinfo.list); + dreq->verf.committed = NFS_INVALID_STABLE_HOW; /* not set yet */ INIT_WORK(&dreq->work, nfs_direct_write_schedule_work); spin_lock_init(&dreq->lock); @@ -602,7 +694,7 @@ static void nfs_direct_commit_complete(struct nfs_commit_data *data) dprintk("NFS: %5u commit failed with error %d.\n", data->task.tk_pid, status); dreq->flags = NFS_ODIRECT_RESCHED_WRITES; - } else if (memcmp(&dreq->verf, &data->verf, sizeof(data->verf))) { + } else if (nfs_direct_cmp_commit_data_verf(dreq, data)) { dprintk("NFS: %5u commit verify failed\n", data->task.tk_pid); dreq->flags = NFS_ODIRECT_RESCHED_WRITES; } @@ -811,13 +903,13 @@ static void nfs_direct_write_completion(struct nfs_pgio_header *hdr) if (dreq->flags == NFS_ODIRECT_RESCHED_WRITES) bit = NFS_IOHDR_NEED_RESCHED; else if (dreq->flags == 0) { - memcpy(&dreq->verf, &hdr->verf, - sizeof(dreq->verf)); + nfs_direct_set_hdr_verf(dreq, hdr); bit = NFS_IOHDR_NEED_COMMIT; dreq->flags = NFS_ODIRECT_DO_COMMIT; } else if (dreq->flags == NFS_ODIRECT_DO_COMMIT) { - if (memcmp(&dreq->verf, &hdr->verf, sizeof(dreq->verf))) { - dreq->flags = NFS_ODIRECT_RESCHED_WRITES; + if (nfs_direct_set_or_cmp_hdr_verf(dreq, hdr)) { + dreq->flags = + NFS_ODIRECT_RESCHED_WRITES; bit = NFS_IOHDR_NEED_RESCHED; } else bit = NFS_IOHDR_NEED_COMMIT; diff --git a/fs/nfs/nfs4filelayout.c b/fs/nfs/nfs4filelayout.c index 7a665e0..0ebc521 100644 --- a/fs/nfs/nfs4filelayout.c +++ b/fs/nfs/nfs4filelayout.c @@ -560,6 +560,7 @@ filelayout_read_pagelist(struct nfs_pgio_data *data) /* No multipath support. Use first DS */ atomic_inc(&ds->ds_clp->cl_count); data->ds_clp = ds->ds_clp; + data->ds_idx = idx; fh = nfs4_fl_select_ds_fh(lseg, j); if (fh) data->args.fh = fh; @@ -603,6 +604,7 @@ filelayout_write_pagelist(struct nfs_pgio_data *data, int sync) data->pgio_done_cb = filelayout_write_done_cb; atomic_inc(&ds->ds_clp->cl_count); data->ds_clp = ds->ds_clp; + data->ds_idx = idx; fh = nfs4_fl_select_ds_fh(lseg, j); if (fh) data->args.fh = fh; @@ -875,6 +877,8 @@ filelayout_alloc_commit_info(struct pnfs_layout_segment *lseg, for (i = 0; i < size; i++) { INIT_LIST_HEAD(&buckets[i].written); INIT_LIST_HEAD(&buckets[i].committing); + /* mark direct verifier as unset */ + buckets[i].direct_verf.committed = NFS_INVALID_STABLE_HOW; } spin_lock(cinfo->lock); @@ -885,6 +889,8 @@ filelayout_alloc_commit_info(struct pnfs_layout_segment *lseg, &buckets[i].written); list_splice(&cinfo->ds->buckets[i].committing, &buckets[i].committing); + buckets[i].direct_verf.committed = + cinfo->ds->buckets[i].direct_verf.committed; buckets[i].wlseg = cinfo->ds->buckets[i].wlseg; buckets[i].clseg = cinfo->ds->buckets[i].clseg; } -- cgit v1.1 From 19b54848fee419f0bb35479e4ea98d9f2b985657 Mon Sep 17 00:00:00 2001 From: Weston Andros Adamson Date: Thu, 15 May 2014 11:56:55 -0400 Subject: pnfs: allow non page aligned pnfs layout segments Remove alignment checks that would revert to MDS and change pg_test to return the max ammount left in the segment (or other pg_test call) up to size of passed request, or 0 if no space is left. Signed-off-by: Weston Andros Adamson Signed-off-by: Trond Myklebust --- fs/nfs/pnfs.c | 25 ++++++++++--------------- 1 file changed, 10 insertions(+), 15 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index 6ef108b..ce46a41 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c @@ -1388,11 +1388,6 @@ pnfs_generic_pg_init_read(struct nfs_pageio_descriptor *pgio, struct nfs_page *r WARN_ON_ONCE(pgio->pg_lseg != NULL); - if (req->wb_offset != req->wb_pgbase) { - nfs_pageio_reset_read_mds(pgio); - return; - } - if (pgio->pg_dreq == NULL) rd_size = i_size_read(pgio->pg_inode) - req_offset(req); else @@ -1417,11 +1412,6 @@ pnfs_generic_pg_init_write(struct nfs_pageio_descriptor *pgio, { WARN_ON_ONCE(pgio->pg_lseg != NULL); - if (req->wb_offset != req->wb_pgbase) { - nfs_pageio_reset_write_mds(pgio); - return; - } - pgio->pg_lseg = pnfs_update_layout(pgio->pg_inode, req->wb_context, req_offset(req), @@ -1443,9 +1433,9 @@ pnfs_generic_pg_test(struct nfs_pageio_descriptor *pgio, struct nfs_page *prev, struct nfs_page *req) { unsigned int size; + u64 end; size = nfs_generic_pg_test(pgio, prev, req); - if (!size) return 0; @@ -1463,11 +1453,16 @@ pnfs_generic_pg_test(struct nfs_pageio_descriptor *pgio, struct nfs_page *prev, * first byte that lies outside the pnfs_layout_range. FIXME? * */ - if (req_offset(req) >= end_offset(pgio->pg_lseg->pls_range.offset, - pgio->pg_lseg->pls_range.length)) - return 0; + if (pgio->pg_lseg) { + end = end_offset(pgio->pg_lseg->pls_range.offset, + pgio->pg_lseg->pls_range.length); + WARN_ON_ONCE(req_offset(req) > end); + if (req_offset(req) >= end) + return 0; + size = min((unsigned int)(end - req_offset(req)), size); + } - return min(size, req->wb_bytes); + return size; } EXPORT_SYMBOL_GPL(pnfs_generic_pg_test); -- cgit v1.1 From c6194271f94b81042bbc45034d31f9b0920f3905 Mon Sep 17 00:00:00 2001 From: Weston Andros Adamson Date: Thu, 15 May 2014 11:56:56 -0400 Subject: pnfs: filelayout: support non page aligned layouts Use the new pg_test interface to adjust requests to fit in the current stripe / segment. Signed-off-by: Weston Andros Adamson Signed-off-by: Trond Myklebust --- fs/nfs/nfs4filelayout.c | 52 +++++++++++++++++++------------------------------ 1 file changed, 20 insertions(+), 32 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/nfs4filelayout.c b/fs/nfs/nfs4filelayout.c index 0ebc521..63a1637 100644 --- a/fs/nfs/nfs4filelayout.c +++ b/fs/nfs/nfs4filelayout.c @@ -639,7 +639,6 @@ filelayout_check_layout(struct pnfs_layout_hdr *lo, struct nfs4_deviceid_node *d; struct nfs4_file_layout_dsaddr *dsaddr; int status = -EINVAL; - struct nfs_server *nfss = NFS_SERVER(lo->plh_inode); dprintk("--> %s\n", __func__); @@ -657,7 +656,7 @@ filelayout_check_layout(struct pnfs_layout_hdr *lo, goto out; } - if (!fl->stripe_unit || fl->stripe_unit % PAGE_SIZE) { + if (!fl->stripe_unit) { dprintk("%s Invalid stripe unit (%u)\n", __func__, fl->stripe_unit); goto out; @@ -694,12 +693,6 @@ filelayout_check_layout(struct pnfs_layout_hdr *lo, goto out_put; } - if (fl->stripe_unit % nfss->rsize || fl->stripe_unit % nfss->wsize) { - dprintk("%s Stripe unit (%u) not aligned with rsize %u " - "wsize %u\n", __func__, fl->stripe_unit, nfss->rsize, - nfss->wsize); - } - status = 0; out: dprintk("--> %s returns %d\n", __func__, status); @@ -936,44 +929,42 @@ filelayout_pg_test(struct nfs_pageio_descriptor *pgio, struct nfs_page *prev, { unsigned int size; u64 p_stripe, r_stripe; - u32 stripe_unit; + u32 stripe_offset; + u64 segment_offset = pgio->pg_lseg->pls_range.offset; + u32 stripe_unit = FILELAYOUT_LSEG(pgio->pg_lseg)->stripe_unit; /* calls nfs_generic_pg_test */ size = pnfs_generic_pg_test(pgio, prev, req); if (!size) return 0; + /* see if req and prev are in the same stripe */ if (prev) { - p_stripe = (u64)req_offset(prev); - r_stripe = (u64)req_offset(req); - stripe_unit = FILELAYOUT_LSEG(pgio->pg_lseg)->stripe_unit; - + p_stripe = (u64)req_offset(prev) - segment_offset; + r_stripe = (u64)req_offset(req) - segment_offset; do_div(p_stripe, stripe_unit); do_div(r_stripe, stripe_unit); if (p_stripe != r_stripe) return 0; } - return min(size, req->wb_bytes); + + /* calculate remaining bytes in the current stripe */ + div_u64_rem((u64)req_offset(req) - segment_offset, + stripe_unit, + &stripe_offset); + WARN_ON_ONCE(stripe_offset > stripe_unit); + if (stripe_offset >= stripe_unit) + return 0; + return min(stripe_unit - (unsigned int)stripe_offset, size); } static void filelayout_pg_init_read(struct nfs_pageio_descriptor *pgio, struct nfs_page *req) { - WARN_ON_ONCE(pgio->pg_lseg != NULL); - - if (req->wb_offset != req->wb_pgbase) { - /* - * Handling unaligned pages is difficult, because have to - * somehow split a req in two in certain cases in the - * pg.test code. Avoid this by just not using pnfs - * in this case. - */ - nfs_pageio_reset_read_mds(pgio); - return; - } - pgio->pg_lseg = pnfs_update_layout(pgio->pg_inode, + if (!pgio->pg_lseg) + pgio->pg_lseg = pnfs_update_layout(pgio->pg_inode, req->wb_context, 0, NFS4_MAX_UINT64, @@ -991,11 +982,8 @@ filelayout_pg_init_write(struct nfs_pageio_descriptor *pgio, struct nfs_commit_info cinfo; int status; - WARN_ON_ONCE(pgio->pg_lseg != NULL); - - if (req->wb_offset != req->wb_pgbase) - goto out_mds; - pgio->pg_lseg = pnfs_update_layout(pgio->pg_inode, + if (!pgio->pg_lseg) + pgio->pg_lseg = pnfs_update_layout(pgio->pg_inode, req->wb_context, 0, NFS4_MAX_UINT64, -- cgit v1.1 From 68072992c8f6ace57fe80b6fb5d57b3ae887a09d Mon Sep 17 00:00:00 2001 From: Weston Andros Adamson Date: Thu, 15 May 2014 11:56:57 -0400 Subject: nfs: support page groups in nfs_read_completion nfs_read_completion relied on the fact that there was a 1:1 mapping of page to nfs_request, but this has now changed. Regions not covered by a request have already been zeroed elsewhere. Signed-off-by: Weston Andros Adamson Signed-off-by: Trond Myklebust --- fs/nfs/read.c | 24 +++++++++++++++++------- 1 file changed, 17 insertions(+), 7 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/read.c b/fs/nfs/read.c index 53d5b83..e818a47 100644 --- a/fs/nfs/read.c +++ b/fs/nfs/read.c @@ -130,7 +130,6 @@ static void nfs_page_group_set_uptodate(struct nfs_page *req) SetPageUptodate(req->wb_page); } -/* Note io was page aligned */ static void nfs_read_completion(struct nfs_pgio_header *hdr) { unsigned long bytes = 0; @@ -140,14 +139,25 @@ static void nfs_read_completion(struct nfs_pgio_header *hdr) while (!list_empty(&hdr->pages)) { struct nfs_page *req = nfs_list_entry(hdr->pages.next); struct page *page = req->wb_page; + unsigned long start = req->wb_pgbase; + unsigned long end = req->wb_pgbase + req->wb_bytes; if (test_bit(NFS_IOHDR_EOF, &hdr->flags)) { - if (bytes > hdr->good_bytes) - zero_user(page, 0, PAGE_SIZE); - else if (hdr->good_bytes - bytes < PAGE_SIZE) - zero_user_segment(page, - hdr->good_bytes & ~PAGE_MASK, - PAGE_SIZE); + /* note: regions of the page not covered by a + * request are zeroed in nfs_readpage_async / + * readpage_async_filler */ + if (bytes > hdr->good_bytes) { + /* nothing in this request was good, so zero + * the full extent of the request */ + zero_user_segment(page, start, end); + + } else if (hdr->good_bytes - bytes < req->wb_bytes) { + /* part of this request has good bytes, but + * not all. zero the bad bytes */ + start += hdr->good_bytes - bytes; + WARN_ON(start < req->wb_pgbase); + zero_user_segment(page, start, end); + } } bytes += req->wb_bytes; if (test_bit(NFS_IOHDR_ERROR, &hdr->flags)) { -- cgit v1.1 From c1109558ae0f07a3434357cd26203bfc7adfea75 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Thu, 29 May 2014 11:38:15 -0400 Subject: NFS: Fix error handling in __nfs_pageio_add_request Handle the case where nfs_create_request() returns an error. Reported-by: Dan Carpenter Signed-off-by: Trond Myklebust --- fs/nfs/pagelist.c | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'fs/nfs') diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c index fab78d1..0b8446f 100644 --- a/fs/nfs/pagelist.c +++ b/fs/nfs/pagelist.c @@ -947,6 +947,8 @@ static int __nfs_pageio_add_request(struct nfs_pageio_descriptor *desc, subreq = nfs_create_request(req->wb_context, req->wb_page, subreq, pgbase, bytes_left); + if (IS_ERR(subreq)) + goto err_ptr; nfs_lock_request(subreq); subreq->wb_offset = offset; subreq->wb_index = req->wb_index; @@ -955,6 +957,10 @@ static int __nfs_pageio_add_request(struct nfs_pageio_descriptor *desc, nfs_page_group_unlock(req); return 1; +err_ptr: + desc->pg_error = PTR_ERR(subreq); + nfs_page_group_unlock(req); + return 0; } static int nfs_do_recoalesce(struct nfs_pageio_descriptor *desc) -- cgit v1.1 From f868089b09b51bd17ee41dedb96f98a1d0952fec Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Thu, 29 May 2014 11:45:57 -0400 Subject: NFS: Fix a potential busy wait in nfs_page_group_lock We cannot allow nfs_page_group_lock to use TASK_KILLABLE here, since the loop would cause a busy wait if somebody kills the task. Signed-off-by: Trond Myklebust --- fs/nfs/pagelist.c | 19 +++++++++---------- 1 file changed, 9 insertions(+), 10 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c index 0b8446f..a875982 100644 --- a/fs/nfs/pagelist.c +++ b/fs/nfs/pagelist.c @@ -138,6 +138,12 @@ nfs_iocounter_wait(struct nfs_io_counter *c) return __nfs_iocounter_wait(c); } +static int nfs_wait_bit_uninterruptible(void *word) +{ + io_schedule(); + return 0; +} + /* * nfs_page_group_lock - lock the head of the page group * @req - request in group that is to be locked @@ -148,13 +154,12 @@ void nfs_page_group_lock(struct nfs_page *req) { struct nfs_page *head = req->wb_head; - int err = -EAGAIN; WARN_ON_ONCE(head != head->wb_head); - while (err) - err = wait_on_bit_lock(&head->wb_flags, PG_HEADLOCK, - nfs_wait_bit_killable, TASK_KILLABLE); + wait_on_bit_lock(&head->wb_flags, PG_HEADLOCK, + nfs_wait_bit_uninterruptible, + TASK_UNINTERRUPTIBLE); } /* @@ -410,12 +415,6 @@ void nfs_release_request(struct nfs_page *req) kref_put(&req->wb_kref, nfs_page_group_destroy); } -static int nfs_wait_bit_uninterruptible(void *word) -{ - io_schedule(); - return 0; -} - /** * nfs_wait_on_request - Wait for a request to complete. * @req: request to wait upon. -- cgit v1.1 From 8935ef664ea33a7ddd39015b26fe35aa7744f715 Mon Sep 17 00:00:00 2001 From: Andy Adamson Date: Fri, 23 May 2014 06:22:59 -0700 Subject: NFSv4: Use error handler on failed GETATTR with successful OPEN Place the call to resend the failed GETATTR under the error handler so that when appropriate, the GETATTR is retried more than once. The server can fail the GETATTR op in the OPEN compound with a recoverable error such as NFS4ERR_DELAY. In the case of an O_EXCL open, the server has created the file, so a retrans of the OPEN call will fail with NFS4ERR_EXIST. Signed-off-by: Andy Adamson Signed-off-by: Trond Myklebust --- fs/nfs/nfs4proc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs/nfs') diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 44fb93a..68dd81e 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -2027,7 +2027,7 @@ static int _nfs4_proc_open(struct nfs4_opendata *data) return status; } if (!(o_res->f_attr->valid & NFS_ATTR_FATTR)) - _nfs4_proc_getattr(server, &o_res->fh, o_res->f_attr, o_res->f_label); + nfs4_proc_getattr(server, &o_res->fh, o_res->f_attr, o_res->f_label); return 0; } -- cgit v1.1 From c8e470280a5c875468dd4eb41d8c4b44c87ebbcf Mon Sep 17 00:00:00 2001 From: Scott Mayhew Date: Thu, 29 May 2014 16:41:22 -0400 Subject: nfs: Apply NFS_MOUNT_CMP_FLAGMASK to nfs_compare_remount_data() Those flags are obsolete and checking them can incorrectly cause remount operations to fail. Signed-off-by: Scott Mayhew Signed-off-by: Trond Myklebust --- fs/nfs/super.c | 26 +++++++++++++------------- 1 file changed, 13 insertions(+), 13 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/super.c b/fs/nfs/super.c index 2cb5694..1a6d7ac 100644 --- a/fs/nfs/super.c +++ b/fs/nfs/super.c @@ -2180,11 +2180,23 @@ out_no_address: return -EINVAL; } +#define NFS_MOUNT_CMP_FLAGMASK ~(NFS_MOUNT_INTR \ + | NFS_MOUNT_SECURE \ + | NFS_MOUNT_TCP \ + | NFS_MOUNT_VER3 \ + | NFS_MOUNT_KERBEROS \ + | NFS_MOUNT_NONLM \ + | NFS_MOUNT_BROKEN_SUID \ + | NFS_MOUNT_STRICTLOCK \ + | NFS_MOUNT_UNSHARED \ + | NFS_MOUNT_NORESVPORT \ + | NFS_MOUNT_LEGACY_INTERFACE) + static int nfs_compare_remount_data(struct nfs_server *nfss, struct nfs_parsed_mount_data *data) { - if (data->flags != nfss->flags || + if ((data->flags ^ nfss->flags) & NFS_MOUNT_CMP_FLAGMASK || data->rsize != nfss->rsize || data->wsize != nfss->wsize || data->version != nfss->nfs_client->rpc_ops->version || @@ -2347,18 +2359,6 @@ void nfs_clone_super(struct super_block *sb, struct nfs_mount_info *mount_info) nfs_initialise_sb(sb); } -#define NFS_MOUNT_CMP_FLAGMASK ~(NFS_MOUNT_INTR \ - | NFS_MOUNT_SECURE \ - | NFS_MOUNT_TCP \ - | NFS_MOUNT_VER3 \ - | NFS_MOUNT_KERBEROS \ - | NFS_MOUNT_NONLM \ - | NFS_MOUNT_BROKEN_SUID \ - | NFS_MOUNT_STRICTLOCK \ - | NFS_MOUNT_UNSHARED \ - | NFS_MOUNT_NORESVPORT \ - | NFS_MOUNT_LEGACY_INTERFACE) - static int nfs_compare_mount_options(const struct super_block *s, const struct nfs_server *b, int flags) { const struct nfs_server *a = s->s_fs_info; -- cgit v1.1 From 6df200f5d5191bdde4d2e408215383890f956781 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Thu, 29 May 2014 20:06:55 -0400 Subject: pNFS: Handle allocation errors correctly in filelayout_alloc_layout_hdr() Return the NULL pointer when the allocation fails. Reported-by: Fengguang Wu Cc: # 3.5.x Signed-off-by: Trond Myklebust --- fs/nfs/nfs4filelayout.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs/nfs') diff --git a/fs/nfs/nfs4filelayout.c b/fs/nfs/nfs4filelayout.c index 63a1637..21e9211 100644 --- a/fs/nfs/nfs4filelayout.c +++ b/fs/nfs/nfs4filelayout.c @@ -1348,7 +1348,7 @@ filelayout_alloc_layout_hdr(struct inode *inode, gfp_t gfp_flags) struct nfs4_filelayout *flo; flo = kzalloc(sizeof(*flo), gfp_flags); - return &flo->generic_hdr; + return flo != NULL ? &flo->generic_hdr : NULL; } static void -- cgit v1.1 From 0aa61e78a0f262a2f94bd138831c97749cfca5bf Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Thu, 29 May 2014 20:17:17 -0400 Subject: pNFS: Handle allocation errors correctly in objlayout_alloc_layout_hdr() Return the NULL pointer when the allocation fails. Cc: Boaz Harrosh Signed-off-by: Trond Myklebust --- fs/nfs/objlayout/objlayout.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/objlayout/objlayout.c b/fs/nfs/objlayout/objlayout.c index 2f955f6..765d3f5 100644 --- a/fs/nfs/objlayout/objlayout.c +++ b/fs/nfs/objlayout/objlayout.c @@ -53,10 +53,10 @@ objlayout_alloc_layout_hdr(struct inode *inode, gfp_t gfp_flags) struct objlayout *objlay; objlay = kzalloc(sizeof(struct objlayout), gfp_flags); - if (objlay) { - spin_lock_init(&objlay->lock); - INIT_LIST_HEAD(&objlay->err_list); - } + if (!objlay) + return NULL; + spin_lock_init(&objlay->lock); + INIT_LIST_HEAD(&objlay->err_list); dprintk("%s: Return %p\n", __func__, objlay); return &objlay->pnfs_layout; } -- cgit v1.1 From b5968725f46d9535e908689bd04bf676f71138bb Mon Sep 17 00:00:00 2001 From: Tom Haynes Date: Mon, 12 May 2014 14:35:52 -0700 Subject: Push the file layout driver into a subdirectory The object and block layouts already exist in their own subdirectories. This patch completes the set! Note that as a layout denotes nfs4 already, I stripped that prefix out of the file names. Signed-off-by: Tom Haynes Acked-by: Jeff Layton Signed-off-by: Trond Myklebust --- fs/nfs/Makefile | 4 +- fs/nfs/filelayout/Makefile | 5 + fs/nfs/filelayout/filelayout.c | 1409 +++++++++++++++++++++++++++++++++++++ fs/nfs/filelayout/filelayout.h | 156 ++++ fs/nfs/filelayout/filelayoutdev.c | 843 ++++++++++++++++++++++ fs/nfs/nfs4filelayout.c | 1409 ------------------------------------- fs/nfs/nfs4filelayout.h | 156 ---- fs/nfs/nfs4filelayoutdev.c | 843 ---------------------- 8 files changed, 2414 insertions(+), 2411 deletions(-) create mode 100644 fs/nfs/filelayout/Makefile create mode 100644 fs/nfs/filelayout/filelayout.c create mode 100644 fs/nfs/filelayout/filelayout.h create mode 100644 fs/nfs/filelayout/filelayoutdev.c delete mode 100644 fs/nfs/nfs4filelayout.c delete mode 100644 fs/nfs/nfs4filelayout.h delete mode 100644 fs/nfs/nfs4filelayoutdev.c (limited to 'fs/nfs') diff --git a/fs/nfs/Makefile b/fs/nfs/Makefile index 03192a6..4782e08 100644 --- a/fs/nfs/Makefile +++ b/fs/nfs/Makefile @@ -29,8 +29,6 @@ nfsv4-$(CONFIG_NFS_USE_LEGACY_DNS) += cache_lib.o nfsv4-$(CONFIG_SYSCTL) += nfs4sysctl.o nfsv4-$(CONFIG_NFS_V4_1) += pnfs.o pnfs_dev.o -obj-$(CONFIG_PNFS_FILE_LAYOUT) += nfs_layout_nfsv41_files.o -nfs_layout_nfsv41_files-y := nfs4filelayout.o nfs4filelayoutdev.o - +obj-$(CONFIG_PNFS_FILE_LAYOUT) += filelayout/ obj-$(CONFIG_PNFS_OBJLAYOUT) += objlayout/ obj-$(CONFIG_PNFS_BLOCK) += blocklayout/ diff --git a/fs/nfs/filelayout/Makefile b/fs/nfs/filelayout/Makefile new file mode 100644 index 0000000..8516cdf --- /dev/null +++ b/fs/nfs/filelayout/Makefile @@ -0,0 +1,5 @@ +# +# Makefile for the pNFS Files Layout Driver kernel module +# +obj-$(CONFIG_PNFS_FILE_LAYOUT) += nfs_layout_nfsv41_files.o +nfs_layout_nfsv41_files-y := filelayout.o filelayoutdev.o diff --git a/fs/nfs/filelayout/filelayout.c b/fs/nfs/filelayout/filelayout.c new file mode 100644 index 0000000..a008e85 --- /dev/null +++ b/fs/nfs/filelayout/filelayout.c @@ -0,0 +1,1409 @@ +/* + * Module for the pnfs nfs4 file layout driver. + * Defines all I/O and Policy interface operations, plus code + * to register itself with the pNFS client. + * + * Copyright (c) 2002 + * The Regents of the University of Michigan + * All Rights Reserved + * + * Dean Hildebrand + * + * Permission is granted to use, copy, create derivative works, and + * redistribute this software and such derivative works for any purpose, + * so long as the name of the University of Michigan is not used in + * any advertising or publicity pertaining to the use or distribution + * of this software without specific, written prior authorization. If + * the above copyright notice or any other identification of the + * University of Michigan is included in any copy of any portion of + * this software, then the disclaimer below must also be included. + * + * This software is provided as is, without representation or warranty + * of any kind either express or implied, including without limitation + * the implied warranties of merchantability, fitness for a particular + * purpose, or noninfringement. The Regents of the University of + * Michigan shall not be liable for any damages, including special, + * indirect, incidental, or consequential damages, with respect to any + * claim arising out of or in connection with the use of the software, + * even if it has been or is hereafter advised of the possibility of + * such damages. + */ + +#include +#include +#include + +#include + +#include "../nfs4session.h" +#include "../internal.h" +#include "../delegation.h" +#include "filelayout.h" +#include "../nfs4trace.h" + +#define NFSDBG_FACILITY NFSDBG_PNFS_LD + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Dean Hildebrand "); +MODULE_DESCRIPTION("The NFSv4 file layout driver"); + +#define FILELAYOUT_POLL_RETRY_MAX (15*HZ) + +static loff_t +filelayout_get_dense_offset(struct nfs4_filelayout_segment *flseg, + loff_t offset) +{ + u32 stripe_width = flseg->stripe_unit * flseg->dsaddr->stripe_count; + u64 stripe_no; + u32 rem; + + offset -= flseg->pattern_offset; + stripe_no = div_u64(offset, stripe_width); + div_u64_rem(offset, flseg->stripe_unit, &rem); + + return stripe_no * flseg->stripe_unit + rem; +} + +/* This function is used by the layout driver to calculate the + * offset of the file on the dserver based on whether the + * layout type is STRIPE_DENSE or STRIPE_SPARSE + */ +static loff_t +filelayout_get_dserver_offset(struct pnfs_layout_segment *lseg, loff_t offset) +{ + struct nfs4_filelayout_segment *flseg = FILELAYOUT_LSEG(lseg); + + switch (flseg->stripe_type) { + case STRIPE_SPARSE: + return offset; + + case STRIPE_DENSE: + return filelayout_get_dense_offset(flseg, offset); + } + + BUG(); +} + +static void filelayout_reset_write(struct nfs_pgio_data *data) +{ + struct nfs_pgio_header *hdr = data->header; + struct rpc_task *task = &data->task; + + if (!test_and_set_bit(NFS_IOHDR_REDO, &hdr->flags)) { + dprintk("%s Reset task %5u for i/o through MDS " + "(req %s/%llu, %u bytes @ offset %llu)\n", __func__, + data->task.tk_pid, + hdr->inode->i_sb->s_id, + (unsigned long long)NFS_FILEID(hdr->inode), + data->args.count, + (unsigned long long)data->args.offset); + + task->tk_status = pnfs_write_done_resend_to_mds(hdr->inode, + &hdr->pages, + hdr->completion_ops, + hdr->dreq); + } +} + +static void filelayout_reset_read(struct nfs_pgio_data *data) +{ + struct nfs_pgio_header *hdr = data->header; + struct rpc_task *task = &data->task; + + if (!test_and_set_bit(NFS_IOHDR_REDO, &hdr->flags)) { + dprintk("%s Reset task %5u for i/o through MDS " + "(req %s/%llu, %u bytes @ offset %llu)\n", __func__, + data->task.tk_pid, + hdr->inode->i_sb->s_id, + (unsigned long long)NFS_FILEID(hdr->inode), + data->args.count, + (unsigned long long)data->args.offset); + + task->tk_status = pnfs_read_done_resend_to_mds(hdr->inode, + &hdr->pages, + hdr->completion_ops, + hdr->dreq); + } +} + +static void filelayout_fenceme(struct inode *inode, struct pnfs_layout_hdr *lo) +{ + if (!test_and_clear_bit(NFS_LAYOUT_RETURN, &lo->plh_flags)) + return; + pnfs_return_layout(inode); +} + +static int filelayout_async_handle_error(struct rpc_task *task, + struct nfs4_state *state, + struct nfs_client *clp, + struct pnfs_layout_segment *lseg) +{ + struct pnfs_layout_hdr *lo = lseg->pls_layout; + struct inode *inode = lo->plh_inode; + struct nfs_server *mds_server = NFS_SERVER(inode); + struct nfs4_deviceid_node *devid = FILELAYOUT_DEVID_NODE(lseg); + struct nfs_client *mds_client = mds_server->nfs_client; + struct nfs4_slot_table *tbl = &clp->cl_session->fc_slot_table; + + if (task->tk_status >= 0) + return 0; + + switch (task->tk_status) { + /* MDS state errors */ + case -NFS4ERR_DELEG_REVOKED: + case -NFS4ERR_ADMIN_REVOKED: + case -NFS4ERR_BAD_STATEID: + if (state == NULL) + break; + nfs_remove_bad_delegation(state->inode); + case -NFS4ERR_OPENMODE: + if (state == NULL) + break; + if (nfs4_schedule_stateid_recovery(mds_server, state) < 0) + goto out_bad_stateid; + goto wait_on_recovery; + case -NFS4ERR_EXPIRED: + if (state != NULL) { + if (nfs4_schedule_stateid_recovery(mds_server, state) < 0) + goto out_bad_stateid; + } + nfs4_schedule_lease_recovery(mds_client); + goto wait_on_recovery; + /* DS session errors */ + case -NFS4ERR_BADSESSION: + case -NFS4ERR_BADSLOT: + case -NFS4ERR_BAD_HIGH_SLOT: + case -NFS4ERR_DEADSESSION: + case -NFS4ERR_CONN_NOT_BOUND_TO_SESSION: + case -NFS4ERR_SEQ_FALSE_RETRY: + case -NFS4ERR_SEQ_MISORDERED: + dprintk("%s ERROR %d, Reset session. Exchangeid " + "flags 0x%x\n", __func__, task->tk_status, + clp->cl_exchange_flags); + nfs4_schedule_session_recovery(clp->cl_session, task->tk_status); + break; + case -NFS4ERR_DELAY: + case -NFS4ERR_GRACE: + rpc_delay(task, FILELAYOUT_POLL_RETRY_MAX); + break; + case -NFS4ERR_RETRY_UNCACHED_REP: + break; + /* Invalidate Layout errors */ + case -NFS4ERR_PNFS_NO_LAYOUT: + case -ESTALE: /* mapped NFS4ERR_STALE */ + case -EBADHANDLE: /* mapped NFS4ERR_BADHANDLE */ + case -EISDIR: /* mapped NFS4ERR_ISDIR */ + case -NFS4ERR_FHEXPIRED: + case -NFS4ERR_WRONG_TYPE: + dprintk("%s Invalid layout error %d\n", __func__, + task->tk_status); + /* + * Destroy layout so new i/o will get a new layout. + * Layout will not be destroyed until all current lseg + * references are put. Mark layout as invalid to resend failed + * i/o and all i/o waiting on the slot table to the MDS until + * layout is destroyed and a new valid layout is obtained. + */ + pnfs_destroy_layout(NFS_I(inode)); + rpc_wake_up(&tbl->slot_tbl_waitq); + goto reset; + /* RPC connection errors */ + case -ECONNREFUSED: + case -EHOSTDOWN: + case -EHOSTUNREACH: + case -ENETUNREACH: + case -EIO: + case -ETIMEDOUT: + case -EPIPE: + dprintk("%s DS connection error %d\n", __func__, + task->tk_status); + nfs4_mark_deviceid_unavailable(devid); + set_bit(NFS_LAYOUT_RETURN, &lo->plh_flags); + rpc_wake_up(&tbl->slot_tbl_waitq); + /* fall through */ + default: +reset: + dprintk("%s Retry through MDS. Error %d\n", __func__, + task->tk_status); + return -NFS4ERR_RESET_TO_MDS; + } +out: + task->tk_status = 0; + return -EAGAIN; +out_bad_stateid: + task->tk_status = -EIO; + return 0; +wait_on_recovery: + rpc_sleep_on(&mds_client->cl_rpcwaitq, task, NULL); + if (test_bit(NFS4CLNT_MANAGER_RUNNING, &mds_client->cl_state) == 0) + rpc_wake_up_queued_task(&mds_client->cl_rpcwaitq, task); + goto out; +} + +/* NFS_PROTO call done callback routines */ + +static int filelayout_read_done_cb(struct rpc_task *task, + struct nfs_pgio_data *data) +{ + struct nfs_pgio_header *hdr = data->header; + int err; + + trace_nfs4_pnfs_read(data, task->tk_status); + err = filelayout_async_handle_error(task, data->args.context->state, + data->ds_clp, hdr->lseg); + + switch (err) { + case -NFS4ERR_RESET_TO_MDS: + filelayout_reset_read(data); + return task->tk_status; + case -EAGAIN: + rpc_restart_call_prepare(task); + return -EAGAIN; + } + + return 0; +} + +/* + * We reference the rpc_cred of the first WRITE that triggers the need for + * a LAYOUTCOMMIT, and use it to send the layoutcommit compound. + * rfc5661 is not clear about which credential should be used. + */ +static void +filelayout_set_layoutcommit(struct nfs_pgio_data *wdata) +{ + struct nfs_pgio_header *hdr = wdata->header; + + if (FILELAYOUT_LSEG(hdr->lseg)->commit_through_mds || + wdata->res.verf->committed == NFS_FILE_SYNC) + return; + + pnfs_set_layoutcommit(wdata); + dprintk("%s ionde %lu pls_end_pos %lu\n", __func__, hdr->inode->i_ino, + (unsigned long) NFS_I(hdr->inode)->layout->plh_lwb); +} + +bool +filelayout_test_devid_unavailable(struct nfs4_deviceid_node *node) +{ + return filelayout_test_devid_invalid(node) || + nfs4_test_deviceid_unavailable(node); +} + +static bool +filelayout_reset_to_mds(struct pnfs_layout_segment *lseg) +{ + struct nfs4_deviceid_node *node = FILELAYOUT_DEVID_NODE(lseg); + + return filelayout_test_devid_unavailable(node); +} + +/* + * Call ops for the async read/write cases + * In the case of dense layouts, the offset needs to be reset to its + * original value. + */ +static void filelayout_read_prepare(struct rpc_task *task, void *data) +{ + struct nfs_pgio_data *rdata = data; + + if (unlikely(test_bit(NFS_CONTEXT_BAD, &rdata->args.context->flags))) { + rpc_exit(task, -EIO); + return; + } + if (filelayout_reset_to_mds(rdata->header->lseg)) { + dprintk("%s task %u reset io to MDS\n", __func__, task->tk_pid); + filelayout_reset_read(rdata); + rpc_exit(task, 0); + return; + } + rdata->pgio_done_cb = filelayout_read_done_cb; + + if (nfs41_setup_sequence(rdata->ds_clp->cl_session, + &rdata->args.seq_args, + &rdata->res.seq_res, + task)) + return; + if (nfs4_set_rw_stateid(&rdata->args.stateid, rdata->args.context, + rdata->args.lock_context, FMODE_READ) == -EIO) + rpc_exit(task, -EIO); /* lost lock, terminate I/O */ +} + +static void filelayout_read_call_done(struct rpc_task *task, void *data) +{ + struct nfs_pgio_data *rdata = data; + + dprintk("--> %s task->tk_status %d\n", __func__, task->tk_status); + + if (test_bit(NFS_IOHDR_REDO, &rdata->header->flags) && + task->tk_status == 0) { + nfs41_sequence_done(task, &rdata->res.seq_res); + return; + } + + /* Note this may cause RPC to be resent */ + rdata->header->mds_ops->rpc_call_done(task, data); +} + +static void filelayout_read_count_stats(struct rpc_task *task, void *data) +{ + struct nfs_pgio_data *rdata = data; + + rpc_count_iostats(task, NFS_SERVER(rdata->header->inode)->client->cl_metrics); +} + +static void filelayout_read_release(void *data) +{ + struct nfs_pgio_data *rdata = data; + struct pnfs_layout_hdr *lo = rdata->header->lseg->pls_layout; + + filelayout_fenceme(lo->plh_inode, lo); + nfs_put_client(rdata->ds_clp); + rdata->header->mds_ops->rpc_release(data); +} + +static int filelayout_write_done_cb(struct rpc_task *task, + struct nfs_pgio_data *data) +{ + struct nfs_pgio_header *hdr = data->header; + int err; + + trace_nfs4_pnfs_write(data, task->tk_status); + err = filelayout_async_handle_error(task, data->args.context->state, + data->ds_clp, hdr->lseg); + + switch (err) { + case -NFS4ERR_RESET_TO_MDS: + filelayout_reset_write(data); + return task->tk_status; + case -EAGAIN: + rpc_restart_call_prepare(task); + return -EAGAIN; + } + + filelayout_set_layoutcommit(data); + return 0; +} + +/* Fake up some data that will cause nfs_commit_release to retry the writes. */ +static void prepare_to_resend_writes(struct nfs_commit_data *data) +{ + struct nfs_page *first = nfs_list_entry(data->pages.next); + + data->task.tk_status = 0; + memcpy(&data->verf.verifier, &first->wb_verf, + sizeof(data->verf.verifier)); + data->verf.verifier.data[0]++; /* ensure verifier mismatch */ +} + +static int filelayout_commit_done_cb(struct rpc_task *task, + struct nfs_commit_data *data) +{ + int err; + + trace_nfs4_pnfs_commit_ds(data, task->tk_status); + err = filelayout_async_handle_error(task, NULL, data->ds_clp, + data->lseg); + + switch (err) { + case -NFS4ERR_RESET_TO_MDS: + prepare_to_resend_writes(data); + return -EAGAIN; + case -EAGAIN: + rpc_restart_call_prepare(task); + return -EAGAIN; + } + + return 0; +} + +static void filelayout_write_prepare(struct rpc_task *task, void *data) +{ + struct nfs_pgio_data *wdata = data; + + if (unlikely(test_bit(NFS_CONTEXT_BAD, &wdata->args.context->flags))) { + rpc_exit(task, -EIO); + return; + } + if (filelayout_reset_to_mds(wdata->header->lseg)) { + dprintk("%s task %u reset io to MDS\n", __func__, task->tk_pid); + filelayout_reset_write(wdata); + rpc_exit(task, 0); + return; + } + if (nfs41_setup_sequence(wdata->ds_clp->cl_session, + &wdata->args.seq_args, + &wdata->res.seq_res, + task)) + return; + if (nfs4_set_rw_stateid(&wdata->args.stateid, wdata->args.context, + wdata->args.lock_context, FMODE_WRITE) == -EIO) + rpc_exit(task, -EIO); /* lost lock, terminate I/O */ +} + +static void filelayout_write_call_done(struct rpc_task *task, void *data) +{ + struct nfs_pgio_data *wdata = data; + + if (test_bit(NFS_IOHDR_REDO, &wdata->header->flags) && + task->tk_status == 0) { + nfs41_sequence_done(task, &wdata->res.seq_res); + return; + } + + /* Note this may cause RPC to be resent */ + wdata->header->mds_ops->rpc_call_done(task, data); +} + +static void filelayout_write_count_stats(struct rpc_task *task, void *data) +{ + struct nfs_pgio_data *wdata = data; + + rpc_count_iostats(task, NFS_SERVER(wdata->header->inode)->client->cl_metrics); +} + +static void filelayout_write_release(void *data) +{ + struct nfs_pgio_data *wdata = data; + struct pnfs_layout_hdr *lo = wdata->header->lseg->pls_layout; + + filelayout_fenceme(lo->plh_inode, lo); + nfs_put_client(wdata->ds_clp); + wdata->header->mds_ops->rpc_release(data); +} + +static void filelayout_commit_prepare(struct rpc_task *task, void *data) +{ + struct nfs_commit_data *wdata = data; + + nfs41_setup_sequence(wdata->ds_clp->cl_session, + &wdata->args.seq_args, + &wdata->res.seq_res, + task); +} + +static void filelayout_write_commit_done(struct rpc_task *task, void *data) +{ + struct nfs_commit_data *wdata = data; + + /* Note this may cause RPC to be resent */ + wdata->mds_ops->rpc_call_done(task, data); +} + +static void filelayout_commit_count_stats(struct rpc_task *task, void *data) +{ + struct nfs_commit_data *cdata = data; + + rpc_count_iostats(task, NFS_SERVER(cdata->inode)->client->cl_metrics); +} + +static void filelayout_commit_release(void *calldata) +{ + struct nfs_commit_data *data = calldata; + + data->completion_ops->completion(data); + pnfs_put_lseg(data->lseg); + nfs_put_client(data->ds_clp); + nfs_commitdata_release(data); +} + +static const struct rpc_call_ops filelayout_read_call_ops = { + .rpc_call_prepare = filelayout_read_prepare, + .rpc_call_done = filelayout_read_call_done, + .rpc_count_stats = filelayout_read_count_stats, + .rpc_release = filelayout_read_release, +}; + +static const struct rpc_call_ops filelayout_write_call_ops = { + .rpc_call_prepare = filelayout_write_prepare, + .rpc_call_done = filelayout_write_call_done, + .rpc_count_stats = filelayout_write_count_stats, + .rpc_release = filelayout_write_release, +}; + +static const struct rpc_call_ops filelayout_commit_call_ops = { + .rpc_call_prepare = filelayout_commit_prepare, + .rpc_call_done = filelayout_write_commit_done, + .rpc_count_stats = filelayout_commit_count_stats, + .rpc_release = filelayout_commit_release, +}; + +static enum pnfs_try_status +filelayout_read_pagelist(struct nfs_pgio_data *data) +{ + struct nfs_pgio_header *hdr = data->header; + struct pnfs_layout_segment *lseg = hdr->lseg; + struct nfs4_pnfs_ds *ds; + struct rpc_clnt *ds_clnt; + loff_t offset = data->args.offset; + u32 j, idx; + struct nfs_fh *fh; + + dprintk("--> %s ino %lu pgbase %u req %Zu@%llu\n", + __func__, hdr->inode->i_ino, + data->args.pgbase, (size_t)data->args.count, offset); + + /* Retrieve the correct rpc_client for the byte range */ + j = nfs4_fl_calc_j_index(lseg, offset); + idx = nfs4_fl_calc_ds_index(lseg, j); + ds = nfs4_fl_prepare_ds(lseg, idx); + if (!ds) + return PNFS_NOT_ATTEMPTED; + + ds_clnt = nfs4_find_or_create_ds_client(ds->ds_clp, hdr->inode); + if (IS_ERR(ds_clnt)) + return PNFS_NOT_ATTEMPTED; + + dprintk("%s USE DS: %s cl_count %d\n", __func__, + ds->ds_remotestr, atomic_read(&ds->ds_clp->cl_count)); + + /* No multipath support. Use first DS */ + atomic_inc(&ds->ds_clp->cl_count); + data->ds_clp = ds->ds_clp; + data->ds_idx = idx; + fh = nfs4_fl_select_ds_fh(lseg, j); + if (fh) + data->args.fh = fh; + + data->args.offset = filelayout_get_dserver_offset(lseg, offset); + data->mds_offset = offset; + + /* Perform an asynchronous read to ds */ + nfs_initiate_pgio(ds_clnt, data, + &filelayout_read_call_ops, 0, RPC_TASK_SOFTCONN); + return PNFS_ATTEMPTED; +} + +/* Perform async writes. */ +static enum pnfs_try_status +filelayout_write_pagelist(struct nfs_pgio_data *data, int sync) +{ + struct nfs_pgio_header *hdr = data->header; + struct pnfs_layout_segment *lseg = hdr->lseg; + struct nfs4_pnfs_ds *ds; + struct rpc_clnt *ds_clnt; + loff_t offset = data->args.offset; + u32 j, idx; + struct nfs_fh *fh; + + /* Retrieve the correct rpc_client for the byte range */ + j = nfs4_fl_calc_j_index(lseg, offset); + idx = nfs4_fl_calc_ds_index(lseg, j); + ds = nfs4_fl_prepare_ds(lseg, idx); + if (!ds) + return PNFS_NOT_ATTEMPTED; + + ds_clnt = nfs4_find_or_create_ds_client(ds->ds_clp, hdr->inode); + if (IS_ERR(ds_clnt)) + return PNFS_NOT_ATTEMPTED; + + dprintk("%s ino %lu sync %d req %Zu@%llu DS: %s cl_count %d\n", + __func__, hdr->inode->i_ino, sync, (size_t) data->args.count, + offset, ds->ds_remotestr, atomic_read(&ds->ds_clp->cl_count)); + + data->pgio_done_cb = filelayout_write_done_cb; + atomic_inc(&ds->ds_clp->cl_count); + data->ds_clp = ds->ds_clp; + data->ds_idx = idx; + fh = nfs4_fl_select_ds_fh(lseg, j); + if (fh) + data->args.fh = fh; + /* + * Get the file offset on the dserver. Set the write offset to + * this offset and save the original offset. + */ + data->args.offset = filelayout_get_dserver_offset(lseg, offset); + + /* Perform an asynchronous write */ + nfs_initiate_pgio(ds_clnt, data, + &filelayout_write_call_ops, sync, + RPC_TASK_SOFTCONN); + return PNFS_ATTEMPTED; +} + +/* + * filelayout_check_layout() + * + * Make sure layout segment parameters are sane WRT the device. + * At this point no generic layer initialization of the lseg has occurred, + * and nothing has been added to the layout_hdr cache. + * + */ +static int +filelayout_check_layout(struct pnfs_layout_hdr *lo, + struct nfs4_filelayout_segment *fl, + struct nfs4_layoutget_res *lgr, + struct nfs4_deviceid *id, + gfp_t gfp_flags) +{ + struct nfs4_deviceid_node *d; + struct nfs4_file_layout_dsaddr *dsaddr; + int status = -EINVAL; + + dprintk("--> %s\n", __func__); + + /* FIXME: remove this check when layout segment support is added */ + if (lgr->range.offset != 0 || + lgr->range.length != NFS4_MAX_UINT64) { + dprintk("%s Only whole file layouts supported. Use MDS i/o\n", + __func__); + goto out; + } + + if (fl->pattern_offset > lgr->range.offset) { + dprintk("%s pattern_offset %lld too large\n", + __func__, fl->pattern_offset); + goto out; + } + + if (!fl->stripe_unit) { + dprintk("%s Invalid stripe unit (%u)\n", + __func__, fl->stripe_unit); + goto out; + } + + /* find and reference the deviceid */ + d = nfs4_find_get_deviceid(NFS_SERVER(lo->plh_inode)->pnfs_curr_ld, + NFS_SERVER(lo->plh_inode)->nfs_client, id); + if (d == NULL) { + dsaddr = filelayout_get_device_info(lo->plh_inode, id, + lo->plh_lc_cred, gfp_flags); + if (dsaddr == NULL) + goto out; + } else + dsaddr = container_of(d, struct nfs4_file_layout_dsaddr, id_node); + /* Found deviceid is unavailable */ + if (filelayout_test_devid_unavailable(&dsaddr->id_node)) + goto out_put; + + fl->dsaddr = dsaddr; + + if (fl->first_stripe_index >= dsaddr->stripe_count) { + dprintk("%s Bad first_stripe_index %u\n", + __func__, fl->first_stripe_index); + goto out_put; + } + + if ((fl->stripe_type == STRIPE_SPARSE && + fl->num_fh > 1 && fl->num_fh != dsaddr->ds_num) || + (fl->stripe_type == STRIPE_DENSE && + fl->num_fh != dsaddr->stripe_count)) { + dprintk("%s num_fh %u not valid for given packing\n", + __func__, fl->num_fh); + goto out_put; + } + + status = 0; +out: + dprintk("--> %s returns %d\n", __func__, status); + return status; +out_put: + nfs4_fl_put_deviceid(dsaddr); + goto out; +} + +static void filelayout_free_fh_array(struct nfs4_filelayout_segment *fl) +{ + int i; + + for (i = 0; i < fl->num_fh; i++) { + if (!fl->fh_array[i]) + break; + kfree(fl->fh_array[i]); + } + kfree(fl->fh_array); + fl->fh_array = NULL; +} + +static void +_filelayout_free_lseg(struct nfs4_filelayout_segment *fl) +{ + filelayout_free_fh_array(fl); + kfree(fl); +} + +static int +filelayout_decode_layout(struct pnfs_layout_hdr *flo, + struct nfs4_filelayout_segment *fl, + struct nfs4_layoutget_res *lgr, + struct nfs4_deviceid *id, + gfp_t gfp_flags) +{ + struct xdr_stream stream; + struct xdr_buf buf; + struct page *scratch; + __be32 *p; + uint32_t nfl_util; + int i; + + dprintk("%s: set_layout_map Begin\n", __func__); + + scratch = alloc_page(gfp_flags); + if (!scratch) + return -ENOMEM; + + xdr_init_decode_pages(&stream, &buf, lgr->layoutp->pages, lgr->layoutp->len); + xdr_set_scratch_buffer(&stream, page_address(scratch), PAGE_SIZE); + + /* 20 = ufl_util (4), first_stripe_index (4), pattern_offset (8), + * num_fh (4) */ + p = xdr_inline_decode(&stream, NFS4_DEVICEID4_SIZE + 20); + if (unlikely(!p)) + goto out_err; + + memcpy(id, p, sizeof(*id)); + p += XDR_QUADLEN(NFS4_DEVICEID4_SIZE); + nfs4_print_deviceid(id); + + nfl_util = be32_to_cpup(p++); + if (nfl_util & NFL4_UFLG_COMMIT_THRU_MDS) + fl->commit_through_mds = 1; + if (nfl_util & NFL4_UFLG_DENSE) + fl->stripe_type = STRIPE_DENSE; + else + fl->stripe_type = STRIPE_SPARSE; + fl->stripe_unit = nfl_util & ~NFL4_UFLG_MASK; + + fl->first_stripe_index = be32_to_cpup(p++); + p = xdr_decode_hyper(p, &fl->pattern_offset); + fl->num_fh = be32_to_cpup(p++); + + dprintk("%s: nfl_util 0x%X num_fh %u fsi %u po %llu\n", + __func__, nfl_util, fl->num_fh, fl->first_stripe_index, + fl->pattern_offset); + + /* Note that a zero value for num_fh is legal for STRIPE_SPARSE. + * Futher checking is done in filelayout_check_layout */ + if (fl->num_fh > + max(NFS4_PNFS_MAX_STRIPE_CNT, NFS4_PNFS_MAX_MULTI_CNT)) + goto out_err; + + if (fl->num_fh > 0) { + fl->fh_array = kcalloc(fl->num_fh, sizeof(fl->fh_array[0]), + gfp_flags); + if (!fl->fh_array) + goto out_err; + } + + for (i = 0; i < fl->num_fh; i++) { + /* Do we want to use a mempool here? */ + fl->fh_array[i] = kmalloc(sizeof(struct nfs_fh), gfp_flags); + if (!fl->fh_array[i]) + goto out_err_free; + + p = xdr_inline_decode(&stream, 4); + if (unlikely(!p)) + goto out_err_free; + fl->fh_array[i]->size = be32_to_cpup(p++); + if (sizeof(struct nfs_fh) < fl->fh_array[i]->size) { + printk(KERN_ERR "NFS: Too big fh %d received %d\n", + i, fl->fh_array[i]->size); + goto out_err_free; + } + + p = xdr_inline_decode(&stream, fl->fh_array[i]->size); + if (unlikely(!p)) + goto out_err_free; + memcpy(fl->fh_array[i]->data, p, fl->fh_array[i]->size); + dprintk("DEBUG: %s: fh len %d\n", __func__, + fl->fh_array[i]->size); + } + + __free_page(scratch); + return 0; + +out_err_free: + filelayout_free_fh_array(fl); +out_err: + __free_page(scratch); + return -EIO; +} + +static void +filelayout_free_lseg(struct pnfs_layout_segment *lseg) +{ + struct nfs4_filelayout_segment *fl = FILELAYOUT_LSEG(lseg); + + dprintk("--> %s\n", __func__); + nfs4_fl_put_deviceid(fl->dsaddr); + /* This assumes a single RW lseg */ + if (lseg->pls_range.iomode == IOMODE_RW) { + struct nfs4_filelayout *flo; + + flo = FILELAYOUT_FROM_HDR(lseg->pls_layout); + flo->commit_info.nbuckets = 0; + kfree(flo->commit_info.buckets); + flo->commit_info.buckets = NULL; + } + _filelayout_free_lseg(fl); +} + +static int +filelayout_alloc_commit_info(struct pnfs_layout_segment *lseg, + struct nfs_commit_info *cinfo, + gfp_t gfp_flags) +{ + struct nfs4_filelayout_segment *fl = FILELAYOUT_LSEG(lseg); + struct pnfs_commit_bucket *buckets; + int size, i; + + if (fl->commit_through_mds) + return 0; + + size = (fl->stripe_type == STRIPE_SPARSE) ? + fl->dsaddr->ds_num : fl->dsaddr->stripe_count; + + if (cinfo->ds->nbuckets >= size) { + /* This assumes there is only one IOMODE_RW lseg. What + * we really want to do is have a layout_hdr level + * dictionary of keys, each + * associated with a struct list_head, populated by calls + * to filelayout_write_pagelist(). + * */ + return 0; + } + + buckets = kcalloc(size, sizeof(struct pnfs_commit_bucket), + gfp_flags); + if (!buckets) + return -ENOMEM; + for (i = 0; i < size; i++) { + INIT_LIST_HEAD(&buckets[i].written); + INIT_LIST_HEAD(&buckets[i].committing); + /* mark direct verifier as unset */ + buckets[i].direct_verf.committed = NFS_INVALID_STABLE_HOW; + } + + spin_lock(cinfo->lock); + if (cinfo->ds->nbuckets >= size) + goto out; + for (i = 0; i < cinfo->ds->nbuckets; i++) { + list_splice(&cinfo->ds->buckets[i].written, + &buckets[i].written); + list_splice(&cinfo->ds->buckets[i].committing, + &buckets[i].committing); + buckets[i].direct_verf.committed = + cinfo->ds->buckets[i].direct_verf.committed; + buckets[i].wlseg = cinfo->ds->buckets[i].wlseg; + buckets[i].clseg = cinfo->ds->buckets[i].clseg; + } + swap(cinfo->ds->buckets, buckets); + cinfo->ds->nbuckets = size; +out: + spin_unlock(cinfo->lock); + kfree(buckets); + return 0; +} + +static struct pnfs_layout_segment * +filelayout_alloc_lseg(struct pnfs_layout_hdr *layoutid, + struct nfs4_layoutget_res *lgr, + gfp_t gfp_flags) +{ + struct nfs4_filelayout_segment *fl; + int rc; + struct nfs4_deviceid id; + + dprintk("--> %s\n", __func__); + fl = kzalloc(sizeof(*fl), gfp_flags); + if (!fl) + return NULL; + + rc = filelayout_decode_layout(layoutid, fl, lgr, &id, gfp_flags); + if (rc != 0 || filelayout_check_layout(layoutid, fl, lgr, &id, gfp_flags)) { + _filelayout_free_lseg(fl); + return NULL; + } + return &fl->generic_hdr; +} + +/* + * filelayout_pg_test(). Called by nfs_can_coalesce_requests() + * + * Return 0 if @req cannot be coalesced into @pgio, otherwise return the number + * of bytes (maximum @req->wb_bytes) that can be coalesced. + */ +static size_t +filelayout_pg_test(struct nfs_pageio_descriptor *pgio, struct nfs_page *prev, + struct nfs_page *req) +{ + unsigned int size; + u64 p_stripe, r_stripe; + u32 stripe_offset; + u64 segment_offset = pgio->pg_lseg->pls_range.offset; + u32 stripe_unit = FILELAYOUT_LSEG(pgio->pg_lseg)->stripe_unit; + + /* calls nfs_generic_pg_test */ + size = pnfs_generic_pg_test(pgio, prev, req); + if (!size) + return 0; + + /* see if req and prev are in the same stripe */ + if (prev) { + p_stripe = (u64)req_offset(prev) - segment_offset; + r_stripe = (u64)req_offset(req) - segment_offset; + do_div(p_stripe, stripe_unit); + do_div(r_stripe, stripe_unit); + + if (p_stripe != r_stripe) + return 0; + } + + /* calculate remaining bytes in the current stripe */ + div_u64_rem((u64)req_offset(req) - segment_offset, + stripe_unit, + &stripe_offset); + WARN_ON_ONCE(stripe_offset > stripe_unit); + if (stripe_offset >= stripe_unit) + return 0; + return min(stripe_unit - (unsigned int)stripe_offset, size); +} + +static void +filelayout_pg_init_read(struct nfs_pageio_descriptor *pgio, + struct nfs_page *req) +{ + if (!pgio->pg_lseg) + pgio->pg_lseg = pnfs_update_layout(pgio->pg_inode, + req->wb_context, + 0, + NFS4_MAX_UINT64, + IOMODE_READ, + GFP_KERNEL); + /* If no lseg, fall back to read through mds */ + if (pgio->pg_lseg == NULL) + nfs_pageio_reset_read_mds(pgio); +} + +static void +filelayout_pg_init_write(struct nfs_pageio_descriptor *pgio, + struct nfs_page *req) +{ + struct nfs_commit_info cinfo; + int status; + + if (!pgio->pg_lseg) + pgio->pg_lseg = pnfs_update_layout(pgio->pg_inode, + req->wb_context, + 0, + NFS4_MAX_UINT64, + IOMODE_RW, + GFP_NOFS); + /* If no lseg, fall back to write through mds */ + if (pgio->pg_lseg == NULL) + goto out_mds; + nfs_init_cinfo(&cinfo, pgio->pg_inode, pgio->pg_dreq); + status = filelayout_alloc_commit_info(pgio->pg_lseg, &cinfo, GFP_NOFS); + if (status < 0) { + pnfs_put_lseg(pgio->pg_lseg); + pgio->pg_lseg = NULL; + goto out_mds; + } + return; +out_mds: + nfs_pageio_reset_write_mds(pgio); +} + +static const struct nfs_pageio_ops filelayout_pg_read_ops = { + .pg_init = filelayout_pg_init_read, + .pg_test = filelayout_pg_test, + .pg_doio = pnfs_generic_pg_readpages, +}; + +static const struct nfs_pageio_ops filelayout_pg_write_ops = { + .pg_init = filelayout_pg_init_write, + .pg_test = filelayout_pg_test, + .pg_doio = pnfs_generic_pg_writepages, +}; + +static u32 select_bucket_index(struct nfs4_filelayout_segment *fl, u32 j) +{ + if (fl->stripe_type == STRIPE_SPARSE) + return nfs4_fl_calc_ds_index(&fl->generic_hdr, j); + else + return j; +} + +/* The generic layer is about to remove the req from the commit list. + * If this will make the bucket empty, it will need to put the lseg reference. + */ +static void +filelayout_clear_request_commit(struct nfs_page *req, + struct nfs_commit_info *cinfo) +{ + struct pnfs_layout_segment *freeme = NULL; + + spin_lock(cinfo->lock); + if (!test_and_clear_bit(PG_COMMIT_TO_DS, &req->wb_flags)) + goto out; + cinfo->ds->nwritten--; + if (list_is_singular(&req->wb_list)) { + struct pnfs_commit_bucket *bucket; + + bucket = list_first_entry(&req->wb_list, + struct pnfs_commit_bucket, + written); + freeme = bucket->wlseg; + bucket->wlseg = NULL; + } +out: + nfs_request_remove_commit_list(req, cinfo); + spin_unlock(cinfo->lock); + pnfs_put_lseg(freeme); +} + +static struct list_head * +filelayout_choose_commit_list(struct nfs_page *req, + struct pnfs_layout_segment *lseg, + struct nfs_commit_info *cinfo) +{ + struct nfs4_filelayout_segment *fl = FILELAYOUT_LSEG(lseg); + u32 i, j; + struct list_head *list; + struct pnfs_commit_bucket *buckets; + + if (fl->commit_through_mds) + return &cinfo->mds->list; + + /* Note that we are calling nfs4_fl_calc_j_index on each page + * that ends up being committed to a data server. An attractive + * alternative is to add a field to nfs_write_data and nfs_page + * to store the value calculated in filelayout_write_pagelist + * and just use that here. + */ + j = nfs4_fl_calc_j_index(lseg, req_offset(req)); + i = select_bucket_index(fl, j); + spin_lock(cinfo->lock); + buckets = cinfo->ds->buckets; + list = &buckets[i].written; + if (list_empty(list)) { + /* Non-empty buckets hold a reference on the lseg. That ref + * is normally transferred to the COMMIT call and released + * there. It could also be released if the last req is pulled + * off due to a rewrite, in which case it will be done in + * filelayout_clear_request_commit + */ + buckets[i].wlseg = pnfs_get_lseg(lseg); + } + set_bit(PG_COMMIT_TO_DS, &req->wb_flags); + cinfo->ds->nwritten++; + spin_unlock(cinfo->lock); + return list; +} + +static void +filelayout_mark_request_commit(struct nfs_page *req, + struct pnfs_layout_segment *lseg, + struct nfs_commit_info *cinfo) +{ + struct list_head *list; + + list = filelayout_choose_commit_list(req, lseg, cinfo); + nfs_request_add_commit_list(req, list, cinfo); +} + +static u32 calc_ds_index_from_commit(struct pnfs_layout_segment *lseg, u32 i) +{ + struct nfs4_filelayout_segment *flseg = FILELAYOUT_LSEG(lseg); + + if (flseg->stripe_type == STRIPE_SPARSE) + return i; + else + return nfs4_fl_calc_ds_index(lseg, i); +} + +static struct nfs_fh * +select_ds_fh_from_commit(struct pnfs_layout_segment *lseg, u32 i) +{ + struct nfs4_filelayout_segment *flseg = FILELAYOUT_LSEG(lseg); + + if (flseg->stripe_type == STRIPE_SPARSE) { + if (flseg->num_fh == 1) + i = 0; + else if (flseg->num_fh == 0) + /* Use the MDS OPEN fh set in nfs_read_rpcsetup */ + return NULL; + } + return flseg->fh_array[i]; +} + +static int filelayout_initiate_commit(struct nfs_commit_data *data, int how) +{ + struct pnfs_layout_segment *lseg = data->lseg; + struct nfs4_pnfs_ds *ds; + struct rpc_clnt *ds_clnt; + u32 idx; + struct nfs_fh *fh; + + idx = calc_ds_index_from_commit(lseg, data->ds_commit_index); + ds = nfs4_fl_prepare_ds(lseg, idx); + if (!ds) + goto out_err; + + ds_clnt = nfs4_find_or_create_ds_client(ds->ds_clp, data->inode); + if (IS_ERR(ds_clnt)) + goto out_err; + + dprintk("%s ino %lu, how %d cl_count %d\n", __func__, + data->inode->i_ino, how, atomic_read(&ds->ds_clp->cl_count)); + data->commit_done_cb = filelayout_commit_done_cb; + atomic_inc(&ds->ds_clp->cl_count); + data->ds_clp = ds->ds_clp; + fh = select_ds_fh_from_commit(lseg, data->ds_commit_index); + if (fh) + data->args.fh = fh; + return nfs_initiate_commit(ds_clnt, data, + &filelayout_commit_call_ops, how, + RPC_TASK_SOFTCONN); +out_err: + prepare_to_resend_writes(data); + filelayout_commit_release(data); + return -EAGAIN; +} + +static int +transfer_commit_list(struct list_head *src, struct list_head *dst, + struct nfs_commit_info *cinfo, int max) +{ + struct nfs_page *req, *tmp; + int ret = 0; + + list_for_each_entry_safe(req, tmp, src, wb_list) { + if (!nfs_lock_request(req)) + continue; + kref_get(&req->wb_kref); + if (cond_resched_lock(cinfo->lock)) + list_safe_reset_next(req, tmp, wb_list); + nfs_request_remove_commit_list(req, cinfo); + clear_bit(PG_COMMIT_TO_DS, &req->wb_flags); + nfs_list_add_request(req, dst); + ret++; + if ((ret == max) && !cinfo->dreq) + break; + } + return ret; +} + +/* Note called with cinfo->lock held. */ +static int +filelayout_scan_ds_commit_list(struct pnfs_commit_bucket *bucket, + struct nfs_commit_info *cinfo, + int max) +{ + struct list_head *src = &bucket->written; + struct list_head *dst = &bucket->committing; + int ret; + + ret = transfer_commit_list(src, dst, cinfo, max); + if (ret) { + cinfo->ds->nwritten -= ret; + cinfo->ds->ncommitting += ret; + bucket->clseg = bucket->wlseg; + if (list_empty(src)) + bucket->wlseg = NULL; + else + pnfs_get_lseg(bucket->clseg); + } + return ret; +} + +/* Move reqs from written to committing lists, returning count of number moved. + * Note called with cinfo->lock held. + */ +static int filelayout_scan_commit_lists(struct nfs_commit_info *cinfo, + int max) +{ + int i, rv = 0, cnt; + + for (i = 0; i < cinfo->ds->nbuckets && max != 0; i++) { + cnt = filelayout_scan_ds_commit_list(&cinfo->ds->buckets[i], + cinfo, max); + max -= cnt; + rv += cnt; + } + return rv; +} + +/* Pull everything off the committing lists and dump into @dst */ +static void filelayout_recover_commit_reqs(struct list_head *dst, + struct nfs_commit_info *cinfo) +{ + struct pnfs_commit_bucket *b; + struct pnfs_layout_segment *freeme; + int i; + +restart: + spin_lock(cinfo->lock); + for (i = 0, b = cinfo->ds->buckets; i < cinfo->ds->nbuckets; i++, b++) { + if (transfer_commit_list(&b->written, dst, cinfo, 0)) { + freeme = b->wlseg; + b->wlseg = NULL; + spin_unlock(cinfo->lock); + pnfs_put_lseg(freeme); + goto restart; + } + } + cinfo->ds->nwritten = 0; + spin_unlock(cinfo->lock); +} + +static unsigned int +alloc_ds_commits(struct nfs_commit_info *cinfo, struct list_head *list) +{ + struct pnfs_ds_commit_info *fl_cinfo; + struct pnfs_commit_bucket *bucket; + struct nfs_commit_data *data; + int i, j; + unsigned int nreq = 0; + struct pnfs_layout_segment *freeme; + + fl_cinfo = cinfo->ds; + bucket = fl_cinfo->buckets; + for (i = 0; i < fl_cinfo->nbuckets; i++, bucket++) { + if (list_empty(&bucket->committing)) + continue; + data = nfs_commitdata_alloc(); + if (!data) + break; + data->ds_commit_index = i; + spin_lock(cinfo->lock); + data->lseg = bucket->clseg; + bucket->clseg = NULL; + spin_unlock(cinfo->lock); + list_add(&data->pages, list); + nreq++; + } + + /* Clean up on error */ + for (j = i; j < fl_cinfo->nbuckets; j++, bucket++) { + if (list_empty(&bucket->committing)) + continue; + nfs_retry_commit(&bucket->committing, bucket->clseg, cinfo); + spin_lock(cinfo->lock); + freeme = bucket->clseg; + bucket->clseg = NULL; + spin_unlock(cinfo->lock); + pnfs_put_lseg(freeme); + } + /* Caller will clean up entries put on list */ + return nreq; +} + +/* This follows nfs_commit_list pretty closely */ +static int +filelayout_commit_pagelist(struct inode *inode, struct list_head *mds_pages, + int how, struct nfs_commit_info *cinfo) +{ + struct nfs_commit_data *data, *tmp; + LIST_HEAD(list); + unsigned int nreq = 0; + + if (!list_empty(mds_pages)) { + data = nfs_commitdata_alloc(); + if (data != NULL) { + data->lseg = NULL; + list_add(&data->pages, &list); + nreq++; + } else + nfs_retry_commit(mds_pages, NULL, cinfo); + } + + nreq += alloc_ds_commits(cinfo, &list); + + if (nreq == 0) { + cinfo->completion_ops->error_cleanup(NFS_I(inode)); + goto out; + } + + atomic_add(nreq, &cinfo->mds->rpcs_out); + + list_for_each_entry_safe(data, tmp, &list, pages) { + list_del_init(&data->pages); + if (!data->lseg) { + nfs_init_commit(data, mds_pages, NULL, cinfo); + nfs_initiate_commit(NFS_CLIENT(inode), data, + data->mds_ops, how, 0); + } else { + struct pnfs_commit_bucket *buckets; + + buckets = cinfo->ds->buckets; + nfs_init_commit(data, &buckets[data->ds_commit_index].committing, data->lseg, cinfo); + filelayout_initiate_commit(data, how); + } + } +out: + cinfo->ds->ncommitting = 0; + return PNFS_ATTEMPTED; +} + +static void +filelayout_free_deveiceid_node(struct nfs4_deviceid_node *d) +{ + nfs4_fl_free_deviceid(container_of(d, struct nfs4_file_layout_dsaddr, id_node)); +} + +static struct pnfs_layout_hdr * +filelayout_alloc_layout_hdr(struct inode *inode, gfp_t gfp_flags) +{ + struct nfs4_filelayout *flo; + + flo = kzalloc(sizeof(*flo), gfp_flags); + return flo != NULL ? &flo->generic_hdr : NULL; +} + +static void +filelayout_free_layout_hdr(struct pnfs_layout_hdr *lo) +{ + kfree(FILELAYOUT_FROM_HDR(lo)); +} + +static struct pnfs_ds_commit_info * +filelayout_get_ds_info(struct inode *inode) +{ + struct pnfs_layout_hdr *layout = NFS_I(inode)->layout; + + if (layout == NULL) + return NULL; + else + return &FILELAYOUT_FROM_HDR(layout)->commit_info; +} + +static struct pnfs_layoutdriver_type filelayout_type = { + .id = LAYOUT_NFSV4_1_FILES, + .name = "LAYOUT_NFSV4_1_FILES", + .owner = THIS_MODULE, + .alloc_layout_hdr = filelayout_alloc_layout_hdr, + .free_layout_hdr = filelayout_free_layout_hdr, + .alloc_lseg = filelayout_alloc_lseg, + .free_lseg = filelayout_free_lseg, + .pg_read_ops = &filelayout_pg_read_ops, + .pg_write_ops = &filelayout_pg_write_ops, + .get_ds_info = &filelayout_get_ds_info, + .mark_request_commit = filelayout_mark_request_commit, + .clear_request_commit = filelayout_clear_request_commit, + .scan_commit_lists = filelayout_scan_commit_lists, + .recover_commit_reqs = filelayout_recover_commit_reqs, + .commit_pagelist = filelayout_commit_pagelist, + .read_pagelist = filelayout_read_pagelist, + .write_pagelist = filelayout_write_pagelist, + .free_deviceid_node = filelayout_free_deveiceid_node, +}; + +static int __init nfs4filelayout_init(void) +{ + printk(KERN_INFO "%s: NFSv4 File Layout Driver Registering...\n", + __func__); + return pnfs_register_layoutdriver(&filelayout_type); +} + +static void __exit nfs4filelayout_exit(void) +{ + printk(KERN_INFO "%s: NFSv4 File Layout Driver Unregistering...\n", + __func__); + pnfs_unregister_layoutdriver(&filelayout_type); +} + +MODULE_ALIAS("nfs-layouttype4-1"); + +module_init(nfs4filelayout_init); +module_exit(nfs4filelayout_exit); diff --git a/fs/nfs/filelayout/filelayout.h b/fs/nfs/filelayout/filelayout.h new file mode 100644 index 0000000..ffbddf2 --- /dev/null +++ b/fs/nfs/filelayout/filelayout.h @@ -0,0 +1,156 @@ +/* + * NFSv4 file layout driver data structures. + * + * Copyright (c) 2002 + * The Regents of the University of Michigan + * All Rights Reserved + * + * Dean Hildebrand + * + * Permission is granted to use, copy, create derivative works, and + * redistribute this software and such derivative works for any purpose, + * so long as the name of the University of Michigan is not used in + * any advertising or publicity pertaining to the use or distribution + * of this software without specific, written prior authorization. If + * the above copyright notice or any other identification of the + * University of Michigan is included in any copy of any portion of + * this software, then the disclaimer below must also be included. + * + * This software is provided as is, without representation or warranty + * of any kind either express or implied, including without limitation + * the implied warranties of merchantability, fitness for a particular + * purpose, or noninfringement. The Regents of the University of + * Michigan shall not be liable for any damages, including special, + * indirect, incidental, or consequential damages, with respect to any + * claim arising out of or in connection with the use of the software, + * even if it has been or is hereafter advised of the possibility of + * such damages. + */ + +#ifndef FS_NFS_NFS4FILELAYOUT_H +#define FS_NFS_NFS4FILELAYOUT_H + +#include "../pnfs.h" + +/* + * Default data server connection timeout and retrans vaules. + * Set by module paramters dataserver_timeo and dataserver_retrans. + */ +#define NFS4_DEF_DS_TIMEO 600 /* in tenths of a second */ +#define NFS4_DEF_DS_RETRANS 5 + +/* + * Field testing shows we need to support up to 4096 stripe indices. + * We store each index as a u8 (u32 on the wire) to keep the memory footprint + * reasonable. This in turn means we support a maximum of 256 + * RFC 5661 multipath_list4 structures. + */ +#define NFS4_PNFS_MAX_STRIPE_CNT 4096 +#define NFS4_PNFS_MAX_MULTI_CNT 256 /* 256 fit into a u8 stripe_index */ + +/* error codes for internal use */ +#define NFS4ERR_RESET_TO_MDS 12001 + +enum stripetype4 { + STRIPE_SPARSE = 1, + STRIPE_DENSE = 2 +}; + +/* Individual ip address */ +struct nfs4_pnfs_ds_addr { + struct sockaddr_storage da_addr; + size_t da_addrlen; + struct list_head da_node; /* nfs4_pnfs_dev_hlist dev_dslist */ + char *da_remotestr; /* human readable addr+port */ +}; + +struct nfs4_pnfs_ds { + struct list_head ds_node; /* nfs4_pnfs_dev_hlist dev_dslist */ + char *ds_remotestr; /* comma sep list of addrs */ + struct list_head ds_addrs; + struct nfs_client *ds_clp; + atomic_t ds_count; + unsigned long ds_state; +#define NFS4DS_CONNECTING 0 /* ds is establishing connection */ +}; + +struct nfs4_file_layout_dsaddr { + struct nfs4_deviceid_node id_node; + u32 stripe_count; + u8 *stripe_indices; + u32 ds_num; + struct nfs4_pnfs_ds *ds_list[1]; +}; + +struct nfs4_filelayout_segment { + struct pnfs_layout_segment generic_hdr; + u32 stripe_type; + u32 commit_through_mds; + u32 stripe_unit; + u32 first_stripe_index; + u64 pattern_offset; + struct nfs4_file_layout_dsaddr *dsaddr; /* Point to GETDEVINFO data */ + unsigned int num_fh; + struct nfs_fh **fh_array; +}; + +struct nfs4_filelayout { + struct pnfs_layout_hdr generic_hdr; + struct pnfs_ds_commit_info commit_info; +}; + +static inline struct nfs4_filelayout * +FILELAYOUT_FROM_HDR(struct pnfs_layout_hdr *lo) +{ + return container_of(lo, struct nfs4_filelayout, generic_hdr); +} + +static inline struct nfs4_filelayout_segment * +FILELAYOUT_LSEG(struct pnfs_layout_segment *lseg) +{ + return container_of(lseg, + struct nfs4_filelayout_segment, + generic_hdr); +} + +static inline struct nfs4_deviceid_node * +FILELAYOUT_DEVID_NODE(struct pnfs_layout_segment *lseg) +{ + return &FILELAYOUT_LSEG(lseg)->dsaddr->id_node; +} + +static inline void +filelayout_mark_devid_invalid(struct nfs4_deviceid_node *node) +{ + u32 *p = (u32 *)&node->deviceid; + + printk(KERN_WARNING "NFS: Deviceid [%x%x%x%x] marked out of use.\n", + p[0], p[1], p[2], p[3]); + + set_bit(NFS_DEVICEID_INVALID, &node->flags); +} + +static inline bool +filelayout_test_devid_invalid(struct nfs4_deviceid_node *node) +{ + return test_bit(NFS_DEVICEID_INVALID, &node->flags); +} + +extern bool +filelayout_test_devid_unavailable(struct nfs4_deviceid_node *node); + +extern struct nfs_fh * +nfs4_fl_select_ds_fh(struct pnfs_layout_segment *lseg, u32 j); + +extern void print_ds(struct nfs4_pnfs_ds *ds); +u32 nfs4_fl_calc_j_index(struct pnfs_layout_segment *lseg, loff_t offset); +u32 nfs4_fl_calc_ds_index(struct pnfs_layout_segment *lseg, u32 j); +struct nfs4_pnfs_ds *nfs4_fl_prepare_ds(struct pnfs_layout_segment *lseg, + u32 ds_idx); +extern void nfs4_fl_put_deviceid(struct nfs4_file_layout_dsaddr *dsaddr); +extern void nfs4_fl_free_deviceid(struct nfs4_file_layout_dsaddr *dsaddr); +struct nfs4_file_layout_dsaddr * +filelayout_get_device_info(struct inode *inode, struct nfs4_deviceid *dev_id, + struct rpc_cred *cred, gfp_t gfp_flags); + +#endif /* FS_NFS_NFS4FILELAYOUT_H */ diff --git a/fs/nfs/filelayout/filelayoutdev.c b/fs/nfs/filelayout/filelayoutdev.c new file mode 100644 index 0000000..7c85390 --- /dev/null +++ b/fs/nfs/filelayout/filelayoutdev.c @@ -0,0 +1,843 @@ +/* + * Device operations for the pnfs nfs4 file layout driver. + * + * Copyright (c) 2002 + * The Regents of the University of Michigan + * All Rights Reserved + * + * Dean Hildebrand + * Garth Goodson + * + * Permission is granted to use, copy, create derivative works, and + * redistribute this software and such derivative works for any purpose, + * so long as the name of the University of Michigan is not used in + * any advertising or publicity pertaining to the use or distribution + * of this software without specific, written prior authorization. If + * the above copyright notice or any other identification of the + * University of Michigan is included in any copy of any portion of + * this software, then the disclaimer below must also be included. + * + * This software is provided as is, without representation or warranty + * of any kind either express or implied, including without limitation + * the implied warranties of merchantability, fitness for a particular + * purpose, or noninfringement. The Regents of the University of + * Michigan shall not be liable for any damages, including special, + * indirect, incidental, or consequential damages, with respect to any + * claim arising out of or in connection with the use of the software, + * even if it has been or is hereafter advised of the possibility of + * such damages. + */ + +#include +#include +#include +#include + +#include "../internal.h" +#include "../nfs4session.h" +#include "filelayout.h" + +#define NFSDBG_FACILITY NFSDBG_PNFS_LD + +static unsigned int dataserver_timeo = NFS4_DEF_DS_TIMEO; +static unsigned int dataserver_retrans = NFS4_DEF_DS_RETRANS; + +/* + * Data server cache + * + * Data servers can be mapped to different device ids. + * nfs4_pnfs_ds reference counting + * - set to 1 on allocation + * - incremented when a device id maps a data server already in the cache. + * - decremented when deviceid is removed from the cache. + */ +static DEFINE_SPINLOCK(nfs4_ds_cache_lock); +static LIST_HEAD(nfs4_data_server_cache); + +/* Debug routines */ +void +print_ds(struct nfs4_pnfs_ds *ds) +{ + if (ds == NULL) { + printk("%s NULL device\n", __func__); + return; + } + printk(" ds %s\n" + " ref count %d\n" + " client %p\n" + " cl_exchange_flags %x\n", + ds->ds_remotestr, + atomic_read(&ds->ds_count), ds->ds_clp, + ds->ds_clp ? ds->ds_clp->cl_exchange_flags : 0); +} + +static bool +same_sockaddr(struct sockaddr *addr1, struct sockaddr *addr2) +{ + struct sockaddr_in *a, *b; + struct sockaddr_in6 *a6, *b6; + + if (addr1->sa_family != addr2->sa_family) + return false; + + switch (addr1->sa_family) { + case AF_INET: + a = (struct sockaddr_in *)addr1; + b = (struct sockaddr_in *)addr2; + + if (a->sin_addr.s_addr == b->sin_addr.s_addr && + a->sin_port == b->sin_port) + return true; + break; + + case AF_INET6: + a6 = (struct sockaddr_in6 *)addr1; + b6 = (struct sockaddr_in6 *)addr2; + + /* LINKLOCAL addresses must have matching scope_id */ + if (ipv6_addr_src_scope(&a6->sin6_addr) == + IPV6_ADDR_SCOPE_LINKLOCAL && + a6->sin6_scope_id != b6->sin6_scope_id) + return false; + + if (ipv6_addr_equal(&a6->sin6_addr, &b6->sin6_addr) && + a6->sin6_port == b6->sin6_port) + return true; + break; + + default: + dprintk("%s: unhandled address family: %u\n", + __func__, addr1->sa_family); + return false; + } + + return false; +} + +static bool +_same_data_server_addrs_locked(const struct list_head *dsaddrs1, + const struct list_head *dsaddrs2) +{ + struct nfs4_pnfs_ds_addr *da1, *da2; + + /* step through both lists, comparing as we go */ + for (da1 = list_first_entry(dsaddrs1, typeof(*da1), da_node), + da2 = list_first_entry(dsaddrs2, typeof(*da2), da_node); + da1 != NULL && da2 != NULL; + da1 = list_entry(da1->da_node.next, typeof(*da1), da_node), + da2 = list_entry(da2->da_node.next, typeof(*da2), da_node)) { + if (!same_sockaddr((struct sockaddr *)&da1->da_addr, + (struct sockaddr *)&da2->da_addr)) + return false; + } + if (da1 == NULL && da2 == NULL) + return true; + + return false; +} + +/* + * Lookup DS by addresses. nfs4_ds_cache_lock is held + */ +static struct nfs4_pnfs_ds * +_data_server_lookup_locked(const struct list_head *dsaddrs) +{ + struct nfs4_pnfs_ds *ds; + + list_for_each_entry(ds, &nfs4_data_server_cache, ds_node) + if (_same_data_server_addrs_locked(&ds->ds_addrs, dsaddrs)) + return ds; + return NULL; +} + +/* + * Create an rpc connection to the nfs4_pnfs_ds data server + * Currently only supports IPv4 and IPv6 addresses + */ +static int +nfs4_ds_connect(struct nfs_server *mds_srv, struct nfs4_pnfs_ds *ds) +{ + struct nfs_client *clp = ERR_PTR(-EIO); + struct nfs4_pnfs_ds_addr *da; + int status = 0; + + dprintk("--> %s DS %s au_flavor %d\n", __func__, ds->ds_remotestr, + mds_srv->nfs_client->cl_rpcclient->cl_auth->au_flavor); + + list_for_each_entry(da, &ds->ds_addrs, da_node) { + dprintk("%s: DS %s: trying address %s\n", + __func__, ds->ds_remotestr, da->da_remotestr); + + clp = nfs4_set_ds_client(mds_srv->nfs_client, + (struct sockaddr *)&da->da_addr, + da->da_addrlen, IPPROTO_TCP, + dataserver_timeo, dataserver_retrans); + if (!IS_ERR(clp)) + break; + } + + if (IS_ERR(clp)) { + status = PTR_ERR(clp); + goto out; + } + + status = nfs4_init_ds_session(clp, mds_srv->nfs_client->cl_lease_time); + if (status) + goto out_put; + + smp_wmb(); + ds->ds_clp = clp; + dprintk("%s [new] addr: %s\n", __func__, ds->ds_remotestr); +out: + return status; +out_put: + nfs_put_client(clp); + goto out; +} + +static void +destroy_ds(struct nfs4_pnfs_ds *ds) +{ + struct nfs4_pnfs_ds_addr *da; + + dprintk("--> %s\n", __func__); + ifdebug(FACILITY) + print_ds(ds); + + if (ds->ds_clp) + nfs_put_client(ds->ds_clp); + + while (!list_empty(&ds->ds_addrs)) { + da = list_first_entry(&ds->ds_addrs, + struct nfs4_pnfs_ds_addr, + da_node); + list_del_init(&da->da_node); + kfree(da->da_remotestr); + kfree(da); + } + + kfree(ds->ds_remotestr); + kfree(ds); +} + +void +nfs4_fl_free_deviceid(struct nfs4_file_layout_dsaddr *dsaddr) +{ + struct nfs4_pnfs_ds *ds; + int i; + + nfs4_print_deviceid(&dsaddr->id_node.deviceid); + + for (i = 0; i < dsaddr->ds_num; i++) { + ds = dsaddr->ds_list[i]; + if (ds != NULL) { + if (atomic_dec_and_lock(&ds->ds_count, + &nfs4_ds_cache_lock)) { + list_del_init(&ds->ds_node); + spin_unlock(&nfs4_ds_cache_lock); + destroy_ds(ds); + } + } + } + kfree(dsaddr->stripe_indices); + kfree(dsaddr); +} + +/* + * Create a string with a human readable address and port to avoid + * complicated setup around many dprinks. + */ +static char * +nfs4_pnfs_remotestr(struct list_head *dsaddrs, gfp_t gfp_flags) +{ + struct nfs4_pnfs_ds_addr *da; + char *remotestr; + size_t len; + char *p; + + len = 3; /* '{', '}' and eol */ + list_for_each_entry(da, dsaddrs, da_node) { + len += strlen(da->da_remotestr) + 1; /* string plus comma */ + } + + remotestr = kzalloc(len, gfp_flags); + if (!remotestr) + return NULL; + + p = remotestr; + *(p++) = '{'; + len--; + list_for_each_entry(da, dsaddrs, da_node) { + size_t ll = strlen(da->da_remotestr); + + if (ll > len) + goto out_err; + + memcpy(p, da->da_remotestr, ll); + p += ll; + len -= ll; + + if (len < 1) + goto out_err; + (*p++) = ','; + len--; + } + if (len < 2) + goto out_err; + *(p++) = '}'; + *p = '\0'; + return remotestr; +out_err: + kfree(remotestr); + return NULL; +} + +static struct nfs4_pnfs_ds * +nfs4_pnfs_ds_add(struct list_head *dsaddrs, gfp_t gfp_flags) +{ + struct nfs4_pnfs_ds *tmp_ds, *ds = NULL; + char *remotestr; + + if (list_empty(dsaddrs)) { + dprintk("%s: no addresses defined\n", __func__); + goto out; + } + + ds = kzalloc(sizeof(*ds), gfp_flags); + if (!ds) + goto out; + + /* this is only used for debugging, so it's ok if its NULL */ + remotestr = nfs4_pnfs_remotestr(dsaddrs, gfp_flags); + + spin_lock(&nfs4_ds_cache_lock); + tmp_ds = _data_server_lookup_locked(dsaddrs); + if (tmp_ds == NULL) { + INIT_LIST_HEAD(&ds->ds_addrs); + list_splice_init(dsaddrs, &ds->ds_addrs); + ds->ds_remotestr = remotestr; + atomic_set(&ds->ds_count, 1); + INIT_LIST_HEAD(&ds->ds_node); + ds->ds_clp = NULL; + list_add(&ds->ds_node, &nfs4_data_server_cache); + dprintk("%s add new data server %s\n", __func__, + ds->ds_remotestr); + } else { + kfree(remotestr); + kfree(ds); + atomic_inc(&tmp_ds->ds_count); + dprintk("%s data server %s found, inc'ed ds_count to %d\n", + __func__, tmp_ds->ds_remotestr, + atomic_read(&tmp_ds->ds_count)); + ds = tmp_ds; + } + spin_unlock(&nfs4_ds_cache_lock); +out: + return ds; +} + +/* + * Currently only supports ipv4, ipv6 and one multi-path address. + */ +static struct nfs4_pnfs_ds_addr * +decode_ds_addr(struct net *net, struct xdr_stream *streamp, gfp_t gfp_flags) +{ + struct nfs4_pnfs_ds_addr *da = NULL; + char *buf, *portstr; + __be16 port; + int nlen, rlen; + int tmp[2]; + __be32 *p; + char *netid, *match_netid; + size_t len, match_netid_len; + char *startsep = ""; + char *endsep = ""; + + + /* r_netid */ + p = xdr_inline_decode(streamp, 4); + if (unlikely(!p)) + goto out_err; + nlen = be32_to_cpup(p++); + + p = xdr_inline_decode(streamp, nlen); + if (unlikely(!p)) + goto out_err; + + netid = kmalloc(nlen+1, gfp_flags); + if (unlikely(!netid)) + goto out_err; + + netid[nlen] = '\0'; + memcpy(netid, p, nlen); + + /* r_addr: ip/ip6addr with port in dec octets - see RFC 5665 */ + p = xdr_inline_decode(streamp, 4); + if (unlikely(!p)) + goto out_free_netid; + rlen = be32_to_cpup(p); + + p = xdr_inline_decode(streamp, rlen); + if (unlikely(!p)) + goto out_free_netid; + + /* port is ".ABC.DEF", 8 chars max */ + if (rlen > INET6_ADDRSTRLEN + IPV6_SCOPE_ID_LEN + 8) { + dprintk("%s: Invalid address, length %d\n", __func__, + rlen); + goto out_free_netid; + } + buf = kmalloc(rlen + 1, gfp_flags); + if (!buf) { + dprintk("%s: Not enough memory\n", __func__); + goto out_free_netid; + } + buf[rlen] = '\0'; + memcpy(buf, p, rlen); + + /* replace port '.' with '-' */ + portstr = strrchr(buf, '.'); + if (!portstr) { + dprintk("%s: Failed finding expected dot in port\n", + __func__); + goto out_free_buf; + } + *portstr = '-'; + + /* find '.' between address and port */ + portstr = strrchr(buf, '.'); + if (!portstr) { + dprintk("%s: Failed finding expected dot between address and " + "port\n", __func__); + goto out_free_buf; + } + *portstr = '\0'; + + da = kzalloc(sizeof(*da), gfp_flags); + if (unlikely(!da)) + goto out_free_buf; + + INIT_LIST_HEAD(&da->da_node); + + if (!rpc_pton(net, buf, portstr-buf, (struct sockaddr *)&da->da_addr, + sizeof(da->da_addr))) { + dprintk("%s: error parsing address %s\n", __func__, buf); + goto out_free_da; + } + + portstr++; + sscanf(portstr, "%d-%d", &tmp[0], &tmp[1]); + port = htons((tmp[0] << 8) | (tmp[1])); + + switch (da->da_addr.ss_family) { + case AF_INET: + ((struct sockaddr_in *)&da->da_addr)->sin_port = port; + da->da_addrlen = sizeof(struct sockaddr_in); + match_netid = "tcp"; + match_netid_len = 3; + break; + + case AF_INET6: + ((struct sockaddr_in6 *)&da->da_addr)->sin6_port = port; + da->da_addrlen = sizeof(struct sockaddr_in6); + match_netid = "tcp6"; + match_netid_len = 4; + startsep = "["; + endsep = "]"; + break; + + default: + dprintk("%s: unsupported address family: %u\n", + __func__, da->da_addr.ss_family); + goto out_free_da; + } + + if (nlen != match_netid_len || strncmp(netid, match_netid, nlen)) { + dprintk("%s: ERROR: r_netid \"%s\" != \"%s\"\n", + __func__, netid, match_netid); + goto out_free_da; + } + + /* save human readable address */ + len = strlen(startsep) + strlen(buf) + strlen(endsep) + 7; + da->da_remotestr = kzalloc(len, gfp_flags); + + /* NULL is ok, only used for dprintk */ + if (da->da_remotestr) + snprintf(da->da_remotestr, len, "%s%s%s:%u", startsep, + buf, endsep, ntohs(port)); + + dprintk("%s: Parsed DS addr %s\n", __func__, da->da_remotestr); + kfree(buf); + kfree(netid); + return da; + +out_free_da: + kfree(da); +out_free_buf: + dprintk("%s: Error parsing DS addr: %s\n", __func__, buf); + kfree(buf); +out_free_netid: + kfree(netid); +out_err: + return NULL; +} + +/* Decode opaque device data and return the result */ +static struct nfs4_file_layout_dsaddr* +decode_device(struct inode *ino, struct pnfs_device *pdev, gfp_t gfp_flags) +{ + int i; + u32 cnt, num; + u8 *indexp; + __be32 *p; + u8 *stripe_indices; + u8 max_stripe_index; + struct nfs4_file_layout_dsaddr *dsaddr = NULL; + struct xdr_stream stream; + struct xdr_buf buf; + struct page *scratch; + struct list_head dsaddrs; + struct nfs4_pnfs_ds_addr *da; + + /* set up xdr stream */ + scratch = alloc_page(gfp_flags); + if (!scratch) + goto out_err; + + xdr_init_decode_pages(&stream, &buf, pdev->pages, pdev->pglen); + xdr_set_scratch_buffer(&stream, page_address(scratch), PAGE_SIZE); + + /* Get the stripe count (number of stripe index) */ + p = xdr_inline_decode(&stream, 4); + if (unlikely(!p)) + goto out_err_free_scratch; + + cnt = be32_to_cpup(p); + dprintk("%s stripe count %d\n", __func__, cnt); + if (cnt > NFS4_PNFS_MAX_STRIPE_CNT) { + printk(KERN_WARNING "NFS: %s: stripe count %d greater than " + "supported maximum %d\n", __func__, + cnt, NFS4_PNFS_MAX_STRIPE_CNT); + goto out_err_free_scratch; + } + + /* read stripe indices */ + stripe_indices = kcalloc(cnt, sizeof(u8), gfp_flags); + if (!stripe_indices) + goto out_err_free_scratch; + + p = xdr_inline_decode(&stream, cnt << 2); + if (unlikely(!p)) + goto out_err_free_stripe_indices; + + indexp = &stripe_indices[0]; + max_stripe_index = 0; + for (i = 0; i < cnt; i++) { + *indexp = be32_to_cpup(p++); + max_stripe_index = max(max_stripe_index, *indexp); + indexp++; + } + + /* Check the multipath list count */ + p = xdr_inline_decode(&stream, 4); + if (unlikely(!p)) + goto out_err_free_stripe_indices; + + num = be32_to_cpup(p); + dprintk("%s ds_num %u\n", __func__, num); + if (num > NFS4_PNFS_MAX_MULTI_CNT) { + printk(KERN_WARNING "NFS: %s: multipath count %d greater than " + "supported maximum %d\n", __func__, + num, NFS4_PNFS_MAX_MULTI_CNT); + goto out_err_free_stripe_indices; + } + + /* validate stripe indices are all < num */ + if (max_stripe_index >= num) { + printk(KERN_WARNING "NFS: %s: stripe index %u >= num ds %u\n", + __func__, max_stripe_index, num); + goto out_err_free_stripe_indices; + } + + dsaddr = kzalloc(sizeof(*dsaddr) + + (sizeof(struct nfs4_pnfs_ds *) * (num - 1)), + gfp_flags); + if (!dsaddr) + goto out_err_free_stripe_indices; + + dsaddr->stripe_count = cnt; + dsaddr->stripe_indices = stripe_indices; + stripe_indices = NULL; + dsaddr->ds_num = num; + nfs4_init_deviceid_node(&dsaddr->id_node, + NFS_SERVER(ino)->pnfs_curr_ld, + NFS_SERVER(ino)->nfs_client, + &pdev->dev_id); + + INIT_LIST_HEAD(&dsaddrs); + + for (i = 0; i < dsaddr->ds_num; i++) { + int j; + u32 mp_count; + + p = xdr_inline_decode(&stream, 4); + if (unlikely(!p)) + goto out_err_free_deviceid; + + mp_count = be32_to_cpup(p); /* multipath count */ + for (j = 0; j < mp_count; j++) { + da = decode_ds_addr(NFS_SERVER(ino)->nfs_client->cl_net, + &stream, gfp_flags); + if (da) + list_add_tail(&da->da_node, &dsaddrs); + } + if (list_empty(&dsaddrs)) { + dprintk("%s: no suitable DS addresses found\n", + __func__); + goto out_err_free_deviceid; + } + + dsaddr->ds_list[i] = nfs4_pnfs_ds_add(&dsaddrs, gfp_flags); + if (!dsaddr->ds_list[i]) + goto out_err_drain_dsaddrs; + + /* If DS was already in cache, free ds addrs */ + while (!list_empty(&dsaddrs)) { + da = list_first_entry(&dsaddrs, + struct nfs4_pnfs_ds_addr, + da_node); + list_del_init(&da->da_node); + kfree(da->da_remotestr); + kfree(da); + } + } + + __free_page(scratch); + return dsaddr; + +out_err_drain_dsaddrs: + while (!list_empty(&dsaddrs)) { + da = list_first_entry(&dsaddrs, struct nfs4_pnfs_ds_addr, + da_node); + list_del_init(&da->da_node); + kfree(da->da_remotestr); + kfree(da); + } +out_err_free_deviceid: + nfs4_fl_free_deviceid(dsaddr); + /* stripe_indicies was part of dsaddr */ + goto out_err_free_scratch; +out_err_free_stripe_indices: + kfree(stripe_indices); +out_err_free_scratch: + __free_page(scratch); +out_err: + dprintk("%s ERROR: returning NULL\n", __func__); + return NULL; +} + +/* + * Decode the opaque device specified in 'dev' and add it to the cache of + * available devices. + */ +static struct nfs4_file_layout_dsaddr * +decode_and_add_device(struct inode *inode, struct pnfs_device *dev, gfp_t gfp_flags) +{ + struct nfs4_deviceid_node *d; + struct nfs4_file_layout_dsaddr *n, *new; + + new = decode_device(inode, dev, gfp_flags); + if (!new) { + printk(KERN_WARNING "NFS: %s: Could not decode or add device\n", + __func__); + return NULL; + } + + d = nfs4_insert_deviceid_node(&new->id_node); + n = container_of(d, struct nfs4_file_layout_dsaddr, id_node); + if (n != new) { + nfs4_fl_free_deviceid(new); + return n; + } + + return new; +} + +/* + * Retrieve the information for dev_id, add it to the list + * of available devices, and return it. + */ +struct nfs4_file_layout_dsaddr * +filelayout_get_device_info(struct inode *inode, + struct nfs4_deviceid *dev_id, + struct rpc_cred *cred, + gfp_t gfp_flags) +{ + struct pnfs_device *pdev = NULL; + u32 max_resp_sz; + int max_pages; + struct page **pages = NULL; + struct nfs4_file_layout_dsaddr *dsaddr = NULL; + int rc, i; + struct nfs_server *server = NFS_SERVER(inode); + + /* + * Use the session max response size as the basis for setting + * GETDEVICEINFO's maxcount + */ + max_resp_sz = server->nfs_client->cl_session->fc_attrs.max_resp_sz; + max_pages = nfs_page_array_len(0, max_resp_sz); + dprintk("%s inode %p max_resp_sz %u max_pages %d\n", + __func__, inode, max_resp_sz, max_pages); + + pdev = kzalloc(sizeof(struct pnfs_device), gfp_flags); + if (pdev == NULL) + return NULL; + + pages = kzalloc(max_pages * sizeof(struct page *), gfp_flags); + if (pages == NULL) { + kfree(pdev); + return NULL; + } + for (i = 0; i < max_pages; i++) { + pages[i] = alloc_page(gfp_flags); + if (!pages[i]) + goto out_free; + } + + memcpy(&pdev->dev_id, dev_id, sizeof(*dev_id)); + pdev->layout_type = LAYOUT_NFSV4_1_FILES; + pdev->pages = pages; + pdev->pgbase = 0; + pdev->pglen = max_resp_sz; + pdev->mincount = 0; + pdev->maxcount = max_resp_sz - nfs41_maxgetdevinfo_overhead; + + rc = nfs4_proc_getdeviceinfo(server, pdev, cred); + dprintk("%s getdevice info returns %d\n", __func__, rc); + if (rc) + goto out_free; + + /* + * Found new device, need to decode it and then add it to the + * list of known devices for this mountpoint. + */ + dsaddr = decode_and_add_device(inode, pdev, gfp_flags); +out_free: + for (i = 0; i < max_pages; i++) + __free_page(pages[i]); + kfree(pages); + kfree(pdev); + dprintk("<-- %s dsaddr %p\n", __func__, dsaddr); + return dsaddr; +} + +void +nfs4_fl_put_deviceid(struct nfs4_file_layout_dsaddr *dsaddr) +{ + nfs4_put_deviceid_node(&dsaddr->id_node); +} + +/* + * Want res = (offset - layout->pattern_offset)/ layout->stripe_unit + * Then: ((res + fsi) % dsaddr->stripe_count) + */ +u32 +nfs4_fl_calc_j_index(struct pnfs_layout_segment *lseg, loff_t offset) +{ + struct nfs4_filelayout_segment *flseg = FILELAYOUT_LSEG(lseg); + u64 tmp; + + tmp = offset - flseg->pattern_offset; + do_div(tmp, flseg->stripe_unit); + tmp += flseg->first_stripe_index; + return do_div(tmp, flseg->dsaddr->stripe_count); +} + +u32 +nfs4_fl_calc_ds_index(struct pnfs_layout_segment *lseg, u32 j) +{ + return FILELAYOUT_LSEG(lseg)->dsaddr->stripe_indices[j]; +} + +struct nfs_fh * +nfs4_fl_select_ds_fh(struct pnfs_layout_segment *lseg, u32 j) +{ + struct nfs4_filelayout_segment *flseg = FILELAYOUT_LSEG(lseg); + u32 i; + + if (flseg->stripe_type == STRIPE_SPARSE) { + if (flseg->num_fh == 1) + i = 0; + else if (flseg->num_fh == 0) + /* Use the MDS OPEN fh set in nfs_read_rpcsetup */ + return NULL; + else + i = nfs4_fl_calc_ds_index(lseg, j); + } else + i = j; + return flseg->fh_array[i]; +} + +static void nfs4_wait_ds_connect(struct nfs4_pnfs_ds *ds) +{ + might_sleep(); + wait_on_bit(&ds->ds_state, NFS4DS_CONNECTING, + nfs_wait_bit_killable, TASK_KILLABLE); +} + +static void nfs4_clear_ds_conn_bit(struct nfs4_pnfs_ds *ds) +{ + smp_mb__before_clear_bit(); + clear_bit(NFS4DS_CONNECTING, &ds->ds_state); + smp_mb__after_clear_bit(); + wake_up_bit(&ds->ds_state, NFS4DS_CONNECTING); +} + + +struct nfs4_pnfs_ds * +nfs4_fl_prepare_ds(struct pnfs_layout_segment *lseg, u32 ds_idx) +{ + struct nfs4_file_layout_dsaddr *dsaddr = FILELAYOUT_LSEG(lseg)->dsaddr; + struct nfs4_pnfs_ds *ds = dsaddr->ds_list[ds_idx]; + struct nfs4_deviceid_node *devid = FILELAYOUT_DEVID_NODE(lseg); + struct nfs4_pnfs_ds *ret = ds; + + if (ds == NULL) { + printk(KERN_ERR "NFS: %s: No data server for offset index %d\n", + __func__, ds_idx); + filelayout_mark_devid_invalid(devid); + goto out; + } + smp_rmb(); + if (ds->ds_clp) + goto out_test_devid; + + if (test_and_set_bit(NFS4DS_CONNECTING, &ds->ds_state) == 0) { + struct nfs_server *s = NFS_SERVER(lseg->pls_layout->plh_inode); + int err; + + err = nfs4_ds_connect(s, ds); + if (err) + nfs4_mark_deviceid_unavailable(devid); + nfs4_clear_ds_conn_bit(ds); + } else { + /* Either ds is connected, or ds is NULL */ + nfs4_wait_ds_connect(ds); + } +out_test_devid: + if (filelayout_test_devid_unavailable(devid)) + ret = NULL; +out: + return ret; +} + +module_param(dataserver_retrans, uint, 0644); +MODULE_PARM_DESC(dataserver_retrans, "The number of times the NFSv4.1 client " + "retries a request before it attempts further " + " recovery action."); +module_param(dataserver_timeo, uint, 0644); +MODULE_PARM_DESC(dataserver_timeo, "The time (in tenths of a second) the " + "NFSv4.1 client waits for a response from a " + " data server before it retries an NFS request."); diff --git a/fs/nfs/nfs4filelayout.c b/fs/nfs/nfs4filelayout.c deleted file mode 100644 index 21e9211..0000000 --- a/fs/nfs/nfs4filelayout.c +++ /dev/null @@ -1,1409 +0,0 @@ -/* - * Module for the pnfs nfs4 file layout driver. - * Defines all I/O and Policy interface operations, plus code - * to register itself with the pNFS client. - * - * Copyright (c) 2002 - * The Regents of the University of Michigan - * All Rights Reserved - * - * Dean Hildebrand - * - * Permission is granted to use, copy, create derivative works, and - * redistribute this software and such derivative works for any purpose, - * so long as the name of the University of Michigan is not used in - * any advertising or publicity pertaining to the use or distribution - * of this software without specific, written prior authorization. If - * the above copyright notice or any other identification of the - * University of Michigan is included in any copy of any portion of - * this software, then the disclaimer below must also be included. - * - * This software is provided as is, without representation or warranty - * of any kind either express or implied, including without limitation - * the implied warranties of merchantability, fitness for a particular - * purpose, or noninfringement. The Regents of the University of - * Michigan shall not be liable for any damages, including special, - * indirect, incidental, or consequential damages, with respect to any - * claim arising out of or in connection with the use of the software, - * even if it has been or is hereafter advised of the possibility of - * such damages. - */ - -#include -#include -#include - -#include - -#include "nfs4session.h" -#include "internal.h" -#include "delegation.h" -#include "nfs4filelayout.h" -#include "nfs4trace.h" - -#define NFSDBG_FACILITY NFSDBG_PNFS_LD - -MODULE_LICENSE("GPL"); -MODULE_AUTHOR("Dean Hildebrand "); -MODULE_DESCRIPTION("The NFSv4 file layout driver"); - -#define FILELAYOUT_POLL_RETRY_MAX (15*HZ) - -static loff_t -filelayout_get_dense_offset(struct nfs4_filelayout_segment *flseg, - loff_t offset) -{ - u32 stripe_width = flseg->stripe_unit * flseg->dsaddr->stripe_count; - u64 stripe_no; - u32 rem; - - offset -= flseg->pattern_offset; - stripe_no = div_u64(offset, stripe_width); - div_u64_rem(offset, flseg->stripe_unit, &rem); - - return stripe_no * flseg->stripe_unit + rem; -} - -/* This function is used by the layout driver to calculate the - * offset of the file on the dserver based on whether the - * layout type is STRIPE_DENSE or STRIPE_SPARSE - */ -static loff_t -filelayout_get_dserver_offset(struct pnfs_layout_segment *lseg, loff_t offset) -{ - struct nfs4_filelayout_segment *flseg = FILELAYOUT_LSEG(lseg); - - switch (flseg->stripe_type) { - case STRIPE_SPARSE: - return offset; - - case STRIPE_DENSE: - return filelayout_get_dense_offset(flseg, offset); - } - - BUG(); -} - -static void filelayout_reset_write(struct nfs_pgio_data *data) -{ - struct nfs_pgio_header *hdr = data->header; - struct rpc_task *task = &data->task; - - if (!test_and_set_bit(NFS_IOHDR_REDO, &hdr->flags)) { - dprintk("%s Reset task %5u for i/o through MDS " - "(req %s/%llu, %u bytes @ offset %llu)\n", __func__, - data->task.tk_pid, - hdr->inode->i_sb->s_id, - (unsigned long long)NFS_FILEID(hdr->inode), - data->args.count, - (unsigned long long)data->args.offset); - - task->tk_status = pnfs_write_done_resend_to_mds(hdr->inode, - &hdr->pages, - hdr->completion_ops, - hdr->dreq); - } -} - -static void filelayout_reset_read(struct nfs_pgio_data *data) -{ - struct nfs_pgio_header *hdr = data->header; - struct rpc_task *task = &data->task; - - if (!test_and_set_bit(NFS_IOHDR_REDO, &hdr->flags)) { - dprintk("%s Reset task %5u for i/o through MDS " - "(req %s/%llu, %u bytes @ offset %llu)\n", __func__, - data->task.tk_pid, - hdr->inode->i_sb->s_id, - (unsigned long long)NFS_FILEID(hdr->inode), - data->args.count, - (unsigned long long)data->args.offset); - - task->tk_status = pnfs_read_done_resend_to_mds(hdr->inode, - &hdr->pages, - hdr->completion_ops, - hdr->dreq); - } -} - -static void filelayout_fenceme(struct inode *inode, struct pnfs_layout_hdr *lo) -{ - if (!test_and_clear_bit(NFS_LAYOUT_RETURN, &lo->plh_flags)) - return; - pnfs_return_layout(inode); -} - -static int filelayout_async_handle_error(struct rpc_task *task, - struct nfs4_state *state, - struct nfs_client *clp, - struct pnfs_layout_segment *lseg) -{ - struct pnfs_layout_hdr *lo = lseg->pls_layout; - struct inode *inode = lo->plh_inode; - struct nfs_server *mds_server = NFS_SERVER(inode); - struct nfs4_deviceid_node *devid = FILELAYOUT_DEVID_NODE(lseg); - struct nfs_client *mds_client = mds_server->nfs_client; - struct nfs4_slot_table *tbl = &clp->cl_session->fc_slot_table; - - if (task->tk_status >= 0) - return 0; - - switch (task->tk_status) { - /* MDS state errors */ - case -NFS4ERR_DELEG_REVOKED: - case -NFS4ERR_ADMIN_REVOKED: - case -NFS4ERR_BAD_STATEID: - if (state == NULL) - break; - nfs_remove_bad_delegation(state->inode); - case -NFS4ERR_OPENMODE: - if (state == NULL) - break; - if (nfs4_schedule_stateid_recovery(mds_server, state) < 0) - goto out_bad_stateid; - goto wait_on_recovery; - case -NFS4ERR_EXPIRED: - if (state != NULL) { - if (nfs4_schedule_stateid_recovery(mds_server, state) < 0) - goto out_bad_stateid; - } - nfs4_schedule_lease_recovery(mds_client); - goto wait_on_recovery; - /* DS session errors */ - case -NFS4ERR_BADSESSION: - case -NFS4ERR_BADSLOT: - case -NFS4ERR_BAD_HIGH_SLOT: - case -NFS4ERR_DEADSESSION: - case -NFS4ERR_CONN_NOT_BOUND_TO_SESSION: - case -NFS4ERR_SEQ_FALSE_RETRY: - case -NFS4ERR_SEQ_MISORDERED: - dprintk("%s ERROR %d, Reset session. Exchangeid " - "flags 0x%x\n", __func__, task->tk_status, - clp->cl_exchange_flags); - nfs4_schedule_session_recovery(clp->cl_session, task->tk_status); - break; - case -NFS4ERR_DELAY: - case -NFS4ERR_GRACE: - rpc_delay(task, FILELAYOUT_POLL_RETRY_MAX); - break; - case -NFS4ERR_RETRY_UNCACHED_REP: - break; - /* Invalidate Layout errors */ - case -NFS4ERR_PNFS_NO_LAYOUT: - case -ESTALE: /* mapped NFS4ERR_STALE */ - case -EBADHANDLE: /* mapped NFS4ERR_BADHANDLE */ - case -EISDIR: /* mapped NFS4ERR_ISDIR */ - case -NFS4ERR_FHEXPIRED: - case -NFS4ERR_WRONG_TYPE: - dprintk("%s Invalid layout error %d\n", __func__, - task->tk_status); - /* - * Destroy layout so new i/o will get a new layout. - * Layout will not be destroyed until all current lseg - * references are put. Mark layout as invalid to resend failed - * i/o and all i/o waiting on the slot table to the MDS until - * layout is destroyed and a new valid layout is obtained. - */ - pnfs_destroy_layout(NFS_I(inode)); - rpc_wake_up(&tbl->slot_tbl_waitq); - goto reset; - /* RPC connection errors */ - case -ECONNREFUSED: - case -EHOSTDOWN: - case -EHOSTUNREACH: - case -ENETUNREACH: - case -EIO: - case -ETIMEDOUT: - case -EPIPE: - dprintk("%s DS connection error %d\n", __func__, - task->tk_status); - nfs4_mark_deviceid_unavailable(devid); - set_bit(NFS_LAYOUT_RETURN, &lo->plh_flags); - rpc_wake_up(&tbl->slot_tbl_waitq); - /* fall through */ - default: -reset: - dprintk("%s Retry through MDS. Error %d\n", __func__, - task->tk_status); - return -NFS4ERR_RESET_TO_MDS; - } -out: - task->tk_status = 0; - return -EAGAIN; -out_bad_stateid: - task->tk_status = -EIO; - return 0; -wait_on_recovery: - rpc_sleep_on(&mds_client->cl_rpcwaitq, task, NULL); - if (test_bit(NFS4CLNT_MANAGER_RUNNING, &mds_client->cl_state) == 0) - rpc_wake_up_queued_task(&mds_client->cl_rpcwaitq, task); - goto out; -} - -/* NFS_PROTO call done callback routines */ - -static int filelayout_read_done_cb(struct rpc_task *task, - struct nfs_pgio_data *data) -{ - struct nfs_pgio_header *hdr = data->header; - int err; - - trace_nfs4_pnfs_read(data, task->tk_status); - err = filelayout_async_handle_error(task, data->args.context->state, - data->ds_clp, hdr->lseg); - - switch (err) { - case -NFS4ERR_RESET_TO_MDS: - filelayout_reset_read(data); - return task->tk_status; - case -EAGAIN: - rpc_restart_call_prepare(task); - return -EAGAIN; - } - - return 0; -} - -/* - * We reference the rpc_cred of the first WRITE that triggers the need for - * a LAYOUTCOMMIT, and use it to send the layoutcommit compound. - * rfc5661 is not clear about which credential should be used. - */ -static void -filelayout_set_layoutcommit(struct nfs_pgio_data *wdata) -{ - struct nfs_pgio_header *hdr = wdata->header; - - if (FILELAYOUT_LSEG(hdr->lseg)->commit_through_mds || - wdata->res.verf->committed == NFS_FILE_SYNC) - return; - - pnfs_set_layoutcommit(wdata); - dprintk("%s ionde %lu pls_end_pos %lu\n", __func__, hdr->inode->i_ino, - (unsigned long) NFS_I(hdr->inode)->layout->plh_lwb); -} - -bool -filelayout_test_devid_unavailable(struct nfs4_deviceid_node *node) -{ - return filelayout_test_devid_invalid(node) || - nfs4_test_deviceid_unavailable(node); -} - -static bool -filelayout_reset_to_mds(struct pnfs_layout_segment *lseg) -{ - struct nfs4_deviceid_node *node = FILELAYOUT_DEVID_NODE(lseg); - - return filelayout_test_devid_unavailable(node); -} - -/* - * Call ops for the async read/write cases - * In the case of dense layouts, the offset needs to be reset to its - * original value. - */ -static void filelayout_read_prepare(struct rpc_task *task, void *data) -{ - struct nfs_pgio_data *rdata = data; - - if (unlikely(test_bit(NFS_CONTEXT_BAD, &rdata->args.context->flags))) { - rpc_exit(task, -EIO); - return; - } - if (filelayout_reset_to_mds(rdata->header->lseg)) { - dprintk("%s task %u reset io to MDS\n", __func__, task->tk_pid); - filelayout_reset_read(rdata); - rpc_exit(task, 0); - return; - } - rdata->pgio_done_cb = filelayout_read_done_cb; - - if (nfs41_setup_sequence(rdata->ds_clp->cl_session, - &rdata->args.seq_args, - &rdata->res.seq_res, - task)) - return; - if (nfs4_set_rw_stateid(&rdata->args.stateid, rdata->args.context, - rdata->args.lock_context, FMODE_READ) == -EIO) - rpc_exit(task, -EIO); /* lost lock, terminate I/O */ -} - -static void filelayout_read_call_done(struct rpc_task *task, void *data) -{ - struct nfs_pgio_data *rdata = data; - - dprintk("--> %s task->tk_status %d\n", __func__, task->tk_status); - - if (test_bit(NFS_IOHDR_REDO, &rdata->header->flags) && - task->tk_status == 0) { - nfs41_sequence_done(task, &rdata->res.seq_res); - return; - } - - /* Note this may cause RPC to be resent */ - rdata->header->mds_ops->rpc_call_done(task, data); -} - -static void filelayout_read_count_stats(struct rpc_task *task, void *data) -{ - struct nfs_pgio_data *rdata = data; - - rpc_count_iostats(task, NFS_SERVER(rdata->header->inode)->client->cl_metrics); -} - -static void filelayout_read_release(void *data) -{ - struct nfs_pgio_data *rdata = data; - struct pnfs_layout_hdr *lo = rdata->header->lseg->pls_layout; - - filelayout_fenceme(lo->plh_inode, lo); - nfs_put_client(rdata->ds_clp); - rdata->header->mds_ops->rpc_release(data); -} - -static int filelayout_write_done_cb(struct rpc_task *task, - struct nfs_pgio_data *data) -{ - struct nfs_pgio_header *hdr = data->header; - int err; - - trace_nfs4_pnfs_write(data, task->tk_status); - err = filelayout_async_handle_error(task, data->args.context->state, - data->ds_clp, hdr->lseg); - - switch (err) { - case -NFS4ERR_RESET_TO_MDS: - filelayout_reset_write(data); - return task->tk_status; - case -EAGAIN: - rpc_restart_call_prepare(task); - return -EAGAIN; - } - - filelayout_set_layoutcommit(data); - return 0; -} - -/* Fake up some data that will cause nfs_commit_release to retry the writes. */ -static void prepare_to_resend_writes(struct nfs_commit_data *data) -{ - struct nfs_page *first = nfs_list_entry(data->pages.next); - - data->task.tk_status = 0; - memcpy(&data->verf.verifier, &first->wb_verf, - sizeof(data->verf.verifier)); - data->verf.verifier.data[0]++; /* ensure verifier mismatch */ -} - -static int filelayout_commit_done_cb(struct rpc_task *task, - struct nfs_commit_data *data) -{ - int err; - - trace_nfs4_pnfs_commit_ds(data, task->tk_status); - err = filelayout_async_handle_error(task, NULL, data->ds_clp, - data->lseg); - - switch (err) { - case -NFS4ERR_RESET_TO_MDS: - prepare_to_resend_writes(data); - return -EAGAIN; - case -EAGAIN: - rpc_restart_call_prepare(task); - return -EAGAIN; - } - - return 0; -} - -static void filelayout_write_prepare(struct rpc_task *task, void *data) -{ - struct nfs_pgio_data *wdata = data; - - if (unlikely(test_bit(NFS_CONTEXT_BAD, &wdata->args.context->flags))) { - rpc_exit(task, -EIO); - return; - } - if (filelayout_reset_to_mds(wdata->header->lseg)) { - dprintk("%s task %u reset io to MDS\n", __func__, task->tk_pid); - filelayout_reset_write(wdata); - rpc_exit(task, 0); - return; - } - if (nfs41_setup_sequence(wdata->ds_clp->cl_session, - &wdata->args.seq_args, - &wdata->res.seq_res, - task)) - return; - if (nfs4_set_rw_stateid(&wdata->args.stateid, wdata->args.context, - wdata->args.lock_context, FMODE_WRITE) == -EIO) - rpc_exit(task, -EIO); /* lost lock, terminate I/O */ -} - -static void filelayout_write_call_done(struct rpc_task *task, void *data) -{ - struct nfs_pgio_data *wdata = data; - - if (test_bit(NFS_IOHDR_REDO, &wdata->header->flags) && - task->tk_status == 0) { - nfs41_sequence_done(task, &wdata->res.seq_res); - return; - } - - /* Note this may cause RPC to be resent */ - wdata->header->mds_ops->rpc_call_done(task, data); -} - -static void filelayout_write_count_stats(struct rpc_task *task, void *data) -{ - struct nfs_pgio_data *wdata = data; - - rpc_count_iostats(task, NFS_SERVER(wdata->header->inode)->client->cl_metrics); -} - -static void filelayout_write_release(void *data) -{ - struct nfs_pgio_data *wdata = data; - struct pnfs_layout_hdr *lo = wdata->header->lseg->pls_layout; - - filelayout_fenceme(lo->plh_inode, lo); - nfs_put_client(wdata->ds_clp); - wdata->header->mds_ops->rpc_release(data); -} - -static void filelayout_commit_prepare(struct rpc_task *task, void *data) -{ - struct nfs_commit_data *wdata = data; - - nfs41_setup_sequence(wdata->ds_clp->cl_session, - &wdata->args.seq_args, - &wdata->res.seq_res, - task); -} - -static void filelayout_write_commit_done(struct rpc_task *task, void *data) -{ - struct nfs_commit_data *wdata = data; - - /* Note this may cause RPC to be resent */ - wdata->mds_ops->rpc_call_done(task, data); -} - -static void filelayout_commit_count_stats(struct rpc_task *task, void *data) -{ - struct nfs_commit_data *cdata = data; - - rpc_count_iostats(task, NFS_SERVER(cdata->inode)->client->cl_metrics); -} - -static void filelayout_commit_release(void *calldata) -{ - struct nfs_commit_data *data = calldata; - - data->completion_ops->completion(data); - pnfs_put_lseg(data->lseg); - nfs_put_client(data->ds_clp); - nfs_commitdata_release(data); -} - -static const struct rpc_call_ops filelayout_read_call_ops = { - .rpc_call_prepare = filelayout_read_prepare, - .rpc_call_done = filelayout_read_call_done, - .rpc_count_stats = filelayout_read_count_stats, - .rpc_release = filelayout_read_release, -}; - -static const struct rpc_call_ops filelayout_write_call_ops = { - .rpc_call_prepare = filelayout_write_prepare, - .rpc_call_done = filelayout_write_call_done, - .rpc_count_stats = filelayout_write_count_stats, - .rpc_release = filelayout_write_release, -}; - -static const struct rpc_call_ops filelayout_commit_call_ops = { - .rpc_call_prepare = filelayout_commit_prepare, - .rpc_call_done = filelayout_write_commit_done, - .rpc_count_stats = filelayout_commit_count_stats, - .rpc_release = filelayout_commit_release, -}; - -static enum pnfs_try_status -filelayout_read_pagelist(struct nfs_pgio_data *data) -{ - struct nfs_pgio_header *hdr = data->header; - struct pnfs_layout_segment *lseg = hdr->lseg; - struct nfs4_pnfs_ds *ds; - struct rpc_clnt *ds_clnt; - loff_t offset = data->args.offset; - u32 j, idx; - struct nfs_fh *fh; - - dprintk("--> %s ino %lu pgbase %u req %Zu@%llu\n", - __func__, hdr->inode->i_ino, - data->args.pgbase, (size_t)data->args.count, offset); - - /* Retrieve the correct rpc_client for the byte range */ - j = nfs4_fl_calc_j_index(lseg, offset); - idx = nfs4_fl_calc_ds_index(lseg, j); - ds = nfs4_fl_prepare_ds(lseg, idx); - if (!ds) - return PNFS_NOT_ATTEMPTED; - - ds_clnt = nfs4_find_or_create_ds_client(ds->ds_clp, hdr->inode); - if (IS_ERR(ds_clnt)) - return PNFS_NOT_ATTEMPTED; - - dprintk("%s USE DS: %s cl_count %d\n", __func__, - ds->ds_remotestr, atomic_read(&ds->ds_clp->cl_count)); - - /* No multipath support. Use first DS */ - atomic_inc(&ds->ds_clp->cl_count); - data->ds_clp = ds->ds_clp; - data->ds_idx = idx; - fh = nfs4_fl_select_ds_fh(lseg, j); - if (fh) - data->args.fh = fh; - - data->args.offset = filelayout_get_dserver_offset(lseg, offset); - data->mds_offset = offset; - - /* Perform an asynchronous read to ds */ - nfs_initiate_pgio(ds_clnt, data, - &filelayout_read_call_ops, 0, RPC_TASK_SOFTCONN); - return PNFS_ATTEMPTED; -} - -/* Perform async writes. */ -static enum pnfs_try_status -filelayout_write_pagelist(struct nfs_pgio_data *data, int sync) -{ - struct nfs_pgio_header *hdr = data->header; - struct pnfs_layout_segment *lseg = hdr->lseg; - struct nfs4_pnfs_ds *ds; - struct rpc_clnt *ds_clnt; - loff_t offset = data->args.offset; - u32 j, idx; - struct nfs_fh *fh; - - /* Retrieve the correct rpc_client for the byte range */ - j = nfs4_fl_calc_j_index(lseg, offset); - idx = nfs4_fl_calc_ds_index(lseg, j); - ds = nfs4_fl_prepare_ds(lseg, idx); - if (!ds) - return PNFS_NOT_ATTEMPTED; - - ds_clnt = nfs4_find_or_create_ds_client(ds->ds_clp, hdr->inode); - if (IS_ERR(ds_clnt)) - return PNFS_NOT_ATTEMPTED; - - dprintk("%s ino %lu sync %d req %Zu@%llu DS: %s cl_count %d\n", - __func__, hdr->inode->i_ino, sync, (size_t) data->args.count, - offset, ds->ds_remotestr, atomic_read(&ds->ds_clp->cl_count)); - - data->pgio_done_cb = filelayout_write_done_cb; - atomic_inc(&ds->ds_clp->cl_count); - data->ds_clp = ds->ds_clp; - data->ds_idx = idx; - fh = nfs4_fl_select_ds_fh(lseg, j); - if (fh) - data->args.fh = fh; - /* - * Get the file offset on the dserver. Set the write offset to - * this offset and save the original offset. - */ - data->args.offset = filelayout_get_dserver_offset(lseg, offset); - - /* Perform an asynchronous write */ - nfs_initiate_pgio(ds_clnt, data, - &filelayout_write_call_ops, sync, - RPC_TASK_SOFTCONN); - return PNFS_ATTEMPTED; -} - -/* - * filelayout_check_layout() - * - * Make sure layout segment parameters are sane WRT the device. - * At this point no generic layer initialization of the lseg has occurred, - * and nothing has been added to the layout_hdr cache. - * - */ -static int -filelayout_check_layout(struct pnfs_layout_hdr *lo, - struct nfs4_filelayout_segment *fl, - struct nfs4_layoutget_res *lgr, - struct nfs4_deviceid *id, - gfp_t gfp_flags) -{ - struct nfs4_deviceid_node *d; - struct nfs4_file_layout_dsaddr *dsaddr; - int status = -EINVAL; - - dprintk("--> %s\n", __func__); - - /* FIXME: remove this check when layout segment support is added */ - if (lgr->range.offset != 0 || - lgr->range.length != NFS4_MAX_UINT64) { - dprintk("%s Only whole file layouts supported. Use MDS i/o\n", - __func__); - goto out; - } - - if (fl->pattern_offset > lgr->range.offset) { - dprintk("%s pattern_offset %lld too large\n", - __func__, fl->pattern_offset); - goto out; - } - - if (!fl->stripe_unit) { - dprintk("%s Invalid stripe unit (%u)\n", - __func__, fl->stripe_unit); - goto out; - } - - /* find and reference the deviceid */ - d = nfs4_find_get_deviceid(NFS_SERVER(lo->plh_inode)->pnfs_curr_ld, - NFS_SERVER(lo->plh_inode)->nfs_client, id); - if (d == NULL) { - dsaddr = filelayout_get_device_info(lo->plh_inode, id, - lo->plh_lc_cred, gfp_flags); - if (dsaddr == NULL) - goto out; - } else - dsaddr = container_of(d, struct nfs4_file_layout_dsaddr, id_node); - /* Found deviceid is unavailable */ - if (filelayout_test_devid_unavailable(&dsaddr->id_node)) - goto out_put; - - fl->dsaddr = dsaddr; - - if (fl->first_stripe_index >= dsaddr->stripe_count) { - dprintk("%s Bad first_stripe_index %u\n", - __func__, fl->first_stripe_index); - goto out_put; - } - - if ((fl->stripe_type == STRIPE_SPARSE && - fl->num_fh > 1 && fl->num_fh != dsaddr->ds_num) || - (fl->stripe_type == STRIPE_DENSE && - fl->num_fh != dsaddr->stripe_count)) { - dprintk("%s num_fh %u not valid for given packing\n", - __func__, fl->num_fh); - goto out_put; - } - - status = 0; -out: - dprintk("--> %s returns %d\n", __func__, status); - return status; -out_put: - nfs4_fl_put_deviceid(dsaddr); - goto out; -} - -static void filelayout_free_fh_array(struct nfs4_filelayout_segment *fl) -{ - int i; - - for (i = 0; i < fl->num_fh; i++) { - if (!fl->fh_array[i]) - break; - kfree(fl->fh_array[i]); - } - kfree(fl->fh_array); - fl->fh_array = NULL; -} - -static void -_filelayout_free_lseg(struct nfs4_filelayout_segment *fl) -{ - filelayout_free_fh_array(fl); - kfree(fl); -} - -static int -filelayout_decode_layout(struct pnfs_layout_hdr *flo, - struct nfs4_filelayout_segment *fl, - struct nfs4_layoutget_res *lgr, - struct nfs4_deviceid *id, - gfp_t gfp_flags) -{ - struct xdr_stream stream; - struct xdr_buf buf; - struct page *scratch; - __be32 *p; - uint32_t nfl_util; - int i; - - dprintk("%s: set_layout_map Begin\n", __func__); - - scratch = alloc_page(gfp_flags); - if (!scratch) - return -ENOMEM; - - xdr_init_decode_pages(&stream, &buf, lgr->layoutp->pages, lgr->layoutp->len); - xdr_set_scratch_buffer(&stream, page_address(scratch), PAGE_SIZE); - - /* 20 = ufl_util (4), first_stripe_index (4), pattern_offset (8), - * num_fh (4) */ - p = xdr_inline_decode(&stream, NFS4_DEVICEID4_SIZE + 20); - if (unlikely(!p)) - goto out_err; - - memcpy(id, p, sizeof(*id)); - p += XDR_QUADLEN(NFS4_DEVICEID4_SIZE); - nfs4_print_deviceid(id); - - nfl_util = be32_to_cpup(p++); - if (nfl_util & NFL4_UFLG_COMMIT_THRU_MDS) - fl->commit_through_mds = 1; - if (nfl_util & NFL4_UFLG_DENSE) - fl->stripe_type = STRIPE_DENSE; - else - fl->stripe_type = STRIPE_SPARSE; - fl->stripe_unit = nfl_util & ~NFL4_UFLG_MASK; - - fl->first_stripe_index = be32_to_cpup(p++); - p = xdr_decode_hyper(p, &fl->pattern_offset); - fl->num_fh = be32_to_cpup(p++); - - dprintk("%s: nfl_util 0x%X num_fh %u fsi %u po %llu\n", - __func__, nfl_util, fl->num_fh, fl->first_stripe_index, - fl->pattern_offset); - - /* Note that a zero value for num_fh is legal for STRIPE_SPARSE. - * Futher checking is done in filelayout_check_layout */ - if (fl->num_fh > - max(NFS4_PNFS_MAX_STRIPE_CNT, NFS4_PNFS_MAX_MULTI_CNT)) - goto out_err; - - if (fl->num_fh > 0) { - fl->fh_array = kcalloc(fl->num_fh, sizeof(fl->fh_array[0]), - gfp_flags); - if (!fl->fh_array) - goto out_err; - } - - for (i = 0; i < fl->num_fh; i++) { - /* Do we want to use a mempool here? */ - fl->fh_array[i] = kmalloc(sizeof(struct nfs_fh), gfp_flags); - if (!fl->fh_array[i]) - goto out_err_free; - - p = xdr_inline_decode(&stream, 4); - if (unlikely(!p)) - goto out_err_free; - fl->fh_array[i]->size = be32_to_cpup(p++); - if (sizeof(struct nfs_fh) < fl->fh_array[i]->size) { - printk(KERN_ERR "NFS: Too big fh %d received %d\n", - i, fl->fh_array[i]->size); - goto out_err_free; - } - - p = xdr_inline_decode(&stream, fl->fh_array[i]->size); - if (unlikely(!p)) - goto out_err_free; - memcpy(fl->fh_array[i]->data, p, fl->fh_array[i]->size); - dprintk("DEBUG: %s: fh len %d\n", __func__, - fl->fh_array[i]->size); - } - - __free_page(scratch); - return 0; - -out_err_free: - filelayout_free_fh_array(fl); -out_err: - __free_page(scratch); - return -EIO; -} - -static void -filelayout_free_lseg(struct pnfs_layout_segment *lseg) -{ - struct nfs4_filelayout_segment *fl = FILELAYOUT_LSEG(lseg); - - dprintk("--> %s\n", __func__); - nfs4_fl_put_deviceid(fl->dsaddr); - /* This assumes a single RW lseg */ - if (lseg->pls_range.iomode == IOMODE_RW) { - struct nfs4_filelayout *flo; - - flo = FILELAYOUT_FROM_HDR(lseg->pls_layout); - flo->commit_info.nbuckets = 0; - kfree(flo->commit_info.buckets); - flo->commit_info.buckets = NULL; - } - _filelayout_free_lseg(fl); -} - -static int -filelayout_alloc_commit_info(struct pnfs_layout_segment *lseg, - struct nfs_commit_info *cinfo, - gfp_t gfp_flags) -{ - struct nfs4_filelayout_segment *fl = FILELAYOUT_LSEG(lseg); - struct pnfs_commit_bucket *buckets; - int size, i; - - if (fl->commit_through_mds) - return 0; - - size = (fl->stripe_type == STRIPE_SPARSE) ? - fl->dsaddr->ds_num : fl->dsaddr->stripe_count; - - if (cinfo->ds->nbuckets >= size) { - /* This assumes there is only one IOMODE_RW lseg. What - * we really want to do is have a layout_hdr level - * dictionary of keys, each - * associated with a struct list_head, populated by calls - * to filelayout_write_pagelist(). - * */ - return 0; - } - - buckets = kcalloc(size, sizeof(struct pnfs_commit_bucket), - gfp_flags); - if (!buckets) - return -ENOMEM; - for (i = 0; i < size; i++) { - INIT_LIST_HEAD(&buckets[i].written); - INIT_LIST_HEAD(&buckets[i].committing); - /* mark direct verifier as unset */ - buckets[i].direct_verf.committed = NFS_INVALID_STABLE_HOW; - } - - spin_lock(cinfo->lock); - if (cinfo->ds->nbuckets >= size) - goto out; - for (i = 0; i < cinfo->ds->nbuckets; i++) { - list_splice(&cinfo->ds->buckets[i].written, - &buckets[i].written); - list_splice(&cinfo->ds->buckets[i].committing, - &buckets[i].committing); - buckets[i].direct_verf.committed = - cinfo->ds->buckets[i].direct_verf.committed; - buckets[i].wlseg = cinfo->ds->buckets[i].wlseg; - buckets[i].clseg = cinfo->ds->buckets[i].clseg; - } - swap(cinfo->ds->buckets, buckets); - cinfo->ds->nbuckets = size; -out: - spin_unlock(cinfo->lock); - kfree(buckets); - return 0; -} - -static struct pnfs_layout_segment * -filelayout_alloc_lseg(struct pnfs_layout_hdr *layoutid, - struct nfs4_layoutget_res *lgr, - gfp_t gfp_flags) -{ - struct nfs4_filelayout_segment *fl; - int rc; - struct nfs4_deviceid id; - - dprintk("--> %s\n", __func__); - fl = kzalloc(sizeof(*fl), gfp_flags); - if (!fl) - return NULL; - - rc = filelayout_decode_layout(layoutid, fl, lgr, &id, gfp_flags); - if (rc != 0 || filelayout_check_layout(layoutid, fl, lgr, &id, gfp_flags)) { - _filelayout_free_lseg(fl); - return NULL; - } - return &fl->generic_hdr; -} - -/* - * filelayout_pg_test(). Called by nfs_can_coalesce_requests() - * - * Return 0 if @req cannot be coalesced into @pgio, otherwise return the number - * of bytes (maximum @req->wb_bytes) that can be coalesced. - */ -static size_t -filelayout_pg_test(struct nfs_pageio_descriptor *pgio, struct nfs_page *prev, - struct nfs_page *req) -{ - unsigned int size; - u64 p_stripe, r_stripe; - u32 stripe_offset; - u64 segment_offset = pgio->pg_lseg->pls_range.offset; - u32 stripe_unit = FILELAYOUT_LSEG(pgio->pg_lseg)->stripe_unit; - - /* calls nfs_generic_pg_test */ - size = pnfs_generic_pg_test(pgio, prev, req); - if (!size) - return 0; - - /* see if req and prev are in the same stripe */ - if (prev) { - p_stripe = (u64)req_offset(prev) - segment_offset; - r_stripe = (u64)req_offset(req) - segment_offset; - do_div(p_stripe, stripe_unit); - do_div(r_stripe, stripe_unit); - - if (p_stripe != r_stripe) - return 0; - } - - /* calculate remaining bytes in the current stripe */ - div_u64_rem((u64)req_offset(req) - segment_offset, - stripe_unit, - &stripe_offset); - WARN_ON_ONCE(stripe_offset > stripe_unit); - if (stripe_offset >= stripe_unit) - return 0; - return min(stripe_unit - (unsigned int)stripe_offset, size); -} - -static void -filelayout_pg_init_read(struct nfs_pageio_descriptor *pgio, - struct nfs_page *req) -{ - if (!pgio->pg_lseg) - pgio->pg_lseg = pnfs_update_layout(pgio->pg_inode, - req->wb_context, - 0, - NFS4_MAX_UINT64, - IOMODE_READ, - GFP_KERNEL); - /* If no lseg, fall back to read through mds */ - if (pgio->pg_lseg == NULL) - nfs_pageio_reset_read_mds(pgio); -} - -static void -filelayout_pg_init_write(struct nfs_pageio_descriptor *pgio, - struct nfs_page *req) -{ - struct nfs_commit_info cinfo; - int status; - - if (!pgio->pg_lseg) - pgio->pg_lseg = pnfs_update_layout(pgio->pg_inode, - req->wb_context, - 0, - NFS4_MAX_UINT64, - IOMODE_RW, - GFP_NOFS); - /* If no lseg, fall back to write through mds */ - if (pgio->pg_lseg == NULL) - goto out_mds; - nfs_init_cinfo(&cinfo, pgio->pg_inode, pgio->pg_dreq); - status = filelayout_alloc_commit_info(pgio->pg_lseg, &cinfo, GFP_NOFS); - if (status < 0) { - pnfs_put_lseg(pgio->pg_lseg); - pgio->pg_lseg = NULL; - goto out_mds; - } - return; -out_mds: - nfs_pageio_reset_write_mds(pgio); -} - -static const struct nfs_pageio_ops filelayout_pg_read_ops = { - .pg_init = filelayout_pg_init_read, - .pg_test = filelayout_pg_test, - .pg_doio = pnfs_generic_pg_readpages, -}; - -static const struct nfs_pageio_ops filelayout_pg_write_ops = { - .pg_init = filelayout_pg_init_write, - .pg_test = filelayout_pg_test, - .pg_doio = pnfs_generic_pg_writepages, -}; - -static u32 select_bucket_index(struct nfs4_filelayout_segment *fl, u32 j) -{ - if (fl->stripe_type == STRIPE_SPARSE) - return nfs4_fl_calc_ds_index(&fl->generic_hdr, j); - else - return j; -} - -/* The generic layer is about to remove the req from the commit list. - * If this will make the bucket empty, it will need to put the lseg reference. - */ -static void -filelayout_clear_request_commit(struct nfs_page *req, - struct nfs_commit_info *cinfo) -{ - struct pnfs_layout_segment *freeme = NULL; - - spin_lock(cinfo->lock); - if (!test_and_clear_bit(PG_COMMIT_TO_DS, &req->wb_flags)) - goto out; - cinfo->ds->nwritten--; - if (list_is_singular(&req->wb_list)) { - struct pnfs_commit_bucket *bucket; - - bucket = list_first_entry(&req->wb_list, - struct pnfs_commit_bucket, - written); - freeme = bucket->wlseg; - bucket->wlseg = NULL; - } -out: - nfs_request_remove_commit_list(req, cinfo); - spin_unlock(cinfo->lock); - pnfs_put_lseg(freeme); -} - -static struct list_head * -filelayout_choose_commit_list(struct nfs_page *req, - struct pnfs_layout_segment *lseg, - struct nfs_commit_info *cinfo) -{ - struct nfs4_filelayout_segment *fl = FILELAYOUT_LSEG(lseg); - u32 i, j; - struct list_head *list; - struct pnfs_commit_bucket *buckets; - - if (fl->commit_through_mds) - return &cinfo->mds->list; - - /* Note that we are calling nfs4_fl_calc_j_index on each page - * that ends up being committed to a data server. An attractive - * alternative is to add a field to nfs_write_data and nfs_page - * to store the value calculated in filelayout_write_pagelist - * and just use that here. - */ - j = nfs4_fl_calc_j_index(lseg, req_offset(req)); - i = select_bucket_index(fl, j); - spin_lock(cinfo->lock); - buckets = cinfo->ds->buckets; - list = &buckets[i].written; - if (list_empty(list)) { - /* Non-empty buckets hold a reference on the lseg. That ref - * is normally transferred to the COMMIT call and released - * there. It could also be released if the last req is pulled - * off due to a rewrite, in which case it will be done in - * filelayout_clear_request_commit - */ - buckets[i].wlseg = pnfs_get_lseg(lseg); - } - set_bit(PG_COMMIT_TO_DS, &req->wb_flags); - cinfo->ds->nwritten++; - spin_unlock(cinfo->lock); - return list; -} - -static void -filelayout_mark_request_commit(struct nfs_page *req, - struct pnfs_layout_segment *lseg, - struct nfs_commit_info *cinfo) -{ - struct list_head *list; - - list = filelayout_choose_commit_list(req, lseg, cinfo); - nfs_request_add_commit_list(req, list, cinfo); -} - -static u32 calc_ds_index_from_commit(struct pnfs_layout_segment *lseg, u32 i) -{ - struct nfs4_filelayout_segment *flseg = FILELAYOUT_LSEG(lseg); - - if (flseg->stripe_type == STRIPE_SPARSE) - return i; - else - return nfs4_fl_calc_ds_index(lseg, i); -} - -static struct nfs_fh * -select_ds_fh_from_commit(struct pnfs_layout_segment *lseg, u32 i) -{ - struct nfs4_filelayout_segment *flseg = FILELAYOUT_LSEG(lseg); - - if (flseg->stripe_type == STRIPE_SPARSE) { - if (flseg->num_fh == 1) - i = 0; - else if (flseg->num_fh == 0) - /* Use the MDS OPEN fh set in nfs_read_rpcsetup */ - return NULL; - } - return flseg->fh_array[i]; -} - -static int filelayout_initiate_commit(struct nfs_commit_data *data, int how) -{ - struct pnfs_layout_segment *lseg = data->lseg; - struct nfs4_pnfs_ds *ds; - struct rpc_clnt *ds_clnt; - u32 idx; - struct nfs_fh *fh; - - idx = calc_ds_index_from_commit(lseg, data->ds_commit_index); - ds = nfs4_fl_prepare_ds(lseg, idx); - if (!ds) - goto out_err; - - ds_clnt = nfs4_find_or_create_ds_client(ds->ds_clp, data->inode); - if (IS_ERR(ds_clnt)) - goto out_err; - - dprintk("%s ino %lu, how %d cl_count %d\n", __func__, - data->inode->i_ino, how, atomic_read(&ds->ds_clp->cl_count)); - data->commit_done_cb = filelayout_commit_done_cb; - atomic_inc(&ds->ds_clp->cl_count); - data->ds_clp = ds->ds_clp; - fh = select_ds_fh_from_commit(lseg, data->ds_commit_index); - if (fh) - data->args.fh = fh; - return nfs_initiate_commit(ds_clnt, data, - &filelayout_commit_call_ops, how, - RPC_TASK_SOFTCONN); -out_err: - prepare_to_resend_writes(data); - filelayout_commit_release(data); - return -EAGAIN; -} - -static int -transfer_commit_list(struct list_head *src, struct list_head *dst, - struct nfs_commit_info *cinfo, int max) -{ - struct nfs_page *req, *tmp; - int ret = 0; - - list_for_each_entry_safe(req, tmp, src, wb_list) { - if (!nfs_lock_request(req)) - continue; - kref_get(&req->wb_kref); - if (cond_resched_lock(cinfo->lock)) - list_safe_reset_next(req, tmp, wb_list); - nfs_request_remove_commit_list(req, cinfo); - clear_bit(PG_COMMIT_TO_DS, &req->wb_flags); - nfs_list_add_request(req, dst); - ret++; - if ((ret == max) && !cinfo->dreq) - break; - } - return ret; -} - -/* Note called with cinfo->lock held. */ -static int -filelayout_scan_ds_commit_list(struct pnfs_commit_bucket *bucket, - struct nfs_commit_info *cinfo, - int max) -{ - struct list_head *src = &bucket->written; - struct list_head *dst = &bucket->committing; - int ret; - - ret = transfer_commit_list(src, dst, cinfo, max); - if (ret) { - cinfo->ds->nwritten -= ret; - cinfo->ds->ncommitting += ret; - bucket->clseg = bucket->wlseg; - if (list_empty(src)) - bucket->wlseg = NULL; - else - pnfs_get_lseg(bucket->clseg); - } - return ret; -} - -/* Move reqs from written to committing lists, returning count of number moved. - * Note called with cinfo->lock held. - */ -static int filelayout_scan_commit_lists(struct nfs_commit_info *cinfo, - int max) -{ - int i, rv = 0, cnt; - - for (i = 0; i < cinfo->ds->nbuckets && max != 0; i++) { - cnt = filelayout_scan_ds_commit_list(&cinfo->ds->buckets[i], - cinfo, max); - max -= cnt; - rv += cnt; - } - return rv; -} - -/* Pull everything off the committing lists and dump into @dst */ -static void filelayout_recover_commit_reqs(struct list_head *dst, - struct nfs_commit_info *cinfo) -{ - struct pnfs_commit_bucket *b; - struct pnfs_layout_segment *freeme; - int i; - -restart: - spin_lock(cinfo->lock); - for (i = 0, b = cinfo->ds->buckets; i < cinfo->ds->nbuckets; i++, b++) { - if (transfer_commit_list(&b->written, dst, cinfo, 0)) { - freeme = b->wlseg; - b->wlseg = NULL; - spin_unlock(cinfo->lock); - pnfs_put_lseg(freeme); - goto restart; - } - } - cinfo->ds->nwritten = 0; - spin_unlock(cinfo->lock); -} - -static unsigned int -alloc_ds_commits(struct nfs_commit_info *cinfo, struct list_head *list) -{ - struct pnfs_ds_commit_info *fl_cinfo; - struct pnfs_commit_bucket *bucket; - struct nfs_commit_data *data; - int i, j; - unsigned int nreq = 0; - struct pnfs_layout_segment *freeme; - - fl_cinfo = cinfo->ds; - bucket = fl_cinfo->buckets; - for (i = 0; i < fl_cinfo->nbuckets; i++, bucket++) { - if (list_empty(&bucket->committing)) - continue; - data = nfs_commitdata_alloc(); - if (!data) - break; - data->ds_commit_index = i; - spin_lock(cinfo->lock); - data->lseg = bucket->clseg; - bucket->clseg = NULL; - spin_unlock(cinfo->lock); - list_add(&data->pages, list); - nreq++; - } - - /* Clean up on error */ - for (j = i; j < fl_cinfo->nbuckets; j++, bucket++) { - if (list_empty(&bucket->committing)) - continue; - nfs_retry_commit(&bucket->committing, bucket->clseg, cinfo); - spin_lock(cinfo->lock); - freeme = bucket->clseg; - bucket->clseg = NULL; - spin_unlock(cinfo->lock); - pnfs_put_lseg(freeme); - } - /* Caller will clean up entries put on list */ - return nreq; -} - -/* This follows nfs_commit_list pretty closely */ -static int -filelayout_commit_pagelist(struct inode *inode, struct list_head *mds_pages, - int how, struct nfs_commit_info *cinfo) -{ - struct nfs_commit_data *data, *tmp; - LIST_HEAD(list); - unsigned int nreq = 0; - - if (!list_empty(mds_pages)) { - data = nfs_commitdata_alloc(); - if (data != NULL) { - data->lseg = NULL; - list_add(&data->pages, &list); - nreq++; - } else - nfs_retry_commit(mds_pages, NULL, cinfo); - } - - nreq += alloc_ds_commits(cinfo, &list); - - if (nreq == 0) { - cinfo->completion_ops->error_cleanup(NFS_I(inode)); - goto out; - } - - atomic_add(nreq, &cinfo->mds->rpcs_out); - - list_for_each_entry_safe(data, tmp, &list, pages) { - list_del_init(&data->pages); - if (!data->lseg) { - nfs_init_commit(data, mds_pages, NULL, cinfo); - nfs_initiate_commit(NFS_CLIENT(inode), data, - data->mds_ops, how, 0); - } else { - struct pnfs_commit_bucket *buckets; - - buckets = cinfo->ds->buckets; - nfs_init_commit(data, &buckets[data->ds_commit_index].committing, data->lseg, cinfo); - filelayout_initiate_commit(data, how); - } - } -out: - cinfo->ds->ncommitting = 0; - return PNFS_ATTEMPTED; -} - -static void -filelayout_free_deveiceid_node(struct nfs4_deviceid_node *d) -{ - nfs4_fl_free_deviceid(container_of(d, struct nfs4_file_layout_dsaddr, id_node)); -} - -static struct pnfs_layout_hdr * -filelayout_alloc_layout_hdr(struct inode *inode, gfp_t gfp_flags) -{ - struct nfs4_filelayout *flo; - - flo = kzalloc(sizeof(*flo), gfp_flags); - return flo != NULL ? &flo->generic_hdr : NULL; -} - -static void -filelayout_free_layout_hdr(struct pnfs_layout_hdr *lo) -{ - kfree(FILELAYOUT_FROM_HDR(lo)); -} - -static struct pnfs_ds_commit_info * -filelayout_get_ds_info(struct inode *inode) -{ - struct pnfs_layout_hdr *layout = NFS_I(inode)->layout; - - if (layout == NULL) - return NULL; - else - return &FILELAYOUT_FROM_HDR(layout)->commit_info; -} - -static struct pnfs_layoutdriver_type filelayout_type = { - .id = LAYOUT_NFSV4_1_FILES, - .name = "LAYOUT_NFSV4_1_FILES", - .owner = THIS_MODULE, - .alloc_layout_hdr = filelayout_alloc_layout_hdr, - .free_layout_hdr = filelayout_free_layout_hdr, - .alloc_lseg = filelayout_alloc_lseg, - .free_lseg = filelayout_free_lseg, - .pg_read_ops = &filelayout_pg_read_ops, - .pg_write_ops = &filelayout_pg_write_ops, - .get_ds_info = &filelayout_get_ds_info, - .mark_request_commit = filelayout_mark_request_commit, - .clear_request_commit = filelayout_clear_request_commit, - .scan_commit_lists = filelayout_scan_commit_lists, - .recover_commit_reqs = filelayout_recover_commit_reqs, - .commit_pagelist = filelayout_commit_pagelist, - .read_pagelist = filelayout_read_pagelist, - .write_pagelist = filelayout_write_pagelist, - .free_deviceid_node = filelayout_free_deveiceid_node, -}; - -static int __init nfs4filelayout_init(void) -{ - printk(KERN_INFO "%s: NFSv4 File Layout Driver Registering...\n", - __func__); - return pnfs_register_layoutdriver(&filelayout_type); -} - -static void __exit nfs4filelayout_exit(void) -{ - printk(KERN_INFO "%s: NFSv4 File Layout Driver Unregistering...\n", - __func__); - pnfs_unregister_layoutdriver(&filelayout_type); -} - -MODULE_ALIAS("nfs-layouttype4-1"); - -module_init(nfs4filelayout_init); -module_exit(nfs4filelayout_exit); diff --git a/fs/nfs/nfs4filelayout.h b/fs/nfs/nfs4filelayout.h deleted file mode 100644 index cebd20e..0000000 --- a/fs/nfs/nfs4filelayout.h +++ /dev/null @@ -1,156 +0,0 @@ -/* - * NFSv4 file layout driver data structures. - * - * Copyright (c) 2002 - * The Regents of the University of Michigan - * All Rights Reserved - * - * Dean Hildebrand - * - * Permission is granted to use, copy, create derivative works, and - * redistribute this software and such derivative works for any purpose, - * so long as the name of the University of Michigan is not used in - * any advertising or publicity pertaining to the use or distribution - * of this software without specific, written prior authorization. If - * the above copyright notice or any other identification of the - * University of Michigan is included in any copy of any portion of - * this software, then the disclaimer below must also be included. - * - * This software is provided as is, without representation or warranty - * of any kind either express or implied, including without limitation - * the implied warranties of merchantability, fitness for a particular - * purpose, or noninfringement. The Regents of the University of - * Michigan shall not be liable for any damages, including special, - * indirect, incidental, or consequential damages, with respect to any - * claim arising out of or in connection with the use of the software, - * even if it has been or is hereafter advised of the possibility of - * such damages. - */ - -#ifndef FS_NFS_NFS4FILELAYOUT_H -#define FS_NFS_NFS4FILELAYOUT_H - -#include "pnfs.h" - -/* - * Default data server connection timeout and retrans vaules. - * Set by module paramters dataserver_timeo and dataserver_retrans. - */ -#define NFS4_DEF_DS_TIMEO 600 /* in tenths of a second */ -#define NFS4_DEF_DS_RETRANS 5 - -/* - * Field testing shows we need to support up to 4096 stripe indices. - * We store each index as a u8 (u32 on the wire) to keep the memory footprint - * reasonable. This in turn means we support a maximum of 256 - * RFC 5661 multipath_list4 structures. - */ -#define NFS4_PNFS_MAX_STRIPE_CNT 4096 -#define NFS4_PNFS_MAX_MULTI_CNT 256 /* 256 fit into a u8 stripe_index */ - -/* error codes for internal use */ -#define NFS4ERR_RESET_TO_MDS 12001 - -enum stripetype4 { - STRIPE_SPARSE = 1, - STRIPE_DENSE = 2 -}; - -/* Individual ip address */ -struct nfs4_pnfs_ds_addr { - struct sockaddr_storage da_addr; - size_t da_addrlen; - struct list_head da_node; /* nfs4_pnfs_dev_hlist dev_dslist */ - char *da_remotestr; /* human readable addr+port */ -}; - -struct nfs4_pnfs_ds { - struct list_head ds_node; /* nfs4_pnfs_dev_hlist dev_dslist */ - char *ds_remotestr; /* comma sep list of addrs */ - struct list_head ds_addrs; - struct nfs_client *ds_clp; - atomic_t ds_count; - unsigned long ds_state; -#define NFS4DS_CONNECTING 0 /* ds is establishing connection */ -}; - -struct nfs4_file_layout_dsaddr { - struct nfs4_deviceid_node id_node; - u32 stripe_count; - u8 *stripe_indices; - u32 ds_num; - struct nfs4_pnfs_ds *ds_list[1]; -}; - -struct nfs4_filelayout_segment { - struct pnfs_layout_segment generic_hdr; - u32 stripe_type; - u32 commit_through_mds; - u32 stripe_unit; - u32 first_stripe_index; - u64 pattern_offset; - struct nfs4_file_layout_dsaddr *dsaddr; /* Point to GETDEVINFO data */ - unsigned int num_fh; - struct nfs_fh **fh_array; -}; - -struct nfs4_filelayout { - struct pnfs_layout_hdr generic_hdr; - struct pnfs_ds_commit_info commit_info; -}; - -static inline struct nfs4_filelayout * -FILELAYOUT_FROM_HDR(struct pnfs_layout_hdr *lo) -{ - return container_of(lo, struct nfs4_filelayout, generic_hdr); -} - -static inline struct nfs4_filelayout_segment * -FILELAYOUT_LSEG(struct pnfs_layout_segment *lseg) -{ - return container_of(lseg, - struct nfs4_filelayout_segment, - generic_hdr); -} - -static inline struct nfs4_deviceid_node * -FILELAYOUT_DEVID_NODE(struct pnfs_layout_segment *lseg) -{ - return &FILELAYOUT_LSEG(lseg)->dsaddr->id_node; -} - -static inline void -filelayout_mark_devid_invalid(struct nfs4_deviceid_node *node) -{ - u32 *p = (u32 *)&node->deviceid; - - printk(KERN_WARNING "NFS: Deviceid [%x%x%x%x] marked out of use.\n", - p[0], p[1], p[2], p[3]); - - set_bit(NFS_DEVICEID_INVALID, &node->flags); -} - -static inline bool -filelayout_test_devid_invalid(struct nfs4_deviceid_node *node) -{ - return test_bit(NFS_DEVICEID_INVALID, &node->flags); -} - -extern bool -filelayout_test_devid_unavailable(struct nfs4_deviceid_node *node); - -extern struct nfs_fh * -nfs4_fl_select_ds_fh(struct pnfs_layout_segment *lseg, u32 j); - -extern void print_ds(struct nfs4_pnfs_ds *ds); -u32 nfs4_fl_calc_j_index(struct pnfs_layout_segment *lseg, loff_t offset); -u32 nfs4_fl_calc_ds_index(struct pnfs_layout_segment *lseg, u32 j); -struct nfs4_pnfs_ds *nfs4_fl_prepare_ds(struct pnfs_layout_segment *lseg, - u32 ds_idx); -extern void nfs4_fl_put_deviceid(struct nfs4_file_layout_dsaddr *dsaddr); -extern void nfs4_fl_free_deviceid(struct nfs4_file_layout_dsaddr *dsaddr); -struct nfs4_file_layout_dsaddr * -filelayout_get_device_info(struct inode *inode, struct nfs4_deviceid *dev_id, - struct rpc_cred *cred, gfp_t gfp_flags); - -#endif /* FS_NFS_NFS4FILELAYOUT_H */ diff --git a/fs/nfs/nfs4filelayoutdev.c b/fs/nfs/nfs4filelayoutdev.c deleted file mode 100644 index efac602..0000000 --- a/fs/nfs/nfs4filelayoutdev.c +++ /dev/null @@ -1,843 +0,0 @@ -/* - * Device operations for the pnfs nfs4 file layout driver. - * - * Copyright (c) 2002 - * The Regents of the University of Michigan - * All Rights Reserved - * - * Dean Hildebrand - * Garth Goodson - * - * Permission is granted to use, copy, create derivative works, and - * redistribute this software and such derivative works for any purpose, - * so long as the name of the University of Michigan is not used in - * any advertising or publicity pertaining to the use or distribution - * of this software without specific, written prior authorization. If - * the above copyright notice or any other identification of the - * University of Michigan is included in any copy of any portion of - * this software, then the disclaimer below must also be included. - * - * This software is provided as is, without representation or warranty - * of any kind either express or implied, including without limitation - * the implied warranties of merchantability, fitness for a particular - * purpose, or noninfringement. The Regents of the University of - * Michigan shall not be liable for any damages, including special, - * indirect, incidental, or consequential damages, with respect to any - * claim arising out of or in connection with the use of the software, - * even if it has been or is hereafter advised of the possibility of - * such damages. - */ - -#include -#include -#include -#include - -#include "internal.h" -#include "nfs4session.h" -#include "nfs4filelayout.h" - -#define NFSDBG_FACILITY NFSDBG_PNFS_LD - -static unsigned int dataserver_timeo = NFS4_DEF_DS_TIMEO; -static unsigned int dataserver_retrans = NFS4_DEF_DS_RETRANS; - -/* - * Data server cache - * - * Data servers can be mapped to different device ids. - * nfs4_pnfs_ds reference counting - * - set to 1 on allocation - * - incremented when a device id maps a data server already in the cache. - * - decremented when deviceid is removed from the cache. - */ -static DEFINE_SPINLOCK(nfs4_ds_cache_lock); -static LIST_HEAD(nfs4_data_server_cache); - -/* Debug routines */ -void -print_ds(struct nfs4_pnfs_ds *ds) -{ - if (ds == NULL) { - printk("%s NULL device\n", __func__); - return; - } - printk(" ds %s\n" - " ref count %d\n" - " client %p\n" - " cl_exchange_flags %x\n", - ds->ds_remotestr, - atomic_read(&ds->ds_count), ds->ds_clp, - ds->ds_clp ? ds->ds_clp->cl_exchange_flags : 0); -} - -static bool -same_sockaddr(struct sockaddr *addr1, struct sockaddr *addr2) -{ - struct sockaddr_in *a, *b; - struct sockaddr_in6 *a6, *b6; - - if (addr1->sa_family != addr2->sa_family) - return false; - - switch (addr1->sa_family) { - case AF_INET: - a = (struct sockaddr_in *)addr1; - b = (struct sockaddr_in *)addr2; - - if (a->sin_addr.s_addr == b->sin_addr.s_addr && - a->sin_port == b->sin_port) - return true; - break; - - case AF_INET6: - a6 = (struct sockaddr_in6 *)addr1; - b6 = (struct sockaddr_in6 *)addr2; - - /* LINKLOCAL addresses must have matching scope_id */ - if (ipv6_addr_src_scope(&a6->sin6_addr) == - IPV6_ADDR_SCOPE_LINKLOCAL && - a6->sin6_scope_id != b6->sin6_scope_id) - return false; - - if (ipv6_addr_equal(&a6->sin6_addr, &b6->sin6_addr) && - a6->sin6_port == b6->sin6_port) - return true; - break; - - default: - dprintk("%s: unhandled address family: %u\n", - __func__, addr1->sa_family); - return false; - } - - return false; -} - -static bool -_same_data_server_addrs_locked(const struct list_head *dsaddrs1, - const struct list_head *dsaddrs2) -{ - struct nfs4_pnfs_ds_addr *da1, *da2; - - /* step through both lists, comparing as we go */ - for (da1 = list_first_entry(dsaddrs1, typeof(*da1), da_node), - da2 = list_first_entry(dsaddrs2, typeof(*da2), da_node); - da1 != NULL && da2 != NULL; - da1 = list_entry(da1->da_node.next, typeof(*da1), da_node), - da2 = list_entry(da2->da_node.next, typeof(*da2), da_node)) { - if (!same_sockaddr((struct sockaddr *)&da1->da_addr, - (struct sockaddr *)&da2->da_addr)) - return false; - } - if (da1 == NULL && da2 == NULL) - return true; - - return false; -} - -/* - * Lookup DS by addresses. nfs4_ds_cache_lock is held - */ -static struct nfs4_pnfs_ds * -_data_server_lookup_locked(const struct list_head *dsaddrs) -{ - struct nfs4_pnfs_ds *ds; - - list_for_each_entry(ds, &nfs4_data_server_cache, ds_node) - if (_same_data_server_addrs_locked(&ds->ds_addrs, dsaddrs)) - return ds; - return NULL; -} - -/* - * Create an rpc connection to the nfs4_pnfs_ds data server - * Currently only supports IPv4 and IPv6 addresses - */ -static int -nfs4_ds_connect(struct nfs_server *mds_srv, struct nfs4_pnfs_ds *ds) -{ - struct nfs_client *clp = ERR_PTR(-EIO); - struct nfs4_pnfs_ds_addr *da; - int status = 0; - - dprintk("--> %s DS %s au_flavor %d\n", __func__, ds->ds_remotestr, - mds_srv->nfs_client->cl_rpcclient->cl_auth->au_flavor); - - list_for_each_entry(da, &ds->ds_addrs, da_node) { - dprintk("%s: DS %s: trying address %s\n", - __func__, ds->ds_remotestr, da->da_remotestr); - - clp = nfs4_set_ds_client(mds_srv->nfs_client, - (struct sockaddr *)&da->da_addr, - da->da_addrlen, IPPROTO_TCP, - dataserver_timeo, dataserver_retrans); - if (!IS_ERR(clp)) - break; - } - - if (IS_ERR(clp)) { - status = PTR_ERR(clp); - goto out; - } - - status = nfs4_init_ds_session(clp, mds_srv->nfs_client->cl_lease_time); - if (status) - goto out_put; - - smp_wmb(); - ds->ds_clp = clp; - dprintk("%s [new] addr: %s\n", __func__, ds->ds_remotestr); -out: - return status; -out_put: - nfs_put_client(clp); - goto out; -} - -static void -destroy_ds(struct nfs4_pnfs_ds *ds) -{ - struct nfs4_pnfs_ds_addr *da; - - dprintk("--> %s\n", __func__); - ifdebug(FACILITY) - print_ds(ds); - - if (ds->ds_clp) - nfs_put_client(ds->ds_clp); - - while (!list_empty(&ds->ds_addrs)) { - da = list_first_entry(&ds->ds_addrs, - struct nfs4_pnfs_ds_addr, - da_node); - list_del_init(&da->da_node); - kfree(da->da_remotestr); - kfree(da); - } - - kfree(ds->ds_remotestr); - kfree(ds); -} - -void -nfs4_fl_free_deviceid(struct nfs4_file_layout_dsaddr *dsaddr) -{ - struct nfs4_pnfs_ds *ds; - int i; - - nfs4_print_deviceid(&dsaddr->id_node.deviceid); - - for (i = 0; i < dsaddr->ds_num; i++) { - ds = dsaddr->ds_list[i]; - if (ds != NULL) { - if (atomic_dec_and_lock(&ds->ds_count, - &nfs4_ds_cache_lock)) { - list_del_init(&ds->ds_node); - spin_unlock(&nfs4_ds_cache_lock); - destroy_ds(ds); - } - } - } - kfree(dsaddr->stripe_indices); - kfree(dsaddr); -} - -/* - * Create a string with a human readable address and port to avoid - * complicated setup around many dprinks. - */ -static char * -nfs4_pnfs_remotestr(struct list_head *dsaddrs, gfp_t gfp_flags) -{ - struct nfs4_pnfs_ds_addr *da; - char *remotestr; - size_t len; - char *p; - - len = 3; /* '{', '}' and eol */ - list_for_each_entry(da, dsaddrs, da_node) { - len += strlen(da->da_remotestr) + 1; /* string plus comma */ - } - - remotestr = kzalloc(len, gfp_flags); - if (!remotestr) - return NULL; - - p = remotestr; - *(p++) = '{'; - len--; - list_for_each_entry(da, dsaddrs, da_node) { - size_t ll = strlen(da->da_remotestr); - - if (ll > len) - goto out_err; - - memcpy(p, da->da_remotestr, ll); - p += ll; - len -= ll; - - if (len < 1) - goto out_err; - (*p++) = ','; - len--; - } - if (len < 2) - goto out_err; - *(p++) = '}'; - *p = '\0'; - return remotestr; -out_err: - kfree(remotestr); - return NULL; -} - -static struct nfs4_pnfs_ds * -nfs4_pnfs_ds_add(struct list_head *dsaddrs, gfp_t gfp_flags) -{ - struct nfs4_pnfs_ds *tmp_ds, *ds = NULL; - char *remotestr; - - if (list_empty(dsaddrs)) { - dprintk("%s: no addresses defined\n", __func__); - goto out; - } - - ds = kzalloc(sizeof(*ds), gfp_flags); - if (!ds) - goto out; - - /* this is only used for debugging, so it's ok if its NULL */ - remotestr = nfs4_pnfs_remotestr(dsaddrs, gfp_flags); - - spin_lock(&nfs4_ds_cache_lock); - tmp_ds = _data_server_lookup_locked(dsaddrs); - if (tmp_ds == NULL) { - INIT_LIST_HEAD(&ds->ds_addrs); - list_splice_init(dsaddrs, &ds->ds_addrs); - ds->ds_remotestr = remotestr; - atomic_set(&ds->ds_count, 1); - INIT_LIST_HEAD(&ds->ds_node); - ds->ds_clp = NULL; - list_add(&ds->ds_node, &nfs4_data_server_cache); - dprintk("%s add new data server %s\n", __func__, - ds->ds_remotestr); - } else { - kfree(remotestr); - kfree(ds); - atomic_inc(&tmp_ds->ds_count); - dprintk("%s data server %s found, inc'ed ds_count to %d\n", - __func__, tmp_ds->ds_remotestr, - atomic_read(&tmp_ds->ds_count)); - ds = tmp_ds; - } - spin_unlock(&nfs4_ds_cache_lock); -out: - return ds; -} - -/* - * Currently only supports ipv4, ipv6 and one multi-path address. - */ -static struct nfs4_pnfs_ds_addr * -decode_ds_addr(struct net *net, struct xdr_stream *streamp, gfp_t gfp_flags) -{ - struct nfs4_pnfs_ds_addr *da = NULL; - char *buf, *portstr; - __be16 port; - int nlen, rlen; - int tmp[2]; - __be32 *p; - char *netid, *match_netid; - size_t len, match_netid_len; - char *startsep = ""; - char *endsep = ""; - - - /* r_netid */ - p = xdr_inline_decode(streamp, 4); - if (unlikely(!p)) - goto out_err; - nlen = be32_to_cpup(p++); - - p = xdr_inline_decode(streamp, nlen); - if (unlikely(!p)) - goto out_err; - - netid = kmalloc(nlen+1, gfp_flags); - if (unlikely(!netid)) - goto out_err; - - netid[nlen] = '\0'; - memcpy(netid, p, nlen); - - /* r_addr: ip/ip6addr with port in dec octets - see RFC 5665 */ - p = xdr_inline_decode(streamp, 4); - if (unlikely(!p)) - goto out_free_netid; - rlen = be32_to_cpup(p); - - p = xdr_inline_decode(streamp, rlen); - if (unlikely(!p)) - goto out_free_netid; - - /* port is ".ABC.DEF", 8 chars max */ - if (rlen > INET6_ADDRSTRLEN + IPV6_SCOPE_ID_LEN + 8) { - dprintk("%s: Invalid address, length %d\n", __func__, - rlen); - goto out_free_netid; - } - buf = kmalloc(rlen + 1, gfp_flags); - if (!buf) { - dprintk("%s: Not enough memory\n", __func__); - goto out_free_netid; - } - buf[rlen] = '\0'; - memcpy(buf, p, rlen); - - /* replace port '.' with '-' */ - portstr = strrchr(buf, '.'); - if (!portstr) { - dprintk("%s: Failed finding expected dot in port\n", - __func__); - goto out_free_buf; - } - *portstr = '-'; - - /* find '.' between address and port */ - portstr = strrchr(buf, '.'); - if (!portstr) { - dprintk("%s: Failed finding expected dot between address and " - "port\n", __func__); - goto out_free_buf; - } - *portstr = '\0'; - - da = kzalloc(sizeof(*da), gfp_flags); - if (unlikely(!da)) - goto out_free_buf; - - INIT_LIST_HEAD(&da->da_node); - - if (!rpc_pton(net, buf, portstr-buf, (struct sockaddr *)&da->da_addr, - sizeof(da->da_addr))) { - dprintk("%s: error parsing address %s\n", __func__, buf); - goto out_free_da; - } - - portstr++; - sscanf(portstr, "%d-%d", &tmp[0], &tmp[1]); - port = htons((tmp[0] << 8) | (tmp[1])); - - switch (da->da_addr.ss_family) { - case AF_INET: - ((struct sockaddr_in *)&da->da_addr)->sin_port = port; - da->da_addrlen = sizeof(struct sockaddr_in); - match_netid = "tcp"; - match_netid_len = 3; - break; - - case AF_INET6: - ((struct sockaddr_in6 *)&da->da_addr)->sin6_port = port; - da->da_addrlen = sizeof(struct sockaddr_in6); - match_netid = "tcp6"; - match_netid_len = 4; - startsep = "["; - endsep = "]"; - break; - - default: - dprintk("%s: unsupported address family: %u\n", - __func__, da->da_addr.ss_family); - goto out_free_da; - } - - if (nlen != match_netid_len || strncmp(netid, match_netid, nlen)) { - dprintk("%s: ERROR: r_netid \"%s\" != \"%s\"\n", - __func__, netid, match_netid); - goto out_free_da; - } - - /* save human readable address */ - len = strlen(startsep) + strlen(buf) + strlen(endsep) + 7; - da->da_remotestr = kzalloc(len, gfp_flags); - - /* NULL is ok, only used for dprintk */ - if (da->da_remotestr) - snprintf(da->da_remotestr, len, "%s%s%s:%u", startsep, - buf, endsep, ntohs(port)); - - dprintk("%s: Parsed DS addr %s\n", __func__, da->da_remotestr); - kfree(buf); - kfree(netid); - return da; - -out_free_da: - kfree(da); -out_free_buf: - dprintk("%s: Error parsing DS addr: %s\n", __func__, buf); - kfree(buf); -out_free_netid: - kfree(netid); -out_err: - return NULL; -} - -/* Decode opaque device data and return the result */ -static struct nfs4_file_layout_dsaddr* -decode_device(struct inode *ino, struct pnfs_device *pdev, gfp_t gfp_flags) -{ - int i; - u32 cnt, num; - u8 *indexp; - __be32 *p; - u8 *stripe_indices; - u8 max_stripe_index; - struct nfs4_file_layout_dsaddr *dsaddr = NULL; - struct xdr_stream stream; - struct xdr_buf buf; - struct page *scratch; - struct list_head dsaddrs; - struct nfs4_pnfs_ds_addr *da; - - /* set up xdr stream */ - scratch = alloc_page(gfp_flags); - if (!scratch) - goto out_err; - - xdr_init_decode_pages(&stream, &buf, pdev->pages, pdev->pglen); - xdr_set_scratch_buffer(&stream, page_address(scratch), PAGE_SIZE); - - /* Get the stripe count (number of stripe index) */ - p = xdr_inline_decode(&stream, 4); - if (unlikely(!p)) - goto out_err_free_scratch; - - cnt = be32_to_cpup(p); - dprintk("%s stripe count %d\n", __func__, cnt); - if (cnt > NFS4_PNFS_MAX_STRIPE_CNT) { - printk(KERN_WARNING "NFS: %s: stripe count %d greater than " - "supported maximum %d\n", __func__, - cnt, NFS4_PNFS_MAX_STRIPE_CNT); - goto out_err_free_scratch; - } - - /* read stripe indices */ - stripe_indices = kcalloc(cnt, sizeof(u8), gfp_flags); - if (!stripe_indices) - goto out_err_free_scratch; - - p = xdr_inline_decode(&stream, cnt << 2); - if (unlikely(!p)) - goto out_err_free_stripe_indices; - - indexp = &stripe_indices[0]; - max_stripe_index = 0; - for (i = 0; i < cnt; i++) { - *indexp = be32_to_cpup(p++); - max_stripe_index = max(max_stripe_index, *indexp); - indexp++; - } - - /* Check the multipath list count */ - p = xdr_inline_decode(&stream, 4); - if (unlikely(!p)) - goto out_err_free_stripe_indices; - - num = be32_to_cpup(p); - dprintk("%s ds_num %u\n", __func__, num); - if (num > NFS4_PNFS_MAX_MULTI_CNT) { - printk(KERN_WARNING "NFS: %s: multipath count %d greater than " - "supported maximum %d\n", __func__, - num, NFS4_PNFS_MAX_MULTI_CNT); - goto out_err_free_stripe_indices; - } - - /* validate stripe indices are all < num */ - if (max_stripe_index >= num) { - printk(KERN_WARNING "NFS: %s: stripe index %u >= num ds %u\n", - __func__, max_stripe_index, num); - goto out_err_free_stripe_indices; - } - - dsaddr = kzalloc(sizeof(*dsaddr) + - (sizeof(struct nfs4_pnfs_ds *) * (num - 1)), - gfp_flags); - if (!dsaddr) - goto out_err_free_stripe_indices; - - dsaddr->stripe_count = cnt; - dsaddr->stripe_indices = stripe_indices; - stripe_indices = NULL; - dsaddr->ds_num = num; - nfs4_init_deviceid_node(&dsaddr->id_node, - NFS_SERVER(ino)->pnfs_curr_ld, - NFS_SERVER(ino)->nfs_client, - &pdev->dev_id); - - INIT_LIST_HEAD(&dsaddrs); - - for (i = 0; i < dsaddr->ds_num; i++) { - int j; - u32 mp_count; - - p = xdr_inline_decode(&stream, 4); - if (unlikely(!p)) - goto out_err_free_deviceid; - - mp_count = be32_to_cpup(p); /* multipath count */ - for (j = 0; j < mp_count; j++) { - da = decode_ds_addr(NFS_SERVER(ino)->nfs_client->cl_net, - &stream, gfp_flags); - if (da) - list_add_tail(&da->da_node, &dsaddrs); - } - if (list_empty(&dsaddrs)) { - dprintk("%s: no suitable DS addresses found\n", - __func__); - goto out_err_free_deviceid; - } - - dsaddr->ds_list[i] = nfs4_pnfs_ds_add(&dsaddrs, gfp_flags); - if (!dsaddr->ds_list[i]) - goto out_err_drain_dsaddrs; - - /* If DS was already in cache, free ds addrs */ - while (!list_empty(&dsaddrs)) { - da = list_first_entry(&dsaddrs, - struct nfs4_pnfs_ds_addr, - da_node); - list_del_init(&da->da_node); - kfree(da->da_remotestr); - kfree(da); - } - } - - __free_page(scratch); - return dsaddr; - -out_err_drain_dsaddrs: - while (!list_empty(&dsaddrs)) { - da = list_first_entry(&dsaddrs, struct nfs4_pnfs_ds_addr, - da_node); - list_del_init(&da->da_node); - kfree(da->da_remotestr); - kfree(da); - } -out_err_free_deviceid: - nfs4_fl_free_deviceid(dsaddr); - /* stripe_indicies was part of dsaddr */ - goto out_err_free_scratch; -out_err_free_stripe_indices: - kfree(stripe_indices); -out_err_free_scratch: - __free_page(scratch); -out_err: - dprintk("%s ERROR: returning NULL\n", __func__); - return NULL; -} - -/* - * Decode the opaque device specified in 'dev' and add it to the cache of - * available devices. - */ -static struct nfs4_file_layout_dsaddr * -decode_and_add_device(struct inode *inode, struct pnfs_device *dev, gfp_t gfp_flags) -{ - struct nfs4_deviceid_node *d; - struct nfs4_file_layout_dsaddr *n, *new; - - new = decode_device(inode, dev, gfp_flags); - if (!new) { - printk(KERN_WARNING "NFS: %s: Could not decode or add device\n", - __func__); - return NULL; - } - - d = nfs4_insert_deviceid_node(&new->id_node); - n = container_of(d, struct nfs4_file_layout_dsaddr, id_node); - if (n != new) { - nfs4_fl_free_deviceid(new); - return n; - } - - return new; -} - -/* - * Retrieve the information for dev_id, add it to the list - * of available devices, and return it. - */ -struct nfs4_file_layout_dsaddr * -filelayout_get_device_info(struct inode *inode, - struct nfs4_deviceid *dev_id, - struct rpc_cred *cred, - gfp_t gfp_flags) -{ - struct pnfs_device *pdev = NULL; - u32 max_resp_sz; - int max_pages; - struct page **pages = NULL; - struct nfs4_file_layout_dsaddr *dsaddr = NULL; - int rc, i; - struct nfs_server *server = NFS_SERVER(inode); - - /* - * Use the session max response size as the basis for setting - * GETDEVICEINFO's maxcount - */ - max_resp_sz = server->nfs_client->cl_session->fc_attrs.max_resp_sz; - max_pages = nfs_page_array_len(0, max_resp_sz); - dprintk("%s inode %p max_resp_sz %u max_pages %d\n", - __func__, inode, max_resp_sz, max_pages); - - pdev = kzalloc(sizeof(struct pnfs_device), gfp_flags); - if (pdev == NULL) - return NULL; - - pages = kzalloc(max_pages * sizeof(struct page *), gfp_flags); - if (pages == NULL) { - kfree(pdev); - return NULL; - } - for (i = 0; i < max_pages; i++) { - pages[i] = alloc_page(gfp_flags); - if (!pages[i]) - goto out_free; - } - - memcpy(&pdev->dev_id, dev_id, sizeof(*dev_id)); - pdev->layout_type = LAYOUT_NFSV4_1_FILES; - pdev->pages = pages; - pdev->pgbase = 0; - pdev->pglen = max_resp_sz; - pdev->mincount = 0; - pdev->maxcount = max_resp_sz - nfs41_maxgetdevinfo_overhead; - - rc = nfs4_proc_getdeviceinfo(server, pdev, cred); - dprintk("%s getdevice info returns %d\n", __func__, rc); - if (rc) - goto out_free; - - /* - * Found new device, need to decode it and then add it to the - * list of known devices for this mountpoint. - */ - dsaddr = decode_and_add_device(inode, pdev, gfp_flags); -out_free: - for (i = 0; i < max_pages; i++) - __free_page(pages[i]); - kfree(pages); - kfree(pdev); - dprintk("<-- %s dsaddr %p\n", __func__, dsaddr); - return dsaddr; -} - -void -nfs4_fl_put_deviceid(struct nfs4_file_layout_dsaddr *dsaddr) -{ - nfs4_put_deviceid_node(&dsaddr->id_node); -} - -/* - * Want res = (offset - layout->pattern_offset)/ layout->stripe_unit - * Then: ((res + fsi) % dsaddr->stripe_count) - */ -u32 -nfs4_fl_calc_j_index(struct pnfs_layout_segment *lseg, loff_t offset) -{ - struct nfs4_filelayout_segment *flseg = FILELAYOUT_LSEG(lseg); - u64 tmp; - - tmp = offset - flseg->pattern_offset; - do_div(tmp, flseg->stripe_unit); - tmp += flseg->first_stripe_index; - return do_div(tmp, flseg->dsaddr->stripe_count); -} - -u32 -nfs4_fl_calc_ds_index(struct pnfs_layout_segment *lseg, u32 j) -{ - return FILELAYOUT_LSEG(lseg)->dsaddr->stripe_indices[j]; -} - -struct nfs_fh * -nfs4_fl_select_ds_fh(struct pnfs_layout_segment *lseg, u32 j) -{ - struct nfs4_filelayout_segment *flseg = FILELAYOUT_LSEG(lseg); - u32 i; - - if (flseg->stripe_type == STRIPE_SPARSE) { - if (flseg->num_fh == 1) - i = 0; - else if (flseg->num_fh == 0) - /* Use the MDS OPEN fh set in nfs_read_rpcsetup */ - return NULL; - else - i = nfs4_fl_calc_ds_index(lseg, j); - } else - i = j; - return flseg->fh_array[i]; -} - -static void nfs4_wait_ds_connect(struct nfs4_pnfs_ds *ds) -{ - might_sleep(); - wait_on_bit(&ds->ds_state, NFS4DS_CONNECTING, - nfs_wait_bit_killable, TASK_KILLABLE); -} - -static void nfs4_clear_ds_conn_bit(struct nfs4_pnfs_ds *ds) -{ - smp_mb__before_clear_bit(); - clear_bit(NFS4DS_CONNECTING, &ds->ds_state); - smp_mb__after_clear_bit(); - wake_up_bit(&ds->ds_state, NFS4DS_CONNECTING); -} - - -struct nfs4_pnfs_ds * -nfs4_fl_prepare_ds(struct pnfs_layout_segment *lseg, u32 ds_idx) -{ - struct nfs4_file_layout_dsaddr *dsaddr = FILELAYOUT_LSEG(lseg)->dsaddr; - struct nfs4_pnfs_ds *ds = dsaddr->ds_list[ds_idx]; - struct nfs4_deviceid_node *devid = FILELAYOUT_DEVID_NODE(lseg); - struct nfs4_pnfs_ds *ret = ds; - - if (ds == NULL) { - printk(KERN_ERR "NFS: %s: No data server for offset index %d\n", - __func__, ds_idx); - filelayout_mark_devid_invalid(devid); - goto out; - } - smp_rmb(); - if (ds->ds_clp) - goto out_test_devid; - - if (test_and_set_bit(NFS4DS_CONNECTING, &ds->ds_state) == 0) { - struct nfs_server *s = NFS_SERVER(lseg->pls_layout->plh_inode); - int err; - - err = nfs4_ds_connect(s, ds); - if (err) - nfs4_mark_deviceid_unavailable(devid); - nfs4_clear_ds_conn_bit(ds); - } else { - /* Either ds is connected, or ds is NULL */ - nfs4_wait_ds_connect(ds); - } -out_test_devid: - if (filelayout_test_devid_unavailable(devid)) - ret = NULL; -out: - return ret; -} - -module_param(dataserver_retrans, uint, 0644); -MODULE_PARM_DESC(dataserver_retrans, "The number of times the NFSv4.1 client " - "retries a request before it attempts further " - " recovery action."); -module_param(dataserver_timeo, uint, 0644); -MODULE_PARM_DESC(dataserver_timeo, "The time (in tenths of a second) the " - "NFSv4.1 client waits for a response from a " - " data server before it retries an NFS request."); -- cgit v1.1 From abbec2da13f0e4c5d9b78b7e2c025a3e617228ba Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Thu, 5 Jun 2014 10:42:37 -0400 Subject: NFS: Use raw_write_seqcount_begin/end int nfs4_reclaim_open_state The addition of lockdep code to write_seqcount_begin/end has lead to a bunch of false positive claims of ABBA deadlocks with the so_lock spinlock. Audits show that this simply cannot happen because the read side code does not spin while holding so_lock. Cc: # 3.13.x Signed-off-by: Trond Myklebust --- fs/nfs/nfs4state.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c index 2349518..2127514 100644 --- a/fs/nfs/nfs4state.c +++ b/fs/nfs/nfs4state.c @@ -1456,7 +1456,7 @@ static int nfs4_reclaim_open_state(struct nfs4_state_owner *sp, const struct nfs * server that doesn't support a grace period. */ spin_lock(&sp->so_lock); - write_seqcount_begin(&sp->so_reclaim_seqcount); + raw_write_seqcount_begin(&sp->so_reclaim_seqcount); restart: list_for_each_entry(state, &sp->so_states, open_states) { if (!test_and_clear_bit(ops->state_flag_bit, &state->flags)) @@ -1519,13 +1519,13 @@ restart: spin_lock(&sp->so_lock); goto restart; } - write_seqcount_end(&sp->so_reclaim_seqcount); + raw_write_seqcount_end(&sp->so_reclaim_seqcount); spin_unlock(&sp->so_lock); return 0; out_err: nfs4_put_open_state(state); spin_lock(&sp->so_lock); - write_seqcount_end(&sp->so_reclaim_seqcount); + raw_write_seqcount_end(&sp->so_reclaim_seqcount); spin_unlock(&sp->so_lock); return status; } -- cgit v1.1 From bf96bc0b3bb04e2a0cc9f5c8cb73179ddbca48df Mon Sep 17 00:00:00 2001 From: Tom Haynes Date: Wed, 4 Jun 2014 15:36:05 -0700 Subject: NFSv4.1: Comment is now wrong and redundant to code The save of the write offset was removed some time ago, so that part of the comment is bogus. The remainder is pretty self-evident. So off with it! Signed-off-by: Tom Haynes Signed-off-by: Trond Myklebust --- fs/nfs/filelayout/filelayout.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/filelayout/filelayout.c b/fs/nfs/filelayout/filelayout.c index a008e85..c7fbc91 100644 --- a/fs/nfs/filelayout/filelayout.c +++ b/fs/nfs/filelayout/filelayout.c @@ -608,10 +608,7 @@ filelayout_write_pagelist(struct nfs_pgio_data *data, int sync) fh = nfs4_fl_select_ds_fh(lseg, j); if (fh) data->args.fh = fh; - /* - * Get the file offset on the dserver. Set the write offset to - * this offset and save the original offset. - */ + data->args.offset = filelayout_get_dserver_offset(lseg, offset); /* Perform an asynchronous write */ -- cgit v1.1 From f383b7e8fd77878c24dc682ced9945ab0930ce86 Mon Sep 17 00:00:00 2001 From: Tom Haynes Date: Wed, 4 Jun 2014 15:54:57 -0700 Subject: NFSv4.1: Fix typo in dprintk Signed-off-by: Tom Haynes Signed-off-by: Trond Myklebust --- fs/nfs/filelayout/filelayout.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs/nfs') diff --git a/fs/nfs/filelayout/filelayout.c b/fs/nfs/filelayout/filelayout.c index c7fbc91..d2eba1c 100644 --- a/fs/nfs/filelayout/filelayout.c +++ b/fs/nfs/filelayout/filelayout.c @@ -279,7 +279,7 @@ filelayout_set_layoutcommit(struct nfs_pgio_data *wdata) return; pnfs_set_layoutcommit(wdata); - dprintk("%s ionde %lu pls_end_pos %lu\n", __func__, hdr->inode->i_ino, + dprintk("%s inode %lu pls_end_pos %lu\n", __func__, hdr->inode->i_ino, (unsigned long) NFS_I(hdr->inode)->layout->plh_lwb); } -- cgit v1.1 From c5e20cb700c0e36fb5499093b96e80350c6eb48e Mon Sep 17 00:00:00 2001 From: Weston Andros Adamson Date: Mon, 9 Jun 2014 17:47:26 -0400 Subject: pnfs: fix lockup caused by pnfs_generic_pg_test end_offset and req_offset both return u64 - avoid casting to u32 until it's needed, when it's less than the (u32) size returned by nfs_generic_pg_test. Also, fix the comments in pnfs_generic_pg_test. Running the cthon04 special tests caused this lockup in the "write/read at 2GB, 4GB edges" test when running against a file layout server: BUG: soft lockup - CPU#0 stuck for 22s! [bigfile2:823] Modules linked in: nfs_layout_nfsv41_files rpcsec_gss_krb5 nfsv4 nfs fscache ip6t_REJECT nf_conntrack_ipv6 nf_defrag_ipv6 ip6table_mangle ip6table_filter ip6_tables iptable_nat nf_nat_ipv4 nf_nat iptable_mangle ppdev crc32c_intel aesni_intel aes_x86_64 glue_helper lrw gf128mul ablk_helper cryptd serio_raw e1000 shpchp i2c_piix4 i2c_core parport_pc parport nfsd auth_rpcgss oid_registry exportfs nfs_acl lockd sunrpc btrfs xor zlib_deflate raid6_pq mptspi scsi_transport_spi mptscsih mptbase ata_generic floppy autofs4 irq event stamp: 205958 hardirqs last enabled at (205957): [] restore_args+0x0/0x30 hardirqs last disabled at (205958): [] apic_timer_interrupt+0x6a/0x80 softirqs last enabled at (205956): [] __do_softirq+0x1ea/0x2ab softirqs last disabled at (205951): [] irq_exit+0x44/0x9a CPU: 0 PID: 823 Comm: bigfile2 Not tainted 3.15.0-rc1-branch-pgio_plus+ #3 Hardware name: VMware, Inc. VMware Virtual Platform/440BX Desktop Reference Platform, BIOS 6.00 07/31/2013 task: ffff8800792ec480 ti: ffff880078c4e000 task.ti: ffff880078c4e000 RIP: 0010:[] [] nfs_page_group_unlock+0x3e/0x4b [nfs] RSP: 0018:ffff880078c4fab0 EFLAGS: 00000202 RAX: 0000000000000fff RBX: ffff88006bf83300 RCX: 0000000000000000 RDX: 0000000000000001 RSI: 0000000000000000 RDI: ffff88006bf83300 RBP: ffff880078c4fab8 R08: 0000000000000001 R09: 0000000000000000 R10: ffffffff8249840c R11: 0000000000000000 R12: 0000000000000035 R13: ffff88007ffc72d8 R14: 0000000000000001 R15: 0000000000000000 FS: 00007f45f11b7740(0000) GS:ffff88007f200000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 00007f3a8cb632d0 CR3: 000000007931c000 CR4: 00000000001407f0 Stack: ffff88006bf832c0 ffff880078c4fb00 ffffffffa02cec22 ffff880078c4fad8 00000fff810f9d99 ffff880078c4fca0 ffff88006bf832c0 ffff88006bf832c0 ffff880078c4fca0 ffff880078c4fd60 ffff880078c4fb28 ffffffffa02cee34 Call Trace: [] __nfs_pageio_add_request+0x298/0x34f [nfs] [] nfs_pageio_add_request+0x1f/0x42 [nfs] [] nfs_do_writepage+0x1b5/0x1e4 [nfs] [] nfs_writepages_callback+0x13/0x25 [nfs] [] ? nfs_do_writepage+0x1e4/0x1e4 [nfs] [] write_cache_pages+0x254/0x37f [] ? nfs_do_writepage+0x1e4/0x1e4 [nfs] [] ? printk+0x54/0x56 [] ? __set_page_dirty_nobuffers+0x22/0xe9 [] ? put_rpccred+0x38/0x101 [sunrpc] [] nfs_writepages+0xb4/0xf8 [nfs] [] do_writepages+0x21/0x2f [] __filemap_fdatawrite_range+0x55/0x57 [] filemap_write_and_wait_range+0x2d/0x5b [] nfs4_file_fsync+0x3a/0x98 [nfsv4] [] vfs_fsync_range+0x18/0x20 [] generic_file_aio_write+0xa7/0xbd [] nfs_file_write+0xf0/0x170 [nfs] [] do_sync_write+0x59/0x78 [] vfs_write+0xab/0x107 [] SyS_write+0x49/0x7f [] system_call_fastpath+0x16/0x1b Reported-by: Anna Schumaker Signed-off-by: Weston Andros Adamson Signed-off-by: Trond Myklebust --- fs/nfs/pnfs.c | 32 ++++++++++++++++++-------------- 1 file changed, 18 insertions(+), 14 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index ce46a41..ee60c42 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c @@ -1433,33 +1433,37 @@ pnfs_generic_pg_test(struct nfs_pageio_descriptor *pgio, struct nfs_page *prev, struct nfs_page *req) { unsigned int size; - u64 end; + u64 seg_end, req_start, seg_left; size = nfs_generic_pg_test(pgio, prev, req); if (!size) return 0; /* - * Test if a nfs_page is fully contained in the pnfs_layout_range. - * Note that this test makes several assumptions: - * - that the previous nfs_page in the struct nfs_pageio_descriptor - * is known to lie within the range. - * - that the nfs_page being tested is known to be contiguous with the - * previous nfs_page. - * - Layout ranges are page aligned, so we only have to test the - * start offset of the request. + * 'size' contains the number of bytes left in the current page (up + * to the original size asked for in @req->wb_bytes). + * + * Calculate how many bytes are left in the layout segment + * and if there are less bytes than 'size', return that instead. * * Please also note that 'end_offset' is actually the offset of the * first byte that lies outside the pnfs_layout_range. FIXME? * */ if (pgio->pg_lseg) { - end = end_offset(pgio->pg_lseg->pls_range.offset, - pgio->pg_lseg->pls_range.length); - WARN_ON_ONCE(req_offset(req) > end); - if (req_offset(req) >= end) + seg_end = end_offset(pgio->pg_lseg->pls_range.offset, + pgio->pg_lseg->pls_range.length); + req_start = req_offset(req); + WARN_ON_ONCE(req_start > seg_end); + /* start of request is past the last byte of this segment */ + if (req_start >= seg_end) return 0; - size = min((unsigned int)(end - req_offset(req)), size); + + /* adjust 'size' iff there are fewer bytes left in the + * segment than what nfs_generic_pg_test returned */ + seg_left = seg_end - req_start; + if (seg_left < size) + size = (unsigned int)seg_left; } return size; -- cgit v1.1 From a914722f333b3359d2f4f12919380a334176bb89 Mon Sep 17 00:00:00 2001 From: Mateusz Guzik Date: Tue, 10 Jun 2014 12:44:12 +0200 Subject: NFS: populate ->net in mount data when remounting Otherwise the kernel oopses when remounting with IPv6 server because net is dereferenced in dev_get_by_name. Use net ns of current thread so that dev_get_by_name does not operate on foreign ns. Changing the address is prohibited anyway so this should not affect anything. Signed-off-by: Mateusz Guzik Cc: linux-nfs@vger.kernel.org Cc: linux-kernel@vger.kernel.org Cc: stable@vger.kernel.org # 3.4+ Signed-off-by: Trond Myklebust --- fs/nfs/super.c | 1 + 1 file changed, 1 insertion(+) (limited to 'fs/nfs') diff --git a/fs/nfs/super.c b/fs/nfs/super.c index 1a6d7ac..084af10 100644 --- a/fs/nfs/super.c +++ b/fs/nfs/super.c @@ -2260,6 +2260,7 @@ nfs_remount(struct super_block *sb, int *flags, char *raw_data) data->nfs_server.addrlen = nfss->nfs_client->cl_addrlen; data->version = nfsvers; data->minorversion = nfss->nfs_client->cl_minorversion; + data->net = current->nsproxy->net_ns; memcpy(&data->nfs_server.address, &nfss->nfs_client->cl_addr, data->nfs_server.addrlen); -- cgit v1.1