From 378520b837cf4da769600b83690d8e825f16a611 Mon Sep 17 00:00:00 2001 From: Peng Tao Date: Thu, 7 Aug 2014 10:15:02 +0800 Subject: nfs41: add a helper function to set layoutcommit after commit Track lwb in nfs_commit_data so that we can use it to setup layoutcommit in commit_done callback. Signed-off-by: Peng Tao Signed-off-by: Trond Myklebust --- fs/nfs/pnfs.h | 1 + 1 file changed, 1 insertion(+) (limited to 'fs/nfs/pnfs.h') diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h index aca3dff..79c63114 100644 --- a/fs/nfs/pnfs.h +++ b/fs/nfs/pnfs.h @@ -219,6 +219,7 @@ void pnfs_roc_release(struct inode *ino); void pnfs_roc_set_barrier(struct inode *ino, u32 barrier); bool pnfs_roc_drain(struct inode *ino, u32 *barrier, struct rpc_task *task); void pnfs_set_layoutcommit(struct nfs_pgio_header *); +void pnfs_commit_set_layoutcommit(struct nfs_commit_data *data); void pnfs_cleanup_layoutcommit(struct nfs4_layoutcommit_data *data); int pnfs_layoutcommit_inode(struct inode *inode, bool sync); int _pnfs_return_layout(struct inode *); -- cgit v1.1 From 47abadefad213bb7de9592d2e09a8bd282ddc3de Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 21 Aug 2014 11:09:22 -0500 Subject: pnfs: avoid using stale stateids after layoutreturn After we issued a layoutreturn operations the may free the layout stateid and will thus cause bad stateid error when the client uses it again. We currently try to avoid this case by chosing the open stateid if not lsegs are present for this inode. But various places can hold refererence on lsegs and thus cause the list not to be empty shortly after a layout return. Add an explicit flag to mark the current layout stateid invalid and force usage of the openstateid after we did a full file layoutreturn. Signed-off-by: Christoph Hellwig Signed-off-by: Trond Myklebust --- fs/nfs/pnfs.h | 1 + 1 file changed, 1 insertion(+) (limited to 'fs/nfs/pnfs.h') diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h index 79c63114..1dd8a5e 100644 --- a/fs/nfs/pnfs.h +++ b/fs/nfs/pnfs.h @@ -65,6 +65,7 @@ enum { NFS_LAYOUT_BULK_RECALL, /* bulk recall affecting layout */ NFS_LAYOUT_ROC, /* some lseg had roc bit set */ NFS_LAYOUT_RETURN, /* Return this layout ASAP */ + NFS_LAYOUT_INVALID_STID, /* layout stateid id is invalid */ }; enum layoutdriver_policy_flags { -- cgit v1.1 From 5f919c9f10c1cf821ee5f414683214a361a1b98c Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 21 Aug 2014 11:09:25 -0500 Subject: pnfs: allow splicing pre-encoded pages into the layoutcommit args Currently there is no XDR buffer space allocated for the per-layout driver layoutcommit payload, which leads to server buffer overflows in the blocklayout driver even under simple workloads. As we can't do per-layout sizes for XDR operations we'll have to splice a previously encoded list of pages into the XDR stream, similar to how we handle ACL buffers. Signed-off-by: Christoph Hellwig Signed-off-by: Trond Myklebust --- fs/nfs/pnfs.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'fs/nfs/pnfs.h') diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h index 1dd8a5e..8835b5a 100644 --- a/fs/nfs/pnfs.h +++ b/fs/nfs/pnfs.h @@ -128,8 +128,8 @@ struct pnfs_layoutdriver_type { const struct nfs4_layoutreturn_args *args); void (*cleanup_layoutcommit) (struct nfs4_layoutcommit_data *data); - - void (*encode_layoutcommit) (struct pnfs_layout_hdr *layoutid, + int (*prepare_layoutcommit) (struct nfs4_layoutcommit_args *args); + void (*encode_layoutcommit) (struct pnfs_layout_hdr *lo, struct xdr_stream *xdr, const struct nfs4_layoutcommit_args *args); }; -- cgit v1.1 From 612aa983a041056c3368aacfdc9febd406d245a8 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 10 Sep 2014 08:23:30 -0700 Subject: pnfs: add flag to force read-modify-write in ->write_begin Like all block based filesystems, the pNFS block layout driver can't read or write at a byte granularity and thus has to perform read-modify-write cycles on writes smaller than this granularity. Add a flag so that the core NFS code always reads a whole page when starting a smaller write, so that we can do it in the place where the VFS expects it instead of doing in very deadlock prone way in the writeback handler. Note that in theory we could do less than page size reads here for disks that have a smaller sector size which are served by a server with a smaller pnfs block size. But so far that doesn't seem like a worthwhile optimization. Signed-off-by: Christoph Hellwig Signed-off-by: Trond Myklebust --- fs/nfs/pnfs.h | 15 +++++++++++++++ 1 file changed, 15 insertions(+) (limited to 'fs/nfs/pnfs.h') diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h index 8835b5a..a4c530e 100644 --- a/fs/nfs/pnfs.h +++ b/fs/nfs/pnfs.h @@ -72,6 +72,7 @@ enum layoutdriver_policy_flags { /* Should the pNFS client commit and return the layout upon a setattr */ PNFS_LAYOUTRET_ON_SETATTR = 1 << 0, PNFS_LAYOUTRET_ON_ERROR = 1 << 1, + PNFS_READ_WHOLE_PAGE = 1 << 2, }; struct nfs4_deviceid_node; @@ -370,6 +371,14 @@ pnfs_ld_layoutret_on_setattr(struct inode *inode) } static inline bool +pnfs_ld_read_whole_page(struct inode *inode) +{ + if (!pnfs_enabled_sb(NFS_SERVER(inode))) + return false; + return NFS_SERVER(inode)->pnfs_curr_ld->flags & PNFS_READ_WHOLE_PAGE; +} + +static inline bool pnfs_layoutcommit_outstanding(struct inode *inode) { struct nfs_inode *nfsi = NFS_I(inode); @@ -445,6 +454,12 @@ pnfs_ld_layoutret_on_setattr(struct inode *inode) } static inline bool +pnfs_ld_read_whole_page(struct inode *inode) +{ + return false; +} + +static inline bool pnfs_roc(struct inode *ino) { return false; -- cgit v1.1 From c88953d87f5c8cd95bebcbd6d15f2f0cdd348136 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 10 Sep 2014 08:23:31 -0700 Subject: pnfs: add return_range method If a layout driver keeps per-inode state outside of the layout segments it needs to be notified of any layout returns or recalls on an inode, and not just about the freeing of layout segments. Add a method to acomplish this, which will allow the block layout driver to handle the case of truncated and re-expanded files properly. Signed-off-by: Christoph Hellwig Signed-off-by: Trond Myklebust --- fs/nfs/pnfs.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'fs/nfs/pnfs.h') diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h index a4c530e..2c24942 100644 --- a/fs/nfs/pnfs.h +++ b/fs/nfs/pnfs.h @@ -94,6 +94,9 @@ struct pnfs_layoutdriver_type { struct pnfs_layout_segment * (*alloc_lseg) (struct pnfs_layout_hdr *layoutid, struct nfs4_layoutget_res *lgr, gfp_t gfp_flags); void (*free_lseg) (struct pnfs_layout_segment *lseg); + void (*return_range) (struct pnfs_layout_hdr *lo, + struct pnfs_layout_range *range); + /* test for nfs page cache coalescing */ const struct nfs_pageio_ops *pg_read_ops; const struct nfs_pageio_ops *pg_write_ops; -- cgit v1.1 From 661373b13d0490ff410a2133d4a7a117f2dd037e Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 2 Sep 2014 21:27:57 -0700 Subject: pnfs: factor GETDEVICEINFO implementations Add support to the common pNFS core to issue GETDEVICEINFO calls on a device ID cache miss. The code is taken from the well debugged file layout implementation and calls out to the layoutdriver through a new alloc_deviceid_node method. The calling conventions for nfs4_find_get_deviceid are changed so that all information needed to send a GETDEVICEINFO request is passed to the common code. Signed-off-by: Christoph Hellwig Signed-off-by: Trond Myklebust --- fs/nfs/pnfs.h | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) (limited to 'fs/nfs/pnfs.h') diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h index 2c24942..ce89ae3 100644 --- a/fs/nfs/pnfs.h +++ b/fs/nfs/pnfs.h @@ -84,6 +84,7 @@ struct pnfs_layoutdriver_type { const char *name; struct module *owner; unsigned flags; + unsigned max_deviceinfo_size; int (*set_layoutdriver) (struct nfs_server *, const struct nfs_fh *); int (*clear_layoutdriver) (struct nfs_server *); @@ -126,6 +127,9 @@ struct pnfs_layoutdriver_type { enum pnfs_try_status (*write_pagelist)(struct nfs_pgio_header *, int); void (*free_deviceid_node) (struct nfs4_deviceid_node *); + struct nfs4_deviceid_node * (*alloc_deviceid_node) + (struct nfs_server *server, struct pnfs_device *pdev, + gfp_t gfp_flags); void (*encode_layoutreturn) (struct pnfs_layout_hdr *layoutid, struct xdr_stream *xdr, @@ -261,11 +265,12 @@ struct nfs4_deviceid_node { atomic_t ref; }; -struct nfs4_deviceid_node *nfs4_find_get_deviceid(const struct pnfs_layoutdriver_type *, const struct nfs_client *, const struct nfs4_deviceid *); +struct nfs4_deviceid_node * +nfs4_find_get_deviceid(struct nfs_server *server, + const struct nfs4_deviceid *id, struct rpc_cred *cred, + gfp_t gfp_mask); void nfs4_delete_deviceid(const struct pnfs_layoutdriver_type *, const struct nfs_client *, const struct nfs4_deviceid *); -void nfs4_init_deviceid_node(struct nfs4_deviceid_node *, - const struct pnfs_layoutdriver_type *, - const struct nfs_client *, +void nfs4_init_deviceid_node(struct nfs4_deviceid_node *, struct nfs_server *, const struct nfs4_deviceid *); struct nfs4_deviceid_node *nfs4_insert_deviceid_node(struct nfs4_deviceid_node *); bool nfs4_put_deviceid_node(struct nfs4_deviceid_node *); -- cgit v1.1 From 9dd2fcd32f488ea89c2227cc56069446147376e8 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 2 Sep 2014 21:27:58 -0700 Subject: pnfs: add a common GETDEVICELIST implementation At a simple helper to issue a GETDEVICELIST operation and pre-load the device id cache based on the result. Signed-off-by: Christoph Hellwig Signed-off-by: Trond Myklebust --- fs/nfs/pnfs.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'fs/nfs/pnfs.h') diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h index ce89ae3..3eeca49 100644 --- a/fs/nfs/pnfs.h +++ b/fs/nfs/pnfs.h @@ -277,6 +277,8 @@ bool nfs4_put_deviceid_node(struct nfs4_deviceid_node *); void nfs4_mark_deviceid_unavailable(struct nfs4_deviceid_node *node); bool nfs4_test_deviceid_unavailable(struct nfs4_deviceid_node *node); void nfs4_deviceid_purge_client(const struct nfs_client *); +int nfs4_deviceid_getdevicelist(struct nfs_server *server, + const struct nfs_fh *fh); static inline struct pnfs_layout_segment * pnfs_get_lseg(struct pnfs_layout_segment *lseg) -- cgit v1.1 From 30ff0603ca4d66c8244efc80ea8470d3d04aee8b Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 2 Sep 2014 21:27:59 -0700 Subject: pnfs: add a nfs4_get_deviceid helper This will be used by the block layout driver when splitting extents. Signed-off-by: Christoph Hellwig Signed-off-by: Trond Myklebust --- fs/nfs/pnfs.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'fs/nfs/pnfs.h') diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h index 3eeca49..d84fe29 100644 --- a/fs/nfs/pnfs.h +++ b/fs/nfs/pnfs.h @@ -280,6 +280,13 @@ void nfs4_deviceid_purge_client(const struct nfs_client *); int nfs4_deviceid_getdevicelist(struct nfs_server *server, const struct nfs_fh *fh); +static inline struct nfs4_deviceid_node * +nfs4_get_deviceid(struct nfs4_deviceid_node *d) +{ + atomic_inc(&d->ref); + return d; +} + static inline struct pnfs_layout_segment * pnfs_get_lseg(struct pnfs_layout_segment *lseg) { -- cgit v1.1 From d4b18c3e00b8d18fbd316abe9639b91ad416e1f3 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 10 Sep 2014 17:36:31 -0700 Subject: pnfs: remove GETDEVICELIST implementation The current GETDEVICELIST implementation is buggy in that it doesn't handle cursors correctly, and in that it returns an error if the server returns NFSERR_NOTSUPP. Given that there is no actual need for GETDEVICELIST, it has various issues and might get removed for NFSv4.2 stop using it in the blocklayout driver, and thus the Linux NFS client as whole. Signed-off-by: Christoph Hellwig Signed-off-by: Trond Myklebust --- fs/nfs/pnfs.h | 5 ----- 1 file changed, 5 deletions(-) (limited to 'fs/nfs/pnfs.h') diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h index d84fe29..b10f12f 100644 --- a/fs/nfs/pnfs.h +++ b/fs/nfs/pnfs.h @@ -180,9 +180,6 @@ extern int pnfs_register_layoutdriver(struct pnfs_layoutdriver_type *); extern void pnfs_unregister_layoutdriver(struct pnfs_layoutdriver_type *); /* nfs4proc.c */ -extern int nfs4_proc_getdevicelist(struct nfs_server *server, - const struct nfs_fh *fh, - struct pnfs_devicelist *devlist); extern int nfs4_proc_getdeviceinfo(struct nfs_server *server, struct pnfs_device *dev, struct rpc_cred *cred); @@ -277,8 +274,6 @@ bool nfs4_put_deviceid_node(struct nfs4_deviceid_node *); void nfs4_mark_deviceid_unavailable(struct nfs4_deviceid_node *node); bool nfs4_test_deviceid_unavailable(struct nfs4_deviceid_node *node); void nfs4_deviceid_purge_client(const struct nfs_client *); -int nfs4_deviceid_getdevicelist(struct nfs_server *server, - const struct nfs_fh *fh); static inline struct nfs4_deviceid_node * nfs4_get_deviceid(struct nfs4_deviceid_node *d) -- cgit v1.1 From 88ac815cdbef93dec8382b3531ef90474dd102f2 Mon Sep 17 00:00:00 2001 From: Peng Tao Date: Fri, 12 Sep 2014 11:04:10 +0800 Subject: nfs41: change PNFS_LAYOUTRET_ON_SETATTR to only return on truncation to smaller size Both blocks layout and objects layout want to use it to avoid CB_LAYOUTRECALL but that should only happen if client is doing truncation to a smaller size. For other cases, we let server decide if it wants to recall client's layouts. Change PNFS_LAYOUTRET_ON_SETATTR to follow the logic and not to send layoutreturn unnecessarily. Cc: Christoph Hellwig Cc: Boaz Harrosh Signed-off-by: Peng Tao Signed-off-by: Trond Myklebust --- fs/nfs/pnfs.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'fs/nfs/pnfs.h') diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h index b10f12f..693ce42 100644 --- a/fs/nfs/pnfs.h +++ b/fs/nfs/pnfs.h @@ -69,7 +69,8 @@ enum { }; enum layoutdriver_policy_flags { - /* Should the pNFS client commit and return the layout upon a setattr */ + /* Should the pNFS client commit and return the layout upon truncate to + * a smaller size */ PNFS_LAYOUTRET_ON_SETATTR = 1 << 0, PNFS_LAYOUTRET_ON_ERROR = 1 << 1, PNFS_READ_WHOLE_PAGE = 1 << 2, -- cgit v1.1